1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set sw=4 ts=8 et tw=78:
4 * ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the
17 * The Original Code is Mozilla Communicator client code, released
20 * The Initial Developer of the Original Code is
21 * Netscape Communications Corporation.
22 * Portions created by the Initial Developer are Copyright (C) 1998
23 * the Initial Developer. All Rights Reserved.
27 * Alternatively, the contents of this file may be used under the terms of
28 * either of the GNU General Public License Version 2 or later (the "GPL"),
29 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
39 * ***** END LICENSE BLOCK ***** */
42 * JS regular expressions, after Perl.
49 #include "jsarena.h" /* Added by JSIFY */
50 #include "jsutil.h" /* Added by JSIFY */
54 #include "jsbuiltins.h"
56 #include "jsversion.h"
67 #include "jsstaticcheck.h"
72 using namespace avmplus
;
73 using namespace nanojit
;
77 #define REOP_DEF(opcode, name) opcode,
78 #include "jsreops.tbl"
80 REOP_LIMIT
/* META: no operator >= to this */
83 #define REOP_IS_SIMPLE(op) ((op) <= REOP_NCLASS)
86 const char *reop_names
[] = {
87 #define REOP_DEF(opcode, name) name,
88 #include "jsreops.tbl"
96 re_debug(const char *fmt
, ...) __attribute__ ((format(printf
, 1, 2)));
101 re_debug(const char *fmt
, ...)
107 retval
= vprintf(fmt
, ap
);
113 re_debug_chars(const jschar
*chrs
, size_t length
)
118 while (*chrs
&& i
++ < length
) {
119 putchar((char)*chrs
++);
123 #else /* !REGEXP_DEBUG */
124 /* This should be optimized to a no-op by our tier-1 compilers. */
126 re_debug(const char *fmt
, ...)
132 re_debug_chars(const jschar
*chrs
, size_t length
)
135 #endif /* !REGEXP_DEBUG */
138 REOp op
; /* r.e. op bytecode */
139 RENode
*next
; /* next in concatenation order */
140 void *kid
; /* first operand */
142 void *kid2
; /* second operand */
143 jsint num
; /* could be a number */
144 size_t parenIndex
; /* or a parenthesis index */
145 struct { /* or a quantifier range */
150 struct { /* or a character class */
152 size_t kidlen
; /* length of string at kid, in jschars */
153 size_t index
; /* index into class list */
154 uint16 bmsize
; /* bitmap size, based on max char code */
157 struct { /* or a literal sequence */
158 jschar chr
; /* of one character */
159 size_t length
; /* or many (via the kid) */
162 RENode
*kid2
; /* second operand from ALT */
163 jschar ch1
; /* match char for ALTPREREQ */
164 jschar ch2
; /* ditto, or class index for ALTPREREQ2 */
169 #define RE_IS_LETTER(c) (((c >= 'A') && (c <= 'Z')) || \
170 ((c >= 'a') && (c <= 'z')) )
171 #define RE_IS_LINE_TERM(c) ((c == '\n') || (c == '\r') || \
172 (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
174 #define CLASS_CACHE_SIZE 4
176 typedef struct CompilerState
{
178 JSTokenStream
*tokenStream
; /* For reporting errors */
179 const jschar
*cpbegin
;
183 size_t classCount
; /* number of [] encountered */
184 size_t treeDepth
; /* maximum depth of parse tree */
185 size_t progLength
; /* estimated bytecode length */
187 size_t classBitmapsMem
; /* memory to hold all class bitmaps */
189 const jschar
*start
; /* small cache of class strings */
190 size_t length
; /* since they're often the same */
192 } classCache
[CLASS_CACHE_SIZE
];
196 typedef struct EmitStateStackEntry
{
197 jsbytecode
*altHead
; /* start of REOP_ALT* opcode */
198 jsbytecode
*nextAltFixup
; /* fixup pointer to next-alt offset */
199 jsbytecode
*nextTermFixup
; /* fixup ptr. to REOP_JUMP offset */
200 jsbytecode
*endTermFixup
; /* fixup ptr. to REOPT_ALTPREREQ* offset */
201 RENode
*continueNode
; /* original REOP_ALT* node being stacked */
202 jsbytecode continueOp
; /* REOP_JUMP or REOP_ENDALT continuation */
203 JSPackedBool jumpToJumpFlag
; /* true if we've patched jump-to-jump to
204 avoid 16-bit unsigned offset overflow */
205 } EmitStateStackEntry
;
208 * Immediate operand sizes and getter/setters. Unlike the ones in jsopcode.h,
209 * the getters and setters take the pc of the offset, not of the opcode before
213 #define GET_ARG(pc) ((uint16)(((pc)[0] << 8) | (pc)[1]))
214 #define SET_ARG(pc, arg) ((pc)[0] = (jsbytecode) ((arg) >> 8), \
215 (pc)[1] = (jsbytecode) (arg))
217 #define OFFSET_LEN ARG_LEN
218 #define OFFSET_MAX (JS_BIT(ARG_LEN * 8) - 1)
219 #define GET_OFFSET(pc) GET_ARG(pc)
222 * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
223 * For sanity, we limit it to 2^24 bytes.
225 #define TREE_DEPTH_MAX (JS_BIT(24) / sizeof(EmitStateStackEntry))
228 * The maximum memory that can be allocated for class bitmaps.
229 * For sanity, we limit it to 2^24 bytes.
231 #define CLASS_BITMAPS_MEM_LIMIT JS_BIT(24)
234 * Functions to get size and write/read bytecode that represent small indexes
236 * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
237 * indicates that the following byte brings more bits to the index. Otherwise
238 * this is the last byte in the index bytecode representing highest index bits.
241 GetCompactIndexWidth(size_t index
)
245 for (width
= 1; (index
>>= 7) != 0; ++width
) { }
249 static JS_ALWAYS_INLINE jsbytecode
*
250 WriteCompactIndex(jsbytecode
*pc
, size_t index
)
254 while ((next
= index
>> 7) != 0) {
255 *pc
++ = (jsbytecode
)(index
| 0x80);
258 *pc
++ = (jsbytecode
)index
;
262 static JS_ALWAYS_INLINE jsbytecode
*
263 ReadCompactIndex(jsbytecode
*pc
, size_t *result
)
268 if ((nextByte
& 0x80) == 0) {
270 * Short-circuit the most common case when compact index <= 127.
275 *result
= 0x7F & nextByte
;
278 *result
|= (nextByte
& 0x7F) << shift
;
280 } while ((nextByte
& 0x80) != 0);
285 typedef struct RECapture
{
286 ptrdiff_t index
; /* start of contents, -1 for empty */
287 size_t length
; /* length of capture */
290 typedef struct REMatchState
{
292 RECapture parens
[1]; /* first of 're->parenCount' captures,
293 allocated at end of this struct */
296 struct REBackTrackData
;
298 typedef struct REProgState
{
299 jsbytecode
*continue_pc
; /* current continuation data */
300 jsbytecode continue_op
;
301 ptrdiff_t index
; /* progress in text */
302 size_t parenSoFar
; /* highest indexed paren started */
305 uintN min
; /* current quantifier limits */
309 size_t top
; /* backtrack stack state */
315 typedef struct REBackTrackData
{
316 size_t sz
; /* size of previous stack entry */
317 jsbytecode
*backtrack_pc
; /* where to backtrack to */
318 jsbytecode backtrack_op
;
319 const jschar
*cp
; /* index in text of match at backtrack */
320 size_t parenIndex
; /* start index of saved paren contents */
321 size_t parenCount
; /* # of saved paren contents */
322 size_t saveStateStackTop
; /* number of parent states */
323 /* saved parent states follow */
324 /* saved paren contents follow */
327 #define INITIAL_STATESTACK 100
328 #define INITIAL_BACKTRACK 8000
330 typedef struct REGlobalData
{
332 JSRegExp
*regexp
; /* the RE in execution */
333 JSBool ok
; /* runtime error (out_of_memory only?) */
334 size_t start
; /* offset to start at */
335 ptrdiff_t skipped
; /* chars skipped anchoring this r.e. */
336 const jschar
*cpbegin
; /* text base address */
337 const jschar
*cpend
; /* text limit address */
339 REProgState
*stateStack
; /* stack of state of current parents */
340 size_t stateStackTop
;
341 size_t stateStackLimit
;
343 REBackTrackData
*backTrackStack
;/* stack of matched-so-far positions */
344 REBackTrackData
*backTrackSP
;
345 size_t backTrackStackSize
;
346 size_t cursz
; /* size of current stack entry */
347 size_t backTrackCount
; /* how many times we've backtracked */
348 size_t backTrackLimit
; /* upper limit on backtrack states */
352 * 1. If IgnoreCase is false, return ch.
353 * 2. Let u be ch converted to upper case as if by calling
354 * String.prototype.toUpperCase on the one-character string ch.
355 * 3. If u does not consist of a single character, return ch.
356 * 4. Let cu be u's character.
357 * 5. If ch's code point value is greater than or equal to decimal 128 and cu's
358 * code point value is less than decimal 128, then return ch.
361 static JS_ALWAYS_INLINE uintN
366 JS_ASSERT((uintN
) (jschar
) ch
== ch
);
368 if (ch
- (uintN
) 'a' <= (uintN
) ('z' - 'a'))
369 ch
-= (uintN
) ('a' - 'A');
374 return (cu
< 128) ? ch
: cu
;
377 static JS_ALWAYS_INLINE uintN
380 JS_ASSERT((uintN
) (jschar
) ch
== ch
);
382 if (ch
- (uintN
) 'A' <= (uintN
) ('Z' - 'A'))
383 ch
+= (uintN
) ('a' - 'A');
387 return JS_TOLOWER(ch
);
390 /* Construct and initialize an RENode, returning NULL for out-of-memory */
392 NewRENode(CompilerState
*state
, REOp op
)
398 JS_ARENA_ALLOCATE_CAST(ren
, RENode
*, &cx
->tempPool
, sizeof *ren
);
400 js_ReportOutOfScriptQuota(cx
);
410 * Validates and converts hex ascii value.
413 isASCIIHexDigit(jschar c
, uintN
*digit
)
424 if (cv
>= 'a' && cv
<= 'f') {
425 *digit
= cv
- 'a' + 10;
434 const jschar
*errPos
;
439 ReportRegExpErrorHelper(CompilerState
*state
, uintN flags
, uintN errorNumber
,
442 if (state
->tokenStream
) {
443 return js_ReportCompileErrorNumber(state
->context
, state
->tokenStream
,
444 NULL
, JSREPORT_UC
| flags
,
447 return JS_ReportErrorFlagsAndNumberUC(state
->context
, flags
,
448 js_GetErrorMessage
, NULL
,
453 ReportRegExpError(CompilerState
*state
, uintN flags
, uintN errorNumber
)
455 return ReportRegExpErrorHelper(state
, flags
, errorNumber
, NULL
);
459 * Process the op against the two top operands, reducing them to a single
460 * operand in the penultimate slot. Update progLength and treeDepth.
463 ProcessOp(CompilerState
*state
, REOpData
*opData
, RENode
**operandStack
,
468 switch (opData
->op
) {
470 result
= NewRENode(state
, REOP_ALT
);
473 result
->kid
= operandStack
[operandSP
- 2];
474 result
->u
.kid2
= operandStack
[operandSP
- 1];
475 operandStack
[operandSP
- 2] = result
;
477 if (state
->treeDepth
== TREE_DEPTH_MAX
) {
478 ReportRegExpError(state
, JSREPORT_ERROR
, JSMSG_REGEXP_TOO_COMPLEX
);
484 * Look at both alternates to see if there's a FLAT or a CLASS at
485 * the start of each. If so, use a prerequisite match.
487 if (((RENode
*) result
->kid
)->op
== REOP_FLAT
&&
488 ((RENode
*) result
->u
.kid2
)->op
== REOP_FLAT
&&
489 (state
->flags
& JSREG_FOLD
) == 0) {
490 result
->op
= REOP_ALTPREREQ
;
491 result
->u
.altprereq
.ch1
= ((RENode
*) result
->kid
)->u
.flat
.chr
;
492 result
->u
.altprereq
.ch2
= ((RENode
*) result
->u
.kid2
)->u
.flat
.chr
;
493 /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
494 JUMP, <end> ... ENDALT */
495 state
->progLength
+= 13;
498 if (((RENode
*) result
->kid
)->op
== REOP_CLASS
&&
499 ((RENode
*) result
->kid
)->u
.ucclass
.index
< 256 &&
500 ((RENode
*) result
->u
.kid2
)->op
== REOP_FLAT
&&
501 (state
->flags
& JSREG_FOLD
) == 0) {
502 result
->op
= REOP_ALTPREREQ2
;
503 result
->u
.altprereq
.ch1
= ((RENode
*) result
->u
.kid2
)->u
.flat
.chr
;
504 result
->u
.altprereq
.ch2
= ((RENode
*) result
->kid
)->u
.ucclass
.index
;
505 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
506 JUMP, <end> ... ENDALT */
507 state
->progLength
+= 13;
510 if (((RENode
*) result
->kid
)->op
== REOP_FLAT
&&
511 ((RENode
*) result
->u
.kid2
)->op
== REOP_CLASS
&&
512 ((RENode
*) result
->u
.kid2
)->u
.ucclass
.index
< 256 &&
513 (state
->flags
& JSREG_FOLD
) == 0) {
514 result
->op
= REOP_ALTPREREQ2
;
515 result
->u
.altprereq
.ch1
= ((RENode
*) result
->kid
)->u
.flat
.chr
;
516 result
->u
.altprereq
.ch2
=
517 ((RENode
*) result
->u
.kid2
)->u
.ucclass
.index
;
518 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
519 JUMP, <end> ... ENDALT */
520 state
->progLength
+= 13;
523 /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
524 state
->progLength
+= 7;
529 result
= operandStack
[operandSP
- 2];
531 result
= result
->next
;
532 result
->next
= operandStack
[operandSP
- 1];
536 case REOP_ASSERT_NOT
:
539 /* These should have been processed by a close paren. */
540 ReportRegExpErrorHelper(state
, JSREPORT_ERROR
, JSMSG_MISSING_PAREN
,
550 * Parser forward declarations.
552 static JSBool
ParseTerm(CompilerState
*state
);
553 static JSBool
ParseQuantifier(CompilerState
*state
);
554 static intN
ParseMinMaxQuantifier(CompilerState
*state
, JSBool ignoreValues
);
557 * Top-down regular expression grammar, based closely on Perl4.
559 * regexp: altern A regular expression is one or more
560 * altern '|' regexp alternatives separated by vertical bar.
562 #define INITIAL_STACK_SIZE 128
565 ParseRegExp(CompilerState
*state
)
569 REOpData
*operatorStack
;
570 RENode
**operandStack
;
573 JSBool result
= JS_FALSE
;
575 intN operatorSP
= 0, operatorStackSize
= INITIAL_STACK_SIZE
;
576 intN operandSP
= 0, operandStackSize
= INITIAL_STACK_SIZE
;
578 /* Watch out for empty regexp */
579 if (state
->cp
== state
->cpend
) {
580 state
->result
= NewRENode(state
, REOP_EMPTY
);
581 return (state
->result
!= NULL
);
584 operatorStack
= (REOpData
*)
585 JS_malloc(state
->context
, sizeof(REOpData
) * operatorStackSize
);
589 operandStack
= (RENode
**)
590 JS_malloc(state
->context
, sizeof(RENode
*) * operandStackSize
);
595 parenIndex
= state
->parenCount
;
596 if (state
->cp
== state
->cpend
) {
598 * If we are at the end of the regexp and we're short one or more
599 * operands, the regexp must have the form /x|/ or some such, with
600 * left parentheses making us short more than one operand.
602 if (operatorSP
>= operandSP
) {
603 operand
= NewRENode(state
, REOP_EMPTY
);
609 switch (*state
->cp
) {
612 if (state
->cp
+ 1 < state
->cpend
&&
614 (state
->cp
[1] == '=' ||
615 state
->cp
[1] == '!' ||
616 state
->cp
[1] == ':')) {
617 switch (state
->cp
[1]) {
620 /* ASSERT, <next>, ... ASSERTTEST */
621 state
->progLength
+= 4;
624 op
= REOP_ASSERT_NOT
;
625 /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
626 state
->progLength
+= 4;
635 /* LPAREN, <index>, ... RPAREN, <index> */
637 += 2 * (1 + GetCompactIndexWidth(parenIndex
));
639 if (state
->parenCount
== 65535) {
640 ReportRegExpError(state
, JSREPORT_ERROR
,
641 JSMSG_TOO_MANY_PARENS
);
649 * If there's no stacked open parenthesis, throw syntax error.
651 for (i
= operatorSP
- 1; ; i
--) {
653 ReportRegExpError(state
, JSREPORT_ERROR
,
654 JSMSG_UNMATCHED_RIGHT_PAREN
);
657 if (operatorStack
[i
].op
== REOP_ASSERT
||
658 operatorStack
[i
].op
== REOP_ASSERT_NOT
||
659 operatorStack
[i
].op
== REOP_LPARENNON
||
660 operatorStack
[i
].op
== REOP_LPAREN
) {
667 /* Expected an operand before these, so make an empty one */
668 operand
= NewRENode(state
, REOP_EMPTY
);
674 if (!ParseTerm(state
))
676 operand
= state
->result
;
678 if (operandSP
== operandStackSize
) {
680 operandStackSize
+= operandStackSize
;
682 JS_realloc(state
->context
, operandStack
,
683 sizeof(RENode
*) * operandStackSize
);
688 operandStack
[operandSP
++] = operand
;
693 /* At the end; process remaining operators. */
695 if (state
->cp
== state
->cpend
) {
698 if (!ProcessOp(state
, &operatorStack
[operatorSP
],
699 operandStack
, operandSP
))
703 JS_ASSERT(operandSP
== 1);
704 state
->result
= operandStack
[0];
709 switch (*state
->cp
) {
711 /* Process any stacked 'concat' operators */
714 operatorStack
[operatorSP
- 1].op
== REOP_CONCAT
) {
716 if (!ProcessOp(state
, &operatorStack
[operatorSP
],
717 operandStack
, operandSP
)) {
727 * If there's no stacked open parenthesis, throw syntax error.
729 for (i
= operatorSP
- 1; ; i
--) {
731 ReportRegExpError(state
, JSREPORT_ERROR
,
732 JSMSG_UNMATCHED_RIGHT_PAREN
);
735 if (operatorStack
[i
].op
== REOP_ASSERT
||
736 operatorStack
[i
].op
== REOP_ASSERT_NOT
||
737 operatorStack
[i
].op
== REOP_LPARENNON
||
738 operatorStack
[i
].op
== REOP_LPAREN
) {
744 /* Process everything on the stack until the open parenthesis. */
746 JS_ASSERT(operatorSP
);
748 switch (operatorStack
[operatorSP
].op
) {
750 case REOP_ASSERT_NOT
:
752 operand
= NewRENode(state
, operatorStack
[operatorSP
].op
);
755 operand
->u
.parenIndex
=
756 operatorStack
[operatorSP
].parenIndex
;
757 JS_ASSERT(operandSP
);
758 operand
->kid
= operandStack
[operandSP
- 1];
759 operandStack
[operandSP
- 1] = operand
;
760 if (state
->treeDepth
== TREE_DEPTH_MAX
) {
761 ReportRegExpError(state
, JSREPORT_ERROR
,
762 JSMSG_REGEXP_TOO_COMPLEX
);
769 state
->result
= operandStack
[operandSP
- 1];
770 if (!ParseQuantifier(state
))
772 operandStack
[operandSP
- 1] = state
->result
;
773 goto restartOperator
;
775 if (!ProcessOp(state
, &operatorStack
[operatorSP
],
776 operandStack
, operandSP
))
786 const jschar
*errp
= state
->cp
;
788 if (ParseMinMaxQuantifier(state
, JS_TRUE
) < 0) {
790 * This didn't even scan correctly as a quantifier, so we should
804 ReportRegExpErrorHelper(state
, JSREPORT_ERROR
, JSMSG_BAD_QUANTIFIER
,
810 /* Anything else is the start of the next term. */
813 if (operatorSP
== operatorStackSize
) {
815 operatorStackSize
+= operatorStackSize
;
817 JS_realloc(state
->context
, operatorStack
,
818 sizeof(REOpData
) * operatorStackSize
);
823 operatorStack
[operatorSP
].op
= op
;
824 operatorStack
[operatorSP
].errPos
= state
->cp
;
825 operatorStack
[operatorSP
++].parenIndex
= parenIndex
;
831 JS_free(state
->context
, operatorStack
);
833 JS_free(state
->context
, operandStack
);
838 * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
839 * its being on the stack, and to propagate errors to its callers.
841 #define JSREG_FIND_PAREN_COUNT 0x8000
842 #define JSREG_FIND_PAREN_ERROR 0x4000
845 * Magic return value from FindParenCount and GetDecimalValue, to indicate
846 * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
847 * its findMax parameter is non-null.
849 #define OVERFLOW_VALUE ((uintN)-1)
852 FindParenCount(CompilerState
*state
)
857 if (state
->flags
& JSREG_FIND_PAREN_COUNT
)
858 return OVERFLOW_VALUE
;
861 * Copy state into temp, flag it so we never report an invalid backref,
862 * and reset its members to parse the entire regexp. This is obviously
863 * suboptimal, but GetDecimalValue calls us only if a backref appears to
864 * refer to a forward parenthetical, which is rare.
867 temp
.flags
|= JSREG_FIND_PAREN_COUNT
;
868 temp
.cp
= temp
.cpbegin
;
873 temp
.classBitmapsMem
= 0;
874 for (i
= 0; i
< CLASS_CACHE_SIZE
; i
++)
875 temp
.classCache
[i
].start
= NULL
;
877 if (!ParseRegExp(&temp
)) {
878 state
->flags
|= JSREG_FIND_PAREN_ERROR
;
879 return OVERFLOW_VALUE
;
881 return temp
.parenCount
;
885 * Extract and return a decimal value at state->cp. The initial character c
886 * has already been read. Return OVERFLOW_VALUE if the result exceeds max.
887 * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
888 * state->flags to discover whether an error occurred under findMax.
891 GetDecimalValue(jschar c
, uintN max
, uintN (*findMax
)(CompilerState
*state
),
892 CompilerState
*state
)
894 uintN value
= JS7_UNDEC(c
);
895 JSBool overflow
= (value
> max
&& (!findMax
|| value
> findMax(state
)));
897 /* The following restriction allows simpler overflow checks. */
898 JS_ASSERT(max
<= ((uintN
)-1 - 9) / 10);
899 while (state
->cp
< state
->cpend
) {
903 value
= 10 * value
+ JS7_UNDEC(c
);
904 if (!overflow
&& value
> max
&& (!findMax
|| value
> findMax(state
)))
908 return overflow
? OVERFLOW_VALUE
: value
;
912 * Calculate the total size of the bitmap required for a class expression.
915 CalculateBitmapSize(CompilerState
*state
, RENode
*target
, const jschar
*src
,
919 JSBool inRange
= JS_FALSE
;
920 jschar c
, rangeStart
= 0;
921 uintN n
, digit
, nDigits
, i
;
923 target
->u
.ucclass
.bmsize
= 0;
924 target
->u
.ucclass
.sense
= JS_TRUE
;
931 target
->u
.ucclass
.sense
= JS_FALSE
;
935 JSBool canStartRange
= JS_TRUE
;
962 if (src
< end
&& RE_IS_LETTER(*src
)) {
963 localMax
= (uintN
) (*src
++) & 0x1F;
976 for (i
= 0; (i
< nDigits
) && (src
< end
); i
++) {
978 if (!isASCIIHexDigit(c
, &digit
)) {
980 * Back off to accepting the original
987 n
= (n
<< 4) | digit
;
992 canStartRange
= JS_FALSE
;
994 JS_ReportErrorNumber(state
->context
,
995 js_GetErrorMessage
, NULL
,
996 JSMSG_BAD_CLASS_RANGE
);
1006 canStartRange
= JS_FALSE
;
1008 JS_ReportErrorNumber(state
->context
,
1009 js_GetErrorMessage
, NULL
,
1010 JSMSG_BAD_CLASS_RANGE
);
1016 * If this is the start of a range, ensure that it's less than
1030 * This is a non-ECMA extension - decimal escapes (in this
1031 * case, octal!) are supposed to be an error inside class
1032 * ranges, but supported here for backwards compatibility.
1037 if ('0' <= c
&& c
<= '7') {
1039 n
= 8 * n
+ JS7_UNDEC(c
);
1041 if ('0' <= c
&& c
<= '7') {
1043 i
= 8 * n
+ JS7_UNDEC(c
);
1064 /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
1065 if (rangeStart
> localMax
) {
1066 JS_ReportErrorNumber(state
->context
,
1067 js_GetErrorMessage
, NULL
,
1068 JSMSG_BAD_CLASS_RANGE
);
1073 if (canStartRange
&& src
< end
- 1) {
1077 rangeStart
= (jschar
)localMax
;
1081 if (state
->flags
& JSREG_FOLD
)
1082 rangeStart
= localMax
; /* one run of the uc/dc loop below */
1085 if (state
->flags
& JSREG_FOLD
) {
1086 jschar maxch
= localMax
;
1088 for (i
= rangeStart
; i
<= localMax
; i
++) {
1093 maxch
= JS_MAX(maxch
, uch
);
1094 maxch
= JS_MAX(maxch
, dch
);
1102 target
->u
.ucclass
.bmsize
= max
;
1107 * item: assertion An item is either an assertion or
1108 * quantatom a quantified atom.
1110 * assertion: '^' Assertions match beginning of string
1111 * (or line if the class static property
1112 * RegExp.multiline is true).
1113 * '$' End of string (or line if the class
1114 * static property RegExp.multiline is
1116 * '\b' Word boundary (between \w and \W).
1117 * '\B' Word non-boundary.
1119 * quantatom: atom An unquantified atom.
1120 * quantatom '{' n ',' m '}'
1121 * Atom must occur between n and m times.
1122 * quantatom '{' n ',' '}' Atom must occur at least n times.
1123 * quantatom '{' n '}' Atom must occur exactly n times.
1124 * quantatom '*' Zero or more times (same as {0,}).
1125 * quantatom '+' One or more times (same as {1,}).
1126 * quantatom '?' Zero or one time (same as {0,1}).
1128 * any of which can be optionally followed by '?' for ungreedy
1130 * atom: '(' regexp ')' A parenthesized regexp (what matched
1131 * can be addressed using a backreference,
1133 * '.' Matches any char except '\n'.
1134 * '[' classlist ']' A character class.
1135 * '[' '^' classlist ']' A negated character class.
1137 * '\n' Newline (Line Feed).
1138 * '\r' Carriage Return.
1139 * '\t' Horizontal Tab.
1140 * '\v' Vertical Tab.
1141 * '\d' A digit (same as [0-9]).
1143 * '\w' A word character, [0-9a-z_A-Z].
1144 * '\W' A non-word character.
1145 * '\s' A whitespace character, [ \b\f\n\r\t\v].
1146 * '\S' A non-whitespace character.
1147 * '\' n A backreference to the nth (n decimal
1148 * and positive) parenthesized expression.
1149 * '\' octal An octal escape sequence (octal must be
1150 * two or three digits long, unless it is
1151 * 0 for the null character).
1152 * '\x' hex A hex escape (hex must be two digits).
1153 * '\u' unicode A unicode escape (must be four digits).
1154 * '\c' ctrl A control character, ctrl is a letter.
1155 * '\' literalatomchar Any character except one of the above
1156 * that follow '\' in an atom.
1157 * otheratomchar Any character not first among the other
1158 * atom right-hand sides.
1161 ParseTerm(CompilerState
*state
)
1163 jschar c
= *state
->cp
++;
1165 uintN num
, tmp
, n
, i
;
1166 const jschar
*termStart
;
1169 /* assertions and atoms */
1171 state
->result
= NewRENode(state
, REOP_BOL
);
1174 state
->progLength
++;
1177 state
->result
= NewRENode(state
, REOP_EOL
);
1180 state
->progLength
++;
1183 if (state
->cp
>= state
->cpend
) {
1184 /* a trailing '\' is an error */
1185 ReportRegExpError(state
, JSREPORT_ERROR
, JSMSG_TRAILING_SLASH
);
1190 /* assertion escapes */
1192 state
->result
= NewRENode(state
, REOP_WBDRY
);
1195 state
->progLength
++;
1198 state
->result
= NewRENode(state
, REOP_WNONBDRY
);
1201 state
->progLength
++;
1203 /* Decimal escape */
1205 /* Give a strict warning. See also the note below. */
1206 if (!ReportRegExpError(state
, JSREPORT_WARNING
| JSREPORT_STRICT
,
1207 JSMSG_INVALID_BACKREF
)) {
1212 while (state
->cp
< state
->cpend
) {
1214 if (c
< '0' || '7' < c
)
1217 tmp
= 8 * num
+ (uintN
)JS7_UNDEC(c
);
1224 state
->result
= NewRENode(state
, REOP_FLAT
);
1227 state
->result
->u
.flat
.chr
= c
;
1228 state
->result
->u
.flat
.length
= 1;
1229 state
->progLength
+= 3;
1240 termStart
= state
->cp
- 1;
1241 num
= GetDecimalValue(c
, state
->parenCount
, FindParenCount
, state
);
1242 if (state
->flags
& JSREG_FIND_PAREN_ERROR
)
1244 if (num
== OVERFLOW_VALUE
) {
1245 /* Give a strict mode warning. */
1246 if (!ReportRegExpError(state
,
1247 JSREPORT_WARNING
| JSREPORT_STRICT
,
1249 ? JSMSG_INVALID_BACKREF
1250 : JSMSG_BAD_BACKREF
)) {
1255 * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
1256 * error here. However, for compatibility with IE, we treat the
1257 * whole backref as flat if the first character in it is not a
1258 * valid octal character, and as an octal escape otherwise.
1260 state
->cp
= termStart
;
1262 /* Treat this as flat. termStart - 1 is the \. */
1267 /* Treat this as an octal escape. */
1270 JS_ASSERT(1 <= num
&& num
<= 0x10000);
1271 state
->result
= NewRENode(state
, REOP_BACKREF
);
1274 state
->result
->u
.parenIndex
= num
- 1;
1276 += 1 + GetCompactIndexWidth(state
->result
->u
.parenIndex
);
1278 /* Control escape */
1294 /* Control letter */
1296 if (state
->cp
< state
->cpend
&& RE_IS_LETTER(*state
->cp
)) {
1297 c
= (jschar
) (*state
->cp
++ & 0x1F);
1299 /* back off to accepting the original '\' as a literal */
1304 /* HexEscapeSequence */
1308 /* UnicodeEscapeSequence */
1313 for (i
= 0; i
< nDigits
&& state
->cp
< state
->cpend
; i
++) {
1316 if (!isASCIIHexDigit(c
, &digit
)) {
1318 * Back off to accepting the original 'u' or 'x' as a
1325 n
= (n
<< 4) | digit
;
1329 /* Character class escapes */
1331 state
->result
= NewRENode(state
, REOP_DIGIT
);
1335 state
->progLength
++;
1338 state
->result
= NewRENode(state
, REOP_NONDIGIT
);
1341 state
->result
= NewRENode(state
, REOP_SPACE
);
1344 state
->result
= NewRENode(state
, REOP_NONSPACE
);
1347 state
->result
= NewRENode(state
, REOP_ALNUM
);
1350 state
->result
= NewRENode(state
, REOP_NONALNUM
);
1352 /* IdentityEscape */
1354 state
->result
= NewRENode(state
, REOP_FLAT
);
1357 state
->result
->u
.flat
.chr
= c
;
1358 state
->result
->u
.flat
.length
= 1;
1359 state
->result
->kid
= (void *) (state
->cp
- 1);
1360 state
->progLength
+= 3;
1365 state
->result
= NewRENode(state
, REOP_CLASS
);
1368 termStart
= state
->cp
;
1369 state
->result
->u
.ucclass
.startIndex
= termStart
- state
->cpbegin
;
1371 if (state
->cp
== state
->cpend
) {
1372 ReportRegExpErrorHelper(state
, JSREPORT_ERROR
,
1373 JSMSG_UNTERM_CLASS
, termStart
);
1377 if (*state
->cp
== '\\') {
1379 if (state
->cp
!= state
->cpend
)
1383 if (*state
->cp
== ']') {
1384 state
->result
->u
.ucclass
.kidlen
= state
->cp
- termStart
;
1389 for (i
= 0; i
< CLASS_CACHE_SIZE
; i
++) {
1390 if (!state
->classCache
[i
].start
) {
1391 state
->classCache
[i
].start
= termStart
;
1392 state
->classCache
[i
].length
= state
->result
->u
.ucclass
.kidlen
;
1393 state
->classCache
[i
].index
= state
->classCount
;
1396 if (state
->classCache
[i
].length
==
1397 state
->result
->u
.ucclass
.kidlen
) {
1398 for (n
= 0; ; n
++) {
1399 if (n
== state
->classCache
[i
].length
) {
1400 state
->result
->u
.ucclass
.index
1401 = state
->classCache
[i
].index
;
1404 if (state
->classCache
[i
].start
[n
] != termStart
[n
])
1409 state
->result
->u
.ucclass
.index
= state
->classCount
++;
1413 * Call CalculateBitmapSize now as we want any errors it finds
1414 * to be reported during the parse phase, not at execution.
1416 if (!CalculateBitmapSize(state
, state
->result
, termStart
, state
->cp
++))
1419 * Update classBitmapsMem with number of bytes to hold bmsize bits,
1420 * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
1421 * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
1423 n
= (state
->result
->u
.ucclass
.bmsize
>> 3) + 1;
1424 if (n
> CLASS_BITMAPS_MEM_LIMIT
- state
->classBitmapsMem
) {
1425 ReportRegExpError(state
, JSREPORT_ERROR
, JSMSG_REGEXP_TOO_COMPLEX
);
1428 state
->classBitmapsMem
+= n
;
1429 /* CLASS, <index> */
1431 += 1 + GetCompactIndexWidth(state
->result
->u
.ucclass
.index
);
1435 state
->result
= NewRENode(state
, REOP_DOT
);
1440 const jschar
*errp
= state
->cp
--;
1443 err
= ParseMinMaxQuantifier(state
, JS_TRUE
);
1454 ReportRegExpErrorHelper(state
, JSREPORT_ERROR
,
1455 JSMSG_BAD_QUANTIFIER
, state
->cp
- 1);
1459 state
->result
= NewRENode(state
, REOP_FLAT
);
1462 state
->result
->u
.flat
.chr
= c
;
1463 state
->result
->u
.flat
.length
= 1;
1464 state
->result
->kid
= (void *) (state
->cp
- 1);
1465 state
->progLength
+= 3;
1468 return ParseQuantifier(state
);
1472 ParseQuantifier(CompilerState
*state
)
1475 term
= state
->result
;
1476 if (state
->cp
< state
->cpend
) {
1477 switch (*state
->cp
) {
1479 state
->result
= NewRENode(state
, REOP_QUANT
);
1482 state
->result
->u
.range
.min
= 1;
1483 state
->result
->u
.range
.max
= (uintN
)-1;
1484 /* <PLUS>, <next> ... <ENDCHILD> */
1485 state
->progLength
+= 4;
1488 state
->result
= NewRENode(state
, REOP_QUANT
);
1491 state
->result
->u
.range
.min
= 0;
1492 state
->result
->u
.range
.max
= (uintN
)-1;
1493 /* <STAR>, <next> ... <ENDCHILD> */
1494 state
->progLength
+= 4;
1497 state
->result
= NewRENode(state
, REOP_QUANT
);
1500 state
->result
->u
.range
.min
= 0;
1501 state
->result
->u
.range
.max
= 1;
1502 /* <OPT>, <next> ... <ENDCHILD> */
1503 state
->progLength
+= 4;
1505 case '{': /* balance '}' */
1508 const jschar
*errp
= state
->cp
;
1510 err
= ParseMinMaxQuantifier(state
, JS_FALSE
);
1516 ReportRegExpErrorHelper(state
, JSREPORT_ERROR
, err
, errp
);
1525 if (state
->treeDepth
== TREE_DEPTH_MAX
) {
1526 ReportRegExpError(state
, JSREPORT_ERROR
, JSMSG_REGEXP_TOO_COMPLEX
);
1532 state
->result
->kid
= term
;
1533 if (state
->cp
< state
->cpend
&& *state
->cp
== '?') {
1535 state
->result
->u
.range
.greedy
= JS_FALSE
;
1537 state
->result
->u
.range
.greedy
= JS_TRUE
;
1543 ParseMinMaxQuantifier(CompilerState
*state
, JSBool ignoreValues
)
1547 const jschar
*errp
= state
->cp
++;
1552 min
= GetDecimalValue(c
, 0xFFFF, NULL
, state
);
1555 if (!ignoreValues
&& min
== OVERFLOW_VALUE
)
1556 return JSMSG_MIN_TOO_BIG
;
1562 max
= GetDecimalValue(c
, 0xFFFF, NULL
, state
);
1564 if (!ignoreValues
&& max
== OVERFLOW_VALUE
)
1565 return JSMSG_MAX_TOO_BIG
;
1566 if (!ignoreValues
&& min
> max
)
1567 return JSMSG_OUT_OF_ORDER
;
1575 state
->result
= NewRENode(state
, REOP_QUANT
);
1577 return JSMSG_OUT_OF_MEMORY
;
1578 state
->result
->u
.range
.min
= min
;
1579 state
->result
->u
.range
.max
= max
;
1581 * QUANT, <min>, <max>, <next> ... <ENDCHILD>
1582 * where <max> is written as compact(max+1) to make
1583 * (uintN)-1 sentinel to occupy 1 byte, not width_of(max)+1.
1585 state
->progLength
+= (1 + GetCompactIndexWidth(min
)
1586 + GetCompactIndexWidth(max
+ 1)
1597 SetForwardJumpOffset(jsbytecode
*jump
, jsbytecode
*target
)
1599 ptrdiff_t offset
= target
- jump
;
1601 /* Check that target really points forward. */
1602 JS_ASSERT(offset
>= 2);
1603 if ((size_t)offset
> OFFSET_MAX
)
1606 jump
[0] = JUMP_OFFSET_HI(offset
);
1607 jump
[1] = JUMP_OFFSET_LO(offset
);
1611 /* Copy the charset data from a character class node to the charset list
1612 * in the regexp object. */
1613 static JS_ALWAYS_INLINE RECharSet
*
1614 InitNodeCharSet(JSRegExp
*re
, RENode
*node
)
1616 RECharSet
*charSet
= &re
->classList
[node
->u
.ucclass
.index
];
1617 charSet
->converted
= JS_FALSE
;
1618 charSet
->length
= node
->u
.ucclass
.bmsize
;
1619 charSet
->u
.src
.startIndex
= node
->u
.ucclass
.startIndex
;
1620 charSet
->u
.src
.length
= node
->u
.ucclass
.kidlen
;
1621 charSet
->sense
= node
->u
.ucclass
.sense
;
1626 * Generate bytecode for the tree rooted at t using an explicit stack instead
1630 EmitREBytecode(CompilerState
*state
, JSRegExp
*re
, size_t treeDepth
,
1631 jsbytecode
*pc
, RENode
*t
)
1633 EmitStateStackEntry
*emitStateSP
, *emitStateStack
;
1636 if (treeDepth
== 0) {
1637 emitStateStack
= NULL
;
1640 (EmitStateStackEntry
*)JS_malloc(state
->context
,
1641 sizeof(EmitStateStackEntry
) *
1643 if (!emitStateStack
)
1646 emitStateSP
= emitStateStack
;
1648 JS_ASSERT(op
< REOP_LIMIT
);
1657 case REOP_ALTPREREQ2
:
1658 case REOP_ALTPREREQ
:
1659 JS_ASSERT(emitStateSP
);
1660 emitStateSP
->altHead
= pc
- 1;
1661 emitStateSP
->endTermFixup
= pc
;
1663 SET_ARG(pc
, t
->u
.altprereq
.ch1
);
1665 SET_ARG(pc
, t
->u
.altprereq
.ch2
);
1668 emitStateSP
->nextAltFixup
= pc
; /* offset to next alternate */
1671 emitStateSP
->continueNode
= t
;
1672 emitStateSP
->continueOp
= REOP_JUMP
;
1673 emitStateSP
->jumpToJumpFlag
= JS_FALSE
;
1675 JS_ASSERT((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
1676 t
= (RENode
*) t
->kid
;
1678 JS_ASSERT(op
< REOP_LIMIT
);
1682 emitStateSP
->nextTermFixup
= pc
; /* offset to following term */
1684 if (!SetForwardJumpOffset(emitStateSP
->nextAltFixup
, pc
))
1686 emitStateSP
->continueOp
= REOP_ENDALT
;
1688 JS_ASSERT((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
1689 t
= (RENode
*) t
->u
.kid2
;
1691 JS_ASSERT(op
< REOP_LIMIT
);
1696 * If we already patched emitStateSP->nextTermFixup to jump to
1697 * a nearer jump, to avoid 16-bit immediate offset overflow, we
1700 if (emitStateSP
->jumpToJumpFlag
)
1704 * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
1705 * REOP_ENDALT is executed only on successful match of the last
1706 * alternate in a group.
1708 if (!SetForwardJumpOffset(emitStateSP
->nextTermFixup
, pc
))
1710 if (t
->op
!= REOP_ALT
) {
1711 if (!SetForwardJumpOffset(emitStateSP
->endTermFixup
, pc
))
1716 * If the program is bigger than the REOP_JUMP offset range, then
1717 * we must check for alternates before this one that are part of
1718 * the same group, and fix up their jump offsets to target jumps
1719 * close enough to fit in a 16-bit unsigned offset immediate.
1721 if ((size_t)(pc
- re
->program
) > OFFSET_MAX
&&
1722 emitStateSP
> emitStateStack
) {
1723 EmitStateStackEntry
*esp
, *esp2
;
1724 jsbytecode
*alt
, *jump
;
1725 ptrdiff_t span
, header
;
1728 alt
= esp2
->altHead
;
1729 for (esp
= esp2
- 1; esp
>= emitStateStack
; --esp
) {
1730 if (esp
->continueOp
== REOP_ENDALT
&&
1731 !esp
->jumpToJumpFlag
&&
1732 esp
->nextTermFixup
+ OFFSET_LEN
== alt
&&
1733 (size_t)(pc
- ((esp
->continueNode
->op
!= REOP_ALT
)
1735 : esp
->nextTermFixup
)) > OFFSET_MAX
) {
1737 jump
= esp
->nextTermFixup
;
1740 * The span must be 1 less than the distance from
1741 * jump offset to jump offset, so we actually jump
1742 * to a REOP_JUMP bytecode, not to its offset!
1745 JS_ASSERT(jump
< esp2
->nextTermFixup
);
1746 span
= esp2
->nextTermFixup
- jump
- 1;
1747 if ((size_t)span
<= OFFSET_MAX
)
1752 } while (esp2
->continueOp
!= REOP_ENDALT
);
1755 jump
[0] = JUMP_OFFSET_HI(span
);
1756 jump
[1] = JUMP_OFFSET_LO(span
);
1758 if (esp
->continueNode
->op
!= REOP_ALT
) {
1760 * We must patch the offset at esp->endTermFixup
1761 * as well, for the REOP_ALTPREREQ{,2} opcodes.
1762 * If we're unlucky and endTermFixup is more than
1763 * OFFSET_MAX bytes from its target, we cheat by
1764 * jumping 6 bytes to the jump whose offset is at
1765 * esp->nextTermFixup, which has the same target.
1767 jump
= esp
->endTermFixup
;
1768 header
= esp
->nextTermFixup
- jump
;
1770 if ((size_t)span
> OFFSET_MAX
)
1773 jump
[0] = JUMP_OFFSET_HI(span
);
1774 jump
[1] = JUMP_OFFSET_LO(span
);
1777 esp
->jumpToJumpFlag
= JS_TRUE
;
1784 JS_ASSERT(emitStateSP
);
1785 emitStateSP
->altHead
= pc
- 1;
1786 emitStateSP
->nextAltFixup
= pc
; /* offset to next alternate */
1788 emitStateSP
->continueNode
= t
;
1789 emitStateSP
->continueOp
= REOP_JUMP
;
1790 emitStateSP
->jumpToJumpFlag
= JS_FALSE
;
1792 JS_ASSERT((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
1793 t
= (RENode
*) t
->kid
;
1795 JS_ASSERT(op
< REOP_LIMIT
);
1800 * Coalesce FLATs if possible and if it would not increase bytecode
1801 * beyond preallocated limit. The latter happens only when bytecode
1802 * size for coalesced string with offset p and length 2 exceeds 6
1803 * bytes preallocated for 2 single char nodes, i.e. when
1804 * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
1805 * GetCompactIndexWidth(p) > 4.
1806 * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
1807 * nodes strictly decreases bytecode size, the check has to be
1808 * done only for the first coalescing.
1811 GetCompactIndexWidth((jschar
*)t
->kid
- state
->cpbegin
) <= 4)
1814 t
->next
->op
== REOP_FLAT
&&
1815 (jschar
*)t
->kid
+ t
->u
.flat
.length
==
1816 (jschar
*)t
->next
->kid
) {
1817 t
->u
.flat
.length
+= t
->next
->u
.flat
.length
;
1818 t
->next
= t
->next
->next
;
1821 if (t
->kid
&& t
->u
.flat
.length
> 1) {
1822 pc
[-1] = (state
->flags
& JSREG_FOLD
) ? REOP_FLATi
: REOP_FLAT
;
1823 pc
= WriteCompactIndex(pc
, (jschar
*)t
->kid
- state
->cpbegin
);
1824 pc
= WriteCompactIndex(pc
, t
->u
.flat
.length
);
1825 } else if (t
->u
.flat
.chr
< 256) {
1826 pc
[-1] = (state
->flags
& JSREG_FOLD
) ? REOP_FLAT1i
: REOP_FLAT1
;
1827 *pc
++ = (jsbytecode
) t
->u
.flat
.chr
;
1829 pc
[-1] = (state
->flags
& JSREG_FOLD
)
1832 SET_ARG(pc
, t
->u
.flat
.chr
);
1838 JS_ASSERT(emitStateSP
);
1839 pc
= WriteCompactIndex(pc
, t
->u
.parenIndex
);
1840 emitStateSP
->continueNode
= t
;
1841 emitStateSP
->continueOp
= REOP_RPAREN
;
1843 JS_ASSERT((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
1844 t
= (RENode
*) t
->kid
;
1849 pc
= WriteCompactIndex(pc
, t
->u
.parenIndex
);
1853 pc
= WriteCompactIndex(pc
, t
->u
.parenIndex
);
1857 JS_ASSERT(emitStateSP
);
1858 emitStateSP
->nextTermFixup
= pc
;
1860 emitStateSP
->continueNode
= t
;
1861 emitStateSP
->continueOp
= REOP_ASSERTTEST
;
1863 JS_ASSERT((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
1864 t
= (RENode
*) t
->kid
;
1868 case REOP_ASSERTTEST
:
1869 case REOP_ASSERTNOTTEST
:
1870 if (!SetForwardJumpOffset(emitStateSP
->nextTermFixup
, pc
))
1874 case REOP_ASSERT_NOT
:
1875 JS_ASSERT(emitStateSP
);
1876 emitStateSP
->nextTermFixup
= pc
;
1878 emitStateSP
->continueNode
= t
;
1879 emitStateSP
->continueOp
= REOP_ASSERTNOTTEST
;
1881 JS_ASSERT((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
1882 t
= (RENode
*) t
->kid
;
1887 JS_ASSERT(emitStateSP
);
1888 if (t
->u
.range
.min
== 0 && t
->u
.range
.max
== (uintN
)-1) {
1889 pc
[-1] = (t
->u
.range
.greedy
) ? REOP_STAR
: REOP_MINIMALSTAR
;
1890 } else if (t
->u
.range
.min
== 0 && t
->u
.range
.max
== 1) {
1891 pc
[-1] = (t
->u
.range
.greedy
) ? REOP_OPT
: REOP_MINIMALOPT
;
1892 } else if (t
->u
.range
.min
== 1 && t
->u
.range
.max
== (uintN
) -1) {
1893 pc
[-1] = (t
->u
.range
.greedy
) ? REOP_PLUS
: REOP_MINIMALPLUS
;
1895 if (!t
->u
.range
.greedy
)
1896 pc
[-1] = REOP_MINIMALQUANT
;
1897 pc
= WriteCompactIndex(pc
, t
->u
.range
.min
);
1899 * Write max + 1 to avoid using size_t(max) + 1 bytes
1900 * for (uintN)-1 sentinel.
1902 pc
= WriteCompactIndex(pc
, t
->u
.range
.max
+ 1);
1904 emitStateSP
->nextTermFixup
= pc
;
1906 emitStateSP
->continueNode
= t
;
1907 emitStateSP
->continueOp
= REOP_ENDCHILD
;
1909 JS_ASSERT((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
1910 t
= (RENode
*) t
->kid
;
1915 if (!SetForwardJumpOffset(emitStateSP
->nextTermFixup
, pc
))
1920 if (!t
->u
.ucclass
.sense
)
1921 pc
[-1] = REOP_NCLASS
;
1922 pc
= WriteCompactIndex(pc
, t
->u
.ucclass
.index
);
1923 InitNodeCharSet(re
, t
);
1934 if (emitStateSP
== emitStateStack
)
1937 t
= emitStateSP
->continueNode
;
1938 op
= (REOp
) emitStateSP
->continueOp
;
1944 JS_free(state
->context
, emitStateStack
);
1948 ReportRegExpError(state
, JSREPORT_ERROR
, JSMSG_REGEXP_TOO_COMPLEX
);
1954 CompileRegExpToAST(JSContext
* cx
, JSTokenStream
* ts
,
1955 JSString
* str
, uintN flags
, CompilerState
& state
)
1960 len
= JSSTRING_LENGTH(str
);
1963 state
.tokenStream
= ts
;
1964 state
.cp
= js_UndependString(cx
, str
);
1967 state
.cpbegin
= state
.cp
;
1968 state
.cpend
= state
.cp
+ len
;
1969 state
.flags
= flags
;
1970 state
.parenCount
= 0;
1971 state
.classCount
= 0;
1972 state
.progLength
= 0;
1973 state
.treeDepth
= 0;
1974 state
.classBitmapsMem
= 0;
1975 for (i
= 0; i
< CLASS_CACHE_SIZE
; i
++)
1976 state
.classCache
[i
].start
= NULL
;
1978 if (len
!= 0 && (flags
& JSREG_FLAT
)) {
1979 state
.result
= NewRENode(&state
, REOP_FLAT
);
1982 state
.result
->u
.flat
.chr
= *state
.cpbegin
;
1983 state
.result
->u
.flat
.length
= len
;
1984 state
.result
->kid
= (void *) state
.cpbegin
;
1985 /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
1986 state
.progLength
+= 1 + GetCompactIndexWidth(0)
1987 + GetCompactIndexWidth(len
);
1991 return ParseRegExp(&state
);
1995 typedef List
<LIns
*, LIST_NonGCObjects
> LInsList
;
1997 /* Dummy GC for nanojit placement new. */
2001 HashRegExp(uint16 flags
, jschar
* s
, size_t n
)
2005 for (h
= 0; n
; s
++, n
--)
2006 h
= JS_ROTATE_LEFT32(h
, 4) ^ *s
;
2007 return (void*)(h
+ flags
);
2010 struct RESideExit
: public SideExit
{
2016 /* Return the cached fragment for the given regexp, or NULL. */
2018 LookupNativeRegExp(JSContext
* cx
, void* hash
, uint16 re_flags
,
2019 jschar
* re_chars
, size_t re_length
)
2021 Fragmento
* fragmento
= JS_TRACE_MONITOR(cx
).reFragmento
;
2022 Fragment
* fragment
= fragmento
->getLoop(hash
);
2024 if (fragment
->lastIns
) {
2025 RESideExit
* exit
= (RESideExit
*)fragment
->lastIns
->record()->exit
;
2026 if (exit
->re_flags
== re_flags
&&
2027 exit
->re_length
== re_length
&&
2028 !memcmp(exit
->re_chars
, re_chars
, re_length
)) {
2032 fragment
= fragment
->peer
;
2038 ProcessCharSet(JSContext
*cx
, JSRegExp
*re
, RECharSet
*charSet
);
2040 class RegExpNativeCompiler
{
2044 CompilerState
* cs
; /* RegExp to compile */
2048 LirBufWriter
* lirBufWriter
; /* for skip */
2054 JSBool
isCaseInsensitive() const { return cs
->flags
& JSREG_FOLD
; }
2056 JSBool
targetCurrentPoint(LIns
* ins
)
2058 if (fragment
->lirbuf
->outOMem())
2060 ins
->target(lir
->ins0(LIR_label
));
2064 JSBool
targetCurrentPoint(LInsList
& fails
)
2066 if (fragment
->lirbuf
->outOMem())
2068 LIns
* fail
= lir
->ins0(LIR_label
);
2069 for (size_t i
= 0; i
< fails
.size(); ++i
) {
2070 fails
[i
]->target(fail
);
2077 * These functions return the new position after their match operation,
2078 * or NULL if there was an error.
2080 LIns
* compileEmpty(RENode
* node
, LIns
* pos
, LInsList
& fails
)
2085 LIns
* compileFlatSingleChar(jschar ch
, LIns
* pos
, LInsList
& fails
)
2088 * Fast case-insensitive test for ASCII letters: convert text
2089 * char to lower case by bit-or-ing in 32 and compare.
2091 JSBool useFastCI
= JS_FALSE
;
2092 jschar ch2
= ch
; /* 2nd char to test for if ci */
2093 if (cs
->flags
& JSREG_FOLD
) {
2094 if ((L
'A' <= ch
&& ch
<= L
'Z') || (L
'a' <= ch
&& ch
<= L
'z')) {
2097 useFastCI
= JS_TRUE
;
2098 } else if (JS_TOLOWER(ch
) != ch
) {
2099 ch2
= JS_TOLOWER(ch
);
2100 ch
= JS_TOUPPER(ch
);
2104 LIns
* to_fail
= lir
->insBranch(LIR_jf
, lir
->ins2(LIR_lt
, pos
, cpend
), 0);
2106 LIns
* text_ch
= lir
->insLoad(LIR_ldcs
, pos
, lir
->insImm(0));
2107 LIns
* comp_ch
= useFastCI
?
2108 lir
->ins2(LIR_or
, text_ch
, lir
->insImm(32)) :
2111 fails
.add(lir
->insBranch(LIR_jf
, lir
->ins2(LIR_eq
, comp_ch
, lir
->insImm(ch
)), 0));
2113 LIns
* to_ok
= lir
->insBranch(LIR_jt
, lir
->ins2(LIR_eq
, comp_ch
, lir
->insImm(ch
)), 0);
2114 fails
.add(lir
->insBranch(LIR_jf
, lir
->ins2(LIR_eq
, comp_ch
, lir
->insImm(ch2
)), 0));
2115 if (!targetCurrentPoint(to_ok
))
2119 return lir
->ins2(LIR_piadd
, pos
, lir
->insImm(2));
2122 LIns
* compileClass(RENode
* node
, LIns
* pos
, LInsList
& fails
)
2124 if (!node
->u
.ucclass
.sense
)
2127 * If we share generated native code, we need to make a copy
2128 * of the bitmap because the original regexp's copy is destroyed
2129 * when that regexp is.
2131 RECharSet
*charSet
= &re
->classList
[node
->u
.ucclass
.index
];
2132 size_t bitmapLen
= (charSet
->length
>> 3) + 1;
2133 /* skip() can't hold large data blocks. */
2134 if (bitmapLen
> 1024)
2136 /* The following line allocates charSet.u.bits if successful. */
2137 if (!charSet
->converted
&& !ProcessCharSet(cx
, re
, charSet
))
2139 LIns
* skip
= lirBufWriter
->skip(bitmapLen
);
2140 if (fragment
->lirbuf
->outOMem())
2142 void* bitmapData
= skip
->payload();
2143 memcpy(bitmapData
, charSet
->u
.bits
, bitmapLen
);
2145 LIns
* to_fail
= lir
->insBranch(LIR_jf
, lir
->ins2(LIR_lt
, pos
, cpend
), 0);
2147 LIns
* text_ch
= lir
->insLoad(LIR_ldcs
, pos
, lir
->insImm(0));
2148 fails
.add(lir
->insBranch(LIR_jf
, lir
->ins2(LIR_le
, text_ch
, lir
->insImm(charSet
->length
)), 0));
2149 LIns
* byteIndex
= lir
->ins2(LIR_rsh
, text_ch
, lir
->insImm(3));
2150 LIns
* bitmap
= lir
->insImmPtr(bitmapData
);
2151 LIns
* byte
= lir
->insLoad(LIR_ldcb
, lir
->ins2(LIR_piadd
, bitmap
, byteIndex
), (int) 0);
2152 LIns
* bitMask
= lir
->ins2(LIR_lsh
, lir
->insImm(1),
2153 lir
->ins2(LIR_and
, text_ch
, lir
->insImm(0x7)));
2154 LIns
* test
= lir
->ins2(LIR_eq
, lir
->ins2(LIR_and
, byte
, bitMask
), lir
->insImm(0));
2156 LIns
* to_next
= lir
->insBranch(LIR_jt
, test
, 0);
2158 return lir
->ins2(LIR_piadd
, pos
, lir
->insImm(2));
2161 LIns
* compileAlt(RENode
* node
, LIns
* pos
, LInsList
& fails
)
2163 LInsList
kidFails(NULL
);
2164 if (!compileNode((RENode
*) node
->kid
, pos
, kidFails
))
2166 if (!compileNode(node
->next
, pos
, kidFails
))
2169 if (!targetCurrentPoint(kidFails
))
2171 if (!compileNode(node
->u
.altprereq
.kid2
, pos
, fails
))
2174 * Disable compilation for any regexp where something follows an
2175 * alternation. To make this work, we need to redesign to either
2176 * (a) pass around continuations so we know the right place to go
2177 * when we logically return, or (b) generate explicit backtracking
2185 JSBool
compileNode(RENode
* node
, LIns
* pos
, LInsList
& fails
)
2187 for (; node
; node
= node
->next
) {
2188 if (fragment
->lirbuf
->outOMem())
2193 pos
= compileEmpty(node
, pos
, fails
);
2196 if (node
->u
.flat
.length
== 1) {
2197 pos
= compileFlatSingleChar(node
->u
.flat
.chr
, pos
, fails
);
2199 for (size_t i
= 0; i
< node
->u
.flat
.length
; ++i
) {
2200 if (fragment
->lirbuf
->outOMem())
2202 pos
= compileFlatSingleChar(((jschar
*) node
->kid
)[i
], pos
, fails
);
2208 case REOP_ALTPREREQ
:
2209 pos
= compileAlt(node
, pos
, fails
);
2212 pos
= compileClass(node
, pos
, fails
);
2221 lir
->insStorei(pos
, state
, (int) offsetof(REMatchState
, cp
));
2222 lir
->ins1(LIR_ret
, state
);
2226 JSBool
compileSticky(RENode
* root
, LIns
* start
)
2228 LInsList
fails(NULL
);
2229 if (!compileNode(root
, start
, fails
))
2231 if (!targetCurrentPoint(fails
))
2233 lir
->ins1(LIR_ret
, lir
->insImm(0));
2237 JSBool
compileAnchoring(RENode
* root
, LIns
* start
)
2239 /* Even at the end, the empty regexp would match. */
2240 LIns
* to_next
= lir
->insBranch(LIR_jf
,
2241 lir
->ins2(LIR_le
, start
, cpend
), 0);
2242 LInsList
fails(NULL
);
2243 if (!compileNode(root
, start
, fails
))
2246 if (!targetCurrentPoint(to_next
))
2248 lir
->ins1(LIR_ret
, lir
->insImm(0));
2250 if (!targetCurrentPoint(fails
))
2252 lir
->insStorei(lir
->ins2(LIR_piadd
, start
, lir
->insImm(2)), gdata
,
2253 (int) offsetof(REGlobalData
, skipped
));
2259 addName(LirBuffer
* lirbuf
, LIns
* ins
, const char* name
)
2262 debug_only_v(lirbuf
->names
->addName(ins
, name
);)
2268 * Insert the side exit and guard record for a compiled regexp. Most
2269 * of the fields are not used. The important part is the regexp source
2270 * and flags, which we use as the fragment lookup key.
2272 GuardRecord
* insertGuard(jschar
* re_chars
, size_t re_length
)
2274 LIns
* skip
= lirBufWriter
->skip(sizeof(GuardRecord
) +
2275 sizeof(RESideExit
) +
2276 re_length
- sizeof(jschar
));
2277 GuardRecord
* guard
= (GuardRecord
*) skip
->payload();
2278 memset(guard
, 0, sizeof(*guard
));
2279 RESideExit
* exit
= (RESideExit
*)(guard
+1);
2281 guard
->exit
->target
= fragment
;
2282 exit
->re_flags
= re
->flags
;
2283 exit
->re_length
= re_length
;
2284 memcpy(exit
->re_chars
, re_chars
, re_length
);
2285 fragment
->lastIns
= lir
->insGuard(LIR_loop
, lir
->insImm(1), skip
);
2290 RegExpNativeCompiler(JSRegExp
* re
, CompilerState
* cs
, Fragment
* fragment
)
2291 : re(re
), cs(cs
), fragment(fragment
), lir(NULL
), lirBufWriter(NULL
) { }
2293 JSBool
compile(JSContext
* cx
)
2295 GuardRecord
* guard
= NULL
;
2300 Fragmento
* fragmento
= JS_TRACE_MONITOR(cx
).reFragmento
;
2302 JSSTRING_CHARS_AND_LENGTH(re
->source
, re_chars
, re_length
);
2304 * If the regexp is too long nanojit will assert when we
2305 * try to insert the guard record.
2307 if (re_length
> 1024)
2311 /* At this point we have an empty fragment. */
2312 LirBuffer
* lirbuf
= fragment
->lirbuf
;
2313 if (lirbuf
->outOMem())
2315 /* FIXME Use bug 463260 smart pointer when available. */
2316 lir
= lirBufWriter
= new (&gc
) LirBufWriter(lirbuf
);
2318 /* FIXME Use bug 463260 smart pointer when available. */
2320 debug_only_v(fragment
->lirbuf
->names
= new (&gc
) LirNameMap(&gc
, NULL
, fragmento
->labels
);)
2322 /* FIXME Use bug 463260 smart pointer when available. */
2324 debug_only_v(lir
= new (&gc
) VerboseWriter(&gc
, lir
, lirbuf
->names
);)
2327 lir
->ins0(LIR_start
);
2328 lirbuf
->state
= state
= addName(lirbuf
, lir
->insParam(0, 0), "state");
2329 lirbuf
->param1
= gdata
= addName(lirbuf
, lir
->insParam(1, 0), "gdata");
2330 start
= addName(lirbuf
, lir
->insLoad(LIR_ldp
, lirbuf
->param1
, (int) offsetof(REGlobalData
, skipped
)), "start");
2331 cpend
= addName(lirbuf
, lir
->insLoad(LIR_ldp
, lirbuf
->param1
, offsetof(REGlobalData
, cpend
)), "cpend");
2333 if (cs
->flags
& JSREG_STICKY
) {
2334 if (!compileSticky(cs
->result
, start
))
2337 if (!compileAnchoring(cs
->result
, start
))
2341 guard
= insertGuard(re_chars
, re_length
);
2343 if (lirbuf
->outOMem())
2345 ::compile(fragmento
->assm(), fragment
);
2346 if (fragmento
->assm()->error() != nanojit::None
) {
2347 oom
= fragmento
->assm()->error() == nanojit::OutOMem
;
2351 delete lirBufWriter
;
2353 debug_only_v(delete lir
;)
2357 if (lirbuf
->outOMem() || oom
) {
2358 fragmento
->clearFrags();
2361 if (!guard
) insertGuard(re_chars
, re_length
);
2362 fragment
->blacklist();
2364 delete lirBufWriter
;
2366 debug_only_v(delete lir
;)
2373 * Compile a regexp to native code in the given fragment.
2375 static inline JSBool
2376 CompileRegExpToNative(JSContext
* cx
, JSRegExp
* re
, Fragment
* fragment
)
2378 JSBool rv
= JS_FALSE
;
2380 CompilerState state
;
2381 RegExpNativeCompiler
rc(re
, &state
, fragment
);
2383 JS_ASSERT(!fragment
->code());
2384 JS_ASSERT(!fragment
->isBlacklisted());
2386 mark
= JS_ARENA_MARK(&cx
->tempPool
);
2387 if (!CompileRegExpToAST(cx
, NULL
, re
->source
, re
->flags
, state
)) {
2390 rv
= rc
.compile(cx
);
2392 JS_ARENA_RELEASE(&cx
->tempPool
, mark
);
2396 /* Function type for a compiled native regexp. */
2397 typedef REMatchState
* (FASTCALL
*NativeRegExp
)(REMatchState
*, REGlobalData
*);
2400 * Return a compiled native regexp if one already exists or can be created
2401 * now, or NULL otherwise.
2404 GetNativeRegExp(JSContext
* cx
, JSRegExp
* re
)
2409 Fragmento
* fragmento
= JS_TRACE_MONITOR(cx
).reFragmento
;
2411 JSSTRING_CHARS_AND_LENGTH(re
->source
, re_chars
, re_length
);
2412 void* hash
= HashRegExp(re
->flags
, re_chars
, re_length
);
2413 fragment
= LookupNativeRegExp(cx
, hash
, re
->flags
, re_chars
, re_length
);
2415 if (fragment
->code())
2417 if (fragment
->isBlacklisted())
2420 fragment
= fragmento
->getAnchor(hash
);
2421 fragment
->lirbuf
= JS_TRACE_MONITOR(cx
).reLirBuf
;
2422 fragment
->root
= fragment
;
2425 if (!CompileRegExpToNative(cx
, re
, fragment
))
2428 union { NIns
*code
; NativeRegExp func
; } u
;
2429 u
.code
= fragment
->code();
2435 js_NewRegExp(JSContext
*cx
, JSTokenStream
*ts
,
2436 JSString
*str
, uintN flags
, JSBool flat
)
2440 CompilerState state
;
2446 mark
= JS_ARENA_MARK(&cx
->tempPool
);
2449 * Parsing the string as flat is now expressed internally using
2450 * a flag, so that we keep this information in the JSRegExp, but
2451 * we keep the 'flat' parameter for now for compatibility.
2453 if (flat
) flags
|= JSREG_FLAT
;
2454 if (!CompileRegExpToAST(cx
, ts
, str
, flags
, state
))
2457 resize
= offsetof(JSRegExp
, program
) + state
.progLength
+ 1;
2458 re
= (JSRegExp
*) JS_malloc(cx
, resize
);
2463 JS_ASSERT(state
.classBitmapsMem
<= CLASS_BITMAPS_MEM_LIMIT
);
2464 re
->classCount
= state
.classCount
;
2465 if (re
->classCount
) {
2466 re
->classList
= (RECharSet
*)
2467 JS_malloc(cx
, re
->classCount
* sizeof(RECharSet
));
2468 if (!re
->classList
) {
2469 js_DestroyRegExp(cx
, re
);
2473 for (i
= 0; i
< re
->classCount
; i
++)
2474 re
->classList
[i
].converted
= JS_FALSE
;
2476 re
->classList
= NULL
;
2479 /* Compile the bytecode version. */
2480 endPC
= EmitREBytecode(&state
, re
, state
.treeDepth
, re
->program
, state
.result
);
2482 js_DestroyRegExp(cx
, re
);
2486 *endPC
++ = REOP_END
;
2488 * Check whether size was overestimated and shrink using realloc.
2489 * This is safe since no pointers to newly parsed regexp or its parts
2490 * besides re exist here.
2492 if ((size_t)(endPC
- re
->program
) != state
.progLength
+ 1) {
2494 JS_ASSERT((size_t)(endPC
- re
->program
) < state
.progLength
+ 1);
2495 resize
= offsetof(JSRegExp
, program
) + (endPC
- re
->program
);
2496 tmp
= (JSRegExp
*) JS_realloc(cx
, re
, resize
);
2502 re
->parenCount
= state
.parenCount
;
2506 JS_ARENA_RELEASE(&cx
->tempPool
, mark
);
2511 js_NewRegExpOpt(JSContext
*cx
, JSString
*str
, JSString
*opt
, JSBool flat
)
2520 JSSTRING_CHARS_AND_LENGTH(opt
, s
, n
);
2521 for (i
= 0; i
< n
; i
++) {
2524 flags
|= JSREG_GLOB
;
2527 flags
|= JSREG_FOLD
;
2530 flags
|= JSREG_MULTILINE
;
2533 flags
|= JSREG_STICKY
;
2536 charBuf
[0] = (char)s
[i
];
2538 JS_ReportErrorFlagsAndNumber(cx
, JSREPORT_ERROR
,
2539 js_GetErrorMessage
, NULL
,
2540 JSMSG_BAD_FLAG
, charBuf
);
2545 return js_NewRegExp(cx
, NULL
, str
, flags
, flat
);
2549 * Save the current state of the match - the position in the input
2550 * text as well as the position in the bytecode. The state of any
2551 * parent expressions is also saved (preceding state).
2552 * Contents of parenCount parentheses from parenIndex are also saved.
2554 static REBackTrackData
*
2555 PushBackTrackState(REGlobalData
*gData
, REOp op
,
2556 jsbytecode
*target
, REMatchState
*x
, const jschar
*cp
,
2557 size_t parenIndex
, size_t parenCount
)
2560 REBackTrackData
*result
=
2561 (REBackTrackData
*) ((char *)gData
->backTrackSP
+ gData
->cursz
);
2563 size_t sz
= sizeof(REBackTrackData
) +
2564 gData
->stateStackTop
* sizeof(REProgState
) +
2565 parenCount
* sizeof(RECapture
);
2567 ptrdiff_t btsize
= gData
->backTrackStackSize
;
2568 ptrdiff_t btincr
= ((char *)result
+ sz
) -
2569 ((char *)gData
->backTrackStack
+ btsize
);
2571 re_debug("\tBT_Push: %lu,%lu",
2572 (unsigned long) parenIndex
, (unsigned long) parenCount
);
2574 JS_COUNT_OPERATION(gData
->cx
, JSOW_JUMP
* (1 + parenCount
));
2576 ptrdiff_t offset
= (char *)result
- (char *)gData
->backTrackStack
;
2578 JS_COUNT_OPERATION(gData
->cx
, JSOW_ALLOCATION
);
2579 btincr
= JS_ROUNDUP(btincr
, btsize
);
2580 JS_ARENA_GROW_CAST(gData
->backTrackStack
, REBackTrackData
*,
2581 &gData
->cx
->regexpPool
, btsize
, btincr
);
2582 if (!gData
->backTrackStack
) {
2583 js_ReportOutOfScriptQuota(gData
->cx
);
2584 gData
->ok
= JS_FALSE
;
2587 gData
->backTrackStackSize
= btsize
+ btincr
;
2588 result
= (REBackTrackData
*) ((char *)gData
->backTrackStack
+ offset
);
2590 gData
->backTrackSP
= result
;
2591 result
->sz
= gData
->cursz
;
2594 result
->backtrack_op
= op
;
2595 result
->backtrack_pc
= target
;
2597 result
->parenCount
= parenCount
;
2598 result
->parenIndex
= parenIndex
;
2600 result
->saveStateStackTop
= gData
->stateStackTop
;
2601 JS_ASSERT(gData
->stateStackTop
);
2602 memcpy(result
+ 1, gData
->stateStack
,
2603 sizeof(REProgState
) * result
->saveStateStackTop
);
2605 if (parenCount
!= 0) {
2606 memcpy((char *)(result
+ 1) +
2607 sizeof(REProgState
) * result
->saveStateStackTop
,
2608 &x
->parens
[parenIndex
],
2609 sizeof(RECapture
) * parenCount
);
2610 for (i
= 0; i
!= parenCount
; i
++)
2611 x
->parens
[parenIndex
+ i
].index
= -1;
2619 * Consecutive literal characters.
2622 static REMatchState
*
2623 FlatNMatcher(REGlobalData
*gData
, REMatchState
*x
, jschar
*matchChars
,
2627 if (length
> gData
->cpend
- x
->cp
)
2629 for (i
= 0; i
!= length
; i
++) {
2630 if (matchChars
[i
] != x
->cp
[i
])
2638 static JS_ALWAYS_INLINE REMatchState
*
2639 FlatNIMatcher(REGlobalData
*gData
, REMatchState
*x
, jschar
*matchChars
,
2643 JS_ASSERT(gData
->cpend
>= x
->cp
);
2644 if (length
> (size_t)(gData
->cpend
- x
->cp
))
2646 for (i
= 0; i
!= length
; i
++) {
2647 if (upcase(matchChars
[i
]) != upcase(x
->cp
[i
]))
2655 * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
2656 * 2. If E is not a character then go to step 6.
2657 * 3. Let ch be E's character.
2658 * 4. Let A be a one-element RECharSet containing the character ch.
2659 * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
2660 * 6. E must be an integer. Let n be that integer.
2661 * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
2662 * 8. Return an internal Matcher closure that takes two arguments, a State x
2663 * and a Continuation c, and performs the following:
2664 * 1. Let cap be x's captures internal array.
2665 * 2. Let s be cap[n].
2666 * 3. If s is undefined, then call c(x) and return its result.
2667 * 4. Let e be x's endIndex.
2668 * 5. Let len be s's length.
2669 * 6. Let f be e+len.
2670 * 7. If f>InputLength, return failure.
2671 * 8. If there exists an integer i between 0 (inclusive) and len (exclusive)
2672 * such that Canonicalize(s[i]) is not the same character as
2673 * Canonicalize(Input [e+i]), then return failure.
2674 * 9. Let y be the State (f, cap).
2675 * 10. Call c(y) and return its result.
2677 static REMatchState
*
2678 BackrefMatcher(REGlobalData
*gData
, REMatchState
*x
, size_t parenIndex
)
2681 const jschar
*parenContent
;
2682 RECapture
*cap
= &x
->parens
[parenIndex
];
2684 if (cap
->index
== -1)
2688 if (x
->cp
+ len
> gData
->cpend
)
2691 parenContent
= &gData
->cpbegin
[cap
->index
];
2692 if (gData
->regexp
->flags
& JSREG_FOLD
) {
2693 for (i
= 0; i
< len
; i
++) {
2694 if (upcase(parenContent
[i
]) != upcase(x
->cp
[i
]))
2698 for (i
= 0; i
< len
; i
++) {
2699 if (parenContent
[i
] != x
->cp
[i
])
2708 /* Add a single character to the RECharSet */
2710 AddCharacterToCharSet(RECharSet
*cs
, jschar c
)
2712 uintN byteIndex
= (uintN
)(c
>> 3);
2713 JS_ASSERT(c
<= cs
->length
);
2714 cs
->u
.bits
[byteIndex
] |= 1 << (c
& 0x7);
2718 /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
2720 AddCharacterRangeToCharSet(RECharSet
*cs
, uintN c1
, uintN c2
)
2724 uintN byteIndex1
= c1
>> 3;
2725 uintN byteIndex2
= c2
>> 3;
2727 JS_ASSERT(c2
<= cs
->length
&& c1
<= c2
);
2732 if (byteIndex1
== byteIndex2
) {
2733 cs
->u
.bits
[byteIndex1
] |= ((uint8
)0xFF >> (7 - (c2
- c1
))) << c1
;
2735 cs
->u
.bits
[byteIndex1
] |= 0xFF << c1
;
2736 for (i
= byteIndex1
+ 1; i
< byteIndex2
; i
++)
2737 cs
->u
.bits
[i
] = 0xFF;
2738 cs
->u
.bits
[byteIndex2
] |= (uint8
)0xFF >> (7 - c2
);
2742 struct CharacterRange
{
2748 * The following characters are taken from the ECMA-262 standard, section 7.2
2749 * and 7.3, and the Unicode 3 standard, Table 6-1.
2751 static const CharacterRange WhiteSpaceRanges
[] = {
2752 /* TAB, LF, VT, FF, CR */
2756 /* NO-BREAK SPACE */
2759 * EN QUAD, EM QUAD, EN SPACE, EM SPACE, THREE-PER-EM SPACE, FOUR-PER-EM
2760 * SPACE, SIX-PER-EM SPACE, FIGURE SPACE, PUNCTUATION SPACE, THIN SPACE,
2761 * HAIR SPACE, ZERO WIDTH SPACE
2766 /* NARROW NO-BREAK SPACE */
2768 /* IDEOGRAPHIC SPACE */
2772 /* ECMA-262 standard, section 15.10.2.6. */
2773 static const CharacterRange WordRanges
[] = {
2774 { jschar('0'), jschar('9') },
2775 { jschar('A'), jschar('Z') },
2776 { jschar('_'), jschar('_') },
2777 { jschar('a'), jschar('z') }
2781 AddCharacterRanges(RECharSet
*charSet
,
2782 const CharacterRange
*range
,
2783 const CharacterRange
*end
)
2785 for (; range
< end
; ++range
)
2786 AddCharacterRangeToCharSet(charSet
, range
->start
, range
->end
);
2790 AddInvertedCharacterRanges(RECharSet
*charSet
,
2791 const CharacterRange
*range
,
2792 const CharacterRange
*end
)
2794 uint16 previous
= 0;
2795 for (; range
< end
; ++range
) {
2796 AddCharacterRangeToCharSet(charSet
, previous
, range
->start
- 1);
2797 previous
= range
->end
+ 1;
2799 AddCharacterRangeToCharSet(charSet
, previous
, charSet
->length
);
2802 /* Compile the source of the class into a RECharSet */
2804 ProcessCharSet(JSContext
*cx
, JSRegExp
*re
, RECharSet
*charSet
)
2806 const jschar
*src
, *end
;
2807 JSBool inRange
= JS_FALSE
;
2808 jschar rangeStart
= 0;
2809 uintN byteLength
, n
;
2813 JS_ASSERT(!charSet
->converted
);
2815 * Assert that startIndex and length points to chars inside [] inside
2818 JS_ASSERT(1 <= charSet
->u
.src
.startIndex
);
2819 JS_ASSERT(charSet
->u
.src
.startIndex
2820 < JSSTRING_LENGTH(re
->source
));
2821 JS_ASSERT(charSet
->u
.src
.length
<= JSSTRING_LENGTH(re
->source
)
2822 - 1 - charSet
->u
.src
.startIndex
);
2824 charSet
->converted
= JS_TRUE
;
2825 src
= JSSTRING_CHARS(re
->source
) + charSet
->u
.src
.startIndex
;
2826 end
= src
+ charSet
->u
.src
.length
;
2827 JS_ASSERT(src
[-1] == '[');
2828 JS_ASSERT(end
[0] == ']');
2830 byteLength
= (charSet
->length
>> 3) + 1;
2831 charSet
->u
.bits
= (uint8
*)JS_malloc(cx
, byteLength
);
2832 if (!charSet
->u
.bits
) {
2833 JS_ReportOutOfMemory(cx
);
2836 memset(charSet
->u
.bits
, 0, byteLength
);
2842 JS_ASSERT(charSet
->sense
== JS_FALSE
);
2845 JS_ASSERT(charSet
->sense
== JS_TRUE
);
2848 while (src
!= end
) {
2873 if (src
< end
&& JS_ISWORD(*src
)) {
2874 thisCh
= (jschar
)(*src
++ & 0x1F);
2887 for (i
= 0; (i
< nDigits
) && (src
< end
); i
++) {
2890 if (!isASCIIHexDigit(c
, &digit
)) {
2892 * Back off to accepting the original '\'
2899 n
= (n
<< 4) | digit
;
2912 * This is a non-ECMA extension - decimal escapes (in this
2913 * case, octal!) are supposed to be an error inside class
2914 * ranges, but supported here for backwards compatibility.
2918 if ('0' <= c
&& c
<= '7') {
2920 n
= 8 * n
+ JS7_UNDEC(c
);
2922 if ('0' <= c
&& c
<= '7') {
2924 i
= 8 * n
+ JS7_UNDEC(c
);
2935 AddCharacterRangeToCharSet(charSet
, '0', '9');
2936 continue; /* don't need range processing */
2938 AddCharacterRangeToCharSet(charSet
, 0, '0' - 1);
2939 AddCharacterRangeToCharSet(charSet
,
2941 (jschar
)charSet
->length
);
2944 AddCharacterRanges(charSet
, WhiteSpaceRanges
,
2945 WhiteSpaceRanges
+ JS_ARRAY_LENGTH(WhiteSpaceRanges
));
2948 AddInvertedCharacterRanges(charSet
, WhiteSpaceRanges
,
2949 WhiteSpaceRanges
+ JS_ARRAY_LENGTH(WhiteSpaceRanges
));
2952 AddCharacterRanges(charSet
, WordRanges
,
2953 WordRanges
+ JS_ARRAY_LENGTH(WordRanges
));
2956 AddInvertedCharacterRanges(charSet
, WordRanges
,
2957 WordRanges
+ JS_ARRAY_LENGTH(WordRanges
));
2972 if (re
->flags
& JSREG_FOLD
) {
2975 JS_ASSERT(rangeStart
<= thisCh
);
2976 for (i
= rangeStart
; i
<= thisCh
; i
++) {
2979 AddCharacterToCharSet(charSet
, i
);
2983 AddCharacterToCharSet(charSet
, uch
);
2985 AddCharacterToCharSet(charSet
, dch
);
2988 AddCharacterRangeToCharSet(charSet
, rangeStart
, thisCh
);
2992 if (re
->flags
& JSREG_FOLD
) {
2993 AddCharacterToCharSet(charSet
, upcase(thisCh
));
2994 AddCharacterToCharSet(charSet
, downcase(thisCh
));
2996 AddCharacterToCharSet(charSet
, thisCh
);
2998 if (src
< end
- 1) {
3002 rangeStart
= thisCh
;
3010 static inline JSBool
3011 MatcherProcessCharSet(REGlobalData
*gData
, RECharSet
*charSet
) {
3012 JSBool rv
= ProcessCharSet(gData
->cx
, gData
->regexp
, charSet
);
3013 if (!rv
) gData
->ok
= JS_FALSE
;
3018 js_DestroyRegExp(JSContext
*cx
, JSRegExp
*re
)
3020 if (JS_ATOMIC_DECREMENT(&re
->nrefs
) == 0) {
3021 if (re
->classList
) {
3023 for (i
= 0; i
< re
->classCount
; i
++) {
3024 if (re
->classList
[i
].converted
)
3025 JS_free(cx
, re
->classList
[i
].u
.bits
);
3026 re
->classList
[i
].u
.bits
= NULL
;
3028 JS_free(cx
, re
->classList
);
3035 ReallocStateStack(REGlobalData
*gData
)
3037 size_t limit
= gData
->stateStackLimit
;
3038 size_t sz
= sizeof(REProgState
) * limit
;
3040 JS_ARENA_GROW_CAST(gData
->stateStack
, REProgState
*,
3041 &gData
->cx
->regexpPool
, sz
, sz
);
3042 if (!gData
->stateStack
) {
3043 js_ReportOutOfScriptQuota(gData
->cx
);
3044 gData
->ok
= JS_FALSE
;
3047 gData
->stateStackLimit
= limit
+ limit
;
3051 #define PUSH_STATE_STACK(data) \
3053 ++(data)->stateStackTop; \
3054 if ((data)->stateStackTop == (data)->stateStackLimit && \
3055 !ReallocStateStack((data))) { \
3061 * Apply the current op against the given input to see if it's going to match
3062 * or fail. Return false if we don't get a match, true if we do. If updatecp is
3063 * true, then update the current state's cp. Always update startpc to the next
3066 static JS_ALWAYS_INLINE REMatchState
*
3067 SimpleMatch(REGlobalData
*gData
, REMatchState
*x
, REOp op
,
3068 jsbytecode
**startpc
, JSBool updatecp
)
3070 REMatchState
*result
= NULL
;
3073 size_t offset
, length
, index
;
3074 jsbytecode
*pc
= *startpc
; /* pc has already been incremented past op */
3076 const jschar
*startcp
= x
->cp
;
3081 const char *opname
= reop_names
[op
];
3082 re_debug("\n%06d: %*s%s", pc
- gData
->regexp
->program
,
3083 gData
->stateStackTop
* 2, "", opname
);
3090 if (x
->cp
!= gData
->cpbegin
) {
3091 if (!gData
->cx
->regExpStatics
.multiline
&&
3092 !(gData
->regexp
->flags
& JSREG_MULTILINE
)) {
3095 if (!RE_IS_LINE_TERM(x
->cp
[-1]))
3101 if (x
->cp
!= gData
->cpend
) {
3102 if (!gData
->cx
->regExpStatics
.multiline
&&
3103 !(gData
->regexp
->flags
& JSREG_MULTILINE
)) {
3106 if (!RE_IS_LINE_TERM(*x
->cp
))
3112 if ((x
->cp
== gData
->cpbegin
|| !JS_ISWORD(x
->cp
[-1])) ^
3113 !(x
->cp
!= gData
->cpend
&& JS_ISWORD(*x
->cp
))) {
3118 if ((x
->cp
== gData
->cpbegin
|| !JS_ISWORD(x
->cp
[-1])) ^
3119 (x
->cp
!= gData
->cpend
&& JS_ISWORD(*x
->cp
))) {
3124 if (x
->cp
!= gData
->cpend
&& !RE_IS_LINE_TERM(*x
->cp
)) {
3130 if (x
->cp
!= gData
->cpend
&& JS7_ISDEC(*x
->cp
)) {
3136 if (x
->cp
!= gData
->cpend
&& !JS7_ISDEC(*x
->cp
)) {
3142 if (x
->cp
!= gData
->cpend
&& JS_ISWORD(*x
->cp
)) {
3148 if (x
->cp
!= gData
->cpend
&& !JS_ISWORD(*x
->cp
)) {
3154 if (x
->cp
!= gData
->cpend
&& JS_ISSPACE(*x
->cp
)) {
3160 if (x
->cp
!= gData
->cpend
&& !JS_ISSPACE(*x
->cp
)) {
3166 pc
= ReadCompactIndex(pc
, &parenIndex
);
3167 JS_ASSERT(parenIndex
< gData
->regexp
->parenCount
);
3168 result
= BackrefMatcher(gData
, x
, parenIndex
);
3171 pc
= ReadCompactIndex(pc
, &offset
);
3172 JS_ASSERT(offset
< JSSTRING_LENGTH(gData
->regexp
->source
));
3173 pc
= ReadCompactIndex(pc
, &length
);
3174 JS_ASSERT(1 <= length
);
3175 JS_ASSERT(length
<= JSSTRING_LENGTH(gData
->regexp
->source
) - offset
);
3176 if (length
<= (size_t)(gData
->cpend
- x
->cp
)) {
3177 source
= JSSTRING_CHARS(gData
->regexp
->source
) + offset
;
3178 re_debug_chars(source
, length
);
3179 for (index
= 0; index
!= length
; index
++) {
3180 if (source
[index
] != x
->cp
[index
])
3189 re_debug(" '%c' == '%c'", (char)matchCh
, (char)*x
->cp
);
3190 if (x
->cp
!= gData
->cpend
&& *x
->cp
== matchCh
) {
3196 pc
= ReadCompactIndex(pc
, &offset
);
3197 JS_ASSERT(offset
< JSSTRING_LENGTH(gData
->regexp
->source
));
3198 pc
= ReadCompactIndex(pc
, &length
);
3199 JS_ASSERT(1 <= length
);
3200 JS_ASSERT(length
<= JSSTRING_LENGTH(gData
->regexp
->source
) - offset
);
3201 source
= JSSTRING_CHARS(gData
->regexp
->source
);
3202 result
= FlatNIMatcher(gData
, x
, source
+ offset
, length
);
3206 if (x
->cp
!= gData
->cpend
&& upcase(*x
->cp
) == upcase(matchCh
)) {
3212 matchCh
= GET_ARG(pc
);
3213 re_debug(" '%c' == '%c'", (char)matchCh
, (char)*x
->cp
);
3215 if (x
->cp
!= gData
->cpend
&& *x
->cp
== matchCh
) {
3221 matchCh
= GET_ARG(pc
);
3223 if (x
->cp
!= gData
->cpend
&& upcase(*x
->cp
) == upcase(matchCh
)) {
3229 pc
= ReadCompactIndex(pc
, &index
);
3230 JS_ASSERT(index
< gData
->regexp
->classCount
);
3231 if (x
->cp
!= gData
->cpend
) {
3232 charSet
= &gData
->regexp
->classList
[index
];
3233 JS_ASSERT(charSet
->converted
);
3236 if (charSet
->length
!= 0 &&
3237 ch
<= charSet
->length
&&
3238 (charSet
->u
.bits
[index
] & (1 << (ch
& 0x7)))) {
3245 pc
= ReadCompactIndex(pc
, &index
);
3246 JS_ASSERT(index
< gData
->regexp
->classCount
);
3247 if (x
->cp
!= gData
->cpend
) {
3248 charSet
= &gData
->regexp
->classList
[index
];
3249 JS_ASSERT(charSet
->converted
);
3252 if (charSet
->length
== 0 ||
3253 ch
> charSet
->length
||
3254 !(charSet
->u
.bits
[index
] & (1 << (ch
& 0x7)))) {
3262 JS_ASSERT(JS_FALSE
);
3275 static JS_ALWAYS_INLINE REMatchState
*
3276 ExecuteREBytecode(REGlobalData
*gData
, REMatchState
*x
)
3278 REMatchState
*result
= NULL
;
3279 REBackTrackData
*backTrackData
;
3280 jsbytecode
*nextpc
, *testpc
;
3283 REProgState
*curState
;
3284 const jschar
*startcp
;
3285 size_t parenIndex
, k
;
3286 size_t parenSoFar
= 0;
3288 jschar matchCh1
, matchCh2
;
3292 jsbytecode
*pc
= gData
->regexp
->program
;
3293 REOp op
= (REOp
) *pc
++;
3296 * If the first node is a simple match, step the index into the string
3297 * until that match is made, or fail if it can't be found at all.
3299 if (REOP_IS_SIMPLE(op
) && !(gData
->regexp
->flags
& JSREG_STICKY
)) {
3301 while (x
->cp
<= gData
->cpend
) {
3302 nextpc
= pc
; /* reset back to start each time */
3303 result
= SimpleMatch(gData
, x
, op
, &nextpc
, JS_TRUE
);
3307 pc
= nextpc
; /* accept skip to next opcode */
3309 JS_ASSERT(op
< REOP_LIMIT
);
3321 const char *opname
= reop_names
[op
];
3322 re_debug("\n%06d: %*s%s", pc
- gData
->regexp
->program
,
3323 gData
->stateStackTop
* 2, "", opname
);
3325 if (REOP_IS_SIMPLE(op
)) {
3326 result
= SimpleMatch(gData
, x
, op
, &pc
, JS_TRUE
);
3328 curState
= &gData
->stateStack
[gData
->stateStackTop
];
3332 case REOP_ALTPREREQ2
:
3333 nextpc
= pc
+ GET_OFFSET(pc
); /* start of next op */
3335 matchCh2
= GET_ARG(pc
);
3340 if (x
->cp
!= gData
->cpend
) {
3341 if (*x
->cp
== matchCh2
)
3344 charSet
= &gData
->regexp
->classList
[k
];
3345 if (!charSet
->converted
&& !MatcherProcessCharSet(gData
, charSet
))
3349 if ((charSet
->length
== 0 ||
3350 matchCh1
> charSet
->length
||
3351 !(charSet
->u
.bits
[k
] & (1 << (matchCh1
& 0x7)))) ^
3359 case REOP_ALTPREREQ
:
3360 nextpc
= pc
+ GET_OFFSET(pc
); /* start of next op */
3362 matchCh1
= GET_ARG(pc
);
3364 matchCh2
= GET_ARG(pc
);
3366 if (x
->cp
== gData
->cpend
||
3367 (*x
->cp
!= matchCh1
&& *x
->cp
!= matchCh2
)) {
3371 /* else false thru... */
3375 nextpc
= pc
+ GET_OFFSET(pc
); /* start of next alternate */
3376 pc
+= ARG_LEN
; /* start of this alternate */
3377 curState
->parenSoFar
= parenSoFar
;
3378 PUSH_STATE_STACK(gData
);
3381 if (REOP_IS_SIMPLE(op
)) {
3382 if (!SimpleMatch(gData
, x
, op
, &pc
, JS_TRUE
)) {
3383 op
= (REOp
) *nextpc
++;
3390 nextop
= (REOp
) *nextpc
++;
3391 if (!PushBackTrackState(gData
, nextop
, nextpc
, x
, startcp
, 0, 0))
3396 * Occurs at (successful) end of REOP_ALT,
3400 * If we have not gotten a result here, it is because of an
3401 * empty match. Do the same thing REOP_EMPTY would do.
3406 --gData
->stateStackTop
;
3407 pc
+= GET_OFFSET(pc
);
3412 * Occurs at last (successful) end of REOP_ALT,
3416 * If we have not gotten a result here, it is because of an
3417 * empty match. Do the same thing REOP_EMPTY would do.
3422 --gData
->stateStackTop
;
3427 pc
= ReadCompactIndex(pc
, &parenIndex
);
3428 re_debug("[ %lu ]", (unsigned long) parenIndex
);
3429 JS_ASSERT(parenIndex
< gData
->regexp
->parenCount
);
3430 if (parenIndex
+ 1 > parenSoFar
)
3431 parenSoFar
= parenIndex
+ 1;
3432 x
->parens
[parenIndex
].index
= x
->cp
- gData
->cpbegin
;
3433 x
->parens
[parenIndex
].length
= 0;
3441 pc
= ReadCompactIndex(pc
, &parenIndex
);
3442 JS_ASSERT(parenIndex
< gData
->regexp
->parenCount
);
3443 cap
= &x
->parens
[parenIndex
];
3444 delta
= x
->cp
- (gData
->cpbegin
+ cap
->index
);
3445 cap
->length
= (delta
< 0) ? 0 : (size_t) delta
;
3453 nextpc
= pc
+ GET_OFFSET(pc
); /* start of term after ASSERT */
3454 pc
+= ARG_LEN
; /* start of ASSERT child */
3457 if (REOP_IS_SIMPLE(op
) &&
3458 !SimpleMatch(gData
, x
, op
, &testpc
, JS_FALSE
)) {
3462 curState
->u
.assertion
.top
=
3463 (char *)gData
->backTrackSP
- (char *)gData
->backTrackStack
;
3464 curState
->u
.assertion
.sz
= gData
->cursz
;
3465 curState
->index
= x
->cp
- gData
->cpbegin
;
3466 curState
->parenSoFar
= parenSoFar
;
3467 PUSH_STATE_STACK(gData
);
3468 if (!PushBackTrackState(gData
, REOP_ASSERTTEST
,
3469 nextpc
, x
, x
->cp
, 0, 0)) {
3474 case REOP_ASSERT_NOT
:
3475 nextpc
= pc
+ GET_OFFSET(pc
);
3479 if (REOP_IS_SIMPLE(op
) /* Note - fail to fail! */ &&
3480 SimpleMatch(gData
, x
, op
, &testpc
, JS_FALSE
) &&
3481 *testpc
== REOP_ASSERTNOTTEST
) {
3485 curState
->u
.assertion
.top
3486 = (char *)gData
->backTrackSP
-
3487 (char *)gData
->backTrackStack
;
3488 curState
->u
.assertion
.sz
= gData
->cursz
;
3489 curState
->index
= x
->cp
- gData
->cpbegin
;
3490 curState
->parenSoFar
= parenSoFar
;
3491 PUSH_STATE_STACK(gData
);
3492 if (!PushBackTrackState(gData
, REOP_ASSERTNOTTEST
,
3493 nextpc
, x
, x
->cp
, 0, 0)) {
3498 case REOP_ASSERTTEST
:
3499 --gData
->stateStackTop
;
3501 x
->cp
= gData
->cpbegin
+ curState
->index
;
3502 gData
->backTrackSP
=
3503 (REBackTrackData
*) ((char *)gData
->backTrackStack
+
3504 curState
->u
.assertion
.top
);
3505 gData
->cursz
= curState
->u
.assertion
.sz
;
3510 case REOP_ASSERTNOTTEST
:
3511 --gData
->stateStackTop
;
3513 x
->cp
= gData
->cpbegin
+ curState
->index
;
3514 gData
->backTrackSP
=
3515 (REBackTrackData
*) ((char *)gData
->backTrackStack
+
3516 curState
->u
.assertion
.top
);
3517 gData
->cursz
= curState
->u
.assertion
.sz
;
3518 result
= (!result
) ? x
: NULL
;
3521 curState
->u
.quantifier
.min
= 0;
3522 curState
->u
.quantifier
.max
= (uintN
)-1;
3525 curState
->u
.quantifier
.min
= 1;
3526 curState
->u
.quantifier
.max
= (uintN
)-1;
3529 curState
->u
.quantifier
.min
= 0;
3530 curState
->u
.quantifier
.max
= 1;
3533 pc
= ReadCompactIndex(pc
, &k
);
3534 curState
->u
.quantifier
.min
= k
;
3535 pc
= ReadCompactIndex(pc
, &k
);
3536 /* max is k - 1 to use one byte for (uintN)-1 sentinel. */
3537 curState
->u
.quantifier
.max
= k
- 1;
3538 JS_ASSERT(curState
->u
.quantifier
.min
3539 <= curState
->u
.quantifier
.max
);
3541 if (curState
->u
.quantifier
.max
== 0) {
3542 pc
= pc
+ GET_OFFSET(pc
);
3547 /* Step over <next> */
3548 nextpc
= pc
+ ARG_LEN
;
3549 op
= (REOp
) *nextpc
++;
3551 if (REOP_IS_SIMPLE(op
)) {
3552 if (!SimpleMatch(gData
, x
, op
, &nextpc
, JS_TRUE
)) {
3553 if (curState
->u
.quantifier
.min
== 0)
3557 pc
= pc
+ GET_OFFSET(pc
);
3560 op
= (REOp
) *nextpc
++;
3563 curState
->index
= startcp
- gData
->cpbegin
;
3564 curState
->continue_op
= REOP_REPEAT
;
3565 curState
->continue_pc
= pc
;
3566 curState
->parenSoFar
= parenSoFar
;
3567 PUSH_STATE_STACK(gData
);
3568 if (curState
->u
.quantifier
.min
== 0 &&
3569 !PushBackTrackState(gData
, REOP_REPEAT
, pc
, x
, startcp
,
3576 case REOP_ENDCHILD
: /* marks the end of a quantifier child */
3577 pc
= curState
[-1].continue_pc
;
3578 op
= (REOp
) curState
[-1].continue_op
;
3587 --gData
->stateStackTop
;
3589 /* Failed, see if we have enough children. */
3590 if (curState
->u
.quantifier
.min
== 0)
3594 if (curState
->u
.quantifier
.min
== 0 &&
3595 x
->cp
== gData
->cpbegin
+ curState
->index
) {
3596 /* matched an empty string, that'll get us nowhere */
3600 if (curState
->u
.quantifier
.min
!= 0)
3601 curState
->u
.quantifier
.min
--;
3602 if (curState
->u
.quantifier
.max
!= (uintN
) -1)
3603 curState
->u
.quantifier
.max
--;
3604 if (curState
->u
.quantifier
.max
== 0)
3606 nextpc
= pc
+ ARG_LEN
;
3607 nextop
= (REOp
) *nextpc
;
3609 if (REOP_IS_SIMPLE(nextop
)) {
3611 if (!SimpleMatch(gData
, x
, nextop
, &nextpc
, JS_TRUE
)) {
3612 if (curState
->u
.quantifier
.min
== 0)
3619 curState
->index
= startcp
- gData
->cpbegin
;
3620 PUSH_STATE_STACK(gData
);
3621 if (curState
->u
.quantifier
.min
== 0 &&
3622 !PushBackTrackState(gData
, REOP_REPEAT
,
3624 curState
->parenSoFar
,
3626 curState
->parenSoFar
)) {
3629 } while (*nextpc
== REOP_ENDCHILD
);
3632 parenSoFar
= curState
->parenSoFar
;
3637 pc
+= GET_OFFSET(pc
);
3640 case REOP_MINIMALSTAR
:
3641 curState
->u
.quantifier
.min
= 0;
3642 curState
->u
.quantifier
.max
= (uintN
)-1;
3643 goto minimalquantcommon
;
3644 case REOP_MINIMALPLUS
:
3645 curState
->u
.quantifier
.min
= 1;
3646 curState
->u
.quantifier
.max
= (uintN
)-1;
3647 goto minimalquantcommon
;
3648 case REOP_MINIMALOPT
:
3649 curState
->u
.quantifier
.min
= 0;
3650 curState
->u
.quantifier
.max
= 1;
3651 goto minimalquantcommon
;
3652 case REOP_MINIMALQUANT
:
3653 pc
= ReadCompactIndex(pc
, &k
);
3654 curState
->u
.quantifier
.min
= k
;
3655 pc
= ReadCompactIndex(pc
, &k
);
3656 /* See REOP_QUANT comments about k - 1. */
3657 curState
->u
.quantifier
.max
= k
- 1;
3658 JS_ASSERT(curState
->u
.quantifier
.min
3659 <= curState
->u
.quantifier
.max
);
3661 curState
->index
= x
->cp
- gData
->cpbegin
;
3662 curState
->parenSoFar
= parenSoFar
;
3663 PUSH_STATE_STACK(gData
);
3664 if (curState
->u
.quantifier
.min
!= 0) {
3665 curState
->continue_op
= REOP_MINIMALREPEAT
;
3666 curState
->continue_pc
= pc
;
3667 /* step over <next> */
3671 if (!PushBackTrackState(gData
, REOP_MINIMALREPEAT
,
3672 pc
, x
, x
->cp
, 0, 0)) {
3675 --gData
->stateStackTop
;
3676 pc
= pc
+ GET_OFFSET(pc
);
3681 case REOP_MINIMALREPEAT
:
3682 --gData
->stateStackTop
;
3685 re_debug("{%d,%d}", curState
->u
.quantifier
.min
,
3686 curState
->u
.quantifier
.max
);
3687 #define PREPARE_REPEAT() \
3689 curState->index = x->cp - gData->cpbegin; \
3690 curState->continue_op = REOP_MINIMALREPEAT; \
3691 curState->continue_pc = pc; \
3693 for (k = curState->parenSoFar; k < parenSoFar; k++) \
3694 x->parens[k].index = -1; \
3695 PUSH_STATE_STACK(gData); \
3696 op = (REOp) *pc++; \
3697 JS_ASSERT(op < REOP_LIMIT); \
3703 * Non-greedy failure - try to consume another child.
3705 if (curState
->u
.quantifier
.max
== (uintN
) -1 ||
3706 curState
->u
.quantifier
.max
> 0) {
3710 /* Don't need to adjust pc since we're going to pop. */
3713 if (curState
->u
.quantifier
.min
== 0 &&
3714 x
->cp
== gData
->cpbegin
+ curState
->index
) {
3715 /* Matched an empty string, that'll get us nowhere. */
3719 if (curState
->u
.quantifier
.min
!= 0)
3720 curState
->u
.quantifier
.min
--;
3721 if (curState
->u
.quantifier
.max
!= (uintN
) -1)
3722 curState
->u
.quantifier
.max
--;
3723 if (curState
->u
.quantifier
.min
!= 0) {
3727 curState
->index
= x
->cp
- gData
->cpbegin
;
3728 curState
->parenSoFar
= parenSoFar
;
3729 PUSH_STATE_STACK(gData
);
3730 if (!PushBackTrackState(gData
, REOP_MINIMALREPEAT
,
3732 curState
->parenSoFar
,
3733 parenSoFar
- curState
->parenSoFar
)) {
3736 --gData
->stateStackTop
;
3737 pc
= pc
+ GET_OFFSET(pc
);
3739 JS_ASSERT(op
< REOP_LIMIT
);
3742 JS_ASSERT(JS_FALSE
);
3749 * If the match failed and there's a backtrack option, take it.
3750 * Otherwise this is a complete and utter failure.
3753 if (gData
->cursz
== 0)
3755 if (!JS_CHECK_OPERATION_LIMIT(gData
->cx
, JSOW_JUMP
)) {
3756 gData
->ok
= JS_FALSE
;
3760 /* Potentially detect explosive regex here. */
3761 gData
->backTrackCount
++;
3762 if (gData
->backTrackLimit
&&
3763 gData
->backTrackCount
>= gData
->backTrackLimit
) {
3764 JS_ReportErrorNumber(gData
->cx
, js_GetErrorMessage
, NULL
,
3765 JSMSG_REGEXP_TOO_COMPLEX
);
3766 gData
->ok
= JS_FALSE
;
3770 backTrackData
= gData
->backTrackSP
;
3771 gData
->cursz
= backTrackData
->sz
;
3772 gData
->backTrackSP
=
3773 (REBackTrackData
*) ((char *)backTrackData
- backTrackData
->sz
);
3774 x
->cp
= backTrackData
->cp
;
3775 pc
= backTrackData
->backtrack_pc
;
3776 op
= (REOp
) backTrackData
->backtrack_op
;
3777 JS_ASSERT(op
< REOP_LIMIT
);
3778 gData
->stateStackTop
= backTrackData
->saveStateStackTop
;
3779 JS_ASSERT(gData
->stateStackTop
);
3781 memcpy(gData
->stateStack
, backTrackData
+ 1,
3782 sizeof(REProgState
) * backTrackData
->saveStateStackTop
);
3783 curState
= &gData
->stateStack
[gData
->stateStackTop
- 1];
3785 if (backTrackData
->parenCount
) {
3786 memcpy(&x
->parens
[backTrackData
->parenIndex
],
3787 (char *)(backTrackData
+ 1) +
3788 sizeof(REProgState
) * backTrackData
->saveStateStackTop
,
3789 sizeof(RECapture
) * backTrackData
->parenCount
);
3790 parenSoFar
= backTrackData
->parenIndex
+ backTrackData
->parenCount
;
3792 for (k
= curState
->parenSoFar
; k
< parenSoFar
; k
++)
3793 x
->parens
[k
].index
= -1;
3794 parenSoFar
= curState
->parenSoFar
;
3797 re_debug("\tBT_Pop: %ld,%ld",
3798 (unsigned long) backTrackData
->parenIndex
,
3799 (unsigned long) backTrackData
->parenCount
);
3805 * Continue with the expression.
3808 JS_ASSERT(op
< REOP_LIMIT
);
3820 static REMatchState
*
3821 MatchRegExp(REGlobalData
*gData
, REMatchState
*x
)
3823 REMatchState
*result
;
3824 const jschar
*cp
= x
->cp
;
3828 NativeRegExp native
;
3830 /* Run with native regexp if possible. */
3831 if (TRACING_ENABLED(gData
->cx
) &&
3832 (native
= GetNativeRegExp(gData
->cx
, gData
->regexp
))) {
3833 gData
->skipped
= (ptrdiff_t) x
->cp
;
3837 VOUCH_DOES_NOT_REQUIRE_STACK();
3838 JSStackFrame
*caller
= (JS_ON_TRACE(gData
->cx
))
3840 : js_GetScriptedCaller(gData
->cx
, NULL
);
3841 printf("entering REGEXP trace at %s:%u@%u, code: %p\n",
3842 caller
? caller
->script
->filename
: "<unknown>",
3843 caller
? js_FramePCToLineNumber(gData
->cx
, caller
) : 0,
3844 caller
? FramePCOffset(caller
) : 0,
3849 #if defined(JS_NO_FASTCALL) && defined(NANOJIT_IA32)
3850 SIMULATE_FASTCALL(result
, x
, gData
, native
);
3852 result
= native(x
, gData
);
3855 debug_only_v(printf("leaving REGEXP trace\n"));
3857 gData
->skipped
= ((const jschar
*) gData
->skipped
) - cp
;
3862 * Have to include the position beyond the last character
3863 * in order to detect end-of-input/line condition.
3865 for (cp2
= cp
; cp2
<= gData
->cpend
; cp2
++) {
3866 gData
->skipped
= cp2
- cp
;
3868 for (j
= 0; j
< gData
->regexp
->parenCount
; j
++)
3869 x
->parens
[j
].index
= -1;
3870 result
= ExecuteREBytecode(gData
, x
);
3871 if (!gData
->ok
|| result
|| (gData
->regexp
->flags
& JSREG_STICKY
))
3873 gData
->backTrackSP
= gData
->backTrackStack
;
3875 gData
->stateStackTop
= 0;
3876 cp2
= cp
+ gData
->skipped
;
3881 #define MIN_BACKTRACK_LIMIT 400000
3883 static REMatchState
*
3884 InitMatch(JSContext
*cx
, REGlobalData
*gData
, JSRegExp
*re
, size_t length
)
3886 REMatchState
*result
;
3889 gData
->backTrackStackSize
= INITIAL_BACKTRACK
;
3890 JS_ARENA_ALLOCATE_CAST(gData
->backTrackStack
, REBackTrackData
*,
3893 if (!gData
->backTrackStack
)
3896 gData
->backTrackSP
= gData
->backTrackStack
;
3898 gData
->backTrackCount
= 0;
3899 gData
->backTrackLimit
= 0;
3900 if (JS_GetOptions(cx
) & JSOPTION_RELIMIT
) {
3901 gData
->backTrackLimit
= length
* length
* length
; /* O(n^3) */
3902 if (gData
->backTrackLimit
< MIN_BACKTRACK_LIMIT
)
3903 gData
->backTrackLimit
= MIN_BACKTRACK_LIMIT
;
3906 gData
->stateStackLimit
= INITIAL_STATESTACK
;
3907 JS_ARENA_ALLOCATE_CAST(gData
->stateStack
, REProgState
*,
3909 sizeof(REProgState
) * INITIAL_STATESTACK
);
3910 if (!gData
->stateStack
)
3913 gData
->stateStackTop
= 0;
3916 gData
->ok
= JS_TRUE
;
3918 JS_ARENA_ALLOCATE_CAST(result
, REMatchState
*,
3920 offsetof(REMatchState
, parens
)
3921 + re
->parenCount
* sizeof(RECapture
));
3925 for (i
= 0; i
< re
->classCount
; i
++) {
3926 if (!re
->classList
[i
].converted
&&
3927 !MatcherProcessCharSet(gData
, &re
->classList
[i
])) {
3935 js_ReportOutOfScriptQuota(cx
);
3936 gData
->ok
= JS_FALSE
;
3941 js_ExecuteRegExp(JSContext
*cx
, JSRegExp
*re
, JSString
*str
, size_t *indexp
,
3942 JSBool test
, jsval
*rval
)
3945 REMatchState
*x
, *result
;
3947 const jschar
*cp
, *ep
;
3948 size_t i
, length
, start
;
3949 JSSubString
*morepar
;
3951 JSRegExpStatics
*res
;
3954 JSString
*parstr
, *matchstr
;
3957 RECapture
*parsub
= NULL
;
3962 * It's safe to load from cp because JSStrings have a zero at the end,
3963 * and we never let cp get beyond cpend.
3966 JSSTRING_CHARS_AND_LENGTH(str
, cp
, length
);
3970 gData
.cpend
= cp
+ length
;
3972 gData
.start
= start
;
3975 if (!cx
->regexpPool
.first
.next
) {
3977 * The first arena in the regexpPool must have a timestamp at its base.
3979 JS_ARENA_ALLOCATE_CAST(timestamp
, int64
*,
3980 &cx
->regexpPool
, sizeof *timestamp
);
3983 *timestamp
= JS_Now();
3985 mark
= JS_ARENA_MARK(&cx
->regexpPool
);
3987 x
= InitMatch(cx
, &gData
, re
, length
);
3996 * Call the recursive matcher to do the real work. Return null on mismatch
3997 * whether testing or not. On match, return an extended Array object.
3999 result
= MatchRegExp(&gData
, x
);
4008 i
= cp
- gData
.cpbegin
;
4010 matchlen
= i
- (start
+ gData
.skipped
);
4011 JS_ASSERT(matchlen
>= 0);
4017 * Testing for a match and updating cx->regExpStatics: don't allocate
4018 * an array object, do return true.
4022 /* Avoid warning. (gcc doesn't detect that obj is needed iff !test); */
4026 * The array returned on match has element 0 bound to the matched
4027 * string, elements 1 through state.parenCount bound to the paren
4028 * matches, an index property telling the length of the left context,
4029 * and an input property referring to the input string.
4031 obj
= js_NewSlowArrayObject(cx
);
4036 *rval
= OBJECT_TO_JSVAL(obj
);
4038 #define DEFVAL(val, id) { \
4039 ok = js_DefineProperty(cx, obj, id, val, \
4040 JS_PropertyStub, JS_PropertyStub, \
4041 JSPROP_ENUMERATE, NULL); \
4043 cx->weakRoots.newborn[GCX_OBJECT] = NULL; \
4044 cx->weakRoots.newborn[GCX_STRING] = NULL; \
4049 matchstr
= js_NewDependentString(cx
, str
, cp
- JSSTRING_CHARS(str
),
4052 cx
->weakRoots
.newborn
[GCX_OBJECT
] = NULL
;
4056 DEFVAL(STRING_TO_JSVAL(matchstr
), INT_TO_JSID(0));
4059 res
= &cx
->regExpStatics
;
4061 res
->parenCount
= re
->parenCount
;
4062 if (re
->parenCount
== 0) {
4063 res
->lastParen
= js_EmptySubString
;
4065 for (num
= 0; num
< re
->parenCount
; num
++) {
4066 parsub
= &result
->parens
[num
];
4068 if (parsub
->index
== -1) {
4069 res
->parens
[num
].chars
= NULL
;
4070 res
->parens
[num
].length
= 0;
4072 res
->parens
[num
].chars
= gData
.cpbegin
+ parsub
->index
;
4073 res
->parens
[num
].length
= parsub
->length
;
4077 morepar
= res
->moreParens
;
4079 res
->moreLength
= 10;
4080 morepar
= (JSSubString
*)
4081 JS_malloc(cx
, 10 * sizeof(JSSubString
));
4082 } else if (morenum
>= res
->moreLength
) {
4083 res
->moreLength
+= 10;
4084 morepar
= (JSSubString
*)
4085 JS_realloc(cx
, morepar
,
4086 res
->moreLength
* sizeof(JSSubString
));
4089 cx
->weakRoots
.newborn
[GCX_OBJECT
] = NULL
;
4090 cx
->weakRoots
.newborn
[GCX_STRING
] = NULL
;
4094 res
->moreParens
= morepar
;
4095 if (parsub
->index
== -1) {
4096 morepar
[morenum
].chars
= NULL
;
4097 morepar
[morenum
].length
= 0;
4099 morepar
[morenum
].chars
= gData
.cpbegin
+ parsub
->index
;
4100 morepar
[morenum
].length
= parsub
->length
;
4105 if (parsub
->index
== -1) {
4106 ok
= js_DefineProperty(cx
, obj
, INT_TO_JSID(num
+ 1),
4107 JSVAL_VOID
, NULL
, NULL
,
4108 JSPROP_ENUMERATE
, NULL
);
4110 parstr
= js_NewDependentString(cx
, str
,
4111 gData
.cpbegin
+ parsub
->index
-
4112 JSSTRING_CHARS(str
),
4115 cx
->weakRoots
.newborn
[GCX_OBJECT
] = NULL
;
4116 cx
->weakRoots
.newborn
[GCX_STRING
] = NULL
;
4120 ok
= js_DefineProperty(cx
, obj
, INT_TO_JSID(num
+ 1),
4121 STRING_TO_JSVAL(parstr
), NULL
, NULL
,
4122 JSPROP_ENUMERATE
, NULL
);
4125 cx
->weakRoots
.newborn
[GCX_OBJECT
] = NULL
;
4126 cx
->weakRoots
.newborn
[GCX_STRING
] = NULL
;
4130 if (parsub
->index
== -1) {
4131 res
->lastParen
= js_EmptySubString
;
4133 res
->lastParen
.chars
= gData
.cpbegin
+ parsub
->index
;
4134 res
->lastParen
.length
= parsub
->length
;
4140 * Define the index and input properties last for better for/in loop
4141 * order (so they come after the elements).
4143 DEFVAL(INT_TO_JSVAL(start
+ gData
.skipped
),
4144 ATOM_TO_JSID(cx
->runtime
->atomState
.indexAtom
));
4145 DEFVAL(STRING_TO_JSVAL(str
),
4146 ATOM_TO_JSID(cx
->runtime
->atomState
.inputAtom
));
4151 res
->lastMatch
.chars
= cp
;
4152 res
->lastMatch
.length
= matchlen
;
4155 * For JS1.3 and ECMAv2, emulate Perl5 exactly:
4157 * js1.3 "hi", "hi there" "hihitherehi therebye"
4159 res
->leftContext
.chars
= JSSTRING_CHARS(str
);
4160 res
->leftContext
.length
= start
+ gData
.skipped
;
4161 res
->rightContext
.chars
= ep
;
4162 res
->rightContext
.length
= gData
.cpend
- ep
;
4165 JS_ARENA_RELEASE(&cx
->regexpPool
, mark
);
4169 /************************************************************************/
4171 #define REGEXP_PROP_ATTRS (JSPROP_PERMANENT | JSPROP_SHARED)
4172 #define RO_REGEXP_PROP_ATTRS (REGEXP_PROP_ATTRS | JSPROP_READONLY)
4174 static JSPropertySpec regexp_props
[] = {
4175 {"source", REGEXP_SOURCE
, RO_REGEXP_PROP_ATTRS
,0,0},
4176 {"global", REGEXP_GLOBAL
, RO_REGEXP_PROP_ATTRS
,0,0},
4177 {"ignoreCase", REGEXP_IGNORE_CASE
, RO_REGEXP_PROP_ATTRS
,0,0},
4178 {"lastIndex", REGEXP_LAST_INDEX
, REGEXP_PROP_ATTRS
,0,0},
4179 {"multiline", REGEXP_MULTILINE
, RO_REGEXP_PROP_ATTRS
,0,0},
4180 {"sticky", REGEXP_STICKY
, RO_REGEXP_PROP_ATTRS
,0,0},
4185 regexp_getProperty(JSContext
*cx
, JSObject
*obj
, jsval id
, jsval
*vp
)
4190 if (!JSVAL_IS_INT(id
))
4192 while (OBJ_GET_CLASS(cx
, obj
) != &js_RegExpClass
) {
4193 obj
= OBJ_GET_PROTO(cx
, obj
);
4197 slot
= JSVAL_TO_INT(id
);
4198 if (slot
== REGEXP_LAST_INDEX
)
4199 return JS_GetReservedSlot(cx
, obj
, 0, vp
);
4201 JS_LOCK_OBJ(cx
, obj
);
4202 re
= (JSRegExp
*) JS_GetPrivate(cx
, obj
);
4206 *vp
= STRING_TO_JSVAL(re
->source
);
4209 *vp
= BOOLEAN_TO_JSVAL((re
->flags
& JSREG_GLOB
) != 0);
4211 case REGEXP_IGNORE_CASE
:
4212 *vp
= BOOLEAN_TO_JSVAL((re
->flags
& JSREG_FOLD
) != 0);
4214 case REGEXP_MULTILINE
:
4215 *vp
= BOOLEAN_TO_JSVAL((re
->flags
& JSREG_MULTILINE
) != 0);
4218 *vp
= BOOLEAN_TO_JSVAL((re
->flags
& JSREG_STICKY
) != 0);
4222 JS_UNLOCK_OBJ(cx
, obj
);
4227 regexp_setProperty(JSContext
*cx
, JSObject
*obj
, jsval id
, jsval
*vp
)
4234 if (!JSVAL_IS_INT(id
))
4236 while (OBJ_GET_CLASS(cx
, obj
) != &js_RegExpClass
) {
4237 obj
= OBJ_GET_PROTO(cx
, obj
);
4241 slot
= JSVAL_TO_INT(id
);
4242 if (slot
== REGEXP_LAST_INDEX
) {
4243 if (!JS_ValueToNumber(cx
, *vp
, &lastIndex
))
4245 lastIndex
= js_DoubleToInteger(lastIndex
);
4246 ok
= JS_NewNumberValue(cx
, lastIndex
, vp
) &&
4247 JS_SetReservedSlot(cx
, obj
, 0, *vp
);
4253 * RegExp class static properties and their Perl counterparts:
4256 * RegExp.multiline $*
4257 * RegExp.lastMatch $&
4258 * RegExp.lastParen $+
4259 * RegExp.leftContext $`
4260 * RegExp.rightContext $'
4262 enum regexp_static_tinyid
{
4263 REGEXP_STATIC_INPUT
= -1,
4264 REGEXP_STATIC_MULTILINE
= -2,
4265 REGEXP_STATIC_LAST_MATCH
= -3,
4266 REGEXP_STATIC_LAST_PAREN
= -4,
4267 REGEXP_STATIC_LEFT_CONTEXT
= -5,
4268 REGEXP_STATIC_RIGHT_CONTEXT
= -6
4272 js_InitRegExpStatics(JSContext
*cx
)
4275 * To avoid multiple allocations in InitMatch(), the arena size parameter
4276 * should be at least as big as:
4278 * + (sizeof(REProgState) * INITIAL_STATESTACK)
4279 * + (offsetof(REMatchState, parens) + avgParanSize * sizeof(RECapture))
4281 JS_INIT_ARENA_POOL(&cx
->regexpPool
, "regexp",
4282 12 * 1024 - 40, /* FIXME: bug 421435 */
4283 sizeof(void *), &cx
->scriptStackQuota
);
4285 JS_ClearRegExpStatics(cx
);
4289 js_TraceRegExpStatics(JSTracer
*trc
, JSContext
*acx
)
4291 JSRegExpStatics
*res
= &acx
->regExpStatics
;
4294 JS_CALL_STRING_TRACER(trc
, res
->input
, "res->input");
4298 js_FreeRegExpStatics(JSContext
*cx
)
4300 JSRegExpStatics
*res
= &cx
->regExpStatics
;
4302 if (res
->moreParens
) {
4303 JS_free(cx
, res
->moreParens
);
4304 res
->moreParens
= NULL
;
4306 JS_FinishArenaPool(&cx
->regexpPool
);
4310 regexp_static_getProperty(JSContext
*cx
, JSObject
*obj
, jsval id
, jsval
*vp
)
4313 JSRegExpStatics
*res
;
4317 res
= &cx
->regExpStatics
;
4318 if (!JSVAL_IS_INT(id
))
4320 slot
= JSVAL_TO_INT(id
);
4322 case REGEXP_STATIC_INPUT
:
4323 *vp
= res
->input
? STRING_TO_JSVAL(res
->input
)
4324 : JS_GetEmptyStringValue(cx
);
4326 case REGEXP_STATIC_MULTILINE
:
4327 *vp
= BOOLEAN_TO_JSVAL(res
->multiline
);
4329 case REGEXP_STATIC_LAST_MATCH
:
4330 sub
= &res
->lastMatch
;
4332 case REGEXP_STATIC_LAST_PAREN
:
4333 sub
= &res
->lastParen
;
4335 case REGEXP_STATIC_LEFT_CONTEXT
:
4336 sub
= &res
->leftContext
;
4338 case REGEXP_STATIC_RIGHT_CONTEXT
:
4339 sub
= &res
->rightContext
;
4342 sub
= REGEXP_PAREN_SUBSTRING(res
, slot
);
4345 str
= js_NewStringCopyN(cx
, sub
->chars
, sub
->length
);
4348 *vp
= STRING_TO_JSVAL(str
);
4353 regexp_static_setProperty(JSContext
*cx
, JSObject
*obj
, jsval id
, jsval
*vp
)
4355 JSRegExpStatics
*res
;
4357 if (!JSVAL_IS_INT(id
))
4359 res
= &cx
->regExpStatics
;
4360 /* XXX use if-else rather than switch to keep MSVC1.52 from crashing */
4361 if (JSVAL_TO_INT(id
) == REGEXP_STATIC_INPUT
) {
4362 if (!JSVAL_IS_STRING(*vp
) &&
4363 !JS_ConvertValue(cx
, *vp
, JSTYPE_STRING
, vp
)) {
4366 res
->input
= JSVAL_TO_STRING(*vp
);
4367 } else if (JSVAL_TO_INT(id
) == REGEXP_STATIC_MULTILINE
) {
4368 if (!JSVAL_IS_BOOLEAN(*vp
) &&
4369 !JS_ConvertValue(cx
, *vp
, JSTYPE_BOOLEAN
, vp
)) {
4372 res
->multiline
= JSVAL_TO_BOOLEAN(*vp
);
4376 #define REGEXP_STATIC_PROP_ATTRS (REGEXP_PROP_ATTRS | JSPROP_ENUMERATE)
4377 #define RO_REGEXP_STATIC_PROP_ATTRS (REGEXP_STATIC_PROP_ATTRS | JSPROP_READONLY)
4379 static JSPropertySpec regexp_static_props
[] = {
4381 REGEXP_STATIC_INPUT
,
4382 REGEXP_STATIC_PROP_ATTRS
,
4383 regexp_static_getProperty
, regexp_static_setProperty
},
4385 REGEXP_STATIC_MULTILINE
,
4386 REGEXP_STATIC_PROP_ATTRS
,
4387 regexp_static_getProperty
, regexp_static_setProperty
},
4389 REGEXP_STATIC_LAST_MATCH
,
4390 RO_REGEXP_STATIC_PROP_ATTRS
,
4391 regexp_static_getProperty
, regexp_static_getProperty
},
4393 REGEXP_STATIC_LAST_PAREN
,
4394 RO_REGEXP_STATIC_PROP_ATTRS
,
4395 regexp_static_getProperty
, regexp_static_getProperty
},
4397 REGEXP_STATIC_LEFT_CONTEXT
,
4398 RO_REGEXP_STATIC_PROP_ATTRS
,
4399 regexp_static_getProperty
, regexp_static_getProperty
},
4401 REGEXP_STATIC_RIGHT_CONTEXT
,
4402 RO_REGEXP_STATIC_PROP_ATTRS
,
4403 regexp_static_getProperty
, regexp_static_getProperty
},
4405 /* XXX should have block scope and local $1, etc. */
4406 {"$1", 0, RO_REGEXP_STATIC_PROP_ATTRS
,
4407 regexp_static_getProperty
, regexp_static_getProperty
},
4408 {"$2", 1, RO_REGEXP_STATIC_PROP_ATTRS
,
4409 regexp_static_getProperty
, regexp_static_getProperty
},
4410 {"$3", 2, RO_REGEXP_STATIC_PROP_ATTRS
,
4411 regexp_static_getProperty
, regexp_static_getProperty
},
4412 {"$4", 3, RO_REGEXP_STATIC_PROP_ATTRS
,
4413 regexp_static_getProperty
, regexp_static_getProperty
},
4414 {"$5", 4, RO_REGEXP_STATIC_PROP_ATTRS
,
4415 regexp_static_getProperty
, regexp_static_getProperty
},
4416 {"$6", 5, RO_REGEXP_STATIC_PROP_ATTRS
,
4417 regexp_static_getProperty
, regexp_static_getProperty
},
4418 {"$7", 6, RO_REGEXP_STATIC_PROP_ATTRS
,
4419 regexp_static_getProperty
, regexp_static_getProperty
},
4420 {"$8", 7, RO_REGEXP_STATIC_PROP_ATTRS
,
4421 regexp_static_getProperty
, regexp_static_getProperty
},
4422 {"$9", 8, RO_REGEXP_STATIC_PROP_ATTRS
,
4423 regexp_static_getProperty
, regexp_static_getProperty
},
4429 regexp_finalize(JSContext
*cx
, JSObject
*obj
)
4433 re
= (JSRegExp
*) JS_GetPrivate(cx
, obj
);
4436 js_DestroyRegExp(cx
, re
);
4439 /* Forward static prototype. */
4441 regexp_exec_sub(JSContext
*cx
, JSObject
*obj
, uintN argc
, jsval
*argv
,
4442 JSBool test
, jsval
*rval
);
4445 regexp_call(JSContext
*cx
, JSObject
*obj
, uintN argc
, jsval
*argv
, jsval
*rval
)
4447 return regexp_exec_sub(cx
, JSVAL_TO_OBJECT(argv
[-2]), argc
, argv
,
4453 #include "jsxdrapi.h"
4456 regexp_xdrObject(JSXDRState
*xdr
, JSObject
**objp
)
4463 if (xdr
->mode
== JSXDR_ENCODE
) {
4464 re
= (JSRegExp
*) JS_GetPrivate(xdr
->cx
, *objp
);
4467 source
= re
->source
;
4468 flagsword
= (uint32
)re
->flags
;
4470 if (!JS_XDRString(xdr
, &source
) ||
4471 !JS_XDRUint32(xdr
, &flagsword
)) {
4474 if (xdr
->mode
== JSXDR_DECODE
) {
4475 obj
= js_NewObject(xdr
->cx
, &js_RegExpClass
, NULL
, NULL
, 0);
4478 STOBJ_CLEAR_PARENT(obj
);
4479 STOBJ_CLEAR_PROTO(obj
);
4480 re
= js_NewRegExp(xdr
->cx
, NULL
, source
, (uint8
)flagsword
, JS_FALSE
);
4483 if (!JS_SetPrivate(xdr
->cx
, obj
, re
) ||
4484 !js_SetLastIndex(xdr
->cx
, obj
, 0)) {
4485 js_DestroyRegExp(xdr
->cx
, re
);
4493 #else /* !JS_HAS_XDR */
4495 #define regexp_xdrObject NULL
4497 #endif /* !JS_HAS_XDR */
4500 regexp_trace(JSTracer
*trc
, JSObject
*obj
)
4504 re
= (JSRegExp
*) JS_GetPrivate(trc
->context
, obj
);
4505 if (re
&& re
->source
)
4506 JS_CALL_STRING_TRACER(trc
, re
->source
, "source");
4509 JSClass js_RegExpClass
= {
4511 JSCLASS_HAS_PRIVATE
| JSCLASS_HAS_RESERVED_SLOTS(1) |
4512 JSCLASS_MARK_IS_TRACE
| JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp
),
4513 JS_PropertyStub
, JS_PropertyStub
,
4514 regexp_getProperty
, regexp_setProperty
,
4515 JS_EnumerateStub
, JS_ResolveStub
,
4516 JS_ConvertStub
, regexp_finalize
,
4519 regexp_xdrObject
, NULL
,
4520 JS_CLASS_TRACE(regexp_trace
), 0
4523 static const jschar empty_regexp_ucstr
[] = {'(', '?', ':', ')', 0};
4526 js_regexp_toString(JSContext
*cx
, JSObject
*obj
, jsval
*vp
)
4529 const jschar
*source
;
4531 size_t length
, nflags
;
4535 if (!JS_InstanceOf(cx
, obj
, &js_RegExpClass
, vp
+ 2))
4537 JS_LOCK_OBJ(cx
, obj
);
4538 re
= (JSRegExp
*) JS_GetPrivate(cx
, obj
);
4540 JS_UNLOCK_OBJ(cx
, obj
);
4541 *vp
= STRING_TO_JSVAL(cx
->runtime
->emptyString
);
4545 JSSTRING_CHARS_AND_LENGTH(re
->source
, source
, length
);
4547 source
= empty_regexp_ucstr
;
4548 length
= JS_ARRAY_LENGTH(empty_regexp_ucstr
) - 1;
4552 for (flags
= re
->flags
; flags
!= 0; flags
&= flags
- 1)
4554 chars
= (jschar
*) JS_malloc(cx
, (length
+ nflags
+ 1) * sizeof(jschar
));
4556 JS_UNLOCK_OBJ(cx
, obj
);
4561 js_strncpy(&chars
[1], source
, length
- 2);
4562 chars
[length
-1] = '/';
4564 if (re
->flags
& JSREG_GLOB
)
4565 chars
[length
++] = 'g';
4566 if (re
->flags
& JSREG_FOLD
)
4567 chars
[length
++] = 'i';
4568 if (re
->flags
& JSREG_MULTILINE
)
4569 chars
[length
++] = 'm';
4570 if (re
->flags
& JSREG_STICKY
)
4571 chars
[length
++] = 'y';
4573 JS_UNLOCK_OBJ(cx
, obj
);
4576 str
= js_NewString(cx
, chars
, length
);
4581 *vp
= STRING_TO_JSVAL(str
);
4586 regexp_toString(JSContext
*cx
, uintN argc
, jsval
*vp
)
4590 obj
= JS_THIS_OBJECT(cx
, vp
);
4591 return obj
&& js_regexp_toString(cx
, obj
, vp
);
4595 regexp_compile_sub(JSContext
*cx
, JSObject
*obj
, uintN argc
, jsval
*argv
,
4598 JSString
*opt
, *str
;
4599 JSRegExp
*oldre
, *re
;
4602 size_t length
, nbytes
;
4603 const jschar
*cp
, *start
, *end
;
4604 jschar
*nstart
, *ncp
, *tmp
;
4606 if (!JS_InstanceOf(cx
, obj
, &js_RegExpClass
, argv
))
4610 str
= cx
->runtime
->emptyString
;
4612 if (JSVAL_IS_OBJECT(argv
[0])) {
4614 * If we get passed in a RegExp object we construct a new
4615 * RegExp that is a duplicate of it by re-compiling the
4616 * original source code. ECMA requires that it be an error
4617 * here if the flags are specified. (We must use the flags
4618 * from the original RegExp also).
4620 obj2
= JSVAL_TO_OBJECT(argv
[0]);
4621 if (obj2
&& OBJ_GET_CLASS(cx
, obj2
) == &js_RegExpClass
) {
4622 if (argc
>= 2 && !JSVAL_IS_VOID(argv
[1])) { /* 'flags' passed */
4623 JS_ReportErrorNumber(cx
, js_GetErrorMessage
, NULL
,
4624 JSMSG_NEWREGEXP_FLAGGED
);
4627 JS_LOCK_OBJ(cx
, obj2
);
4628 re
= (JSRegExp
*) JS_GetPrivate(cx
, obj2
);
4630 JS_UNLOCK_OBJ(cx
, obj2
);
4633 re
= js_NewRegExp(cx
, NULL
, re
->source
, re
->flags
, JS_FALSE
);
4634 JS_UNLOCK_OBJ(cx
, obj2
);
4638 str
= js_ValueToString(cx
, argv
[0]);
4641 argv
[0] = STRING_TO_JSVAL(str
);
4643 if (JSVAL_IS_VOID(argv
[1])) {
4646 opt
= js_ValueToString(cx
, argv
[1]);
4649 argv
[1] = STRING_TO_JSVAL(opt
);
4653 /* Escape any naked slashes in the regexp source. */
4654 JSSTRING_CHARS_AND_LENGTH(str
, start
, length
);
4655 end
= start
+ length
;
4656 nstart
= ncp
= NULL
;
4657 for (cp
= start
; cp
< end
; cp
++) {
4658 if (*cp
== '/' && (cp
== start
|| cp
[-1] != '\\')) {
4659 nbytes
= (++length
+ 1) * sizeof(jschar
);
4661 nstart
= (jschar
*) JS_malloc(cx
, nbytes
);
4664 ncp
= nstart
+ (cp
- start
);
4665 js_strncpy(nstart
, start
, cp
- start
);
4667 tmp
= (jschar
*) JS_realloc(cx
, nstart
, nbytes
);
4669 JS_free(cx
, nstart
);
4672 ncp
= tmp
+ (ncp
- nstart
);
4682 /* Don't forget to store the backstop after the new string. */
4683 JS_ASSERT((size_t)(ncp
- nstart
) == length
);
4685 str
= js_NewString(cx
, nstart
, length
);
4687 JS_free(cx
, nstart
);
4690 argv
[0] = STRING_TO_JSVAL(str
);
4694 re
= js_NewRegExpOpt(cx
, str
, opt
, JS_FALSE
);
4698 JS_LOCK_OBJ(cx
, obj
);
4699 oldre
= (JSRegExp
*) JS_GetPrivate(cx
, obj
);
4700 ok
= JS_SetPrivate(cx
, obj
, re
);
4701 ok2
= js_SetLastIndex(cx
, obj
, 0);
4702 JS_UNLOCK_OBJ(cx
, obj
);
4704 js_DestroyRegExp(cx
, re
);
4708 js_DestroyRegExp(cx
, oldre
);
4709 *rval
= OBJECT_TO_JSVAL(obj
);
4714 regexp_compile(JSContext
*cx
, uintN argc
, jsval
*vp
)
4718 obj
= JS_THIS_OBJECT(cx
, vp
);
4719 return obj
&& regexp_compile_sub(cx
, obj
, argc
, vp
+ 2, vp
);
4723 regexp_exec_sub(JSContext
*cx
, JSObject
*obj
, uintN argc
, jsval
*argv
,
4724 JSBool test
, jsval
*rval
)
4732 ok
= JS_InstanceOf(cx
, obj
, &js_RegExpClass
, argv
);
4735 JS_LOCK_OBJ(cx
, obj
);
4736 re
= (JSRegExp
*) JS_GetPrivate(cx
, obj
);
4738 JS_UNLOCK_OBJ(cx
, obj
);
4742 /* NB: we must reach out: after this paragraph, in order to drop re. */
4743 HOLD_REGEXP(cx
, re
);
4744 sticky
= (re
->flags
& JSREG_STICKY
) != 0;
4745 if (re
->flags
& (JSREG_GLOB
| JSREG_STICKY
)) {
4746 ok
= js_GetLastIndex(cx
, obj
, &lastIndex
);
4750 JS_UNLOCK_OBJ(cx
, obj
);
4754 /* Now that obj is unlocked, it's safe to (potentially) grab the GC lock. */
4756 str
= cx
->regExpStatics
.input
;
4758 const char *bytes
= js_GetStringBytes(cx
, re
->source
);
4761 JS_ReportErrorNumber(cx
, js_GetErrorMessage
, NULL
,
4764 (re
->flags
& JSREG_GLOB
) ? "g" : "",
4765 (re
->flags
& JSREG_FOLD
) ? "i" : "",
4766 (re
->flags
& JSREG_MULTILINE
) ? "m" : "",
4767 (re
->flags
& JSREG_STICKY
) ? "y" : "");
4773 str
= js_ValueToString(cx
, argv
[0]);
4778 argv
[0] = STRING_TO_JSVAL(str
);
4781 if (lastIndex
< 0 || JSSTRING_LENGTH(str
) < lastIndex
) {
4782 ok
= js_SetLastIndex(cx
, obj
, 0);
4785 i
= (size_t) lastIndex
;
4786 ok
= js_ExecuteRegExp(cx
, re
, str
, &i
, test
, rval
);
4788 ((re
->flags
& JSREG_GLOB
) || (*rval
!= JSVAL_NULL
&& sticky
))) {
4789 ok
= js_SetLastIndex(cx
, obj
, (*rval
== JSVAL_NULL
) ? 0 : i
);
4794 DROP_REGEXP(cx
, re
);
4799 regexp_exec(JSContext
*cx
, uintN argc
, jsval
*vp
)
4801 return regexp_exec_sub(cx
, JS_THIS_OBJECT(cx
, vp
), argc
, vp
+ 2, JS_FALSE
,
4806 regexp_test(JSContext
*cx
, uintN argc
, jsval
*vp
)
4808 if (!regexp_exec_sub(cx
, JS_THIS_OBJECT(cx
, vp
), argc
, vp
+ 2, JS_TRUE
, vp
))
4810 if (*vp
!= JSVAL_TRUE
)
4816 static jsint FASTCALL
4817 Regexp_p_test(JSContext
* cx
, JSObject
* regexp
, JSString
* str
)
4819 jsval vp
[3] = { JSVAL_NULL
, OBJECT_TO_JSVAL(regexp
), STRING_TO_JSVAL(str
) };
4820 if (!regexp_exec_sub(cx
, regexp
, 1, vp
+ 2, JS_TRUE
, vp
))
4821 return JSVAL_TO_BOOLEAN(JSVAL_VOID
);
4822 return *vp
== JSVAL_TRUE
;
4825 JS_DEFINE_TRCINFO_1(regexp_test
,
4826 (3, (static, BOOL_RETRY
, Regexp_p_test
, CONTEXT
, THIS
, STRING
, 1, 1)))
4830 static JSFunctionSpec regexp_methods
[] = {
4832 JS_FN(js_toSource_str
, regexp_toString
, 0,0),
4834 JS_FN(js_toString_str
, regexp_toString
, 0,0),
4835 JS_FN("compile", regexp_compile
, 2,0),
4836 JS_FN("exec", regexp_exec
, 1,0),
4837 JS_TN("test", regexp_test
, 1,0, regexp_test_trcinfo
),
4842 RegExp(JSContext
*cx
, JSObject
*obj
, uintN argc
, jsval
*argv
, jsval
*rval
)
4844 if (!JS_IsConstructing(cx
)) {
4846 * If first arg is regexp and no flags are given, just return the arg.
4847 * (regexp_compile_sub detects the regexp + flags case and throws a
4848 * TypeError.) See 10.15.3.1.
4850 if ((argc
< 2 || JSVAL_IS_VOID(argv
[1])) &&
4851 !JSVAL_IS_PRIMITIVE(argv
[0]) &&
4852 OBJ_GET_CLASS(cx
, JSVAL_TO_OBJECT(argv
[0])) == &js_RegExpClass
) {
4857 /* Otherwise, replace obj with a new RegExp object. */
4858 obj
= js_NewObject(cx
, &js_RegExpClass
, NULL
, NULL
, 0);
4863 * regexp_compile_sub does not use rval to root its temporaries so we
4864 * can use it to root obj.
4866 *rval
= OBJECT_TO_JSVAL(obj
);
4868 return regexp_compile_sub(cx
, obj
, argc
, argv
, rval
);
4872 js_InitRegExpClass(JSContext
*cx
, JSObject
*obj
)
4874 JSObject
*proto
, *ctor
;
4877 proto
= JS_InitClass(cx
, obj
, NULL
, &js_RegExpClass
, RegExp
, 1,
4878 regexp_props
, regexp_methods
,
4879 regexp_static_props
, NULL
);
4881 if (!proto
|| !(ctor
= JS_GetConstructor(cx
, proto
)))
4883 if (!JS_AliasProperty(cx
, ctor
, "input", "$_") ||
4884 !JS_AliasProperty(cx
, ctor
, "multiline", "$*") ||
4885 !JS_AliasProperty(cx
, ctor
, "lastMatch", "$&") ||
4886 !JS_AliasProperty(cx
, ctor
, "lastParen", "$+") ||
4887 !JS_AliasProperty(cx
, ctor
, "leftContext", "$`") ||
4888 !JS_AliasProperty(cx
, ctor
, "rightContext", "$'")) {
4892 /* Give RegExp.prototype private data so it matches the empty string. */
4893 if (!regexp_compile_sub(cx
, proto
, 0, NULL
, &rval
))
4898 JS_DeleteProperty(cx
, obj
, js_RegExpClass
.name
);
4903 js_NewRegExpObject(JSContext
*cx
, JSTokenStream
*ts
,
4904 jschar
*chars
, size_t length
, uintN flags
)
4910 str
= js_NewStringCopyN(cx
, chars
, length
);
4913 JSAutoTempValueRooter
tvr(cx
, str
);
4914 re
= js_NewRegExp(cx
, ts
, str
, flags
, JS_FALSE
);
4917 obj
= js_NewObject(cx
, &js_RegExpClass
, NULL
, NULL
, 0);
4918 if (!obj
|| !JS_SetPrivate(cx
, obj
, re
)) {
4919 js_DestroyRegExp(cx
, re
);
4922 if (obj
&& !js_SetLastIndex(cx
, obj
, 0))
4928 js_CloneRegExpObject(JSContext
*cx
, JSObject
*obj
, JSObject
*parent
)
4933 JS_ASSERT(OBJ_GET_CLASS(cx
, obj
) == &js_RegExpClass
);
4934 clone
= js_NewObject(cx
, &js_RegExpClass
, NULL
, parent
, 0);
4937 re
= (JSRegExp
*) JS_GetPrivate(cx
, obj
);
4938 if (!JS_SetPrivate(cx
, clone
, re
) || !js_SetLastIndex(cx
, clone
, 0)) {
4939 cx
->weakRoots
.newborn
[GCX_OBJECT
] = NULL
;
4942 HOLD_REGEXP(cx
, re
);
4947 js_GetLastIndex(JSContext
*cx
, JSObject
*obj
, jsdouble
*lastIndex
)
4951 return JS_GetReservedSlot(cx
, obj
, 0, &v
) &&
4952 JS_ValueToNumber(cx
, v
, lastIndex
);
4956 js_SetLastIndex(JSContext
*cx
, JSObject
*obj
, jsdouble lastIndex
)
4960 return JS_NewNumberValue(cx
, lastIndex
, &v
) &&
4961 JS_SetReservedSlot(cx
, obj
, 0, v
);