2 * Copyright 2008 Jacek Caban for CodeWeavers
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
20 * Code in this file is based on files:
23 * from Mozilla project, released under LGPL 2.1 or later.
25 * The Original Code is Mozilla Communicator client code, released
28 * The Initial Developer of the Original Code is
29 * Netscape Communications Corporation.
30 * Portions created by the Initial Developer are Copyright (C) 1998
31 * the Initial Developer. All Rights Reserved.
39 #include "wine/debug.h"
41 WINE_DEFAULT_DEBUG_CHANNEL(jscript
);
43 #define JSREG_FOLD 0x01 /* fold uppercase to lowercase */
44 #define JSREG_GLOB 0x02 /* global exec, creates array of matches */
45 #define JSREG_MULTILINE 0x04 /* treat ^ and $ as begin and end of line */
46 #define JSREG_STICKY 0x08 /* only match starting at lastIndex */
48 typedef BYTE JSPackedBool
;
49 typedef BYTE jsbytecode
;
52 * This struct holds a bitmap representation of a class from a regexp.
53 * There's a list of these referenced by the classList field in the JSRegExp
54 * struct below. The initial state has startIndex set to the offset in the
55 * original regexp source of the beginning of the class contents. The first
56 * use of the class converts the source representation into a bitmap.
59 typedef struct RECharSet
{
60 JSPackedBool converted
;
73 WORD flags
; /* flags, see jsapi.h's JSREG_* defines */
74 size_t parenCount
; /* number of parenthesized submatches */
75 size_t classCount
; /* count [...] bitmaps */
76 RECharSet
*classList
; /* list of [...] bitmaps */
77 BSTR source
; /* locked source string, sans // */
78 jsbytecode program
[1]; /* regular expression bytecode */
87 jsval_t last_index_val
;
90 static const WCHAR sourceW
[] = {'s','o','u','r','c','e',0};
91 static const WCHAR globalW
[] = {'g','l','o','b','a','l',0};
92 static const WCHAR ignoreCaseW
[] = {'i','g','n','o','r','e','C','a','s','e',0};
93 static const WCHAR multilineW
[] = {'m','u','l','t','i','l','i','n','e',0};
94 static const WCHAR lastIndexW
[] = {'l','a','s','t','I','n','d','e','x',0};
95 static const WCHAR toStringW
[] = {'t','o','S','t','r','i','n','g',0};
96 static const WCHAR execW
[] = {'e','x','e','c',0};
97 static const WCHAR testW
[] = {'t','e','s','t',0};
99 static const WCHAR leftContextW
[] =
100 {'l','e','f','t','C','o','n','t','e','x','t',0};
101 static const WCHAR rightContextW
[] =
102 {'r','i','g','h','t','C','o','n','t','e','x','t',0};
104 static const WCHAR idx1W
[] = {'$','1',0};
105 static const WCHAR idx2W
[] = {'$','2',0};
106 static const WCHAR idx3W
[] = {'$','3',0};
107 static const WCHAR idx4W
[] = {'$','4',0};
108 static const WCHAR idx5W
[] = {'$','5',0};
109 static const WCHAR idx6W
[] = {'$','6',0};
110 static const WCHAR idx7W
[] = {'$','7',0};
111 static const WCHAR idx8W
[] = {'$','8',0};
112 static const WCHAR idx9W
[] = {'$','9',0};
114 static const WCHAR undefinedW
[] = {'u','n','d','e','f','i','n','e','d',0};
115 static const WCHAR emptyW
[] = {0};
117 /* FIXME: Better error handling */
118 #define ReportRegExpError(a,b,c)
119 #define ReportRegExpErrorHelper(a,b,c,d)
120 #define JS_ReportErrorNumber(a,b,c,d)
121 #define JS_ReportErrorFlagsAndNumber(a,b,c,d,e,f)
122 #define js_ReportOutOfScriptQuota(a)
123 #define JS_ReportOutOfMemory(a)
124 #define JS_COUNT_OPERATION(a,b)
126 #define JSMSG_MIN_TOO_BIG 47
127 #define JSMSG_MAX_TOO_BIG 48
128 #define JSMSG_OUT_OF_ORDER 49
129 #define JSMSG_OUT_OF_MEMORY 137
131 #define LINE_SEPARATOR 0x2028
132 #define PARA_SEPARATOR 0x2029
134 #define RE_IS_LETTER(c) (((c >= 'A') && (c <= 'Z')) || \
135 ((c >= 'a') && (c <= 'z')) )
136 #define RE_IS_LINE_TERM(c) ((c == '\n') || (c == '\r') || \
137 (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
139 #define JS_ISWORD(c) ((c) < 128 && (isalnum(c) || (c) == '_'))
141 #define JS7_ISDEC(c) ((((unsigned)(c)) - '0') <= 9)
142 #define JS7_UNDEC(c) ((c) - '0')
194 REOP_LIMIT
/* META: no operator >= to this */
197 #define REOP_IS_SIMPLE(op) ((op) <= REOP_NCLASS)
199 static const char *reop_names
[] = {
252 typedef struct RECapture
{
253 ptrdiff_t index
; /* start of contents, -1 for empty */
254 size_t length
; /* length of capture */
257 typedef struct REMatchState
{
259 RECapture parens
[1]; /* first of 're->parenCount' captures,
260 allocated at end of this struct */
263 typedef struct REProgState
{
264 jsbytecode
*continue_pc
; /* current continuation data */
265 jsbytecode continue_op
;
266 ptrdiff_t index
; /* progress in text */
267 size_t parenSoFar
; /* highest indexed paren started */
270 UINT min
; /* current quantifier limits */
274 size_t top
; /* backtrack stack state */
280 typedef struct REBackTrackData
{
281 size_t sz
; /* size of previous stack entry */
282 jsbytecode
*backtrack_pc
; /* where to backtrack to */
283 jsbytecode backtrack_op
;
284 const WCHAR
*cp
; /* index in text of match at backtrack */
285 size_t parenIndex
; /* start index of saved paren contents */
286 size_t parenCount
; /* # of saved paren contents */
287 size_t saveStateStackTop
; /* number of parent states */
288 /* saved parent states follow */
289 /* saved paren contents follow */
292 #define INITIAL_STATESTACK 100
293 #define INITIAL_BACKTRACK 8000
295 typedef struct REGlobalData
{
297 JSRegExp
*regexp
; /* the RE in execution */
298 BOOL ok
; /* runtime error (out_of_memory only?) */
299 size_t start
; /* offset to start at */
300 ptrdiff_t skipped
; /* chars skipped anchoring this r.e. */
301 const WCHAR
*cpbegin
; /* text base address */
302 const WCHAR
*cpend
; /* text limit address */
304 REProgState
*stateStack
; /* stack of state of current parents */
305 size_t stateStackTop
;
306 size_t stateStackLimit
;
308 REBackTrackData
*backTrackStack
;/* stack of matched-so-far positions */
309 REBackTrackData
*backTrackSP
;
310 size_t backTrackStackSize
;
311 size_t cursz
; /* size of current stack entry */
312 size_t backTrackCount
; /* how many times we've backtracked */
313 size_t backTrackLimit
; /* upper limit on backtrack states */
315 jsheap_t
*pool
; /* It's faster to use one malloc'd pool
316 than to malloc/free the three items
317 that are allocated from this pool */
320 typedef struct RENode RENode
;
322 REOp op
; /* r.e. op bytecode */
323 RENode
*next
; /* next in concatenation order */
324 void *kid
; /* first operand */
326 void *kid2
; /* second operand */
327 INT num
; /* could be a number */
328 size_t parenIndex
; /* or a parenthesis index */
329 struct { /* or a quantifier range */
334 struct { /* or a character class */
336 size_t kidlen
; /* length of string at kid, in jschars */
337 size_t index
; /* index into class list */
338 WORD bmsize
; /* bitmap size, based on max char code */
341 struct { /* or a literal sequence */
342 WCHAR chr
; /* of one character */
343 size_t length
; /* or many (via the kid) */
346 RENode
*kid2
; /* second operand from ALT */
347 WCHAR ch1
; /* match char for ALTPREREQ */
348 WCHAR ch2
; /* ditto, or class index for ALTPREREQ2 */
353 #define CLASS_CACHE_SIZE 4
355 typedef struct CompilerState
{
356 script_ctx_t
*context
;
357 const WCHAR
*cpbegin
;
361 size_t classCount
; /* number of [] encountered */
362 size_t treeDepth
; /* maximum depth of parse tree */
363 size_t progLength
; /* estimated bytecode length */
365 size_t classBitmapsMem
; /* memory to hold all class bitmaps */
367 const WCHAR
*start
; /* small cache of class strings */
368 size_t length
; /* since they're often the same */
370 } classCache
[CLASS_CACHE_SIZE
];
374 typedef struct EmitStateStackEntry
{
375 jsbytecode
*altHead
; /* start of REOP_ALT* opcode */
376 jsbytecode
*nextAltFixup
; /* fixup pointer to next-alt offset */
377 jsbytecode
*nextTermFixup
; /* fixup ptr. to REOP_JUMP offset */
378 jsbytecode
*endTermFixup
; /* fixup ptr. to REOPT_ALTPREREQ* offset */
379 RENode
*continueNode
; /* original REOP_ALT* node being stacked */
380 jsbytecode continueOp
; /* REOP_JUMP or REOP_ENDALT continuation */
381 JSPackedBool jumpToJumpFlag
; /* true if we've patched jump-to-jump to
382 avoid 16-bit unsigned offset overflow */
383 } EmitStateStackEntry
;
386 * Immediate operand sizes and getter/setters. Unlike the ones in jsopcode.h,
387 * the getters and setters take the pc of the offset, not of the opcode before
391 #define GET_ARG(pc) ((WORD)(((pc)[0] << 8) | (pc)[1]))
392 #define SET_ARG(pc, arg) ((pc)[0] = (jsbytecode) ((arg) >> 8), \
393 (pc)[1] = (jsbytecode) (arg))
395 #define OFFSET_LEN ARG_LEN
396 #define OFFSET_MAX ((1 << (ARG_LEN * 8)) - 1)
397 #define GET_OFFSET(pc) GET_ARG(pc)
399 static BOOL
ParseRegExp(CompilerState
*);
402 * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
403 * For sanity, we limit it to 2^24 bytes.
405 #define TREE_DEPTH_MAX ((1 << 24) / sizeof(EmitStateStackEntry))
408 * The maximum memory that can be allocated for class bitmaps.
409 * For sanity, we limit it to 2^24 bytes.
411 #define CLASS_BITMAPS_MEM_LIMIT (1 << 24)
414 * Functions to get size and write/read bytecode that represent small indexes
416 * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
417 * indicates that the following byte brings more bits to the index. Otherwise
418 * this is the last byte in the index bytecode representing highest index bits.
421 GetCompactIndexWidth(size_t index
)
425 for (width
= 1; (index
>>= 7) != 0; ++width
) { }
429 static inline jsbytecode
*
430 WriteCompactIndex(jsbytecode
*pc
, size_t index
)
434 while ((next
= index
>> 7) != 0) {
435 *pc
++ = (jsbytecode
)(index
| 0x80);
438 *pc
++ = (jsbytecode
)index
;
442 static inline jsbytecode
*
443 ReadCompactIndex(jsbytecode
*pc
, size_t *result
)
448 if ((nextByte
& 0x80) == 0) {
450 * Short-circuit the most common case when compact index <= 127.
455 *result
= 0x7F & nextByte
;
458 *result
|= (nextByte
& 0x7F) << shift
;
460 } while ((nextByte
& 0x80) != 0);
465 /* Construct and initialize an RENode, returning NULL for out-of-memory */
467 NewRENode(CompilerState
*state
, REOp op
)
471 ren
= jsheap_alloc(&state
->context
->tmp_heap
, sizeof(*ren
));
473 /* js_ReportOutOfScriptQuota(cx); */
483 * Validates and converts hex ascii value.
486 isASCIIHexDigit(WCHAR c
, UINT
*digit
)
497 if (cv
>= 'a' && cv
<= 'f') {
498 *digit
= cv
- 'a' + 10;
510 #define JUMP_OFFSET_HI(off) ((jsbytecode)((off) >> 8))
511 #define JUMP_OFFSET_LO(off) ((jsbytecode)(off))
514 SetForwardJumpOffset(jsbytecode
*jump
, jsbytecode
*target
)
516 ptrdiff_t offset
= target
- jump
;
518 /* Check that target really points forward. */
520 if ((size_t)offset
> OFFSET_MAX
)
523 jump
[0] = JUMP_OFFSET_HI(offset
);
524 jump
[1] = JUMP_OFFSET_LO(offset
);
529 * Generate bytecode for the tree rooted at t using an explicit stack instead
533 EmitREBytecode(CompilerState
*state
, JSRegExp
*re
, size_t treeDepth
,
534 jsbytecode
*pc
, RENode
*t
)
536 EmitStateStackEntry
*emitStateSP
, *emitStateStack
;
540 if (treeDepth
== 0) {
541 emitStateStack
= NULL
;
543 emitStateStack
= heap_alloc(sizeof(EmitStateStackEntry
) * treeDepth
);
547 emitStateSP
= emitStateStack
;
549 assert(op
< REOP_LIMIT
);
558 case REOP_ALTPREREQ2
:
561 emitStateSP
->altHead
= pc
- 1;
562 emitStateSP
->endTermFixup
= pc
;
564 SET_ARG(pc
, t
->u
.altprereq
.ch1
);
566 SET_ARG(pc
, t
->u
.altprereq
.ch2
);
569 emitStateSP
->nextAltFixup
= pc
; /* offset to next alternate */
572 emitStateSP
->continueNode
= t
;
573 emitStateSP
->continueOp
= REOP_JUMP
;
574 emitStateSP
->jumpToJumpFlag
= FALSE
;
576 assert((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
579 assert(op
< REOP_LIMIT
);
583 emitStateSP
->nextTermFixup
= pc
; /* offset to following term */
585 if (!SetForwardJumpOffset(emitStateSP
->nextAltFixup
, pc
))
587 emitStateSP
->continueOp
= REOP_ENDALT
;
589 assert((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
592 assert(op
< REOP_LIMIT
);
597 * If we already patched emitStateSP->nextTermFixup to jump to
598 * a nearer jump, to avoid 16-bit immediate offset overflow, we
601 if (emitStateSP
->jumpToJumpFlag
)
605 * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
606 * REOP_ENDALT is executed only on successful match of the last
607 * alternate in a group.
609 if (!SetForwardJumpOffset(emitStateSP
->nextTermFixup
, pc
))
611 if (t
->op
!= REOP_ALT
) {
612 if (!SetForwardJumpOffset(emitStateSP
->endTermFixup
, pc
))
617 * If the program is bigger than the REOP_JUMP offset range, then
618 * we must check for alternates before this one that are part of
619 * the same group, and fix up their jump offsets to target jumps
620 * close enough to fit in a 16-bit unsigned offset immediate.
622 if ((size_t)(pc
- re
->program
) > OFFSET_MAX
&&
623 emitStateSP
> emitStateStack
) {
624 EmitStateStackEntry
*esp
, *esp2
;
625 jsbytecode
*alt
, *jump
;
626 ptrdiff_t span
, header
;
630 for (esp
= esp2
- 1; esp
>= emitStateStack
; --esp
) {
631 if (esp
->continueOp
== REOP_ENDALT
&&
632 !esp
->jumpToJumpFlag
&&
633 esp
->nextTermFixup
+ OFFSET_LEN
== alt
&&
634 (size_t)(pc
- ((esp
->continueNode
->op
!= REOP_ALT
)
636 : esp
->nextTermFixup
)) > OFFSET_MAX
) {
638 jump
= esp
->nextTermFixup
;
641 * The span must be 1 less than the distance from
642 * jump offset to jump offset, so we actually jump
643 * to a REOP_JUMP bytecode, not to its offset!
646 assert(jump
< esp2
->nextTermFixup
);
647 span
= esp2
->nextTermFixup
- jump
- 1;
648 if ((size_t)span
<= OFFSET_MAX
)
653 } while (esp2
->continueOp
!= REOP_ENDALT
);
656 jump
[0] = JUMP_OFFSET_HI(span
);
657 jump
[1] = JUMP_OFFSET_LO(span
);
659 if (esp
->continueNode
->op
!= REOP_ALT
) {
661 * We must patch the offset at esp->endTermFixup
662 * as well, for the REOP_ALTPREREQ{,2} opcodes.
663 * If we're unlucky and endTermFixup is more than
664 * OFFSET_MAX bytes from its target, we cheat by
665 * jumping 6 bytes to the jump whose offset is at
666 * esp->nextTermFixup, which has the same target.
668 jump
= esp
->endTermFixup
;
669 header
= esp
->nextTermFixup
- jump
;
671 if ((size_t)span
> OFFSET_MAX
)
674 jump
[0] = JUMP_OFFSET_HI(span
);
675 jump
[1] = JUMP_OFFSET_LO(span
);
678 esp
->jumpToJumpFlag
= TRUE
;
686 emitStateSP
->altHead
= pc
- 1;
687 emitStateSP
->nextAltFixup
= pc
; /* offset to next alternate */
689 emitStateSP
->continueNode
= t
;
690 emitStateSP
->continueOp
= REOP_JUMP
;
691 emitStateSP
->jumpToJumpFlag
= FALSE
;
693 assert((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
696 assert(op
< REOP_LIMIT
);
701 * Coalesce FLATs if possible and if it would not increase bytecode
702 * beyond preallocated limit. The latter happens only when bytecode
703 * size for coalesced string with offset p and length 2 exceeds 6
704 * bytes preallocated for 2 single char nodes, i.e. when
705 * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
706 * GetCompactIndexWidth(p) > 4.
707 * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
708 * nodes strictly decreases bytecode size, the check has to be
709 * done only for the first coalescing.
712 GetCompactIndexWidth((WCHAR
*)t
->kid
- state
->cpbegin
) <= 4)
715 t
->next
->op
== REOP_FLAT
&&
716 (WCHAR
*)t
->kid
+ t
->u
.flat
.length
==
718 t
->u
.flat
.length
+= t
->next
->u
.flat
.length
;
719 t
->next
= t
->next
->next
;
722 if (t
->kid
&& t
->u
.flat
.length
> 1) {
723 pc
[-1] = (state
->flags
& JSREG_FOLD
) ? REOP_FLATi
: REOP_FLAT
;
724 pc
= WriteCompactIndex(pc
, (WCHAR
*)t
->kid
- state
->cpbegin
);
725 pc
= WriteCompactIndex(pc
, t
->u
.flat
.length
);
726 } else if (t
->u
.flat
.chr
< 256) {
727 pc
[-1] = (state
->flags
& JSREG_FOLD
) ? REOP_FLAT1i
: REOP_FLAT1
;
728 *pc
++ = (jsbytecode
) t
->u
.flat
.chr
;
730 pc
[-1] = (state
->flags
& JSREG_FOLD
)
733 SET_ARG(pc
, t
->u
.flat
.chr
);
740 pc
= WriteCompactIndex(pc
, t
->u
.parenIndex
);
741 emitStateSP
->continueNode
= t
;
742 emitStateSP
->continueOp
= REOP_RPAREN
;
744 assert((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
750 pc
= WriteCompactIndex(pc
, t
->u
.parenIndex
);
754 pc
= WriteCompactIndex(pc
, t
->u
.parenIndex
);
759 emitStateSP
->nextTermFixup
= pc
;
761 emitStateSP
->continueNode
= t
;
762 emitStateSP
->continueOp
= REOP_ASSERTTEST
;
764 assert((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
769 case REOP_ASSERTTEST
:
770 case REOP_ASSERTNOTTEST
:
771 if (!SetForwardJumpOffset(emitStateSP
->nextTermFixup
, pc
))
775 case REOP_ASSERT_NOT
:
777 emitStateSP
->nextTermFixup
= pc
;
779 emitStateSP
->continueNode
= t
;
780 emitStateSP
->continueOp
= REOP_ASSERTNOTTEST
;
782 assert((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
789 if (t
->u
.range
.min
== 0 && t
->u
.range
.max
== (UINT
)-1) {
790 pc
[-1] = (t
->u
.range
.greedy
) ? REOP_STAR
: REOP_MINIMALSTAR
;
791 } else if (t
->u
.range
.min
== 0 && t
->u
.range
.max
== 1) {
792 pc
[-1] = (t
->u
.range
.greedy
) ? REOP_OPT
: REOP_MINIMALOPT
;
793 } else if (t
->u
.range
.min
== 1 && t
->u
.range
.max
== (UINT
) -1) {
794 pc
[-1] = (t
->u
.range
.greedy
) ? REOP_PLUS
: REOP_MINIMALPLUS
;
796 if (!t
->u
.range
.greedy
)
797 pc
[-1] = REOP_MINIMALQUANT
;
798 pc
= WriteCompactIndex(pc
, t
->u
.range
.min
);
800 * Write max + 1 to avoid using size_t(max) + 1 bytes
801 * for (UINT)-1 sentinel.
803 pc
= WriteCompactIndex(pc
, t
->u
.range
.max
+ 1);
805 emitStateSP
->nextTermFixup
= pc
;
807 emitStateSP
->continueNode
= t
;
808 emitStateSP
->continueOp
= REOP_ENDCHILD
;
810 assert((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
816 if (!SetForwardJumpOffset(emitStateSP
->nextTermFixup
, pc
))
821 if (!t
->u
.ucclass
.sense
)
822 pc
[-1] = REOP_NCLASS
;
823 pc
= WriteCompactIndex(pc
, t
->u
.ucclass
.index
);
824 charSet
= &re
->classList
[t
->u
.ucclass
.index
];
825 charSet
->converted
= FALSE
;
826 charSet
->length
= t
->u
.ucclass
.bmsize
;
827 charSet
->u
.src
.startIndex
= t
->u
.ucclass
.startIndex
;
828 charSet
->u
.src
.length
= t
->u
.ucclass
.kidlen
;
829 charSet
->sense
= t
->u
.ucclass
.sense
;
840 if (emitStateSP
== emitStateStack
)
843 t
= emitStateSP
->continueNode
;
844 op
= (REOp
) emitStateSP
->continueOp
;
849 heap_free(emitStateStack
);
853 ReportRegExpError(state
, JSREPORT_ERROR
, JSMSG_REGEXP_TOO_COMPLEX
);
859 * Process the op against the two top operands, reducing them to a single
860 * operand in the penultimate slot. Update progLength and treeDepth.
863 ProcessOp(CompilerState
*state
, REOpData
*opData
, RENode
**operandStack
,
868 switch (opData
->op
) {
870 result
= NewRENode(state
, REOP_ALT
);
873 result
->kid
= operandStack
[operandSP
- 2];
874 result
->u
.kid2
= operandStack
[operandSP
- 1];
875 operandStack
[operandSP
- 2] = result
;
877 if (state
->treeDepth
== TREE_DEPTH_MAX
) {
878 ReportRegExpError(state
, JSREPORT_ERROR
, JSMSG_REGEXP_TOO_COMPLEX
);
884 * Look at both alternates to see if there's a FLAT or a CLASS at
885 * the start of each. If so, use a prerequisite match.
887 if (((RENode
*) result
->kid
)->op
== REOP_FLAT
&&
888 ((RENode
*) result
->u
.kid2
)->op
== REOP_FLAT
&&
889 (state
->flags
& JSREG_FOLD
) == 0) {
890 result
->op
= REOP_ALTPREREQ
;
891 result
->u
.altprereq
.ch1
= ((RENode
*) result
->kid
)->u
.flat
.chr
;
892 result
->u
.altprereq
.ch2
= ((RENode
*) result
->u
.kid2
)->u
.flat
.chr
;
893 /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
894 JUMP, <end> ... ENDALT */
895 state
->progLength
+= 13;
898 if (((RENode
*) result
->kid
)->op
== REOP_CLASS
&&
899 ((RENode
*) result
->kid
)->u
.ucclass
.index
< 256 &&
900 ((RENode
*) result
->u
.kid2
)->op
== REOP_FLAT
&&
901 (state
->flags
& JSREG_FOLD
) == 0) {
902 result
->op
= REOP_ALTPREREQ2
;
903 result
->u
.altprereq
.ch1
= ((RENode
*) result
->u
.kid2
)->u
.flat
.chr
;
904 result
->u
.altprereq
.ch2
= ((RENode
*) result
->kid
)->u
.ucclass
.index
;
905 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
906 JUMP, <end> ... ENDALT */
907 state
->progLength
+= 13;
910 if (((RENode
*) result
->kid
)->op
== REOP_FLAT
&&
911 ((RENode
*) result
->u
.kid2
)->op
== REOP_CLASS
&&
912 ((RENode
*) result
->u
.kid2
)->u
.ucclass
.index
< 256 &&
913 (state
->flags
& JSREG_FOLD
) == 0) {
914 result
->op
= REOP_ALTPREREQ2
;
915 result
->u
.altprereq
.ch1
= ((RENode
*) result
->kid
)->u
.flat
.chr
;
916 result
->u
.altprereq
.ch2
=
917 ((RENode
*) result
->u
.kid2
)->u
.ucclass
.index
;
918 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
919 JUMP, <end> ... ENDALT */
920 state
->progLength
+= 13;
923 /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
924 state
->progLength
+= 7;
929 result
= operandStack
[operandSP
- 2];
931 result
= result
->next
;
932 result
->next
= operandStack
[operandSP
- 1];
936 case REOP_ASSERT_NOT
:
939 /* These should have been processed by a close paren. */
940 ReportRegExpErrorHelper(state
, JSREPORT_ERROR
, JSMSG_MISSING_PAREN
,
950 * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
951 * its being on the stack, and to propagate errors to its callers.
953 #define JSREG_FIND_PAREN_COUNT 0x8000
954 #define JSREG_FIND_PAREN_ERROR 0x4000
957 * Magic return value from FindParenCount and GetDecimalValue, to indicate
958 * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
959 * its findMax parameter is non-null.
961 #define OVERFLOW_VALUE ((UINT)-1)
964 FindParenCount(CompilerState
*state
)
969 if (state
->flags
& JSREG_FIND_PAREN_COUNT
)
970 return OVERFLOW_VALUE
;
973 * Copy state into temp, flag it so we never report an invalid backref,
974 * and reset its members to parse the entire regexp. This is obviously
975 * suboptimal, but GetDecimalValue calls us only if a backref appears to
976 * refer to a forward parenthetical, which is rare.
979 temp
.flags
|= JSREG_FIND_PAREN_COUNT
;
980 temp
.cp
= temp
.cpbegin
;
985 temp
.classBitmapsMem
= 0;
986 for (i
= 0; i
< CLASS_CACHE_SIZE
; i
++)
987 temp
.classCache
[i
].start
= NULL
;
989 if (!ParseRegExp(&temp
)) {
990 state
->flags
|= JSREG_FIND_PAREN_ERROR
;
991 return OVERFLOW_VALUE
;
993 return temp
.parenCount
;
997 * Extract and return a decimal value at state->cp. The initial character c
998 * has already been read. Return OVERFLOW_VALUE if the result exceeds max.
999 * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
1000 * state->flags to discover whether an error occurred under findMax.
1003 GetDecimalValue(WCHAR c
, UINT max
, UINT (*findMax
)(CompilerState
*state
),
1004 CompilerState
*state
)
1006 UINT value
= JS7_UNDEC(c
);
1007 BOOL overflow
= (value
> max
&& (!findMax
|| value
> findMax(state
)));
1009 /* The following restriction allows simpler overflow checks. */
1010 assert(max
<= ((UINT
)-1 - 9) / 10);
1011 while (state
->cp
< state
->cpend
) {
1015 value
= 10 * value
+ JS7_UNDEC(c
);
1016 if (!overflow
&& value
> max
&& (!findMax
|| value
> findMax(state
)))
1020 return overflow
? OVERFLOW_VALUE
: value
;
1024 * Calculate the total size of the bitmap required for a class expression.
1027 CalculateBitmapSize(CompilerState
*state
, RENode
*target
, const WCHAR
*src
,
1031 BOOL inRange
= FALSE
;
1032 WCHAR c
, rangeStart
= 0;
1033 UINT n
, digit
, nDigits
, i
;
1035 target
->u
.ucclass
.bmsize
= 0;
1036 target
->u
.ucclass
.sense
= TRUE
;
1043 target
->u
.ucclass
.sense
= FALSE
;
1046 while (src
!= end
) {
1047 BOOL canStartRange
= TRUE
;
1074 if (src
< end
&& RE_IS_LETTER(*src
)) {
1075 localMax
= (UINT
) (*src
++) & 0x1F;
1088 for (i
= 0; (i
< nDigits
) && (src
< end
); i
++) {
1090 if (!isASCIIHexDigit(c
, &digit
)) {
1092 * Back off to accepting the original
1099 n
= (n
<< 4) | digit
;
1104 canStartRange
= FALSE
;
1106 JS_ReportErrorNumber(state
->context
,
1107 js_GetErrorMessage
, NULL
,
1108 JSMSG_BAD_CLASS_RANGE
);
1118 canStartRange
= FALSE
;
1120 JS_ReportErrorNumber(state
->context
,
1121 js_GetErrorMessage
, NULL
,
1122 JSMSG_BAD_CLASS_RANGE
);
1128 * If this is the start of a range, ensure that it's less than
1142 * This is a non-ECMA extension - decimal escapes (in this
1143 * case, octal!) are supposed to be an error inside class
1144 * ranges, but supported here for backwards compatibility.
1149 if ('0' <= c
&& c
<= '7') {
1151 n
= 8 * n
+ JS7_UNDEC(c
);
1153 if ('0' <= c
&& c
<= '7') {
1155 i
= 8 * n
+ JS7_UNDEC(c
);
1176 /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
1177 if (rangeStart
> localMax
) {
1178 JS_ReportErrorNumber(state
->context
,
1179 js_GetErrorMessage
, NULL
,
1180 JSMSG_BAD_CLASS_RANGE
);
1185 if (canStartRange
&& src
< end
- 1) {
1189 rangeStart
= (WCHAR
)localMax
;
1193 if (state
->flags
& JSREG_FOLD
)
1194 rangeStart
= localMax
; /* one run of the uc/dc loop below */
1197 if (state
->flags
& JSREG_FOLD
) {
1198 WCHAR maxch
= localMax
;
1200 for (i
= rangeStart
; i
<= localMax
; i
++) {
1216 target
->u
.ucclass
.bmsize
= max
;
1221 ParseMinMaxQuantifier(CompilerState
*state
, BOOL ignoreValues
)
1225 const WCHAR
*errp
= state
->cp
++;
1230 min
= GetDecimalValue(c
, 0xFFFF, NULL
, state
);
1233 if (!ignoreValues
&& min
== OVERFLOW_VALUE
)
1234 return JSMSG_MIN_TOO_BIG
;
1240 max
= GetDecimalValue(c
, 0xFFFF, NULL
, state
);
1242 if (!ignoreValues
&& max
== OVERFLOW_VALUE
)
1243 return JSMSG_MAX_TOO_BIG
;
1244 if (!ignoreValues
&& min
> max
)
1245 return JSMSG_OUT_OF_ORDER
;
1253 state
->result
= NewRENode(state
, REOP_QUANT
);
1255 return JSMSG_OUT_OF_MEMORY
;
1256 state
->result
->u
.range
.min
= min
;
1257 state
->result
->u
.range
.max
= max
;
1259 * QUANT, <min>, <max>, <next> ... <ENDCHILD>
1260 * where <max> is written as compact(max+1) to make
1261 * (UINT)-1 sentinel to occupy 1 byte, not width_of(max)+1.
1263 state
->progLength
+= (1 + GetCompactIndexWidth(min
)
1264 + GetCompactIndexWidth(max
+ 1)
1275 ParseQuantifier(CompilerState
*state
)
1278 term
= state
->result
;
1279 if (state
->cp
< state
->cpend
) {
1280 switch (*state
->cp
) {
1282 state
->result
= NewRENode(state
, REOP_QUANT
);
1285 state
->result
->u
.range
.min
= 1;
1286 state
->result
->u
.range
.max
= (UINT
)-1;
1287 /* <PLUS>, <next> ... <ENDCHILD> */
1288 state
->progLength
+= 4;
1291 state
->result
= NewRENode(state
, REOP_QUANT
);
1294 state
->result
->u
.range
.min
= 0;
1295 state
->result
->u
.range
.max
= (UINT
)-1;
1296 /* <STAR>, <next> ... <ENDCHILD> */
1297 state
->progLength
+= 4;
1300 state
->result
= NewRENode(state
, REOP_QUANT
);
1303 state
->result
->u
.range
.min
= 0;
1304 state
->result
->u
.range
.max
= 1;
1305 /* <OPT>, <next> ... <ENDCHILD> */
1306 state
->progLength
+= 4;
1308 case '{': /* balance '}' */
1312 err
= ParseMinMaxQuantifier(state
, FALSE
);
1318 ReportRegExpErrorHelper(state
, JSREPORT_ERROR
, err
, errp
);
1327 if (state
->treeDepth
== TREE_DEPTH_MAX
) {
1328 ReportRegExpError(state
, JSREPORT_ERROR
, JSMSG_REGEXP_TOO_COMPLEX
);
1334 state
->result
->kid
= term
;
1335 if (state
->cp
< state
->cpend
&& *state
->cp
== '?') {
1337 state
->result
->u
.range
.greedy
= FALSE
;
1339 state
->result
->u
.range
.greedy
= TRUE
;
1345 * item: assertion An item is either an assertion or
1346 * quantatom a quantified atom.
1348 * assertion: '^' Assertions match beginning of string
1349 * (or line if the class static property
1350 * RegExp.multiline is true).
1351 * '$' End of string (or line if the class
1352 * static property RegExp.multiline is
1354 * '\b' Word boundary (between \w and \W).
1355 * '\B' Word non-boundary.
1357 * quantatom: atom An unquantified atom.
1358 * quantatom '{' n ',' m '}'
1359 * Atom must occur between n and m times.
1360 * quantatom '{' n ',' '}' Atom must occur at least n times.
1361 * quantatom '{' n '}' Atom must occur exactly n times.
1362 * quantatom '*' Zero or more times (same as {0,}).
1363 * quantatom '+' One or more times (same as {1,}).
1364 * quantatom '?' Zero or one time (same as {0,1}).
1366 * any of which can be optionally followed by '?' for ungreedy
1368 * atom: '(' regexp ')' A parenthesized regexp (what matched
1369 * can be addressed using a backreference,
1371 * '.' Matches any char except '\n'.
1372 * '[' classlist ']' A character class.
1373 * '[' '^' classlist ']' A negated character class.
1375 * '\n' Newline (Line Feed).
1376 * '\r' Carriage Return.
1377 * '\t' Horizontal Tab.
1378 * '\v' Vertical Tab.
1379 * '\d' A digit (same as [0-9]).
1381 * '\w' A word character, [0-9a-z_A-Z].
1382 * '\W' A non-word character.
1383 * '\s' A whitespace character, [ \b\f\n\r\t\v].
1384 * '\S' A non-whitespace character.
1385 * '\' n A backreference to the nth (n decimal
1386 * and positive) parenthesized expression.
1387 * '\' octal An octal escape sequence (octal must be
1388 * two or three digits long, unless it is
1389 * 0 for the null character).
1390 * '\x' hex A hex escape (hex must be two digits).
1391 * '\u' unicode A unicode escape (must be four digits).
1392 * '\c' ctrl A control character, ctrl is a letter.
1393 * '\' literalatomchar Any character except one of the above
1394 * that follow '\' in an atom.
1395 * otheratomchar Any character not first among the other
1396 * atom right-hand sides.
1399 ParseTerm(CompilerState
*state
)
1401 WCHAR c
= *state
->cp
++;
1403 UINT num
, tmp
, n
, i
;
1404 const WCHAR
*termStart
;
1407 /* assertions and atoms */
1409 state
->result
= NewRENode(state
, REOP_BOL
);
1412 state
->progLength
++;
1415 state
->result
= NewRENode(state
, REOP_EOL
);
1418 state
->progLength
++;
1421 if (state
->cp
>= state
->cpend
) {
1422 /* a trailing '\' is an error */
1423 ReportRegExpError(state
, JSREPORT_ERROR
, JSMSG_TRAILING_SLASH
);
1428 /* assertion escapes */
1430 state
->result
= NewRENode(state
, REOP_WBDRY
);
1433 state
->progLength
++;
1436 state
->result
= NewRENode(state
, REOP_WNONBDRY
);
1439 state
->progLength
++;
1441 /* Decimal escape */
1443 /* Give a strict warning. See also the note below. */
1444 WARN("non-octal digit in an escape sequence that doesn't match a back-reference\n");
1447 while (state
->cp
< state
->cpend
) {
1449 if (c
< '0' || '7' < c
)
1452 tmp
= 8 * num
+ (UINT
)JS7_UNDEC(c
);
1459 state
->result
= NewRENode(state
, REOP_FLAT
);
1462 state
->result
->u
.flat
.chr
= c
;
1463 state
->result
->u
.flat
.length
= 1;
1464 state
->progLength
+= 3;
1475 termStart
= state
->cp
- 1;
1476 num
= GetDecimalValue(c
, state
->parenCount
, FindParenCount
, state
);
1477 if (state
->flags
& JSREG_FIND_PAREN_ERROR
)
1479 if (num
== OVERFLOW_VALUE
) {
1480 /* Give a strict mode warning. */
1481 WARN("back-reference exceeds number of capturing parentheses\n");
1484 * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
1485 * error here. However, for compatibility with IE, we treat the
1486 * whole backref as flat if the first character in it is not a
1487 * valid octal character, and as an octal escape otherwise.
1489 state
->cp
= termStart
;
1491 /* Treat this as flat. termStart - 1 is the \. */
1496 /* Treat this as an octal escape. */
1499 assert(1 <= num
&& num
<= 0x10000);
1500 state
->result
= NewRENode(state
, REOP_BACKREF
);
1503 state
->result
->u
.parenIndex
= num
- 1;
1505 += 1 + GetCompactIndexWidth(state
->result
->u
.parenIndex
);
1507 /* Control escape */
1523 /* Control letter */
1525 if (state
->cp
< state
->cpend
&& RE_IS_LETTER(*state
->cp
)) {
1526 c
= (WCHAR
) (*state
->cp
++ & 0x1F);
1528 /* back off to accepting the original '\' as a literal */
1533 /* HexEscapeSequence */
1537 /* UnicodeEscapeSequence */
1542 for (i
= 0; i
< nDigits
&& state
->cp
< state
->cpend
; i
++) {
1545 if (!isASCIIHexDigit(c
, &digit
)) {
1547 * Back off to accepting the original 'u' or 'x' as a
1554 n
= (n
<< 4) | digit
;
1558 /* Character class escapes */
1560 state
->result
= NewRENode(state
, REOP_DIGIT
);
1564 state
->progLength
++;
1567 state
->result
= NewRENode(state
, REOP_NONDIGIT
);
1570 state
->result
= NewRENode(state
, REOP_SPACE
);
1573 state
->result
= NewRENode(state
, REOP_NONSPACE
);
1576 state
->result
= NewRENode(state
, REOP_ALNUM
);
1579 state
->result
= NewRENode(state
, REOP_NONALNUM
);
1581 /* IdentityEscape */
1583 state
->result
= NewRENode(state
, REOP_FLAT
);
1586 state
->result
->u
.flat
.chr
= c
;
1587 state
->result
->u
.flat
.length
= 1;
1588 state
->result
->kid
= (void *) (state
->cp
- 1);
1589 state
->progLength
+= 3;
1594 state
->result
= NewRENode(state
, REOP_CLASS
);
1597 termStart
= state
->cp
;
1598 state
->result
->u
.ucclass
.startIndex
= termStart
- state
->cpbegin
;
1600 if (state
->cp
== state
->cpend
) {
1601 ReportRegExpErrorHelper(state
, JSREPORT_ERROR
,
1602 JSMSG_UNTERM_CLASS
, termStart
);
1606 if (*state
->cp
== '\\') {
1608 if (state
->cp
!= state
->cpend
)
1612 if (*state
->cp
== ']') {
1613 state
->result
->u
.ucclass
.kidlen
= state
->cp
- termStart
;
1618 for (i
= 0; i
< CLASS_CACHE_SIZE
; i
++) {
1619 if (!state
->classCache
[i
].start
) {
1620 state
->classCache
[i
].start
= termStart
;
1621 state
->classCache
[i
].length
= state
->result
->u
.ucclass
.kidlen
;
1622 state
->classCache
[i
].index
= state
->classCount
;
1625 if (state
->classCache
[i
].length
==
1626 state
->result
->u
.ucclass
.kidlen
) {
1627 for (n
= 0; ; n
++) {
1628 if (n
== state
->classCache
[i
].length
) {
1629 state
->result
->u
.ucclass
.index
1630 = state
->classCache
[i
].index
;
1633 if (state
->classCache
[i
].start
[n
] != termStart
[n
])
1638 state
->result
->u
.ucclass
.index
= state
->classCount
++;
1642 * Call CalculateBitmapSize now as we want any errors it finds
1643 * to be reported during the parse phase, not at execution.
1645 if (!CalculateBitmapSize(state
, state
->result
, termStart
, state
->cp
++))
1648 * Update classBitmapsMem with number of bytes to hold bmsize bits,
1649 * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
1650 * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
1652 n
= (state
->result
->u
.ucclass
.bmsize
>> 3) + 1;
1653 if (n
> CLASS_BITMAPS_MEM_LIMIT
- state
->classBitmapsMem
) {
1654 ReportRegExpError(state
, JSREPORT_ERROR
, JSMSG_REGEXP_TOO_COMPLEX
);
1657 state
->classBitmapsMem
+= n
;
1658 /* CLASS, <index> */
1660 += 1 + GetCompactIndexWidth(state
->result
->u
.ucclass
.index
);
1664 state
->result
= NewRENode(state
, REOP_DOT
);
1669 const WCHAR
*errp
= state
->cp
--;
1672 err
= ParseMinMaxQuantifier(state
, TRUE
);
1683 ReportRegExpErrorHelper(state
, JSREPORT_ERROR
,
1684 JSMSG_BAD_QUANTIFIER
, state
->cp
- 1);
1688 state
->result
= NewRENode(state
, REOP_FLAT
);
1691 state
->result
->u
.flat
.chr
= c
;
1692 state
->result
->u
.flat
.length
= 1;
1693 state
->result
->kid
= (void *) (state
->cp
- 1);
1694 state
->progLength
+= 3;
1697 return ParseQuantifier(state
);
1701 * Top-down regular expression grammar, based closely on Perl4.
1703 * regexp: altern A regular expression is one or more
1704 * altern '|' regexp alternatives separated by vertical bar.
1706 #define INITIAL_STACK_SIZE 128
1709 ParseRegExp(CompilerState
*state
)
1713 REOpData
*operatorStack
;
1714 RENode
**operandStack
;
1717 BOOL result
= FALSE
;
1719 INT operatorSP
= 0, operatorStackSize
= INITIAL_STACK_SIZE
;
1720 INT operandSP
= 0, operandStackSize
= INITIAL_STACK_SIZE
;
1722 /* Watch out for empty regexp */
1723 if (state
->cp
== state
->cpend
) {
1724 state
->result
= NewRENode(state
, REOP_EMPTY
);
1725 return (state
->result
!= NULL
);
1728 operatorStack
= heap_alloc(sizeof(REOpData
) * operatorStackSize
);
1732 operandStack
= heap_alloc(sizeof(RENode
*) * operandStackSize
);
1737 parenIndex
= state
->parenCount
;
1738 if (state
->cp
== state
->cpend
) {
1740 * If we are at the end of the regexp and we're short one or more
1741 * operands, the regexp must have the form /x|/ or some such, with
1742 * left parentheses making us short more than one operand.
1744 if (operatorSP
>= operandSP
) {
1745 operand
= NewRENode(state
, REOP_EMPTY
);
1751 switch (*state
->cp
) {
1754 if (state
->cp
+ 1 < state
->cpend
&&
1755 *state
->cp
== '?' &&
1756 (state
->cp
[1] == '=' ||
1757 state
->cp
[1] == '!' ||
1758 state
->cp
[1] == ':')) {
1759 switch (state
->cp
[1]) {
1762 /* ASSERT, <next>, ... ASSERTTEST */
1763 state
->progLength
+= 4;
1766 op
= REOP_ASSERT_NOT
;
1767 /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
1768 state
->progLength
+= 4;
1771 op
= REOP_LPARENNON
;
1777 /* LPAREN, <index>, ... RPAREN, <index> */
1779 += 2 * (1 + GetCompactIndexWidth(parenIndex
));
1780 state
->parenCount
++;
1781 if (state
->parenCount
== 65535) {
1782 ReportRegExpError(state
, JSREPORT_ERROR
,
1783 JSMSG_TOO_MANY_PARENS
);
1791 * If there's no stacked open parenthesis, throw syntax error.
1793 for (i
= operatorSP
- 1; ; i
--) {
1795 ReportRegExpError(state
, JSREPORT_ERROR
,
1796 JSMSG_UNMATCHED_RIGHT_PAREN
);
1799 if (operatorStack
[i
].op
== REOP_ASSERT
||
1800 operatorStack
[i
].op
== REOP_ASSERT_NOT
||
1801 operatorStack
[i
].op
== REOP_LPARENNON
||
1802 operatorStack
[i
].op
== REOP_LPAREN
) {
1809 /* Expected an operand before these, so make an empty one */
1810 operand
= NewRENode(state
, REOP_EMPTY
);
1816 if (!ParseTerm(state
))
1818 operand
= state
->result
;
1820 if (operandSP
== operandStackSize
) {
1822 operandStackSize
+= operandStackSize
;
1823 tmp
= heap_realloc(operandStack
, sizeof(RENode
*) * operandStackSize
);
1828 operandStack
[operandSP
++] = operand
;
1833 /* At the end; process remaining operators. */
1835 if (state
->cp
== state
->cpend
) {
1836 while (operatorSP
) {
1838 if (!ProcessOp(state
, &operatorStack
[operatorSP
],
1839 operandStack
, operandSP
))
1843 assert(operandSP
== 1);
1844 state
->result
= operandStack
[0];
1849 switch (*state
->cp
) {
1851 /* Process any stacked 'concat' operators */
1853 while (operatorSP
&&
1854 operatorStack
[operatorSP
- 1].op
== REOP_CONCAT
) {
1856 if (!ProcessOp(state
, &operatorStack
[operatorSP
],
1857 operandStack
, operandSP
)) {
1867 * If there's no stacked open parenthesis, throw syntax error.
1869 for (i
= operatorSP
- 1; ; i
--) {
1871 ReportRegExpError(state
, JSREPORT_ERROR
,
1872 JSMSG_UNMATCHED_RIGHT_PAREN
);
1875 if (operatorStack
[i
].op
== REOP_ASSERT
||
1876 operatorStack
[i
].op
== REOP_ASSERT_NOT
||
1877 operatorStack
[i
].op
== REOP_LPARENNON
||
1878 operatorStack
[i
].op
== REOP_LPAREN
) {
1884 /* Process everything on the stack until the open parenthesis. */
1888 switch (operatorStack
[operatorSP
].op
) {
1890 case REOP_ASSERT_NOT
:
1892 operand
= NewRENode(state
, operatorStack
[operatorSP
].op
);
1895 operand
->u
.parenIndex
=
1896 operatorStack
[operatorSP
].parenIndex
;
1898 operand
->kid
= operandStack
[operandSP
- 1];
1899 operandStack
[operandSP
- 1] = operand
;
1900 if (state
->treeDepth
== TREE_DEPTH_MAX
) {
1901 ReportRegExpError(state
, JSREPORT_ERROR
,
1902 JSMSG_REGEXP_TOO_COMPLEX
);
1908 case REOP_LPARENNON
:
1909 state
->result
= operandStack
[operandSP
- 1];
1910 if (!ParseQuantifier(state
))
1912 operandStack
[operandSP
- 1] = state
->result
;
1913 goto restartOperator
;
1915 if (!ProcessOp(state
, &operatorStack
[operatorSP
],
1916 operandStack
, operandSP
))
1926 const WCHAR
*errp
= state
->cp
;
1928 if (ParseMinMaxQuantifier(state
, TRUE
) < 0) {
1930 * This didn't even scan correctly as a quantifier, so we should
1944 ReportRegExpErrorHelper(state
, JSREPORT_ERROR
, JSMSG_BAD_QUANTIFIER
,
1950 /* Anything else is the start of the next term. */
1953 if (operatorSP
== operatorStackSize
) {
1955 operatorStackSize
+= operatorStackSize
;
1956 tmp
= heap_realloc(operatorStack
, sizeof(REOpData
) * operatorStackSize
);
1959 operatorStack
= tmp
;
1961 operatorStack
[operatorSP
].op
= op
;
1962 operatorStack
[operatorSP
].errPos
= state
->cp
;
1963 operatorStack
[operatorSP
++].parenIndex
= parenIndex
;
1968 heap_free(operatorStack
);
1969 heap_free(operandStack
);
1974 * Save the current state of the match - the position in the input
1975 * text as well as the position in the bytecode. The state of any
1976 * parent expressions is also saved (preceding state).
1977 * Contents of parenCount parentheses from parenIndex are also saved.
1979 static REBackTrackData
*
1980 PushBackTrackState(REGlobalData
*gData
, REOp op
,
1981 jsbytecode
*target
, REMatchState
*x
, const WCHAR
*cp
,
1982 size_t parenIndex
, size_t parenCount
)
1985 REBackTrackData
*result
=
1986 (REBackTrackData
*) ((char *)gData
->backTrackSP
+ gData
->cursz
);
1988 size_t sz
= sizeof(REBackTrackData
) +
1989 gData
->stateStackTop
* sizeof(REProgState
) +
1990 parenCount
* sizeof(RECapture
);
1992 ptrdiff_t btsize
= gData
->backTrackStackSize
;
1993 ptrdiff_t btincr
= ((char *)result
+ sz
) -
1994 ((char *)gData
->backTrackStack
+ btsize
);
1996 TRACE("\tBT_Push: %lu,%lu\n", (ULONG_PTR
)parenIndex
, (ULONG_PTR
)parenCount
);
1998 JS_COUNT_OPERATION(gData
->cx
, JSOW_JUMP
* (1 + parenCount
));
2000 ptrdiff_t offset
= (char *)result
- (char *)gData
->backTrackStack
;
2002 JS_COUNT_OPERATION(gData
->cx
, JSOW_ALLOCATION
);
2003 btincr
= ((btincr
+btsize
-1)/btsize
)*btsize
;
2004 gData
->backTrackStack
= jsheap_grow(gData
->pool
, gData
->backTrackStack
, btsize
, btincr
);
2005 if (!gData
->backTrackStack
) {
2006 js_ReportOutOfScriptQuota(gData
->cx
);
2010 gData
->backTrackStackSize
= btsize
+ btincr
;
2011 result
= (REBackTrackData
*) ((char *)gData
->backTrackStack
+ offset
);
2013 gData
->backTrackSP
= result
;
2014 result
->sz
= gData
->cursz
;
2017 result
->backtrack_op
= op
;
2018 result
->backtrack_pc
= target
;
2020 result
->parenCount
= parenCount
;
2021 result
->parenIndex
= parenIndex
;
2023 result
->saveStateStackTop
= gData
->stateStackTop
;
2024 assert(gData
->stateStackTop
);
2025 memcpy(result
+ 1, gData
->stateStack
,
2026 sizeof(REProgState
) * result
->saveStateStackTop
);
2028 if (parenCount
!= 0) {
2029 memcpy((char *)(result
+ 1) +
2030 sizeof(REProgState
) * result
->saveStateStackTop
,
2031 &x
->parens
[parenIndex
],
2032 sizeof(RECapture
) * parenCount
);
2033 for (i
= 0; i
!= parenCount
; i
++)
2034 x
->parens
[parenIndex
+ i
].index
= -1;
2040 static inline REMatchState
*
2041 FlatNIMatcher(REGlobalData
*gData
, REMatchState
*x
, WCHAR
*matchChars
,
2045 assert(gData
->cpend
>= x
->cp
);
2046 if (length
> (size_t)(gData
->cpend
- x
->cp
))
2048 for (i
= 0; i
!= length
; i
++) {
2049 if (toupperW(matchChars
[i
]) != toupperW(x
->cp
[i
]))
2057 * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
2058 * 2. If E is not a character then go to step 6.
2059 * 3. Let ch be E's character.
2060 * 4. Let A be a one-element RECharSet containing the character ch.
2061 * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
2062 * 6. E must be an integer. Let n be that integer.
2063 * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
2064 * 8. Return an internal Matcher closure that takes two arguments, a State x
2065 * and a Continuation c, and performs the following:
2066 * 1. Let cap be x's captures internal array.
2067 * 2. Let s be cap[n].
2068 * 3. If s is undefined, then call c(x) and return its result.
2069 * 4. Let e be x's endIndex.
2070 * 5. Let len be s's length.
2071 * 6. Let f be e+len.
2072 * 7. If f>InputLength, return failure.
2073 * 8. If there exists an integer i between 0 (inclusive) and len (exclusive)
2074 * such that Canonicalize(s[i]) is not the same character as
2075 * Canonicalize(Input [e+i]), then return failure.
2076 * 9. Let y be the State (f, cap).
2077 * 10. Call c(y) and return its result.
2079 static REMatchState
*
2080 BackrefMatcher(REGlobalData
*gData
, REMatchState
*x
, size_t parenIndex
)
2083 const WCHAR
*parenContent
;
2084 RECapture
*cap
= &x
->parens
[parenIndex
];
2086 if (cap
->index
== -1)
2090 if (x
->cp
+ len
> gData
->cpend
)
2093 parenContent
= &gData
->cpbegin
[cap
->index
];
2094 if (gData
->regexp
->flags
& JSREG_FOLD
) {
2095 for (i
= 0; i
< len
; i
++) {
2096 if (toupperW(parenContent
[i
]) != toupperW(x
->cp
[i
]))
2100 for (i
= 0; i
< len
; i
++) {
2101 if (parenContent
[i
] != x
->cp
[i
])
2109 /* Add a single character to the RECharSet */
2111 AddCharacterToCharSet(RECharSet
*cs
, WCHAR c
)
2113 UINT byteIndex
= (UINT
)(c
>> 3);
2114 assert(c
<= cs
->length
);
2115 cs
->u
.bits
[byteIndex
] |= 1 << (c
& 0x7);
2119 /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
2121 AddCharacterRangeToCharSet(RECharSet
*cs
, UINT c1
, UINT c2
)
2125 UINT byteIndex1
= c1
>> 3;
2126 UINT byteIndex2
= c2
>> 3;
2128 assert(c2
<= cs
->length
&& c1
<= c2
);
2133 if (byteIndex1
== byteIndex2
) {
2134 cs
->u
.bits
[byteIndex1
] |= ((BYTE
)0xFF >> (7 - (c2
- c1
))) << c1
;
2136 cs
->u
.bits
[byteIndex1
] |= 0xFF << c1
;
2137 for (i
= byteIndex1
+ 1; i
< byteIndex2
; i
++)
2138 cs
->u
.bits
[i
] = 0xFF;
2139 cs
->u
.bits
[byteIndex2
] |= (BYTE
)0xFF >> (7 - c2
);
2143 /* Compile the source of the class into a RECharSet */
2145 ProcessCharSet(REGlobalData
*gData
, RECharSet
*charSet
)
2147 const WCHAR
*src
, *end
;
2148 BOOL inRange
= FALSE
;
2149 WCHAR rangeStart
= 0;
2154 assert(!charSet
->converted
);
2156 * Assert that startIndex and length points to chars inside [] inside
2159 assert(1 <= charSet
->u
.src
.startIndex
);
2160 assert(charSet
->u
.src
.startIndex
2161 < SysStringLen(gData
->regexp
->source
));
2162 assert(charSet
->u
.src
.length
<= SysStringLen(gData
->regexp
->source
)
2163 - 1 - charSet
->u
.src
.startIndex
);
2165 charSet
->converted
= TRUE
;
2166 src
= gData
->regexp
->source
+ charSet
->u
.src
.startIndex
;
2168 end
= src
+ charSet
->u
.src
.length
;
2170 assert(src
[-1] == '[' && end
[0] == ']');
2172 byteLength
= (charSet
->length
>> 3) + 1;
2173 charSet
->u
.bits
= heap_alloc(byteLength
);
2174 if (!charSet
->u
.bits
) {
2175 JS_ReportOutOfMemory(gData
->cx
);
2179 memset(charSet
->u
.bits
, 0, byteLength
);
2185 assert(charSet
->sense
== FALSE
);
2188 assert(charSet
->sense
== TRUE
);
2191 while (src
!= end
) {
2216 if (src
< end
&& JS_ISWORD(*src
)) {
2217 thisCh
= (WCHAR
)(*src
++ & 0x1F);
2230 for (i
= 0; (i
< nDigits
) && (src
< end
); i
++) {
2233 if (!isASCIIHexDigit(c
, &digit
)) {
2235 * Back off to accepting the original '\'
2242 n
= (n
<< 4) | digit
;
2255 * This is a non-ECMA extension - decimal escapes (in this
2256 * case, octal!) are supposed to be an error inside class
2257 * ranges, but supported here for backwards compatibility.
2261 if ('0' <= c
&& c
<= '7') {
2263 n
= 8 * n
+ JS7_UNDEC(c
);
2265 if ('0' <= c
&& c
<= '7') {
2267 i
= 8 * n
+ JS7_UNDEC(c
);
2278 AddCharacterRangeToCharSet(charSet
, '0', '9');
2279 continue; /* don't need range processing */
2281 AddCharacterRangeToCharSet(charSet
, 0, '0' - 1);
2282 AddCharacterRangeToCharSet(charSet
,
2284 (WCHAR
)charSet
->length
);
2287 for (i
= (INT
)charSet
->length
; i
>= 0; i
--)
2289 AddCharacterToCharSet(charSet
, (WCHAR
)i
);
2292 for (i
= (INT
)charSet
->length
; i
>= 0; i
--)
2294 AddCharacterToCharSet(charSet
, (WCHAR
)i
);
2297 for (i
= (INT
)charSet
->length
; i
>= 0; i
--)
2299 AddCharacterToCharSet(charSet
, (WCHAR
)i
);
2302 for (i
= (INT
)charSet
->length
; i
>= 0; i
--)
2304 AddCharacterToCharSet(charSet
, (WCHAR
)i
);
2319 if (gData
->regexp
->flags
& JSREG_FOLD
) {
2320 assert(rangeStart
<= thisCh
);
2321 for (i
= rangeStart
; i
<= thisCh
; i
++) {
2324 AddCharacterToCharSet(charSet
, i
);
2328 AddCharacterToCharSet(charSet
, uch
);
2330 AddCharacterToCharSet(charSet
, dch
);
2333 AddCharacterRangeToCharSet(charSet
, rangeStart
, thisCh
);
2337 if (gData
->regexp
->flags
& JSREG_FOLD
) {
2338 AddCharacterToCharSet(charSet
, toupperW(thisCh
));
2339 AddCharacterToCharSet(charSet
, tolowerW(thisCh
));
2341 AddCharacterToCharSet(charSet
, thisCh
);
2343 if (src
< end
- 1) {
2347 rangeStart
= thisCh
;
2356 ReallocStateStack(REGlobalData
*gData
)
2358 size_t limit
= gData
->stateStackLimit
;
2359 size_t sz
= sizeof(REProgState
) * limit
;
2361 gData
->stateStack
= jsheap_grow(gData
->pool
, gData
->stateStack
, sz
, sz
);
2362 if (!gData
->stateStack
) {
2363 js_ReportOutOfScriptQuota(gData
->cx
);
2367 gData
->stateStackLimit
= limit
+ limit
;
2371 #define PUSH_STATE_STACK(data) \
2373 ++(data)->stateStackTop; \
2374 if ((data)->stateStackTop == (data)->stateStackLimit && \
2375 !ReallocStateStack((data))) { \
2381 * Apply the current op against the given input to see if it's going to match
2382 * or fail. Return false if we don't get a match, true if we do. If updatecp is
2383 * true, then update the current state's cp. Always update startpc to the next
2386 static inline REMatchState
*
2387 SimpleMatch(REGlobalData
*gData
, REMatchState
*x
, REOp op
,
2388 jsbytecode
**startpc
, BOOL updatecp
)
2390 REMatchState
*result
= NULL
;
2393 size_t offset
, length
, index
;
2394 jsbytecode
*pc
= *startpc
; /* pc has already been incremented past op */
2396 const WCHAR
*startcp
= x
->cp
;
2400 const char *opname
= reop_names
[op
];
2401 TRACE("\n%06d: %*s%s\n", (int)(pc
- gData
->regexp
->program
),
2402 (int)gData
->stateStackTop
* 2, "", opname
);
2409 if (x
->cp
!= gData
->cpbegin
) {
2410 if (/*!gData->cx->regExpStatics.multiline && FIXME !!! */
2411 !(gData
->regexp
->flags
& JSREG_MULTILINE
)) {
2414 if (!RE_IS_LINE_TERM(x
->cp
[-1]))
2420 if (x
->cp
!= gData
->cpend
) {
2421 if (/*!gData->cx->regExpStatics.multiline &&*/
2422 !(gData
->regexp
->flags
& JSREG_MULTILINE
)) {
2425 if (!RE_IS_LINE_TERM(*x
->cp
))
2431 if ((x
->cp
== gData
->cpbegin
|| !JS_ISWORD(x
->cp
[-1])) ^
2432 !(x
->cp
!= gData
->cpend
&& JS_ISWORD(*x
->cp
))) {
2437 if ((x
->cp
== gData
->cpbegin
|| !JS_ISWORD(x
->cp
[-1])) ^
2438 (x
->cp
!= gData
->cpend
&& JS_ISWORD(*x
->cp
))) {
2443 if (x
->cp
!= gData
->cpend
&& !RE_IS_LINE_TERM(*x
->cp
)) {
2449 if (x
->cp
!= gData
->cpend
&& JS7_ISDEC(*x
->cp
)) {
2455 if (x
->cp
!= gData
->cpend
&& !JS7_ISDEC(*x
->cp
)) {
2461 if (x
->cp
!= gData
->cpend
&& JS_ISWORD(*x
->cp
)) {
2467 if (x
->cp
!= gData
->cpend
&& !JS_ISWORD(*x
->cp
)) {
2473 if (x
->cp
!= gData
->cpend
&& isspaceW(*x
->cp
)) {
2479 if (x
->cp
!= gData
->cpend
&& !isspaceW(*x
->cp
)) {
2485 pc
= ReadCompactIndex(pc
, &parenIndex
);
2486 assert(parenIndex
< gData
->regexp
->parenCount
);
2487 result
= BackrefMatcher(gData
, x
, parenIndex
);
2490 pc
= ReadCompactIndex(pc
, &offset
);
2491 assert(offset
< SysStringLen(gData
->regexp
->source
));
2492 pc
= ReadCompactIndex(pc
, &length
);
2493 assert(1 <= length
);
2494 assert(length
<= SysStringLen(gData
->regexp
->source
) - offset
);
2495 if (length
<= (size_t)(gData
->cpend
- x
->cp
)) {
2496 source
= gData
->regexp
->source
+ offset
;
2497 TRACE("%s\n", debugstr_wn(source
, length
));
2498 for (index
= 0; index
!= length
; index
++) {
2499 if (source
[index
] != x
->cp
[index
])
2508 TRACE(" '%c' == '%c'\n", (char)matchCh
, (char)*x
->cp
);
2509 if (x
->cp
!= gData
->cpend
&& *x
->cp
== matchCh
) {
2515 pc
= ReadCompactIndex(pc
, &offset
);
2516 assert(offset
< SysStringLen(gData
->regexp
->source
));
2517 pc
= ReadCompactIndex(pc
, &length
);
2518 assert(1 <= length
);
2519 assert(length
<= SysStringLen(gData
->regexp
->source
) - offset
);
2520 source
= gData
->regexp
->source
;
2521 result
= FlatNIMatcher(gData
, x
, source
+ offset
, length
);
2525 if (x
->cp
!= gData
->cpend
&& toupperW(*x
->cp
) == toupperW(matchCh
)) {
2531 matchCh
= GET_ARG(pc
);
2532 TRACE(" '%c' == '%c'\n", (char)matchCh
, (char)*x
->cp
);
2534 if (x
->cp
!= gData
->cpend
&& *x
->cp
== matchCh
) {
2540 matchCh
= GET_ARG(pc
);
2542 if (x
->cp
!= gData
->cpend
&& toupperW(*x
->cp
) == toupperW(matchCh
)) {
2548 pc
= ReadCompactIndex(pc
, &index
);
2549 assert(index
< gData
->regexp
->classCount
);
2550 if (x
->cp
!= gData
->cpend
) {
2551 charSet
= &gData
->regexp
->classList
[index
];
2552 assert(charSet
->converted
);
2555 if (charSet
->length
!= 0 &&
2556 ch
<= charSet
->length
&&
2557 (charSet
->u
.bits
[index
] & (1 << (ch
& 0x7)))) {
2564 pc
= ReadCompactIndex(pc
, &index
);
2565 assert(index
< gData
->regexp
->classCount
);
2566 if (x
->cp
!= gData
->cpend
) {
2567 charSet
= &gData
->regexp
->classList
[index
];
2568 assert(charSet
->converted
);
2571 if (charSet
->length
== 0 ||
2572 ch
> charSet
->length
||
2573 !(charSet
->u
.bits
[index
] & (1 << (ch
& 0x7)))) {
2594 static inline REMatchState
*
2595 ExecuteREBytecode(REGlobalData
*gData
, REMatchState
*x
)
2597 REMatchState
*result
= NULL
;
2598 REBackTrackData
*backTrackData
;
2599 jsbytecode
*nextpc
, *testpc
;
2602 REProgState
*curState
;
2603 const WCHAR
*startcp
;
2604 size_t parenIndex
, k
;
2605 size_t parenSoFar
= 0;
2607 WCHAR matchCh1
, matchCh2
;
2611 jsbytecode
*pc
= gData
->regexp
->program
;
2612 REOp op
= (REOp
) *pc
++;
2615 * If the first node is a simple match, step the index into the string
2616 * until that match is made, or fail if it can't be found at all.
2618 if (REOP_IS_SIMPLE(op
) && !(gData
->regexp
->flags
& JSREG_STICKY
)) {
2620 while (x
->cp
<= gData
->cpend
) {
2621 nextpc
= pc
; /* reset back to start each time */
2622 result
= SimpleMatch(gData
, x
, op
, &nextpc
, TRUE
);
2626 pc
= nextpc
; /* accept skip to next opcode */
2628 assert(op
< REOP_LIMIT
);
2639 const char *opname
= reop_names
[op
];
2640 TRACE("\n%06d: %*s%s\n", (int)(pc
- gData
->regexp
->program
),
2641 (int)gData
->stateStackTop
* 2, "", opname
);
2643 if (REOP_IS_SIMPLE(op
)) {
2644 result
= SimpleMatch(gData
, x
, op
, &pc
, TRUE
);
2646 curState
= &gData
->stateStack
[gData
->stateStackTop
];
2650 case REOP_ALTPREREQ2
:
2651 nextpc
= pc
+ GET_OFFSET(pc
); /* start of next op */
2653 matchCh2
= GET_ARG(pc
);
2658 if (x
->cp
!= gData
->cpend
) {
2659 if (*x
->cp
== matchCh2
)
2662 charSet
= &gData
->regexp
->classList
[k
];
2663 if (!charSet
->converted
&& !ProcessCharSet(gData
, charSet
))
2667 if ((charSet
->length
== 0 ||
2668 matchCh1
> charSet
->length
||
2669 !(charSet
->u
.bits
[k
] & (1 << (matchCh1
& 0x7)))) ^
2677 case REOP_ALTPREREQ
:
2678 nextpc
= pc
+ GET_OFFSET(pc
); /* start of next op */
2680 matchCh1
= GET_ARG(pc
);
2682 matchCh2
= GET_ARG(pc
);
2684 if (x
->cp
== gData
->cpend
||
2685 (*x
->cp
!= matchCh1
&& *x
->cp
!= matchCh2
)) {
2689 /* else fall through... */
2693 nextpc
= pc
+ GET_OFFSET(pc
); /* start of next alternate */
2694 pc
+= ARG_LEN
; /* start of this alternate */
2695 curState
->parenSoFar
= parenSoFar
;
2696 PUSH_STATE_STACK(gData
);
2699 if (REOP_IS_SIMPLE(op
)) {
2700 if (!SimpleMatch(gData
, x
, op
, &pc
, TRUE
)) {
2701 op
= (REOp
) *nextpc
++;
2708 nextop
= (REOp
) *nextpc
++;
2709 if (!PushBackTrackState(gData
, nextop
, nextpc
, x
, startcp
, 0, 0))
2714 * Occurs at (successful) end of REOP_ALT,
2718 * If we have not gotten a result here, it is because of an
2719 * empty match. Do the same thing REOP_EMPTY would do.
2724 --gData
->stateStackTop
;
2725 pc
+= GET_OFFSET(pc
);
2730 * Occurs at last (successful) end of REOP_ALT,
2734 * If we have not gotten a result here, it is because of an
2735 * empty match. Do the same thing REOP_EMPTY would do.
2740 --gData
->stateStackTop
;
2745 pc
= ReadCompactIndex(pc
, &parenIndex
);
2746 TRACE("[ %lu ]\n", (ULONG_PTR
)parenIndex
);
2747 assert(parenIndex
< gData
->regexp
->parenCount
);
2748 if (parenIndex
+ 1 > parenSoFar
)
2749 parenSoFar
= parenIndex
+ 1;
2750 x
->parens
[parenIndex
].index
= x
->cp
- gData
->cpbegin
;
2751 x
->parens
[parenIndex
].length
= 0;
2759 pc
= ReadCompactIndex(pc
, &parenIndex
);
2760 assert(parenIndex
< gData
->regexp
->parenCount
);
2761 cap
= &x
->parens
[parenIndex
];
2762 delta
= x
->cp
- (gData
->cpbegin
+ cap
->index
);
2763 cap
->length
= (delta
< 0) ? 0 : (size_t) delta
;
2771 nextpc
= pc
+ GET_OFFSET(pc
); /* start of term after ASSERT */
2772 pc
+= ARG_LEN
; /* start of ASSERT child */
2775 if (REOP_IS_SIMPLE(op
) &&
2776 !SimpleMatch(gData
, x
, op
, &testpc
, FALSE
)) {
2780 curState
->u
.assertion
.top
=
2781 (char *)gData
->backTrackSP
- (char *)gData
->backTrackStack
;
2782 curState
->u
.assertion
.sz
= gData
->cursz
;
2783 curState
->index
= x
->cp
- gData
->cpbegin
;
2784 curState
->parenSoFar
= parenSoFar
;
2785 PUSH_STATE_STACK(gData
);
2786 if (!PushBackTrackState(gData
, REOP_ASSERTTEST
,
2787 nextpc
, x
, x
->cp
, 0, 0)) {
2792 case REOP_ASSERT_NOT
:
2793 nextpc
= pc
+ GET_OFFSET(pc
);
2797 if (REOP_IS_SIMPLE(op
) /* Note - fail to fail! */ &&
2798 SimpleMatch(gData
, x
, op
, &testpc
, FALSE
) &&
2799 *testpc
== REOP_ASSERTNOTTEST
) {
2803 curState
->u
.assertion
.top
2804 = (char *)gData
->backTrackSP
-
2805 (char *)gData
->backTrackStack
;
2806 curState
->u
.assertion
.sz
= gData
->cursz
;
2807 curState
->index
= x
->cp
- gData
->cpbegin
;
2808 curState
->parenSoFar
= parenSoFar
;
2809 PUSH_STATE_STACK(gData
);
2810 if (!PushBackTrackState(gData
, REOP_ASSERTNOTTEST
,
2811 nextpc
, x
, x
->cp
, 0, 0)) {
2816 case REOP_ASSERTTEST
:
2817 --gData
->stateStackTop
;
2819 x
->cp
= gData
->cpbegin
+ curState
->index
;
2820 gData
->backTrackSP
=
2821 (REBackTrackData
*) ((char *)gData
->backTrackStack
+
2822 curState
->u
.assertion
.top
);
2823 gData
->cursz
= curState
->u
.assertion
.sz
;
2828 case REOP_ASSERTNOTTEST
:
2829 --gData
->stateStackTop
;
2831 x
->cp
= gData
->cpbegin
+ curState
->index
;
2832 gData
->backTrackSP
=
2833 (REBackTrackData
*) ((char *)gData
->backTrackStack
+
2834 curState
->u
.assertion
.top
);
2835 gData
->cursz
= curState
->u
.assertion
.sz
;
2836 result
= (!result
) ? x
: NULL
;
2839 curState
->u
.quantifier
.min
= 0;
2840 curState
->u
.quantifier
.max
= (UINT
)-1;
2843 curState
->u
.quantifier
.min
= 1;
2844 curState
->u
.quantifier
.max
= (UINT
)-1;
2847 curState
->u
.quantifier
.min
= 0;
2848 curState
->u
.quantifier
.max
= 1;
2851 pc
= ReadCompactIndex(pc
, &k
);
2852 curState
->u
.quantifier
.min
= k
;
2853 pc
= ReadCompactIndex(pc
, &k
);
2854 /* max is k - 1 to use one byte for (UINT)-1 sentinel. */
2855 curState
->u
.quantifier
.max
= k
- 1;
2856 assert(curState
->u
.quantifier
.min
<= curState
->u
.quantifier
.max
);
2858 if (curState
->u
.quantifier
.max
== 0) {
2859 pc
= pc
+ GET_OFFSET(pc
);
2864 /* Step over <next> */
2865 nextpc
= pc
+ ARG_LEN
;
2866 op
= (REOp
) *nextpc
++;
2868 if (REOP_IS_SIMPLE(op
)) {
2869 if (!SimpleMatch(gData
, x
, op
, &nextpc
, TRUE
)) {
2870 if (curState
->u
.quantifier
.min
== 0)
2874 pc
= pc
+ GET_OFFSET(pc
);
2877 op
= (REOp
) *nextpc
++;
2880 curState
->index
= startcp
- gData
->cpbegin
;
2881 curState
->continue_op
= REOP_REPEAT
;
2882 curState
->continue_pc
= pc
;
2883 curState
->parenSoFar
= parenSoFar
;
2884 PUSH_STATE_STACK(gData
);
2885 if (curState
->u
.quantifier
.min
== 0 &&
2886 !PushBackTrackState(gData
, REOP_REPEAT
, pc
, x
, startcp
,
2893 case REOP_ENDCHILD
: /* marks the end of a quantifier child */
2894 pc
= curState
[-1].continue_pc
;
2895 op
= (REOp
) curState
[-1].continue_op
;
2904 --gData
->stateStackTop
;
2906 /* Failed, see if we have enough children. */
2907 if (curState
->u
.quantifier
.min
== 0)
2911 if (curState
->u
.quantifier
.min
== 0 &&
2912 x
->cp
== gData
->cpbegin
+ curState
->index
) {
2913 /* matched an empty string, that'll get us nowhere */
2917 if (curState
->u
.quantifier
.min
!= 0)
2918 curState
->u
.quantifier
.min
--;
2919 if (curState
->u
.quantifier
.max
!= (UINT
) -1)
2920 curState
->u
.quantifier
.max
--;
2921 if (curState
->u
.quantifier
.max
== 0)
2923 nextpc
= pc
+ ARG_LEN
;
2924 nextop
= (REOp
) *nextpc
;
2926 if (REOP_IS_SIMPLE(nextop
)) {
2928 if (!SimpleMatch(gData
, x
, nextop
, &nextpc
, TRUE
)) {
2929 if (curState
->u
.quantifier
.min
== 0)
2936 curState
->index
= startcp
- gData
->cpbegin
;
2937 PUSH_STATE_STACK(gData
);
2938 if (curState
->u
.quantifier
.min
== 0 &&
2939 !PushBackTrackState(gData
, REOP_REPEAT
,
2941 curState
->parenSoFar
,
2943 curState
->parenSoFar
)) {
2946 } while (*nextpc
== REOP_ENDCHILD
);
2949 parenSoFar
= curState
->parenSoFar
;
2954 pc
+= GET_OFFSET(pc
);
2957 case REOP_MINIMALSTAR
:
2958 curState
->u
.quantifier
.min
= 0;
2959 curState
->u
.quantifier
.max
= (UINT
)-1;
2960 goto minimalquantcommon
;
2961 case REOP_MINIMALPLUS
:
2962 curState
->u
.quantifier
.min
= 1;
2963 curState
->u
.quantifier
.max
= (UINT
)-1;
2964 goto minimalquantcommon
;
2965 case REOP_MINIMALOPT
:
2966 curState
->u
.quantifier
.min
= 0;
2967 curState
->u
.quantifier
.max
= 1;
2968 goto minimalquantcommon
;
2969 case REOP_MINIMALQUANT
:
2970 pc
= ReadCompactIndex(pc
, &k
);
2971 curState
->u
.quantifier
.min
= k
;
2972 pc
= ReadCompactIndex(pc
, &k
);
2973 /* See REOP_QUANT comments about k - 1. */
2974 curState
->u
.quantifier
.max
= k
- 1;
2975 assert(curState
->u
.quantifier
.min
2976 <= curState
->u
.quantifier
.max
);
2978 curState
->index
= x
->cp
- gData
->cpbegin
;
2979 curState
->parenSoFar
= parenSoFar
;
2980 PUSH_STATE_STACK(gData
);
2981 if (curState
->u
.quantifier
.min
!= 0) {
2982 curState
->continue_op
= REOP_MINIMALREPEAT
;
2983 curState
->continue_pc
= pc
;
2984 /* step over <next> */
2988 if (!PushBackTrackState(gData
, REOP_MINIMALREPEAT
,
2989 pc
, x
, x
->cp
, 0, 0)) {
2992 --gData
->stateStackTop
;
2993 pc
= pc
+ GET_OFFSET(pc
);
2998 case REOP_MINIMALREPEAT
:
2999 --gData
->stateStackTop
;
3002 TRACE("{%d,%d}\n", curState
->u
.quantifier
.min
, curState
->u
.quantifier
.max
);
3003 #define PREPARE_REPEAT() \
3005 curState->index = x->cp - gData->cpbegin; \
3006 curState->continue_op = REOP_MINIMALREPEAT; \
3007 curState->continue_pc = pc; \
3009 for (k = curState->parenSoFar; k < parenSoFar; k++) \
3010 x->parens[k].index = -1; \
3011 PUSH_STATE_STACK(gData); \
3012 op = (REOp) *pc++; \
3013 assert(op < REOP_LIMIT); \
3019 * Non-greedy failure - try to consume another child.
3021 if (curState
->u
.quantifier
.max
== (UINT
) -1 ||
3022 curState
->u
.quantifier
.max
> 0) {
3026 /* Don't need to adjust pc since we're going to pop. */
3029 if (curState
->u
.quantifier
.min
== 0 &&
3030 x
->cp
== gData
->cpbegin
+ curState
->index
) {
3031 /* Matched an empty string, that'll get us nowhere. */
3035 if (curState
->u
.quantifier
.min
!= 0)
3036 curState
->u
.quantifier
.min
--;
3037 if (curState
->u
.quantifier
.max
!= (UINT
) -1)
3038 curState
->u
.quantifier
.max
--;
3039 if (curState
->u
.quantifier
.min
!= 0) {
3043 curState
->index
= x
->cp
- gData
->cpbegin
;
3044 curState
->parenSoFar
= parenSoFar
;
3045 PUSH_STATE_STACK(gData
);
3046 if (!PushBackTrackState(gData
, REOP_MINIMALREPEAT
,
3048 curState
->parenSoFar
,
3049 parenSoFar
- curState
->parenSoFar
)) {
3052 --gData
->stateStackTop
;
3053 pc
= pc
+ GET_OFFSET(pc
);
3055 assert(op
< REOP_LIMIT
);
3065 * If the match failed and there's a backtrack option, take it.
3066 * Otherwise this is a complete and utter failure.
3069 if (gData
->cursz
== 0)
3072 /* Potentially detect explosive regex here. */
3073 gData
->backTrackCount
++;
3074 if (gData
->backTrackLimit
&&
3075 gData
->backTrackCount
>= gData
->backTrackLimit
) {
3076 JS_ReportErrorNumber(gData
->cx
, js_GetErrorMessage
, NULL
,
3077 JSMSG_REGEXP_TOO_COMPLEX
);
3082 backTrackData
= gData
->backTrackSP
;
3083 gData
->cursz
= backTrackData
->sz
;
3084 gData
->backTrackSP
=
3085 (REBackTrackData
*) ((char *)backTrackData
- backTrackData
->sz
);
3086 x
->cp
= backTrackData
->cp
;
3087 pc
= backTrackData
->backtrack_pc
;
3088 op
= (REOp
) backTrackData
->backtrack_op
;
3089 assert(op
< REOP_LIMIT
);
3090 gData
->stateStackTop
= backTrackData
->saveStateStackTop
;
3091 assert(gData
->stateStackTop
);
3093 memcpy(gData
->stateStack
, backTrackData
+ 1,
3094 sizeof(REProgState
) * backTrackData
->saveStateStackTop
);
3095 curState
= &gData
->stateStack
[gData
->stateStackTop
- 1];
3097 if (backTrackData
->parenCount
) {
3098 memcpy(&x
->parens
[backTrackData
->parenIndex
],
3099 (char *)(backTrackData
+ 1) +
3100 sizeof(REProgState
) * backTrackData
->saveStateStackTop
,
3101 sizeof(RECapture
) * backTrackData
->parenCount
);
3102 parenSoFar
= backTrackData
->parenIndex
+ backTrackData
->parenCount
;
3104 for (k
= curState
->parenSoFar
; k
< parenSoFar
; k
++)
3105 x
->parens
[k
].index
= -1;
3106 parenSoFar
= curState
->parenSoFar
;
3109 TRACE("\tBT_Pop: %ld,%ld\n",
3110 (ULONG_PTR
)backTrackData
->parenIndex
,
3111 (ULONG_PTR
)backTrackData
->parenCount
);
3117 * Continue with the expression.
3120 assert(op
< REOP_LIMIT
);
3132 static REMatchState
*MatchRegExp(REGlobalData
*gData
, REMatchState
*x
)
3134 REMatchState
*result
;
3135 const WCHAR
*cp
= x
->cp
;
3140 * Have to include the position beyond the last character
3141 * in order to detect end-of-input/line condition.
3143 for (cp2
= cp
; cp2
<= gData
->cpend
; cp2
++) {
3144 gData
->skipped
= cp2
- cp
;
3146 for (j
= 0; j
< gData
->regexp
->parenCount
; j
++)
3147 x
->parens
[j
].index
= -1;
3148 result
= ExecuteREBytecode(gData
, x
);
3149 if (!gData
->ok
|| result
|| (gData
->regexp
->flags
& JSREG_STICKY
))
3151 gData
->backTrackSP
= gData
->backTrackStack
;
3153 gData
->stateStackTop
= 0;
3154 cp2
= cp
+ gData
->skipped
;
3159 #define MIN_BACKTRACK_LIMIT 400000
3161 static REMatchState
*InitMatch(script_ctx_t
*cx
, REGlobalData
*gData
, JSRegExp
*re
, size_t length
)
3163 REMatchState
*result
;
3166 gData
->backTrackStackSize
= INITIAL_BACKTRACK
;
3167 gData
->backTrackStack
= jsheap_alloc(gData
->pool
, INITIAL_BACKTRACK
);
3168 if (!gData
->backTrackStack
)
3171 gData
->backTrackSP
= gData
->backTrackStack
;
3173 gData
->backTrackCount
= 0;
3174 gData
->backTrackLimit
= 0;
3176 gData
->stateStackLimit
= INITIAL_STATESTACK
;
3177 gData
->stateStack
= jsheap_alloc(gData
->pool
, sizeof(REProgState
) * INITIAL_STATESTACK
);
3178 if (!gData
->stateStack
)
3181 gData
->stateStackTop
= 0;
3186 result
= jsheap_alloc(gData
->pool
, offsetof(REMatchState
, parens
) + re
->parenCount
* sizeof(RECapture
));
3190 for (i
= 0; i
< re
->classCount
; i
++) {
3191 if (!re
->classList
[i
].converted
&&
3192 !ProcessCharSet(gData
, &re
->classList
[i
])) {
3200 js_ReportOutOfScriptQuota(cx
);
3206 js_DestroyRegExp(JSRegExp
*re
)
3208 if (re
->classList
) {
3210 for (i
= 0; i
< re
->classCount
; i
++) {
3211 if (re
->classList
[i
].converted
)
3212 heap_free(re
->classList
[i
].u
.bits
);
3213 re
->classList
[i
].u
.bits
= NULL
;
3215 heap_free(re
->classList
);
3221 js_NewRegExp(script_ctx_t
*cx
, BSTR str
, UINT flags
, BOOL flat
)
3225 CompilerState state
;
3232 mark
= jsheap_mark(&cx
->tmp_heap
);
3233 len
= SysStringLen(str
);
3239 state
.cpbegin
= state
.cp
;
3240 state
.cpend
= state
.cp
+ len
;
3241 state
.flags
= flags
;
3242 state
.parenCount
= 0;
3243 state
.classCount
= 0;
3244 state
.progLength
= 0;
3245 state
.treeDepth
= 0;
3246 state
.classBitmapsMem
= 0;
3247 for (i
= 0; i
< CLASS_CACHE_SIZE
; i
++)
3248 state
.classCache
[i
].start
= NULL
;
3250 if (len
!= 0 && flat
) {
3251 state
.result
= NewRENode(&state
, REOP_FLAT
);
3254 state
.result
->u
.flat
.chr
= *state
.cpbegin
;
3255 state
.result
->u
.flat
.length
= len
;
3256 state
.result
->kid
= (void *) state
.cpbegin
;
3257 /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
3258 state
.progLength
+= 1 + GetCompactIndexWidth(0)
3259 + GetCompactIndexWidth(len
);
3261 if (!ParseRegExp(&state
))
3264 resize
= offsetof(JSRegExp
, program
) + state
.progLength
+ 1;
3265 re
= heap_alloc(resize
);
3269 assert(state
.classBitmapsMem
<= CLASS_BITMAPS_MEM_LIMIT
);
3270 re
->classCount
= state
.classCount
;
3271 if (re
->classCount
) {
3272 re
->classList
= heap_alloc(re
->classCount
* sizeof(RECharSet
));
3273 if (!re
->classList
) {
3274 js_DestroyRegExp(re
);
3278 for (i
= 0; i
< re
->classCount
; i
++)
3279 re
->classList
[i
].converted
= FALSE
;
3281 re
->classList
= NULL
;
3283 endPC
= EmitREBytecode(&state
, re
, state
.treeDepth
, re
->program
, state
.result
);
3285 js_DestroyRegExp(re
);
3289 *endPC
++ = REOP_END
;
3291 * Check whether size was overestimated and shrink using realloc.
3292 * This is safe since no pointers to newly parsed regexp or its parts
3293 * besides re exist here.
3295 if ((size_t)(endPC
- re
->program
) != state
.progLength
+ 1) {
3297 assert((size_t)(endPC
- re
->program
) < state
.progLength
+ 1);
3298 resize
= offsetof(JSRegExp
, program
) + (endPC
- re
->program
);
3299 tmp
= heap_realloc(re
, resize
);
3305 re
->parenCount
= state
.parenCount
;
3313 static inline RegExpInstance
*regexp_from_vdisp(vdisp_t
*vdisp
)
3315 return (RegExpInstance
*)vdisp
->u
.jsdisp
;
3318 static void set_last_index(RegExpInstance
*This
, DWORD last_index
)
3320 This
->last_index
= last_index
;
3321 jsval_release(This
->last_index_val
);
3322 This
->last_index_val
= jsval_number(last_index
);
3325 static HRESULT
do_regexp_match_next(script_ctx_t
*ctx
, RegExpInstance
*regexp
, DWORD rem_flags
,
3326 const WCHAR
*str
, DWORD len
, const WCHAR
**cp
, match_result_t
**parens
, DWORD
*parens_size
,
3327 DWORD
*parens_cnt
, match_result_t
*ret
)
3329 REMatchState
*x
, *result
;
3333 gData
.cpbegin
= str
;
3334 gData
.cpend
= str
+ len
;
3335 gData
.start
= *cp
-str
;
3337 gData
.pool
= &ctx
->tmp_heap
;
3339 x
= InitMatch(NULL
, &gData
, regexp
->jsregexp
, gData
.cpend
- gData
.cpbegin
);
3341 WARN("InitMatch failed\n");
3346 result
= MatchRegExp(&gData
, x
);
3348 WARN("MatchRegExp failed\n");
3353 if(rem_flags
& REM_RESET_INDEX
)
3354 set_last_index(regexp
, 0);
3359 if(regexp
->jsregexp
->parenCount
> *parens_size
) {
3360 match_result_t
*new_parens
;
3363 new_parens
= heap_realloc(*parens
, sizeof(match_result_t
)*regexp
->jsregexp
->parenCount
);
3365 new_parens
= heap_alloc(sizeof(match_result_t
)*regexp
->jsregexp
->parenCount
);
3367 return E_OUTOFMEMORY
;
3369 *parens
= new_parens
;
3373 /* FIXME: We often already have a copy of input string that we could use to store last match */
3374 if(!(rem_flags
& REM_NO_CTX_UPDATE
) &&
3375 (!ctx
->last_match
|| len
!= SysStringLen(ctx
->last_match
) || strncmpW(ctx
->last_match
, str
, len
))) {
3378 last_match
= SysAllocStringLen(str
, len
);
3380 return E_OUTOFMEMORY
;
3381 SysFreeString(ctx
->last_match
);
3382 ctx
->last_match
= last_match
;
3388 *parens_cnt
= regexp
->jsregexp
->parenCount
;
3390 for(i
=0; i
< regexp
->jsregexp
->parenCount
; i
++) {
3391 if(result
->parens
[i
].index
== -1) {
3392 (*parens
)[i
].str
= NULL
;
3393 (*parens
)[i
].len
= 0;
3395 (*parens
)[i
].str
= str
+ result
->parens
[i
].index
;
3396 (*parens
)[i
].len
= result
->parens
[i
].length
;
3401 if(!(rem_flags
& REM_NO_CTX_UPDATE
)) {
3402 DWORD i
, n
= min(sizeof(ctx
->match_parens
)/sizeof(ctx
->match_parens
[0]), regexp
->jsregexp
->parenCount
);
3404 for(i
=0; i
< n
; i
++) {
3405 if(result
->parens
[i
].index
== -1) {
3406 ctx
->match_parens
[i
].str
= NULL
;
3407 ctx
->match_parens
[i
].len
= 0;
3409 ctx
->match_parens
[i
].str
= ctx
->last_match
+ result
->parens
[i
].index
;
3410 ctx
->match_parens
[i
].len
= result
->parens
[i
].length
;
3414 if(n
< sizeof(ctx
->match_parens
)/sizeof(ctx
->match_parens
[0]))
3415 memset(ctx
->match_parens
+n
, 0, sizeof(ctx
->match_parens
) - n
*sizeof(ctx
->match_parens
[0]));
3418 matchlen
= (result
->cp
-*cp
) - gData
.skipped
;
3420 ret
->str
= result
->cp
-matchlen
;
3421 ret
->len
= matchlen
;
3422 set_last_index(regexp
, result
->cp
-str
);
3424 if(!(rem_flags
& REM_NO_CTX_UPDATE
)) {
3425 ctx
->last_match_index
= ret
->str
-str
;
3426 ctx
->last_match_length
= matchlen
;
3432 HRESULT
regexp_match_next(script_ctx_t
*ctx
, jsdisp_t
*dispex
, DWORD rem_flags
, const WCHAR
*str
,
3433 DWORD len
, const WCHAR
**cp
, match_result_t
**parens
, DWORD
*parens_size
, DWORD
*parens_cnt
,
3434 match_result_t
*ret
)
3436 RegExpInstance
*regexp
= (RegExpInstance
*)dispex
;
3440 if((rem_flags
& REM_CHECK_GLOBAL
) && !(regexp
->jsregexp
->flags
& JSREG_GLOB
))
3443 mark
= jsheap_mark(&ctx
->tmp_heap
);
3445 hres
= do_regexp_match_next(ctx
, regexp
, rem_flags
, str
, len
, cp
, parens
, parens_size
, parens_cnt
, ret
);
3451 HRESULT
regexp_match(script_ctx_t
*ctx
, jsdisp_t
*dispex
, const WCHAR
*str
, DWORD len
, BOOL gflag
,
3452 match_result_t
**match_result
, DWORD
*result_cnt
)
3454 RegExpInstance
*This
= (RegExpInstance
*)dispex
;
3455 match_result_t
*ret
= NULL
, cres
;
3456 const WCHAR
*cp
= str
;
3457 DWORD i
=0, ret_size
= 0;
3461 mark
= jsheap_mark(&ctx
->tmp_heap
);
3464 hres
= do_regexp_match_next(ctx
, This
, 0, str
, len
, &cp
, NULL
, NULL
, NULL
, &cres
);
3465 if(hres
== S_FALSE
) {
3475 ret
= heap_realloc(ret
, (ret_size
<<= 1) * sizeof(match_result_t
));
3477 ret
= heap_alloc((ret_size
=4) * sizeof(match_result_t
));
3479 hres
= E_OUTOFMEMORY
;
3486 if(!gflag
&& !(This
->jsregexp
->flags
& JSREG_GLOB
)) {
3498 *match_result
= ret
;
3503 static HRESULT
RegExp_source(script_ctx_t
*ctx
, vdisp_t
*jsthis
, WORD flags
, unsigned argc
, jsval_t
*argv
,
3509 case DISPATCH_PROPERTYGET
: {
3510 RegExpInstance
*This
= regexp_from_vdisp(jsthis
);
3511 BSTR ret
= SysAllocString(This
->str
);
3513 return E_OUTOFMEMORY
;
3514 *r
= jsval_string(ret
);
3518 FIXME("Unimplemented flags %x\n", flags
);
3525 static HRESULT
RegExp_global(script_ctx_t
*ctx
, vdisp_t
*jsthis
, WORD flags
, unsigned argc
, jsval_t
*argv
,
3532 static HRESULT
RegExp_ignoreCase(script_ctx_t
*ctx
, vdisp_t
*jsthis
, WORD flags
, unsigned argc
, jsval_t
*argv
,
3539 static HRESULT
RegExp_multiline(script_ctx_t
*ctx
, vdisp_t
*jsthis
, WORD flags
, unsigned argc
, jsval_t
*argv
,
3546 static INT
index_from_val(script_ctx_t
*ctx
, jsval_t v
)
3551 hres
= to_number(ctx
, v
, &n
);
3553 clear_ei(ctx
); /* FIXME: Move ignoring exceptions to to_primitive */
3558 return is_int32(n
) ? n
: 0;
3561 static HRESULT
RegExp_lastIndex(script_ctx_t
*ctx
, vdisp_t
*jsthis
, WORD flags
, unsigned argc
, jsval_t
*argv
,
3567 case DISPATCH_PROPERTYGET
: {
3568 RegExpInstance
*regexp
= regexp_from_vdisp(jsthis
);
3570 return jsval_copy(regexp
->last_index_val
, r
);
3572 case DISPATCH_PROPERTYPUT
: {
3573 RegExpInstance
*regexp
= regexp_from_vdisp(jsthis
);
3576 hres
= jsval_copy(argv
[0], ®exp
->last_index_val
);
3580 regexp
->last_index
= index_from_val(ctx
, argv
[0]);
3584 FIXME("unimplemented flags: %x\n", flags
);
3591 static HRESULT
RegExp_toString(script_ctx_t
*ctx
, vdisp_t
*jsthis
, WORD flags
, unsigned argc
, jsval_t
*argv
,
3598 static HRESULT
create_match_array(script_ctx_t
*ctx
, BSTR input
, const match_result_t
*result
,
3599 const match_result_t
*parens
, DWORD parens_cnt
, IDispatch
**ret
)
3604 HRESULT hres
= S_OK
;
3606 static const WCHAR indexW
[] = {'i','n','d','e','x',0};
3607 static const WCHAR inputW
[] = {'i','n','p','u','t',0};
3608 static const WCHAR lastIndexW
[] = {'l','a','s','t','I','n','d','e','x',0};
3609 static const WCHAR zeroW
[] = {'0',0};
3611 hres
= create_array(ctx
, parens_cnt
+1, &array
);
3615 for(i
=0; i
< parens_cnt
; i
++) {
3616 str
= SysAllocStringLen(parens
[i
].str
, parens
[i
].len
);
3618 hres
= E_OUTOFMEMORY
;
3622 hres
= jsdisp_propput_idx(array
, i
+1, jsval_string(str
));
3628 while(SUCCEEDED(hres
)) {
3629 hres
= jsdisp_propput_name(array
, indexW
, jsval_number(result
->str
-input
));
3633 hres
= jsdisp_propput_name(array
, lastIndexW
, jsval_number(result
->str
-input
+result
->len
));
3637 hres
= jsdisp_propput_name(array
, inputW
, jsval_string(input
));
3641 str
= SysAllocStringLen(result
->str
, result
->len
);
3643 hres
= E_OUTOFMEMORY
;
3646 hres
= jsdisp_propput_name(array
, zeroW
, jsval_string(str
));
3652 jsdisp_release(array
);
3656 *ret
= to_disp(array
);
3660 static HRESULT
run_exec(script_ctx_t
*ctx
, vdisp_t
*jsthis
, jsval_t arg
, BSTR
*input
,
3661 match_result_t
*match
, match_result_t
**parens
, DWORD
*parens_cnt
, BOOL
*ret
)
3663 RegExpInstance
*regexp
;
3664 DWORD parens_size
= 0, last_index
= 0, length
;
3669 if(!is_vclass(jsthis
, JSCLASS_REGEXP
)) {
3670 FIXME("Not a RegExp\n");
3674 regexp
= regexp_from_vdisp(jsthis
);
3676 hres
= to_string(ctx
, arg
, &string
);
3679 length
= SysStringLen(string
);
3681 if(regexp
->jsregexp
->flags
& JSREG_GLOB
) {
3682 if(regexp
->last_index
< 0) {
3683 SysFreeString(string
);
3684 set_last_index(regexp
, 0);
3692 last_index
= regexp
->last_index
;
3695 cp
= string
+ last_index
;
3696 hres
= regexp_match_next(ctx
, ®exp
->dispex
, REM_RESET_INDEX
, string
, length
, &cp
, parens
,
3697 parens
? &parens_size
: NULL
, parens_cnt
, match
);
3699 SysFreeString(string
);
3703 *ret
= hres
== S_OK
;
3707 SysFreeString(string
);
3712 static HRESULT
RegExp_exec(script_ctx_t
*ctx
, vdisp_t
*jsthis
, WORD flags
, unsigned argc
, jsval_t
*argv
,
3715 match_result_t
*parens
= NULL
, match
;
3716 DWORD parens_cnt
= 0;
3723 hres
= run_exec(ctx
, jsthis
, argc
? argv
[0] : jsval_string(NULL
), &string
, &match
, &parens
, &parens_cnt
, &b
);
3731 hres
= create_match_array(ctx
, string
, &match
, parens
, parens_cnt
, &ret
);
3733 *r
= jsval_disp(ret
);
3740 SysFreeString(string
);
3744 static HRESULT
RegExp_test(script_ctx_t
*ctx
, vdisp_t
*jsthis
, WORD flags
, unsigned argc
, jsval_t
*argv
,
3747 match_result_t match
;
3755 undef_str
= SysAllocString(undefinedW
);
3757 return E_OUTOFMEMORY
;
3760 hres
= run_exec(ctx
, jsthis
, argc
? argv
[0] : jsval_string(undef_str
), NULL
, &match
, NULL
, NULL
, &b
);
3762 SysFreeString(undef_str
);
3771 static HRESULT
RegExp_value(script_ctx_t
*ctx
, vdisp_t
*jsthis
, WORD flags
, unsigned argc
, jsval_t
*argv
,
3778 return throw_type_error(ctx
, JS_E_FUNCTION_EXPECTED
, NULL
);
3780 FIXME("unimplemented flags %x\n", flags
);
3787 static void RegExp_destructor(jsdisp_t
*dispex
)
3789 RegExpInstance
*This
= (RegExpInstance
*)dispex
;
3792 js_DestroyRegExp(This
->jsregexp
);
3793 jsval_release(This
->last_index_val
);
3794 SysFreeString(This
->str
);
3798 static const builtin_prop_t RegExp_props
[] = {
3799 {execW
, RegExp_exec
, PROPF_METHOD
|1},
3800 {globalW
, RegExp_global
, 0},
3801 {ignoreCaseW
, RegExp_ignoreCase
, 0},
3802 {lastIndexW
, RegExp_lastIndex
, 0},
3803 {multilineW
, RegExp_multiline
, 0},
3804 {sourceW
, RegExp_source
, 0},
3805 {testW
, RegExp_test
, PROPF_METHOD
|1},
3806 {toStringW
, RegExp_toString
, PROPF_METHOD
}
3809 static const builtin_info_t RegExp_info
= {
3811 {NULL
, RegExp_value
, 0},
3812 sizeof(RegExp_props
)/sizeof(*RegExp_props
),
3818 static const builtin_prop_t RegExpInst_props
[] = {
3819 {globalW
, RegExp_global
, 0},
3820 {ignoreCaseW
, RegExp_ignoreCase
, 0},
3821 {lastIndexW
, RegExp_lastIndex
, 0},
3822 {multilineW
, RegExp_multiline
, 0},
3823 {sourceW
, RegExp_source
, 0}
3826 static const builtin_info_t RegExpInst_info
= {
3828 {NULL
, RegExp_value
, 0},
3829 sizeof(RegExpInst_props
)/sizeof(*RegExpInst_props
),
3835 static HRESULT
alloc_regexp(script_ctx_t
*ctx
, jsdisp_t
*object_prototype
, RegExpInstance
**ret
)
3837 RegExpInstance
*regexp
;
3840 regexp
= heap_alloc_zero(sizeof(RegExpInstance
));
3842 return E_OUTOFMEMORY
;
3844 if(object_prototype
)
3845 hres
= init_dispex(®exp
->dispex
, ctx
, &RegExp_info
, object_prototype
);
3847 hres
= init_dispex_from_constr(®exp
->dispex
, ctx
, &RegExpInst_info
, ctx
->regexp_constr
);
3858 HRESULT
create_regexp(script_ctx_t
*ctx
, const WCHAR
*exp
, int len
, DWORD flags
, jsdisp_t
**ret
)
3860 RegExpInstance
*regexp
;
3863 TRACE("%s %x\n", debugstr_wn(exp
, len
), flags
);
3865 hres
= alloc_regexp(ctx
, NULL
, ®exp
);
3870 regexp
->str
= SysAllocString(exp
);
3872 regexp
->str
= SysAllocStringLen(exp
, len
);
3874 jsdisp_release(®exp
->dispex
);
3875 return E_OUTOFMEMORY
;
3878 regexp
->jsregexp
= js_NewRegExp(ctx
, regexp
->str
, flags
, FALSE
);
3879 if(!regexp
->jsregexp
) {
3880 WARN("js_NewRegExp failed\n");
3881 jsdisp_release(®exp
->dispex
);
3885 regexp
->last_index_val
= jsval_number(0);
3887 *ret
= ®exp
->dispex
;
3891 HRESULT
create_regexp_var(script_ctx_t
*ctx
, jsval_t src_arg
, jsval_t
*flags_arg
, jsdisp_t
**ret
)
3893 const WCHAR
*opt
= emptyW
, *src
;
3897 if(is_object_instance(src_arg
)) {
3900 obj
= iface_to_jsdisp((IUnknown
*)get_object(src_arg
));
3902 if(is_class(obj
, JSCLASS_REGEXP
)) {
3903 RegExpInstance
*regexp
= (RegExpInstance
*)obj
;
3905 hres
= create_regexp(ctx
, regexp
->str
, -1, regexp
->jsregexp
->flags
, ret
);
3906 jsdisp_release(obj
);
3910 jsdisp_release(obj
);
3914 if(!is_string(src_arg
)) {
3915 FIXME("src_arg = %s\n", debugstr_jsval(src_arg
));
3919 src
= get_string(src_arg
);
3922 if(!is_string(*flags_arg
)) {
3923 FIXME("unimplemented for %s\n", debugstr_jsval(*flags_arg
));
3927 opt
= get_string(*flags_arg
);
3930 hres
= parse_regexp_flags(opt
, strlenW(opt
), &flags
);
3934 return create_regexp(ctx
, src
, -1, flags
, ret
);
3937 HRESULT
regexp_string_match(script_ctx_t
*ctx
, jsdisp_t
*re
, BSTR str
, jsval_t
*r
)
3939 static const WCHAR indexW
[] = {'i','n','d','e','x',0};
3940 static const WCHAR inputW
[] = {'i','n','p','u','t',0};
3941 static const WCHAR lastIndexW
[] = {'l','a','s','t','I','n','d','e','x',0};
3943 RegExpInstance
*regexp
= (RegExpInstance
*)re
;
3944 match_result_t
*match_result
;
3945 DWORD match_cnt
, i
, length
;
3949 length
= SysStringLen(str
);
3951 if(!(regexp
->jsregexp
->flags
& JSREG_GLOB
)) {
3952 match_result_t match
, *parens
= NULL
;
3953 DWORD parens_cnt
, parens_size
= 0;
3954 const WCHAR
*cp
= str
;
3956 hres
= regexp_match_next(ctx
, ®exp
->dispex
, 0, str
, length
, &cp
, &parens
, &parens_size
, &parens_cnt
, &match
);
3964 hres
= create_match_array(ctx
, str
, &match
, parens
, parens_cnt
, &ret
);
3966 *r
= jsval_disp(ret
);
3976 hres
= regexp_match(ctx
, ®exp
->dispex
, str
, length
, FALSE
, &match_result
, &match_cnt
);
3981 TRACE("no match\n");
3988 hres
= create_array(ctx
, match_cnt
, &array
);
3992 for(i
=0; i
< match_cnt
; i
++) {
3995 tmp_str
= SysAllocStringLen(match_result
[i
].str
, match_result
[i
].len
);
3997 hres
= E_OUTOFMEMORY
;
4001 hres
= jsdisp_propput_idx(array
, i
, jsval_string(tmp_str
));
4002 SysFreeString(tmp_str
);
4007 while(SUCCEEDED(hres
)) {
4008 hres
= jsdisp_propput_name(array
, indexW
, jsval_number(match_result
[match_cnt
-1].str
-str
));
4012 hres
= jsdisp_propput_name(array
, lastIndexW
,
4013 jsval_number(match_result
[match_cnt
-1].str
-str
+match_result
[match_cnt
-1].len
));
4017 hres
= jsdisp_propput_name(array
, inputW
, jsval_string(str
));
4021 heap_free(match_result
);
4023 if(SUCCEEDED(hres
) && r
)
4024 *r
= jsval_obj(array
);
4026 jsdisp_release(array
);
4030 static HRESULT
global_idx(script_ctx_t
*ctx
, DWORD flags
, DWORD idx
, jsval_t
*r
)
4033 case DISPATCH_PROPERTYGET
: {
4036 ret
= SysAllocStringLen(ctx
->match_parens
[idx
].str
, ctx
->match_parens
[idx
].len
);
4038 return E_OUTOFMEMORY
;
4040 *r
= jsval_string(ret
);
4043 case DISPATCH_PROPERTYPUT
:
4046 FIXME("unsupported flags\n");
4053 static HRESULT
RegExpConstr_idx1(script_ctx_t
*ctx
, vdisp_t
*jsthis
, WORD flags
,
4054 unsigned argc
, jsval_t
*argv
, jsval_t
*r
)
4057 return global_idx(ctx
, flags
, 0, r
);
4060 static HRESULT
RegExpConstr_idx2(script_ctx_t
*ctx
, vdisp_t
*jsthis
, WORD flags
,
4061 unsigned argc
, jsval_t
*argv
, jsval_t
*r
)
4064 return global_idx(ctx
, flags
, 1, r
);
4067 static HRESULT
RegExpConstr_idx3(script_ctx_t
*ctx
, vdisp_t
*jsthis
, WORD flags
,
4068 unsigned argc
, jsval_t
*argv
, jsval_t
*r
)
4071 return global_idx(ctx
, flags
, 2, r
);
4074 static HRESULT
RegExpConstr_idx4(script_ctx_t
*ctx
, vdisp_t
*jsthis
, WORD flags
,
4075 unsigned argc
, jsval_t
*argv
, jsval_t
*r
)
4078 return global_idx(ctx
, flags
, 3, r
);
4081 static HRESULT
RegExpConstr_idx5(script_ctx_t
*ctx
, vdisp_t
*jsthis
, WORD flags
,
4082 unsigned argc
, jsval_t
*argv
, jsval_t
*r
)
4085 return global_idx(ctx
, flags
, 4, r
);
4088 static HRESULT
RegExpConstr_idx6(script_ctx_t
*ctx
, vdisp_t
*jsthis
, WORD flags
,
4089 unsigned argc
, jsval_t
*argv
, jsval_t
*r
)
4092 return global_idx(ctx
, flags
, 5, r
);
4095 static HRESULT
RegExpConstr_idx7(script_ctx_t
*ctx
, vdisp_t
*jsthis
, WORD flags
,
4096 unsigned argc
, jsval_t
*argv
, jsval_t
*r
)
4099 return global_idx(ctx
, flags
, 6, r
);
4102 static HRESULT
RegExpConstr_idx8(script_ctx_t
*ctx
, vdisp_t
*jsthis
, WORD flags
,
4103 unsigned argc
, jsval_t
*argv
, jsval_t
*r
)
4106 return global_idx(ctx
, flags
, 7, r
);
4109 static HRESULT
RegExpConstr_idx9(script_ctx_t
*ctx
, vdisp_t
*jsthis
, WORD flags
,
4110 unsigned argc
, jsval_t
*argv
, jsval_t
*r
)
4113 return global_idx(ctx
, flags
, 8, r
);
4116 static HRESULT
RegExpConstr_leftContext(script_ctx_t
*ctx
, vdisp_t
*jsthis
, WORD flags
,
4117 unsigned argc
, jsval_t
*argv
, jsval_t
*r
)
4122 case DISPATCH_PROPERTYGET
: {
4125 ret
= SysAllocStringLen(ctx
->last_match
, ctx
->last_match_index
);
4127 return E_OUTOFMEMORY
;
4129 *r
= jsval_string(ret
);
4132 case DISPATCH_PROPERTYPUT
:
4135 FIXME("unsupported flags\n");
4142 static HRESULT
RegExpConstr_rightContext(script_ctx_t
*ctx
, vdisp_t
*jsthis
, WORD flags
,
4143 unsigned argc
, jsval_t
*argv
, jsval_t
*r
)
4148 case DISPATCH_PROPERTYGET
: {
4151 ret
= SysAllocString(ctx
->last_match
+ctx
->last_match_index
+ctx
->last_match_length
);
4153 return E_OUTOFMEMORY
;
4155 *r
= jsval_string(ret
);
4158 case DISPATCH_PROPERTYPUT
:
4161 FIXME("unsupported flags\n");
4168 static HRESULT
RegExpConstr_value(script_ctx_t
*ctx
, vdisp_t
*jsthis
, WORD flags
, unsigned argc
, jsval_t
*argv
,
4174 case DISPATCH_METHOD
:
4176 if(is_object_instance(argv
[0])) {
4177 jsdisp_t
*jsdisp
= iface_to_jsdisp((IUnknown
*)get_object(argv
[0]));
4179 if(is_class(jsdisp
, JSCLASS_REGEXP
)) {
4180 if(argc
> 1 && !is_undefined(argv
[1])) {
4181 jsdisp_release(jsdisp
);
4182 return throw_regexp_error(ctx
, JS_E_REGEXP_SYNTAX
, NULL
);
4186 *r
= jsval_obj(jsdisp
);
4188 jsdisp_release(jsdisp
);
4191 jsdisp_release(jsdisp
);
4196 case DISPATCH_CONSTRUCT
: {
4205 hres
= create_regexp_var(ctx
, argv
[0], argc
> 1 ? argv
+1 : NULL
, &ret
);
4210 *r
= jsval_obj(ret
);
4212 jsdisp_release(ret
);
4216 FIXME("unimplemented flags: %x\n", flags
);
4223 static const builtin_prop_t RegExpConstr_props
[] = {
4224 {idx1W
, RegExpConstr_idx1
, 0},
4225 {idx2W
, RegExpConstr_idx2
, 0},
4226 {idx3W
, RegExpConstr_idx3
, 0},
4227 {idx4W
, RegExpConstr_idx4
, 0},
4228 {idx5W
, RegExpConstr_idx5
, 0},
4229 {idx6W
, RegExpConstr_idx6
, 0},
4230 {idx7W
, RegExpConstr_idx7
, 0},
4231 {idx8W
, RegExpConstr_idx8
, 0},
4232 {idx9W
, RegExpConstr_idx9
, 0},
4233 {leftContextW
, RegExpConstr_leftContext
, 0},
4234 {rightContextW
, RegExpConstr_rightContext
, 0}
4237 static const builtin_info_t RegExpConstr_info
= {
4239 {NULL
, Function_value
, 0},
4240 sizeof(RegExpConstr_props
)/sizeof(*RegExpConstr_props
),
4246 HRESULT
create_regexp_constr(script_ctx_t
*ctx
, jsdisp_t
*object_prototype
, jsdisp_t
**ret
)
4248 RegExpInstance
*regexp
;
4251 static const WCHAR RegExpW
[] = {'R','e','g','E','x','p',0};
4253 hres
= alloc_regexp(ctx
, object_prototype
, ®exp
);
4257 hres
= create_builtin_constructor(ctx
, RegExpConstr_value
, RegExpW
, &RegExpConstr_info
,
4258 PROPF_CONSTR
|2, ®exp
->dispex
, ret
);
4260 jsdisp_release(®exp
->dispex
);
4264 HRESULT
parse_regexp_flags(const WCHAR
*str
, DWORD str_len
, DWORD
*ret
)
4269 for (p
= str
; p
< str
+str_len
; p
++) {
4272 flags
|= JSREG_GLOB
;
4275 flags
|= JSREG_FOLD
;
4278 flags
|= JSREG_MULTILINE
;
4281 flags
|= JSREG_STICKY
;
4284 WARN("wrong flag %c\n", *p
);