2 * Copyright 2008 Jacek Caban for CodeWeavers
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
20 * Code in this file is based on files:
23 * from Mozilla project, released under LGPL 2.1 or later.
25 * The Original Code is Mozilla Communicator client code, released
28 * The Initial Developer of the Original Code is
29 * Netscape Communications Corporation.
30 * Portions created by the Initial Developer are Copyright (C) 1998
31 * the Initial Developer. All Rights Reserved.
38 #include "wine/debug.h"
40 WINE_DEFAULT_DEBUG_CHANNEL(jscript
);
42 #define JSREG_FOLD 0x01 /* fold uppercase to lowercase */
43 #define JSREG_GLOB 0x02 /* global exec, creates array of matches */
44 #define JSREG_MULTILINE 0x04 /* treat ^ and $ as begin and end of line */
45 #define JSREG_STICKY 0x08 /* only match starting at lastIndex */
47 typedef BYTE JSPackedBool
;
48 typedef BYTE jsbytecode
;
51 * This struct holds a bitmap representation of a class from a regexp.
52 * There's a list of these referenced by the classList field in the JSRegExp
53 * struct below. The initial state has startIndex set to the offset in the
54 * original regexp source of the beginning of the class contents. The first
55 * use of the class converts the source representation into a bitmap.
58 typedef struct RECharSet
{
59 JSPackedBool converted
;
72 WORD flags
; /* flags, see jsapi.h's JSREG_* defines */
73 size_t parenCount
; /* number of parenthesized submatches */
74 size_t classCount
; /* count [...] bitmaps */
75 RECharSet
*classList
; /* list of [...] bitmaps */
76 BSTR source
; /* locked source string, sans // */
77 jsbytecode program
[1]; /* regular expression bytecode */
87 static const WCHAR sourceW
[] = {'s','o','u','r','c','e',0};
88 static const WCHAR globalW
[] = {'g','l','o','b','a','l',0};
89 static const WCHAR ignoreCaseW
[] = {'i','g','n','o','r','e','C','a','s','e',0};
90 static const WCHAR multilineW
[] = {'m','u','l','t','i','l','i','n','e',0};
91 static const WCHAR lastIndexW
[] = {'l','a','s','t','I','n','d','e','x',0};
92 static const WCHAR toStringW
[] = {'t','o','S','t','r','i','n','g',0};
93 static const WCHAR toLocaleStringW
[] = {'t','o','L','o','c','a','l','e','S','t','r','i','n','g',0};
94 static const WCHAR hasOwnPropertyW
[] = {'h','a','s','O','w','n','P','r','o','p','e','r','t','y',0};
95 static const WCHAR propertyIsEnumerableW
[] =
96 {'p','r','o','p','e','r','t','y','I','s','E','n','u','m','e','r','a','b','l','e',0};
97 static const WCHAR isPrototypeOfW
[] = {'i','s','P','r','o','t','o','t','y','p','e','O','f',0};
98 static const WCHAR execW
[] = {'e','x','e','c',0};
100 static const WCHAR emptyW
[] = {0};
102 /* FIXME: Better error handling */
103 #define ReportRegExpError(a,b,c)
104 #define ReportRegExpErrorHelper(a,b,c,d)
105 #define JS_ReportErrorNumber(a,b,c,d)
106 #define JS_ReportErrorFlagsAndNumber(a,b,c,d,e,f)
107 #define js_ReportOutOfScriptQuota(a)
108 #define JS_ReportOutOfMemory(a)
109 #define JS_COUNT_OPERATION(a,b)
111 #define JSMSG_MIN_TOO_BIG 47
112 #define JSMSG_MAX_TOO_BIG 48
113 #define JSMSG_OUT_OF_ORDER 49
114 #define JSMSG_OUT_OF_MEMORY 137
116 #define LINE_SEPARATOR 0x2028
117 #define PARA_SEPARATOR 0x2029
119 #define RE_IS_LETTER(c) (((c >= 'A') && (c <= 'Z')) || \
120 ((c >= 'a') && (c <= 'z')) )
121 #define RE_IS_LINE_TERM(c) ((c == '\n') || (c == '\r') || \
122 (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
124 #define JS_ISWORD(c) ((c) < 128 && (isalnum(c) || (c) == '_'))
126 #define JS7_ISDEC(c) ((((unsigned)(c)) - '0') <= 9)
127 #define JS7_UNDEC(c) ((c) - '0')
179 REOP_LIMIT
/* META: no operator >= to this */
182 #define REOP_IS_SIMPLE(op) ((op) <= REOP_NCLASS)
184 static const char *reop_names
[] = {
237 typedef struct RECapture
{
238 ptrdiff_t index
; /* start of contents, -1 for empty */
239 size_t length
; /* length of capture */
242 typedef struct REMatchState
{
244 RECapture parens
[1]; /* first of 're->parenCount' captures,
245 allocated at end of this struct */
248 typedef struct REProgState
{
249 jsbytecode
*continue_pc
; /* current continuation data */
250 jsbytecode continue_op
;
251 ptrdiff_t index
; /* progress in text */
252 size_t parenSoFar
; /* highest indexed paren started */
255 UINT min
; /* current quantifier limits */
259 size_t top
; /* backtrack stack state */
265 typedef struct REBackTrackData
{
266 size_t sz
; /* size of previous stack entry */
267 jsbytecode
*backtrack_pc
; /* where to backtrack to */
268 jsbytecode backtrack_op
;
269 const WCHAR
*cp
; /* index in text of match at backtrack */
270 size_t parenIndex
; /* start index of saved paren contents */
271 size_t parenCount
; /* # of saved paren contents */
272 size_t saveStateStackTop
; /* number of parent states */
273 /* saved parent states follow */
274 /* saved paren contents follow */
277 #define INITIAL_STATESTACK 100
278 #define INITIAL_BACKTRACK 8000
280 typedef struct REGlobalData
{
282 JSRegExp
*regexp
; /* the RE in execution */
283 BOOL ok
; /* runtime error (out_of_memory only?) */
284 size_t start
; /* offset to start at */
285 ptrdiff_t skipped
; /* chars skipped anchoring this r.e. */
286 const WCHAR
*cpbegin
; /* text base address */
287 const WCHAR
*cpend
; /* text limit address */
289 REProgState
*stateStack
; /* stack of state of current parents */
290 size_t stateStackTop
;
291 size_t stateStackLimit
;
293 REBackTrackData
*backTrackStack
;/* stack of matched-so-far positions */
294 REBackTrackData
*backTrackSP
;
295 size_t backTrackStackSize
;
296 size_t cursz
; /* size of current stack entry */
297 size_t backTrackCount
; /* how many times we've backtracked */
298 size_t backTrackLimit
; /* upper limit on backtrack states */
300 jsheap_t
*pool
; /* It's faster to use one malloc'd pool
301 than to malloc/free the three items
302 that are allocated from this pool */
305 typedef struct RENode RENode
;
307 REOp op
; /* r.e. op bytecode */
308 RENode
*next
; /* next in concatenation order */
309 void *kid
; /* first operand */
311 void *kid2
; /* second operand */
312 INT num
; /* could be a number */
313 size_t parenIndex
; /* or a parenthesis index */
314 struct { /* or a quantifier range */
319 struct { /* or a character class */
321 size_t kidlen
; /* length of string at kid, in jschars */
322 size_t index
; /* index into class list */
323 WORD bmsize
; /* bitmap size, based on max char code */
326 struct { /* or a literal sequence */
327 WCHAR chr
; /* of one character */
328 size_t length
; /* or many (via the kid) */
331 RENode
*kid2
; /* second operand from ALT */
332 WCHAR ch1
; /* match char for ALTPREREQ */
333 WCHAR ch2
; /* ditto, or class index for ALTPREREQ2 */
338 #define CLASS_CACHE_SIZE 4
340 typedef struct CompilerState
{
341 script_ctx_t
*context
;
342 const WCHAR
*cpbegin
;
346 size_t classCount
; /* number of [] encountered */
347 size_t treeDepth
; /* maximum depth of parse tree */
348 size_t progLength
; /* estimated bytecode length */
350 size_t classBitmapsMem
; /* memory to hold all class bitmaps */
352 const WCHAR
*start
; /* small cache of class strings */
353 size_t length
; /* since they're often the same */
355 } classCache
[CLASS_CACHE_SIZE
];
359 typedef struct EmitStateStackEntry
{
360 jsbytecode
*altHead
; /* start of REOP_ALT* opcode */
361 jsbytecode
*nextAltFixup
; /* fixup pointer to next-alt offset */
362 jsbytecode
*nextTermFixup
; /* fixup ptr. to REOP_JUMP offset */
363 jsbytecode
*endTermFixup
; /* fixup ptr. to REOPT_ALTPREREQ* offset */
364 RENode
*continueNode
; /* original REOP_ALT* node being stacked */
365 jsbytecode continueOp
; /* REOP_JUMP or REOP_ENDALT continuation */
366 JSPackedBool jumpToJumpFlag
; /* true if we've patched jump-to-jump to
367 avoid 16-bit unsigned offset overflow */
368 } EmitStateStackEntry
;
371 * Immediate operand sizes and getter/setters. Unlike the ones in jsopcode.h,
372 * the getters and setters take the pc of the offset, not of the opcode before
376 #define GET_ARG(pc) ((WORD)(((pc)[0] << 8) | (pc)[1]))
377 #define SET_ARG(pc, arg) ((pc)[0] = (jsbytecode) ((arg) >> 8), \
378 (pc)[1] = (jsbytecode) (arg))
380 #define OFFSET_LEN ARG_LEN
381 #define OFFSET_MAX ((1 << (ARG_LEN * 8)) - 1)
382 #define GET_OFFSET(pc) GET_ARG(pc)
384 static BOOL
ParseRegExp(CompilerState
*);
387 * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
388 * For sanity, we limit it to 2^24 bytes.
390 #define TREE_DEPTH_MAX ((1 << 24) / sizeof(EmitStateStackEntry))
393 * The maximum memory that can be allocated for class bitmaps.
394 * For sanity, we limit it to 2^24 bytes.
396 #define CLASS_BITMAPS_MEM_LIMIT (1 << 24)
399 * Functions to get size and write/read bytecode that represent small indexes
401 * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
402 * indicates that the following byte brings more bits to the index. Otherwise
403 * this is the last byte in the index bytecode representing highest index bits.
406 GetCompactIndexWidth(size_t index
)
410 for (width
= 1; (index
>>= 7) != 0; ++width
) { }
414 static inline jsbytecode
*
415 WriteCompactIndex(jsbytecode
*pc
, size_t index
)
419 while ((next
= index
>> 7) != 0) {
420 *pc
++ = (jsbytecode
)(index
| 0x80);
423 *pc
++ = (jsbytecode
)index
;
427 static inline jsbytecode
*
428 ReadCompactIndex(jsbytecode
*pc
, size_t *result
)
433 if ((nextByte
& 0x80) == 0) {
435 * Short-circuit the most common case when compact index <= 127.
440 *result
= 0x7F & nextByte
;
443 *result
|= (nextByte
& 0x7F) << shift
;
445 } while ((nextByte
& 0x80) != 0);
450 /* Construct and initialize an RENode, returning NULL for out-of-memory */
452 NewRENode(CompilerState
*state
, REOp op
)
456 ren
= jsheap_alloc(&state
->context
->tmp_heap
, sizeof(*ren
));
458 /* js_ReportOutOfScriptQuota(cx); */
468 * Validates and converts hex ascii value.
471 isASCIIHexDigit(WCHAR c
, UINT
*digit
)
482 if (cv
>= 'a' && cv
<= 'f') {
483 *digit
= cv
- 'a' + 10;
495 #define JUMP_OFFSET_HI(off) ((jsbytecode)((off) >> 8))
496 #define JUMP_OFFSET_LO(off) ((jsbytecode)(off))
499 SetForwardJumpOffset(jsbytecode
*jump
, jsbytecode
*target
)
501 ptrdiff_t offset
= target
- jump
;
503 /* Check that target really points forward. */
505 if ((size_t)offset
> OFFSET_MAX
)
508 jump
[0] = JUMP_OFFSET_HI(offset
);
509 jump
[1] = JUMP_OFFSET_LO(offset
);
514 * Generate bytecode for the tree rooted at t using an explicit stack instead
518 EmitREBytecode(CompilerState
*state
, JSRegExp
*re
, size_t treeDepth
,
519 jsbytecode
*pc
, RENode
*t
)
521 EmitStateStackEntry
*emitStateSP
, *emitStateStack
;
525 if (treeDepth
== 0) {
526 emitStateStack
= NULL
;
528 emitStateStack
= heap_alloc(sizeof(EmitStateStackEntry
) * treeDepth
);
532 emitStateSP
= emitStateStack
;
534 assert(op
< REOP_LIMIT
);
543 case REOP_ALTPREREQ2
:
546 emitStateSP
->altHead
= pc
- 1;
547 emitStateSP
->endTermFixup
= pc
;
549 SET_ARG(pc
, t
->u
.altprereq
.ch1
);
551 SET_ARG(pc
, t
->u
.altprereq
.ch2
);
554 emitStateSP
->nextAltFixup
= pc
; /* offset to next alternate */
557 emitStateSP
->continueNode
= t
;
558 emitStateSP
->continueOp
= REOP_JUMP
;
559 emitStateSP
->jumpToJumpFlag
= FALSE
;
561 assert((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
564 assert(op
< REOP_LIMIT
);
568 emitStateSP
->nextTermFixup
= pc
; /* offset to following term */
570 if (!SetForwardJumpOffset(emitStateSP
->nextAltFixup
, pc
))
572 emitStateSP
->continueOp
= REOP_ENDALT
;
574 assert((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
577 assert(op
< REOP_LIMIT
);
582 * If we already patched emitStateSP->nextTermFixup to jump to
583 * a nearer jump, to avoid 16-bit immediate offset overflow, we
586 if (emitStateSP
->jumpToJumpFlag
)
590 * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
591 * REOP_ENDALT is executed only on successful match of the last
592 * alternate in a group.
594 if (!SetForwardJumpOffset(emitStateSP
->nextTermFixup
, pc
))
596 if (t
->op
!= REOP_ALT
) {
597 if (!SetForwardJumpOffset(emitStateSP
->endTermFixup
, pc
))
602 * If the program is bigger than the REOP_JUMP offset range, then
603 * we must check for alternates before this one that are part of
604 * the same group, and fix up their jump offsets to target jumps
605 * close enough to fit in a 16-bit unsigned offset immediate.
607 if ((size_t)(pc
- re
->program
) > OFFSET_MAX
&&
608 emitStateSP
> emitStateStack
) {
609 EmitStateStackEntry
*esp
, *esp2
;
610 jsbytecode
*alt
, *jump
;
611 ptrdiff_t span
, header
;
615 for (esp
= esp2
- 1; esp
>= emitStateStack
; --esp
) {
616 if (esp
->continueOp
== REOP_ENDALT
&&
617 !esp
->jumpToJumpFlag
&&
618 esp
->nextTermFixup
+ OFFSET_LEN
== alt
&&
619 (size_t)(pc
- ((esp
->continueNode
->op
!= REOP_ALT
)
621 : esp
->nextTermFixup
)) > OFFSET_MAX
) {
623 jump
= esp
->nextTermFixup
;
626 * The span must be 1 less than the distance from
627 * jump offset to jump offset, so we actually jump
628 * to a REOP_JUMP bytecode, not to its offset!
631 assert(jump
< esp2
->nextTermFixup
);
632 span
= esp2
->nextTermFixup
- jump
- 1;
633 if ((size_t)span
<= OFFSET_MAX
)
638 } while (esp2
->continueOp
!= REOP_ENDALT
);
641 jump
[0] = JUMP_OFFSET_HI(span
);
642 jump
[1] = JUMP_OFFSET_LO(span
);
644 if (esp
->continueNode
->op
!= REOP_ALT
) {
646 * We must patch the offset at esp->endTermFixup
647 * as well, for the REOP_ALTPREREQ{,2} opcodes.
648 * If we're unlucky and endTermFixup is more than
649 * OFFSET_MAX bytes from its target, we cheat by
650 * jumping 6 bytes to the jump whose offset is at
651 * esp->nextTermFixup, which has the same target.
653 jump
= esp
->endTermFixup
;
654 header
= esp
->nextTermFixup
- jump
;
656 if ((size_t)span
> OFFSET_MAX
)
659 jump
[0] = JUMP_OFFSET_HI(span
);
660 jump
[1] = JUMP_OFFSET_LO(span
);
663 esp
->jumpToJumpFlag
= TRUE
;
671 emitStateSP
->altHead
= pc
- 1;
672 emitStateSP
->nextAltFixup
= pc
; /* offset to next alternate */
674 emitStateSP
->continueNode
= t
;
675 emitStateSP
->continueOp
= REOP_JUMP
;
676 emitStateSP
->jumpToJumpFlag
= FALSE
;
678 assert((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
681 assert(op
< REOP_LIMIT
);
686 * Coalesce FLATs if possible and if it would not increase bytecode
687 * beyond preallocated limit. The latter happens only when bytecode
688 * size for coalesced string with offset p and length 2 exceeds 6
689 * bytes preallocated for 2 single char nodes, i.e. when
690 * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
691 * GetCompactIndexWidth(p) > 4.
692 * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
693 * nodes strictly decreases bytecode size, the check has to be
694 * done only for the first coalescing.
697 GetCompactIndexWidth((WCHAR
*)t
->kid
- state
->cpbegin
) <= 4)
700 t
->next
->op
== REOP_FLAT
&&
701 (WCHAR
*)t
->kid
+ t
->u
.flat
.length
==
703 t
->u
.flat
.length
+= t
->next
->u
.flat
.length
;
704 t
->next
= t
->next
->next
;
707 if (t
->kid
&& t
->u
.flat
.length
> 1) {
708 pc
[-1] = (state
->flags
& JSREG_FOLD
) ? REOP_FLATi
: REOP_FLAT
;
709 pc
= WriteCompactIndex(pc
, (WCHAR
*)t
->kid
- state
->cpbegin
);
710 pc
= WriteCompactIndex(pc
, t
->u
.flat
.length
);
711 } else if (t
->u
.flat
.chr
< 256) {
712 pc
[-1] = (state
->flags
& JSREG_FOLD
) ? REOP_FLAT1i
: REOP_FLAT1
;
713 *pc
++ = (jsbytecode
) t
->u
.flat
.chr
;
715 pc
[-1] = (state
->flags
& JSREG_FOLD
)
718 SET_ARG(pc
, t
->u
.flat
.chr
);
725 pc
= WriteCompactIndex(pc
, t
->u
.parenIndex
);
726 emitStateSP
->continueNode
= t
;
727 emitStateSP
->continueOp
= REOP_RPAREN
;
729 assert((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
735 pc
= WriteCompactIndex(pc
, t
->u
.parenIndex
);
739 pc
= WriteCompactIndex(pc
, t
->u
.parenIndex
);
744 emitStateSP
->nextTermFixup
= pc
;
746 emitStateSP
->continueNode
= t
;
747 emitStateSP
->continueOp
= REOP_ASSERTTEST
;
749 assert((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
754 case REOP_ASSERTTEST
:
755 case REOP_ASSERTNOTTEST
:
756 if (!SetForwardJumpOffset(emitStateSP
->nextTermFixup
, pc
))
760 case REOP_ASSERT_NOT
:
762 emitStateSP
->nextTermFixup
= pc
;
764 emitStateSP
->continueNode
= t
;
765 emitStateSP
->continueOp
= REOP_ASSERTNOTTEST
;
767 assert((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
774 if (t
->u
.range
.min
== 0 && t
->u
.range
.max
== (UINT
)-1) {
775 pc
[-1] = (t
->u
.range
.greedy
) ? REOP_STAR
: REOP_MINIMALSTAR
;
776 } else if (t
->u
.range
.min
== 0 && t
->u
.range
.max
== 1) {
777 pc
[-1] = (t
->u
.range
.greedy
) ? REOP_OPT
: REOP_MINIMALOPT
;
778 } else if (t
->u
.range
.min
== 1 && t
->u
.range
.max
== (UINT
) -1) {
779 pc
[-1] = (t
->u
.range
.greedy
) ? REOP_PLUS
: REOP_MINIMALPLUS
;
781 if (!t
->u
.range
.greedy
)
782 pc
[-1] = REOP_MINIMALQUANT
;
783 pc
= WriteCompactIndex(pc
, t
->u
.range
.min
);
785 * Write max + 1 to avoid using size_t(max) + 1 bytes
786 * for (UINT)-1 sentinel.
788 pc
= WriteCompactIndex(pc
, t
->u
.range
.max
+ 1);
790 emitStateSP
->nextTermFixup
= pc
;
792 emitStateSP
->continueNode
= t
;
793 emitStateSP
->continueOp
= REOP_ENDCHILD
;
795 assert((size_t)(emitStateSP
- emitStateStack
) <= treeDepth
);
801 if (!SetForwardJumpOffset(emitStateSP
->nextTermFixup
, pc
))
806 if (!t
->u
.ucclass
.sense
)
807 pc
[-1] = REOP_NCLASS
;
808 pc
= WriteCompactIndex(pc
, t
->u
.ucclass
.index
);
809 charSet
= &re
->classList
[t
->u
.ucclass
.index
];
810 charSet
->converted
= FALSE
;
811 charSet
->length
= t
->u
.ucclass
.bmsize
;
812 charSet
->u
.src
.startIndex
= t
->u
.ucclass
.startIndex
;
813 charSet
->u
.src
.length
= t
->u
.ucclass
.kidlen
;
814 charSet
->sense
= t
->u
.ucclass
.sense
;
825 if (emitStateSP
== emitStateStack
)
828 t
= emitStateSP
->continueNode
;
829 op
= (REOp
) emitStateSP
->continueOp
;
834 heap_free(emitStateStack
);
838 ReportRegExpError(state
, JSREPORT_ERROR
, JSMSG_REGEXP_TOO_COMPLEX
);
844 * Process the op against the two top operands, reducing them to a single
845 * operand in the penultimate slot. Update progLength and treeDepth.
848 ProcessOp(CompilerState
*state
, REOpData
*opData
, RENode
**operandStack
,
853 switch (opData
->op
) {
855 result
= NewRENode(state
, REOP_ALT
);
858 result
->kid
= operandStack
[operandSP
- 2];
859 result
->u
.kid2
= operandStack
[operandSP
- 1];
860 operandStack
[operandSP
- 2] = result
;
862 if (state
->treeDepth
== TREE_DEPTH_MAX
) {
863 ReportRegExpError(state
, JSREPORT_ERROR
, JSMSG_REGEXP_TOO_COMPLEX
);
869 * Look at both alternates to see if there's a FLAT or a CLASS at
870 * the start of each. If so, use a prerequisite match.
872 if (((RENode
*) result
->kid
)->op
== REOP_FLAT
&&
873 ((RENode
*) result
->u
.kid2
)->op
== REOP_FLAT
&&
874 (state
->flags
& JSREG_FOLD
) == 0) {
875 result
->op
= REOP_ALTPREREQ
;
876 result
->u
.altprereq
.ch1
= ((RENode
*) result
->kid
)->u
.flat
.chr
;
877 result
->u
.altprereq
.ch2
= ((RENode
*) result
->u
.kid2
)->u
.flat
.chr
;
878 /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
879 JUMP, <end> ... ENDALT */
880 state
->progLength
+= 13;
883 if (((RENode
*) result
->kid
)->op
== REOP_CLASS
&&
884 ((RENode
*) result
->kid
)->u
.ucclass
.index
< 256 &&
885 ((RENode
*) result
->u
.kid2
)->op
== REOP_FLAT
&&
886 (state
->flags
& JSREG_FOLD
) == 0) {
887 result
->op
= REOP_ALTPREREQ2
;
888 result
->u
.altprereq
.ch1
= ((RENode
*) result
->u
.kid2
)->u
.flat
.chr
;
889 result
->u
.altprereq
.ch2
= ((RENode
*) result
->kid
)->u
.ucclass
.index
;
890 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
891 JUMP, <end> ... ENDALT */
892 state
->progLength
+= 13;
895 if (((RENode
*) result
->kid
)->op
== REOP_FLAT
&&
896 ((RENode
*) result
->u
.kid2
)->op
== REOP_CLASS
&&
897 ((RENode
*) result
->u
.kid2
)->u
.ucclass
.index
< 256 &&
898 (state
->flags
& JSREG_FOLD
) == 0) {
899 result
->op
= REOP_ALTPREREQ2
;
900 result
->u
.altprereq
.ch1
= ((RENode
*) result
->kid
)->u
.flat
.chr
;
901 result
->u
.altprereq
.ch2
=
902 ((RENode
*) result
->u
.kid2
)->u
.ucclass
.index
;
903 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
904 JUMP, <end> ... ENDALT */
905 state
->progLength
+= 13;
908 /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
909 state
->progLength
+= 7;
914 result
= operandStack
[operandSP
- 2];
916 result
= result
->next
;
917 result
->next
= operandStack
[operandSP
- 1];
921 case REOP_ASSERT_NOT
:
924 /* These should have been processed by a close paren. */
925 ReportRegExpErrorHelper(state
, JSREPORT_ERROR
, JSMSG_MISSING_PAREN
,
935 * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
936 * its being on the stack, and to propagate errors to its callers.
938 #define JSREG_FIND_PAREN_COUNT 0x8000
939 #define JSREG_FIND_PAREN_ERROR 0x4000
942 * Magic return value from FindParenCount and GetDecimalValue, to indicate
943 * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
944 * its findMax parameter is non-null.
946 #define OVERFLOW_VALUE ((UINT)-1)
949 FindParenCount(CompilerState
*state
)
954 if (state
->flags
& JSREG_FIND_PAREN_COUNT
)
955 return OVERFLOW_VALUE
;
958 * Copy state into temp, flag it so we never report an invalid backref,
959 * and reset its members to parse the entire regexp. This is obviously
960 * suboptimal, but GetDecimalValue calls us only if a backref appears to
961 * refer to a forward parenthetical, which is rare.
964 temp
.flags
|= JSREG_FIND_PAREN_COUNT
;
965 temp
.cp
= temp
.cpbegin
;
970 temp
.classBitmapsMem
= 0;
971 for (i
= 0; i
< CLASS_CACHE_SIZE
; i
++)
972 temp
.classCache
[i
].start
= NULL
;
974 if (!ParseRegExp(&temp
)) {
975 state
->flags
|= JSREG_FIND_PAREN_ERROR
;
976 return OVERFLOW_VALUE
;
978 return temp
.parenCount
;
982 * Extract and return a decimal value at state->cp. The initial character c
983 * has already been read. Return OVERFLOW_VALUE if the result exceeds max.
984 * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
985 * state->flags to discover whether an error occurred under findMax.
988 GetDecimalValue(WCHAR c
, UINT max
, UINT (*findMax
)(CompilerState
*state
),
989 CompilerState
*state
)
991 UINT value
= JS7_UNDEC(c
);
992 BOOL overflow
= (value
> max
&& (!findMax
|| value
> findMax(state
)));
994 /* The following restriction allows simpler overflow checks. */
995 assert(max
<= ((UINT
)-1 - 9) / 10);
996 while (state
->cp
< state
->cpend
) {
1000 value
= 10 * value
+ JS7_UNDEC(c
);
1001 if (!overflow
&& value
> max
&& (!findMax
|| value
> findMax(state
)))
1005 return overflow
? OVERFLOW_VALUE
: value
;
1009 * Calculate the total size of the bitmap required for a class expression.
1012 CalculateBitmapSize(CompilerState
*state
, RENode
*target
, const WCHAR
*src
,
1016 BOOL inRange
= FALSE
;
1017 WCHAR c
, rangeStart
= 0;
1018 UINT n
, digit
, nDigits
, i
;
1020 target
->u
.ucclass
.bmsize
= 0;
1021 target
->u
.ucclass
.sense
= TRUE
;
1028 target
->u
.ucclass
.sense
= FALSE
;
1031 while (src
!= end
) {
1032 BOOL canStartRange
= TRUE
;
1059 if (src
< end
&& RE_IS_LETTER(*src
)) {
1060 localMax
= (UINT
) (*src
++) & 0x1F;
1073 for (i
= 0; (i
< nDigits
) && (src
< end
); i
++) {
1075 if (!isASCIIHexDigit(c
, &digit
)) {
1077 * Back off to accepting the original
1084 n
= (n
<< 4) | digit
;
1089 canStartRange
= FALSE
;
1091 JS_ReportErrorNumber(state
->context
,
1092 js_GetErrorMessage
, NULL
,
1093 JSMSG_BAD_CLASS_RANGE
);
1103 canStartRange
= FALSE
;
1105 JS_ReportErrorNumber(state
->context
,
1106 js_GetErrorMessage
, NULL
,
1107 JSMSG_BAD_CLASS_RANGE
);
1113 * If this is the start of a range, ensure that it's less than
1127 * This is a non-ECMA extension - decimal escapes (in this
1128 * case, octal!) are supposed to be an error inside class
1129 * ranges, but supported here for backwards compatibility.
1134 if ('0' <= c
&& c
<= '7') {
1136 n
= 8 * n
+ JS7_UNDEC(c
);
1138 if ('0' <= c
&& c
<= '7') {
1140 i
= 8 * n
+ JS7_UNDEC(c
);
1161 /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
1162 if (rangeStart
> localMax
) {
1163 JS_ReportErrorNumber(state
->context
,
1164 js_GetErrorMessage
, NULL
,
1165 JSMSG_BAD_CLASS_RANGE
);
1170 if (canStartRange
&& src
< end
- 1) {
1174 rangeStart
= (WCHAR
)localMax
;
1178 if (state
->flags
& JSREG_FOLD
)
1179 rangeStart
= localMax
; /* one run of the uc/dc loop below */
1182 if (state
->flags
& JSREG_FOLD
) {
1183 WCHAR maxch
= localMax
;
1185 for (i
= rangeStart
; i
<= localMax
; i
++) {
1201 target
->u
.ucclass
.bmsize
= max
;
1206 ParseMinMaxQuantifier(CompilerState
*state
, BOOL ignoreValues
)
1210 const WCHAR
*errp
= state
->cp
++;
1215 min
= GetDecimalValue(c
, 0xFFFF, NULL
, state
);
1218 if (!ignoreValues
&& min
== OVERFLOW_VALUE
)
1219 return JSMSG_MIN_TOO_BIG
;
1225 max
= GetDecimalValue(c
, 0xFFFF, NULL
, state
);
1227 if (!ignoreValues
&& max
== OVERFLOW_VALUE
)
1228 return JSMSG_MAX_TOO_BIG
;
1229 if (!ignoreValues
&& min
> max
)
1230 return JSMSG_OUT_OF_ORDER
;
1238 state
->result
= NewRENode(state
, REOP_QUANT
);
1240 return JSMSG_OUT_OF_MEMORY
;
1241 state
->result
->u
.range
.min
= min
;
1242 state
->result
->u
.range
.max
= max
;
1244 * QUANT, <min>, <max>, <next> ... <ENDCHILD>
1245 * where <max> is written as compact(max+1) to make
1246 * (UINT)-1 sentinel to occupy 1 byte, not width_of(max)+1.
1248 state
->progLength
+= (1 + GetCompactIndexWidth(min
)
1249 + GetCompactIndexWidth(max
+ 1)
1260 ParseQuantifier(CompilerState
*state
)
1263 term
= state
->result
;
1264 if (state
->cp
< state
->cpend
) {
1265 switch (*state
->cp
) {
1267 state
->result
= NewRENode(state
, REOP_QUANT
);
1270 state
->result
->u
.range
.min
= 1;
1271 state
->result
->u
.range
.max
= (UINT
)-1;
1272 /* <PLUS>, <next> ... <ENDCHILD> */
1273 state
->progLength
+= 4;
1276 state
->result
= NewRENode(state
, REOP_QUANT
);
1279 state
->result
->u
.range
.min
= 0;
1280 state
->result
->u
.range
.max
= (UINT
)-1;
1281 /* <STAR>, <next> ... <ENDCHILD> */
1282 state
->progLength
+= 4;
1285 state
->result
= NewRENode(state
, REOP_QUANT
);
1288 state
->result
->u
.range
.min
= 0;
1289 state
->result
->u
.range
.max
= 1;
1290 /* <OPT>, <next> ... <ENDCHILD> */
1291 state
->progLength
+= 4;
1293 case '{': /* balance '}' */
1297 err
= ParseMinMaxQuantifier(state
, FALSE
);
1303 ReportRegExpErrorHelper(state
, JSREPORT_ERROR
, err
, errp
);
1312 if (state
->treeDepth
== TREE_DEPTH_MAX
) {
1313 ReportRegExpError(state
, JSREPORT_ERROR
, JSMSG_REGEXP_TOO_COMPLEX
);
1319 state
->result
->kid
= term
;
1320 if (state
->cp
< state
->cpend
&& *state
->cp
== '?') {
1322 state
->result
->u
.range
.greedy
= FALSE
;
1324 state
->result
->u
.range
.greedy
= TRUE
;
1330 * item: assertion An item is either an assertion or
1331 * quantatom a quantified atom.
1333 * assertion: '^' Assertions match beginning of string
1334 * (or line if the class static property
1335 * RegExp.multiline is true).
1336 * '$' End of string (or line if the class
1337 * static property RegExp.multiline is
1339 * '\b' Word boundary (between \w and \W).
1340 * '\B' Word non-boundary.
1342 * quantatom: atom An unquantified atom.
1343 * quantatom '{' n ',' m '}'
1344 * Atom must occur between n and m times.
1345 * quantatom '{' n ',' '}' Atom must occur at least n times.
1346 * quantatom '{' n '}' Atom must occur exactly n times.
1347 * quantatom '*' Zero or more times (same as {0,}).
1348 * quantatom '+' One or more times (same as {1,}).
1349 * quantatom '?' Zero or one time (same as {0,1}).
1351 * any of which can be optionally followed by '?' for ungreedy
1353 * atom: '(' regexp ')' A parenthesized regexp (what matched
1354 * can be addressed using a backreference,
1356 * '.' Matches any char except '\n'.
1357 * '[' classlist ']' A character class.
1358 * '[' '^' classlist ']' A negated character class.
1360 * '\n' Newline (Line Feed).
1361 * '\r' Carriage Return.
1362 * '\t' Horizontal Tab.
1363 * '\v' Vertical Tab.
1364 * '\d' A digit (same as [0-9]).
1366 * '\w' A word character, [0-9a-z_A-Z].
1367 * '\W' A non-word character.
1368 * '\s' A whitespace character, [ \b\f\n\r\t\v].
1369 * '\S' A non-whitespace character.
1370 * '\' n A backreference to the nth (n decimal
1371 * and positive) parenthesized expression.
1372 * '\' octal An octal escape sequence (octal must be
1373 * two or three digits long, unless it is
1374 * 0 for the null character).
1375 * '\x' hex A hex escape (hex must be two digits).
1376 * '\u' unicode A unicode escape (must be four digits).
1377 * '\c' ctrl A control character, ctrl is a letter.
1378 * '\' literalatomchar Any character except one of the above
1379 * that follow '\' in an atom.
1380 * otheratomchar Any character not first among the other
1381 * atom right-hand sides.
1384 ParseTerm(CompilerState
*state
)
1386 WCHAR c
= *state
->cp
++;
1388 UINT num
, tmp
, n
, i
;
1389 const WCHAR
*termStart
;
1392 /* assertions and atoms */
1394 state
->result
= NewRENode(state
, REOP_BOL
);
1397 state
->progLength
++;
1400 state
->result
= NewRENode(state
, REOP_EOL
);
1403 state
->progLength
++;
1406 if (state
->cp
>= state
->cpend
) {
1407 /* a trailing '\' is an error */
1408 ReportRegExpError(state
, JSREPORT_ERROR
, JSMSG_TRAILING_SLASH
);
1413 /* assertion escapes */
1415 state
->result
= NewRENode(state
, REOP_WBDRY
);
1418 state
->progLength
++;
1421 state
->result
= NewRENode(state
, REOP_WNONBDRY
);
1424 state
->progLength
++;
1426 /* Decimal escape */
1428 /* Give a strict warning. See also the note below. */
1429 WARN("non-octal digit in an escape sequence that doesn't match a back-reference\n");
1432 while (state
->cp
< state
->cpend
) {
1434 if (c
< '0' || '7' < c
)
1437 tmp
= 8 * num
+ (UINT
)JS7_UNDEC(c
);
1444 state
->result
= NewRENode(state
, REOP_FLAT
);
1447 state
->result
->u
.flat
.chr
= c
;
1448 state
->result
->u
.flat
.length
= 1;
1449 state
->progLength
+= 3;
1460 termStart
= state
->cp
- 1;
1461 num
= GetDecimalValue(c
, state
->parenCount
, FindParenCount
, state
);
1462 if (state
->flags
& JSREG_FIND_PAREN_ERROR
)
1464 if (num
== OVERFLOW_VALUE
) {
1465 /* Give a strict mode warning. */
1466 WARN("back-reference exceeds number of capturing parentheses\n");
1469 * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
1470 * error here. However, for compatibility with IE, we treat the
1471 * whole backref as flat if the first character in it is not a
1472 * valid octal character, and as an octal escape otherwise.
1474 state
->cp
= termStart
;
1476 /* Treat this as flat. termStart - 1 is the \. */
1481 /* Treat this as an octal escape. */
1484 assert(1 <= num
&& num
<= 0x10000);
1485 state
->result
= NewRENode(state
, REOP_BACKREF
);
1488 state
->result
->u
.parenIndex
= num
- 1;
1490 += 1 + GetCompactIndexWidth(state
->result
->u
.parenIndex
);
1492 /* Control escape */
1508 /* Control letter */
1510 if (state
->cp
< state
->cpend
&& RE_IS_LETTER(*state
->cp
)) {
1511 c
= (WCHAR
) (*state
->cp
++ & 0x1F);
1513 /* back off to accepting the original '\' as a literal */
1518 /* HexEscapeSequence */
1522 /* UnicodeEscapeSequence */
1527 for (i
= 0; i
< nDigits
&& state
->cp
< state
->cpend
; i
++) {
1530 if (!isASCIIHexDigit(c
, &digit
)) {
1532 * Back off to accepting the original 'u' or 'x' as a
1539 n
= (n
<< 4) | digit
;
1543 /* Character class escapes */
1545 state
->result
= NewRENode(state
, REOP_DIGIT
);
1549 state
->progLength
++;
1552 state
->result
= NewRENode(state
, REOP_NONDIGIT
);
1555 state
->result
= NewRENode(state
, REOP_SPACE
);
1558 state
->result
= NewRENode(state
, REOP_NONSPACE
);
1561 state
->result
= NewRENode(state
, REOP_ALNUM
);
1564 state
->result
= NewRENode(state
, REOP_NONALNUM
);
1566 /* IdentityEscape */
1568 state
->result
= NewRENode(state
, REOP_FLAT
);
1571 state
->result
->u
.flat
.chr
= c
;
1572 state
->result
->u
.flat
.length
= 1;
1573 state
->result
->kid
= (void *) (state
->cp
- 1);
1574 state
->progLength
+= 3;
1579 state
->result
= NewRENode(state
, REOP_CLASS
);
1582 termStart
= state
->cp
;
1583 state
->result
->u
.ucclass
.startIndex
= termStart
- state
->cpbegin
;
1585 if (state
->cp
== state
->cpend
) {
1586 ReportRegExpErrorHelper(state
, JSREPORT_ERROR
,
1587 JSMSG_UNTERM_CLASS
, termStart
);
1591 if (*state
->cp
== '\\') {
1593 if (state
->cp
!= state
->cpend
)
1597 if (*state
->cp
== ']') {
1598 state
->result
->u
.ucclass
.kidlen
= state
->cp
- termStart
;
1603 for (i
= 0; i
< CLASS_CACHE_SIZE
; i
++) {
1604 if (!state
->classCache
[i
].start
) {
1605 state
->classCache
[i
].start
= termStart
;
1606 state
->classCache
[i
].length
= state
->result
->u
.ucclass
.kidlen
;
1607 state
->classCache
[i
].index
= state
->classCount
;
1610 if (state
->classCache
[i
].length
==
1611 state
->result
->u
.ucclass
.kidlen
) {
1612 for (n
= 0; ; n
++) {
1613 if (n
== state
->classCache
[i
].length
) {
1614 state
->result
->u
.ucclass
.index
1615 = state
->classCache
[i
].index
;
1618 if (state
->classCache
[i
].start
[n
] != termStart
[n
])
1623 state
->result
->u
.ucclass
.index
= state
->classCount
++;
1627 * Call CalculateBitmapSize now as we want any errors it finds
1628 * to be reported during the parse phase, not at execution.
1630 if (!CalculateBitmapSize(state
, state
->result
, termStart
, state
->cp
++))
1633 * Update classBitmapsMem with number of bytes to hold bmsize bits,
1634 * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
1635 * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
1637 n
= (state
->result
->u
.ucclass
.bmsize
>> 3) + 1;
1638 if (n
> CLASS_BITMAPS_MEM_LIMIT
- state
->classBitmapsMem
) {
1639 ReportRegExpError(state
, JSREPORT_ERROR
, JSMSG_REGEXP_TOO_COMPLEX
);
1642 state
->classBitmapsMem
+= n
;
1643 /* CLASS, <index> */
1645 += 1 + GetCompactIndexWidth(state
->result
->u
.ucclass
.index
);
1649 state
->result
= NewRENode(state
, REOP_DOT
);
1654 const WCHAR
*errp
= state
->cp
--;
1657 err
= ParseMinMaxQuantifier(state
, TRUE
);
1668 ReportRegExpErrorHelper(state
, JSREPORT_ERROR
,
1669 JSMSG_BAD_QUANTIFIER
, state
->cp
- 1);
1673 state
->result
= NewRENode(state
, REOP_FLAT
);
1676 state
->result
->u
.flat
.chr
= c
;
1677 state
->result
->u
.flat
.length
= 1;
1678 state
->result
->kid
= (void *) (state
->cp
- 1);
1679 state
->progLength
+= 3;
1682 return ParseQuantifier(state
);
1686 * Top-down regular expression grammar, based closely on Perl4.
1688 * regexp: altern A regular expression is one or more
1689 * altern '|' regexp alternatives separated by vertical bar.
1691 #define INITIAL_STACK_SIZE 128
1694 ParseRegExp(CompilerState
*state
)
1698 REOpData
*operatorStack
;
1699 RENode
**operandStack
;
1702 BOOL result
= FALSE
;
1704 INT operatorSP
= 0, operatorStackSize
= INITIAL_STACK_SIZE
;
1705 INT operandSP
= 0, operandStackSize
= INITIAL_STACK_SIZE
;
1707 /* Watch out for empty regexp */
1708 if (state
->cp
== state
->cpend
) {
1709 state
->result
= NewRENode(state
, REOP_EMPTY
);
1710 return (state
->result
!= NULL
);
1713 operatorStack
= heap_alloc(sizeof(REOpData
) * operatorStackSize
);
1717 operandStack
= heap_alloc(sizeof(RENode
*) * operandStackSize
);
1722 parenIndex
= state
->parenCount
;
1723 if (state
->cp
== state
->cpend
) {
1725 * If we are at the end of the regexp and we're short one or more
1726 * operands, the regexp must have the form /x|/ or some such, with
1727 * left parentheses making us short more than one operand.
1729 if (operatorSP
>= operandSP
) {
1730 operand
= NewRENode(state
, REOP_EMPTY
);
1736 switch (*state
->cp
) {
1739 if (state
->cp
+ 1 < state
->cpend
&&
1740 *state
->cp
== '?' &&
1741 (state
->cp
[1] == '=' ||
1742 state
->cp
[1] == '!' ||
1743 state
->cp
[1] == ':')) {
1744 switch (state
->cp
[1]) {
1747 /* ASSERT, <next>, ... ASSERTTEST */
1748 state
->progLength
+= 4;
1751 op
= REOP_ASSERT_NOT
;
1752 /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
1753 state
->progLength
+= 4;
1756 op
= REOP_LPARENNON
;
1762 /* LPAREN, <index>, ... RPAREN, <index> */
1764 += 2 * (1 + GetCompactIndexWidth(parenIndex
));
1765 state
->parenCount
++;
1766 if (state
->parenCount
== 65535) {
1767 ReportRegExpError(state
, JSREPORT_ERROR
,
1768 JSMSG_TOO_MANY_PARENS
);
1776 * If there's no stacked open parenthesis, throw syntax error.
1778 for (i
= operatorSP
- 1; ; i
--) {
1780 ReportRegExpError(state
, JSREPORT_ERROR
,
1781 JSMSG_UNMATCHED_RIGHT_PAREN
);
1784 if (operatorStack
[i
].op
== REOP_ASSERT
||
1785 operatorStack
[i
].op
== REOP_ASSERT_NOT
||
1786 operatorStack
[i
].op
== REOP_LPARENNON
||
1787 operatorStack
[i
].op
== REOP_LPAREN
) {
1794 /* Expected an operand before these, so make an empty one */
1795 operand
= NewRENode(state
, REOP_EMPTY
);
1801 if (!ParseTerm(state
))
1803 operand
= state
->result
;
1805 if (operandSP
== operandStackSize
) {
1807 operandStackSize
+= operandStackSize
;
1808 tmp
= heap_realloc(operandStack
, sizeof(RENode
*) * operandStackSize
);
1813 operandStack
[operandSP
++] = operand
;
1818 /* At the end; process remaining operators. */
1820 if (state
->cp
== state
->cpend
) {
1821 while (operatorSP
) {
1823 if (!ProcessOp(state
, &operatorStack
[operatorSP
],
1824 operandStack
, operandSP
))
1828 assert(operandSP
== 1);
1829 state
->result
= operandStack
[0];
1834 switch (*state
->cp
) {
1836 /* Process any stacked 'concat' operators */
1838 while (operatorSP
&&
1839 operatorStack
[operatorSP
- 1].op
== REOP_CONCAT
) {
1841 if (!ProcessOp(state
, &operatorStack
[operatorSP
],
1842 operandStack
, operandSP
)) {
1852 * If there's no stacked open parenthesis, throw syntax error.
1854 for (i
= operatorSP
- 1; ; i
--) {
1856 ReportRegExpError(state
, JSREPORT_ERROR
,
1857 JSMSG_UNMATCHED_RIGHT_PAREN
);
1860 if (operatorStack
[i
].op
== REOP_ASSERT
||
1861 operatorStack
[i
].op
== REOP_ASSERT_NOT
||
1862 operatorStack
[i
].op
== REOP_LPARENNON
||
1863 operatorStack
[i
].op
== REOP_LPAREN
) {
1869 /* Process everything on the stack until the open parenthesis. */
1873 switch (operatorStack
[operatorSP
].op
) {
1875 case REOP_ASSERT_NOT
:
1877 operand
= NewRENode(state
, operatorStack
[operatorSP
].op
);
1880 operand
->u
.parenIndex
=
1881 operatorStack
[operatorSP
].parenIndex
;
1883 operand
->kid
= operandStack
[operandSP
- 1];
1884 operandStack
[operandSP
- 1] = operand
;
1885 if (state
->treeDepth
== TREE_DEPTH_MAX
) {
1886 ReportRegExpError(state
, JSREPORT_ERROR
,
1887 JSMSG_REGEXP_TOO_COMPLEX
);
1893 case REOP_LPARENNON
:
1894 state
->result
= operandStack
[operandSP
- 1];
1895 if (!ParseQuantifier(state
))
1897 operandStack
[operandSP
- 1] = state
->result
;
1898 goto restartOperator
;
1900 if (!ProcessOp(state
, &operatorStack
[operatorSP
],
1901 operandStack
, operandSP
))
1911 const WCHAR
*errp
= state
->cp
;
1913 if (ParseMinMaxQuantifier(state
, TRUE
) < 0) {
1915 * This didn't even scan correctly as a quantifier, so we should
1929 ReportRegExpErrorHelper(state
, JSREPORT_ERROR
, JSMSG_BAD_QUANTIFIER
,
1935 /* Anything else is the start of the next term. */
1938 if (operatorSP
== operatorStackSize
) {
1940 operatorStackSize
+= operatorStackSize
;
1941 tmp
= heap_realloc(operatorStack
, sizeof(REOpData
) * operatorStackSize
);
1944 operatorStack
= tmp
;
1946 operatorStack
[operatorSP
].op
= op
;
1947 operatorStack
[operatorSP
].errPos
= state
->cp
;
1948 operatorStack
[operatorSP
++].parenIndex
= parenIndex
;
1953 heap_free(operatorStack
);
1954 heap_free(operandStack
);
1959 * Save the current state of the match - the position in the input
1960 * text as well as the position in the bytecode. The state of any
1961 * parent expressions is also saved (preceding state).
1962 * Contents of parenCount parentheses from parenIndex are also saved.
1964 static REBackTrackData
*
1965 PushBackTrackState(REGlobalData
*gData
, REOp op
,
1966 jsbytecode
*target
, REMatchState
*x
, const WCHAR
*cp
,
1967 size_t parenIndex
, size_t parenCount
)
1970 REBackTrackData
*result
=
1971 (REBackTrackData
*) ((char *)gData
->backTrackSP
+ gData
->cursz
);
1973 size_t sz
= sizeof(REBackTrackData
) +
1974 gData
->stateStackTop
* sizeof(REProgState
) +
1975 parenCount
* sizeof(RECapture
);
1977 ptrdiff_t btsize
= gData
->backTrackStackSize
;
1978 ptrdiff_t btincr
= ((char *)result
+ sz
) -
1979 ((char *)gData
->backTrackStack
+ btsize
);
1981 TRACE("\tBT_Push: %lu,%lu\n", (unsigned long) parenIndex
, (unsigned long) parenCount
);
1983 JS_COUNT_OPERATION(gData
->cx
, JSOW_JUMP
* (1 + parenCount
));
1985 ptrdiff_t offset
= (char *)result
- (char *)gData
->backTrackStack
;
1987 JS_COUNT_OPERATION(gData
->cx
, JSOW_ALLOCATION
);
1988 btincr
= ((btincr
+btsize
-1)/btsize
)*btsize
;
1989 gData
->backTrackStack
= jsheap_grow(gData
->pool
, gData
->backTrackStack
, btsize
, btincr
);
1990 if (!gData
->backTrackStack
) {
1991 js_ReportOutOfScriptQuota(gData
->cx
);
1995 gData
->backTrackStackSize
= btsize
+ btincr
;
1996 result
= (REBackTrackData
*) ((char *)gData
->backTrackStack
+ offset
);
1998 gData
->backTrackSP
= result
;
1999 result
->sz
= gData
->cursz
;
2002 result
->backtrack_op
= op
;
2003 result
->backtrack_pc
= target
;
2005 result
->parenCount
= parenCount
;
2006 result
->parenIndex
= parenIndex
;
2008 result
->saveStateStackTop
= gData
->stateStackTop
;
2009 assert(gData
->stateStackTop
);
2010 memcpy(result
+ 1, gData
->stateStack
,
2011 sizeof(REProgState
) * result
->saveStateStackTop
);
2013 if (parenCount
!= 0) {
2014 memcpy((char *)(result
+ 1) +
2015 sizeof(REProgState
) * result
->saveStateStackTop
,
2016 &x
->parens
[parenIndex
],
2017 sizeof(RECapture
) * parenCount
);
2018 for (i
= 0; i
!= parenCount
; i
++)
2019 x
->parens
[parenIndex
+ i
].index
= -1;
2025 static inline REMatchState
*
2026 FlatNIMatcher(REGlobalData
*gData
, REMatchState
*x
, WCHAR
*matchChars
,
2030 assert(gData
->cpend
>= x
->cp
);
2031 if (length
> (size_t)(gData
->cpend
- x
->cp
))
2033 for (i
= 0; i
!= length
; i
++) {
2034 if (toupperW(matchChars
[i
]) != toupperW(x
->cp
[i
]))
2042 * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
2043 * 2. If E is not a character then go to step 6.
2044 * 3. Let ch be E's character.
2045 * 4. Let A be a one-element RECharSet containing the character ch.
2046 * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
2047 * 6. E must be an integer. Let n be that integer.
2048 * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
2049 * 8. Return an internal Matcher closure that takes two arguments, a State x
2050 * and a Continuation c, and performs the following:
2051 * 1. Let cap be x's captures internal array.
2052 * 2. Let s be cap[n].
2053 * 3. If s is undefined, then call c(x) and return its result.
2054 * 4. Let e be x's endIndex.
2055 * 5. Let len be s's length.
2056 * 6. Let f be e+len.
2057 * 7. If f>InputLength, return failure.
2058 * 8. If there exists an integer i between 0 (inclusive) and len (exclusive)
2059 * such that Canonicalize(s[i]) is not the same character as
2060 * Canonicalize(Input [e+i]), then return failure.
2061 * 9. Let y be the State (f, cap).
2062 * 10. Call c(y) and return its result.
2064 static REMatchState
*
2065 BackrefMatcher(REGlobalData
*gData
, REMatchState
*x
, size_t parenIndex
)
2068 const WCHAR
*parenContent
;
2069 RECapture
*cap
= &x
->parens
[parenIndex
];
2071 if (cap
->index
== -1)
2075 if (x
->cp
+ len
> gData
->cpend
)
2078 parenContent
= &gData
->cpbegin
[cap
->index
];
2079 if (gData
->regexp
->flags
& JSREG_FOLD
) {
2080 for (i
= 0; i
< len
; i
++) {
2081 if (toupperW(parenContent
[i
]) != toupperW(x
->cp
[i
]))
2085 for (i
= 0; i
< len
; i
++) {
2086 if (parenContent
[i
] != x
->cp
[i
])
2094 /* Add a single character to the RECharSet */
2096 AddCharacterToCharSet(RECharSet
*cs
, WCHAR c
)
2098 UINT byteIndex
= (UINT
)(c
>> 3);
2099 assert(c
<= cs
->length
);
2100 cs
->u
.bits
[byteIndex
] |= 1 << (c
& 0x7);
2104 /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
2106 AddCharacterRangeToCharSet(RECharSet
*cs
, UINT c1
, UINT c2
)
2110 UINT byteIndex1
= c1
>> 3;
2111 UINT byteIndex2
= c2
>> 3;
2113 assert(c2
<= cs
->length
&& c1
<= c2
);
2118 if (byteIndex1
== byteIndex2
) {
2119 cs
->u
.bits
[byteIndex1
] |= ((BYTE
)0xFF >> (7 - (c2
- c1
))) << c1
;
2121 cs
->u
.bits
[byteIndex1
] |= 0xFF << c1
;
2122 for (i
= byteIndex1
+ 1; i
< byteIndex2
; i
++)
2123 cs
->u
.bits
[i
] = 0xFF;
2124 cs
->u
.bits
[byteIndex2
] |= (BYTE
)0xFF >> (7 - c2
);
2128 /* Compile the source of the class into a RECharSet */
2130 ProcessCharSet(REGlobalData
*gData
, RECharSet
*charSet
)
2132 const WCHAR
*src
, *end
;
2133 BOOL inRange
= FALSE
;
2134 WCHAR rangeStart
= 0;
2139 assert(!charSet
->converted
);
2141 * Assert that startIndex and length points to chars inside [] inside
2144 assert(1 <= charSet
->u
.src
.startIndex
);
2145 assert(charSet
->u
.src
.startIndex
2146 < SysStringLen(gData
->regexp
->source
));
2147 assert(charSet
->u
.src
.length
<= SysStringLen(gData
->regexp
->source
)
2148 - 1 - charSet
->u
.src
.startIndex
);
2150 charSet
->converted
= TRUE
;
2151 src
= gData
->regexp
->source
+ charSet
->u
.src
.startIndex
;
2153 end
= src
+ charSet
->u
.src
.length
;
2155 assert(src
[-1] == '[' && end
[0] == ']');
2157 byteLength
= (charSet
->length
>> 3) + 1;
2158 charSet
->u
.bits
= heap_alloc(byteLength
);
2159 if (!charSet
->u
.bits
) {
2160 JS_ReportOutOfMemory(gData
->cx
);
2164 memset(charSet
->u
.bits
, 0, byteLength
);
2170 assert(charSet
->sense
== FALSE
);
2173 assert(charSet
->sense
== TRUE
);
2176 while (src
!= end
) {
2201 if (src
< end
&& JS_ISWORD(*src
)) {
2202 thisCh
= (WCHAR
)(*src
++ & 0x1F);
2215 for (i
= 0; (i
< nDigits
) && (src
< end
); i
++) {
2218 if (!isASCIIHexDigit(c
, &digit
)) {
2220 * Back off to accepting the original '\'
2227 n
= (n
<< 4) | digit
;
2240 * This is a non-ECMA extension - decimal escapes (in this
2241 * case, octal!) are supposed to be an error inside class
2242 * ranges, but supported here for backwards compatibility.
2246 if ('0' <= c
&& c
<= '7') {
2248 n
= 8 * n
+ JS7_UNDEC(c
);
2250 if ('0' <= c
&& c
<= '7') {
2252 i
= 8 * n
+ JS7_UNDEC(c
);
2263 AddCharacterRangeToCharSet(charSet
, '0', '9');
2264 continue; /* don't need range processing */
2266 AddCharacterRangeToCharSet(charSet
, 0, '0' - 1);
2267 AddCharacterRangeToCharSet(charSet
,
2269 (WCHAR
)charSet
->length
);
2272 for (i
= (INT
)charSet
->length
; i
>= 0; i
--)
2274 AddCharacterToCharSet(charSet
, (WCHAR
)i
);
2277 for (i
= (INT
)charSet
->length
; i
>= 0; i
--)
2279 AddCharacterToCharSet(charSet
, (WCHAR
)i
);
2282 for (i
= (INT
)charSet
->length
; i
>= 0; i
--)
2284 AddCharacterToCharSet(charSet
, (WCHAR
)i
);
2287 for (i
= (INT
)charSet
->length
; i
>= 0; i
--)
2289 AddCharacterToCharSet(charSet
, (WCHAR
)i
);
2304 if (gData
->regexp
->flags
& JSREG_FOLD
) {
2307 assert(rangeStart
<= thisCh
);
2308 for (i
= rangeStart
; i
<= thisCh
; i
++) {
2311 AddCharacterToCharSet(charSet
, i
);
2315 AddCharacterToCharSet(charSet
, uch
);
2317 AddCharacterToCharSet(charSet
, dch
);
2320 AddCharacterRangeToCharSet(charSet
, rangeStart
, thisCh
);
2324 if (gData
->regexp
->flags
& JSREG_FOLD
) {
2325 AddCharacterToCharSet(charSet
, toupperW(thisCh
));
2326 AddCharacterToCharSet(charSet
, tolowerW(thisCh
));
2328 AddCharacterToCharSet(charSet
, thisCh
);
2330 if (src
< end
- 1) {
2334 rangeStart
= thisCh
;
2343 ReallocStateStack(REGlobalData
*gData
)
2345 size_t limit
= gData
->stateStackLimit
;
2346 size_t sz
= sizeof(REProgState
) * limit
;
2348 gData
->stateStack
= jsheap_grow(gData
->pool
, gData
->stateStack
, sz
, sz
);
2349 if (!gData
->stateStack
) {
2350 js_ReportOutOfScriptQuota(gData
->cx
);
2354 gData
->stateStackLimit
= limit
+ limit
;
2358 #define PUSH_STATE_STACK(data) \
2360 ++(data)->stateStackTop; \
2361 if ((data)->stateStackTop == (data)->stateStackLimit && \
2362 !ReallocStateStack((data))) { \
2368 * Apply the current op against the given input to see if it's going to match
2369 * or fail. Return false if we don't get a match, true if we do. If updatecp is
2370 * true, then update the current state's cp. Always update startpc to the next
2373 static inline REMatchState
*
2374 SimpleMatch(REGlobalData
*gData
, REMatchState
*x
, REOp op
,
2375 jsbytecode
**startpc
, BOOL updatecp
)
2377 REMatchState
*result
= NULL
;
2380 size_t offset
, length
, index
;
2381 jsbytecode
*pc
= *startpc
; /* pc has already been incremented past op */
2383 const WCHAR
*startcp
= x
->cp
;
2387 const char *opname
= reop_names
[op
];
2388 TRACE("\n%06d: %*s%s\n", pc
- gData
->regexp
->program
,
2389 (int)gData
->stateStackTop
* 2, "", opname
);
2396 if (x
->cp
!= gData
->cpbegin
) {
2397 if (/*!gData->cx->regExpStatics.multiline && FIXME !!! */
2398 !(gData
->regexp
->flags
& JSREG_MULTILINE
)) {
2401 if (!RE_IS_LINE_TERM(x
->cp
[-1]))
2407 if (x
->cp
!= gData
->cpend
) {
2408 if (/*!gData->cx->regExpStatics.multiline &&*/
2409 !(gData
->regexp
->flags
& JSREG_MULTILINE
)) {
2412 if (!RE_IS_LINE_TERM(*x
->cp
))
2418 if ((x
->cp
== gData
->cpbegin
|| !JS_ISWORD(x
->cp
[-1])) ^
2419 !(x
->cp
!= gData
->cpend
&& JS_ISWORD(*x
->cp
))) {
2424 if ((x
->cp
== gData
->cpbegin
|| !JS_ISWORD(x
->cp
[-1])) ^
2425 (x
->cp
!= gData
->cpend
&& JS_ISWORD(*x
->cp
))) {
2430 if (x
->cp
!= gData
->cpend
&& !RE_IS_LINE_TERM(*x
->cp
)) {
2436 if (x
->cp
!= gData
->cpend
&& JS7_ISDEC(*x
->cp
)) {
2442 if (x
->cp
!= gData
->cpend
&& !JS7_ISDEC(*x
->cp
)) {
2448 if (x
->cp
!= gData
->cpend
&& JS_ISWORD(*x
->cp
)) {
2454 if (x
->cp
!= gData
->cpend
&& !JS_ISWORD(*x
->cp
)) {
2460 if (x
->cp
!= gData
->cpend
&& isspaceW(*x
->cp
)) {
2466 if (x
->cp
!= gData
->cpend
&& !isspaceW(*x
->cp
)) {
2472 pc
= ReadCompactIndex(pc
, &parenIndex
);
2473 assert(parenIndex
< gData
->regexp
->parenCount
);
2474 result
= BackrefMatcher(gData
, x
, parenIndex
);
2477 pc
= ReadCompactIndex(pc
, &offset
);
2478 assert(offset
< SysStringLen(gData
->regexp
->source
));
2479 pc
= ReadCompactIndex(pc
, &length
);
2480 assert(1 <= length
);
2481 assert(length
<= SysStringLen(gData
->regexp
->source
) - offset
);
2482 if (length
<= (size_t)(gData
->cpend
- x
->cp
)) {
2483 source
= gData
->regexp
->source
+ offset
;
2484 TRACE("%s\n", debugstr_wn(source
, length
));
2485 for (index
= 0; index
!= length
; index
++) {
2486 if (source
[index
] != x
->cp
[index
])
2495 TRACE(" '%c' == '%c'\n", (char)matchCh
, (char)*x
->cp
);
2496 if (x
->cp
!= gData
->cpend
&& *x
->cp
== matchCh
) {
2502 pc
= ReadCompactIndex(pc
, &offset
);
2503 assert(offset
< SysStringLen(gData
->regexp
->source
));
2504 pc
= ReadCompactIndex(pc
, &length
);
2505 assert(1 <= length
);
2506 assert(length
<= SysStringLen(gData
->regexp
->source
) - offset
);
2507 source
= gData
->regexp
->source
;
2508 result
= FlatNIMatcher(gData
, x
, source
+ offset
, length
);
2512 if (x
->cp
!= gData
->cpend
&& toupperW(*x
->cp
) == toupperW(matchCh
)) {
2518 matchCh
= GET_ARG(pc
);
2519 TRACE(" '%c' == '%c'\n", (char)matchCh
, (char)*x
->cp
);
2521 if (x
->cp
!= gData
->cpend
&& *x
->cp
== matchCh
) {
2527 matchCh
= GET_ARG(pc
);
2529 if (x
->cp
!= gData
->cpend
&& toupperW(*x
->cp
) == toupperW(matchCh
)) {
2535 pc
= ReadCompactIndex(pc
, &index
);
2536 assert(index
< gData
->regexp
->classCount
);
2537 if (x
->cp
!= gData
->cpend
) {
2538 charSet
= &gData
->regexp
->classList
[index
];
2539 assert(charSet
->converted
);
2542 if (charSet
->length
!= 0 &&
2543 ch
<= charSet
->length
&&
2544 (charSet
->u
.bits
[index
] & (1 << (ch
& 0x7)))) {
2551 pc
= ReadCompactIndex(pc
, &index
);
2552 assert(index
< gData
->regexp
->classCount
);
2553 if (x
->cp
!= gData
->cpend
) {
2554 charSet
= &gData
->regexp
->classList
[index
];
2555 assert(charSet
->converted
);
2558 if (charSet
->length
== 0 ||
2559 ch
> charSet
->length
||
2560 !(charSet
->u
.bits
[index
] & (1 << (ch
& 0x7)))) {
2581 static inline REMatchState
*
2582 ExecuteREBytecode(REGlobalData
*gData
, REMatchState
*x
)
2584 REMatchState
*result
= NULL
;
2585 REBackTrackData
*backTrackData
;
2586 jsbytecode
*nextpc
, *testpc
;
2589 REProgState
*curState
;
2590 const WCHAR
*startcp
;
2591 size_t parenIndex
, k
;
2592 size_t parenSoFar
= 0;
2594 WCHAR matchCh1
, matchCh2
;
2598 jsbytecode
*pc
= gData
->regexp
->program
;
2599 REOp op
= (REOp
) *pc
++;
2602 * If the first node is a simple match, step the index into the string
2603 * until that match is made, or fail if it can't be found at all.
2605 if (REOP_IS_SIMPLE(op
) && !(gData
->regexp
->flags
& JSREG_STICKY
)) {
2607 while (x
->cp
<= gData
->cpend
) {
2608 nextpc
= pc
; /* reset back to start each time */
2609 result
= SimpleMatch(gData
, x
, op
, &nextpc
, TRUE
);
2613 pc
= nextpc
; /* accept skip to next opcode */
2615 assert(op
< REOP_LIMIT
);
2626 const char *opname
= reop_names
[op
];
2627 TRACE("\n%06d: %*s%s\n", pc
- gData
->regexp
->program
,
2628 (int)gData
->stateStackTop
* 2, "", opname
);
2630 if (REOP_IS_SIMPLE(op
)) {
2631 result
= SimpleMatch(gData
, x
, op
, &pc
, TRUE
);
2633 curState
= &gData
->stateStack
[gData
->stateStackTop
];
2637 case REOP_ALTPREREQ2
:
2638 nextpc
= pc
+ GET_OFFSET(pc
); /* start of next op */
2640 matchCh2
= GET_ARG(pc
);
2645 if (x
->cp
!= gData
->cpend
) {
2646 if (*x
->cp
== matchCh2
)
2649 charSet
= &gData
->regexp
->classList
[k
];
2650 if (!charSet
->converted
&& !ProcessCharSet(gData
, charSet
))
2654 if ((charSet
->length
== 0 ||
2655 matchCh1
> charSet
->length
||
2656 !(charSet
->u
.bits
[k
] & (1 << (matchCh1
& 0x7)))) ^
2664 case REOP_ALTPREREQ
:
2665 nextpc
= pc
+ GET_OFFSET(pc
); /* start of next op */
2667 matchCh1
= GET_ARG(pc
);
2669 matchCh2
= GET_ARG(pc
);
2671 if (x
->cp
== gData
->cpend
||
2672 (*x
->cp
!= matchCh1
&& *x
->cp
!= matchCh2
)) {
2676 /* else false thru... */
2680 nextpc
= pc
+ GET_OFFSET(pc
); /* start of next alternate */
2681 pc
+= ARG_LEN
; /* start of this alternate */
2682 curState
->parenSoFar
= parenSoFar
;
2683 PUSH_STATE_STACK(gData
);
2686 if (REOP_IS_SIMPLE(op
)) {
2687 if (!SimpleMatch(gData
, x
, op
, &pc
, TRUE
)) {
2688 op
= (REOp
) *nextpc
++;
2695 nextop
= (REOp
) *nextpc
++;
2696 if (!PushBackTrackState(gData
, nextop
, nextpc
, x
, startcp
, 0, 0))
2701 * Occurs at (successful) end of REOP_ALT,
2705 * If we have not gotten a result here, it is because of an
2706 * empty match. Do the same thing REOP_EMPTY would do.
2711 --gData
->stateStackTop
;
2712 pc
+= GET_OFFSET(pc
);
2717 * Occurs at last (successful) end of REOP_ALT,
2721 * If we have not gotten a result here, it is because of an
2722 * empty match. Do the same thing REOP_EMPTY would do.
2727 --gData
->stateStackTop
;
2732 pc
= ReadCompactIndex(pc
, &parenIndex
);
2733 TRACE("[ %lu ]\n", (unsigned long) parenIndex
);
2734 assert(parenIndex
< gData
->regexp
->parenCount
);
2735 if (parenIndex
+ 1 > parenSoFar
)
2736 parenSoFar
= parenIndex
+ 1;
2737 x
->parens
[parenIndex
].index
= x
->cp
- gData
->cpbegin
;
2738 x
->parens
[parenIndex
].length
= 0;
2746 pc
= ReadCompactIndex(pc
, &parenIndex
);
2747 assert(parenIndex
< gData
->regexp
->parenCount
);
2748 cap
= &x
->parens
[parenIndex
];
2749 delta
= x
->cp
- (gData
->cpbegin
+ cap
->index
);
2750 cap
->length
= (delta
< 0) ? 0 : (size_t) delta
;
2758 nextpc
= pc
+ GET_OFFSET(pc
); /* start of term after ASSERT */
2759 pc
+= ARG_LEN
; /* start of ASSERT child */
2762 if (REOP_IS_SIMPLE(op
) &&
2763 !SimpleMatch(gData
, x
, op
, &testpc
, FALSE
)) {
2767 curState
->u
.assertion
.top
=
2768 (char *)gData
->backTrackSP
- (char *)gData
->backTrackStack
;
2769 curState
->u
.assertion
.sz
= gData
->cursz
;
2770 curState
->index
= x
->cp
- gData
->cpbegin
;
2771 curState
->parenSoFar
= parenSoFar
;
2772 PUSH_STATE_STACK(gData
);
2773 if (!PushBackTrackState(gData
, REOP_ASSERTTEST
,
2774 nextpc
, x
, x
->cp
, 0, 0)) {
2779 case REOP_ASSERT_NOT
:
2780 nextpc
= pc
+ GET_OFFSET(pc
);
2784 if (REOP_IS_SIMPLE(op
) /* Note - fail to fail! */ &&
2785 SimpleMatch(gData
, x
, op
, &testpc
, FALSE
) &&
2786 *testpc
== REOP_ASSERTNOTTEST
) {
2790 curState
->u
.assertion
.top
2791 = (char *)gData
->backTrackSP
-
2792 (char *)gData
->backTrackStack
;
2793 curState
->u
.assertion
.sz
= gData
->cursz
;
2794 curState
->index
= x
->cp
- gData
->cpbegin
;
2795 curState
->parenSoFar
= parenSoFar
;
2796 PUSH_STATE_STACK(gData
);
2797 if (!PushBackTrackState(gData
, REOP_ASSERTNOTTEST
,
2798 nextpc
, x
, x
->cp
, 0, 0)) {
2803 case REOP_ASSERTTEST
:
2804 --gData
->stateStackTop
;
2806 x
->cp
= gData
->cpbegin
+ curState
->index
;
2807 gData
->backTrackSP
=
2808 (REBackTrackData
*) ((char *)gData
->backTrackStack
+
2809 curState
->u
.assertion
.top
);
2810 gData
->cursz
= curState
->u
.assertion
.sz
;
2815 case REOP_ASSERTNOTTEST
:
2816 --gData
->stateStackTop
;
2818 x
->cp
= gData
->cpbegin
+ curState
->index
;
2819 gData
->backTrackSP
=
2820 (REBackTrackData
*) ((char *)gData
->backTrackStack
+
2821 curState
->u
.assertion
.top
);
2822 gData
->cursz
= curState
->u
.assertion
.sz
;
2823 result
= (!result
) ? x
: NULL
;
2826 curState
->u
.quantifier
.min
= 0;
2827 curState
->u
.quantifier
.max
= (UINT
)-1;
2830 curState
->u
.quantifier
.min
= 1;
2831 curState
->u
.quantifier
.max
= (UINT
)-1;
2834 curState
->u
.quantifier
.min
= 0;
2835 curState
->u
.quantifier
.max
= 1;
2838 pc
= ReadCompactIndex(pc
, &k
);
2839 curState
->u
.quantifier
.min
= k
;
2840 pc
= ReadCompactIndex(pc
, &k
);
2841 /* max is k - 1 to use one byte for (UINT)-1 sentinel. */
2842 curState
->u
.quantifier
.max
= k
- 1;
2843 assert(curState
->u
.quantifier
.min
<= curState
->u
.quantifier
.max
);
2845 if (curState
->u
.quantifier
.max
== 0) {
2846 pc
= pc
+ GET_OFFSET(pc
);
2851 /* Step over <next> */
2852 nextpc
= pc
+ ARG_LEN
;
2853 op
= (REOp
) *nextpc
++;
2855 if (REOP_IS_SIMPLE(op
)) {
2856 if (!SimpleMatch(gData
, x
, op
, &nextpc
, TRUE
)) {
2857 if (curState
->u
.quantifier
.min
== 0)
2861 pc
= pc
+ GET_OFFSET(pc
);
2864 op
= (REOp
) *nextpc
++;
2867 curState
->index
= startcp
- gData
->cpbegin
;
2868 curState
->continue_op
= REOP_REPEAT
;
2869 curState
->continue_pc
= pc
;
2870 curState
->parenSoFar
= parenSoFar
;
2871 PUSH_STATE_STACK(gData
);
2872 if (curState
->u
.quantifier
.min
== 0 &&
2873 !PushBackTrackState(gData
, REOP_REPEAT
, pc
, x
, startcp
,
2880 case REOP_ENDCHILD
: /* marks the end of a quantifier child */
2881 pc
= curState
[-1].continue_pc
;
2882 op
= (REOp
) curState
[-1].continue_op
;
2891 --gData
->stateStackTop
;
2893 /* Failed, see if we have enough children. */
2894 if (curState
->u
.quantifier
.min
== 0)
2898 if (curState
->u
.quantifier
.min
== 0 &&
2899 x
->cp
== gData
->cpbegin
+ curState
->index
) {
2900 /* matched an empty string, that'll get us nowhere */
2904 if (curState
->u
.quantifier
.min
!= 0)
2905 curState
->u
.quantifier
.min
--;
2906 if (curState
->u
.quantifier
.max
!= (UINT
) -1)
2907 curState
->u
.quantifier
.max
--;
2908 if (curState
->u
.quantifier
.max
== 0)
2910 nextpc
= pc
+ ARG_LEN
;
2911 nextop
= (REOp
) *nextpc
;
2913 if (REOP_IS_SIMPLE(nextop
)) {
2915 if (!SimpleMatch(gData
, x
, nextop
, &nextpc
, TRUE
)) {
2916 if (curState
->u
.quantifier
.min
== 0)
2923 curState
->index
= startcp
- gData
->cpbegin
;
2924 PUSH_STATE_STACK(gData
);
2925 if (curState
->u
.quantifier
.min
== 0 &&
2926 !PushBackTrackState(gData
, REOP_REPEAT
,
2928 curState
->parenSoFar
,
2930 curState
->parenSoFar
)) {
2933 } while (*nextpc
== REOP_ENDCHILD
);
2936 parenSoFar
= curState
->parenSoFar
;
2941 pc
+= GET_OFFSET(pc
);
2944 case REOP_MINIMALSTAR
:
2945 curState
->u
.quantifier
.min
= 0;
2946 curState
->u
.quantifier
.max
= (UINT
)-1;
2947 goto minimalquantcommon
;
2948 case REOP_MINIMALPLUS
:
2949 curState
->u
.quantifier
.min
= 1;
2950 curState
->u
.quantifier
.max
= (UINT
)-1;
2951 goto minimalquantcommon
;
2952 case REOP_MINIMALOPT
:
2953 curState
->u
.quantifier
.min
= 0;
2954 curState
->u
.quantifier
.max
= 1;
2955 goto minimalquantcommon
;
2956 case REOP_MINIMALQUANT
:
2957 pc
= ReadCompactIndex(pc
, &k
);
2958 curState
->u
.quantifier
.min
= k
;
2959 pc
= ReadCompactIndex(pc
, &k
);
2960 /* See REOP_QUANT comments about k - 1. */
2961 curState
->u
.quantifier
.max
= k
- 1;
2962 assert(curState
->u
.quantifier
.min
2963 <= curState
->u
.quantifier
.max
);
2965 curState
->index
= x
->cp
- gData
->cpbegin
;
2966 curState
->parenSoFar
= parenSoFar
;
2967 PUSH_STATE_STACK(gData
);
2968 if (curState
->u
.quantifier
.min
!= 0) {
2969 curState
->continue_op
= REOP_MINIMALREPEAT
;
2970 curState
->continue_pc
= pc
;
2971 /* step over <next> */
2975 if (!PushBackTrackState(gData
, REOP_MINIMALREPEAT
,
2976 pc
, x
, x
->cp
, 0, 0)) {
2979 --gData
->stateStackTop
;
2980 pc
= pc
+ GET_OFFSET(pc
);
2985 case REOP_MINIMALREPEAT
:
2986 --gData
->stateStackTop
;
2989 TRACE("{%d,%d}\n", curState
->u
.quantifier
.min
, curState
->u
.quantifier
.max
);
2990 #define PREPARE_REPEAT() \
2992 curState->index = x->cp - gData->cpbegin; \
2993 curState->continue_op = REOP_MINIMALREPEAT; \
2994 curState->continue_pc = pc; \
2996 for (k = curState->parenSoFar; k < parenSoFar; k++) \
2997 x->parens[k].index = -1; \
2998 PUSH_STATE_STACK(gData); \
2999 op = (REOp) *pc++; \
3000 assert(op < REOP_LIMIT); \
3006 * Non-greedy failure - try to consume another child.
3008 if (curState
->u
.quantifier
.max
== (UINT
) -1 ||
3009 curState
->u
.quantifier
.max
> 0) {
3013 /* Don't need to adjust pc since we're going to pop. */
3016 if (curState
->u
.quantifier
.min
== 0 &&
3017 x
->cp
== gData
->cpbegin
+ curState
->index
) {
3018 /* Matched an empty string, that'll get us nowhere. */
3022 if (curState
->u
.quantifier
.min
!= 0)
3023 curState
->u
.quantifier
.min
--;
3024 if (curState
->u
.quantifier
.max
!= (UINT
) -1)
3025 curState
->u
.quantifier
.max
--;
3026 if (curState
->u
.quantifier
.min
!= 0) {
3030 curState
->index
= x
->cp
- gData
->cpbegin
;
3031 curState
->parenSoFar
= parenSoFar
;
3032 PUSH_STATE_STACK(gData
);
3033 if (!PushBackTrackState(gData
, REOP_MINIMALREPEAT
,
3035 curState
->parenSoFar
,
3036 parenSoFar
- curState
->parenSoFar
)) {
3039 --gData
->stateStackTop
;
3040 pc
= pc
+ GET_OFFSET(pc
);
3042 assert(op
< REOP_LIMIT
);
3052 * If the match failed and there's a backtrack option, take it.
3053 * Otherwise this is a complete and utter failure.
3056 if (gData
->cursz
== 0)
3059 /* Potentially detect explosive regex here. */
3060 gData
->backTrackCount
++;
3061 if (gData
->backTrackLimit
&&
3062 gData
->backTrackCount
>= gData
->backTrackLimit
) {
3063 JS_ReportErrorNumber(gData
->cx
, js_GetErrorMessage
, NULL
,
3064 JSMSG_REGEXP_TOO_COMPLEX
);
3069 backTrackData
= gData
->backTrackSP
;
3070 gData
->cursz
= backTrackData
->sz
;
3071 gData
->backTrackSP
=
3072 (REBackTrackData
*) ((char *)backTrackData
- backTrackData
->sz
);
3073 x
->cp
= backTrackData
->cp
;
3074 pc
= backTrackData
->backtrack_pc
;
3075 op
= (REOp
) backTrackData
->backtrack_op
;
3076 assert(op
< REOP_LIMIT
);
3077 gData
->stateStackTop
= backTrackData
->saveStateStackTop
;
3078 assert(gData
->stateStackTop
);
3080 memcpy(gData
->stateStack
, backTrackData
+ 1,
3081 sizeof(REProgState
) * backTrackData
->saveStateStackTop
);
3082 curState
= &gData
->stateStack
[gData
->stateStackTop
- 1];
3084 if (backTrackData
->parenCount
) {
3085 memcpy(&x
->parens
[backTrackData
->parenIndex
],
3086 (char *)(backTrackData
+ 1) +
3087 sizeof(REProgState
) * backTrackData
->saveStateStackTop
,
3088 sizeof(RECapture
) * backTrackData
->parenCount
);
3089 parenSoFar
= backTrackData
->parenIndex
+ backTrackData
->parenCount
;
3091 for (k
= curState
->parenSoFar
; k
< parenSoFar
; k
++)
3092 x
->parens
[k
].index
= -1;
3093 parenSoFar
= curState
->parenSoFar
;
3096 TRACE("\tBT_Pop: %ld,%ld\n",
3097 (unsigned long) backTrackData
->parenIndex
,
3098 (unsigned long) backTrackData
->parenCount
);
3104 * Continue with the expression.
3107 assert(op
< REOP_LIMIT
);
3119 static REMatchState
*MatchRegExp(REGlobalData
*gData
, REMatchState
*x
)
3121 REMatchState
*result
;
3122 const WCHAR
*cp
= x
->cp
;
3127 * Have to include the position beyond the last character
3128 * in order to detect end-of-input/line condition.
3130 for (cp2
= cp
; cp2
<= gData
->cpend
; cp2
++) {
3131 gData
->skipped
= cp2
- cp
;
3133 for (j
= 0; j
< gData
->regexp
->parenCount
; j
++)
3134 x
->parens
[j
].index
= -1;
3135 result
= ExecuteREBytecode(gData
, x
);
3136 if (!gData
->ok
|| result
|| (gData
->regexp
->flags
& JSREG_STICKY
))
3138 gData
->backTrackSP
= gData
->backTrackStack
;
3140 gData
->stateStackTop
= 0;
3141 cp2
= cp
+ gData
->skipped
;
3146 #define MIN_BACKTRACK_LIMIT 400000
3148 static REMatchState
*InitMatch(script_ctx_t
*cx
, REGlobalData
*gData
, JSRegExp
*re
, size_t length
)
3150 REMatchState
*result
;
3153 gData
->backTrackStackSize
= INITIAL_BACKTRACK
;
3154 gData
->backTrackStack
= jsheap_alloc(gData
->pool
, INITIAL_BACKTRACK
);
3155 if (!gData
->backTrackStack
)
3158 gData
->backTrackSP
= gData
->backTrackStack
;
3160 gData
->backTrackCount
= 0;
3161 gData
->backTrackLimit
= 0;
3163 gData
->stateStackLimit
= INITIAL_STATESTACK
;
3164 gData
->stateStack
= jsheap_alloc(gData
->pool
, sizeof(REProgState
) * INITIAL_STATESTACK
);
3165 if (!gData
->stateStack
)
3168 gData
->stateStackTop
= 0;
3173 result
= jsheap_alloc(gData
->pool
, offsetof(REMatchState
, parens
) + re
->parenCount
* sizeof(RECapture
));
3177 for (i
= 0; i
< re
->classCount
; i
++) {
3178 if (!re
->classList
[i
].converted
&&
3179 !ProcessCharSet(gData
, &re
->classList
[i
])) {
3187 js_ReportOutOfScriptQuota(cx
);
3193 js_DestroyRegExp(JSRegExp
*re
)
3195 if (re
->classList
) {
3197 for (i
= 0; i
< re
->classCount
; i
++) {
3198 if (re
->classList
[i
].converted
)
3199 heap_free(re
->classList
[i
].u
.bits
);
3200 re
->classList
[i
].u
.bits
= NULL
;
3202 heap_free(re
->classList
);
3208 js_NewRegExp(script_ctx_t
*cx
, BSTR str
, UINT flags
, BOOL flat
)
3212 CompilerState state
;
3219 mark
= jsheap_mark(&cx
->tmp_heap
);
3220 len
= SysStringLen(str
);
3226 state
.cpbegin
= state
.cp
;
3227 state
.cpend
= state
.cp
+ len
;
3228 state
.flags
= flags
;
3229 state
.parenCount
= 0;
3230 state
.classCount
= 0;
3231 state
.progLength
= 0;
3232 state
.treeDepth
= 0;
3233 state
.classBitmapsMem
= 0;
3234 for (i
= 0; i
< CLASS_CACHE_SIZE
; i
++)
3235 state
.classCache
[i
].start
= NULL
;
3237 if (len
!= 0 && flat
) {
3238 state
.result
= NewRENode(&state
, REOP_FLAT
);
3241 state
.result
->u
.flat
.chr
= *state
.cpbegin
;
3242 state
.result
->u
.flat
.length
= len
;
3243 state
.result
->kid
= (void *) state
.cpbegin
;
3244 /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
3245 state
.progLength
+= 1 + GetCompactIndexWidth(0)
3246 + GetCompactIndexWidth(len
);
3248 if (!ParseRegExp(&state
))
3251 resize
= offsetof(JSRegExp
, program
) + state
.progLength
+ 1;
3252 re
= heap_alloc(resize
);
3256 assert(state
.classBitmapsMem
<= CLASS_BITMAPS_MEM_LIMIT
);
3257 re
->classCount
= state
.classCount
;
3258 if (re
->classCount
) {
3259 re
->classList
= heap_alloc(re
->classCount
* sizeof(RECharSet
));
3260 if (!re
->classList
) {
3261 js_DestroyRegExp(re
);
3265 for (i
= 0; i
< re
->classCount
; i
++)
3266 re
->classList
[i
].converted
= FALSE
;
3268 re
->classList
= NULL
;
3270 endPC
= EmitREBytecode(&state
, re
, state
.treeDepth
, re
->program
, state
.result
);
3272 js_DestroyRegExp(re
);
3276 *endPC
++ = REOP_END
;
3278 * Check whether size was overestimated and shrink using realloc.
3279 * This is safe since no pointers to newly parsed regexp or its parts
3280 * besides re exist here.
3282 if ((size_t)(endPC
- re
->program
) != state
.progLength
+ 1) {
3284 assert((size_t)(endPC
- re
->program
) < state
.progLength
+ 1);
3285 resize
= offsetof(JSRegExp
, program
) + (endPC
- re
->program
);
3286 tmp
= heap_realloc(re
, resize
);
3292 re
->parenCount
= state
.parenCount
;
3300 static HRESULT
do_regexp_match_next(RegExpInstance
*regexp
, const WCHAR
*str
, DWORD len
,
3301 const WCHAR
**cp
, match_result_t
**parens
, DWORD
*parens_size
, DWORD
*parens_cnt
, match_result_t
*ret
)
3303 REMatchState
*x
, *result
;
3307 gData
.cpbegin
= *cp
;
3308 gData
.cpend
= str
+ len
;
3309 gData
.start
= *cp
-str
;
3311 gData
.pool
= ®exp
->dispex
.ctx
->tmp_heap
;
3313 x
= InitMatch(NULL
, &gData
, regexp
->jsregexp
, gData
.cpend
- gData
.cpbegin
);
3315 WARN("InitMatch failed\n");
3320 result
= MatchRegExp(&gData
, x
);
3322 WARN("MatchRegExp failed\n");
3332 if(regexp
->jsregexp
->parenCount
> *parens_size
) {
3333 match_result_t
*new_parens
;
3336 new_parens
= heap_realloc(*parens
, sizeof(match_result_t
)*regexp
->jsregexp
->parenCount
);
3338 new_parens
= heap_alloc(sizeof(match_result_t
)*regexp
->jsregexp
->parenCount
);
3340 return E_OUTOFMEMORY
;
3342 *parens
= new_parens
;
3345 *parens_cnt
= regexp
->jsregexp
->parenCount
;
3347 for(i
=0; i
< regexp
->jsregexp
->parenCount
; i
++) {
3348 (*parens
)[i
].str
= *cp
+ result
->parens
[i
].index
;
3349 (*parens
)[i
].len
= result
->parens
[i
].length
;
3353 matchlen
= (result
->cp
-*cp
) - gData
.skipped
;
3355 ret
->str
= result
->cp
-matchlen
;
3356 ret
->len
= matchlen
;
3361 HRESULT
regexp_match_next(DispatchEx
*dispex
, BOOL gcheck
, const WCHAR
*str
, DWORD len
,
3362 const WCHAR
**cp
, match_result_t
**parens
, DWORD
*parens_size
, DWORD
*parens_cnt
, match_result_t
*ret
)
3364 RegExpInstance
*regexp
= (RegExpInstance
*)dispex
;
3368 if(gcheck
&& !(regexp
->jsregexp
->flags
& JSREG_GLOB
))
3371 mark
= jsheap_mark(®exp
->dispex
.ctx
->tmp_heap
);
3373 hres
= do_regexp_match_next(regexp
, str
, len
, cp
, parens
, parens_size
, parens_cnt
, ret
);
3379 HRESULT
regexp_match(DispatchEx
*dispex
, const WCHAR
*str
, DWORD len
, BOOL gflag
, match_result_t
**match_result
,
3382 RegExpInstance
*This
= (RegExpInstance
*)dispex
;
3383 match_result_t
*ret
= NULL
, cres
;
3384 const WCHAR
*cp
= str
;
3385 DWORD i
=0, ret_size
= 0;
3389 mark
= jsheap_mark(&This
->dispex
.ctx
->tmp_heap
);
3392 hres
= do_regexp_match_next(This
, str
, len
, &cp
, NULL
, NULL
, NULL
, &cres
);
3393 if(hres
== S_FALSE
) {
3403 ret
= heap_realloc(ret
, (ret_size
<<= 1) * sizeof(match_result_t
));
3405 ret
= heap_alloc((ret_size
=4) * sizeof(match_result_t
));
3407 hres
= E_OUTOFMEMORY
;
3414 if(!gflag
&& !(This
->jsregexp
->flags
& JSREG_GLOB
)) {
3426 *match_result
= ret
;
3431 static HRESULT
RegExp_source(DispatchEx
*dispex
, LCID lcid
, WORD flags
, DISPPARAMS
*dp
,
3432 VARIANT
*retv
, jsexcept_t
*ei
, IServiceProvider
*sp
)
3438 static HRESULT
RegExp_global(DispatchEx
*dispex
, LCID lcid
, WORD flags
, DISPPARAMS
*dp
,
3439 VARIANT
*retv
, jsexcept_t
*ei
, IServiceProvider
*sp
)
3445 static HRESULT
RegExp_ignoreCase(DispatchEx
*dispex
, LCID lcid
, WORD flags
, DISPPARAMS
*dp
,
3446 VARIANT
*retv
, jsexcept_t
*ei
, IServiceProvider
*sp
)
3452 static HRESULT
RegExp_multiline(DispatchEx
*dispex
, LCID lcid
, WORD flags
, DISPPARAMS
*dp
,
3453 VARIANT
*retv
, jsexcept_t
*ei
, IServiceProvider
*sp
)
3459 static HRESULT
RegExp_lastIndex(DispatchEx
*dispex
, LCID lcid
, WORD flags
, DISPPARAMS
*dp
,
3460 VARIANT
*retv
, jsexcept_t
*ei
, IServiceProvider
*sp
)
3466 static HRESULT
RegExp_toString(DispatchEx
*dispex
, LCID lcid
, WORD flags
, DISPPARAMS
*dp
,
3467 VARIANT
*retv
, jsexcept_t
*ei
, IServiceProvider
*sp
)
3473 static HRESULT
RegExp_toLocaleString(DispatchEx
*dispex
, LCID lcid
, WORD flags
, DISPPARAMS
*dp
,
3474 VARIANT
*retv
, jsexcept_t
*ei
, IServiceProvider
*sp
)
3480 static HRESULT
RegExp_hasOwnProperty(DispatchEx
*dispex
, LCID lcid
, WORD flags
, DISPPARAMS
*dp
,
3481 VARIANT
*retv
, jsexcept_t
*ei
, IServiceProvider
*sp
)
3487 static HRESULT
RegExp_propertyIsEnumerable(DispatchEx
*dispex
, LCID lcid
, WORD flags
, DISPPARAMS
*dp
,
3488 VARIANT
*retv
, jsexcept_t
*ei
, IServiceProvider
*sp
)
3494 static HRESULT
RegExp_isPrototypeOf(DispatchEx
*dispex
, LCID lcid
, WORD flags
, DISPPARAMS
*dp
,
3495 VARIANT
*retv
, jsexcept_t
*ei
, IServiceProvider
*sp
)
3501 static HRESULT
RegExp_exec(DispatchEx
*dispex
, LCID lcid
, WORD flags
, DISPPARAMS
*dp
,
3502 VARIANT
*retv
, jsexcept_t
*ei
, IServiceProvider
*sp
)
3508 static HRESULT
RegExp_value(DispatchEx
*dispex
, LCID lcid
, WORD flags
, DISPPARAMS
*dp
,
3509 VARIANT
*retv
, jsexcept_t
*ei
, IServiceProvider
*sp
)
3515 static void RegExp_destructor(DispatchEx
*dispex
)
3517 RegExpInstance
*This
= (RegExpInstance
*)dispex
;
3520 js_DestroyRegExp(This
->jsregexp
);
3521 SysFreeString(This
->str
);
3525 static const builtin_prop_t RegExp_props
[] = {
3526 {execW
, RegExp_exec
, PROPF_METHOD
},
3527 {globalW
, RegExp_global
, 0},
3528 {hasOwnPropertyW
, RegExp_hasOwnProperty
, PROPF_METHOD
},
3529 {ignoreCaseW
, RegExp_ignoreCase
, 0},
3530 {isPrototypeOfW
, RegExp_isPrototypeOf
, PROPF_METHOD
},
3531 {lastIndexW
, RegExp_lastIndex
, 0},
3532 {multilineW
, RegExp_multiline
, 0},
3533 {propertyIsEnumerableW
, RegExp_propertyIsEnumerable
, PROPF_METHOD
},
3534 {sourceW
, RegExp_source
, 0},
3535 {toLocaleStringW
, RegExp_toLocaleString
, PROPF_METHOD
},
3536 {toStringW
, RegExp_toString
, PROPF_METHOD
}
3539 static const builtin_info_t RegExp_info
= {
3541 {NULL
, RegExp_value
, 0},
3542 sizeof(RegExp_props
)/sizeof(*RegExp_props
),
3548 static HRESULT
alloc_regexp(script_ctx_t
*ctx
, BOOL use_constr
, RegExpInstance
**ret
)
3550 RegExpInstance
*regexp
;
3553 regexp
= heap_alloc_zero(sizeof(RegExpInstance
));
3555 return E_OUTOFMEMORY
;
3558 hres
= init_dispex_from_constr(®exp
->dispex
, ctx
, &RegExp_info
, ctx
->regexp_constr
);
3560 hres
= init_dispex(®exp
->dispex
, ctx
, &RegExp_info
, NULL
);
3571 static HRESULT
create_regexp(script_ctx_t
*ctx
, const WCHAR
*exp
, int len
, DWORD flags
, DispatchEx
**ret
)
3573 RegExpInstance
*regexp
;
3576 TRACE("%s %x\n", debugstr_w(exp
), flags
);
3578 hres
= alloc_regexp(ctx
, TRUE
, ®exp
);
3583 regexp
->str
= SysAllocString(exp
);
3585 regexp
->str
= SysAllocStringLen(exp
, len
);
3587 jsdisp_release(®exp
->dispex
);
3588 return E_OUTOFMEMORY
;
3591 regexp
->jsregexp
= js_NewRegExp(ctx
, regexp
->str
, flags
, FALSE
);
3592 if(!regexp
->jsregexp
) {
3593 WARN("js_NewRegExp failed\n");
3594 jsdisp_release(®exp
->dispex
);
3598 *ret
= ®exp
->dispex
;
3602 static HRESULT
regexp_constructor(script_ctx_t
*ctx
, DISPPARAMS
*dp
, VARIANT
*retv
)
3604 const WCHAR
*opt
= emptyW
, *src
;
3614 arg
= get_arg(dp
,0);
3615 if(V_VT(arg
) == VT_DISPATCH
) {
3618 obj
= iface_to_jsdisp((IUnknown
*)V_DISPATCH(arg
));
3620 if(is_class(obj
, JSCLASS_REGEXP
)) {
3621 RegExpInstance
*regexp
= (RegExpInstance
*)obj
;
3623 hres
= create_regexp(ctx
, regexp
->str
, -1, regexp
->jsregexp
->flags
, &ret
);
3624 jsdisp_release(obj
);
3628 V_VT(retv
) = VT_DISPATCH
;
3629 V_DISPATCH(retv
) = (IDispatch
*)_IDispatchEx_(ret
);
3633 jsdisp_release(obj
);
3637 if(V_VT(arg
) != VT_BSTR
) {
3638 FIXME("vt arg0 = %d\n", V_VT(arg
));
3644 if(arg_cnt(dp
) >= 2) {
3645 arg
= get_arg(dp
,1);
3646 if(V_VT(arg
) != VT_BSTR
) {
3647 FIXME("unimplemented for vt %d\n", V_VT(arg
));
3654 hres
= create_regexp_str(ctx
, src
, -1, opt
, strlenW(opt
), &ret
);
3658 V_VT(retv
) = VT_DISPATCH
;
3659 V_DISPATCH(retv
) = (IDispatch
*)_IDispatchEx_(ret
);
3663 static HRESULT
RegExpConstr_value(DispatchEx
*dispex
, LCID lcid
, WORD flags
, DISPPARAMS
*dp
,
3664 VARIANT
*retv
, jsexcept_t
*ei
, IServiceProvider
*sp
)
3669 case DISPATCH_CONSTRUCT
:
3670 return regexp_constructor(dispex
->ctx
, dp
, retv
);
3672 FIXME("unimplemented flags: %x\n", flags
);
3679 HRESULT
create_regexp_constr(script_ctx_t
*ctx
, DispatchEx
**ret
)
3681 RegExpInstance
*regexp
;
3684 hres
= alloc_regexp(ctx
, FALSE
, ®exp
);
3688 hres
= create_builtin_function(ctx
, RegExpConstr_value
, PROPF_CONSTR
, ®exp
->dispex
, ret
);
3690 jsdisp_release(®exp
->dispex
);
3694 HRESULT
create_regexp_str(script_ctx_t
*ctx
, const WCHAR
*exp
, DWORD exp_len
, const WCHAR
*opt
,
3695 DWORD opt_len
, DispatchEx
**ret
)
3701 for (p
= opt
; p
< opt
+opt_len
; p
++) {
3704 flags
|= JSREG_GLOB
;
3707 flags
|= JSREG_FOLD
;
3710 flags
|= JSREG_MULTILINE
;
3713 flags
|= JSREG_STICKY
;
3716 WARN("wrong flag %c\n", *p
);
3722 return create_regexp(ctx
, exp
, exp_len
, flags
, ret
);