push c6bab2db4fc296bd36abf8f7d9cf443d4a73048e
[wine/hacks.git] / dlls / jscript / regexp.c
blobe79fee45aab6eee4e51877963a44a1119e87a9d9
1 /*
2 * Copyright 2008 Jacek Caban for CodeWeavers
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
20 * Code in this file is based on files:
21 * js/src/jsregexp.h
22 * js/src/jsregexp.c
23 * from Mozilla project, released under LGPL 2.1 or later.
25 * The Original Code is Mozilla Communicator client code, released
26 * March 31, 1998.
28 * The Initial Developer of the Original Code is
29 * Netscape Communications Corporation.
30 * Portions created by the Initial Developer are Copyright (C) 1998
31 * the Initial Developer. All Rights Reserved.
34 #include <assert.h>
36 #include "jscript.h"
38 #include "wine/debug.h"
40 WINE_DEFAULT_DEBUG_CHANNEL(jscript);
42 #define JSREG_FOLD 0x01 /* fold uppercase to lowercase */
43 #define JSREG_GLOB 0x02 /* global exec, creates array of matches */
44 #define JSREG_MULTILINE 0x04 /* treat ^ and $ as begin and end of line */
45 #define JSREG_STICKY 0x08 /* only match starting at lastIndex */
47 typedef BYTE JSPackedBool;
48 typedef BYTE jsbytecode;
51 * This struct holds a bitmap representation of a class from a regexp.
52 * There's a list of these referenced by the classList field in the JSRegExp
53 * struct below. The initial state has startIndex set to the offset in the
54 * original regexp source of the beginning of the class contents. The first
55 * use of the class converts the source representation into a bitmap.
58 typedef struct RECharSet {
59 JSPackedBool converted;
60 JSPackedBool sense;
61 WORD length;
62 union {
63 BYTE *bits;
64 struct {
65 size_t startIndex;
66 size_t length;
67 } src;
68 } u;
69 } RECharSet;
71 typedef struct {
72 WORD flags; /* flags, see jsapi.h's JSREG_* defines */
73 size_t parenCount; /* number of parenthesized submatches */
74 size_t classCount; /* count [...] bitmaps */
75 RECharSet *classList; /* list of [...] bitmaps */
76 BSTR source; /* locked source string, sans // */
77 jsbytecode program[1]; /* regular expression bytecode */
78 } JSRegExp;
80 typedef struct {
81 DispatchEx dispex;
83 JSRegExp *jsregexp;
84 BSTR str;
85 } RegExpInstance;
87 static const WCHAR sourceW[] = {'s','o','u','r','c','e',0};
88 static const WCHAR globalW[] = {'g','l','o','b','a','l',0};
89 static const WCHAR ignoreCaseW[] = {'i','g','n','o','r','e','C','a','s','e',0};
90 static const WCHAR multilineW[] = {'m','u','l','t','i','l','i','n','e',0};
91 static const WCHAR lastIndexW[] = {'l','a','s','t','I','n','d','e','x',0};
92 static const WCHAR toStringW[] = {'t','o','S','t','r','i','n','g',0};
93 static const WCHAR toLocaleStringW[] = {'t','o','L','o','c','a','l','e','S','t','r','i','n','g',0};
94 static const WCHAR hasOwnPropertyW[] = {'h','a','s','O','w','n','P','r','o','p','e','r','t','y',0};
95 static const WCHAR propertyIsEnumerableW[] =
96 {'p','r','o','p','e','r','t','y','I','s','E','n','u','m','e','r','a','b','l','e',0};
97 static const WCHAR isPrototypeOfW[] = {'i','s','P','r','o','t','o','t','y','p','e','O','f',0};
98 static const WCHAR execW[] = {'e','x','e','c',0};
100 static const WCHAR emptyW[] = {0};
102 /* FIXME: Better error handling */
103 #define ReportRegExpError(a,b,c)
104 #define ReportRegExpErrorHelper(a,b,c,d)
105 #define JS_ReportErrorNumber(a,b,c,d)
106 #define JS_ReportErrorFlagsAndNumber(a,b,c,d,e,f)
107 #define js_ReportOutOfScriptQuota(a)
108 #define JS_ReportOutOfMemory(a)
109 #define JS_COUNT_OPERATION(a,b)
111 #define JSMSG_MIN_TOO_BIG 47
112 #define JSMSG_MAX_TOO_BIG 48
113 #define JSMSG_OUT_OF_ORDER 49
114 #define JSMSG_OUT_OF_MEMORY 137
116 #define LINE_SEPARATOR 0x2028
117 #define PARA_SEPARATOR 0x2029
119 #define RE_IS_LETTER(c) (((c >= 'A') && (c <= 'Z')) || \
120 ((c >= 'a') && (c <= 'z')) )
121 #define RE_IS_LINE_TERM(c) ((c == '\n') || (c == '\r') || \
122 (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
124 #define JS_ISWORD(c) ((c) < 128 && (isalnum(c) || (c) == '_'))
126 #define JS7_ISDEC(c) ((((unsigned)(c)) - '0') <= 9)
127 #define JS7_UNDEC(c) ((c) - '0')
129 typedef enum REOp {
130 REOP_EMPTY,
131 REOP_BOL,
132 REOP_EOL,
133 REOP_WBDRY,
134 REOP_WNONBDRY,
135 REOP_DOT,
136 REOP_DIGIT,
137 REOP_NONDIGIT,
138 REOP_ALNUM,
139 REOP_NONALNUM,
140 REOP_SPACE,
141 REOP_NONSPACE,
142 REOP_BACKREF,
143 REOP_FLAT,
144 REOP_FLAT1,
145 REOP_FLATi,
146 REOP_FLAT1i,
147 REOP_UCFLAT1,
148 REOP_UCFLAT1i,
149 REOP_UCFLAT,
150 REOP_UCFLATi,
151 REOP_CLASS,
152 REOP_NCLASS,
153 REOP_ALT,
154 REOP_QUANT,
155 REOP_STAR,
156 REOP_PLUS,
157 REOP_OPT,
158 REOP_LPAREN,
159 REOP_RPAREN,
160 REOP_JUMP,
161 REOP_DOTSTAR,
162 REOP_LPARENNON,
163 REOP_ASSERT,
164 REOP_ASSERT_NOT,
165 REOP_ASSERTTEST,
166 REOP_ASSERTNOTTEST,
167 REOP_MINIMALSTAR,
168 REOP_MINIMALPLUS,
169 REOP_MINIMALOPT,
170 REOP_MINIMALQUANT,
171 REOP_ENDCHILD,
172 REOP_REPEAT,
173 REOP_MINIMALREPEAT,
174 REOP_ALTPREREQ,
175 REOP_ALTPREREQ2,
176 REOP_ENDALT,
177 REOP_CONCAT,
178 REOP_END,
179 REOP_LIMIT /* META: no operator >= to this */
180 } REOp;
182 #define REOP_IS_SIMPLE(op) ((op) <= REOP_NCLASS)
184 static const char *reop_names[] = {
185 "empty",
186 "bol",
187 "eol",
188 "wbdry",
189 "wnonbdry",
190 "dot",
191 "digit",
192 "nondigit",
193 "alnum",
194 "nonalnum",
195 "space",
196 "nonspace",
197 "backref",
198 "flat",
199 "flat1",
200 "flati",
201 "flat1i",
202 "ucflat1",
203 "ucflat1i",
204 "ucflat",
205 "ucflati",
206 "class",
207 "nclass",
208 "alt",
209 "quant",
210 "star",
211 "plus",
212 "opt",
213 "lparen",
214 "rparen",
215 "jump",
216 "dotstar",
217 "lparennon",
218 "assert",
219 "assert_not",
220 "asserttest",
221 "assertnottest",
222 "minimalstar",
223 "minimalplus",
224 "minimalopt",
225 "minimalquant",
226 "endchild",
227 "repeat",
228 "minimalrepeat",
229 "altprereq",
230 "alrprereq2",
231 "endalt",
232 "concat",
233 "end",
234 NULL
237 typedef struct RECapture {
238 ptrdiff_t index; /* start of contents, -1 for empty */
239 size_t length; /* length of capture */
240 } RECapture;
242 typedef struct REMatchState {
243 const WCHAR *cp;
244 RECapture parens[1]; /* first of 're->parenCount' captures,
245 allocated at end of this struct */
246 } REMatchState;
248 typedef struct REProgState {
249 jsbytecode *continue_pc; /* current continuation data */
250 jsbytecode continue_op;
251 ptrdiff_t index; /* progress in text */
252 size_t parenSoFar; /* highest indexed paren started */
253 union {
254 struct {
255 UINT min; /* current quantifier limits */
256 UINT max;
257 } quantifier;
258 struct {
259 size_t top; /* backtrack stack state */
260 size_t sz;
261 } assertion;
262 } u;
263 } REProgState;
265 typedef struct REBackTrackData {
266 size_t sz; /* size of previous stack entry */
267 jsbytecode *backtrack_pc; /* where to backtrack to */
268 jsbytecode backtrack_op;
269 const WCHAR *cp; /* index in text of match at backtrack */
270 size_t parenIndex; /* start index of saved paren contents */
271 size_t parenCount; /* # of saved paren contents */
272 size_t saveStateStackTop; /* number of parent states */
273 /* saved parent states follow */
274 /* saved paren contents follow */
275 } REBackTrackData;
277 #define INITIAL_STATESTACK 100
278 #define INITIAL_BACKTRACK 8000
280 typedef struct REGlobalData {
281 script_ctx_t *cx;
282 JSRegExp *regexp; /* the RE in execution */
283 BOOL ok; /* runtime error (out_of_memory only?) */
284 size_t start; /* offset to start at */
285 ptrdiff_t skipped; /* chars skipped anchoring this r.e. */
286 const WCHAR *cpbegin; /* text base address */
287 const WCHAR *cpend; /* text limit address */
289 REProgState *stateStack; /* stack of state of current parents */
290 size_t stateStackTop;
291 size_t stateStackLimit;
293 REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
294 REBackTrackData *backTrackSP;
295 size_t backTrackStackSize;
296 size_t cursz; /* size of current stack entry */
297 size_t backTrackCount; /* how many times we've backtracked */
298 size_t backTrackLimit; /* upper limit on backtrack states */
300 jsheap_t *pool; /* It's faster to use one malloc'd pool
301 than to malloc/free the three items
302 that are allocated from this pool */
303 } REGlobalData;
305 typedef struct RENode RENode;
306 struct RENode {
307 REOp op; /* r.e. op bytecode */
308 RENode *next; /* next in concatenation order */
309 void *kid; /* first operand */
310 union {
311 void *kid2; /* second operand */
312 INT num; /* could be a number */
313 size_t parenIndex; /* or a parenthesis index */
314 struct { /* or a quantifier range */
315 UINT min;
316 UINT max;
317 JSPackedBool greedy;
318 } range;
319 struct { /* or a character class */
320 size_t startIndex;
321 size_t kidlen; /* length of string at kid, in jschars */
322 size_t index; /* index into class list */
323 WORD bmsize; /* bitmap size, based on max char code */
324 JSPackedBool sense;
325 } ucclass;
326 struct { /* or a literal sequence */
327 WCHAR chr; /* of one character */
328 size_t length; /* or many (via the kid) */
329 } flat;
330 struct {
331 RENode *kid2; /* second operand from ALT */
332 WCHAR ch1; /* match char for ALTPREREQ */
333 WCHAR ch2; /* ditto, or class index for ALTPREREQ2 */
334 } altprereq;
335 } u;
338 #define CLASS_CACHE_SIZE 4
340 typedef struct CompilerState {
341 script_ctx_t *context;
342 const WCHAR *cpbegin;
343 const WCHAR *cpend;
344 const WCHAR *cp;
345 size_t parenCount;
346 size_t classCount; /* number of [] encountered */
347 size_t treeDepth; /* maximum depth of parse tree */
348 size_t progLength; /* estimated bytecode length */
349 RENode *result;
350 size_t classBitmapsMem; /* memory to hold all class bitmaps */
351 struct {
352 const WCHAR *start; /* small cache of class strings */
353 size_t length; /* since they're often the same */
354 size_t index;
355 } classCache[CLASS_CACHE_SIZE];
356 WORD flags;
357 } CompilerState;
359 typedef struct EmitStateStackEntry {
360 jsbytecode *altHead; /* start of REOP_ALT* opcode */
361 jsbytecode *nextAltFixup; /* fixup pointer to next-alt offset */
362 jsbytecode *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */
363 jsbytecode *endTermFixup; /* fixup ptr. to REOPT_ALTPREREQ* offset */
364 RENode *continueNode; /* original REOP_ALT* node being stacked */
365 jsbytecode continueOp; /* REOP_JUMP or REOP_ENDALT continuation */
366 JSPackedBool jumpToJumpFlag; /* true if we've patched jump-to-jump to
367 avoid 16-bit unsigned offset overflow */
368 } EmitStateStackEntry;
371 * Immediate operand sizes and getter/setters. Unlike the ones in jsopcode.h,
372 * the getters and setters take the pc of the offset, not of the opcode before
373 * the offset.
375 #define ARG_LEN 2
376 #define GET_ARG(pc) ((WORD)(((pc)[0] << 8) | (pc)[1]))
377 #define SET_ARG(pc, arg) ((pc)[0] = (jsbytecode) ((arg) >> 8), \
378 (pc)[1] = (jsbytecode) (arg))
380 #define OFFSET_LEN ARG_LEN
381 #define OFFSET_MAX ((1 << (ARG_LEN * 8)) - 1)
382 #define GET_OFFSET(pc) GET_ARG(pc)
384 static BOOL ParseRegExp(CompilerState*);
387 * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
388 * For sanity, we limit it to 2^24 bytes.
390 #define TREE_DEPTH_MAX ((1 << 24) / sizeof(EmitStateStackEntry))
393 * The maximum memory that can be allocated for class bitmaps.
394 * For sanity, we limit it to 2^24 bytes.
396 #define CLASS_BITMAPS_MEM_LIMIT (1 << 24)
399 * Functions to get size and write/read bytecode that represent small indexes
400 * compactly.
401 * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
402 * indicates that the following byte brings more bits to the index. Otherwise
403 * this is the last byte in the index bytecode representing highest index bits.
405 static size_t
406 GetCompactIndexWidth(size_t index)
408 size_t width;
410 for (width = 1; (index >>= 7) != 0; ++width) { }
411 return width;
414 static inline jsbytecode *
415 WriteCompactIndex(jsbytecode *pc, size_t index)
417 size_t next;
419 while ((next = index >> 7) != 0) {
420 *pc++ = (jsbytecode)(index | 0x80);
421 index = next;
423 *pc++ = (jsbytecode)index;
424 return pc;
427 static inline jsbytecode *
428 ReadCompactIndex(jsbytecode *pc, size_t *result)
430 size_t nextByte;
432 nextByte = *pc++;
433 if ((nextByte & 0x80) == 0) {
435 * Short-circuit the most common case when compact index <= 127.
437 *result = nextByte;
438 } else {
439 size_t shift = 7;
440 *result = 0x7F & nextByte;
441 do {
442 nextByte = *pc++;
443 *result |= (nextByte & 0x7F) << shift;
444 shift += 7;
445 } while ((nextByte & 0x80) != 0);
447 return pc;
450 /* Construct and initialize an RENode, returning NULL for out-of-memory */
451 static RENode *
452 NewRENode(CompilerState *state, REOp op)
454 RENode *ren;
456 ren = jsheap_alloc(&state->context->tmp_heap, sizeof(*ren));
457 if (!ren) {
458 /* js_ReportOutOfScriptQuota(cx); */
459 return NULL;
461 ren->op = op;
462 ren->next = NULL;
463 ren->kid = NULL;
464 return ren;
468 * Validates and converts hex ascii value.
470 static BOOL
471 isASCIIHexDigit(WCHAR c, UINT *digit)
473 UINT cv = c;
475 if (cv < '0')
476 return FALSE;
477 if (cv <= '9') {
478 *digit = cv - '0';
479 return TRUE;
481 cv |= 0x20;
482 if (cv >= 'a' && cv <= 'f') {
483 *digit = cv - 'a' + 10;
484 return TRUE;
486 return FALSE;
489 typedef struct {
490 REOp op;
491 const WCHAR *errPos;
492 size_t parenIndex;
493 } REOpData;
495 #define JUMP_OFFSET_HI(off) ((jsbytecode)((off) >> 8))
496 #define JUMP_OFFSET_LO(off) ((jsbytecode)(off))
498 static BOOL
499 SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
501 ptrdiff_t offset = target - jump;
503 /* Check that target really points forward. */
504 assert(offset >= 2);
505 if ((size_t)offset > OFFSET_MAX)
506 return FALSE;
508 jump[0] = JUMP_OFFSET_HI(offset);
509 jump[1] = JUMP_OFFSET_LO(offset);
510 return TRUE;
514 * Generate bytecode for the tree rooted at t using an explicit stack instead
515 * of recursion.
517 static jsbytecode *
518 EmitREBytecode(CompilerState *state, JSRegExp *re, size_t treeDepth,
519 jsbytecode *pc, RENode *t)
521 EmitStateStackEntry *emitStateSP, *emitStateStack;
522 RECharSet *charSet;
523 REOp op;
525 if (treeDepth == 0) {
526 emitStateStack = NULL;
527 } else {
528 emitStateStack = heap_alloc(sizeof(EmitStateStackEntry) * treeDepth);
529 if (!emitStateStack)
530 return NULL;
532 emitStateSP = emitStateStack;
533 op = t->op;
534 assert(op < REOP_LIMIT);
536 for (;;) {
537 *pc++ = op;
538 switch (op) {
539 case REOP_EMPTY:
540 --pc;
541 break;
543 case REOP_ALTPREREQ2:
544 case REOP_ALTPREREQ:
545 assert(emitStateSP);
546 emitStateSP->altHead = pc - 1;
547 emitStateSP->endTermFixup = pc;
548 pc += OFFSET_LEN;
549 SET_ARG(pc, t->u.altprereq.ch1);
550 pc += ARG_LEN;
551 SET_ARG(pc, t->u.altprereq.ch2);
552 pc += ARG_LEN;
554 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
555 pc += OFFSET_LEN;
557 emitStateSP->continueNode = t;
558 emitStateSP->continueOp = REOP_JUMP;
559 emitStateSP->jumpToJumpFlag = FALSE;
560 ++emitStateSP;
561 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
562 t = t->kid;
563 op = t->op;
564 assert(op < REOP_LIMIT);
565 continue;
567 case REOP_JUMP:
568 emitStateSP->nextTermFixup = pc; /* offset to following term */
569 pc += OFFSET_LEN;
570 if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc))
571 goto jump_too_big;
572 emitStateSP->continueOp = REOP_ENDALT;
573 ++emitStateSP;
574 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
575 t = t->u.kid2;
576 op = t->op;
577 assert(op < REOP_LIMIT);
578 continue;
580 case REOP_ENDALT:
582 * If we already patched emitStateSP->nextTermFixup to jump to
583 * a nearer jump, to avoid 16-bit immediate offset overflow, we
584 * are done here.
586 if (emitStateSP->jumpToJumpFlag)
587 break;
590 * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
591 * REOP_ENDALT is executed only on successful match of the last
592 * alternate in a group.
594 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
595 goto jump_too_big;
596 if (t->op != REOP_ALT) {
597 if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc))
598 goto jump_too_big;
602 * If the program is bigger than the REOP_JUMP offset range, then
603 * we must check for alternates before this one that are part of
604 * the same group, and fix up their jump offsets to target jumps
605 * close enough to fit in a 16-bit unsigned offset immediate.
607 if ((size_t)(pc - re->program) > OFFSET_MAX &&
608 emitStateSP > emitStateStack) {
609 EmitStateStackEntry *esp, *esp2;
610 jsbytecode *alt, *jump;
611 ptrdiff_t span, header;
613 esp2 = emitStateSP;
614 alt = esp2->altHead;
615 for (esp = esp2 - 1; esp >= emitStateStack; --esp) {
616 if (esp->continueOp == REOP_ENDALT &&
617 !esp->jumpToJumpFlag &&
618 esp->nextTermFixup + OFFSET_LEN == alt &&
619 (size_t)(pc - ((esp->continueNode->op != REOP_ALT)
620 ? esp->endTermFixup
621 : esp->nextTermFixup)) > OFFSET_MAX) {
622 alt = esp->altHead;
623 jump = esp->nextTermFixup;
626 * The span must be 1 less than the distance from
627 * jump offset to jump offset, so we actually jump
628 * to a REOP_JUMP bytecode, not to its offset!
630 for (;;) {
631 assert(jump < esp2->nextTermFixup);
632 span = esp2->nextTermFixup - jump - 1;
633 if ((size_t)span <= OFFSET_MAX)
634 break;
635 do {
636 if (--esp2 == esp)
637 goto jump_too_big;
638 } while (esp2->continueOp != REOP_ENDALT);
641 jump[0] = JUMP_OFFSET_HI(span);
642 jump[1] = JUMP_OFFSET_LO(span);
644 if (esp->continueNode->op != REOP_ALT) {
646 * We must patch the offset at esp->endTermFixup
647 * as well, for the REOP_ALTPREREQ{,2} opcodes.
648 * If we're unlucky and endTermFixup is more than
649 * OFFSET_MAX bytes from its target, we cheat by
650 * jumping 6 bytes to the jump whose offset is at
651 * esp->nextTermFixup, which has the same target.
653 jump = esp->endTermFixup;
654 header = esp->nextTermFixup - jump;
655 span += header;
656 if ((size_t)span > OFFSET_MAX)
657 span = header;
659 jump[0] = JUMP_OFFSET_HI(span);
660 jump[1] = JUMP_OFFSET_LO(span);
663 esp->jumpToJumpFlag = TRUE;
667 break;
669 case REOP_ALT:
670 assert(emitStateSP);
671 emitStateSP->altHead = pc - 1;
672 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
673 pc += OFFSET_LEN;
674 emitStateSP->continueNode = t;
675 emitStateSP->continueOp = REOP_JUMP;
676 emitStateSP->jumpToJumpFlag = FALSE;
677 ++emitStateSP;
678 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
679 t = t->kid;
680 op = t->op;
681 assert(op < REOP_LIMIT);
682 continue;
684 case REOP_FLAT:
686 * Coalesce FLATs if possible and if it would not increase bytecode
687 * beyond preallocated limit. The latter happens only when bytecode
688 * size for coalesced string with offset p and length 2 exceeds 6
689 * bytes preallocated for 2 single char nodes, i.e. when
690 * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
691 * GetCompactIndexWidth(p) > 4.
692 * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
693 * nodes strictly decreases bytecode size, the check has to be
694 * done only for the first coalescing.
696 if (t->kid &&
697 GetCompactIndexWidth((WCHAR*)t->kid - state->cpbegin) <= 4)
699 while (t->next &&
700 t->next->op == REOP_FLAT &&
701 (WCHAR*)t->kid + t->u.flat.length ==
702 t->next->kid) {
703 t->u.flat.length += t->next->u.flat.length;
704 t->next = t->next->next;
707 if (t->kid && t->u.flat.length > 1) {
708 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLATi : REOP_FLAT;
709 pc = WriteCompactIndex(pc, (WCHAR*)t->kid - state->cpbegin);
710 pc = WriteCompactIndex(pc, t->u.flat.length);
711 } else if (t->u.flat.chr < 256) {
712 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLAT1i : REOP_FLAT1;
713 *pc++ = (jsbytecode) t->u.flat.chr;
714 } else {
715 pc[-1] = (state->flags & JSREG_FOLD)
716 ? REOP_UCFLAT1i
717 : REOP_UCFLAT1;
718 SET_ARG(pc, t->u.flat.chr);
719 pc += ARG_LEN;
721 break;
723 case REOP_LPAREN:
724 assert(emitStateSP);
725 pc = WriteCompactIndex(pc, t->u.parenIndex);
726 emitStateSP->continueNode = t;
727 emitStateSP->continueOp = REOP_RPAREN;
728 ++emitStateSP;
729 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
730 t = t->kid;
731 op = t->op;
732 continue;
734 case REOP_RPAREN:
735 pc = WriteCompactIndex(pc, t->u.parenIndex);
736 break;
738 case REOP_BACKREF:
739 pc = WriteCompactIndex(pc, t->u.parenIndex);
740 break;
742 case REOP_ASSERT:
743 assert(emitStateSP);
744 emitStateSP->nextTermFixup = pc;
745 pc += OFFSET_LEN;
746 emitStateSP->continueNode = t;
747 emitStateSP->continueOp = REOP_ASSERTTEST;
748 ++emitStateSP;
749 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
750 t = t->kid;
751 op = t->op;
752 continue;
754 case REOP_ASSERTTEST:
755 case REOP_ASSERTNOTTEST:
756 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
757 goto jump_too_big;
758 break;
760 case REOP_ASSERT_NOT:
761 assert(emitStateSP);
762 emitStateSP->nextTermFixup = pc;
763 pc += OFFSET_LEN;
764 emitStateSP->continueNode = t;
765 emitStateSP->continueOp = REOP_ASSERTNOTTEST;
766 ++emitStateSP;
767 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
768 t = t->kid;
769 op = t->op;
770 continue;
772 case REOP_QUANT:
773 assert(emitStateSP);
774 if (t->u.range.min == 0 && t->u.range.max == (UINT)-1) {
775 pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
776 } else if (t->u.range.min == 0 && t->u.range.max == 1) {
777 pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
778 } else if (t->u.range.min == 1 && t->u.range.max == (UINT) -1) {
779 pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
780 } else {
781 if (!t->u.range.greedy)
782 pc[-1] = REOP_MINIMALQUANT;
783 pc = WriteCompactIndex(pc, t->u.range.min);
785 * Write max + 1 to avoid using size_t(max) + 1 bytes
786 * for (UINT)-1 sentinel.
788 pc = WriteCompactIndex(pc, t->u.range.max + 1);
790 emitStateSP->nextTermFixup = pc;
791 pc += OFFSET_LEN;
792 emitStateSP->continueNode = t;
793 emitStateSP->continueOp = REOP_ENDCHILD;
794 ++emitStateSP;
795 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
796 t = t->kid;
797 op = t->op;
798 continue;
800 case REOP_ENDCHILD:
801 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
802 goto jump_too_big;
803 break;
805 case REOP_CLASS:
806 if (!t->u.ucclass.sense)
807 pc[-1] = REOP_NCLASS;
808 pc = WriteCompactIndex(pc, t->u.ucclass.index);
809 charSet = &re->classList[t->u.ucclass.index];
810 charSet->converted = FALSE;
811 charSet->length = t->u.ucclass.bmsize;
812 charSet->u.src.startIndex = t->u.ucclass.startIndex;
813 charSet->u.src.length = t->u.ucclass.kidlen;
814 charSet->sense = t->u.ucclass.sense;
815 break;
817 default:
818 break;
821 t = t->next;
822 if (t) {
823 op = t->op;
824 } else {
825 if (emitStateSP == emitStateStack)
826 break;
827 --emitStateSP;
828 t = emitStateSP->continueNode;
829 op = (REOp) emitStateSP->continueOp;
833 cleanup:
834 heap_free(emitStateStack);
835 return pc;
837 jump_too_big:
838 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
839 pc = NULL;
840 goto cleanup;
844 * Process the op against the two top operands, reducing them to a single
845 * operand in the penultimate slot. Update progLength and treeDepth.
847 static BOOL
848 ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack,
849 INT operandSP)
851 RENode *result;
853 switch (opData->op) {
854 case REOP_ALT:
855 result = NewRENode(state, REOP_ALT);
856 if (!result)
857 return FALSE;
858 result->kid = operandStack[operandSP - 2];
859 result->u.kid2 = operandStack[operandSP - 1];
860 operandStack[operandSP - 2] = result;
862 if (state->treeDepth == TREE_DEPTH_MAX) {
863 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
864 return FALSE;
866 ++state->treeDepth;
869 * Look at both alternates to see if there's a FLAT or a CLASS at
870 * the start of each. If so, use a prerequisite match.
872 if (((RENode *) result->kid)->op == REOP_FLAT &&
873 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
874 (state->flags & JSREG_FOLD) == 0) {
875 result->op = REOP_ALTPREREQ;
876 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
877 result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
878 /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
879 JUMP, <end> ... ENDALT */
880 state->progLength += 13;
882 else
883 if (((RENode *) result->kid)->op == REOP_CLASS &&
884 ((RENode *) result->kid)->u.ucclass.index < 256 &&
885 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
886 (state->flags & JSREG_FOLD) == 0) {
887 result->op = REOP_ALTPREREQ2;
888 result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
889 result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
890 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
891 JUMP, <end> ... ENDALT */
892 state->progLength += 13;
894 else
895 if (((RENode *) result->kid)->op == REOP_FLAT &&
896 ((RENode *) result->u.kid2)->op == REOP_CLASS &&
897 ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
898 (state->flags & JSREG_FOLD) == 0) {
899 result->op = REOP_ALTPREREQ2;
900 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
901 result->u.altprereq.ch2 =
902 ((RENode *) result->u.kid2)->u.ucclass.index;
903 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
904 JUMP, <end> ... ENDALT */
905 state->progLength += 13;
907 else {
908 /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
909 state->progLength += 7;
911 break;
913 case REOP_CONCAT:
914 result = operandStack[operandSP - 2];
915 while (result->next)
916 result = result->next;
917 result->next = operandStack[operandSP - 1];
918 break;
920 case REOP_ASSERT:
921 case REOP_ASSERT_NOT:
922 case REOP_LPARENNON:
923 case REOP_LPAREN:
924 /* These should have been processed by a close paren. */
925 ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_MISSING_PAREN,
926 opData->errPos);
927 return FALSE;
929 default:;
931 return TRUE;
935 * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
936 * its being on the stack, and to propagate errors to its callers.
938 #define JSREG_FIND_PAREN_COUNT 0x8000
939 #define JSREG_FIND_PAREN_ERROR 0x4000
942 * Magic return value from FindParenCount and GetDecimalValue, to indicate
943 * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
944 * its findMax parameter is non-null.
946 #define OVERFLOW_VALUE ((UINT)-1)
948 static UINT
949 FindParenCount(CompilerState *state)
951 CompilerState temp;
952 int i;
954 if (state->flags & JSREG_FIND_PAREN_COUNT)
955 return OVERFLOW_VALUE;
958 * Copy state into temp, flag it so we never report an invalid backref,
959 * and reset its members to parse the entire regexp. This is obviously
960 * suboptimal, but GetDecimalValue calls us only if a backref appears to
961 * refer to a forward parenthetical, which is rare.
963 temp = *state;
964 temp.flags |= JSREG_FIND_PAREN_COUNT;
965 temp.cp = temp.cpbegin;
966 temp.parenCount = 0;
967 temp.classCount = 0;
968 temp.progLength = 0;
969 temp.treeDepth = 0;
970 temp.classBitmapsMem = 0;
971 for (i = 0; i < CLASS_CACHE_SIZE; i++)
972 temp.classCache[i].start = NULL;
974 if (!ParseRegExp(&temp)) {
975 state->flags |= JSREG_FIND_PAREN_ERROR;
976 return OVERFLOW_VALUE;
978 return temp.parenCount;
982 * Extract and return a decimal value at state->cp. The initial character c
983 * has already been read. Return OVERFLOW_VALUE if the result exceeds max.
984 * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
985 * state->flags to discover whether an error occurred under findMax.
987 static UINT
988 GetDecimalValue(WCHAR c, UINT max, UINT (*findMax)(CompilerState *state),
989 CompilerState *state)
991 UINT value = JS7_UNDEC(c);
992 BOOL overflow = (value > max && (!findMax || value > findMax(state)));
994 /* The following restriction allows simpler overflow checks. */
995 assert(max <= ((UINT)-1 - 9) / 10);
996 while (state->cp < state->cpend) {
997 c = *state->cp;
998 if (!JS7_ISDEC(c))
999 break;
1000 value = 10 * value + JS7_UNDEC(c);
1001 if (!overflow && value > max && (!findMax || value > findMax(state)))
1002 overflow = TRUE;
1003 ++state->cp;
1005 return overflow ? OVERFLOW_VALUE : value;
1009 * Calculate the total size of the bitmap required for a class expression.
1011 static BOOL
1012 CalculateBitmapSize(CompilerState *state, RENode *target, const WCHAR *src,
1013 const WCHAR *end)
1015 UINT max = 0;
1016 BOOL inRange = FALSE;
1017 WCHAR c, rangeStart = 0;
1018 UINT n, digit, nDigits, i;
1020 target->u.ucclass.bmsize = 0;
1021 target->u.ucclass.sense = TRUE;
1023 if (src == end)
1024 return TRUE;
1026 if (*src == '^') {
1027 ++src;
1028 target->u.ucclass.sense = FALSE;
1031 while (src != end) {
1032 BOOL canStartRange = TRUE;
1033 UINT localMax = 0;
1035 switch (*src) {
1036 case '\\':
1037 ++src;
1038 c = *src++;
1039 switch (c) {
1040 case 'b':
1041 localMax = 0x8;
1042 break;
1043 case 'f':
1044 localMax = 0xC;
1045 break;
1046 case 'n':
1047 localMax = 0xA;
1048 break;
1049 case 'r':
1050 localMax = 0xD;
1051 break;
1052 case 't':
1053 localMax = 0x9;
1054 break;
1055 case 'v':
1056 localMax = 0xB;
1057 break;
1058 case 'c':
1059 if (src < end && RE_IS_LETTER(*src)) {
1060 localMax = (UINT) (*src++) & 0x1F;
1061 } else {
1062 --src;
1063 localMax = '\\';
1065 break;
1066 case 'x':
1067 nDigits = 2;
1068 goto lexHex;
1069 case 'u':
1070 nDigits = 4;
1071 lexHex:
1072 n = 0;
1073 for (i = 0; (i < nDigits) && (src < end); i++) {
1074 c = *src++;
1075 if (!isASCIIHexDigit(c, &digit)) {
1077 * Back off to accepting the original
1078 *'\' as a literal.
1080 src -= i + 1;
1081 n = '\\';
1082 break;
1084 n = (n << 4) | digit;
1086 localMax = n;
1087 break;
1088 case 'd':
1089 canStartRange = FALSE;
1090 if (inRange) {
1091 JS_ReportErrorNumber(state->context,
1092 js_GetErrorMessage, NULL,
1093 JSMSG_BAD_CLASS_RANGE);
1094 return FALSE;
1096 localMax = '9';
1097 break;
1098 case 'D':
1099 case 's':
1100 case 'S':
1101 case 'w':
1102 case 'W':
1103 canStartRange = FALSE;
1104 if (inRange) {
1105 JS_ReportErrorNumber(state->context,
1106 js_GetErrorMessage, NULL,
1107 JSMSG_BAD_CLASS_RANGE);
1108 return FALSE;
1110 max = 65535;
1113 * If this is the start of a range, ensure that it's less than
1114 * the end.
1116 localMax = 0;
1117 break;
1118 case '0':
1119 case '1':
1120 case '2':
1121 case '3':
1122 case '4':
1123 case '5':
1124 case '6':
1125 case '7':
1127 * This is a non-ECMA extension - decimal escapes (in this
1128 * case, octal!) are supposed to be an error inside class
1129 * ranges, but supported here for backwards compatibility.
1132 n = JS7_UNDEC(c);
1133 c = *src;
1134 if ('0' <= c && c <= '7') {
1135 src++;
1136 n = 8 * n + JS7_UNDEC(c);
1137 c = *src;
1138 if ('0' <= c && c <= '7') {
1139 src++;
1140 i = 8 * n + JS7_UNDEC(c);
1141 if (i <= 0377)
1142 n = i;
1143 else
1144 src--;
1147 localMax = n;
1148 break;
1150 default:
1151 localMax = c;
1152 break;
1154 break;
1155 default:
1156 localMax = *src++;
1157 break;
1160 if (inRange) {
1161 /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
1162 if (rangeStart > localMax) {
1163 JS_ReportErrorNumber(state->context,
1164 js_GetErrorMessage, NULL,
1165 JSMSG_BAD_CLASS_RANGE);
1166 return FALSE;
1168 inRange = FALSE;
1169 } else {
1170 if (canStartRange && src < end - 1) {
1171 if (*src == '-') {
1172 ++src;
1173 inRange = TRUE;
1174 rangeStart = (WCHAR)localMax;
1175 continue;
1178 if (state->flags & JSREG_FOLD)
1179 rangeStart = localMax; /* one run of the uc/dc loop below */
1182 if (state->flags & JSREG_FOLD) {
1183 WCHAR maxch = localMax;
1185 for (i = rangeStart; i <= localMax; i++) {
1186 WCHAR uch, dch;
1188 uch = toupperW(i);
1189 dch = tolowerW(i);
1190 if(maxch < uch)
1191 maxch = uch;
1192 if(maxch < dch)
1193 maxch = dch;
1195 localMax = maxch;
1198 if (localMax > max)
1199 max = localMax;
1201 target->u.ucclass.bmsize = max;
1202 return TRUE;
1205 static INT
1206 ParseMinMaxQuantifier(CompilerState *state, BOOL ignoreValues)
1208 UINT min, max;
1209 WCHAR c;
1210 const WCHAR *errp = state->cp++;
1212 c = *state->cp;
1213 if (JS7_ISDEC(c)) {
1214 ++state->cp;
1215 min = GetDecimalValue(c, 0xFFFF, NULL, state);
1216 c = *state->cp;
1218 if (!ignoreValues && min == OVERFLOW_VALUE)
1219 return JSMSG_MIN_TOO_BIG;
1221 if (c == ',') {
1222 c = *++state->cp;
1223 if (JS7_ISDEC(c)) {
1224 ++state->cp;
1225 max = GetDecimalValue(c, 0xFFFF, NULL, state);
1226 c = *state->cp;
1227 if (!ignoreValues && max == OVERFLOW_VALUE)
1228 return JSMSG_MAX_TOO_BIG;
1229 if (!ignoreValues && min > max)
1230 return JSMSG_OUT_OF_ORDER;
1231 } else {
1232 max = (UINT)-1;
1234 } else {
1235 max = min;
1237 if (c == '}') {
1238 state->result = NewRENode(state, REOP_QUANT);
1239 if (!state->result)
1240 return JSMSG_OUT_OF_MEMORY;
1241 state->result->u.range.min = min;
1242 state->result->u.range.max = max;
1244 * QUANT, <min>, <max>, <next> ... <ENDCHILD>
1245 * where <max> is written as compact(max+1) to make
1246 * (UINT)-1 sentinel to occupy 1 byte, not width_of(max)+1.
1248 state->progLength += (1 + GetCompactIndexWidth(min)
1249 + GetCompactIndexWidth(max + 1)
1250 +3);
1251 return 0;
1255 state->cp = errp;
1256 return -1;
1259 static BOOL
1260 ParseQuantifier(CompilerState *state)
1262 RENode *term;
1263 term = state->result;
1264 if (state->cp < state->cpend) {
1265 switch (*state->cp) {
1266 case '+':
1267 state->result = NewRENode(state, REOP_QUANT);
1268 if (!state->result)
1269 return FALSE;
1270 state->result->u.range.min = 1;
1271 state->result->u.range.max = (UINT)-1;
1272 /* <PLUS>, <next> ... <ENDCHILD> */
1273 state->progLength += 4;
1274 goto quantifier;
1275 case '*':
1276 state->result = NewRENode(state, REOP_QUANT);
1277 if (!state->result)
1278 return FALSE;
1279 state->result->u.range.min = 0;
1280 state->result->u.range.max = (UINT)-1;
1281 /* <STAR>, <next> ... <ENDCHILD> */
1282 state->progLength += 4;
1283 goto quantifier;
1284 case '?':
1285 state->result = NewRENode(state, REOP_QUANT);
1286 if (!state->result)
1287 return FALSE;
1288 state->result->u.range.min = 0;
1289 state->result->u.range.max = 1;
1290 /* <OPT>, <next> ... <ENDCHILD> */
1291 state->progLength += 4;
1292 goto quantifier;
1293 case '{': /* balance '}' */
1295 INT err;
1297 err = ParseMinMaxQuantifier(state, FALSE);
1298 if (err == 0)
1299 goto quantifier;
1300 if (err == -1)
1301 return TRUE;
1303 ReportRegExpErrorHelper(state, JSREPORT_ERROR, err, errp);
1304 return FALSE;
1306 default:;
1309 return TRUE;
1311 quantifier:
1312 if (state->treeDepth == TREE_DEPTH_MAX) {
1313 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1314 return FALSE;
1317 ++state->treeDepth;
1318 ++state->cp;
1319 state->result->kid = term;
1320 if (state->cp < state->cpend && *state->cp == '?') {
1321 ++state->cp;
1322 state->result->u.range.greedy = FALSE;
1323 } else {
1324 state->result->u.range.greedy = TRUE;
1326 return TRUE;
1330 * item: assertion An item is either an assertion or
1331 * quantatom a quantified atom.
1333 * assertion: '^' Assertions match beginning of string
1334 * (or line if the class static property
1335 * RegExp.multiline is true).
1336 * '$' End of string (or line if the class
1337 * static property RegExp.multiline is
1338 * true).
1339 * '\b' Word boundary (between \w and \W).
1340 * '\B' Word non-boundary.
1342 * quantatom: atom An unquantified atom.
1343 * quantatom '{' n ',' m '}'
1344 * Atom must occur between n and m times.
1345 * quantatom '{' n ',' '}' Atom must occur at least n times.
1346 * quantatom '{' n '}' Atom must occur exactly n times.
1347 * quantatom '*' Zero or more times (same as {0,}).
1348 * quantatom '+' One or more times (same as {1,}).
1349 * quantatom '?' Zero or one time (same as {0,1}).
1351 * any of which can be optionally followed by '?' for ungreedy
1353 * atom: '(' regexp ')' A parenthesized regexp (what matched
1354 * can be addressed using a backreference,
1355 * see '\' n below).
1356 * '.' Matches any char except '\n'.
1357 * '[' classlist ']' A character class.
1358 * '[' '^' classlist ']' A negated character class.
1359 * '\f' Form Feed.
1360 * '\n' Newline (Line Feed).
1361 * '\r' Carriage Return.
1362 * '\t' Horizontal Tab.
1363 * '\v' Vertical Tab.
1364 * '\d' A digit (same as [0-9]).
1365 * '\D' A non-digit.
1366 * '\w' A word character, [0-9a-z_A-Z].
1367 * '\W' A non-word character.
1368 * '\s' A whitespace character, [ \b\f\n\r\t\v].
1369 * '\S' A non-whitespace character.
1370 * '\' n A backreference to the nth (n decimal
1371 * and positive) parenthesized expression.
1372 * '\' octal An octal escape sequence (octal must be
1373 * two or three digits long, unless it is
1374 * 0 for the null character).
1375 * '\x' hex A hex escape (hex must be two digits).
1376 * '\u' unicode A unicode escape (must be four digits).
1377 * '\c' ctrl A control character, ctrl is a letter.
1378 * '\' literalatomchar Any character except one of the above
1379 * that follow '\' in an atom.
1380 * otheratomchar Any character not first among the other
1381 * atom right-hand sides.
1383 static BOOL
1384 ParseTerm(CompilerState *state)
1386 WCHAR c = *state->cp++;
1387 UINT nDigits;
1388 UINT num, tmp, n, i;
1389 const WCHAR *termStart;
1391 switch (c) {
1392 /* assertions and atoms */
1393 case '^':
1394 state->result = NewRENode(state, REOP_BOL);
1395 if (!state->result)
1396 return FALSE;
1397 state->progLength++;
1398 return TRUE;
1399 case '$':
1400 state->result = NewRENode(state, REOP_EOL);
1401 if (!state->result)
1402 return FALSE;
1403 state->progLength++;
1404 return TRUE;
1405 case '\\':
1406 if (state->cp >= state->cpend) {
1407 /* a trailing '\' is an error */
1408 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_TRAILING_SLASH);
1409 return FALSE;
1411 c = *state->cp++;
1412 switch (c) {
1413 /* assertion escapes */
1414 case 'b' :
1415 state->result = NewRENode(state, REOP_WBDRY);
1416 if (!state->result)
1417 return FALSE;
1418 state->progLength++;
1419 return TRUE;
1420 case 'B':
1421 state->result = NewRENode(state, REOP_WNONBDRY);
1422 if (!state->result)
1423 return FALSE;
1424 state->progLength++;
1425 return TRUE;
1426 /* Decimal escape */
1427 case '0':
1428 /* Give a strict warning. See also the note below. */
1429 WARN("non-octal digit in an escape sequence that doesn't match a back-reference\n");
1430 doOctal:
1431 num = 0;
1432 while (state->cp < state->cpend) {
1433 c = *state->cp;
1434 if (c < '0' || '7' < c)
1435 break;
1436 state->cp++;
1437 tmp = 8 * num + (UINT)JS7_UNDEC(c);
1438 if (tmp > 0377)
1439 break;
1440 num = tmp;
1442 c = (WCHAR)num;
1443 doFlat:
1444 state->result = NewRENode(state, REOP_FLAT);
1445 if (!state->result)
1446 return FALSE;
1447 state->result->u.flat.chr = c;
1448 state->result->u.flat.length = 1;
1449 state->progLength += 3;
1450 break;
1451 case '1':
1452 case '2':
1453 case '3':
1454 case '4':
1455 case '5':
1456 case '6':
1457 case '7':
1458 case '8':
1459 case '9':
1460 termStart = state->cp - 1;
1461 num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
1462 if (state->flags & JSREG_FIND_PAREN_ERROR)
1463 return FALSE;
1464 if (num == OVERFLOW_VALUE) {
1465 /* Give a strict mode warning. */
1466 WARN("back-reference exceeds number of capturing parentheses\n");
1469 * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
1470 * error here. However, for compatibility with IE, we treat the
1471 * whole backref as flat if the first character in it is not a
1472 * valid octal character, and as an octal escape otherwise.
1474 state->cp = termStart;
1475 if (c >= '8') {
1476 /* Treat this as flat. termStart - 1 is the \. */
1477 c = '\\';
1478 goto asFlat;
1481 /* Treat this as an octal escape. */
1482 goto doOctal;
1484 assert(1 <= num && num <= 0x10000);
1485 state->result = NewRENode(state, REOP_BACKREF);
1486 if (!state->result)
1487 return FALSE;
1488 state->result->u.parenIndex = num - 1;
1489 state->progLength
1490 += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
1491 break;
1492 /* Control escape */
1493 case 'f':
1494 c = 0xC;
1495 goto doFlat;
1496 case 'n':
1497 c = 0xA;
1498 goto doFlat;
1499 case 'r':
1500 c = 0xD;
1501 goto doFlat;
1502 case 't':
1503 c = 0x9;
1504 goto doFlat;
1505 case 'v':
1506 c = 0xB;
1507 goto doFlat;
1508 /* Control letter */
1509 case 'c':
1510 if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) {
1511 c = (WCHAR) (*state->cp++ & 0x1F);
1512 } else {
1513 /* back off to accepting the original '\' as a literal */
1514 --state->cp;
1515 c = '\\';
1517 goto doFlat;
1518 /* HexEscapeSequence */
1519 case 'x':
1520 nDigits = 2;
1521 goto lexHex;
1522 /* UnicodeEscapeSequence */
1523 case 'u':
1524 nDigits = 4;
1525 lexHex:
1526 n = 0;
1527 for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
1528 UINT digit;
1529 c = *state->cp++;
1530 if (!isASCIIHexDigit(c, &digit)) {
1532 * Back off to accepting the original 'u' or 'x' as a
1533 * literal.
1535 state->cp -= i + 2;
1536 n = *state->cp++;
1537 break;
1539 n = (n << 4) | digit;
1541 c = (WCHAR) n;
1542 goto doFlat;
1543 /* Character class escapes */
1544 case 'd':
1545 state->result = NewRENode(state, REOP_DIGIT);
1546 doSimple:
1547 if (!state->result)
1548 return FALSE;
1549 state->progLength++;
1550 break;
1551 case 'D':
1552 state->result = NewRENode(state, REOP_NONDIGIT);
1553 goto doSimple;
1554 case 's':
1555 state->result = NewRENode(state, REOP_SPACE);
1556 goto doSimple;
1557 case 'S':
1558 state->result = NewRENode(state, REOP_NONSPACE);
1559 goto doSimple;
1560 case 'w':
1561 state->result = NewRENode(state, REOP_ALNUM);
1562 goto doSimple;
1563 case 'W':
1564 state->result = NewRENode(state, REOP_NONALNUM);
1565 goto doSimple;
1566 /* IdentityEscape */
1567 default:
1568 state->result = NewRENode(state, REOP_FLAT);
1569 if (!state->result)
1570 return FALSE;
1571 state->result->u.flat.chr = c;
1572 state->result->u.flat.length = 1;
1573 state->result->kid = (void *) (state->cp - 1);
1574 state->progLength += 3;
1575 break;
1577 break;
1578 case '[':
1579 state->result = NewRENode(state, REOP_CLASS);
1580 if (!state->result)
1581 return FALSE;
1582 termStart = state->cp;
1583 state->result->u.ucclass.startIndex = termStart - state->cpbegin;
1584 for (;;) {
1585 if (state->cp == state->cpend) {
1586 ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1587 JSMSG_UNTERM_CLASS, termStart);
1589 return FALSE;
1591 if (*state->cp == '\\') {
1592 state->cp++;
1593 if (state->cp != state->cpend)
1594 state->cp++;
1595 continue;
1597 if (*state->cp == ']') {
1598 state->result->u.ucclass.kidlen = state->cp - termStart;
1599 break;
1601 state->cp++;
1603 for (i = 0; i < CLASS_CACHE_SIZE; i++) {
1604 if (!state->classCache[i].start) {
1605 state->classCache[i].start = termStart;
1606 state->classCache[i].length = state->result->u.ucclass.kidlen;
1607 state->classCache[i].index = state->classCount;
1608 break;
1610 if (state->classCache[i].length ==
1611 state->result->u.ucclass.kidlen) {
1612 for (n = 0; ; n++) {
1613 if (n == state->classCache[i].length) {
1614 state->result->u.ucclass.index
1615 = state->classCache[i].index;
1616 goto claim;
1618 if (state->classCache[i].start[n] != termStart[n])
1619 break;
1623 state->result->u.ucclass.index = state->classCount++;
1625 claim:
1627 * Call CalculateBitmapSize now as we want any errors it finds
1628 * to be reported during the parse phase, not at execution.
1630 if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
1631 return FALSE;
1633 * Update classBitmapsMem with number of bytes to hold bmsize bits,
1634 * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
1635 * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
1637 n = (state->result->u.ucclass.bmsize >> 3) + 1;
1638 if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
1639 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1640 return FALSE;
1642 state->classBitmapsMem += n;
1643 /* CLASS, <index> */
1644 state->progLength
1645 += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
1646 break;
1648 case '.':
1649 state->result = NewRENode(state, REOP_DOT);
1650 goto doSimple;
1652 case '{':
1654 const WCHAR *errp = state->cp--;
1655 INT err;
1657 err = ParseMinMaxQuantifier(state, TRUE);
1658 state->cp = errp;
1660 if (err < 0)
1661 goto asFlat;
1663 /* FALL THROUGH */
1665 case '*':
1666 case '+':
1667 case '?':
1668 ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1669 JSMSG_BAD_QUANTIFIER, state->cp - 1);
1670 return FALSE;
1671 default:
1672 asFlat:
1673 state->result = NewRENode(state, REOP_FLAT);
1674 if (!state->result)
1675 return FALSE;
1676 state->result->u.flat.chr = c;
1677 state->result->u.flat.length = 1;
1678 state->result->kid = (void *) (state->cp - 1);
1679 state->progLength += 3;
1680 break;
1682 return ParseQuantifier(state);
1686 * Top-down regular expression grammar, based closely on Perl4.
1688 * regexp: altern A regular expression is one or more
1689 * altern '|' regexp alternatives separated by vertical bar.
1691 #define INITIAL_STACK_SIZE 128
1693 static BOOL
1694 ParseRegExp(CompilerState *state)
1696 size_t parenIndex;
1697 RENode *operand;
1698 REOpData *operatorStack;
1699 RENode **operandStack;
1700 REOp op;
1701 INT i;
1702 BOOL result = FALSE;
1704 INT operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
1705 INT operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
1707 /* Watch out for empty regexp */
1708 if (state->cp == state->cpend) {
1709 state->result = NewRENode(state, REOP_EMPTY);
1710 return (state->result != NULL);
1713 operatorStack = heap_alloc(sizeof(REOpData) * operatorStackSize);
1714 if (!operatorStack)
1715 return FALSE;
1717 operandStack = heap_alloc(sizeof(RENode *) * operandStackSize);
1718 if (!operandStack)
1719 goto out;
1721 for (;;) {
1722 parenIndex = state->parenCount;
1723 if (state->cp == state->cpend) {
1725 * If we are at the end of the regexp and we're short one or more
1726 * operands, the regexp must have the form /x|/ or some such, with
1727 * left parentheses making us short more than one operand.
1729 if (operatorSP >= operandSP) {
1730 operand = NewRENode(state, REOP_EMPTY);
1731 if (!operand)
1732 goto out;
1733 goto pushOperand;
1735 } else {
1736 switch (*state->cp) {
1737 case '(':
1738 ++state->cp;
1739 if (state->cp + 1 < state->cpend &&
1740 *state->cp == '?' &&
1741 (state->cp[1] == '=' ||
1742 state->cp[1] == '!' ||
1743 state->cp[1] == ':')) {
1744 switch (state->cp[1]) {
1745 case '=':
1746 op = REOP_ASSERT;
1747 /* ASSERT, <next>, ... ASSERTTEST */
1748 state->progLength += 4;
1749 break;
1750 case '!':
1751 op = REOP_ASSERT_NOT;
1752 /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
1753 state->progLength += 4;
1754 break;
1755 default:
1756 op = REOP_LPARENNON;
1757 break;
1759 state->cp += 2;
1760 } else {
1761 op = REOP_LPAREN;
1762 /* LPAREN, <index>, ... RPAREN, <index> */
1763 state->progLength
1764 += 2 * (1 + GetCompactIndexWidth(parenIndex));
1765 state->parenCount++;
1766 if (state->parenCount == 65535) {
1767 ReportRegExpError(state, JSREPORT_ERROR,
1768 JSMSG_TOO_MANY_PARENS);
1769 goto out;
1772 goto pushOperator;
1774 case ')':
1776 * If there's no stacked open parenthesis, throw syntax error.
1778 for (i = operatorSP - 1; ; i--) {
1779 if (i < 0) {
1780 ReportRegExpError(state, JSREPORT_ERROR,
1781 JSMSG_UNMATCHED_RIGHT_PAREN);
1782 goto out;
1784 if (operatorStack[i].op == REOP_ASSERT ||
1785 operatorStack[i].op == REOP_ASSERT_NOT ||
1786 operatorStack[i].op == REOP_LPARENNON ||
1787 operatorStack[i].op == REOP_LPAREN) {
1788 break;
1791 /* FALL THROUGH */
1793 case '|':
1794 /* Expected an operand before these, so make an empty one */
1795 operand = NewRENode(state, REOP_EMPTY);
1796 if (!operand)
1797 goto out;
1798 goto pushOperand;
1800 default:
1801 if (!ParseTerm(state))
1802 goto out;
1803 operand = state->result;
1804 pushOperand:
1805 if (operandSP == operandStackSize) {
1806 RENode **tmp;
1807 operandStackSize += operandStackSize;
1808 tmp = heap_realloc(operandStack, sizeof(RENode *) * operandStackSize);
1809 if (!tmp)
1810 goto out;
1811 operandStack = tmp;
1813 operandStack[operandSP++] = operand;
1814 break;
1818 /* At the end; process remaining operators. */
1819 restartOperator:
1820 if (state->cp == state->cpend) {
1821 while (operatorSP) {
1822 --operatorSP;
1823 if (!ProcessOp(state, &operatorStack[operatorSP],
1824 operandStack, operandSP))
1825 goto out;
1826 --operandSP;
1828 assert(operandSP == 1);
1829 state->result = operandStack[0];
1830 result = TRUE;
1831 goto out;
1834 switch (*state->cp) {
1835 case '|':
1836 /* Process any stacked 'concat' operators */
1837 ++state->cp;
1838 while (operatorSP &&
1839 operatorStack[operatorSP - 1].op == REOP_CONCAT) {
1840 --operatorSP;
1841 if (!ProcessOp(state, &operatorStack[operatorSP],
1842 operandStack, operandSP)) {
1843 goto out;
1845 --operandSP;
1847 op = REOP_ALT;
1848 goto pushOperator;
1850 case ')':
1852 * If there's no stacked open parenthesis, throw syntax error.
1854 for (i = operatorSP - 1; ; i--) {
1855 if (i < 0) {
1856 ReportRegExpError(state, JSREPORT_ERROR,
1857 JSMSG_UNMATCHED_RIGHT_PAREN);
1858 goto out;
1860 if (operatorStack[i].op == REOP_ASSERT ||
1861 operatorStack[i].op == REOP_ASSERT_NOT ||
1862 operatorStack[i].op == REOP_LPARENNON ||
1863 operatorStack[i].op == REOP_LPAREN) {
1864 break;
1867 ++state->cp;
1869 /* Process everything on the stack until the open parenthesis. */
1870 for (;;) {
1871 assert(operatorSP);
1872 --operatorSP;
1873 switch (operatorStack[operatorSP].op) {
1874 case REOP_ASSERT:
1875 case REOP_ASSERT_NOT:
1876 case REOP_LPAREN:
1877 operand = NewRENode(state, operatorStack[operatorSP].op);
1878 if (!operand)
1879 goto out;
1880 operand->u.parenIndex =
1881 operatorStack[operatorSP].parenIndex;
1882 assert(operandSP);
1883 operand->kid = operandStack[operandSP - 1];
1884 operandStack[operandSP - 1] = operand;
1885 if (state->treeDepth == TREE_DEPTH_MAX) {
1886 ReportRegExpError(state, JSREPORT_ERROR,
1887 JSMSG_REGEXP_TOO_COMPLEX);
1888 goto out;
1890 ++state->treeDepth;
1891 /* FALL THROUGH */
1893 case REOP_LPARENNON:
1894 state->result = operandStack[operandSP - 1];
1895 if (!ParseQuantifier(state))
1896 goto out;
1897 operandStack[operandSP - 1] = state->result;
1898 goto restartOperator;
1899 default:
1900 if (!ProcessOp(state, &operatorStack[operatorSP],
1901 operandStack, operandSP))
1902 goto out;
1903 --operandSP;
1904 break;
1907 break;
1909 case '{':
1911 const WCHAR *errp = state->cp;
1913 if (ParseMinMaxQuantifier(state, TRUE) < 0) {
1915 * This didn't even scan correctly as a quantifier, so we should
1916 * treat it as flat.
1918 op = REOP_CONCAT;
1919 goto pushOperator;
1922 state->cp = errp;
1923 /* FALL THROUGH */
1926 case '+':
1927 case '*':
1928 case '?':
1929 ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_BAD_QUANTIFIER,
1930 state->cp);
1931 result = FALSE;
1932 goto out;
1934 default:
1935 /* Anything else is the start of the next term. */
1936 op = REOP_CONCAT;
1937 pushOperator:
1938 if (operatorSP == operatorStackSize) {
1939 REOpData *tmp;
1940 operatorStackSize += operatorStackSize;
1941 tmp = heap_realloc(operatorStack, sizeof(REOpData) * operatorStackSize);
1942 if (!tmp)
1943 goto out;
1944 operatorStack = tmp;
1946 operatorStack[operatorSP].op = op;
1947 operatorStack[operatorSP].errPos = state->cp;
1948 operatorStack[operatorSP++].parenIndex = parenIndex;
1949 break;
1952 out:
1953 heap_free(operatorStack);
1954 heap_free(operandStack);
1955 return result;
1959 * Save the current state of the match - the position in the input
1960 * text as well as the position in the bytecode. The state of any
1961 * parent expressions is also saved (preceding state).
1962 * Contents of parenCount parentheses from parenIndex are also saved.
1964 static REBackTrackData *
1965 PushBackTrackState(REGlobalData *gData, REOp op,
1966 jsbytecode *target, REMatchState *x, const WCHAR *cp,
1967 size_t parenIndex, size_t parenCount)
1969 size_t i;
1970 REBackTrackData *result =
1971 (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz);
1973 size_t sz = sizeof(REBackTrackData) +
1974 gData->stateStackTop * sizeof(REProgState) +
1975 parenCount * sizeof(RECapture);
1977 ptrdiff_t btsize = gData->backTrackStackSize;
1978 ptrdiff_t btincr = ((char *)result + sz) -
1979 ((char *)gData->backTrackStack + btsize);
1981 TRACE("\tBT_Push: %lu,%lu\n", (unsigned long) parenIndex, (unsigned long) parenCount);
1983 JS_COUNT_OPERATION(gData->cx, JSOW_JUMP * (1 + parenCount));
1984 if (btincr > 0) {
1985 ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;
1987 JS_COUNT_OPERATION(gData->cx, JSOW_ALLOCATION);
1988 btincr = ((btincr+btsize-1)/btsize)*btsize;
1989 gData->backTrackStack = jsheap_grow(gData->pool, gData->backTrackStack, btsize, btincr);
1990 if (!gData->backTrackStack) {
1991 js_ReportOutOfScriptQuota(gData->cx);
1992 gData->ok = FALSE;
1993 return NULL;
1995 gData->backTrackStackSize = btsize + btincr;
1996 result = (REBackTrackData *) ((char *)gData->backTrackStack + offset);
1998 gData->backTrackSP = result;
1999 result->sz = gData->cursz;
2000 gData->cursz = sz;
2002 result->backtrack_op = op;
2003 result->backtrack_pc = target;
2004 result->cp = cp;
2005 result->parenCount = parenCount;
2006 result->parenIndex = parenIndex;
2008 result->saveStateStackTop = gData->stateStackTop;
2009 assert(gData->stateStackTop);
2010 memcpy(result + 1, gData->stateStack,
2011 sizeof(REProgState) * result->saveStateStackTop);
2013 if (parenCount != 0) {
2014 memcpy((char *)(result + 1) +
2015 sizeof(REProgState) * result->saveStateStackTop,
2016 &x->parens[parenIndex],
2017 sizeof(RECapture) * parenCount);
2018 for (i = 0; i != parenCount; i++)
2019 x->parens[parenIndex + i].index = -1;
2022 return result;
2025 static inline REMatchState *
2026 FlatNIMatcher(REGlobalData *gData, REMatchState *x, WCHAR *matchChars,
2027 size_t length)
2029 size_t i;
2030 assert(gData->cpend >= x->cp);
2031 if (length > (size_t)(gData->cpend - x->cp))
2032 return NULL;
2033 for (i = 0; i != length; i++) {
2034 if (toupperW(matchChars[i]) != toupperW(x->cp[i]))
2035 return NULL;
2037 x->cp += length;
2038 return x;
2042 * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
2043 * 2. If E is not a character then go to step 6.
2044 * 3. Let ch be E's character.
2045 * 4. Let A be a one-element RECharSet containing the character ch.
2046 * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
2047 * 6. E must be an integer. Let n be that integer.
2048 * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
2049 * 8. Return an internal Matcher closure that takes two arguments, a State x
2050 * and a Continuation c, and performs the following:
2051 * 1. Let cap be x's captures internal array.
2052 * 2. Let s be cap[n].
2053 * 3. If s is undefined, then call c(x) and return its result.
2054 * 4. Let e be x's endIndex.
2055 * 5. Let len be s's length.
2056 * 6. Let f be e+len.
2057 * 7. If f>InputLength, return failure.
2058 * 8. If there exists an integer i between 0 (inclusive) and len (exclusive)
2059 * such that Canonicalize(s[i]) is not the same character as
2060 * Canonicalize(Input [e+i]), then return failure.
2061 * 9. Let y be the State (f, cap).
2062 * 10. Call c(y) and return its result.
2064 static REMatchState *
2065 BackrefMatcher(REGlobalData *gData, REMatchState *x, size_t parenIndex)
2067 size_t len, i;
2068 const WCHAR *parenContent;
2069 RECapture *cap = &x->parens[parenIndex];
2071 if (cap->index == -1)
2072 return x;
2074 len = cap->length;
2075 if (x->cp + len > gData->cpend)
2076 return NULL;
2078 parenContent = &gData->cpbegin[cap->index];
2079 if (gData->regexp->flags & JSREG_FOLD) {
2080 for (i = 0; i < len; i++) {
2081 if (toupperW(parenContent[i]) != toupperW(x->cp[i]))
2082 return NULL;
2084 } else {
2085 for (i = 0; i < len; i++) {
2086 if (parenContent[i] != x->cp[i])
2087 return NULL;
2090 x->cp += len;
2091 return x;
2094 /* Add a single character to the RECharSet */
2095 static void
2096 AddCharacterToCharSet(RECharSet *cs, WCHAR c)
2098 UINT byteIndex = (UINT)(c >> 3);
2099 assert(c <= cs->length);
2100 cs->u.bits[byteIndex] |= 1 << (c & 0x7);
2104 /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
2105 static void
2106 AddCharacterRangeToCharSet(RECharSet *cs, UINT c1, UINT c2)
2108 UINT i;
2110 UINT byteIndex1 = c1 >> 3;
2111 UINT byteIndex2 = c2 >> 3;
2113 assert(c2 <= cs->length && c1 <= c2);
2115 c1 &= 0x7;
2116 c2 &= 0x7;
2118 if (byteIndex1 == byteIndex2) {
2119 cs->u.bits[byteIndex1] |= ((BYTE)0xFF >> (7 - (c2 - c1))) << c1;
2120 } else {
2121 cs->u.bits[byteIndex1] |= 0xFF << c1;
2122 for (i = byteIndex1 + 1; i < byteIndex2; i++)
2123 cs->u.bits[i] = 0xFF;
2124 cs->u.bits[byteIndex2] |= (BYTE)0xFF >> (7 - c2);
2128 /* Compile the source of the class into a RECharSet */
2129 static BOOL
2130 ProcessCharSet(REGlobalData *gData, RECharSet *charSet)
2132 const WCHAR *src, *end;
2133 BOOL inRange = FALSE;
2134 WCHAR rangeStart = 0;
2135 UINT byteLength, n;
2136 WCHAR c, thisCh;
2137 INT nDigits, i;
2139 assert(!charSet->converted);
2141 * Assert that startIndex and length points to chars inside [] inside
2142 * source string.
2144 assert(1 <= charSet->u.src.startIndex);
2145 assert(charSet->u.src.startIndex
2146 < SysStringLen(gData->regexp->source));
2147 assert(charSet->u.src.length <= SysStringLen(gData->regexp->source)
2148 - 1 - charSet->u.src.startIndex);
2150 charSet->converted = TRUE;
2151 src = gData->regexp->source + charSet->u.src.startIndex;
2153 end = src + charSet->u.src.length;
2155 assert(src[-1] == '[' && end[0] == ']');
2157 byteLength = (charSet->length >> 3) + 1;
2158 charSet->u.bits = heap_alloc(byteLength);
2159 if (!charSet->u.bits) {
2160 JS_ReportOutOfMemory(gData->cx);
2161 gData->ok = FALSE;
2162 return FALSE;
2164 memset(charSet->u.bits, 0, byteLength);
2166 if (src == end)
2167 return TRUE;
2169 if (*src == '^') {
2170 assert(charSet->sense == FALSE);
2171 ++src;
2172 } else {
2173 assert(charSet->sense == TRUE);
2176 while (src != end) {
2177 switch (*src) {
2178 case '\\':
2179 ++src;
2180 c = *src++;
2181 switch (c) {
2182 case 'b':
2183 thisCh = 0x8;
2184 break;
2185 case 'f':
2186 thisCh = 0xC;
2187 break;
2188 case 'n':
2189 thisCh = 0xA;
2190 break;
2191 case 'r':
2192 thisCh = 0xD;
2193 break;
2194 case 't':
2195 thisCh = 0x9;
2196 break;
2197 case 'v':
2198 thisCh = 0xB;
2199 break;
2200 case 'c':
2201 if (src < end && JS_ISWORD(*src)) {
2202 thisCh = (WCHAR)(*src++ & 0x1F);
2203 } else {
2204 --src;
2205 thisCh = '\\';
2207 break;
2208 case 'x':
2209 nDigits = 2;
2210 goto lexHex;
2211 case 'u':
2212 nDigits = 4;
2213 lexHex:
2214 n = 0;
2215 for (i = 0; (i < nDigits) && (src < end); i++) {
2216 UINT digit;
2217 c = *src++;
2218 if (!isASCIIHexDigit(c, &digit)) {
2220 * Back off to accepting the original '\'
2221 * as a literal
2223 src -= i + 1;
2224 n = '\\';
2225 break;
2227 n = (n << 4) | digit;
2229 thisCh = (WCHAR)n;
2230 break;
2231 case '0':
2232 case '1':
2233 case '2':
2234 case '3':
2235 case '4':
2236 case '5':
2237 case '6':
2238 case '7':
2240 * This is a non-ECMA extension - decimal escapes (in this
2241 * case, octal!) are supposed to be an error inside class
2242 * ranges, but supported here for backwards compatibility.
2244 n = JS7_UNDEC(c);
2245 c = *src;
2246 if ('0' <= c && c <= '7') {
2247 src++;
2248 n = 8 * n + JS7_UNDEC(c);
2249 c = *src;
2250 if ('0' <= c && c <= '7') {
2251 src++;
2252 i = 8 * n + JS7_UNDEC(c);
2253 if (i <= 0377)
2254 n = i;
2255 else
2256 src--;
2259 thisCh = (WCHAR)n;
2260 break;
2262 case 'd':
2263 AddCharacterRangeToCharSet(charSet, '0', '9');
2264 continue; /* don't need range processing */
2265 case 'D':
2266 AddCharacterRangeToCharSet(charSet, 0, '0' - 1);
2267 AddCharacterRangeToCharSet(charSet,
2268 (WCHAR)('9' + 1),
2269 (WCHAR)charSet->length);
2270 continue;
2271 case 's':
2272 for (i = (INT)charSet->length; i >= 0; i--)
2273 if (isspaceW(i))
2274 AddCharacterToCharSet(charSet, (WCHAR)i);
2275 continue;
2276 case 'S':
2277 for (i = (INT)charSet->length; i >= 0; i--)
2278 if (!isspaceW(i))
2279 AddCharacterToCharSet(charSet, (WCHAR)i);
2280 continue;
2281 case 'w':
2282 for (i = (INT)charSet->length; i >= 0; i--)
2283 if (JS_ISWORD(i))
2284 AddCharacterToCharSet(charSet, (WCHAR)i);
2285 continue;
2286 case 'W':
2287 for (i = (INT)charSet->length; i >= 0; i--)
2288 if (!JS_ISWORD(i))
2289 AddCharacterToCharSet(charSet, (WCHAR)i);
2290 continue;
2291 default:
2292 thisCh = c;
2293 break;
2296 break;
2298 default:
2299 thisCh = *src++;
2300 break;
2303 if (inRange) {
2304 if (gData->regexp->flags & JSREG_FOLD) {
2305 int i;
2307 assert(rangeStart <= thisCh);
2308 for (i = rangeStart; i <= thisCh; i++) {
2309 WCHAR uch, dch;
2311 AddCharacterToCharSet(charSet, i);
2312 uch = toupperW(i);
2313 dch = tolowerW(i);
2314 if (i != uch)
2315 AddCharacterToCharSet(charSet, uch);
2316 if (i != dch)
2317 AddCharacterToCharSet(charSet, dch);
2319 } else {
2320 AddCharacterRangeToCharSet(charSet, rangeStart, thisCh);
2322 inRange = FALSE;
2323 } else {
2324 if (gData->regexp->flags & JSREG_FOLD) {
2325 AddCharacterToCharSet(charSet, toupperW(thisCh));
2326 AddCharacterToCharSet(charSet, tolowerW(thisCh));
2327 } else {
2328 AddCharacterToCharSet(charSet, thisCh);
2330 if (src < end - 1) {
2331 if (*src == '-') {
2332 ++src;
2333 inRange = TRUE;
2334 rangeStart = thisCh;
2339 return TRUE;
2342 static BOOL
2343 ReallocStateStack(REGlobalData *gData)
2345 size_t limit = gData->stateStackLimit;
2346 size_t sz = sizeof(REProgState) * limit;
2348 gData->stateStack = jsheap_grow(gData->pool, gData->stateStack, sz, sz);
2349 if (!gData->stateStack) {
2350 js_ReportOutOfScriptQuota(gData->cx);
2351 gData->ok = FALSE;
2352 return FALSE;
2354 gData->stateStackLimit = limit + limit;
2355 return TRUE;
2358 #define PUSH_STATE_STACK(data) \
2359 do { \
2360 ++(data)->stateStackTop; \
2361 if ((data)->stateStackTop == (data)->stateStackLimit && \
2362 !ReallocStateStack((data))) { \
2363 return NULL; \
2365 }while(0)
2368 * Apply the current op against the given input to see if it's going to match
2369 * or fail. Return false if we don't get a match, true if we do. If updatecp is
2370 * true, then update the current state's cp. Always update startpc to the next
2371 * op.
2373 static inline REMatchState *
2374 SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op,
2375 jsbytecode **startpc, BOOL updatecp)
2377 REMatchState *result = NULL;
2378 WCHAR matchCh;
2379 size_t parenIndex;
2380 size_t offset, length, index;
2381 jsbytecode *pc = *startpc; /* pc has already been incremented past op */
2382 WCHAR *source;
2383 const WCHAR *startcp = x->cp;
2384 WCHAR ch;
2385 RECharSet *charSet;
2387 const char *opname = reop_names[op];
2388 TRACE("\n%06d: %*s%s\n", pc - gData->regexp->program,
2389 (int)gData->stateStackTop * 2, "", opname);
2391 switch (op) {
2392 case REOP_EMPTY:
2393 result = x;
2394 break;
2395 case REOP_BOL:
2396 if (x->cp != gData->cpbegin) {
2397 if (/*!gData->cx->regExpStatics.multiline && FIXME !!! */
2398 !(gData->regexp->flags & JSREG_MULTILINE)) {
2399 break;
2401 if (!RE_IS_LINE_TERM(x->cp[-1]))
2402 break;
2404 result = x;
2405 break;
2406 case REOP_EOL:
2407 if (x->cp != gData->cpend) {
2408 if (/*!gData->cx->regExpStatics.multiline &&*/
2409 !(gData->regexp->flags & JSREG_MULTILINE)) {
2410 break;
2412 if (!RE_IS_LINE_TERM(*x->cp))
2413 break;
2415 result = x;
2416 break;
2417 case REOP_WBDRY:
2418 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2419 !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2420 result = x;
2422 break;
2423 case REOP_WNONBDRY:
2424 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2425 (x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2426 result = x;
2428 break;
2429 case REOP_DOT:
2430 if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
2431 result = x;
2432 result->cp++;
2434 break;
2435 case REOP_DIGIT:
2436 if (x->cp != gData->cpend && JS7_ISDEC(*x->cp)) {
2437 result = x;
2438 result->cp++;
2440 break;
2441 case REOP_NONDIGIT:
2442 if (x->cp != gData->cpend && !JS7_ISDEC(*x->cp)) {
2443 result = x;
2444 result->cp++;
2446 break;
2447 case REOP_ALNUM:
2448 if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
2449 result = x;
2450 result->cp++;
2452 break;
2453 case REOP_NONALNUM:
2454 if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
2455 result = x;
2456 result->cp++;
2458 break;
2459 case REOP_SPACE:
2460 if (x->cp != gData->cpend && isspaceW(*x->cp)) {
2461 result = x;
2462 result->cp++;
2464 break;
2465 case REOP_NONSPACE:
2466 if (x->cp != gData->cpend && !isspaceW(*x->cp)) {
2467 result = x;
2468 result->cp++;
2470 break;
2471 case REOP_BACKREF:
2472 pc = ReadCompactIndex(pc, &parenIndex);
2473 assert(parenIndex < gData->regexp->parenCount);
2474 result = BackrefMatcher(gData, x, parenIndex);
2475 break;
2476 case REOP_FLAT:
2477 pc = ReadCompactIndex(pc, &offset);
2478 assert(offset < SysStringLen(gData->regexp->source));
2479 pc = ReadCompactIndex(pc, &length);
2480 assert(1 <= length);
2481 assert(length <= SysStringLen(gData->regexp->source) - offset);
2482 if (length <= (size_t)(gData->cpend - x->cp)) {
2483 source = gData->regexp->source + offset;
2484 TRACE("%s\n", debugstr_wn(source, length));
2485 for (index = 0; index != length; index++) {
2486 if (source[index] != x->cp[index])
2487 return NULL;
2489 x->cp += length;
2490 result = x;
2492 break;
2493 case REOP_FLAT1:
2494 matchCh = *pc++;
2495 TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
2496 if (x->cp != gData->cpend && *x->cp == matchCh) {
2497 result = x;
2498 result->cp++;
2500 break;
2501 case REOP_FLATi:
2502 pc = ReadCompactIndex(pc, &offset);
2503 assert(offset < SysStringLen(gData->regexp->source));
2504 pc = ReadCompactIndex(pc, &length);
2505 assert(1 <= length);
2506 assert(length <= SysStringLen(gData->regexp->source) - offset);
2507 source = gData->regexp->source;
2508 result = FlatNIMatcher(gData, x, source + offset, length);
2509 break;
2510 case REOP_FLAT1i:
2511 matchCh = *pc++;
2512 if (x->cp != gData->cpend && toupperW(*x->cp) == toupperW(matchCh)) {
2513 result = x;
2514 result->cp++;
2516 break;
2517 case REOP_UCFLAT1:
2518 matchCh = GET_ARG(pc);
2519 TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
2520 pc += ARG_LEN;
2521 if (x->cp != gData->cpend && *x->cp == matchCh) {
2522 result = x;
2523 result->cp++;
2525 break;
2526 case REOP_UCFLAT1i:
2527 matchCh = GET_ARG(pc);
2528 pc += ARG_LEN;
2529 if (x->cp != gData->cpend && toupperW(*x->cp) == toupperW(matchCh)) {
2530 result = x;
2531 result->cp++;
2533 break;
2534 case REOP_CLASS:
2535 pc = ReadCompactIndex(pc, &index);
2536 assert(index < gData->regexp->classCount);
2537 if (x->cp != gData->cpend) {
2538 charSet = &gData->regexp->classList[index];
2539 assert(charSet->converted);
2540 ch = *x->cp;
2541 index = ch >> 3;
2542 if (charSet->length != 0 &&
2543 ch <= charSet->length &&
2544 (charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2545 result = x;
2546 result->cp++;
2549 break;
2550 case REOP_NCLASS:
2551 pc = ReadCompactIndex(pc, &index);
2552 assert(index < gData->regexp->classCount);
2553 if (x->cp != gData->cpend) {
2554 charSet = &gData->regexp->classList[index];
2555 assert(charSet->converted);
2556 ch = *x->cp;
2557 index = ch >> 3;
2558 if (charSet->length == 0 ||
2559 ch > charSet->length ||
2560 !(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2561 result = x;
2562 result->cp++;
2565 break;
2567 default:
2568 assert(FALSE);
2570 if (result) {
2571 if (!updatecp)
2572 x->cp = startcp;
2573 *startpc = pc;
2574 TRACE(" *\n");
2575 return result;
2577 x->cp = startcp;
2578 return NULL;
2581 static inline REMatchState *
2582 ExecuteREBytecode(REGlobalData *gData, REMatchState *x)
2584 REMatchState *result = NULL;
2585 REBackTrackData *backTrackData;
2586 jsbytecode *nextpc, *testpc;
2587 REOp nextop;
2588 RECapture *cap;
2589 REProgState *curState;
2590 const WCHAR *startcp;
2591 size_t parenIndex, k;
2592 size_t parenSoFar = 0;
2594 WCHAR matchCh1, matchCh2;
2595 RECharSet *charSet;
2597 BOOL anchor;
2598 jsbytecode *pc = gData->regexp->program;
2599 REOp op = (REOp) *pc++;
2602 * If the first node is a simple match, step the index into the string
2603 * until that match is made, or fail if it can't be found at all.
2605 if (REOP_IS_SIMPLE(op) && !(gData->regexp->flags & JSREG_STICKY)) {
2606 anchor = FALSE;
2607 while (x->cp <= gData->cpend) {
2608 nextpc = pc; /* reset back to start each time */
2609 result = SimpleMatch(gData, x, op, &nextpc, TRUE);
2610 if (result) {
2611 anchor = TRUE;
2612 x = result;
2613 pc = nextpc; /* accept skip to next opcode */
2614 op = (REOp) *pc++;
2615 assert(op < REOP_LIMIT);
2616 break;
2618 gData->skipped++;
2619 x->cp++;
2621 if (!anchor)
2622 goto bad;
2625 for (;;) {
2626 const char *opname = reop_names[op];
2627 TRACE("\n%06d: %*s%s\n", pc - gData->regexp->program,
2628 (int)gData->stateStackTop * 2, "", opname);
2630 if (REOP_IS_SIMPLE(op)) {
2631 result = SimpleMatch(gData, x, op, &pc, TRUE);
2632 } else {
2633 curState = &gData->stateStack[gData->stateStackTop];
2634 switch (op) {
2635 case REOP_END:
2636 goto good;
2637 case REOP_ALTPREREQ2:
2638 nextpc = pc + GET_OFFSET(pc); /* start of next op */
2639 pc += ARG_LEN;
2640 matchCh2 = GET_ARG(pc);
2641 pc += ARG_LEN;
2642 k = GET_ARG(pc);
2643 pc += ARG_LEN;
2645 if (x->cp != gData->cpend) {
2646 if (*x->cp == matchCh2)
2647 goto doAlt;
2649 charSet = &gData->regexp->classList[k];
2650 if (!charSet->converted && !ProcessCharSet(gData, charSet))
2651 goto bad;
2652 matchCh1 = *x->cp;
2653 k = matchCh1 >> 3;
2654 if ((charSet->length == 0 ||
2655 matchCh1 > charSet->length ||
2656 !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^
2657 charSet->sense) {
2658 goto doAlt;
2661 result = NULL;
2662 break;
2664 case REOP_ALTPREREQ:
2665 nextpc = pc + GET_OFFSET(pc); /* start of next op */
2666 pc += ARG_LEN;
2667 matchCh1 = GET_ARG(pc);
2668 pc += ARG_LEN;
2669 matchCh2 = GET_ARG(pc);
2670 pc += ARG_LEN;
2671 if (x->cp == gData->cpend ||
2672 (*x->cp != matchCh1 && *x->cp != matchCh2)) {
2673 result = NULL;
2674 break;
2676 /* else false thru... */
2678 case REOP_ALT:
2679 doAlt:
2680 nextpc = pc + GET_OFFSET(pc); /* start of next alternate */
2681 pc += ARG_LEN; /* start of this alternate */
2682 curState->parenSoFar = parenSoFar;
2683 PUSH_STATE_STACK(gData);
2684 op = (REOp) *pc++;
2685 startcp = x->cp;
2686 if (REOP_IS_SIMPLE(op)) {
2687 if (!SimpleMatch(gData, x, op, &pc, TRUE)) {
2688 op = (REOp) *nextpc++;
2689 pc = nextpc;
2690 continue;
2692 result = x;
2693 op = (REOp) *pc++;
2695 nextop = (REOp) *nextpc++;
2696 if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0))
2697 goto bad;
2698 continue;
2701 * Occurs at (successful) end of REOP_ALT,
2703 case REOP_JUMP:
2705 * If we have not gotten a result here, it is because of an
2706 * empty match. Do the same thing REOP_EMPTY would do.
2708 if (!result)
2709 result = x;
2711 --gData->stateStackTop;
2712 pc += GET_OFFSET(pc);
2713 op = (REOp) *pc++;
2714 continue;
2717 * Occurs at last (successful) end of REOP_ALT,
2719 case REOP_ENDALT:
2721 * If we have not gotten a result here, it is because of an
2722 * empty match. Do the same thing REOP_EMPTY would do.
2724 if (!result)
2725 result = x;
2727 --gData->stateStackTop;
2728 op = (REOp) *pc++;
2729 continue;
2731 case REOP_LPAREN:
2732 pc = ReadCompactIndex(pc, &parenIndex);
2733 TRACE("[ %lu ]\n", (unsigned long) parenIndex);
2734 assert(parenIndex < gData->regexp->parenCount);
2735 if (parenIndex + 1 > parenSoFar)
2736 parenSoFar = parenIndex + 1;
2737 x->parens[parenIndex].index = x->cp - gData->cpbegin;
2738 x->parens[parenIndex].length = 0;
2739 op = (REOp) *pc++;
2740 continue;
2742 case REOP_RPAREN:
2744 ptrdiff_t delta;
2746 pc = ReadCompactIndex(pc, &parenIndex);
2747 assert(parenIndex < gData->regexp->parenCount);
2748 cap = &x->parens[parenIndex];
2749 delta = x->cp - (gData->cpbegin + cap->index);
2750 cap->length = (delta < 0) ? 0 : (size_t) delta;
2751 op = (REOp) *pc++;
2753 if (!result)
2754 result = x;
2755 continue;
2757 case REOP_ASSERT:
2758 nextpc = pc + GET_OFFSET(pc); /* start of term after ASSERT */
2759 pc += ARG_LEN; /* start of ASSERT child */
2760 op = (REOp) *pc++;
2761 testpc = pc;
2762 if (REOP_IS_SIMPLE(op) &&
2763 !SimpleMatch(gData, x, op, &testpc, FALSE)) {
2764 result = NULL;
2765 break;
2767 curState->u.assertion.top =
2768 (char *)gData->backTrackSP - (char *)gData->backTrackStack;
2769 curState->u.assertion.sz = gData->cursz;
2770 curState->index = x->cp - gData->cpbegin;
2771 curState->parenSoFar = parenSoFar;
2772 PUSH_STATE_STACK(gData);
2773 if (!PushBackTrackState(gData, REOP_ASSERTTEST,
2774 nextpc, x, x->cp, 0, 0)) {
2775 goto bad;
2777 continue;
2779 case REOP_ASSERT_NOT:
2780 nextpc = pc + GET_OFFSET(pc);
2781 pc += ARG_LEN;
2782 op = (REOp) *pc++;
2783 testpc = pc;
2784 if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ &&
2785 SimpleMatch(gData, x, op, &testpc, FALSE) &&
2786 *testpc == REOP_ASSERTNOTTEST) {
2787 result = NULL;
2788 break;
2790 curState->u.assertion.top
2791 = (char *)gData->backTrackSP -
2792 (char *)gData->backTrackStack;
2793 curState->u.assertion.sz = gData->cursz;
2794 curState->index = x->cp - gData->cpbegin;
2795 curState->parenSoFar = parenSoFar;
2796 PUSH_STATE_STACK(gData);
2797 if (!PushBackTrackState(gData, REOP_ASSERTNOTTEST,
2798 nextpc, x, x->cp, 0, 0)) {
2799 goto bad;
2801 continue;
2803 case REOP_ASSERTTEST:
2804 --gData->stateStackTop;
2805 --curState;
2806 x->cp = gData->cpbegin + curState->index;
2807 gData->backTrackSP =
2808 (REBackTrackData *) ((char *)gData->backTrackStack +
2809 curState->u.assertion.top);
2810 gData->cursz = curState->u.assertion.sz;
2811 if (result)
2812 result = x;
2813 break;
2815 case REOP_ASSERTNOTTEST:
2816 --gData->stateStackTop;
2817 --curState;
2818 x->cp = gData->cpbegin + curState->index;
2819 gData->backTrackSP =
2820 (REBackTrackData *) ((char *)gData->backTrackStack +
2821 curState->u.assertion.top);
2822 gData->cursz = curState->u.assertion.sz;
2823 result = (!result) ? x : NULL;
2824 break;
2825 case REOP_STAR:
2826 curState->u.quantifier.min = 0;
2827 curState->u.quantifier.max = (UINT)-1;
2828 goto quantcommon;
2829 case REOP_PLUS:
2830 curState->u.quantifier.min = 1;
2831 curState->u.quantifier.max = (UINT)-1;
2832 goto quantcommon;
2833 case REOP_OPT:
2834 curState->u.quantifier.min = 0;
2835 curState->u.quantifier.max = 1;
2836 goto quantcommon;
2837 case REOP_QUANT:
2838 pc = ReadCompactIndex(pc, &k);
2839 curState->u.quantifier.min = k;
2840 pc = ReadCompactIndex(pc, &k);
2841 /* max is k - 1 to use one byte for (UINT)-1 sentinel. */
2842 curState->u.quantifier.max = k - 1;
2843 assert(curState->u.quantifier.min <= curState->u.quantifier.max);
2844 quantcommon:
2845 if (curState->u.quantifier.max == 0) {
2846 pc = pc + GET_OFFSET(pc);
2847 op = (REOp) *pc++;
2848 result = x;
2849 continue;
2851 /* Step over <next> */
2852 nextpc = pc + ARG_LEN;
2853 op = (REOp) *nextpc++;
2854 startcp = x->cp;
2855 if (REOP_IS_SIMPLE(op)) {
2856 if (!SimpleMatch(gData, x, op, &nextpc, TRUE)) {
2857 if (curState->u.quantifier.min == 0)
2858 result = x;
2859 else
2860 result = NULL;
2861 pc = pc + GET_OFFSET(pc);
2862 break;
2864 op = (REOp) *nextpc++;
2865 result = x;
2867 curState->index = startcp - gData->cpbegin;
2868 curState->continue_op = REOP_REPEAT;
2869 curState->continue_pc = pc;
2870 curState->parenSoFar = parenSoFar;
2871 PUSH_STATE_STACK(gData);
2872 if (curState->u.quantifier.min == 0 &&
2873 !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp,
2874 0, 0)) {
2875 goto bad;
2877 pc = nextpc;
2878 continue;
2880 case REOP_ENDCHILD: /* marks the end of a quantifier child */
2881 pc = curState[-1].continue_pc;
2882 op = (REOp) curState[-1].continue_op;
2884 if (!result)
2885 result = x;
2886 continue;
2888 case REOP_REPEAT:
2889 --curState;
2890 do {
2891 --gData->stateStackTop;
2892 if (!result) {
2893 /* Failed, see if we have enough children. */
2894 if (curState->u.quantifier.min == 0)
2895 goto repeatDone;
2896 goto break_switch;
2898 if (curState->u.quantifier.min == 0 &&
2899 x->cp == gData->cpbegin + curState->index) {
2900 /* matched an empty string, that'll get us nowhere */
2901 result = NULL;
2902 goto break_switch;
2904 if (curState->u.quantifier.min != 0)
2905 curState->u.quantifier.min--;
2906 if (curState->u.quantifier.max != (UINT) -1)
2907 curState->u.quantifier.max--;
2908 if (curState->u.quantifier.max == 0)
2909 goto repeatDone;
2910 nextpc = pc + ARG_LEN;
2911 nextop = (REOp) *nextpc;
2912 startcp = x->cp;
2913 if (REOP_IS_SIMPLE(nextop)) {
2914 nextpc++;
2915 if (!SimpleMatch(gData, x, nextop, &nextpc, TRUE)) {
2916 if (curState->u.quantifier.min == 0)
2917 goto repeatDone;
2918 result = NULL;
2919 goto break_switch;
2921 result = x;
2923 curState->index = startcp - gData->cpbegin;
2924 PUSH_STATE_STACK(gData);
2925 if (curState->u.quantifier.min == 0 &&
2926 !PushBackTrackState(gData, REOP_REPEAT,
2927 pc, x, startcp,
2928 curState->parenSoFar,
2929 parenSoFar -
2930 curState->parenSoFar)) {
2931 goto bad;
2933 } while (*nextpc == REOP_ENDCHILD);
2934 pc = nextpc;
2935 op = (REOp) *pc++;
2936 parenSoFar = curState->parenSoFar;
2937 continue;
2939 repeatDone:
2940 result = x;
2941 pc += GET_OFFSET(pc);
2942 goto break_switch;
2944 case REOP_MINIMALSTAR:
2945 curState->u.quantifier.min = 0;
2946 curState->u.quantifier.max = (UINT)-1;
2947 goto minimalquantcommon;
2948 case REOP_MINIMALPLUS:
2949 curState->u.quantifier.min = 1;
2950 curState->u.quantifier.max = (UINT)-1;
2951 goto minimalquantcommon;
2952 case REOP_MINIMALOPT:
2953 curState->u.quantifier.min = 0;
2954 curState->u.quantifier.max = 1;
2955 goto minimalquantcommon;
2956 case REOP_MINIMALQUANT:
2957 pc = ReadCompactIndex(pc, &k);
2958 curState->u.quantifier.min = k;
2959 pc = ReadCompactIndex(pc, &k);
2960 /* See REOP_QUANT comments about k - 1. */
2961 curState->u.quantifier.max = k - 1;
2962 assert(curState->u.quantifier.min
2963 <= curState->u.quantifier.max);
2964 minimalquantcommon:
2965 curState->index = x->cp - gData->cpbegin;
2966 curState->parenSoFar = parenSoFar;
2967 PUSH_STATE_STACK(gData);
2968 if (curState->u.quantifier.min != 0) {
2969 curState->continue_op = REOP_MINIMALREPEAT;
2970 curState->continue_pc = pc;
2971 /* step over <next> */
2972 pc += OFFSET_LEN;
2973 op = (REOp) *pc++;
2974 } else {
2975 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
2976 pc, x, x->cp, 0, 0)) {
2977 goto bad;
2979 --gData->stateStackTop;
2980 pc = pc + GET_OFFSET(pc);
2981 op = (REOp) *pc++;
2983 continue;
2985 case REOP_MINIMALREPEAT:
2986 --gData->stateStackTop;
2987 --curState;
2989 TRACE("{%d,%d}\n", curState->u.quantifier.min, curState->u.quantifier.max);
2990 #define PREPARE_REPEAT() \
2991 do { \
2992 curState->index = x->cp - gData->cpbegin; \
2993 curState->continue_op = REOP_MINIMALREPEAT; \
2994 curState->continue_pc = pc; \
2995 pc += ARG_LEN; \
2996 for (k = curState->parenSoFar; k < parenSoFar; k++) \
2997 x->parens[k].index = -1; \
2998 PUSH_STATE_STACK(gData); \
2999 op = (REOp) *pc++; \
3000 assert(op < REOP_LIMIT); \
3001 }while(0)
3003 if (!result) {
3004 TRACE(" -\n");
3006 * Non-greedy failure - try to consume another child.
3008 if (curState->u.quantifier.max == (UINT) -1 ||
3009 curState->u.quantifier.max > 0) {
3010 PREPARE_REPEAT();
3011 continue;
3013 /* Don't need to adjust pc since we're going to pop. */
3014 break;
3016 if (curState->u.quantifier.min == 0 &&
3017 x->cp == gData->cpbegin + curState->index) {
3018 /* Matched an empty string, that'll get us nowhere. */
3019 result = NULL;
3020 break;
3022 if (curState->u.quantifier.min != 0)
3023 curState->u.quantifier.min--;
3024 if (curState->u.quantifier.max != (UINT) -1)
3025 curState->u.quantifier.max--;
3026 if (curState->u.quantifier.min != 0) {
3027 PREPARE_REPEAT();
3028 continue;
3030 curState->index = x->cp - gData->cpbegin;
3031 curState->parenSoFar = parenSoFar;
3032 PUSH_STATE_STACK(gData);
3033 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
3034 pc, x, x->cp,
3035 curState->parenSoFar,
3036 parenSoFar - curState->parenSoFar)) {
3037 goto bad;
3039 --gData->stateStackTop;
3040 pc = pc + GET_OFFSET(pc);
3041 op = (REOp) *pc++;
3042 assert(op < REOP_LIMIT);
3043 continue;
3044 default:
3045 assert(FALSE);
3046 result = NULL;
3048 break_switch:;
3052 * If the match failed and there's a backtrack option, take it.
3053 * Otherwise this is a complete and utter failure.
3055 if (!result) {
3056 if (gData->cursz == 0)
3057 return NULL;
3059 /* Potentially detect explosive regex here. */
3060 gData->backTrackCount++;
3061 if (gData->backTrackLimit &&
3062 gData->backTrackCount >= gData->backTrackLimit) {
3063 JS_ReportErrorNumber(gData->cx, js_GetErrorMessage, NULL,
3064 JSMSG_REGEXP_TOO_COMPLEX);
3065 gData->ok = FALSE;
3066 return NULL;
3069 backTrackData = gData->backTrackSP;
3070 gData->cursz = backTrackData->sz;
3071 gData->backTrackSP =
3072 (REBackTrackData *) ((char *)backTrackData - backTrackData->sz);
3073 x->cp = backTrackData->cp;
3074 pc = backTrackData->backtrack_pc;
3075 op = (REOp) backTrackData->backtrack_op;
3076 assert(op < REOP_LIMIT);
3077 gData->stateStackTop = backTrackData->saveStateStackTop;
3078 assert(gData->stateStackTop);
3080 memcpy(gData->stateStack, backTrackData + 1,
3081 sizeof(REProgState) * backTrackData->saveStateStackTop);
3082 curState = &gData->stateStack[gData->stateStackTop - 1];
3084 if (backTrackData->parenCount) {
3085 memcpy(&x->parens[backTrackData->parenIndex],
3086 (char *)(backTrackData + 1) +
3087 sizeof(REProgState) * backTrackData->saveStateStackTop,
3088 sizeof(RECapture) * backTrackData->parenCount);
3089 parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
3090 } else {
3091 for (k = curState->parenSoFar; k < parenSoFar; k++)
3092 x->parens[k].index = -1;
3093 parenSoFar = curState->parenSoFar;
3096 TRACE("\tBT_Pop: %ld,%ld\n",
3097 (unsigned long) backTrackData->parenIndex,
3098 (unsigned long) backTrackData->parenCount);
3099 continue;
3101 x = result;
3104 * Continue with the expression.
3106 op = (REOp)*pc++;
3107 assert(op < REOP_LIMIT);
3110 bad:
3111 TRACE("\n");
3112 return NULL;
3114 good:
3115 TRACE("\n");
3116 return x;
3119 static REMatchState *MatchRegExp(REGlobalData *gData, REMatchState *x)
3121 REMatchState *result;
3122 const WCHAR *cp = x->cp;
3123 const WCHAR *cp2;
3124 UINT j;
3127 * Have to include the position beyond the last character
3128 * in order to detect end-of-input/line condition.
3130 for (cp2 = cp; cp2 <= gData->cpend; cp2++) {
3131 gData->skipped = cp2 - cp;
3132 x->cp = cp2;
3133 for (j = 0; j < gData->regexp->parenCount; j++)
3134 x->parens[j].index = -1;
3135 result = ExecuteREBytecode(gData, x);
3136 if (!gData->ok || result || (gData->regexp->flags & JSREG_STICKY))
3137 return result;
3138 gData->backTrackSP = gData->backTrackStack;
3139 gData->cursz = 0;
3140 gData->stateStackTop = 0;
3141 cp2 = cp + gData->skipped;
3143 return NULL;
3146 #define MIN_BACKTRACK_LIMIT 400000
3148 static REMatchState *InitMatch(script_ctx_t *cx, REGlobalData *gData, JSRegExp *re, size_t length)
3150 REMatchState *result;
3151 UINT i;
3153 gData->backTrackStackSize = INITIAL_BACKTRACK;
3154 gData->backTrackStack = jsheap_alloc(gData->pool, INITIAL_BACKTRACK);
3155 if (!gData->backTrackStack)
3156 goto bad;
3158 gData->backTrackSP = gData->backTrackStack;
3159 gData->cursz = 0;
3160 gData->backTrackCount = 0;
3161 gData->backTrackLimit = 0;
3163 gData->stateStackLimit = INITIAL_STATESTACK;
3164 gData->stateStack = jsheap_alloc(gData->pool, sizeof(REProgState) * INITIAL_STATESTACK);
3165 if (!gData->stateStack)
3166 goto bad;
3168 gData->stateStackTop = 0;
3169 gData->cx = cx;
3170 gData->regexp = re;
3171 gData->ok = TRUE;
3173 result = jsheap_alloc(gData->pool, offsetof(REMatchState, parens) + re->parenCount * sizeof(RECapture));
3174 if (!result)
3175 goto bad;
3177 for (i = 0; i < re->classCount; i++) {
3178 if (!re->classList[i].converted &&
3179 !ProcessCharSet(gData, &re->classList[i])) {
3180 return NULL;
3184 return result;
3186 bad:
3187 js_ReportOutOfScriptQuota(cx);
3188 gData->ok = FALSE;
3189 return NULL;
3192 static void
3193 js_DestroyRegExp(JSRegExp *re)
3195 if (re->classList) {
3196 UINT i;
3197 for (i = 0; i < re->classCount; i++) {
3198 if (re->classList[i].converted)
3199 heap_free(re->classList[i].u.bits);
3200 re->classList[i].u.bits = NULL;
3202 heap_free(re->classList);
3204 heap_free(re);
3207 static JSRegExp *
3208 js_NewRegExp(script_ctx_t *cx, BSTR str, UINT flags, BOOL flat)
3210 JSRegExp *re;
3211 jsheap_t *mark;
3212 CompilerState state;
3213 size_t resize;
3214 jsbytecode *endPC;
3215 UINT i;
3216 size_t len;
3218 re = NULL;
3219 mark = jsheap_mark(&cx->tmp_heap);
3220 len = SysStringLen(str);
3222 state.context = cx;
3223 state.cp = str;
3224 if (!state.cp)
3225 goto out;
3226 state.cpbegin = state.cp;
3227 state.cpend = state.cp + len;
3228 state.flags = flags;
3229 state.parenCount = 0;
3230 state.classCount = 0;
3231 state.progLength = 0;
3232 state.treeDepth = 0;
3233 state.classBitmapsMem = 0;
3234 for (i = 0; i < CLASS_CACHE_SIZE; i++)
3235 state.classCache[i].start = NULL;
3237 if (len != 0 && flat) {
3238 state.result = NewRENode(&state, REOP_FLAT);
3239 if (!state.result)
3240 goto out;
3241 state.result->u.flat.chr = *state.cpbegin;
3242 state.result->u.flat.length = len;
3243 state.result->kid = (void *) state.cpbegin;
3244 /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
3245 state.progLength += 1 + GetCompactIndexWidth(0)
3246 + GetCompactIndexWidth(len);
3247 } else {
3248 if (!ParseRegExp(&state))
3249 goto out;
3251 resize = offsetof(JSRegExp, program) + state.progLength + 1;
3252 re = heap_alloc(resize);
3253 if (!re)
3254 goto out;
3256 assert(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT);
3257 re->classCount = state.classCount;
3258 if (re->classCount) {
3259 re->classList = heap_alloc(re->classCount * sizeof(RECharSet));
3260 if (!re->classList) {
3261 js_DestroyRegExp(re);
3262 re = NULL;
3263 goto out;
3265 for (i = 0; i < re->classCount; i++)
3266 re->classList[i].converted = FALSE;
3267 } else {
3268 re->classList = NULL;
3270 endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
3271 if (!endPC) {
3272 js_DestroyRegExp(re);
3273 re = NULL;
3274 goto out;
3276 *endPC++ = REOP_END;
3278 * Check whether size was overestimated and shrink using realloc.
3279 * This is safe since no pointers to newly parsed regexp or its parts
3280 * besides re exist here.
3282 if ((size_t)(endPC - re->program) != state.progLength + 1) {
3283 JSRegExp *tmp;
3284 assert((size_t)(endPC - re->program) < state.progLength + 1);
3285 resize = offsetof(JSRegExp, program) + (endPC - re->program);
3286 tmp = heap_realloc(re, resize);
3287 if (tmp)
3288 re = tmp;
3291 re->flags = flags;
3292 re->parenCount = state.parenCount;
3293 re->source = str;
3295 out:
3296 jsheap_clear(mark);
3297 return re;
3300 static HRESULT do_regexp_match_next(RegExpInstance *regexp, const WCHAR *str, DWORD len,
3301 const WCHAR **cp, match_result_t **parens, DWORD *parens_size, DWORD *parens_cnt, match_result_t *ret)
3303 REMatchState *x, *result;
3304 REGlobalData gData;
3305 DWORD matchlen;
3307 gData.cpbegin = *cp;
3308 gData.cpend = str + len;
3309 gData.start = *cp-str;
3310 gData.skipped = 0;
3311 gData.pool = &regexp->dispex.ctx->tmp_heap;
3313 x = InitMatch(NULL, &gData, regexp->jsregexp, gData.cpend - gData.cpbegin);
3314 if(!x) {
3315 WARN("InitMatch failed\n");
3316 return E_FAIL;
3319 x->cp = *cp;
3320 result = MatchRegExp(&gData, x);
3321 if(!gData.ok) {
3322 WARN("MatchRegExp failed\n");
3323 return E_FAIL;
3326 if(!result)
3327 return S_FALSE;
3329 if(parens) {
3330 DWORD i;
3332 if(regexp->jsregexp->parenCount > *parens_size) {
3333 match_result_t *new_parens;
3335 if(*parens)
3336 new_parens = heap_realloc(*parens, sizeof(match_result_t)*regexp->jsregexp->parenCount);
3337 else
3338 new_parens = heap_alloc(sizeof(match_result_t)*regexp->jsregexp->parenCount);
3339 if(!new_parens)
3340 return E_OUTOFMEMORY;
3342 *parens = new_parens;
3345 *parens_cnt = regexp->jsregexp->parenCount;
3347 for(i=0; i < regexp->jsregexp->parenCount; i++) {
3348 (*parens)[i].str = *cp + result->parens[i].index;
3349 (*parens)[i].len = result->parens[i].length;
3353 matchlen = (result->cp-*cp) - gData.skipped;
3354 *cp = result->cp;
3355 ret->str = result->cp-matchlen;
3356 ret->len = matchlen;
3358 return S_OK;
3361 HRESULT regexp_match_next(DispatchEx *dispex, BOOL gcheck, const WCHAR *str, DWORD len,
3362 const WCHAR **cp, match_result_t **parens, DWORD *parens_size, DWORD *parens_cnt, match_result_t *ret)
3364 RegExpInstance *regexp = (RegExpInstance*)dispex;
3365 jsheap_t *mark;
3366 HRESULT hres;
3368 if(gcheck && !(regexp->jsregexp->flags & JSREG_GLOB))
3369 return S_FALSE;
3371 mark = jsheap_mark(&regexp->dispex.ctx->tmp_heap);
3373 hres = do_regexp_match_next(regexp, str, len, cp, parens, parens_size, parens_cnt, ret);
3375 jsheap_clear(mark);
3376 return hres;
3379 HRESULT regexp_match(DispatchEx *dispex, const WCHAR *str, DWORD len, BOOL gflag, match_result_t **match_result,
3380 DWORD *result_cnt)
3382 RegExpInstance *This = (RegExpInstance*)dispex;
3383 match_result_t *ret = NULL, cres;
3384 const WCHAR *cp = str;
3385 DWORD i=0, ret_size = 0;
3386 jsheap_t *mark;
3387 HRESULT hres;
3389 mark = jsheap_mark(&This->dispex.ctx->tmp_heap);
3391 while(1) {
3392 hres = do_regexp_match_next(This, str, len, &cp, NULL, NULL, NULL, &cres);
3393 if(hres == S_FALSE) {
3394 hres = S_OK;
3395 break;
3398 if(FAILED(hres))
3399 return hres;
3401 if(ret_size == i) {
3402 if(ret)
3403 ret = heap_realloc(ret, (ret_size <<= 1) * sizeof(match_result_t));
3404 else
3405 ret = heap_alloc((ret_size=4) * sizeof(match_result_t));
3406 if(!ret) {
3407 hres = E_OUTOFMEMORY;
3408 break;
3412 ret[i++] = cres;
3414 if(!gflag && !(This->jsregexp->flags & JSREG_GLOB)) {
3415 hres = S_OK;
3416 break;
3420 jsheap_clear(mark);
3421 if(FAILED(hres)) {
3422 heap_free(ret);
3423 return hres;
3426 *match_result = ret;
3427 *result_cnt = i;
3428 return S_OK;
3431 static HRESULT RegExp_source(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3432 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3434 FIXME("\n");
3435 return E_NOTIMPL;
3438 static HRESULT RegExp_global(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3439 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3441 FIXME("\n");
3442 return E_NOTIMPL;
3445 static HRESULT RegExp_ignoreCase(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3446 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3448 FIXME("\n");
3449 return E_NOTIMPL;
3452 static HRESULT RegExp_multiline(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3453 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3455 FIXME("\n");
3456 return E_NOTIMPL;
3459 static HRESULT RegExp_lastIndex(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3460 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3462 FIXME("\n");
3463 return E_NOTIMPL;
3466 static HRESULT RegExp_toString(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3467 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3469 FIXME("\n");
3470 return E_NOTIMPL;
3473 static HRESULT RegExp_toLocaleString(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3474 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3476 FIXME("\n");
3477 return E_NOTIMPL;
3480 static HRESULT RegExp_hasOwnProperty(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3481 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3483 FIXME("\n");
3484 return E_NOTIMPL;
3487 static HRESULT RegExp_propertyIsEnumerable(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3488 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3490 FIXME("\n");
3491 return E_NOTIMPL;
3494 static HRESULT RegExp_isPrototypeOf(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3495 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3497 FIXME("\n");
3498 return E_NOTIMPL;
3501 static HRESULT RegExp_exec(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3502 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3504 FIXME("\n");
3505 return E_NOTIMPL;
3508 static HRESULT RegExp_value(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3509 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3511 FIXME("\n");
3512 return E_NOTIMPL;
3515 static void RegExp_destructor(DispatchEx *dispex)
3517 RegExpInstance *This = (RegExpInstance*)dispex;
3519 if(This->jsregexp)
3520 js_DestroyRegExp(This->jsregexp);
3521 SysFreeString(This->str);
3522 heap_free(This);
3525 static const builtin_prop_t RegExp_props[] = {
3526 {execW, RegExp_exec, PROPF_METHOD},
3527 {globalW, RegExp_global, 0},
3528 {hasOwnPropertyW, RegExp_hasOwnProperty, PROPF_METHOD},
3529 {ignoreCaseW, RegExp_ignoreCase, 0},
3530 {isPrototypeOfW, RegExp_isPrototypeOf, PROPF_METHOD},
3531 {lastIndexW, RegExp_lastIndex, 0},
3532 {multilineW, RegExp_multiline, 0},
3533 {propertyIsEnumerableW, RegExp_propertyIsEnumerable, PROPF_METHOD},
3534 {sourceW, RegExp_source, 0},
3535 {toLocaleStringW, RegExp_toLocaleString, PROPF_METHOD},
3536 {toStringW, RegExp_toString, PROPF_METHOD}
3539 static const builtin_info_t RegExp_info = {
3540 JSCLASS_REGEXP,
3541 {NULL, RegExp_value, 0},
3542 sizeof(RegExp_props)/sizeof(*RegExp_props),
3543 RegExp_props,
3544 RegExp_destructor,
3545 NULL
3548 static HRESULT alloc_regexp(script_ctx_t *ctx, BOOL use_constr, RegExpInstance **ret)
3550 RegExpInstance *regexp;
3551 HRESULT hres;
3553 regexp = heap_alloc_zero(sizeof(RegExpInstance));
3554 if(!regexp)
3555 return E_OUTOFMEMORY;
3557 if(use_constr)
3558 hres = init_dispex_from_constr(&regexp->dispex, ctx, &RegExp_info, ctx->regexp_constr);
3559 else
3560 hres = init_dispex(&regexp->dispex, ctx, &RegExp_info, NULL);
3562 if(FAILED(hres)) {
3563 heap_free(regexp);
3564 return hres;
3567 *ret = regexp;
3568 return S_OK;
3571 static HRESULT create_regexp(script_ctx_t *ctx, const WCHAR *exp, int len, DWORD flags, DispatchEx **ret)
3573 RegExpInstance *regexp;
3574 HRESULT hres;
3576 TRACE("%s %x\n", debugstr_w(exp), flags);
3578 hres = alloc_regexp(ctx, TRUE, &regexp);
3579 if(FAILED(hres))
3580 return hres;
3582 if(len == -1)
3583 regexp->str = SysAllocString(exp);
3584 else
3585 regexp->str = SysAllocStringLen(exp, len);
3586 if(!regexp->str) {
3587 jsdisp_release(&regexp->dispex);
3588 return E_OUTOFMEMORY;
3591 regexp->jsregexp = js_NewRegExp(ctx, regexp->str, flags, FALSE);
3592 if(!regexp->jsregexp) {
3593 WARN("js_NewRegExp failed\n");
3594 jsdisp_release(&regexp->dispex);
3595 return E_FAIL;
3598 *ret = &regexp->dispex;
3599 return S_OK;
3602 static HRESULT regexp_constructor(script_ctx_t *ctx, DISPPARAMS *dp, VARIANT *retv)
3604 const WCHAR *opt = emptyW, *src;
3605 DispatchEx *ret;
3606 VARIANT *arg;
3607 HRESULT hres;
3609 if(!arg_cnt(dp)) {
3610 FIXME("no args\n");
3611 return E_NOTIMPL;
3614 arg = get_arg(dp,0);
3615 if(V_VT(arg) == VT_DISPATCH) {
3616 DispatchEx *obj;
3618 obj = iface_to_jsdisp((IUnknown*)V_DISPATCH(arg));
3619 if(obj) {
3620 if(is_class(obj, JSCLASS_REGEXP)) {
3621 RegExpInstance *regexp = (RegExpInstance*)obj;
3623 hres = create_regexp(ctx, regexp->str, -1, regexp->jsregexp->flags, &ret);
3624 jsdisp_release(obj);
3625 if(FAILED(hres))
3626 return hres;
3628 V_VT(retv) = VT_DISPATCH;
3629 V_DISPATCH(retv) = (IDispatch*)_IDispatchEx_(ret);
3630 return S_OK;
3633 jsdisp_release(obj);
3637 if(V_VT(arg) != VT_BSTR) {
3638 FIXME("vt arg0 = %d\n", V_VT(arg));
3639 return E_NOTIMPL;
3642 src = V_BSTR(arg);
3644 if(arg_cnt(dp) >= 2) {
3645 arg = get_arg(dp,1);
3646 if(V_VT(arg) != VT_BSTR) {
3647 FIXME("unimplemented for vt %d\n", V_VT(arg));
3648 return E_NOTIMPL;
3651 opt = V_BSTR(arg);
3654 hres = create_regexp_str(ctx, src, -1, opt, strlenW(opt), &ret);
3655 if(FAILED(hres))
3656 return hres;
3658 V_VT(retv) = VT_DISPATCH;
3659 V_DISPATCH(retv) = (IDispatch*)_IDispatchEx_(ret);
3660 return S_OK;
3663 static HRESULT RegExpConstr_value(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3664 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3666 TRACE("\n");
3668 switch(flags) {
3669 case DISPATCH_CONSTRUCT:
3670 return regexp_constructor(dispex->ctx, dp, retv);
3671 default:
3672 FIXME("unimplemented flags: %x\n", flags);
3673 return E_NOTIMPL;
3676 return S_OK;
3679 HRESULT create_regexp_constr(script_ctx_t *ctx, DispatchEx **ret)
3681 RegExpInstance *regexp;
3682 HRESULT hres;
3684 hres = alloc_regexp(ctx, FALSE, &regexp);
3685 if(FAILED(hres))
3686 return hres;
3688 hres = create_builtin_function(ctx, RegExpConstr_value, PROPF_CONSTR, &regexp->dispex, ret);
3690 jsdisp_release(&regexp->dispex);
3691 return hres;
3694 HRESULT create_regexp_str(script_ctx_t *ctx, const WCHAR *exp, DWORD exp_len, const WCHAR *opt,
3695 DWORD opt_len, DispatchEx **ret)
3697 const WCHAR *p;
3698 DWORD flags = 0;
3700 if(opt) {
3701 for (p = opt; p < opt+opt_len; p++) {
3702 switch (*p) {
3703 case 'g':
3704 flags |= JSREG_GLOB;
3705 break;
3706 case 'i':
3707 flags |= JSREG_FOLD;
3708 break;
3709 case 'm':
3710 flags |= JSREG_MULTILINE;
3711 break;
3712 case 'y':
3713 flags |= JSREG_STICKY;
3714 break;
3715 default:
3716 WARN("wrong flag %c\n", *p);
3717 return E_FAIL;
3722 return create_regexp(ctx, exp, exp_len, flags, ret);