ntoskrnl.exe: Add stub for IoQueryDeviceDescription.
[wine/multimedia.git] / dlls / jscript / regexp.c
blobcc38c49ba4e877f4468b43280fac0ac3b08c8b18
1 /*
2 * Copyright 2008 Jacek Caban for CodeWeavers
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
20 * Code in this file is based on files:
21 * js/src/jsregexp.h
22 * js/src/jsregexp.c
23 * from Mozilla project, released under LGPL 2.1 or later.
25 * The Original Code is Mozilla Communicator client code, released
26 * March 31, 1998.
28 * The Initial Developer of the Original Code is
29 * Netscape Communications Corporation.
30 * Portions created by the Initial Developer are Copyright (C) 1998
31 * the Initial Developer. All Rights Reserved.
34 #include <assert.h>
36 #include "jscript.h"
38 #include "wine/debug.h"
40 WINE_DEFAULT_DEBUG_CHANNEL(jscript);
42 #define JSREG_FOLD 0x01 /* fold uppercase to lowercase */
43 #define JSREG_GLOB 0x02 /* global exec, creates array of matches */
44 #define JSREG_MULTILINE 0x04 /* treat ^ and $ as begin and end of line */
45 #define JSREG_STICKY 0x08 /* only match starting at lastIndex */
47 typedef BYTE JSPackedBool;
48 typedef BYTE jsbytecode;
51 * This struct holds a bitmap representation of a class from a regexp.
52 * There's a list of these referenced by the classList field in the JSRegExp
53 * struct below. The initial state has startIndex set to the offset in the
54 * original regexp source of the beginning of the class contents. The first
55 * use of the class converts the source representation into a bitmap.
58 typedef struct RECharSet {
59 JSPackedBool converted;
60 JSPackedBool sense;
61 WORD length;
62 union {
63 BYTE *bits;
64 struct {
65 size_t startIndex;
66 size_t length;
67 } src;
68 } u;
69 } RECharSet;
71 typedef struct {
72 WORD flags; /* flags, see jsapi.h's JSREG_* defines */
73 size_t parenCount; /* number of parenthesized submatches */
74 size_t classCount; /* count [...] bitmaps */
75 RECharSet *classList; /* list of [...] bitmaps */
76 BSTR source; /* locked source string, sans // */
77 jsbytecode program[1]; /* regular expression bytecode */
78 } JSRegExp;
80 typedef struct {
81 DispatchEx dispex;
83 JSRegExp *jsregexp;
84 BSTR str;
85 } RegExpInstance;
87 static const WCHAR sourceW[] = {'s','o','u','r','c','e',0};
88 static const WCHAR globalW[] = {'g','l','o','b','a','l',0};
89 static const WCHAR ignoreCaseW[] = {'i','g','n','o','r','e','C','a','s','e',0};
90 static const WCHAR multilineW[] = {'m','u','l','t','i','l','i','n','e',0};
91 static const WCHAR lastIndexW[] = {'l','a','s','t','I','n','d','e','x',0};
92 static const WCHAR toStringW[] = {'t','o','S','t','r','i','n','g',0};
93 static const WCHAR execW[] = {'e','x','e','c',0};
94 static const WCHAR testW[] = {'t','e','s','t',0};
96 static const WCHAR emptyW[] = {0};
98 /* FIXME: Better error handling */
99 #define ReportRegExpError(a,b,c)
100 #define ReportRegExpErrorHelper(a,b,c,d)
101 #define JS_ReportErrorNumber(a,b,c,d)
102 #define JS_ReportErrorFlagsAndNumber(a,b,c,d,e,f)
103 #define js_ReportOutOfScriptQuota(a)
104 #define JS_ReportOutOfMemory(a)
105 #define JS_COUNT_OPERATION(a,b)
107 #define JSMSG_MIN_TOO_BIG 47
108 #define JSMSG_MAX_TOO_BIG 48
109 #define JSMSG_OUT_OF_ORDER 49
110 #define JSMSG_OUT_OF_MEMORY 137
112 #define LINE_SEPARATOR 0x2028
113 #define PARA_SEPARATOR 0x2029
115 #define RE_IS_LETTER(c) (((c >= 'A') && (c <= 'Z')) || \
116 ((c >= 'a') && (c <= 'z')) )
117 #define RE_IS_LINE_TERM(c) ((c == '\n') || (c == '\r') || \
118 (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
120 #define JS_ISWORD(c) ((c) < 128 && (isalnum(c) || (c) == '_'))
122 #define JS7_ISDEC(c) ((((unsigned)(c)) - '0') <= 9)
123 #define JS7_UNDEC(c) ((c) - '0')
125 typedef enum REOp {
126 REOP_EMPTY,
127 REOP_BOL,
128 REOP_EOL,
129 REOP_WBDRY,
130 REOP_WNONBDRY,
131 REOP_DOT,
132 REOP_DIGIT,
133 REOP_NONDIGIT,
134 REOP_ALNUM,
135 REOP_NONALNUM,
136 REOP_SPACE,
137 REOP_NONSPACE,
138 REOP_BACKREF,
139 REOP_FLAT,
140 REOP_FLAT1,
141 REOP_FLATi,
142 REOP_FLAT1i,
143 REOP_UCFLAT1,
144 REOP_UCFLAT1i,
145 REOP_UCFLAT,
146 REOP_UCFLATi,
147 REOP_CLASS,
148 REOP_NCLASS,
149 REOP_ALT,
150 REOP_QUANT,
151 REOP_STAR,
152 REOP_PLUS,
153 REOP_OPT,
154 REOP_LPAREN,
155 REOP_RPAREN,
156 REOP_JUMP,
157 REOP_DOTSTAR,
158 REOP_LPARENNON,
159 REOP_ASSERT,
160 REOP_ASSERT_NOT,
161 REOP_ASSERTTEST,
162 REOP_ASSERTNOTTEST,
163 REOP_MINIMALSTAR,
164 REOP_MINIMALPLUS,
165 REOP_MINIMALOPT,
166 REOP_MINIMALQUANT,
167 REOP_ENDCHILD,
168 REOP_REPEAT,
169 REOP_MINIMALREPEAT,
170 REOP_ALTPREREQ,
171 REOP_ALTPREREQ2,
172 REOP_ENDALT,
173 REOP_CONCAT,
174 REOP_END,
175 REOP_LIMIT /* META: no operator >= to this */
176 } REOp;
178 #define REOP_IS_SIMPLE(op) ((op) <= REOP_NCLASS)
180 static const char *reop_names[] = {
181 "empty",
182 "bol",
183 "eol",
184 "wbdry",
185 "wnonbdry",
186 "dot",
187 "digit",
188 "nondigit",
189 "alnum",
190 "nonalnum",
191 "space",
192 "nonspace",
193 "backref",
194 "flat",
195 "flat1",
196 "flati",
197 "flat1i",
198 "ucflat1",
199 "ucflat1i",
200 "ucflat",
201 "ucflati",
202 "class",
203 "nclass",
204 "alt",
205 "quant",
206 "star",
207 "plus",
208 "opt",
209 "lparen",
210 "rparen",
211 "jump",
212 "dotstar",
213 "lparennon",
214 "assert",
215 "assert_not",
216 "asserttest",
217 "assertnottest",
218 "minimalstar",
219 "minimalplus",
220 "minimalopt",
221 "minimalquant",
222 "endchild",
223 "repeat",
224 "minimalrepeat",
225 "altprereq",
226 "alrprereq2",
227 "endalt",
228 "concat",
229 "end",
230 NULL
233 typedef struct RECapture {
234 ptrdiff_t index; /* start of contents, -1 for empty */
235 size_t length; /* length of capture */
236 } RECapture;
238 typedef struct REMatchState {
239 const WCHAR *cp;
240 RECapture parens[1]; /* first of 're->parenCount' captures,
241 allocated at end of this struct */
242 } REMatchState;
244 typedef struct REProgState {
245 jsbytecode *continue_pc; /* current continuation data */
246 jsbytecode continue_op;
247 ptrdiff_t index; /* progress in text */
248 size_t parenSoFar; /* highest indexed paren started */
249 union {
250 struct {
251 UINT min; /* current quantifier limits */
252 UINT max;
253 } quantifier;
254 struct {
255 size_t top; /* backtrack stack state */
256 size_t sz;
257 } assertion;
258 } u;
259 } REProgState;
261 typedef struct REBackTrackData {
262 size_t sz; /* size of previous stack entry */
263 jsbytecode *backtrack_pc; /* where to backtrack to */
264 jsbytecode backtrack_op;
265 const WCHAR *cp; /* index in text of match at backtrack */
266 size_t parenIndex; /* start index of saved paren contents */
267 size_t parenCount; /* # of saved paren contents */
268 size_t saveStateStackTop; /* number of parent states */
269 /* saved parent states follow */
270 /* saved paren contents follow */
271 } REBackTrackData;
273 #define INITIAL_STATESTACK 100
274 #define INITIAL_BACKTRACK 8000
276 typedef struct REGlobalData {
277 script_ctx_t *cx;
278 JSRegExp *regexp; /* the RE in execution */
279 BOOL ok; /* runtime error (out_of_memory only?) */
280 size_t start; /* offset to start at */
281 ptrdiff_t skipped; /* chars skipped anchoring this r.e. */
282 const WCHAR *cpbegin; /* text base address */
283 const WCHAR *cpend; /* text limit address */
285 REProgState *stateStack; /* stack of state of current parents */
286 size_t stateStackTop;
287 size_t stateStackLimit;
289 REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
290 REBackTrackData *backTrackSP;
291 size_t backTrackStackSize;
292 size_t cursz; /* size of current stack entry */
293 size_t backTrackCount; /* how many times we've backtracked */
294 size_t backTrackLimit; /* upper limit on backtrack states */
296 jsheap_t *pool; /* It's faster to use one malloc'd pool
297 than to malloc/free the three items
298 that are allocated from this pool */
299 } REGlobalData;
301 typedef struct RENode RENode;
302 struct RENode {
303 REOp op; /* r.e. op bytecode */
304 RENode *next; /* next in concatenation order */
305 void *kid; /* first operand */
306 union {
307 void *kid2; /* second operand */
308 INT num; /* could be a number */
309 size_t parenIndex; /* or a parenthesis index */
310 struct { /* or a quantifier range */
311 UINT min;
312 UINT max;
313 JSPackedBool greedy;
314 } range;
315 struct { /* or a character class */
316 size_t startIndex;
317 size_t kidlen; /* length of string at kid, in jschars */
318 size_t index; /* index into class list */
319 WORD bmsize; /* bitmap size, based on max char code */
320 JSPackedBool sense;
321 } ucclass;
322 struct { /* or a literal sequence */
323 WCHAR chr; /* of one character */
324 size_t length; /* or many (via the kid) */
325 } flat;
326 struct {
327 RENode *kid2; /* second operand from ALT */
328 WCHAR ch1; /* match char for ALTPREREQ */
329 WCHAR ch2; /* ditto, or class index for ALTPREREQ2 */
330 } altprereq;
331 } u;
334 #define CLASS_CACHE_SIZE 4
336 typedef struct CompilerState {
337 script_ctx_t *context;
338 const WCHAR *cpbegin;
339 const WCHAR *cpend;
340 const WCHAR *cp;
341 size_t parenCount;
342 size_t classCount; /* number of [] encountered */
343 size_t treeDepth; /* maximum depth of parse tree */
344 size_t progLength; /* estimated bytecode length */
345 RENode *result;
346 size_t classBitmapsMem; /* memory to hold all class bitmaps */
347 struct {
348 const WCHAR *start; /* small cache of class strings */
349 size_t length; /* since they're often the same */
350 size_t index;
351 } classCache[CLASS_CACHE_SIZE];
352 WORD flags;
353 } CompilerState;
355 typedef struct EmitStateStackEntry {
356 jsbytecode *altHead; /* start of REOP_ALT* opcode */
357 jsbytecode *nextAltFixup; /* fixup pointer to next-alt offset */
358 jsbytecode *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */
359 jsbytecode *endTermFixup; /* fixup ptr. to REOPT_ALTPREREQ* offset */
360 RENode *continueNode; /* original REOP_ALT* node being stacked */
361 jsbytecode continueOp; /* REOP_JUMP or REOP_ENDALT continuation */
362 JSPackedBool jumpToJumpFlag; /* true if we've patched jump-to-jump to
363 avoid 16-bit unsigned offset overflow */
364 } EmitStateStackEntry;
367 * Immediate operand sizes and getter/setters. Unlike the ones in jsopcode.h,
368 * the getters and setters take the pc of the offset, not of the opcode before
369 * the offset.
371 #define ARG_LEN 2
372 #define GET_ARG(pc) ((WORD)(((pc)[0] << 8) | (pc)[1]))
373 #define SET_ARG(pc, arg) ((pc)[0] = (jsbytecode) ((arg) >> 8), \
374 (pc)[1] = (jsbytecode) (arg))
376 #define OFFSET_LEN ARG_LEN
377 #define OFFSET_MAX ((1 << (ARG_LEN * 8)) - 1)
378 #define GET_OFFSET(pc) GET_ARG(pc)
380 static BOOL ParseRegExp(CompilerState*);
383 * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
384 * For sanity, we limit it to 2^24 bytes.
386 #define TREE_DEPTH_MAX ((1 << 24) / sizeof(EmitStateStackEntry))
389 * The maximum memory that can be allocated for class bitmaps.
390 * For sanity, we limit it to 2^24 bytes.
392 #define CLASS_BITMAPS_MEM_LIMIT (1 << 24)
395 * Functions to get size and write/read bytecode that represent small indexes
396 * compactly.
397 * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
398 * indicates that the following byte brings more bits to the index. Otherwise
399 * this is the last byte in the index bytecode representing highest index bits.
401 static size_t
402 GetCompactIndexWidth(size_t index)
404 size_t width;
406 for (width = 1; (index >>= 7) != 0; ++width) { }
407 return width;
410 static inline jsbytecode *
411 WriteCompactIndex(jsbytecode *pc, size_t index)
413 size_t next;
415 while ((next = index >> 7) != 0) {
416 *pc++ = (jsbytecode)(index | 0x80);
417 index = next;
419 *pc++ = (jsbytecode)index;
420 return pc;
423 static inline jsbytecode *
424 ReadCompactIndex(jsbytecode *pc, size_t *result)
426 size_t nextByte;
428 nextByte = *pc++;
429 if ((nextByte & 0x80) == 0) {
431 * Short-circuit the most common case when compact index <= 127.
433 *result = nextByte;
434 } else {
435 size_t shift = 7;
436 *result = 0x7F & nextByte;
437 do {
438 nextByte = *pc++;
439 *result |= (nextByte & 0x7F) << shift;
440 shift += 7;
441 } while ((nextByte & 0x80) != 0);
443 return pc;
446 /* Construct and initialize an RENode, returning NULL for out-of-memory */
447 static RENode *
448 NewRENode(CompilerState *state, REOp op)
450 RENode *ren;
452 ren = jsheap_alloc(&state->context->tmp_heap, sizeof(*ren));
453 if (!ren) {
454 /* js_ReportOutOfScriptQuota(cx); */
455 return NULL;
457 ren->op = op;
458 ren->next = NULL;
459 ren->kid = NULL;
460 return ren;
464 * Validates and converts hex ascii value.
466 static BOOL
467 isASCIIHexDigit(WCHAR c, UINT *digit)
469 UINT cv = c;
471 if (cv < '0')
472 return FALSE;
473 if (cv <= '9') {
474 *digit = cv - '0';
475 return TRUE;
477 cv |= 0x20;
478 if (cv >= 'a' && cv <= 'f') {
479 *digit = cv - 'a' + 10;
480 return TRUE;
482 return FALSE;
485 typedef struct {
486 REOp op;
487 const WCHAR *errPos;
488 size_t parenIndex;
489 } REOpData;
491 #define JUMP_OFFSET_HI(off) ((jsbytecode)((off) >> 8))
492 #define JUMP_OFFSET_LO(off) ((jsbytecode)(off))
494 static BOOL
495 SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
497 ptrdiff_t offset = target - jump;
499 /* Check that target really points forward. */
500 assert(offset >= 2);
501 if ((size_t)offset > OFFSET_MAX)
502 return FALSE;
504 jump[0] = JUMP_OFFSET_HI(offset);
505 jump[1] = JUMP_OFFSET_LO(offset);
506 return TRUE;
510 * Generate bytecode for the tree rooted at t using an explicit stack instead
511 * of recursion.
513 static jsbytecode *
514 EmitREBytecode(CompilerState *state, JSRegExp *re, size_t treeDepth,
515 jsbytecode *pc, RENode *t)
517 EmitStateStackEntry *emitStateSP, *emitStateStack;
518 RECharSet *charSet;
519 REOp op;
521 if (treeDepth == 0) {
522 emitStateStack = NULL;
523 } else {
524 emitStateStack = heap_alloc(sizeof(EmitStateStackEntry) * treeDepth);
525 if (!emitStateStack)
526 return NULL;
528 emitStateSP = emitStateStack;
529 op = t->op;
530 assert(op < REOP_LIMIT);
532 for (;;) {
533 *pc++ = op;
534 switch (op) {
535 case REOP_EMPTY:
536 --pc;
537 break;
539 case REOP_ALTPREREQ2:
540 case REOP_ALTPREREQ:
541 assert(emitStateSP);
542 emitStateSP->altHead = pc - 1;
543 emitStateSP->endTermFixup = pc;
544 pc += OFFSET_LEN;
545 SET_ARG(pc, t->u.altprereq.ch1);
546 pc += ARG_LEN;
547 SET_ARG(pc, t->u.altprereq.ch2);
548 pc += ARG_LEN;
550 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
551 pc += OFFSET_LEN;
553 emitStateSP->continueNode = t;
554 emitStateSP->continueOp = REOP_JUMP;
555 emitStateSP->jumpToJumpFlag = FALSE;
556 ++emitStateSP;
557 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
558 t = t->kid;
559 op = t->op;
560 assert(op < REOP_LIMIT);
561 continue;
563 case REOP_JUMP:
564 emitStateSP->nextTermFixup = pc; /* offset to following term */
565 pc += OFFSET_LEN;
566 if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc))
567 goto jump_too_big;
568 emitStateSP->continueOp = REOP_ENDALT;
569 ++emitStateSP;
570 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
571 t = t->u.kid2;
572 op = t->op;
573 assert(op < REOP_LIMIT);
574 continue;
576 case REOP_ENDALT:
578 * If we already patched emitStateSP->nextTermFixup to jump to
579 * a nearer jump, to avoid 16-bit immediate offset overflow, we
580 * are done here.
582 if (emitStateSP->jumpToJumpFlag)
583 break;
586 * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
587 * REOP_ENDALT is executed only on successful match of the last
588 * alternate in a group.
590 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
591 goto jump_too_big;
592 if (t->op != REOP_ALT) {
593 if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc))
594 goto jump_too_big;
598 * If the program is bigger than the REOP_JUMP offset range, then
599 * we must check for alternates before this one that are part of
600 * the same group, and fix up their jump offsets to target jumps
601 * close enough to fit in a 16-bit unsigned offset immediate.
603 if ((size_t)(pc - re->program) > OFFSET_MAX &&
604 emitStateSP > emitStateStack) {
605 EmitStateStackEntry *esp, *esp2;
606 jsbytecode *alt, *jump;
607 ptrdiff_t span, header;
609 esp2 = emitStateSP;
610 alt = esp2->altHead;
611 for (esp = esp2 - 1; esp >= emitStateStack; --esp) {
612 if (esp->continueOp == REOP_ENDALT &&
613 !esp->jumpToJumpFlag &&
614 esp->nextTermFixup + OFFSET_LEN == alt &&
615 (size_t)(pc - ((esp->continueNode->op != REOP_ALT)
616 ? esp->endTermFixup
617 : esp->nextTermFixup)) > OFFSET_MAX) {
618 alt = esp->altHead;
619 jump = esp->nextTermFixup;
622 * The span must be 1 less than the distance from
623 * jump offset to jump offset, so we actually jump
624 * to a REOP_JUMP bytecode, not to its offset!
626 for (;;) {
627 assert(jump < esp2->nextTermFixup);
628 span = esp2->nextTermFixup - jump - 1;
629 if ((size_t)span <= OFFSET_MAX)
630 break;
631 do {
632 if (--esp2 == esp)
633 goto jump_too_big;
634 } while (esp2->continueOp != REOP_ENDALT);
637 jump[0] = JUMP_OFFSET_HI(span);
638 jump[1] = JUMP_OFFSET_LO(span);
640 if (esp->continueNode->op != REOP_ALT) {
642 * We must patch the offset at esp->endTermFixup
643 * as well, for the REOP_ALTPREREQ{,2} opcodes.
644 * If we're unlucky and endTermFixup is more than
645 * OFFSET_MAX bytes from its target, we cheat by
646 * jumping 6 bytes to the jump whose offset is at
647 * esp->nextTermFixup, which has the same target.
649 jump = esp->endTermFixup;
650 header = esp->nextTermFixup - jump;
651 span += header;
652 if ((size_t)span > OFFSET_MAX)
653 span = header;
655 jump[0] = JUMP_OFFSET_HI(span);
656 jump[1] = JUMP_OFFSET_LO(span);
659 esp->jumpToJumpFlag = TRUE;
663 break;
665 case REOP_ALT:
666 assert(emitStateSP);
667 emitStateSP->altHead = pc - 1;
668 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
669 pc += OFFSET_LEN;
670 emitStateSP->continueNode = t;
671 emitStateSP->continueOp = REOP_JUMP;
672 emitStateSP->jumpToJumpFlag = FALSE;
673 ++emitStateSP;
674 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
675 t = t->kid;
676 op = t->op;
677 assert(op < REOP_LIMIT);
678 continue;
680 case REOP_FLAT:
682 * Coalesce FLATs if possible and if it would not increase bytecode
683 * beyond preallocated limit. The latter happens only when bytecode
684 * size for coalesced string with offset p and length 2 exceeds 6
685 * bytes preallocated for 2 single char nodes, i.e. when
686 * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
687 * GetCompactIndexWidth(p) > 4.
688 * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
689 * nodes strictly decreases bytecode size, the check has to be
690 * done only for the first coalescing.
692 if (t->kid &&
693 GetCompactIndexWidth((WCHAR*)t->kid - state->cpbegin) <= 4)
695 while (t->next &&
696 t->next->op == REOP_FLAT &&
697 (WCHAR*)t->kid + t->u.flat.length ==
698 t->next->kid) {
699 t->u.flat.length += t->next->u.flat.length;
700 t->next = t->next->next;
703 if (t->kid && t->u.flat.length > 1) {
704 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLATi : REOP_FLAT;
705 pc = WriteCompactIndex(pc, (WCHAR*)t->kid - state->cpbegin);
706 pc = WriteCompactIndex(pc, t->u.flat.length);
707 } else if (t->u.flat.chr < 256) {
708 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLAT1i : REOP_FLAT1;
709 *pc++ = (jsbytecode) t->u.flat.chr;
710 } else {
711 pc[-1] = (state->flags & JSREG_FOLD)
712 ? REOP_UCFLAT1i
713 : REOP_UCFLAT1;
714 SET_ARG(pc, t->u.flat.chr);
715 pc += ARG_LEN;
717 break;
719 case REOP_LPAREN:
720 assert(emitStateSP);
721 pc = WriteCompactIndex(pc, t->u.parenIndex);
722 emitStateSP->continueNode = t;
723 emitStateSP->continueOp = REOP_RPAREN;
724 ++emitStateSP;
725 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
726 t = t->kid;
727 op = t->op;
728 continue;
730 case REOP_RPAREN:
731 pc = WriteCompactIndex(pc, t->u.parenIndex);
732 break;
734 case REOP_BACKREF:
735 pc = WriteCompactIndex(pc, t->u.parenIndex);
736 break;
738 case REOP_ASSERT:
739 assert(emitStateSP);
740 emitStateSP->nextTermFixup = pc;
741 pc += OFFSET_LEN;
742 emitStateSP->continueNode = t;
743 emitStateSP->continueOp = REOP_ASSERTTEST;
744 ++emitStateSP;
745 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
746 t = t->kid;
747 op = t->op;
748 continue;
750 case REOP_ASSERTTEST:
751 case REOP_ASSERTNOTTEST:
752 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
753 goto jump_too_big;
754 break;
756 case REOP_ASSERT_NOT:
757 assert(emitStateSP);
758 emitStateSP->nextTermFixup = pc;
759 pc += OFFSET_LEN;
760 emitStateSP->continueNode = t;
761 emitStateSP->continueOp = REOP_ASSERTNOTTEST;
762 ++emitStateSP;
763 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
764 t = t->kid;
765 op = t->op;
766 continue;
768 case REOP_QUANT:
769 assert(emitStateSP);
770 if (t->u.range.min == 0 && t->u.range.max == (UINT)-1) {
771 pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
772 } else if (t->u.range.min == 0 && t->u.range.max == 1) {
773 pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
774 } else if (t->u.range.min == 1 && t->u.range.max == (UINT) -1) {
775 pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
776 } else {
777 if (!t->u.range.greedy)
778 pc[-1] = REOP_MINIMALQUANT;
779 pc = WriteCompactIndex(pc, t->u.range.min);
781 * Write max + 1 to avoid using size_t(max) + 1 bytes
782 * for (UINT)-1 sentinel.
784 pc = WriteCompactIndex(pc, t->u.range.max + 1);
786 emitStateSP->nextTermFixup = pc;
787 pc += OFFSET_LEN;
788 emitStateSP->continueNode = t;
789 emitStateSP->continueOp = REOP_ENDCHILD;
790 ++emitStateSP;
791 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
792 t = t->kid;
793 op = t->op;
794 continue;
796 case REOP_ENDCHILD:
797 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
798 goto jump_too_big;
799 break;
801 case REOP_CLASS:
802 if (!t->u.ucclass.sense)
803 pc[-1] = REOP_NCLASS;
804 pc = WriteCompactIndex(pc, t->u.ucclass.index);
805 charSet = &re->classList[t->u.ucclass.index];
806 charSet->converted = FALSE;
807 charSet->length = t->u.ucclass.bmsize;
808 charSet->u.src.startIndex = t->u.ucclass.startIndex;
809 charSet->u.src.length = t->u.ucclass.kidlen;
810 charSet->sense = t->u.ucclass.sense;
811 break;
813 default:
814 break;
817 t = t->next;
818 if (t) {
819 op = t->op;
820 } else {
821 if (emitStateSP == emitStateStack)
822 break;
823 --emitStateSP;
824 t = emitStateSP->continueNode;
825 op = (REOp) emitStateSP->continueOp;
829 cleanup:
830 heap_free(emitStateStack);
831 return pc;
833 jump_too_big:
834 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
835 pc = NULL;
836 goto cleanup;
840 * Process the op against the two top operands, reducing them to a single
841 * operand in the penultimate slot. Update progLength and treeDepth.
843 static BOOL
844 ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack,
845 INT operandSP)
847 RENode *result;
849 switch (opData->op) {
850 case REOP_ALT:
851 result = NewRENode(state, REOP_ALT);
852 if (!result)
853 return FALSE;
854 result->kid = operandStack[operandSP - 2];
855 result->u.kid2 = operandStack[operandSP - 1];
856 operandStack[operandSP - 2] = result;
858 if (state->treeDepth == TREE_DEPTH_MAX) {
859 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
860 return FALSE;
862 ++state->treeDepth;
865 * Look at both alternates to see if there's a FLAT or a CLASS at
866 * the start of each. If so, use a prerequisite match.
868 if (((RENode *) result->kid)->op == REOP_FLAT &&
869 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
870 (state->flags & JSREG_FOLD) == 0) {
871 result->op = REOP_ALTPREREQ;
872 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
873 result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
874 /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
875 JUMP, <end> ... ENDALT */
876 state->progLength += 13;
878 else
879 if (((RENode *) result->kid)->op == REOP_CLASS &&
880 ((RENode *) result->kid)->u.ucclass.index < 256 &&
881 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
882 (state->flags & JSREG_FOLD) == 0) {
883 result->op = REOP_ALTPREREQ2;
884 result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
885 result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
886 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
887 JUMP, <end> ... ENDALT */
888 state->progLength += 13;
890 else
891 if (((RENode *) result->kid)->op == REOP_FLAT &&
892 ((RENode *) result->u.kid2)->op == REOP_CLASS &&
893 ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
894 (state->flags & JSREG_FOLD) == 0) {
895 result->op = REOP_ALTPREREQ2;
896 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
897 result->u.altprereq.ch2 =
898 ((RENode *) result->u.kid2)->u.ucclass.index;
899 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
900 JUMP, <end> ... ENDALT */
901 state->progLength += 13;
903 else {
904 /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
905 state->progLength += 7;
907 break;
909 case REOP_CONCAT:
910 result = operandStack[operandSP - 2];
911 while (result->next)
912 result = result->next;
913 result->next = operandStack[operandSP - 1];
914 break;
916 case REOP_ASSERT:
917 case REOP_ASSERT_NOT:
918 case REOP_LPARENNON:
919 case REOP_LPAREN:
920 /* These should have been processed by a close paren. */
921 ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_MISSING_PAREN,
922 opData->errPos);
923 return FALSE;
925 default:;
927 return TRUE;
931 * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
932 * its being on the stack, and to propagate errors to its callers.
934 #define JSREG_FIND_PAREN_COUNT 0x8000
935 #define JSREG_FIND_PAREN_ERROR 0x4000
938 * Magic return value from FindParenCount and GetDecimalValue, to indicate
939 * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
940 * its findMax parameter is non-null.
942 #define OVERFLOW_VALUE ((UINT)-1)
944 static UINT
945 FindParenCount(CompilerState *state)
947 CompilerState temp;
948 int i;
950 if (state->flags & JSREG_FIND_PAREN_COUNT)
951 return OVERFLOW_VALUE;
954 * Copy state into temp, flag it so we never report an invalid backref,
955 * and reset its members to parse the entire regexp. This is obviously
956 * suboptimal, but GetDecimalValue calls us only if a backref appears to
957 * refer to a forward parenthetical, which is rare.
959 temp = *state;
960 temp.flags |= JSREG_FIND_PAREN_COUNT;
961 temp.cp = temp.cpbegin;
962 temp.parenCount = 0;
963 temp.classCount = 0;
964 temp.progLength = 0;
965 temp.treeDepth = 0;
966 temp.classBitmapsMem = 0;
967 for (i = 0; i < CLASS_CACHE_SIZE; i++)
968 temp.classCache[i].start = NULL;
970 if (!ParseRegExp(&temp)) {
971 state->flags |= JSREG_FIND_PAREN_ERROR;
972 return OVERFLOW_VALUE;
974 return temp.parenCount;
978 * Extract and return a decimal value at state->cp. The initial character c
979 * has already been read. Return OVERFLOW_VALUE if the result exceeds max.
980 * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
981 * state->flags to discover whether an error occurred under findMax.
983 static UINT
984 GetDecimalValue(WCHAR c, UINT max, UINT (*findMax)(CompilerState *state),
985 CompilerState *state)
987 UINT value = JS7_UNDEC(c);
988 BOOL overflow = (value > max && (!findMax || value > findMax(state)));
990 /* The following restriction allows simpler overflow checks. */
991 assert(max <= ((UINT)-1 - 9) / 10);
992 while (state->cp < state->cpend) {
993 c = *state->cp;
994 if (!JS7_ISDEC(c))
995 break;
996 value = 10 * value + JS7_UNDEC(c);
997 if (!overflow && value > max && (!findMax || value > findMax(state)))
998 overflow = TRUE;
999 ++state->cp;
1001 return overflow ? OVERFLOW_VALUE : value;
1005 * Calculate the total size of the bitmap required for a class expression.
1007 static BOOL
1008 CalculateBitmapSize(CompilerState *state, RENode *target, const WCHAR *src,
1009 const WCHAR *end)
1011 UINT max = 0;
1012 BOOL inRange = FALSE;
1013 WCHAR c, rangeStart = 0;
1014 UINT n, digit, nDigits, i;
1016 target->u.ucclass.bmsize = 0;
1017 target->u.ucclass.sense = TRUE;
1019 if (src == end)
1020 return TRUE;
1022 if (*src == '^') {
1023 ++src;
1024 target->u.ucclass.sense = FALSE;
1027 while (src != end) {
1028 BOOL canStartRange = TRUE;
1029 UINT localMax = 0;
1031 switch (*src) {
1032 case '\\':
1033 ++src;
1034 c = *src++;
1035 switch (c) {
1036 case 'b':
1037 localMax = 0x8;
1038 break;
1039 case 'f':
1040 localMax = 0xC;
1041 break;
1042 case 'n':
1043 localMax = 0xA;
1044 break;
1045 case 'r':
1046 localMax = 0xD;
1047 break;
1048 case 't':
1049 localMax = 0x9;
1050 break;
1051 case 'v':
1052 localMax = 0xB;
1053 break;
1054 case 'c':
1055 if (src < end && RE_IS_LETTER(*src)) {
1056 localMax = (UINT) (*src++) & 0x1F;
1057 } else {
1058 --src;
1059 localMax = '\\';
1061 break;
1062 case 'x':
1063 nDigits = 2;
1064 goto lexHex;
1065 case 'u':
1066 nDigits = 4;
1067 lexHex:
1068 n = 0;
1069 for (i = 0; (i < nDigits) && (src < end); i++) {
1070 c = *src++;
1071 if (!isASCIIHexDigit(c, &digit)) {
1073 * Back off to accepting the original
1074 *'\' as a literal.
1076 src -= i + 1;
1077 n = '\\';
1078 break;
1080 n = (n << 4) | digit;
1082 localMax = n;
1083 break;
1084 case 'd':
1085 canStartRange = FALSE;
1086 if (inRange) {
1087 JS_ReportErrorNumber(state->context,
1088 js_GetErrorMessage, NULL,
1089 JSMSG_BAD_CLASS_RANGE);
1090 return FALSE;
1092 localMax = '9';
1093 break;
1094 case 'D':
1095 case 's':
1096 case 'S':
1097 case 'w':
1098 case 'W':
1099 canStartRange = FALSE;
1100 if (inRange) {
1101 JS_ReportErrorNumber(state->context,
1102 js_GetErrorMessage, NULL,
1103 JSMSG_BAD_CLASS_RANGE);
1104 return FALSE;
1106 max = 65535;
1109 * If this is the start of a range, ensure that it's less than
1110 * the end.
1112 localMax = 0;
1113 break;
1114 case '0':
1115 case '1':
1116 case '2':
1117 case '3':
1118 case '4':
1119 case '5':
1120 case '6':
1121 case '7':
1123 * This is a non-ECMA extension - decimal escapes (in this
1124 * case, octal!) are supposed to be an error inside class
1125 * ranges, but supported here for backwards compatibility.
1128 n = JS7_UNDEC(c);
1129 c = *src;
1130 if ('0' <= c && c <= '7') {
1131 src++;
1132 n = 8 * n + JS7_UNDEC(c);
1133 c = *src;
1134 if ('0' <= c && c <= '7') {
1135 src++;
1136 i = 8 * n + JS7_UNDEC(c);
1137 if (i <= 0377)
1138 n = i;
1139 else
1140 src--;
1143 localMax = n;
1144 break;
1146 default:
1147 localMax = c;
1148 break;
1150 break;
1151 default:
1152 localMax = *src++;
1153 break;
1156 if (inRange) {
1157 /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
1158 if (rangeStart > localMax) {
1159 JS_ReportErrorNumber(state->context,
1160 js_GetErrorMessage, NULL,
1161 JSMSG_BAD_CLASS_RANGE);
1162 return FALSE;
1164 inRange = FALSE;
1165 } else {
1166 if (canStartRange && src < end - 1) {
1167 if (*src == '-') {
1168 ++src;
1169 inRange = TRUE;
1170 rangeStart = (WCHAR)localMax;
1171 continue;
1174 if (state->flags & JSREG_FOLD)
1175 rangeStart = localMax; /* one run of the uc/dc loop below */
1178 if (state->flags & JSREG_FOLD) {
1179 WCHAR maxch = localMax;
1181 for (i = rangeStart; i <= localMax; i++) {
1182 WCHAR uch, dch;
1184 uch = toupperW(i);
1185 dch = tolowerW(i);
1186 if(maxch < uch)
1187 maxch = uch;
1188 if(maxch < dch)
1189 maxch = dch;
1191 localMax = maxch;
1194 if (localMax > max)
1195 max = localMax;
1197 target->u.ucclass.bmsize = max;
1198 return TRUE;
1201 static INT
1202 ParseMinMaxQuantifier(CompilerState *state, BOOL ignoreValues)
1204 UINT min, max;
1205 WCHAR c;
1206 const WCHAR *errp = state->cp++;
1208 c = *state->cp;
1209 if (JS7_ISDEC(c)) {
1210 ++state->cp;
1211 min = GetDecimalValue(c, 0xFFFF, NULL, state);
1212 c = *state->cp;
1214 if (!ignoreValues && min == OVERFLOW_VALUE)
1215 return JSMSG_MIN_TOO_BIG;
1217 if (c == ',') {
1218 c = *++state->cp;
1219 if (JS7_ISDEC(c)) {
1220 ++state->cp;
1221 max = GetDecimalValue(c, 0xFFFF, NULL, state);
1222 c = *state->cp;
1223 if (!ignoreValues && max == OVERFLOW_VALUE)
1224 return JSMSG_MAX_TOO_BIG;
1225 if (!ignoreValues && min > max)
1226 return JSMSG_OUT_OF_ORDER;
1227 } else {
1228 max = (UINT)-1;
1230 } else {
1231 max = min;
1233 if (c == '}') {
1234 state->result = NewRENode(state, REOP_QUANT);
1235 if (!state->result)
1236 return JSMSG_OUT_OF_MEMORY;
1237 state->result->u.range.min = min;
1238 state->result->u.range.max = max;
1240 * QUANT, <min>, <max>, <next> ... <ENDCHILD>
1241 * where <max> is written as compact(max+1) to make
1242 * (UINT)-1 sentinel to occupy 1 byte, not width_of(max)+1.
1244 state->progLength += (1 + GetCompactIndexWidth(min)
1245 + GetCompactIndexWidth(max + 1)
1246 +3);
1247 return 0;
1251 state->cp = errp;
1252 return -1;
1255 static BOOL
1256 ParseQuantifier(CompilerState *state)
1258 RENode *term;
1259 term = state->result;
1260 if (state->cp < state->cpend) {
1261 switch (*state->cp) {
1262 case '+':
1263 state->result = NewRENode(state, REOP_QUANT);
1264 if (!state->result)
1265 return FALSE;
1266 state->result->u.range.min = 1;
1267 state->result->u.range.max = (UINT)-1;
1268 /* <PLUS>, <next> ... <ENDCHILD> */
1269 state->progLength += 4;
1270 goto quantifier;
1271 case '*':
1272 state->result = NewRENode(state, REOP_QUANT);
1273 if (!state->result)
1274 return FALSE;
1275 state->result->u.range.min = 0;
1276 state->result->u.range.max = (UINT)-1;
1277 /* <STAR>, <next> ... <ENDCHILD> */
1278 state->progLength += 4;
1279 goto quantifier;
1280 case '?':
1281 state->result = NewRENode(state, REOP_QUANT);
1282 if (!state->result)
1283 return FALSE;
1284 state->result->u.range.min = 0;
1285 state->result->u.range.max = 1;
1286 /* <OPT>, <next> ... <ENDCHILD> */
1287 state->progLength += 4;
1288 goto quantifier;
1289 case '{': /* balance '}' */
1291 INT err;
1293 err = ParseMinMaxQuantifier(state, FALSE);
1294 if (err == 0)
1295 goto quantifier;
1296 if (err == -1)
1297 return TRUE;
1299 ReportRegExpErrorHelper(state, JSREPORT_ERROR, err, errp);
1300 return FALSE;
1302 default:;
1305 return TRUE;
1307 quantifier:
1308 if (state->treeDepth == TREE_DEPTH_MAX) {
1309 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1310 return FALSE;
1313 ++state->treeDepth;
1314 ++state->cp;
1315 state->result->kid = term;
1316 if (state->cp < state->cpend && *state->cp == '?') {
1317 ++state->cp;
1318 state->result->u.range.greedy = FALSE;
1319 } else {
1320 state->result->u.range.greedy = TRUE;
1322 return TRUE;
1326 * item: assertion An item is either an assertion or
1327 * quantatom a quantified atom.
1329 * assertion: '^' Assertions match beginning of string
1330 * (or line if the class static property
1331 * RegExp.multiline is true).
1332 * '$' End of string (or line if the class
1333 * static property RegExp.multiline is
1334 * true).
1335 * '\b' Word boundary (between \w and \W).
1336 * '\B' Word non-boundary.
1338 * quantatom: atom An unquantified atom.
1339 * quantatom '{' n ',' m '}'
1340 * Atom must occur between n and m times.
1341 * quantatom '{' n ',' '}' Atom must occur at least n times.
1342 * quantatom '{' n '}' Atom must occur exactly n times.
1343 * quantatom '*' Zero or more times (same as {0,}).
1344 * quantatom '+' One or more times (same as {1,}).
1345 * quantatom '?' Zero or one time (same as {0,1}).
1347 * any of which can be optionally followed by '?' for ungreedy
1349 * atom: '(' regexp ')' A parenthesized regexp (what matched
1350 * can be addressed using a backreference,
1351 * see '\' n below).
1352 * '.' Matches any char except '\n'.
1353 * '[' classlist ']' A character class.
1354 * '[' '^' classlist ']' A negated character class.
1355 * '\f' Form Feed.
1356 * '\n' Newline (Line Feed).
1357 * '\r' Carriage Return.
1358 * '\t' Horizontal Tab.
1359 * '\v' Vertical Tab.
1360 * '\d' A digit (same as [0-9]).
1361 * '\D' A non-digit.
1362 * '\w' A word character, [0-9a-z_A-Z].
1363 * '\W' A non-word character.
1364 * '\s' A whitespace character, [ \b\f\n\r\t\v].
1365 * '\S' A non-whitespace character.
1366 * '\' n A backreference to the nth (n decimal
1367 * and positive) parenthesized expression.
1368 * '\' octal An octal escape sequence (octal must be
1369 * two or three digits long, unless it is
1370 * 0 for the null character).
1371 * '\x' hex A hex escape (hex must be two digits).
1372 * '\u' unicode A unicode escape (must be four digits).
1373 * '\c' ctrl A control character, ctrl is a letter.
1374 * '\' literalatomchar Any character except one of the above
1375 * that follow '\' in an atom.
1376 * otheratomchar Any character not first among the other
1377 * atom right-hand sides.
1379 static BOOL
1380 ParseTerm(CompilerState *state)
1382 WCHAR c = *state->cp++;
1383 UINT nDigits;
1384 UINT num, tmp, n, i;
1385 const WCHAR *termStart;
1387 switch (c) {
1388 /* assertions and atoms */
1389 case '^':
1390 state->result = NewRENode(state, REOP_BOL);
1391 if (!state->result)
1392 return FALSE;
1393 state->progLength++;
1394 return TRUE;
1395 case '$':
1396 state->result = NewRENode(state, REOP_EOL);
1397 if (!state->result)
1398 return FALSE;
1399 state->progLength++;
1400 return TRUE;
1401 case '\\':
1402 if (state->cp >= state->cpend) {
1403 /* a trailing '\' is an error */
1404 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_TRAILING_SLASH);
1405 return FALSE;
1407 c = *state->cp++;
1408 switch (c) {
1409 /* assertion escapes */
1410 case 'b' :
1411 state->result = NewRENode(state, REOP_WBDRY);
1412 if (!state->result)
1413 return FALSE;
1414 state->progLength++;
1415 return TRUE;
1416 case 'B':
1417 state->result = NewRENode(state, REOP_WNONBDRY);
1418 if (!state->result)
1419 return FALSE;
1420 state->progLength++;
1421 return TRUE;
1422 /* Decimal escape */
1423 case '0':
1424 /* Give a strict warning. See also the note below. */
1425 WARN("non-octal digit in an escape sequence that doesn't match a back-reference\n");
1426 doOctal:
1427 num = 0;
1428 while (state->cp < state->cpend) {
1429 c = *state->cp;
1430 if (c < '0' || '7' < c)
1431 break;
1432 state->cp++;
1433 tmp = 8 * num + (UINT)JS7_UNDEC(c);
1434 if (tmp > 0377)
1435 break;
1436 num = tmp;
1438 c = (WCHAR)num;
1439 doFlat:
1440 state->result = NewRENode(state, REOP_FLAT);
1441 if (!state->result)
1442 return FALSE;
1443 state->result->u.flat.chr = c;
1444 state->result->u.flat.length = 1;
1445 state->progLength += 3;
1446 break;
1447 case '1':
1448 case '2':
1449 case '3':
1450 case '4':
1451 case '5':
1452 case '6':
1453 case '7':
1454 case '8':
1455 case '9':
1456 termStart = state->cp - 1;
1457 num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
1458 if (state->flags & JSREG_FIND_PAREN_ERROR)
1459 return FALSE;
1460 if (num == OVERFLOW_VALUE) {
1461 /* Give a strict mode warning. */
1462 WARN("back-reference exceeds number of capturing parentheses\n");
1465 * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
1466 * error here. However, for compatibility with IE, we treat the
1467 * whole backref as flat if the first character in it is not a
1468 * valid octal character, and as an octal escape otherwise.
1470 state->cp = termStart;
1471 if (c >= '8') {
1472 /* Treat this as flat. termStart - 1 is the \. */
1473 c = '\\';
1474 goto asFlat;
1477 /* Treat this as an octal escape. */
1478 goto doOctal;
1480 assert(1 <= num && num <= 0x10000);
1481 state->result = NewRENode(state, REOP_BACKREF);
1482 if (!state->result)
1483 return FALSE;
1484 state->result->u.parenIndex = num - 1;
1485 state->progLength
1486 += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
1487 break;
1488 /* Control escape */
1489 case 'f':
1490 c = 0xC;
1491 goto doFlat;
1492 case 'n':
1493 c = 0xA;
1494 goto doFlat;
1495 case 'r':
1496 c = 0xD;
1497 goto doFlat;
1498 case 't':
1499 c = 0x9;
1500 goto doFlat;
1501 case 'v':
1502 c = 0xB;
1503 goto doFlat;
1504 /* Control letter */
1505 case 'c':
1506 if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) {
1507 c = (WCHAR) (*state->cp++ & 0x1F);
1508 } else {
1509 /* back off to accepting the original '\' as a literal */
1510 --state->cp;
1511 c = '\\';
1513 goto doFlat;
1514 /* HexEscapeSequence */
1515 case 'x':
1516 nDigits = 2;
1517 goto lexHex;
1518 /* UnicodeEscapeSequence */
1519 case 'u':
1520 nDigits = 4;
1521 lexHex:
1522 n = 0;
1523 for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
1524 UINT digit;
1525 c = *state->cp++;
1526 if (!isASCIIHexDigit(c, &digit)) {
1528 * Back off to accepting the original 'u' or 'x' as a
1529 * literal.
1531 state->cp -= i + 2;
1532 n = *state->cp++;
1533 break;
1535 n = (n << 4) | digit;
1537 c = (WCHAR) n;
1538 goto doFlat;
1539 /* Character class escapes */
1540 case 'd':
1541 state->result = NewRENode(state, REOP_DIGIT);
1542 doSimple:
1543 if (!state->result)
1544 return FALSE;
1545 state->progLength++;
1546 break;
1547 case 'D':
1548 state->result = NewRENode(state, REOP_NONDIGIT);
1549 goto doSimple;
1550 case 's':
1551 state->result = NewRENode(state, REOP_SPACE);
1552 goto doSimple;
1553 case 'S':
1554 state->result = NewRENode(state, REOP_NONSPACE);
1555 goto doSimple;
1556 case 'w':
1557 state->result = NewRENode(state, REOP_ALNUM);
1558 goto doSimple;
1559 case 'W':
1560 state->result = NewRENode(state, REOP_NONALNUM);
1561 goto doSimple;
1562 /* IdentityEscape */
1563 default:
1564 state->result = NewRENode(state, REOP_FLAT);
1565 if (!state->result)
1566 return FALSE;
1567 state->result->u.flat.chr = c;
1568 state->result->u.flat.length = 1;
1569 state->result->kid = (void *) (state->cp - 1);
1570 state->progLength += 3;
1571 break;
1573 break;
1574 case '[':
1575 state->result = NewRENode(state, REOP_CLASS);
1576 if (!state->result)
1577 return FALSE;
1578 termStart = state->cp;
1579 state->result->u.ucclass.startIndex = termStart - state->cpbegin;
1580 for (;;) {
1581 if (state->cp == state->cpend) {
1582 ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1583 JSMSG_UNTERM_CLASS, termStart);
1585 return FALSE;
1587 if (*state->cp == '\\') {
1588 state->cp++;
1589 if (state->cp != state->cpend)
1590 state->cp++;
1591 continue;
1593 if (*state->cp == ']') {
1594 state->result->u.ucclass.kidlen = state->cp - termStart;
1595 break;
1597 state->cp++;
1599 for (i = 0; i < CLASS_CACHE_SIZE; i++) {
1600 if (!state->classCache[i].start) {
1601 state->classCache[i].start = termStart;
1602 state->classCache[i].length = state->result->u.ucclass.kidlen;
1603 state->classCache[i].index = state->classCount;
1604 break;
1606 if (state->classCache[i].length ==
1607 state->result->u.ucclass.kidlen) {
1608 for (n = 0; ; n++) {
1609 if (n == state->classCache[i].length) {
1610 state->result->u.ucclass.index
1611 = state->classCache[i].index;
1612 goto claim;
1614 if (state->classCache[i].start[n] != termStart[n])
1615 break;
1619 state->result->u.ucclass.index = state->classCount++;
1621 claim:
1623 * Call CalculateBitmapSize now as we want any errors it finds
1624 * to be reported during the parse phase, not at execution.
1626 if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
1627 return FALSE;
1629 * Update classBitmapsMem with number of bytes to hold bmsize bits,
1630 * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
1631 * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
1633 n = (state->result->u.ucclass.bmsize >> 3) + 1;
1634 if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
1635 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1636 return FALSE;
1638 state->classBitmapsMem += n;
1639 /* CLASS, <index> */
1640 state->progLength
1641 += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
1642 break;
1644 case '.':
1645 state->result = NewRENode(state, REOP_DOT);
1646 goto doSimple;
1648 case '{':
1650 const WCHAR *errp = state->cp--;
1651 INT err;
1653 err = ParseMinMaxQuantifier(state, TRUE);
1654 state->cp = errp;
1656 if (err < 0)
1657 goto asFlat;
1659 /* FALL THROUGH */
1661 case '*':
1662 case '+':
1663 case '?':
1664 ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1665 JSMSG_BAD_QUANTIFIER, state->cp - 1);
1666 return FALSE;
1667 default:
1668 asFlat:
1669 state->result = NewRENode(state, REOP_FLAT);
1670 if (!state->result)
1671 return FALSE;
1672 state->result->u.flat.chr = c;
1673 state->result->u.flat.length = 1;
1674 state->result->kid = (void *) (state->cp - 1);
1675 state->progLength += 3;
1676 break;
1678 return ParseQuantifier(state);
1682 * Top-down regular expression grammar, based closely on Perl4.
1684 * regexp: altern A regular expression is one or more
1685 * altern '|' regexp alternatives separated by vertical bar.
1687 #define INITIAL_STACK_SIZE 128
1689 static BOOL
1690 ParseRegExp(CompilerState *state)
1692 size_t parenIndex;
1693 RENode *operand;
1694 REOpData *operatorStack;
1695 RENode **operandStack;
1696 REOp op;
1697 INT i;
1698 BOOL result = FALSE;
1700 INT operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
1701 INT operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
1703 /* Watch out for empty regexp */
1704 if (state->cp == state->cpend) {
1705 state->result = NewRENode(state, REOP_EMPTY);
1706 return (state->result != NULL);
1709 operatorStack = heap_alloc(sizeof(REOpData) * operatorStackSize);
1710 if (!operatorStack)
1711 return FALSE;
1713 operandStack = heap_alloc(sizeof(RENode *) * operandStackSize);
1714 if (!operandStack)
1715 goto out;
1717 for (;;) {
1718 parenIndex = state->parenCount;
1719 if (state->cp == state->cpend) {
1721 * If we are at the end of the regexp and we're short one or more
1722 * operands, the regexp must have the form /x|/ or some such, with
1723 * left parentheses making us short more than one operand.
1725 if (operatorSP >= operandSP) {
1726 operand = NewRENode(state, REOP_EMPTY);
1727 if (!operand)
1728 goto out;
1729 goto pushOperand;
1731 } else {
1732 switch (*state->cp) {
1733 case '(':
1734 ++state->cp;
1735 if (state->cp + 1 < state->cpend &&
1736 *state->cp == '?' &&
1737 (state->cp[1] == '=' ||
1738 state->cp[1] == '!' ||
1739 state->cp[1] == ':')) {
1740 switch (state->cp[1]) {
1741 case '=':
1742 op = REOP_ASSERT;
1743 /* ASSERT, <next>, ... ASSERTTEST */
1744 state->progLength += 4;
1745 break;
1746 case '!':
1747 op = REOP_ASSERT_NOT;
1748 /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
1749 state->progLength += 4;
1750 break;
1751 default:
1752 op = REOP_LPARENNON;
1753 break;
1755 state->cp += 2;
1756 } else {
1757 op = REOP_LPAREN;
1758 /* LPAREN, <index>, ... RPAREN, <index> */
1759 state->progLength
1760 += 2 * (1 + GetCompactIndexWidth(parenIndex));
1761 state->parenCount++;
1762 if (state->parenCount == 65535) {
1763 ReportRegExpError(state, JSREPORT_ERROR,
1764 JSMSG_TOO_MANY_PARENS);
1765 goto out;
1768 goto pushOperator;
1770 case ')':
1772 * If there's no stacked open parenthesis, throw syntax error.
1774 for (i = operatorSP - 1; ; i--) {
1775 if (i < 0) {
1776 ReportRegExpError(state, JSREPORT_ERROR,
1777 JSMSG_UNMATCHED_RIGHT_PAREN);
1778 goto out;
1780 if (operatorStack[i].op == REOP_ASSERT ||
1781 operatorStack[i].op == REOP_ASSERT_NOT ||
1782 operatorStack[i].op == REOP_LPARENNON ||
1783 operatorStack[i].op == REOP_LPAREN) {
1784 break;
1787 /* FALL THROUGH */
1789 case '|':
1790 /* Expected an operand before these, so make an empty one */
1791 operand = NewRENode(state, REOP_EMPTY);
1792 if (!operand)
1793 goto out;
1794 goto pushOperand;
1796 default:
1797 if (!ParseTerm(state))
1798 goto out;
1799 operand = state->result;
1800 pushOperand:
1801 if (operandSP == operandStackSize) {
1802 RENode **tmp;
1803 operandStackSize += operandStackSize;
1804 tmp = heap_realloc(operandStack, sizeof(RENode *) * operandStackSize);
1805 if (!tmp)
1806 goto out;
1807 operandStack = tmp;
1809 operandStack[operandSP++] = operand;
1810 break;
1814 /* At the end; process remaining operators. */
1815 restartOperator:
1816 if (state->cp == state->cpend) {
1817 while (operatorSP) {
1818 --operatorSP;
1819 if (!ProcessOp(state, &operatorStack[operatorSP],
1820 operandStack, operandSP))
1821 goto out;
1822 --operandSP;
1824 assert(operandSP == 1);
1825 state->result = operandStack[0];
1826 result = TRUE;
1827 goto out;
1830 switch (*state->cp) {
1831 case '|':
1832 /* Process any stacked 'concat' operators */
1833 ++state->cp;
1834 while (operatorSP &&
1835 operatorStack[operatorSP - 1].op == REOP_CONCAT) {
1836 --operatorSP;
1837 if (!ProcessOp(state, &operatorStack[operatorSP],
1838 operandStack, operandSP)) {
1839 goto out;
1841 --operandSP;
1843 op = REOP_ALT;
1844 goto pushOperator;
1846 case ')':
1848 * If there's no stacked open parenthesis, throw syntax error.
1850 for (i = operatorSP - 1; ; i--) {
1851 if (i < 0) {
1852 ReportRegExpError(state, JSREPORT_ERROR,
1853 JSMSG_UNMATCHED_RIGHT_PAREN);
1854 goto out;
1856 if (operatorStack[i].op == REOP_ASSERT ||
1857 operatorStack[i].op == REOP_ASSERT_NOT ||
1858 operatorStack[i].op == REOP_LPARENNON ||
1859 operatorStack[i].op == REOP_LPAREN) {
1860 break;
1863 ++state->cp;
1865 /* Process everything on the stack until the open parenthesis. */
1866 for (;;) {
1867 assert(operatorSP);
1868 --operatorSP;
1869 switch (operatorStack[operatorSP].op) {
1870 case REOP_ASSERT:
1871 case REOP_ASSERT_NOT:
1872 case REOP_LPAREN:
1873 operand = NewRENode(state, operatorStack[operatorSP].op);
1874 if (!operand)
1875 goto out;
1876 operand->u.parenIndex =
1877 operatorStack[operatorSP].parenIndex;
1878 assert(operandSP);
1879 operand->kid = operandStack[operandSP - 1];
1880 operandStack[operandSP - 1] = operand;
1881 if (state->treeDepth == TREE_DEPTH_MAX) {
1882 ReportRegExpError(state, JSREPORT_ERROR,
1883 JSMSG_REGEXP_TOO_COMPLEX);
1884 goto out;
1886 ++state->treeDepth;
1887 /* FALL THROUGH */
1889 case REOP_LPARENNON:
1890 state->result = operandStack[operandSP - 1];
1891 if (!ParseQuantifier(state))
1892 goto out;
1893 operandStack[operandSP - 1] = state->result;
1894 goto restartOperator;
1895 default:
1896 if (!ProcessOp(state, &operatorStack[operatorSP],
1897 operandStack, operandSP))
1898 goto out;
1899 --operandSP;
1900 break;
1903 break;
1905 case '{':
1907 const WCHAR *errp = state->cp;
1909 if (ParseMinMaxQuantifier(state, TRUE) < 0) {
1911 * This didn't even scan correctly as a quantifier, so we should
1912 * treat it as flat.
1914 op = REOP_CONCAT;
1915 goto pushOperator;
1918 state->cp = errp;
1919 /* FALL THROUGH */
1922 case '+':
1923 case '*':
1924 case '?':
1925 ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_BAD_QUANTIFIER,
1926 state->cp);
1927 result = FALSE;
1928 goto out;
1930 default:
1931 /* Anything else is the start of the next term. */
1932 op = REOP_CONCAT;
1933 pushOperator:
1934 if (operatorSP == operatorStackSize) {
1935 REOpData *tmp;
1936 operatorStackSize += operatorStackSize;
1937 tmp = heap_realloc(operatorStack, sizeof(REOpData) * operatorStackSize);
1938 if (!tmp)
1939 goto out;
1940 operatorStack = tmp;
1942 operatorStack[operatorSP].op = op;
1943 operatorStack[operatorSP].errPos = state->cp;
1944 operatorStack[operatorSP++].parenIndex = parenIndex;
1945 break;
1948 out:
1949 heap_free(operatorStack);
1950 heap_free(operandStack);
1951 return result;
1955 * Save the current state of the match - the position in the input
1956 * text as well as the position in the bytecode. The state of any
1957 * parent expressions is also saved (preceding state).
1958 * Contents of parenCount parentheses from parenIndex are also saved.
1960 static REBackTrackData *
1961 PushBackTrackState(REGlobalData *gData, REOp op,
1962 jsbytecode *target, REMatchState *x, const WCHAR *cp,
1963 size_t parenIndex, size_t parenCount)
1965 size_t i;
1966 REBackTrackData *result =
1967 (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz);
1969 size_t sz = sizeof(REBackTrackData) +
1970 gData->stateStackTop * sizeof(REProgState) +
1971 parenCount * sizeof(RECapture);
1973 ptrdiff_t btsize = gData->backTrackStackSize;
1974 ptrdiff_t btincr = ((char *)result + sz) -
1975 ((char *)gData->backTrackStack + btsize);
1977 TRACE("\tBT_Push: %lu,%lu\n", (unsigned long) parenIndex, (unsigned long) parenCount);
1979 JS_COUNT_OPERATION(gData->cx, JSOW_JUMP * (1 + parenCount));
1980 if (btincr > 0) {
1981 ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;
1983 JS_COUNT_OPERATION(gData->cx, JSOW_ALLOCATION);
1984 btincr = ((btincr+btsize-1)/btsize)*btsize;
1985 gData->backTrackStack = jsheap_grow(gData->pool, gData->backTrackStack, btsize, btincr);
1986 if (!gData->backTrackStack) {
1987 js_ReportOutOfScriptQuota(gData->cx);
1988 gData->ok = FALSE;
1989 return NULL;
1991 gData->backTrackStackSize = btsize + btincr;
1992 result = (REBackTrackData *) ((char *)gData->backTrackStack + offset);
1994 gData->backTrackSP = result;
1995 result->sz = gData->cursz;
1996 gData->cursz = sz;
1998 result->backtrack_op = op;
1999 result->backtrack_pc = target;
2000 result->cp = cp;
2001 result->parenCount = parenCount;
2002 result->parenIndex = parenIndex;
2004 result->saveStateStackTop = gData->stateStackTop;
2005 assert(gData->stateStackTop);
2006 memcpy(result + 1, gData->stateStack,
2007 sizeof(REProgState) * result->saveStateStackTop);
2009 if (parenCount != 0) {
2010 memcpy((char *)(result + 1) +
2011 sizeof(REProgState) * result->saveStateStackTop,
2012 &x->parens[parenIndex],
2013 sizeof(RECapture) * parenCount);
2014 for (i = 0; i != parenCount; i++)
2015 x->parens[parenIndex + i].index = -1;
2018 return result;
2021 static inline REMatchState *
2022 FlatNIMatcher(REGlobalData *gData, REMatchState *x, WCHAR *matchChars,
2023 size_t length)
2025 size_t i;
2026 assert(gData->cpend >= x->cp);
2027 if (length > (size_t)(gData->cpend - x->cp))
2028 return NULL;
2029 for (i = 0; i != length; i++) {
2030 if (toupperW(matchChars[i]) != toupperW(x->cp[i]))
2031 return NULL;
2033 x->cp += length;
2034 return x;
2038 * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
2039 * 2. If E is not a character then go to step 6.
2040 * 3. Let ch be E's character.
2041 * 4. Let A be a one-element RECharSet containing the character ch.
2042 * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
2043 * 6. E must be an integer. Let n be that integer.
2044 * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
2045 * 8. Return an internal Matcher closure that takes two arguments, a State x
2046 * and a Continuation c, and performs the following:
2047 * 1. Let cap be x's captures internal array.
2048 * 2. Let s be cap[n].
2049 * 3. If s is undefined, then call c(x) and return its result.
2050 * 4. Let e be x's endIndex.
2051 * 5. Let len be s's length.
2052 * 6. Let f be e+len.
2053 * 7. If f>InputLength, return failure.
2054 * 8. If there exists an integer i between 0 (inclusive) and len (exclusive)
2055 * such that Canonicalize(s[i]) is not the same character as
2056 * Canonicalize(Input [e+i]), then return failure.
2057 * 9. Let y be the State (f, cap).
2058 * 10. Call c(y) and return its result.
2060 static REMatchState *
2061 BackrefMatcher(REGlobalData *gData, REMatchState *x, size_t parenIndex)
2063 size_t len, i;
2064 const WCHAR *parenContent;
2065 RECapture *cap = &x->parens[parenIndex];
2067 if (cap->index == -1)
2068 return x;
2070 len = cap->length;
2071 if (x->cp + len > gData->cpend)
2072 return NULL;
2074 parenContent = &gData->cpbegin[cap->index];
2075 if (gData->regexp->flags & JSREG_FOLD) {
2076 for (i = 0; i < len; i++) {
2077 if (toupperW(parenContent[i]) != toupperW(x->cp[i]))
2078 return NULL;
2080 } else {
2081 for (i = 0; i < len; i++) {
2082 if (parenContent[i] != x->cp[i])
2083 return NULL;
2086 x->cp += len;
2087 return x;
2090 /* Add a single character to the RECharSet */
2091 static void
2092 AddCharacterToCharSet(RECharSet *cs, WCHAR c)
2094 UINT byteIndex = (UINT)(c >> 3);
2095 assert(c <= cs->length);
2096 cs->u.bits[byteIndex] |= 1 << (c & 0x7);
2100 /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
2101 static void
2102 AddCharacterRangeToCharSet(RECharSet *cs, UINT c1, UINT c2)
2104 UINT i;
2106 UINT byteIndex1 = c1 >> 3;
2107 UINT byteIndex2 = c2 >> 3;
2109 assert(c2 <= cs->length && c1 <= c2);
2111 c1 &= 0x7;
2112 c2 &= 0x7;
2114 if (byteIndex1 == byteIndex2) {
2115 cs->u.bits[byteIndex1] |= ((BYTE)0xFF >> (7 - (c2 - c1))) << c1;
2116 } else {
2117 cs->u.bits[byteIndex1] |= 0xFF << c1;
2118 for (i = byteIndex1 + 1; i < byteIndex2; i++)
2119 cs->u.bits[i] = 0xFF;
2120 cs->u.bits[byteIndex2] |= (BYTE)0xFF >> (7 - c2);
2124 /* Compile the source of the class into a RECharSet */
2125 static BOOL
2126 ProcessCharSet(REGlobalData *gData, RECharSet *charSet)
2128 const WCHAR *src, *end;
2129 BOOL inRange = FALSE;
2130 WCHAR rangeStart = 0;
2131 UINT byteLength, n;
2132 WCHAR c, thisCh;
2133 INT nDigits, i;
2135 assert(!charSet->converted);
2137 * Assert that startIndex and length points to chars inside [] inside
2138 * source string.
2140 assert(1 <= charSet->u.src.startIndex);
2141 assert(charSet->u.src.startIndex
2142 < SysStringLen(gData->regexp->source));
2143 assert(charSet->u.src.length <= SysStringLen(gData->regexp->source)
2144 - 1 - charSet->u.src.startIndex);
2146 charSet->converted = TRUE;
2147 src = gData->regexp->source + charSet->u.src.startIndex;
2149 end = src + charSet->u.src.length;
2151 assert(src[-1] == '[' && end[0] == ']');
2153 byteLength = (charSet->length >> 3) + 1;
2154 charSet->u.bits = heap_alloc(byteLength);
2155 if (!charSet->u.bits) {
2156 JS_ReportOutOfMemory(gData->cx);
2157 gData->ok = FALSE;
2158 return FALSE;
2160 memset(charSet->u.bits, 0, byteLength);
2162 if (src == end)
2163 return TRUE;
2165 if (*src == '^') {
2166 assert(charSet->sense == FALSE);
2167 ++src;
2168 } else {
2169 assert(charSet->sense == TRUE);
2172 while (src != end) {
2173 switch (*src) {
2174 case '\\':
2175 ++src;
2176 c = *src++;
2177 switch (c) {
2178 case 'b':
2179 thisCh = 0x8;
2180 break;
2181 case 'f':
2182 thisCh = 0xC;
2183 break;
2184 case 'n':
2185 thisCh = 0xA;
2186 break;
2187 case 'r':
2188 thisCh = 0xD;
2189 break;
2190 case 't':
2191 thisCh = 0x9;
2192 break;
2193 case 'v':
2194 thisCh = 0xB;
2195 break;
2196 case 'c':
2197 if (src < end && JS_ISWORD(*src)) {
2198 thisCh = (WCHAR)(*src++ & 0x1F);
2199 } else {
2200 --src;
2201 thisCh = '\\';
2203 break;
2204 case 'x':
2205 nDigits = 2;
2206 goto lexHex;
2207 case 'u':
2208 nDigits = 4;
2209 lexHex:
2210 n = 0;
2211 for (i = 0; (i < nDigits) && (src < end); i++) {
2212 UINT digit;
2213 c = *src++;
2214 if (!isASCIIHexDigit(c, &digit)) {
2216 * Back off to accepting the original '\'
2217 * as a literal
2219 src -= i + 1;
2220 n = '\\';
2221 break;
2223 n = (n << 4) | digit;
2225 thisCh = (WCHAR)n;
2226 break;
2227 case '0':
2228 case '1':
2229 case '2':
2230 case '3':
2231 case '4':
2232 case '5':
2233 case '6':
2234 case '7':
2236 * This is a non-ECMA extension - decimal escapes (in this
2237 * case, octal!) are supposed to be an error inside class
2238 * ranges, but supported here for backwards compatibility.
2240 n = JS7_UNDEC(c);
2241 c = *src;
2242 if ('0' <= c && c <= '7') {
2243 src++;
2244 n = 8 * n + JS7_UNDEC(c);
2245 c = *src;
2246 if ('0' <= c && c <= '7') {
2247 src++;
2248 i = 8 * n + JS7_UNDEC(c);
2249 if (i <= 0377)
2250 n = i;
2251 else
2252 src--;
2255 thisCh = (WCHAR)n;
2256 break;
2258 case 'd':
2259 AddCharacterRangeToCharSet(charSet, '0', '9');
2260 continue; /* don't need range processing */
2261 case 'D':
2262 AddCharacterRangeToCharSet(charSet, 0, '0' - 1);
2263 AddCharacterRangeToCharSet(charSet,
2264 (WCHAR)('9' + 1),
2265 (WCHAR)charSet->length);
2266 continue;
2267 case 's':
2268 for (i = (INT)charSet->length; i >= 0; i--)
2269 if (isspaceW(i))
2270 AddCharacterToCharSet(charSet, (WCHAR)i);
2271 continue;
2272 case 'S':
2273 for (i = (INT)charSet->length; i >= 0; i--)
2274 if (!isspaceW(i))
2275 AddCharacterToCharSet(charSet, (WCHAR)i);
2276 continue;
2277 case 'w':
2278 for (i = (INT)charSet->length; i >= 0; i--)
2279 if (JS_ISWORD(i))
2280 AddCharacterToCharSet(charSet, (WCHAR)i);
2281 continue;
2282 case 'W':
2283 for (i = (INT)charSet->length; i >= 0; i--)
2284 if (!JS_ISWORD(i))
2285 AddCharacterToCharSet(charSet, (WCHAR)i);
2286 continue;
2287 default:
2288 thisCh = c;
2289 break;
2292 break;
2294 default:
2295 thisCh = *src++;
2296 break;
2299 if (inRange) {
2300 if (gData->regexp->flags & JSREG_FOLD) {
2301 int i;
2303 assert(rangeStart <= thisCh);
2304 for (i = rangeStart; i <= thisCh; i++) {
2305 WCHAR uch, dch;
2307 AddCharacterToCharSet(charSet, i);
2308 uch = toupperW(i);
2309 dch = tolowerW(i);
2310 if (i != uch)
2311 AddCharacterToCharSet(charSet, uch);
2312 if (i != dch)
2313 AddCharacterToCharSet(charSet, dch);
2315 } else {
2316 AddCharacterRangeToCharSet(charSet, rangeStart, thisCh);
2318 inRange = FALSE;
2319 } else {
2320 if (gData->regexp->flags & JSREG_FOLD) {
2321 AddCharacterToCharSet(charSet, toupperW(thisCh));
2322 AddCharacterToCharSet(charSet, tolowerW(thisCh));
2323 } else {
2324 AddCharacterToCharSet(charSet, thisCh);
2326 if (src < end - 1) {
2327 if (*src == '-') {
2328 ++src;
2329 inRange = TRUE;
2330 rangeStart = thisCh;
2335 return TRUE;
2338 static BOOL
2339 ReallocStateStack(REGlobalData *gData)
2341 size_t limit = gData->stateStackLimit;
2342 size_t sz = sizeof(REProgState) * limit;
2344 gData->stateStack = jsheap_grow(gData->pool, gData->stateStack, sz, sz);
2345 if (!gData->stateStack) {
2346 js_ReportOutOfScriptQuota(gData->cx);
2347 gData->ok = FALSE;
2348 return FALSE;
2350 gData->stateStackLimit = limit + limit;
2351 return TRUE;
2354 #define PUSH_STATE_STACK(data) \
2355 do { \
2356 ++(data)->stateStackTop; \
2357 if ((data)->stateStackTop == (data)->stateStackLimit && \
2358 !ReallocStateStack((data))) { \
2359 return NULL; \
2361 }while(0)
2364 * Apply the current op against the given input to see if it's going to match
2365 * or fail. Return false if we don't get a match, true if we do. If updatecp is
2366 * true, then update the current state's cp. Always update startpc to the next
2367 * op.
2369 static inline REMatchState *
2370 SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op,
2371 jsbytecode **startpc, BOOL updatecp)
2373 REMatchState *result = NULL;
2374 WCHAR matchCh;
2375 size_t parenIndex;
2376 size_t offset, length, index;
2377 jsbytecode *pc = *startpc; /* pc has already been incremented past op */
2378 WCHAR *source;
2379 const WCHAR *startcp = x->cp;
2380 WCHAR ch;
2381 RECharSet *charSet;
2383 const char *opname = reop_names[op];
2384 TRACE("\n%06d: %*s%s\n", pc - gData->regexp->program,
2385 (int)gData->stateStackTop * 2, "", opname);
2387 switch (op) {
2388 case REOP_EMPTY:
2389 result = x;
2390 break;
2391 case REOP_BOL:
2392 if (x->cp != gData->cpbegin) {
2393 if (/*!gData->cx->regExpStatics.multiline && FIXME !!! */
2394 !(gData->regexp->flags & JSREG_MULTILINE)) {
2395 break;
2397 if (!RE_IS_LINE_TERM(x->cp[-1]))
2398 break;
2400 result = x;
2401 break;
2402 case REOP_EOL:
2403 if (x->cp != gData->cpend) {
2404 if (/*!gData->cx->regExpStatics.multiline &&*/
2405 !(gData->regexp->flags & JSREG_MULTILINE)) {
2406 break;
2408 if (!RE_IS_LINE_TERM(*x->cp))
2409 break;
2411 result = x;
2412 break;
2413 case REOP_WBDRY:
2414 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2415 !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2416 result = x;
2418 break;
2419 case REOP_WNONBDRY:
2420 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2421 (x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2422 result = x;
2424 break;
2425 case REOP_DOT:
2426 if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
2427 result = x;
2428 result->cp++;
2430 break;
2431 case REOP_DIGIT:
2432 if (x->cp != gData->cpend && JS7_ISDEC(*x->cp)) {
2433 result = x;
2434 result->cp++;
2436 break;
2437 case REOP_NONDIGIT:
2438 if (x->cp != gData->cpend && !JS7_ISDEC(*x->cp)) {
2439 result = x;
2440 result->cp++;
2442 break;
2443 case REOP_ALNUM:
2444 if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
2445 result = x;
2446 result->cp++;
2448 break;
2449 case REOP_NONALNUM:
2450 if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
2451 result = x;
2452 result->cp++;
2454 break;
2455 case REOP_SPACE:
2456 if (x->cp != gData->cpend && isspaceW(*x->cp)) {
2457 result = x;
2458 result->cp++;
2460 break;
2461 case REOP_NONSPACE:
2462 if (x->cp != gData->cpend && !isspaceW(*x->cp)) {
2463 result = x;
2464 result->cp++;
2466 break;
2467 case REOP_BACKREF:
2468 pc = ReadCompactIndex(pc, &parenIndex);
2469 assert(parenIndex < gData->regexp->parenCount);
2470 result = BackrefMatcher(gData, x, parenIndex);
2471 break;
2472 case REOP_FLAT:
2473 pc = ReadCompactIndex(pc, &offset);
2474 assert(offset < SysStringLen(gData->regexp->source));
2475 pc = ReadCompactIndex(pc, &length);
2476 assert(1 <= length);
2477 assert(length <= SysStringLen(gData->regexp->source) - offset);
2478 if (length <= (size_t)(gData->cpend - x->cp)) {
2479 source = gData->regexp->source + offset;
2480 TRACE("%s\n", debugstr_wn(source, length));
2481 for (index = 0; index != length; index++) {
2482 if (source[index] != x->cp[index])
2483 return NULL;
2485 x->cp += length;
2486 result = x;
2488 break;
2489 case REOP_FLAT1:
2490 matchCh = *pc++;
2491 TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
2492 if (x->cp != gData->cpend && *x->cp == matchCh) {
2493 result = x;
2494 result->cp++;
2496 break;
2497 case REOP_FLATi:
2498 pc = ReadCompactIndex(pc, &offset);
2499 assert(offset < SysStringLen(gData->regexp->source));
2500 pc = ReadCompactIndex(pc, &length);
2501 assert(1 <= length);
2502 assert(length <= SysStringLen(gData->regexp->source) - offset);
2503 source = gData->regexp->source;
2504 result = FlatNIMatcher(gData, x, source + offset, length);
2505 break;
2506 case REOP_FLAT1i:
2507 matchCh = *pc++;
2508 if (x->cp != gData->cpend && toupperW(*x->cp) == toupperW(matchCh)) {
2509 result = x;
2510 result->cp++;
2512 break;
2513 case REOP_UCFLAT1:
2514 matchCh = GET_ARG(pc);
2515 TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
2516 pc += ARG_LEN;
2517 if (x->cp != gData->cpend && *x->cp == matchCh) {
2518 result = x;
2519 result->cp++;
2521 break;
2522 case REOP_UCFLAT1i:
2523 matchCh = GET_ARG(pc);
2524 pc += ARG_LEN;
2525 if (x->cp != gData->cpend && toupperW(*x->cp) == toupperW(matchCh)) {
2526 result = x;
2527 result->cp++;
2529 break;
2530 case REOP_CLASS:
2531 pc = ReadCompactIndex(pc, &index);
2532 assert(index < gData->regexp->classCount);
2533 if (x->cp != gData->cpend) {
2534 charSet = &gData->regexp->classList[index];
2535 assert(charSet->converted);
2536 ch = *x->cp;
2537 index = ch >> 3;
2538 if (charSet->length != 0 &&
2539 ch <= charSet->length &&
2540 (charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2541 result = x;
2542 result->cp++;
2545 break;
2546 case REOP_NCLASS:
2547 pc = ReadCompactIndex(pc, &index);
2548 assert(index < gData->regexp->classCount);
2549 if (x->cp != gData->cpend) {
2550 charSet = &gData->regexp->classList[index];
2551 assert(charSet->converted);
2552 ch = *x->cp;
2553 index = ch >> 3;
2554 if (charSet->length == 0 ||
2555 ch > charSet->length ||
2556 !(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2557 result = x;
2558 result->cp++;
2561 break;
2563 default:
2564 assert(FALSE);
2566 if (result) {
2567 if (!updatecp)
2568 x->cp = startcp;
2569 *startpc = pc;
2570 TRACE(" *\n");
2571 return result;
2573 x->cp = startcp;
2574 return NULL;
2577 static inline REMatchState *
2578 ExecuteREBytecode(REGlobalData *gData, REMatchState *x)
2580 REMatchState *result = NULL;
2581 REBackTrackData *backTrackData;
2582 jsbytecode *nextpc, *testpc;
2583 REOp nextop;
2584 RECapture *cap;
2585 REProgState *curState;
2586 const WCHAR *startcp;
2587 size_t parenIndex, k;
2588 size_t parenSoFar = 0;
2590 WCHAR matchCh1, matchCh2;
2591 RECharSet *charSet;
2593 BOOL anchor;
2594 jsbytecode *pc = gData->regexp->program;
2595 REOp op = (REOp) *pc++;
2598 * If the first node is a simple match, step the index into the string
2599 * until that match is made, or fail if it can't be found at all.
2601 if (REOP_IS_SIMPLE(op) && !(gData->regexp->flags & JSREG_STICKY)) {
2602 anchor = FALSE;
2603 while (x->cp <= gData->cpend) {
2604 nextpc = pc; /* reset back to start each time */
2605 result = SimpleMatch(gData, x, op, &nextpc, TRUE);
2606 if (result) {
2607 anchor = TRUE;
2608 x = result;
2609 pc = nextpc; /* accept skip to next opcode */
2610 op = (REOp) *pc++;
2611 assert(op < REOP_LIMIT);
2612 break;
2614 gData->skipped++;
2615 x->cp++;
2617 if (!anchor)
2618 goto bad;
2621 for (;;) {
2622 const char *opname = reop_names[op];
2623 TRACE("\n%06d: %*s%s\n", pc - gData->regexp->program,
2624 (int)gData->stateStackTop * 2, "", opname);
2626 if (REOP_IS_SIMPLE(op)) {
2627 result = SimpleMatch(gData, x, op, &pc, TRUE);
2628 } else {
2629 curState = &gData->stateStack[gData->stateStackTop];
2630 switch (op) {
2631 case REOP_END:
2632 goto good;
2633 case REOP_ALTPREREQ2:
2634 nextpc = pc + GET_OFFSET(pc); /* start of next op */
2635 pc += ARG_LEN;
2636 matchCh2 = GET_ARG(pc);
2637 pc += ARG_LEN;
2638 k = GET_ARG(pc);
2639 pc += ARG_LEN;
2641 if (x->cp != gData->cpend) {
2642 if (*x->cp == matchCh2)
2643 goto doAlt;
2645 charSet = &gData->regexp->classList[k];
2646 if (!charSet->converted && !ProcessCharSet(gData, charSet))
2647 goto bad;
2648 matchCh1 = *x->cp;
2649 k = matchCh1 >> 3;
2650 if ((charSet->length == 0 ||
2651 matchCh1 > charSet->length ||
2652 !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^
2653 charSet->sense) {
2654 goto doAlt;
2657 result = NULL;
2658 break;
2660 case REOP_ALTPREREQ:
2661 nextpc = pc + GET_OFFSET(pc); /* start of next op */
2662 pc += ARG_LEN;
2663 matchCh1 = GET_ARG(pc);
2664 pc += ARG_LEN;
2665 matchCh2 = GET_ARG(pc);
2666 pc += ARG_LEN;
2667 if (x->cp == gData->cpend ||
2668 (*x->cp != matchCh1 && *x->cp != matchCh2)) {
2669 result = NULL;
2670 break;
2672 /* else false thru... */
2674 case REOP_ALT:
2675 doAlt:
2676 nextpc = pc + GET_OFFSET(pc); /* start of next alternate */
2677 pc += ARG_LEN; /* start of this alternate */
2678 curState->parenSoFar = parenSoFar;
2679 PUSH_STATE_STACK(gData);
2680 op = (REOp) *pc++;
2681 startcp = x->cp;
2682 if (REOP_IS_SIMPLE(op)) {
2683 if (!SimpleMatch(gData, x, op, &pc, TRUE)) {
2684 op = (REOp) *nextpc++;
2685 pc = nextpc;
2686 continue;
2688 result = x;
2689 op = (REOp) *pc++;
2691 nextop = (REOp) *nextpc++;
2692 if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0))
2693 goto bad;
2694 continue;
2697 * Occurs at (successful) end of REOP_ALT,
2699 case REOP_JUMP:
2701 * If we have not gotten a result here, it is because of an
2702 * empty match. Do the same thing REOP_EMPTY would do.
2704 if (!result)
2705 result = x;
2707 --gData->stateStackTop;
2708 pc += GET_OFFSET(pc);
2709 op = (REOp) *pc++;
2710 continue;
2713 * Occurs at last (successful) end of REOP_ALT,
2715 case REOP_ENDALT:
2717 * If we have not gotten a result here, it is because of an
2718 * empty match. Do the same thing REOP_EMPTY would do.
2720 if (!result)
2721 result = x;
2723 --gData->stateStackTop;
2724 op = (REOp) *pc++;
2725 continue;
2727 case REOP_LPAREN:
2728 pc = ReadCompactIndex(pc, &parenIndex);
2729 TRACE("[ %lu ]\n", (unsigned long) parenIndex);
2730 assert(parenIndex < gData->regexp->parenCount);
2731 if (parenIndex + 1 > parenSoFar)
2732 parenSoFar = parenIndex + 1;
2733 x->parens[parenIndex].index = x->cp - gData->cpbegin;
2734 x->parens[parenIndex].length = 0;
2735 op = (REOp) *pc++;
2736 continue;
2738 case REOP_RPAREN:
2740 ptrdiff_t delta;
2742 pc = ReadCompactIndex(pc, &parenIndex);
2743 assert(parenIndex < gData->regexp->parenCount);
2744 cap = &x->parens[parenIndex];
2745 delta = x->cp - (gData->cpbegin + cap->index);
2746 cap->length = (delta < 0) ? 0 : (size_t) delta;
2747 op = (REOp) *pc++;
2749 if (!result)
2750 result = x;
2751 continue;
2753 case REOP_ASSERT:
2754 nextpc = pc + GET_OFFSET(pc); /* start of term after ASSERT */
2755 pc += ARG_LEN; /* start of ASSERT child */
2756 op = (REOp) *pc++;
2757 testpc = pc;
2758 if (REOP_IS_SIMPLE(op) &&
2759 !SimpleMatch(gData, x, op, &testpc, FALSE)) {
2760 result = NULL;
2761 break;
2763 curState->u.assertion.top =
2764 (char *)gData->backTrackSP - (char *)gData->backTrackStack;
2765 curState->u.assertion.sz = gData->cursz;
2766 curState->index = x->cp - gData->cpbegin;
2767 curState->parenSoFar = parenSoFar;
2768 PUSH_STATE_STACK(gData);
2769 if (!PushBackTrackState(gData, REOP_ASSERTTEST,
2770 nextpc, x, x->cp, 0, 0)) {
2771 goto bad;
2773 continue;
2775 case REOP_ASSERT_NOT:
2776 nextpc = pc + GET_OFFSET(pc);
2777 pc += ARG_LEN;
2778 op = (REOp) *pc++;
2779 testpc = pc;
2780 if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ &&
2781 SimpleMatch(gData, x, op, &testpc, FALSE) &&
2782 *testpc == REOP_ASSERTNOTTEST) {
2783 result = NULL;
2784 break;
2786 curState->u.assertion.top
2787 = (char *)gData->backTrackSP -
2788 (char *)gData->backTrackStack;
2789 curState->u.assertion.sz = gData->cursz;
2790 curState->index = x->cp - gData->cpbegin;
2791 curState->parenSoFar = parenSoFar;
2792 PUSH_STATE_STACK(gData);
2793 if (!PushBackTrackState(gData, REOP_ASSERTNOTTEST,
2794 nextpc, x, x->cp, 0, 0)) {
2795 goto bad;
2797 continue;
2799 case REOP_ASSERTTEST:
2800 --gData->stateStackTop;
2801 --curState;
2802 x->cp = gData->cpbegin + curState->index;
2803 gData->backTrackSP =
2804 (REBackTrackData *) ((char *)gData->backTrackStack +
2805 curState->u.assertion.top);
2806 gData->cursz = curState->u.assertion.sz;
2807 if (result)
2808 result = x;
2809 break;
2811 case REOP_ASSERTNOTTEST:
2812 --gData->stateStackTop;
2813 --curState;
2814 x->cp = gData->cpbegin + curState->index;
2815 gData->backTrackSP =
2816 (REBackTrackData *) ((char *)gData->backTrackStack +
2817 curState->u.assertion.top);
2818 gData->cursz = curState->u.assertion.sz;
2819 result = (!result) ? x : NULL;
2820 break;
2821 case REOP_STAR:
2822 curState->u.quantifier.min = 0;
2823 curState->u.quantifier.max = (UINT)-1;
2824 goto quantcommon;
2825 case REOP_PLUS:
2826 curState->u.quantifier.min = 1;
2827 curState->u.quantifier.max = (UINT)-1;
2828 goto quantcommon;
2829 case REOP_OPT:
2830 curState->u.quantifier.min = 0;
2831 curState->u.quantifier.max = 1;
2832 goto quantcommon;
2833 case REOP_QUANT:
2834 pc = ReadCompactIndex(pc, &k);
2835 curState->u.quantifier.min = k;
2836 pc = ReadCompactIndex(pc, &k);
2837 /* max is k - 1 to use one byte for (UINT)-1 sentinel. */
2838 curState->u.quantifier.max = k - 1;
2839 assert(curState->u.quantifier.min <= curState->u.quantifier.max);
2840 quantcommon:
2841 if (curState->u.quantifier.max == 0) {
2842 pc = pc + GET_OFFSET(pc);
2843 op = (REOp) *pc++;
2844 result = x;
2845 continue;
2847 /* Step over <next> */
2848 nextpc = pc + ARG_LEN;
2849 op = (REOp) *nextpc++;
2850 startcp = x->cp;
2851 if (REOP_IS_SIMPLE(op)) {
2852 if (!SimpleMatch(gData, x, op, &nextpc, TRUE)) {
2853 if (curState->u.quantifier.min == 0)
2854 result = x;
2855 else
2856 result = NULL;
2857 pc = pc + GET_OFFSET(pc);
2858 break;
2860 op = (REOp) *nextpc++;
2861 result = x;
2863 curState->index = startcp - gData->cpbegin;
2864 curState->continue_op = REOP_REPEAT;
2865 curState->continue_pc = pc;
2866 curState->parenSoFar = parenSoFar;
2867 PUSH_STATE_STACK(gData);
2868 if (curState->u.quantifier.min == 0 &&
2869 !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp,
2870 0, 0)) {
2871 goto bad;
2873 pc = nextpc;
2874 continue;
2876 case REOP_ENDCHILD: /* marks the end of a quantifier child */
2877 pc = curState[-1].continue_pc;
2878 op = (REOp) curState[-1].continue_op;
2880 if (!result)
2881 result = x;
2882 continue;
2884 case REOP_REPEAT:
2885 --curState;
2886 do {
2887 --gData->stateStackTop;
2888 if (!result) {
2889 /* Failed, see if we have enough children. */
2890 if (curState->u.quantifier.min == 0)
2891 goto repeatDone;
2892 goto break_switch;
2894 if (curState->u.quantifier.min == 0 &&
2895 x->cp == gData->cpbegin + curState->index) {
2896 /* matched an empty string, that'll get us nowhere */
2897 result = NULL;
2898 goto break_switch;
2900 if (curState->u.quantifier.min != 0)
2901 curState->u.quantifier.min--;
2902 if (curState->u.quantifier.max != (UINT) -1)
2903 curState->u.quantifier.max--;
2904 if (curState->u.quantifier.max == 0)
2905 goto repeatDone;
2906 nextpc = pc + ARG_LEN;
2907 nextop = (REOp) *nextpc;
2908 startcp = x->cp;
2909 if (REOP_IS_SIMPLE(nextop)) {
2910 nextpc++;
2911 if (!SimpleMatch(gData, x, nextop, &nextpc, TRUE)) {
2912 if (curState->u.quantifier.min == 0)
2913 goto repeatDone;
2914 result = NULL;
2915 goto break_switch;
2917 result = x;
2919 curState->index = startcp - gData->cpbegin;
2920 PUSH_STATE_STACK(gData);
2921 if (curState->u.quantifier.min == 0 &&
2922 !PushBackTrackState(gData, REOP_REPEAT,
2923 pc, x, startcp,
2924 curState->parenSoFar,
2925 parenSoFar -
2926 curState->parenSoFar)) {
2927 goto bad;
2929 } while (*nextpc == REOP_ENDCHILD);
2930 pc = nextpc;
2931 op = (REOp) *pc++;
2932 parenSoFar = curState->parenSoFar;
2933 continue;
2935 repeatDone:
2936 result = x;
2937 pc += GET_OFFSET(pc);
2938 goto break_switch;
2940 case REOP_MINIMALSTAR:
2941 curState->u.quantifier.min = 0;
2942 curState->u.quantifier.max = (UINT)-1;
2943 goto minimalquantcommon;
2944 case REOP_MINIMALPLUS:
2945 curState->u.quantifier.min = 1;
2946 curState->u.quantifier.max = (UINT)-1;
2947 goto minimalquantcommon;
2948 case REOP_MINIMALOPT:
2949 curState->u.quantifier.min = 0;
2950 curState->u.quantifier.max = 1;
2951 goto minimalquantcommon;
2952 case REOP_MINIMALQUANT:
2953 pc = ReadCompactIndex(pc, &k);
2954 curState->u.quantifier.min = k;
2955 pc = ReadCompactIndex(pc, &k);
2956 /* See REOP_QUANT comments about k - 1. */
2957 curState->u.quantifier.max = k - 1;
2958 assert(curState->u.quantifier.min
2959 <= curState->u.quantifier.max);
2960 minimalquantcommon:
2961 curState->index = x->cp - gData->cpbegin;
2962 curState->parenSoFar = parenSoFar;
2963 PUSH_STATE_STACK(gData);
2964 if (curState->u.quantifier.min != 0) {
2965 curState->continue_op = REOP_MINIMALREPEAT;
2966 curState->continue_pc = pc;
2967 /* step over <next> */
2968 pc += OFFSET_LEN;
2969 op = (REOp) *pc++;
2970 } else {
2971 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
2972 pc, x, x->cp, 0, 0)) {
2973 goto bad;
2975 --gData->stateStackTop;
2976 pc = pc + GET_OFFSET(pc);
2977 op = (REOp) *pc++;
2979 continue;
2981 case REOP_MINIMALREPEAT:
2982 --gData->stateStackTop;
2983 --curState;
2985 TRACE("{%d,%d}\n", curState->u.quantifier.min, curState->u.quantifier.max);
2986 #define PREPARE_REPEAT() \
2987 do { \
2988 curState->index = x->cp - gData->cpbegin; \
2989 curState->continue_op = REOP_MINIMALREPEAT; \
2990 curState->continue_pc = pc; \
2991 pc += ARG_LEN; \
2992 for (k = curState->parenSoFar; k < parenSoFar; k++) \
2993 x->parens[k].index = -1; \
2994 PUSH_STATE_STACK(gData); \
2995 op = (REOp) *pc++; \
2996 assert(op < REOP_LIMIT); \
2997 }while(0)
2999 if (!result) {
3000 TRACE(" -\n");
3002 * Non-greedy failure - try to consume another child.
3004 if (curState->u.quantifier.max == (UINT) -1 ||
3005 curState->u.quantifier.max > 0) {
3006 PREPARE_REPEAT();
3007 continue;
3009 /* Don't need to adjust pc since we're going to pop. */
3010 break;
3012 if (curState->u.quantifier.min == 0 &&
3013 x->cp == gData->cpbegin + curState->index) {
3014 /* Matched an empty string, that'll get us nowhere. */
3015 result = NULL;
3016 break;
3018 if (curState->u.quantifier.min != 0)
3019 curState->u.quantifier.min--;
3020 if (curState->u.quantifier.max != (UINT) -1)
3021 curState->u.quantifier.max--;
3022 if (curState->u.quantifier.min != 0) {
3023 PREPARE_REPEAT();
3024 continue;
3026 curState->index = x->cp - gData->cpbegin;
3027 curState->parenSoFar = parenSoFar;
3028 PUSH_STATE_STACK(gData);
3029 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
3030 pc, x, x->cp,
3031 curState->parenSoFar,
3032 parenSoFar - curState->parenSoFar)) {
3033 goto bad;
3035 --gData->stateStackTop;
3036 pc = pc + GET_OFFSET(pc);
3037 op = (REOp) *pc++;
3038 assert(op < REOP_LIMIT);
3039 continue;
3040 default:
3041 assert(FALSE);
3042 result = NULL;
3044 break_switch:;
3048 * If the match failed and there's a backtrack option, take it.
3049 * Otherwise this is a complete and utter failure.
3051 if (!result) {
3052 if (gData->cursz == 0)
3053 return NULL;
3055 /* Potentially detect explosive regex here. */
3056 gData->backTrackCount++;
3057 if (gData->backTrackLimit &&
3058 gData->backTrackCount >= gData->backTrackLimit) {
3059 JS_ReportErrorNumber(gData->cx, js_GetErrorMessage, NULL,
3060 JSMSG_REGEXP_TOO_COMPLEX);
3061 gData->ok = FALSE;
3062 return NULL;
3065 backTrackData = gData->backTrackSP;
3066 gData->cursz = backTrackData->sz;
3067 gData->backTrackSP =
3068 (REBackTrackData *) ((char *)backTrackData - backTrackData->sz);
3069 x->cp = backTrackData->cp;
3070 pc = backTrackData->backtrack_pc;
3071 op = (REOp) backTrackData->backtrack_op;
3072 assert(op < REOP_LIMIT);
3073 gData->stateStackTop = backTrackData->saveStateStackTop;
3074 assert(gData->stateStackTop);
3076 memcpy(gData->stateStack, backTrackData + 1,
3077 sizeof(REProgState) * backTrackData->saveStateStackTop);
3078 curState = &gData->stateStack[gData->stateStackTop - 1];
3080 if (backTrackData->parenCount) {
3081 memcpy(&x->parens[backTrackData->parenIndex],
3082 (char *)(backTrackData + 1) +
3083 sizeof(REProgState) * backTrackData->saveStateStackTop,
3084 sizeof(RECapture) * backTrackData->parenCount);
3085 parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
3086 } else {
3087 for (k = curState->parenSoFar; k < parenSoFar; k++)
3088 x->parens[k].index = -1;
3089 parenSoFar = curState->parenSoFar;
3092 TRACE("\tBT_Pop: %ld,%ld\n",
3093 (unsigned long) backTrackData->parenIndex,
3094 (unsigned long) backTrackData->parenCount);
3095 continue;
3097 x = result;
3100 * Continue with the expression.
3102 op = (REOp)*pc++;
3103 assert(op < REOP_LIMIT);
3106 bad:
3107 TRACE("\n");
3108 return NULL;
3110 good:
3111 TRACE("\n");
3112 return x;
3115 static REMatchState *MatchRegExp(REGlobalData *gData, REMatchState *x)
3117 REMatchState *result;
3118 const WCHAR *cp = x->cp;
3119 const WCHAR *cp2;
3120 UINT j;
3123 * Have to include the position beyond the last character
3124 * in order to detect end-of-input/line condition.
3126 for (cp2 = cp; cp2 <= gData->cpend; cp2++) {
3127 gData->skipped = cp2 - cp;
3128 x->cp = cp2;
3129 for (j = 0; j < gData->regexp->parenCount; j++)
3130 x->parens[j].index = -1;
3131 result = ExecuteREBytecode(gData, x);
3132 if (!gData->ok || result || (gData->regexp->flags & JSREG_STICKY))
3133 return result;
3134 gData->backTrackSP = gData->backTrackStack;
3135 gData->cursz = 0;
3136 gData->stateStackTop = 0;
3137 cp2 = cp + gData->skipped;
3139 return NULL;
3142 #define MIN_BACKTRACK_LIMIT 400000
3144 static REMatchState *InitMatch(script_ctx_t *cx, REGlobalData *gData, JSRegExp *re, size_t length)
3146 REMatchState *result;
3147 UINT i;
3149 gData->backTrackStackSize = INITIAL_BACKTRACK;
3150 gData->backTrackStack = jsheap_alloc(gData->pool, INITIAL_BACKTRACK);
3151 if (!gData->backTrackStack)
3152 goto bad;
3154 gData->backTrackSP = gData->backTrackStack;
3155 gData->cursz = 0;
3156 gData->backTrackCount = 0;
3157 gData->backTrackLimit = 0;
3159 gData->stateStackLimit = INITIAL_STATESTACK;
3160 gData->stateStack = jsheap_alloc(gData->pool, sizeof(REProgState) * INITIAL_STATESTACK);
3161 if (!gData->stateStack)
3162 goto bad;
3164 gData->stateStackTop = 0;
3165 gData->cx = cx;
3166 gData->regexp = re;
3167 gData->ok = TRUE;
3169 result = jsheap_alloc(gData->pool, offsetof(REMatchState, parens) + re->parenCount * sizeof(RECapture));
3170 if (!result)
3171 goto bad;
3173 for (i = 0; i < re->classCount; i++) {
3174 if (!re->classList[i].converted &&
3175 !ProcessCharSet(gData, &re->classList[i])) {
3176 return NULL;
3180 return result;
3182 bad:
3183 js_ReportOutOfScriptQuota(cx);
3184 gData->ok = FALSE;
3185 return NULL;
3188 static void
3189 js_DestroyRegExp(JSRegExp *re)
3191 if (re->classList) {
3192 UINT i;
3193 for (i = 0; i < re->classCount; i++) {
3194 if (re->classList[i].converted)
3195 heap_free(re->classList[i].u.bits);
3196 re->classList[i].u.bits = NULL;
3198 heap_free(re->classList);
3200 heap_free(re);
3203 static JSRegExp *
3204 js_NewRegExp(script_ctx_t *cx, BSTR str, UINT flags, BOOL flat)
3206 JSRegExp *re;
3207 jsheap_t *mark;
3208 CompilerState state;
3209 size_t resize;
3210 jsbytecode *endPC;
3211 UINT i;
3212 size_t len;
3214 re = NULL;
3215 mark = jsheap_mark(&cx->tmp_heap);
3216 len = SysStringLen(str);
3218 state.context = cx;
3219 state.cp = str;
3220 if (!state.cp)
3221 goto out;
3222 state.cpbegin = state.cp;
3223 state.cpend = state.cp + len;
3224 state.flags = flags;
3225 state.parenCount = 0;
3226 state.classCount = 0;
3227 state.progLength = 0;
3228 state.treeDepth = 0;
3229 state.classBitmapsMem = 0;
3230 for (i = 0; i < CLASS_CACHE_SIZE; i++)
3231 state.classCache[i].start = NULL;
3233 if (len != 0 && flat) {
3234 state.result = NewRENode(&state, REOP_FLAT);
3235 if (!state.result)
3236 goto out;
3237 state.result->u.flat.chr = *state.cpbegin;
3238 state.result->u.flat.length = len;
3239 state.result->kid = (void *) state.cpbegin;
3240 /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
3241 state.progLength += 1 + GetCompactIndexWidth(0)
3242 + GetCompactIndexWidth(len);
3243 } else {
3244 if (!ParseRegExp(&state))
3245 goto out;
3247 resize = offsetof(JSRegExp, program) + state.progLength + 1;
3248 re = heap_alloc(resize);
3249 if (!re)
3250 goto out;
3252 assert(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT);
3253 re->classCount = state.classCount;
3254 if (re->classCount) {
3255 re->classList = heap_alloc(re->classCount * sizeof(RECharSet));
3256 if (!re->classList) {
3257 js_DestroyRegExp(re);
3258 re = NULL;
3259 goto out;
3261 for (i = 0; i < re->classCount; i++)
3262 re->classList[i].converted = FALSE;
3263 } else {
3264 re->classList = NULL;
3266 endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
3267 if (!endPC) {
3268 js_DestroyRegExp(re);
3269 re = NULL;
3270 goto out;
3272 *endPC++ = REOP_END;
3274 * Check whether size was overestimated and shrink using realloc.
3275 * This is safe since no pointers to newly parsed regexp or its parts
3276 * besides re exist here.
3278 if ((size_t)(endPC - re->program) != state.progLength + 1) {
3279 JSRegExp *tmp;
3280 assert((size_t)(endPC - re->program) < state.progLength + 1);
3281 resize = offsetof(JSRegExp, program) + (endPC - re->program);
3282 tmp = heap_realloc(re, resize);
3283 if (tmp)
3284 re = tmp;
3287 re->flags = flags;
3288 re->parenCount = state.parenCount;
3289 re->source = str;
3291 out:
3292 jsheap_clear(mark);
3293 return re;
3296 static HRESULT do_regexp_match_next(RegExpInstance *regexp, const WCHAR *str, DWORD len,
3297 const WCHAR **cp, match_result_t **parens, DWORD *parens_size, DWORD *parens_cnt, match_result_t *ret)
3299 REMatchState *x, *result;
3300 REGlobalData gData;
3301 DWORD matchlen;
3303 gData.cpbegin = *cp;
3304 gData.cpend = str + len;
3305 gData.start = *cp-str;
3306 gData.skipped = 0;
3307 gData.pool = &regexp->dispex.ctx->tmp_heap;
3309 x = InitMatch(NULL, &gData, regexp->jsregexp, gData.cpend - gData.cpbegin);
3310 if(!x) {
3311 WARN("InitMatch failed\n");
3312 return E_FAIL;
3315 x->cp = *cp;
3316 result = MatchRegExp(&gData, x);
3317 if(!gData.ok) {
3318 WARN("MatchRegExp failed\n");
3319 return E_FAIL;
3322 if(!result)
3323 return S_FALSE;
3325 if(parens) {
3326 DWORD i;
3328 if(regexp->jsregexp->parenCount > *parens_size) {
3329 match_result_t *new_parens;
3331 if(*parens)
3332 new_parens = heap_realloc(*parens, sizeof(match_result_t)*regexp->jsregexp->parenCount);
3333 else
3334 new_parens = heap_alloc(sizeof(match_result_t)*regexp->jsregexp->parenCount);
3335 if(!new_parens)
3336 return E_OUTOFMEMORY;
3338 *parens = new_parens;
3341 *parens_cnt = regexp->jsregexp->parenCount;
3343 for(i=0; i < regexp->jsregexp->parenCount; i++) {
3344 (*parens)[i].str = *cp + result->parens[i].index;
3345 (*parens)[i].len = result->parens[i].length;
3349 matchlen = (result->cp-*cp) - gData.skipped;
3350 *cp = result->cp;
3351 ret->str = result->cp-matchlen;
3352 ret->len = matchlen;
3354 return S_OK;
3357 HRESULT regexp_match_next(DispatchEx *dispex, BOOL gcheck, const WCHAR *str, DWORD len,
3358 const WCHAR **cp, match_result_t **parens, DWORD *parens_size, DWORD *parens_cnt, match_result_t *ret)
3360 RegExpInstance *regexp = (RegExpInstance*)dispex;
3361 jsheap_t *mark;
3362 HRESULT hres;
3364 if(gcheck && !(regexp->jsregexp->flags & JSREG_GLOB))
3365 return S_FALSE;
3367 mark = jsheap_mark(&regexp->dispex.ctx->tmp_heap);
3369 hres = do_regexp_match_next(regexp, str, len, cp, parens, parens_size, parens_cnt, ret);
3371 jsheap_clear(mark);
3372 return hres;
3375 HRESULT regexp_match(DispatchEx *dispex, const WCHAR *str, DWORD len, BOOL gflag, match_result_t **match_result,
3376 DWORD *result_cnt)
3378 RegExpInstance *This = (RegExpInstance*)dispex;
3379 match_result_t *ret = NULL, cres;
3380 const WCHAR *cp = str;
3381 DWORD i=0, ret_size = 0;
3382 jsheap_t *mark;
3383 HRESULT hres;
3385 mark = jsheap_mark(&This->dispex.ctx->tmp_heap);
3387 while(1) {
3388 hres = do_regexp_match_next(This, str, len, &cp, NULL, NULL, NULL, &cres);
3389 if(hres == S_FALSE) {
3390 hres = S_OK;
3391 break;
3394 if(FAILED(hres))
3395 break;
3397 if(ret_size == i) {
3398 if(ret)
3399 ret = heap_realloc(ret, (ret_size <<= 1) * sizeof(match_result_t));
3400 else
3401 ret = heap_alloc((ret_size=4) * sizeof(match_result_t));
3402 if(!ret) {
3403 hres = E_OUTOFMEMORY;
3404 break;
3408 ret[i++] = cres;
3410 if(!gflag && !(This->jsregexp->flags & JSREG_GLOB)) {
3411 hres = S_OK;
3412 break;
3416 jsheap_clear(mark);
3417 if(FAILED(hres)) {
3418 heap_free(ret);
3419 return hres;
3422 *match_result = ret;
3423 *result_cnt = i;
3424 return S_OK;
3427 static HRESULT RegExp_source(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3428 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3430 TRACE("\n");
3432 switch(flags) {
3433 case DISPATCH_PROPERTYGET: {
3434 RegExpInstance *This = (RegExpInstance*)dispex;
3436 V_VT(retv) = VT_BSTR;
3437 V_BSTR(retv) = SysAllocString(This->str);
3438 if(!V_BSTR(retv))
3439 return E_OUTOFMEMORY;
3440 break;
3442 default:
3443 FIXME("Unimplemnted flags %x\n", flags);
3444 return E_NOTIMPL;
3447 return S_OK;
3450 static HRESULT RegExp_global(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3451 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3453 FIXME("\n");
3454 return E_NOTIMPL;
3457 static HRESULT RegExp_ignoreCase(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3458 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3460 FIXME("\n");
3461 return E_NOTIMPL;
3464 static HRESULT RegExp_multiline(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3465 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3467 FIXME("\n");
3468 return E_NOTIMPL;
3471 static HRESULT RegExp_lastIndex(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3472 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3474 FIXME("\n");
3475 return E_NOTIMPL;
3478 static HRESULT RegExp_toString(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3479 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3481 FIXME("\n");
3482 return E_NOTIMPL;
3485 static HRESULT RegExp_exec(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3486 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3488 FIXME("\n");
3489 return E_NOTIMPL;
3492 static HRESULT RegExp_test(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3493 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3495 FIXME("\n");
3496 return E_NOTIMPL;
3499 static HRESULT RegExp_value(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3500 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3502 TRACE("\n");
3504 switch(flags) {
3505 case INVOKE_FUNC:
3506 return throw_type_error(dispex->ctx, ei, IDS_NOT_FUNC, NULL);
3507 default:
3508 FIXME("unimplemented flags %x\n", flags);
3509 return E_NOTIMPL;
3512 return S_OK;
3515 static void RegExp_destructor(DispatchEx *dispex)
3517 RegExpInstance *This = (RegExpInstance*)dispex;
3519 if(This->jsregexp)
3520 js_DestroyRegExp(This->jsregexp);
3521 SysFreeString(This->str);
3522 heap_free(This);
3525 static const builtin_prop_t RegExp_props[] = {
3526 {execW, RegExp_exec, PROPF_METHOD},
3527 {globalW, RegExp_global, 0},
3528 {ignoreCaseW, RegExp_ignoreCase, 0},
3529 {lastIndexW, RegExp_lastIndex, 0},
3530 {multilineW, RegExp_multiline, 0},
3531 {sourceW, RegExp_source, 0},
3532 {testW, RegExp_test, PROPF_METHOD},
3533 {toStringW, RegExp_toString, PROPF_METHOD}
3536 static const builtin_info_t RegExp_info = {
3537 JSCLASS_REGEXP,
3538 {NULL, RegExp_value, 0},
3539 sizeof(RegExp_props)/sizeof(*RegExp_props),
3540 RegExp_props,
3541 RegExp_destructor,
3542 NULL
3545 static HRESULT alloc_regexp(script_ctx_t *ctx, DispatchEx *object_prototype, RegExpInstance **ret)
3547 RegExpInstance *regexp;
3548 HRESULT hres;
3550 regexp = heap_alloc_zero(sizeof(RegExpInstance));
3551 if(!regexp)
3552 return E_OUTOFMEMORY;
3554 if(object_prototype)
3555 hres = init_dispex(&regexp->dispex, ctx, &RegExp_info, object_prototype);
3556 else
3557 hres = init_dispex_from_constr(&regexp->dispex, ctx, &RegExp_info, ctx->regexp_constr);
3559 if(FAILED(hres)) {
3560 heap_free(regexp);
3561 return hres;
3564 *ret = regexp;
3565 return S_OK;
3568 static HRESULT create_regexp(script_ctx_t *ctx, const WCHAR *exp, int len, DWORD flags, DispatchEx **ret)
3570 RegExpInstance *regexp;
3571 HRESULT hres;
3573 TRACE("%s %x\n", debugstr_w(exp), flags);
3575 hres = alloc_regexp(ctx, NULL, &regexp);
3576 if(FAILED(hres))
3577 return hres;
3579 if(len == -1)
3580 regexp->str = SysAllocString(exp);
3581 else
3582 regexp->str = SysAllocStringLen(exp, len);
3583 if(!regexp->str) {
3584 jsdisp_release(&regexp->dispex);
3585 return E_OUTOFMEMORY;
3588 regexp->jsregexp = js_NewRegExp(ctx, regexp->str, flags, FALSE);
3589 if(!regexp->jsregexp) {
3590 WARN("js_NewRegExp failed\n");
3591 jsdisp_release(&regexp->dispex);
3592 return E_FAIL;
3595 *ret = &regexp->dispex;
3596 return S_OK;
3599 static HRESULT regexp_constructor(script_ctx_t *ctx, DISPPARAMS *dp, VARIANT *retv)
3601 const WCHAR *opt = emptyW, *src;
3602 DispatchEx *ret;
3603 VARIANT *arg;
3604 HRESULT hres;
3606 if(!arg_cnt(dp)) {
3607 FIXME("no args\n");
3608 return E_NOTIMPL;
3611 arg = get_arg(dp,0);
3612 if(V_VT(arg) == VT_DISPATCH) {
3613 DispatchEx *obj;
3615 obj = iface_to_jsdisp((IUnknown*)V_DISPATCH(arg));
3616 if(obj) {
3617 if(is_class(obj, JSCLASS_REGEXP)) {
3618 RegExpInstance *regexp = (RegExpInstance*)obj;
3620 hres = create_regexp(ctx, regexp->str, -1, regexp->jsregexp->flags, &ret);
3621 jsdisp_release(obj);
3622 if(FAILED(hres))
3623 return hres;
3625 V_VT(retv) = VT_DISPATCH;
3626 V_DISPATCH(retv) = (IDispatch*)_IDispatchEx_(ret);
3627 return S_OK;
3630 jsdisp_release(obj);
3634 if(V_VT(arg) != VT_BSTR) {
3635 FIXME("vt arg0 = %d\n", V_VT(arg));
3636 return E_NOTIMPL;
3639 src = V_BSTR(arg);
3641 if(arg_cnt(dp) >= 2) {
3642 arg = get_arg(dp,1);
3643 if(V_VT(arg) != VT_BSTR) {
3644 FIXME("unimplemented for vt %d\n", V_VT(arg));
3645 return E_NOTIMPL;
3648 opt = V_BSTR(arg);
3651 hres = create_regexp_str(ctx, src, -1, opt, strlenW(opt), &ret);
3652 if(FAILED(hres))
3653 return hres;
3655 V_VT(retv) = VT_DISPATCH;
3656 V_DISPATCH(retv) = (IDispatch*)_IDispatchEx_(ret);
3657 return S_OK;
3660 static HRESULT RegExpConstr_value(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3661 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3663 TRACE("\n");
3665 switch(flags) {
3666 case DISPATCH_CONSTRUCT:
3667 return regexp_constructor(dispex->ctx, dp, retv);
3668 default:
3669 FIXME("unimplemented flags: %x\n", flags);
3670 return E_NOTIMPL;
3673 return S_OK;
3676 HRESULT create_regexp_constr(script_ctx_t *ctx, DispatchEx *object_prototype, DispatchEx **ret)
3678 RegExpInstance *regexp;
3679 HRESULT hres;
3681 hres = alloc_regexp(ctx, object_prototype, &regexp);
3682 if(FAILED(hres))
3683 return hres;
3685 hres = create_builtin_function(ctx, RegExpConstr_value, NULL, PROPF_CONSTR, &regexp->dispex, ret);
3687 jsdisp_release(&regexp->dispex);
3688 return hres;
3691 HRESULT create_regexp_str(script_ctx_t *ctx, const WCHAR *exp, DWORD exp_len, const WCHAR *opt,
3692 DWORD opt_len, DispatchEx **ret)
3694 const WCHAR *p;
3695 DWORD flags = 0;
3697 if(opt) {
3698 for (p = opt; p < opt+opt_len; p++) {
3699 switch (*p) {
3700 case 'g':
3701 flags |= JSREG_GLOB;
3702 break;
3703 case 'i':
3704 flags |= JSREG_FOLD;
3705 break;
3706 case 'm':
3707 flags |= JSREG_MULTILINE;
3708 break;
3709 case 'y':
3710 flags |= JSREG_STICKY;
3711 break;
3712 default:
3713 WARN("wrong flag %c\n", *p);
3714 return E_FAIL;
3719 return create_regexp(ctx, exp, exp_len, flags, ret);