push 862965e7f7bbbfb960006fcded9111ae18c06ef6
[wine/hacks.git] / dlls / jscript / regexp.c
blob9722919f722c5e93185d1a9e5ed9c91e3f14555b
1 /*
2 * Copyright 2008 Jacek Caban for CodeWeavers
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
20 * Code in this file is based on files:
21 * js/src/jsregexp.h
22 * js/src/jsregexp.c
23 * from Mozilla project, released under LGPL 2.1 or later.
25 * The Original Code is Mozilla Communicator client code, released
26 * March 31, 1998.
28 * The Initial Developer of the Original Code is
29 * Netscape Communications Corporation.
30 * Portions created by the Initial Developer are Copyright (C) 1998
31 * the Initial Developer. All Rights Reserved.
34 #include <assert.h>
36 #include "jscript.h"
38 #include "wine/debug.h"
40 WINE_DEFAULT_DEBUG_CHANNEL(jscript);
42 #define JSREG_FOLD 0x01 /* fold uppercase to lowercase */
43 #define JSREG_GLOB 0x02 /* global exec, creates array of matches */
44 #define JSREG_MULTILINE 0x04 /* treat ^ and $ as begin and end of line */
45 #define JSREG_STICKY 0x08 /* only match starting at lastIndex */
47 typedef BYTE JSPackedBool;
48 typedef BYTE jsbytecode;
51 * This struct holds a bitmap representation of a class from a regexp.
52 * There's a list of these referenced by the classList field in the JSRegExp
53 * struct below. The initial state has startIndex set to the offset in the
54 * original regexp source of the beginning of the class contents. The first
55 * use of the class converts the source representation into a bitmap.
58 typedef struct RECharSet {
59 JSPackedBool converted;
60 JSPackedBool sense;
61 WORD length;
62 union {
63 BYTE *bits;
64 struct {
65 size_t startIndex;
66 size_t length;
67 } src;
68 } u;
69 } RECharSet;
71 typedef struct {
72 WORD flags; /* flags, see jsapi.h's JSREG_* defines */
73 size_t parenCount; /* number of parenthesized submatches */
74 size_t classCount; /* count [...] bitmaps */
75 RECharSet *classList; /* list of [...] bitmaps */
76 BSTR source; /* locked source string, sans // */
77 jsbytecode program[1]; /* regular expression bytecode */
78 } JSRegExp;
80 typedef struct {
81 DispatchEx dispex;
83 JSRegExp *jsregexp;
84 BSTR str;
85 } RegExpInstance;
87 static const WCHAR sourceW[] = {'s','o','u','r','c','e',0};
88 static const WCHAR globalW[] = {'g','l','o','b','a','l',0};
89 static const WCHAR ignoreCaseW[] = {'i','g','n','o','r','e','C','a','s','e',0};
90 static const WCHAR multilineW[] = {'m','u','l','t','i','l','i','n','e',0};
91 static const WCHAR lastIndexW[] = {'l','a','s','t','I','n','d','e','x',0};
92 static const WCHAR toStringW[] = {'t','o','S','t','r','i','n','g',0};
93 static const WCHAR toLocaleStringW[] = {'t','o','L','o','c','a','l','e','S','t','r','i','n','g',0};
94 static const WCHAR hasOwnPropertyW[] = {'h','a','s','O','w','n','P','r','o','p','e','r','t','y',0};
95 static const WCHAR propertyIsEnumerableW[] =
96 {'p','r','o','p','e','r','t','y','I','s','E','n','u','m','e','r','a','b','l','e',0};
97 static const WCHAR isPrototypeOfW[] = {'i','s','P','r','o','t','o','t','y','p','e','O','f',0};
98 static const WCHAR execW[] = {'e','x','e','c',0};
99 static const WCHAR testW[] = {'t','e','s','t',0};
101 static const WCHAR emptyW[] = {0};
103 /* FIXME: Better error handling */
104 #define ReportRegExpError(a,b,c)
105 #define ReportRegExpErrorHelper(a,b,c,d)
106 #define JS_ReportErrorNumber(a,b,c,d)
107 #define JS_ReportErrorFlagsAndNumber(a,b,c,d,e,f)
108 #define js_ReportOutOfScriptQuota(a)
109 #define JS_ReportOutOfMemory(a)
110 #define JS_COUNT_OPERATION(a,b)
112 #define JSMSG_MIN_TOO_BIG 47
113 #define JSMSG_MAX_TOO_BIG 48
114 #define JSMSG_OUT_OF_ORDER 49
115 #define JSMSG_OUT_OF_MEMORY 137
117 #define LINE_SEPARATOR 0x2028
118 #define PARA_SEPARATOR 0x2029
120 #define RE_IS_LETTER(c) (((c >= 'A') && (c <= 'Z')) || \
121 ((c >= 'a') && (c <= 'z')) )
122 #define RE_IS_LINE_TERM(c) ((c == '\n') || (c == '\r') || \
123 (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
125 #define JS_ISWORD(c) ((c) < 128 && (isalnum(c) || (c) == '_'))
127 #define JS7_ISDEC(c) ((((unsigned)(c)) - '0') <= 9)
128 #define JS7_UNDEC(c) ((c) - '0')
130 typedef enum REOp {
131 REOP_EMPTY,
132 REOP_BOL,
133 REOP_EOL,
134 REOP_WBDRY,
135 REOP_WNONBDRY,
136 REOP_DOT,
137 REOP_DIGIT,
138 REOP_NONDIGIT,
139 REOP_ALNUM,
140 REOP_NONALNUM,
141 REOP_SPACE,
142 REOP_NONSPACE,
143 REOP_BACKREF,
144 REOP_FLAT,
145 REOP_FLAT1,
146 REOP_FLATi,
147 REOP_FLAT1i,
148 REOP_UCFLAT1,
149 REOP_UCFLAT1i,
150 REOP_UCFLAT,
151 REOP_UCFLATi,
152 REOP_CLASS,
153 REOP_NCLASS,
154 REOP_ALT,
155 REOP_QUANT,
156 REOP_STAR,
157 REOP_PLUS,
158 REOP_OPT,
159 REOP_LPAREN,
160 REOP_RPAREN,
161 REOP_JUMP,
162 REOP_DOTSTAR,
163 REOP_LPARENNON,
164 REOP_ASSERT,
165 REOP_ASSERT_NOT,
166 REOP_ASSERTTEST,
167 REOP_ASSERTNOTTEST,
168 REOP_MINIMALSTAR,
169 REOP_MINIMALPLUS,
170 REOP_MINIMALOPT,
171 REOP_MINIMALQUANT,
172 REOP_ENDCHILD,
173 REOP_REPEAT,
174 REOP_MINIMALREPEAT,
175 REOP_ALTPREREQ,
176 REOP_ALTPREREQ2,
177 REOP_ENDALT,
178 REOP_CONCAT,
179 REOP_END,
180 REOP_LIMIT /* META: no operator >= to this */
181 } REOp;
183 #define REOP_IS_SIMPLE(op) ((op) <= REOP_NCLASS)
185 static const char *reop_names[] = {
186 "empty",
187 "bol",
188 "eol",
189 "wbdry",
190 "wnonbdry",
191 "dot",
192 "digit",
193 "nondigit",
194 "alnum",
195 "nonalnum",
196 "space",
197 "nonspace",
198 "backref",
199 "flat",
200 "flat1",
201 "flati",
202 "flat1i",
203 "ucflat1",
204 "ucflat1i",
205 "ucflat",
206 "ucflati",
207 "class",
208 "nclass",
209 "alt",
210 "quant",
211 "star",
212 "plus",
213 "opt",
214 "lparen",
215 "rparen",
216 "jump",
217 "dotstar",
218 "lparennon",
219 "assert",
220 "assert_not",
221 "asserttest",
222 "assertnottest",
223 "minimalstar",
224 "minimalplus",
225 "minimalopt",
226 "minimalquant",
227 "endchild",
228 "repeat",
229 "minimalrepeat",
230 "altprereq",
231 "alrprereq2",
232 "endalt",
233 "concat",
234 "end",
235 NULL
238 typedef struct RECapture {
239 ptrdiff_t index; /* start of contents, -1 for empty */
240 size_t length; /* length of capture */
241 } RECapture;
243 typedef struct REMatchState {
244 const WCHAR *cp;
245 RECapture parens[1]; /* first of 're->parenCount' captures,
246 allocated at end of this struct */
247 } REMatchState;
249 typedef struct REProgState {
250 jsbytecode *continue_pc; /* current continuation data */
251 jsbytecode continue_op;
252 ptrdiff_t index; /* progress in text */
253 size_t parenSoFar; /* highest indexed paren started */
254 union {
255 struct {
256 UINT min; /* current quantifier limits */
257 UINT max;
258 } quantifier;
259 struct {
260 size_t top; /* backtrack stack state */
261 size_t sz;
262 } assertion;
263 } u;
264 } REProgState;
266 typedef struct REBackTrackData {
267 size_t sz; /* size of previous stack entry */
268 jsbytecode *backtrack_pc; /* where to backtrack to */
269 jsbytecode backtrack_op;
270 const WCHAR *cp; /* index in text of match at backtrack */
271 size_t parenIndex; /* start index of saved paren contents */
272 size_t parenCount; /* # of saved paren contents */
273 size_t saveStateStackTop; /* number of parent states */
274 /* saved parent states follow */
275 /* saved paren contents follow */
276 } REBackTrackData;
278 #define INITIAL_STATESTACK 100
279 #define INITIAL_BACKTRACK 8000
281 typedef struct REGlobalData {
282 script_ctx_t *cx;
283 JSRegExp *regexp; /* the RE in execution */
284 BOOL ok; /* runtime error (out_of_memory only?) */
285 size_t start; /* offset to start at */
286 ptrdiff_t skipped; /* chars skipped anchoring this r.e. */
287 const WCHAR *cpbegin; /* text base address */
288 const WCHAR *cpend; /* text limit address */
290 REProgState *stateStack; /* stack of state of current parents */
291 size_t stateStackTop;
292 size_t stateStackLimit;
294 REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
295 REBackTrackData *backTrackSP;
296 size_t backTrackStackSize;
297 size_t cursz; /* size of current stack entry */
298 size_t backTrackCount; /* how many times we've backtracked */
299 size_t backTrackLimit; /* upper limit on backtrack states */
301 jsheap_t *pool; /* It's faster to use one malloc'd pool
302 than to malloc/free the three items
303 that are allocated from this pool */
304 } REGlobalData;
306 typedef struct RENode RENode;
307 struct RENode {
308 REOp op; /* r.e. op bytecode */
309 RENode *next; /* next in concatenation order */
310 void *kid; /* first operand */
311 union {
312 void *kid2; /* second operand */
313 INT num; /* could be a number */
314 size_t parenIndex; /* or a parenthesis index */
315 struct { /* or a quantifier range */
316 UINT min;
317 UINT max;
318 JSPackedBool greedy;
319 } range;
320 struct { /* or a character class */
321 size_t startIndex;
322 size_t kidlen; /* length of string at kid, in jschars */
323 size_t index; /* index into class list */
324 WORD bmsize; /* bitmap size, based on max char code */
325 JSPackedBool sense;
326 } ucclass;
327 struct { /* or a literal sequence */
328 WCHAR chr; /* of one character */
329 size_t length; /* or many (via the kid) */
330 } flat;
331 struct {
332 RENode *kid2; /* second operand from ALT */
333 WCHAR ch1; /* match char for ALTPREREQ */
334 WCHAR ch2; /* ditto, or class index for ALTPREREQ2 */
335 } altprereq;
336 } u;
339 #define CLASS_CACHE_SIZE 4
341 typedef struct CompilerState {
342 script_ctx_t *context;
343 const WCHAR *cpbegin;
344 const WCHAR *cpend;
345 const WCHAR *cp;
346 size_t parenCount;
347 size_t classCount; /* number of [] encountered */
348 size_t treeDepth; /* maximum depth of parse tree */
349 size_t progLength; /* estimated bytecode length */
350 RENode *result;
351 size_t classBitmapsMem; /* memory to hold all class bitmaps */
352 struct {
353 const WCHAR *start; /* small cache of class strings */
354 size_t length; /* since they're often the same */
355 size_t index;
356 } classCache[CLASS_CACHE_SIZE];
357 WORD flags;
358 } CompilerState;
360 typedef struct EmitStateStackEntry {
361 jsbytecode *altHead; /* start of REOP_ALT* opcode */
362 jsbytecode *nextAltFixup; /* fixup pointer to next-alt offset */
363 jsbytecode *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */
364 jsbytecode *endTermFixup; /* fixup ptr. to REOPT_ALTPREREQ* offset */
365 RENode *continueNode; /* original REOP_ALT* node being stacked */
366 jsbytecode continueOp; /* REOP_JUMP or REOP_ENDALT continuation */
367 JSPackedBool jumpToJumpFlag; /* true if we've patched jump-to-jump to
368 avoid 16-bit unsigned offset overflow */
369 } EmitStateStackEntry;
372 * Immediate operand sizes and getter/setters. Unlike the ones in jsopcode.h,
373 * the getters and setters take the pc of the offset, not of the opcode before
374 * the offset.
376 #define ARG_LEN 2
377 #define GET_ARG(pc) ((WORD)(((pc)[0] << 8) | (pc)[1]))
378 #define SET_ARG(pc, arg) ((pc)[0] = (jsbytecode) ((arg) >> 8), \
379 (pc)[1] = (jsbytecode) (arg))
381 #define OFFSET_LEN ARG_LEN
382 #define OFFSET_MAX ((1 << (ARG_LEN * 8)) - 1)
383 #define GET_OFFSET(pc) GET_ARG(pc)
385 static BOOL ParseRegExp(CompilerState*);
388 * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
389 * For sanity, we limit it to 2^24 bytes.
391 #define TREE_DEPTH_MAX ((1 << 24) / sizeof(EmitStateStackEntry))
394 * The maximum memory that can be allocated for class bitmaps.
395 * For sanity, we limit it to 2^24 bytes.
397 #define CLASS_BITMAPS_MEM_LIMIT (1 << 24)
400 * Functions to get size and write/read bytecode that represent small indexes
401 * compactly.
402 * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
403 * indicates that the following byte brings more bits to the index. Otherwise
404 * this is the last byte in the index bytecode representing highest index bits.
406 static size_t
407 GetCompactIndexWidth(size_t index)
409 size_t width;
411 for (width = 1; (index >>= 7) != 0; ++width) { }
412 return width;
415 static inline jsbytecode *
416 WriteCompactIndex(jsbytecode *pc, size_t index)
418 size_t next;
420 while ((next = index >> 7) != 0) {
421 *pc++ = (jsbytecode)(index | 0x80);
422 index = next;
424 *pc++ = (jsbytecode)index;
425 return pc;
428 static inline jsbytecode *
429 ReadCompactIndex(jsbytecode *pc, size_t *result)
431 size_t nextByte;
433 nextByte = *pc++;
434 if ((nextByte & 0x80) == 0) {
436 * Short-circuit the most common case when compact index <= 127.
438 *result = nextByte;
439 } else {
440 size_t shift = 7;
441 *result = 0x7F & nextByte;
442 do {
443 nextByte = *pc++;
444 *result |= (nextByte & 0x7F) << shift;
445 shift += 7;
446 } while ((nextByte & 0x80) != 0);
448 return pc;
451 /* Construct and initialize an RENode, returning NULL for out-of-memory */
452 static RENode *
453 NewRENode(CompilerState *state, REOp op)
455 RENode *ren;
457 ren = jsheap_alloc(&state->context->tmp_heap, sizeof(*ren));
458 if (!ren) {
459 /* js_ReportOutOfScriptQuota(cx); */
460 return NULL;
462 ren->op = op;
463 ren->next = NULL;
464 ren->kid = NULL;
465 return ren;
469 * Validates and converts hex ascii value.
471 static BOOL
472 isASCIIHexDigit(WCHAR c, UINT *digit)
474 UINT cv = c;
476 if (cv < '0')
477 return FALSE;
478 if (cv <= '9') {
479 *digit = cv - '0';
480 return TRUE;
482 cv |= 0x20;
483 if (cv >= 'a' && cv <= 'f') {
484 *digit = cv - 'a' + 10;
485 return TRUE;
487 return FALSE;
490 typedef struct {
491 REOp op;
492 const WCHAR *errPos;
493 size_t parenIndex;
494 } REOpData;
496 #define JUMP_OFFSET_HI(off) ((jsbytecode)((off) >> 8))
497 #define JUMP_OFFSET_LO(off) ((jsbytecode)(off))
499 static BOOL
500 SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
502 ptrdiff_t offset = target - jump;
504 /* Check that target really points forward. */
505 assert(offset >= 2);
506 if ((size_t)offset > OFFSET_MAX)
507 return FALSE;
509 jump[0] = JUMP_OFFSET_HI(offset);
510 jump[1] = JUMP_OFFSET_LO(offset);
511 return TRUE;
515 * Generate bytecode for the tree rooted at t using an explicit stack instead
516 * of recursion.
518 static jsbytecode *
519 EmitREBytecode(CompilerState *state, JSRegExp *re, size_t treeDepth,
520 jsbytecode *pc, RENode *t)
522 EmitStateStackEntry *emitStateSP, *emitStateStack;
523 RECharSet *charSet;
524 REOp op;
526 if (treeDepth == 0) {
527 emitStateStack = NULL;
528 } else {
529 emitStateStack = heap_alloc(sizeof(EmitStateStackEntry) * treeDepth);
530 if (!emitStateStack)
531 return NULL;
533 emitStateSP = emitStateStack;
534 op = t->op;
535 assert(op < REOP_LIMIT);
537 for (;;) {
538 *pc++ = op;
539 switch (op) {
540 case REOP_EMPTY:
541 --pc;
542 break;
544 case REOP_ALTPREREQ2:
545 case REOP_ALTPREREQ:
546 assert(emitStateSP);
547 emitStateSP->altHead = pc - 1;
548 emitStateSP->endTermFixup = pc;
549 pc += OFFSET_LEN;
550 SET_ARG(pc, t->u.altprereq.ch1);
551 pc += ARG_LEN;
552 SET_ARG(pc, t->u.altprereq.ch2);
553 pc += ARG_LEN;
555 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
556 pc += OFFSET_LEN;
558 emitStateSP->continueNode = t;
559 emitStateSP->continueOp = REOP_JUMP;
560 emitStateSP->jumpToJumpFlag = FALSE;
561 ++emitStateSP;
562 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
563 t = t->kid;
564 op = t->op;
565 assert(op < REOP_LIMIT);
566 continue;
568 case REOP_JUMP:
569 emitStateSP->nextTermFixup = pc; /* offset to following term */
570 pc += OFFSET_LEN;
571 if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc))
572 goto jump_too_big;
573 emitStateSP->continueOp = REOP_ENDALT;
574 ++emitStateSP;
575 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
576 t = t->u.kid2;
577 op = t->op;
578 assert(op < REOP_LIMIT);
579 continue;
581 case REOP_ENDALT:
583 * If we already patched emitStateSP->nextTermFixup to jump to
584 * a nearer jump, to avoid 16-bit immediate offset overflow, we
585 * are done here.
587 if (emitStateSP->jumpToJumpFlag)
588 break;
591 * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
592 * REOP_ENDALT is executed only on successful match of the last
593 * alternate in a group.
595 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
596 goto jump_too_big;
597 if (t->op != REOP_ALT) {
598 if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc))
599 goto jump_too_big;
603 * If the program is bigger than the REOP_JUMP offset range, then
604 * we must check for alternates before this one that are part of
605 * the same group, and fix up their jump offsets to target jumps
606 * close enough to fit in a 16-bit unsigned offset immediate.
608 if ((size_t)(pc - re->program) > OFFSET_MAX &&
609 emitStateSP > emitStateStack) {
610 EmitStateStackEntry *esp, *esp2;
611 jsbytecode *alt, *jump;
612 ptrdiff_t span, header;
614 esp2 = emitStateSP;
615 alt = esp2->altHead;
616 for (esp = esp2 - 1; esp >= emitStateStack; --esp) {
617 if (esp->continueOp == REOP_ENDALT &&
618 !esp->jumpToJumpFlag &&
619 esp->nextTermFixup + OFFSET_LEN == alt &&
620 (size_t)(pc - ((esp->continueNode->op != REOP_ALT)
621 ? esp->endTermFixup
622 : esp->nextTermFixup)) > OFFSET_MAX) {
623 alt = esp->altHead;
624 jump = esp->nextTermFixup;
627 * The span must be 1 less than the distance from
628 * jump offset to jump offset, so we actually jump
629 * to a REOP_JUMP bytecode, not to its offset!
631 for (;;) {
632 assert(jump < esp2->nextTermFixup);
633 span = esp2->nextTermFixup - jump - 1;
634 if ((size_t)span <= OFFSET_MAX)
635 break;
636 do {
637 if (--esp2 == esp)
638 goto jump_too_big;
639 } while (esp2->continueOp != REOP_ENDALT);
642 jump[0] = JUMP_OFFSET_HI(span);
643 jump[1] = JUMP_OFFSET_LO(span);
645 if (esp->continueNode->op != REOP_ALT) {
647 * We must patch the offset at esp->endTermFixup
648 * as well, for the REOP_ALTPREREQ{,2} opcodes.
649 * If we're unlucky and endTermFixup is more than
650 * OFFSET_MAX bytes from its target, we cheat by
651 * jumping 6 bytes to the jump whose offset is at
652 * esp->nextTermFixup, which has the same target.
654 jump = esp->endTermFixup;
655 header = esp->nextTermFixup - jump;
656 span += header;
657 if ((size_t)span > OFFSET_MAX)
658 span = header;
660 jump[0] = JUMP_OFFSET_HI(span);
661 jump[1] = JUMP_OFFSET_LO(span);
664 esp->jumpToJumpFlag = TRUE;
668 break;
670 case REOP_ALT:
671 assert(emitStateSP);
672 emitStateSP->altHead = pc - 1;
673 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
674 pc += OFFSET_LEN;
675 emitStateSP->continueNode = t;
676 emitStateSP->continueOp = REOP_JUMP;
677 emitStateSP->jumpToJumpFlag = FALSE;
678 ++emitStateSP;
679 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
680 t = t->kid;
681 op = t->op;
682 assert(op < REOP_LIMIT);
683 continue;
685 case REOP_FLAT:
687 * Coalesce FLATs if possible and if it would not increase bytecode
688 * beyond preallocated limit. The latter happens only when bytecode
689 * size for coalesced string with offset p and length 2 exceeds 6
690 * bytes preallocated for 2 single char nodes, i.e. when
691 * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
692 * GetCompactIndexWidth(p) > 4.
693 * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
694 * nodes strictly decreases bytecode size, the check has to be
695 * done only for the first coalescing.
697 if (t->kid &&
698 GetCompactIndexWidth((WCHAR*)t->kid - state->cpbegin) <= 4)
700 while (t->next &&
701 t->next->op == REOP_FLAT &&
702 (WCHAR*)t->kid + t->u.flat.length ==
703 t->next->kid) {
704 t->u.flat.length += t->next->u.flat.length;
705 t->next = t->next->next;
708 if (t->kid && t->u.flat.length > 1) {
709 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLATi : REOP_FLAT;
710 pc = WriteCompactIndex(pc, (WCHAR*)t->kid - state->cpbegin);
711 pc = WriteCompactIndex(pc, t->u.flat.length);
712 } else if (t->u.flat.chr < 256) {
713 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLAT1i : REOP_FLAT1;
714 *pc++ = (jsbytecode) t->u.flat.chr;
715 } else {
716 pc[-1] = (state->flags & JSREG_FOLD)
717 ? REOP_UCFLAT1i
718 : REOP_UCFLAT1;
719 SET_ARG(pc, t->u.flat.chr);
720 pc += ARG_LEN;
722 break;
724 case REOP_LPAREN:
725 assert(emitStateSP);
726 pc = WriteCompactIndex(pc, t->u.parenIndex);
727 emitStateSP->continueNode = t;
728 emitStateSP->continueOp = REOP_RPAREN;
729 ++emitStateSP;
730 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
731 t = t->kid;
732 op = t->op;
733 continue;
735 case REOP_RPAREN:
736 pc = WriteCompactIndex(pc, t->u.parenIndex);
737 break;
739 case REOP_BACKREF:
740 pc = WriteCompactIndex(pc, t->u.parenIndex);
741 break;
743 case REOP_ASSERT:
744 assert(emitStateSP);
745 emitStateSP->nextTermFixup = pc;
746 pc += OFFSET_LEN;
747 emitStateSP->continueNode = t;
748 emitStateSP->continueOp = REOP_ASSERTTEST;
749 ++emitStateSP;
750 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
751 t = t->kid;
752 op = t->op;
753 continue;
755 case REOP_ASSERTTEST:
756 case REOP_ASSERTNOTTEST:
757 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
758 goto jump_too_big;
759 break;
761 case REOP_ASSERT_NOT:
762 assert(emitStateSP);
763 emitStateSP->nextTermFixup = pc;
764 pc += OFFSET_LEN;
765 emitStateSP->continueNode = t;
766 emitStateSP->continueOp = REOP_ASSERTNOTTEST;
767 ++emitStateSP;
768 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
769 t = t->kid;
770 op = t->op;
771 continue;
773 case REOP_QUANT:
774 assert(emitStateSP);
775 if (t->u.range.min == 0 && t->u.range.max == (UINT)-1) {
776 pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
777 } else if (t->u.range.min == 0 && t->u.range.max == 1) {
778 pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
779 } else if (t->u.range.min == 1 && t->u.range.max == (UINT) -1) {
780 pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
781 } else {
782 if (!t->u.range.greedy)
783 pc[-1] = REOP_MINIMALQUANT;
784 pc = WriteCompactIndex(pc, t->u.range.min);
786 * Write max + 1 to avoid using size_t(max) + 1 bytes
787 * for (UINT)-1 sentinel.
789 pc = WriteCompactIndex(pc, t->u.range.max + 1);
791 emitStateSP->nextTermFixup = pc;
792 pc += OFFSET_LEN;
793 emitStateSP->continueNode = t;
794 emitStateSP->continueOp = REOP_ENDCHILD;
795 ++emitStateSP;
796 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
797 t = t->kid;
798 op = t->op;
799 continue;
801 case REOP_ENDCHILD:
802 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
803 goto jump_too_big;
804 break;
806 case REOP_CLASS:
807 if (!t->u.ucclass.sense)
808 pc[-1] = REOP_NCLASS;
809 pc = WriteCompactIndex(pc, t->u.ucclass.index);
810 charSet = &re->classList[t->u.ucclass.index];
811 charSet->converted = FALSE;
812 charSet->length = t->u.ucclass.bmsize;
813 charSet->u.src.startIndex = t->u.ucclass.startIndex;
814 charSet->u.src.length = t->u.ucclass.kidlen;
815 charSet->sense = t->u.ucclass.sense;
816 break;
818 default:
819 break;
822 t = t->next;
823 if (t) {
824 op = t->op;
825 } else {
826 if (emitStateSP == emitStateStack)
827 break;
828 --emitStateSP;
829 t = emitStateSP->continueNode;
830 op = (REOp) emitStateSP->continueOp;
834 cleanup:
835 heap_free(emitStateStack);
836 return pc;
838 jump_too_big:
839 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
840 pc = NULL;
841 goto cleanup;
845 * Process the op against the two top operands, reducing them to a single
846 * operand in the penultimate slot. Update progLength and treeDepth.
848 static BOOL
849 ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack,
850 INT operandSP)
852 RENode *result;
854 switch (opData->op) {
855 case REOP_ALT:
856 result = NewRENode(state, REOP_ALT);
857 if (!result)
858 return FALSE;
859 result->kid = operandStack[operandSP - 2];
860 result->u.kid2 = operandStack[operandSP - 1];
861 operandStack[operandSP - 2] = result;
863 if (state->treeDepth == TREE_DEPTH_MAX) {
864 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
865 return FALSE;
867 ++state->treeDepth;
870 * Look at both alternates to see if there's a FLAT or a CLASS at
871 * the start of each. If so, use a prerequisite match.
873 if (((RENode *) result->kid)->op == REOP_FLAT &&
874 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
875 (state->flags & JSREG_FOLD) == 0) {
876 result->op = REOP_ALTPREREQ;
877 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
878 result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
879 /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
880 JUMP, <end> ... ENDALT */
881 state->progLength += 13;
883 else
884 if (((RENode *) result->kid)->op == REOP_CLASS &&
885 ((RENode *) result->kid)->u.ucclass.index < 256 &&
886 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
887 (state->flags & JSREG_FOLD) == 0) {
888 result->op = REOP_ALTPREREQ2;
889 result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
890 result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
891 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
892 JUMP, <end> ... ENDALT */
893 state->progLength += 13;
895 else
896 if (((RENode *) result->kid)->op == REOP_FLAT &&
897 ((RENode *) result->u.kid2)->op == REOP_CLASS &&
898 ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
899 (state->flags & JSREG_FOLD) == 0) {
900 result->op = REOP_ALTPREREQ2;
901 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
902 result->u.altprereq.ch2 =
903 ((RENode *) result->u.kid2)->u.ucclass.index;
904 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
905 JUMP, <end> ... ENDALT */
906 state->progLength += 13;
908 else {
909 /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
910 state->progLength += 7;
912 break;
914 case REOP_CONCAT:
915 result = operandStack[operandSP - 2];
916 while (result->next)
917 result = result->next;
918 result->next = operandStack[operandSP - 1];
919 break;
921 case REOP_ASSERT:
922 case REOP_ASSERT_NOT:
923 case REOP_LPARENNON:
924 case REOP_LPAREN:
925 /* These should have been processed by a close paren. */
926 ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_MISSING_PAREN,
927 opData->errPos);
928 return FALSE;
930 default:;
932 return TRUE;
936 * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
937 * its being on the stack, and to propagate errors to its callers.
939 #define JSREG_FIND_PAREN_COUNT 0x8000
940 #define JSREG_FIND_PAREN_ERROR 0x4000
943 * Magic return value from FindParenCount and GetDecimalValue, to indicate
944 * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
945 * its findMax parameter is non-null.
947 #define OVERFLOW_VALUE ((UINT)-1)
949 static UINT
950 FindParenCount(CompilerState *state)
952 CompilerState temp;
953 int i;
955 if (state->flags & JSREG_FIND_PAREN_COUNT)
956 return OVERFLOW_VALUE;
959 * Copy state into temp, flag it so we never report an invalid backref,
960 * and reset its members to parse the entire regexp. This is obviously
961 * suboptimal, but GetDecimalValue calls us only if a backref appears to
962 * refer to a forward parenthetical, which is rare.
964 temp = *state;
965 temp.flags |= JSREG_FIND_PAREN_COUNT;
966 temp.cp = temp.cpbegin;
967 temp.parenCount = 0;
968 temp.classCount = 0;
969 temp.progLength = 0;
970 temp.treeDepth = 0;
971 temp.classBitmapsMem = 0;
972 for (i = 0; i < CLASS_CACHE_SIZE; i++)
973 temp.classCache[i].start = NULL;
975 if (!ParseRegExp(&temp)) {
976 state->flags |= JSREG_FIND_PAREN_ERROR;
977 return OVERFLOW_VALUE;
979 return temp.parenCount;
983 * Extract and return a decimal value at state->cp. The initial character c
984 * has already been read. Return OVERFLOW_VALUE if the result exceeds max.
985 * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
986 * state->flags to discover whether an error occurred under findMax.
988 static UINT
989 GetDecimalValue(WCHAR c, UINT max, UINT (*findMax)(CompilerState *state),
990 CompilerState *state)
992 UINT value = JS7_UNDEC(c);
993 BOOL overflow = (value > max && (!findMax || value > findMax(state)));
995 /* The following restriction allows simpler overflow checks. */
996 assert(max <= ((UINT)-1 - 9) / 10);
997 while (state->cp < state->cpend) {
998 c = *state->cp;
999 if (!JS7_ISDEC(c))
1000 break;
1001 value = 10 * value + JS7_UNDEC(c);
1002 if (!overflow && value > max && (!findMax || value > findMax(state)))
1003 overflow = TRUE;
1004 ++state->cp;
1006 return overflow ? OVERFLOW_VALUE : value;
1010 * Calculate the total size of the bitmap required for a class expression.
1012 static BOOL
1013 CalculateBitmapSize(CompilerState *state, RENode *target, const WCHAR *src,
1014 const WCHAR *end)
1016 UINT max = 0;
1017 BOOL inRange = FALSE;
1018 WCHAR c, rangeStart = 0;
1019 UINT n, digit, nDigits, i;
1021 target->u.ucclass.bmsize = 0;
1022 target->u.ucclass.sense = TRUE;
1024 if (src == end)
1025 return TRUE;
1027 if (*src == '^') {
1028 ++src;
1029 target->u.ucclass.sense = FALSE;
1032 while (src != end) {
1033 BOOL canStartRange = TRUE;
1034 UINT localMax = 0;
1036 switch (*src) {
1037 case '\\':
1038 ++src;
1039 c = *src++;
1040 switch (c) {
1041 case 'b':
1042 localMax = 0x8;
1043 break;
1044 case 'f':
1045 localMax = 0xC;
1046 break;
1047 case 'n':
1048 localMax = 0xA;
1049 break;
1050 case 'r':
1051 localMax = 0xD;
1052 break;
1053 case 't':
1054 localMax = 0x9;
1055 break;
1056 case 'v':
1057 localMax = 0xB;
1058 break;
1059 case 'c':
1060 if (src < end && RE_IS_LETTER(*src)) {
1061 localMax = (UINT) (*src++) & 0x1F;
1062 } else {
1063 --src;
1064 localMax = '\\';
1066 break;
1067 case 'x':
1068 nDigits = 2;
1069 goto lexHex;
1070 case 'u':
1071 nDigits = 4;
1072 lexHex:
1073 n = 0;
1074 for (i = 0; (i < nDigits) && (src < end); i++) {
1075 c = *src++;
1076 if (!isASCIIHexDigit(c, &digit)) {
1078 * Back off to accepting the original
1079 *'\' as a literal.
1081 src -= i + 1;
1082 n = '\\';
1083 break;
1085 n = (n << 4) | digit;
1087 localMax = n;
1088 break;
1089 case 'd':
1090 canStartRange = FALSE;
1091 if (inRange) {
1092 JS_ReportErrorNumber(state->context,
1093 js_GetErrorMessage, NULL,
1094 JSMSG_BAD_CLASS_RANGE);
1095 return FALSE;
1097 localMax = '9';
1098 break;
1099 case 'D':
1100 case 's':
1101 case 'S':
1102 case 'w':
1103 case 'W':
1104 canStartRange = FALSE;
1105 if (inRange) {
1106 JS_ReportErrorNumber(state->context,
1107 js_GetErrorMessage, NULL,
1108 JSMSG_BAD_CLASS_RANGE);
1109 return FALSE;
1111 max = 65535;
1114 * If this is the start of a range, ensure that it's less than
1115 * the end.
1117 localMax = 0;
1118 break;
1119 case '0':
1120 case '1':
1121 case '2':
1122 case '3':
1123 case '4':
1124 case '5':
1125 case '6':
1126 case '7':
1128 * This is a non-ECMA extension - decimal escapes (in this
1129 * case, octal!) are supposed to be an error inside class
1130 * ranges, but supported here for backwards compatibility.
1133 n = JS7_UNDEC(c);
1134 c = *src;
1135 if ('0' <= c && c <= '7') {
1136 src++;
1137 n = 8 * n + JS7_UNDEC(c);
1138 c = *src;
1139 if ('0' <= c && c <= '7') {
1140 src++;
1141 i = 8 * n + JS7_UNDEC(c);
1142 if (i <= 0377)
1143 n = i;
1144 else
1145 src--;
1148 localMax = n;
1149 break;
1151 default:
1152 localMax = c;
1153 break;
1155 break;
1156 default:
1157 localMax = *src++;
1158 break;
1161 if (inRange) {
1162 /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
1163 if (rangeStart > localMax) {
1164 JS_ReportErrorNumber(state->context,
1165 js_GetErrorMessage, NULL,
1166 JSMSG_BAD_CLASS_RANGE);
1167 return FALSE;
1169 inRange = FALSE;
1170 } else {
1171 if (canStartRange && src < end - 1) {
1172 if (*src == '-') {
1173 ++src;
1174 inRange = TRUE;
1175 rangeStart = (WCHAR)localMax;
1176 continue;
1179 if (state->flags & JSREG_FOLD)
1180 rangeStart = localMax; /* one run of the uc/dc loop below */
1183 if (state->flags & JSREG_FOLD) {
1184 WCHAR maxch = localMax;
1186 for (i = rangeStart; i <= localMax; i++) {
1187 WCHAR uch, dch;
1189 uch = toupperW(i);
1190 dch = tolowerW(i);
1191 if(maxch < uch)
1192 maxch = uch;
1193 if(maxch < dch)
1194 maxch = dch;
1196 localMax = maxch;
1199 if (localMax > max)
1200 max = localMax;
1202 target->u.ucclass.bmsize = max;
1203 return TRUE;
1206 static INT
1207 ParseMinMaxQuantifier(CompilerState *state, BOOL ignoreValues)
1209 UINT min, max;
1210 WCHAR c;
1211 const WCHAR *errp = state->cp++;
1213 c = *state->cp;
1214 if (JS7_ISDEC(c)) {
1215 ++state->cp;
1216 min = GetDecimalValue(c, 0xFFFF, NULL, state);
1217 c = *state->cp;
1219 if (!ignoreValues && min == OVERFLOW_VALUE)
1220 return JSMSG_MIN_TOO_BIG;
1222 if (c == ',') {
1223 c = *++state->cp;
1224 if (JS7_ISDEC(c)) {
1225 ++state->cp;
1226 max = GetDecimalValue(c, 0xFFFF, NULL, state);
1227 c = *state->cp;
1228 if (!ignoreValues && max == OVERFLOW_VALUE)
1229 return JSMSG_MAX_TOO_BIG;
1230 if (!ignoreValues && min > max)
1231 return JSMSG_OUT_OF_ORDER;
1232 } else {
1233 max = (UINT)-1;
1235 } else {
1236 max = min;
1238 if (c == '}') {
1239 state->result = NewRENode(state, REOP_QUANT);
1240 if (!state->result)
1241 return JSMSG_OUT_OF_MEMORY;
1242 state->result->u.range.min = min;
1243 state->result->u.range.max = max;
1245 * QUANT, <min>, <max>, <next> ... <ENDCHILD>
1246 * where <max> is written as compact(max+1) to make
1247 * (UINT)-1 sentinel to occupy 1 byte, not width_of(max)+1.
1249 state->progLength += (1 + GetCompactIndexWidth(min)
1250 + GetCompactIndexWidth(max + 1)
1251 +3);
1252 return 0;
1256 state->cp = errp;
1257 return -1;
1260 static BOOL
1261 ParseQuantifier(CompilerState *state)
1263 RENode *term;
1264 term = state->result;
1265 if (state->cp < state->cpend) {
1266 switch (*state->cp) {
1267 case '+':
1268 state->result = NewRENode(state, REOP_QUANT);
1269 if (!state->result)
1270 return FALSE;
1271 state->result->u.range.min = 1;
1272 state->result->u.range.max = (UINT)-1;
1273 /* <PLUS>, <next> ... <ENDCHILD> */
1274 state->progLength += 4;
1275 goto quantifier;
1276 case '*':
1277 state->result = NewRENode(state, REOP_QUANT);
1278 if (!state->result)
1279 return FALSE;
1280 state->result->u.range.min = 0;
1281 state->result->u.range.max = (UINT)-1;
1282 /* <STAR>, <next> ... <ENDCHILD> */
1283 state->progLength += 4;
1284 goto quantifier;
1285 case '?':
1286 state->result = NewRENode(state, REOP_QUANT);
1287 if (!state->result)
1288 return FALSE;
1289 state->result->u.range.min = 0;
1290 state->result->u.range.max = 1;
1291 /* <OPT>, <next> ... <ENDCHILD> */
1292 state->progLength += 4;
1293 goto quantifier;
1294 case '{': /* balance '}' */
1296 INT err;
1298 err = ParseMinMaxQuantifier(state, FALSE);
1299 if (err == 0)
1300 goto quantifier;
1301 if (err == -1)
1302 return TRUE;
1304 ReportRegExpErrorHelper(state, JSREPORT_ERROR, err, errp);
1305 return FALSE;
1307 default:;
1310 return TRUE;
1312 quantifier:
1313 if (state->treeDepth == TREE_DEPTH_MAX) {
1314 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1315 return FALSE;
1318 ++state->treeDepth;
1319 ++state->cp;
1320 state->result->kid = term;
1321 if (state->cp < state->cpend && *state->cp == '?') {
1322 ++state->cp;
1323 state->result->u.range.greedy = FALSE;
1324 } else {
1325 state->result->u.range.greedy = TRUE;
1327 return TRUE;
1331 * item: assertion An item is either an assertion or
1332 * quantatom a quantified atom.
1334 * assertion: '^' Assertions match beginning of string
1335 * (or line if the class static property
1336 * RegExp.multiline is true).
1337 * '$' End of string (or line if the class
1338 * static property RegExp.multiline is
1339 * true).
1340 * '\b' Word boundary (between \w and \W).
1341 * '\B' Word non-boundary.
1343 * quantatom: atom An unquantified atom.
1344 * quantatom '{' n ',' m '}'
1345 * Atom must occur between n and m times.
1346 * quantatom '{' n ',' '}' Atom must occur at least n times.
1347 * quantatom '{' n '}' Atom must occur exactly n times.
1348 * quantatom '*' Zero or more times (same as {0,}).
1349 * quantatom '+' One or more times (same as {1,}).
1350 * quantatom '?' Zero or one time (same as {0,1}).
1352 * any of which can be optionally followed by '?' for ungreedy
1354 * atom: '(' regexp ')' A parenthesized regexp (what matched
1355 * can be addressed using a backreference,
1356 * see '\' n below).
1357 * '.' Matches any char except '\n'.
1358 * '[' classlist ']' A character class.
1359 * '[' '^' classlist ']' A negated character class.
1360 * '\f' Form Feed.
1361 * '\n' Newline (Line Feed).
1362 * '\r' Carriage Return.
1363 * '\t' Horizontal Tab.
1364 * '\v' Vertical Tab.
1365 * '\d' A digit (same as [0-9]).
1366 * '\D' A non-digit.
1367 * '\w' A word character, [0-9a-z_A-Z].
1368 * '\W' A non-word character.
1369 * '\s' A whitespace character, [ \b\f\n\r\t\v].
1370 * '\S' A non-whitespace character.
1371 * '\' n A backreference to the nth (n decimal
1372 * and positive) parenthesized expression.
1373 * '\' octal An octal escape sequence (octal must be
1374 * two or three digits long, unless it is
1375 * 0 for the null character).
1376 * '\x' hex A hex escape (hex must be two digits).
1377 * '\u' unicode A unicode escape (must be four digits).
1378 * '\c' ctrl A control character, ctrl is a letter.
1379 * '\' literalatomchar Any character except one of the above
1380 * that follow '\' in an atom.
1381 * otheratomchar Any character not first among the other
1382 * atom right-hand sides.
1384 static BOOL
1385 ParseTerm(CompilerState *state)
1387 WCHAR c = *state->cp++;
1388 UINT nDigits;
1389 UINT num, tmp, n, i;
1390 const WCHAR *termStart;
1392 switch (c) {
1393 /* assertions and atoms */
1394 case '^':
1395 state->result = NewRENode(state, REOP_BOL);
1396 if (!state->result)
1397 return FALSE;
1398 state->progLength++;
1399 return TRUE;
1400 case '$':
1401 state->result = NewRENode(state, REOP_EOL);
1402 if (!state->result)
1403 return FALSE;
1404 state->progLength++;
1405 return TRUE;
1406 case '\\':
1407 if (state->cp >= state->cpend) {
1408 /* a trailing '\' is an error */
1409 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_TRAILING_SLASH);
1410 return FALSE;
1412 c = *state->cp++;
1413 switch (c) {
1414 /* assertion escapes */
1415 case 'b' :
1416 state->result = NewRENode(state, REOP_WBDRY);
1417 if (!state->result)
1418 return FALSE;
1419 state->progLength++;
1420 return TRUE;
1421 case 'B':
1422 state->result = NewRENode(state, REOP_WNONBDRY);
1423 if (!state->result)
1424 return FALSE;
1425 state->progLength++;
1426 return TRUE;
1427 /* Decimal escape */
1428 case '0':
1429 /* Give a strict warning. See also the note below. */
1430 WARN("non-octal digit in an escape sequence that doesn't match a back-reference\n");
1431 doOctal:
1432 num = 0;
1433 while (state->cp < state->cpend) {
1434 c = *state->cp;
1435 if (c < '0' || '7' < c)
1436 break;
1437 state->cp++;
1438 tmp = 8 * num + (UINT)JS7_UNDEC(c);
1439 if (tmp > 0377)
1440 break;
1441 num = tmp;
1443 c = (WCHAR)num;
1444 doFlat:
1445 state->result = NewRENode(state, REOP_FLAT);
1446 if (!state->result)
1447 return FALSE;
1448 state->result->u.flat.chr = c;
1449 state->result->u.flat.length = 1;
1450 state->progLength += 3;
1451 break;
1452 case '1':
1453 case '2':
1454 case '3':
1455 case '4':
1456 case '5':
1457 case '6':
1458 case '7':
1459 case '8':
1460 case '9':
1461 termStart = state->cp - 1;
1462 num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
1463 if (state->flags & JSREG_FIND_PAREN_ERROR)
1464 return FALSE;
1465 if (num == OVERFLOW_VALUE) {
1466 /* Give a strict mode warning. */
1467 WARN("back-reference exceeds number of capturing parentheses\n");
1470 * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
1471 * error here. However, for compatibility with IE, we treat the
1472 * whole backref as flat if the first character in it is not a
1473 * valid octal character, and as an octal escape otherwise.
1475 state->cp = termStart;
1476 if (c >= '8') {
1477 /* Treat this as flat. termStart - 1 is the \. */
1478 c = '\\';
1479 goto asFlat;
1482 /* Treat this as an octal escape. */
1483 goto doOctal;
1485 assert(1 <= num && num <= 0x10000);
1486 state->result = NewRENode(state, REOP_BACKREF);
1487 if (!state->result)
1488 return FALSE;
1489 state->result->u.parenIndex = num - 1;
1490 state->progLength
1491 += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
1492 break;
1493 /* Control escape */
1494 case 'f':
1495 c = 0xC;
1496 goto doFlat;
1497 case 'n':
1498 c = 0xA;
1499 goto doFlat;
1500 case 'r':
1501 c = 0xD;
1502 goto doFlat;
1503 case 't':
1504 c = 0x9;
1505 goto doFlat;
1506 case 'v':
1507 c = 0xB;
1508 goto doFlat;
1509 /* Control letter */
1510 case 'c':
1511 if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) {
1512 c = (WCHAR) (*state->cp++ & 0x1F);
1513 } else {
1514 /* back off to accepting the original '\' as a literal */
1515 --state->cp;
1516 c = '\\';
1518 goto doFlat;
1519 /* HexEscapeSequence */
1520 case 'x':
1521 nDigits = 2;
1522 goto lexHex;
1523 /* UnicodeEscapeSequence */
1524 case 'u':
1525 nDigits = 4;
1526 lexHex:
1527 n = 0;
1528 for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
1529 UINT digit;
1530 c = *state->cp++;
1531 if (!isASCIIHexDigit(c, &digit)) {
1533 * Back off to accepting the original 'u' or 'x' as a
1534 * literal.
1536 state->cp -= i + 2;
1537 n = *state->cp++;
1538 break;
1540 n = (n << 4) | digit;
1542 c = (WCHAR) n;
1543 goto doFlat;
1544 /* Character class escapes */
1545 case 'd':
1546 state->result = NewRENode(state, REOP_DIGIT);
1547 doSimple:
1548 if (!state->result)
1549 return FALSE;
1550 state->progLength++;
1551 break;
1552 case 'D':
1553 state->result = NewRENode(state, REOP_NONDIGIT);
1554 goto doSimple;
1555 case 's':
1556 state->result = NewRENode(state, REOP_SPACE);
1557 goto doSimple;
1558 case 'S':
1559 state->result = NewRENode(state, REOP_NONSPACE);
1560 goto doSimple;
1561 case 'w':
1562 state->result = NewRENode(state, REOP_ALNUM);
1563 goto doSimple;
1564 case 'W':
1565 state->result = NewRENode(state, REOP_NONALNUM);
1566 goto doSimple;
1567 /* IdentityEscape */
1568 default:
1569 state->result = NewRENode(state, REOP_FLAT);
1570 if (!state->result)
1571 return FALSE;
1572 state->result->u.flat.chr = c;
1573 state->result->u.flat.length = 1;
1574 state->result->kid = (void *) (state->cp - 1);
1575 state->progLength += 3;
1576 break;
1578 break;
1579 case '[':
1580 state->result = NewRENode(state, REOP_CLASS);
1581 if (!state->result)
1582 return FALSE;
1583 termStart = state->cp;
1584 state->result->u.ucclass.startIndex = termStart - state->cpbegin;
1585 for (;;) {
1586 if (state->cp == state->cpend) {
1587 ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1588 JSMSG_UNTERM_CLASS, termStart);
1590 return FALSE;
1592 if (*state->cp == '\\') {
1593 state->cp++;
1594 if (state->cp != state->cpend)
1595 state->cp++;
1596 continue;
1598 if (*state->cp == ']') {
1599 state->result->u.ucclass.kidlen = state->cp - termStart;
1600 break;
1602 state->cp++;
1604 for (i = 0; i < CLASS_CACHE_SIZE; i++) {
1605 if (!state->classCache[i].start) {
1606 state->classCache[i].start = termStart;
1607 state->classCache[i].length = state->result->u.ucclass.kidlen;
1608 state->classCache[i].index = state->classCount;
1609 break;
1611 if (state->classCache[i].length ==
1612 state->result->u.ucclass.kidlen) {
1613 for (n = 0; ; n++) {
1614 if (n == state->classCache[i].length) {
1615 state->result->u.ucclass.index
1616 = state->classCache[i].index;
1617 goto claim;
1619 if (state->classCache[i].start[n] != termStart[n])
1620 break;
1624 state->result->u.ucclass.index = state->classCount++;
1626 claim:
1628 * Call CalculateBitmapSize now as we want any errors it finds
1629 * to be reported during the parse phase, not at execution.
1631 if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
1632 return FALSE;
1634 * Update classBitmapsMem with number of bytes to hold bmsize bits,
1635 * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
1636 * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
1638 n = (state->result->u.ucclass.bmsize >> 3) + 1;
1639 if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
1640 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1641 return FALSE;
1643 state->classBitmapsMem += n;
1644 /* CLASS, <index> */
1645 state->progLength
1646 += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
1647 break;
1649 case '.':
1650 state->result = NewRENode(state, REOP_DOT);
1651 goto doSimple;
1653 case '{':
1655 const WCHAR *errp = state->cp--;
1656 INT err;
1658 err = ParseMinMaxQuantifier(state, TRUE);
1659 state->cp = errp;
1661 if (err < 0)
1662 goto asFlat;
1664 /* FALL THROUGH */
1666 case '*':
1667 case '+':
1668 case '?':
1669 ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1670 JSMSG_BAD_QUANTIFIER, state->cp - 1);
1671 return FALSE;
1672 default:
1673 asFlat:
1674 state->result = NewRENode(state, REOP_FLAT);
1675 if (!state->result)
1676 return FALSE;
1677 state->result->u.flat.chr = c;
1678 state->result->u.flat.length = 1;
1679 state->result->kid = (void *) (state->cp - 1);
1680 state->progLength += 3;
1681 break;
1683 return ParseQuantifier(state);
1687 * Top-down regular expression grammar, based closely on Perl4.
1689 * regexp: altern A regular expression is one or more
1690 * altern '|' regexp alternatives separated by vertical bar.
1692 #define INITIAL_STACK_SIZE 128
1694 static BOOL
1695 ParseRegExp(CompilerState *state)
1697 size_t parenIndex;
1698 RENode *operand;
1699 REOpData *operatorStack;
1700 RENode **operandStack;
1701 REOp op;
1702 INT i;
1703 BOOL result = FALSE;
1705 INT operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
1706 INT operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
1708 /* Watch out for empty regexp */
1709 if (state->cp == state->cpend) {
1710 state->result = NewRENode(state, REOP_EMPTY);
1711 return (state->result != NULL);
1714 operatorStack = heap_alloc(sizeof(REOpData) * operatorStackSize);
1715 if (!operatorStack)
1716 return FALSE;
1718 operandStack = heap_alloc(sizeof(RENode *) * operandStackSize);
1719 if (!operandStack)
1720 goto out;
1722 for (;;) {
1723 parenIndex = state->parenCount;
1724 if (state->cp == state->cpend) {
1726 * If we are at the end of the regexp and we're short one or more
1727 * operands, the regexp must have the form /x|/ or some such, with
1728 * left parentheses making us short more than one operand.
1730 if (operatorSP >= operandSP) {
1731 operand = NewRENode(state, REOP_EMPTY);
1732 if (!operand)
1733 goto out;
1734 goto pushOperand;
1736 } else {
1737 switch (*state->cp) {
1738 case '(':
1739 ++state->cp;
1740 if (state->cp + 1 < state->cpend &&
1741 *state->cp == '?' &&
1742 (state->cp[1] == '=' ||
1743 state->cp[1] == '!' ||
1744 state->cp[1] == ':')) {
1745 switch (state->cp[1]) {
1746 case '=':
1747 op = REOP_ASSERT;
1748 /* ASSERT, <next>, ... ASSERTTEST */
1749 state->progLength += 4;
1750 break;
1751 case '!':
1752 op = REOP_ASSERT_NOT;
1753 /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
1754 state->progLength += 4;
1755 break;
1756 default:
1757 op = REOP_LPARENNON;
1758 break;
1760 state->cp += 2;
1761 } else {
1762 op = REOP_LPAREN;
1763 /* LPAREN, <index>, ... RPAREN, <index> */
1764 state->progLength
1765 += 2 * (1 + GetCompactIndexWidth(parenIndex));
1766 state->parenCount++;
1767 if (state->parenCount == 65535) {
1768 ReportRegExpError(state, JSREPORT_ERROR,
1769 JSMSG_TOO_MANY_PARENS);
1770 goto out;
1773 goto pushOperator;
1775 case ')':
1777 * If there's no stacked open parenthesis, throw syntax error.
1779 for (i = operatorSP - 1; ; i--) {
1780 if (i < 0) {
1781 ReportRegExpError(state, JSREPORT_ERROR,
1782 JSMSG_UNMATCHED_RIGHT_PAREN);
1783 goto out;
1785 if (operatorStack[i].op == REOP_ASSERT ||
1786 operatorStack[i].op == REOP_ASSERT_NOT ||
1787 operatorStack[i].op == REOP_LPARENNON ||
1788 operatorStack[i].op == REOP_LPAREN) {
1789 break;
1792 /* FALL THROUGH */
1794 case '|':
1795 /* Expected an operand before these, so make an empty one */
1796 operand = NewRENode(state, REOP_EMPTY);
1797 if (!operand)
1798 goto out;
1799 goto pushOperand;
1801 default:
1802 if (!ParseTerm(state))
1803 goto out;
1804 operand = state->result;
1805 pushOperand:
1806 if (operandSP == operandStackSize) {
1807 RENode **tmp;
1808 operandStackSize += operandStackSize;
1809 tmp = heap_realloc(operandStack, sizeof(RENode *) * operandStackSize);
1810 if (!tmp)
1811 goto out;
1812 operandStack = tmp;
1814 operandStack[operandSP++] = operand;
1815 break;
1819 /* At the end; process remaining operators. */
1820 restartOperator:
1821 if (state->cp == state->cpend) {
1822 while (operatorSP) {
1823 --operatorSP;
1824 if (!ProcessOp(state, &operatorStack[operatorSP],
1825 operandStack, operandSP))
1826 goto out;
1827 --operandSP;
1829 assert(operandSP == 1);
1830 state->result = operandStack[0];
1831 result = TRUE;
1832 goto out;
1835 switch (*state->cp) {
1836 case '|':
1837 /* Process any stacked 'concat' operators */
1838 ++state->cp;
1839 while (operatorSP &&
1840 operatorStack[operatorSP - 1].op == REOP_CONCAT) {
1841 --operatorSP;
1842 if (!ProcessOp(state, &operatorStack[operatorSP],
1843 operandStack, operandSP)) {
1844 goto out;
1846 --operandSP;
1848 op = REOP_ALT;
1849 goto pushOperator;
1851 case ')':
1853 * If there's no stacked open parenthesis, throw syntax error.
1855 for (i = operatorSP - 1; ; i--) {
1856 if (i < 0) {
1857 ReportRegExpError(state, JSREPORT_ERROR,
1858 JSMSG_UNMATCHED_RIGHT_PAREN);
1859 goto out;
1861 if (operatorStack[i].op == REOP_ASSERT ||
1862 operatorStack[i].op == REOP_ASSERT_NOT ||
1863 operatorStack[i].op == REOP_LPARENNON ||
1864 operatorStack[i].op == REOP_LPAREN) {
1865 break;
1868 ++state->cp;
1870 /* Process everything on the stack until the open parenthesis. */
1871 for (;;) {
1872 assert(operatorSP);
1873 --operatorSP;
1874 switch (operatorStack[operatorSP].op) {
1875 case REOP_ASSERT:
1876 case REOP_ASSERT_NOT:
1877 case REOP_LPAREN:
1878 operand = NewRENode(state, operatorStack[operatorSP].op);
1879 if (!operand)
1880 goto out;
1881 operand->u.parenIndex =
1882 operatorStack[operatorSP].parenIndex;
1883 assert(operandSP);
1884 operand->kid = operandStack[operandSP - 1];
1885 operandStack[operandSP - 1] = operand;
1886 if (state->treeDepth == TREE_DEPTH_MAX) {
1887 ReportRegExpError(state, JSREPORT_ERROR,
1888 JSMSG_REGEXP_TOO_COMPLEX);
1889 goto out;
1891 ++state->treeDepth;
1892 /* FALL THROUGH */
1894 case REOP_LPARENNON:
1895 state->result = operandStack[operandSP - 1];
1896 if (!ParseQuantifier(state))
1897 goto out;
1898 operandStack[operandSP - 1] = state->result;
1899 goto restartOperator;
1900 default:
1901 if (!ProcessOp(state, &operatorStack[operatorSP],
1902 operandStack, operandSP))
1903 goto out;
1904 --operandSP;
1905 break;
1908 break;
1910 case '{':
1912 const WCHAR *errp = state->cp;
1914 if (ParseMinMaxQuantifier(state, TRUE) < 0) {
1916 * This didn't even scan correctly as a quantifier, so we should
1917 * treat it as flat.
1919 op = REOP_CONCAT;
1920 goto pushOperator;
1923 state->cp = errp;
1924 /* FALL THROUGH */
1927 case '+':
1928 case '*':
1929 case '?':
1930 ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_BAD_QUANTIFIER,
1931 state->cp);
1932 result = FALSE;
1933 goto out;
1935 default:
1936 /* Anything else is the start of the next term. */
1937 op = REOP_CONCAT;
1938 pushOperator:
1939 if (operatorSP == operatorStackSize) {
1940 REOpData *tmp;
1941 operatorStackSize += operatorStackSize;
1942 tmp = heap_realloc(operatorStack, sizeof(REOpData) * operatorStackSize);
1943 if (!tmp)
1944 goto out;
1945 operatorStack = tmp;
1947 operatorStack[operatorSP].op = op;
1948 operatorStack[operatorSP].errPos = state->cp;
1949 operatorStack[operatorSP++].parenIndex = parenIndex;
1950 break;
1953 out:
1954 heap_free(operatorStack);
1955 heap_free(operandStack);
1956 return result;
1960 * Save the current state of the match - the position in the input
1961 * text as well as the position in the bytecode. The state of any
1962 * parent expressions is also saved (preceding state).
1963 * Contents of parenCount parentheses from parenIndex are also saved.
1965 static REBackTrackData *
1966 PushBackTrackState(REGlobalData *gData, REOp op,
1967 jsbytecode *target, REMatchState *x, const WCHAR *cp,
1968 size_t parenIndex, size_t parenCount)
1970 size_t i;
1971 REBackTrackData *result =
1972 (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz);
1974 size_t sz = sizeof(REBackTrackData) +
1975 gData->stateStackTop * sizeof(REProgState) +
1976 parenCount * sizeof(RECapture);
1978 ptrdiff_t btsize = gData->backTrackStackSize;
1979 ptrdiff_t btincr = ((char *)result + sz) -
1980 ((char *)gData->backTrackStack + btsize);
1982 TRACE("\tBT_Push: %lu,%lu\n", (unsigned long) parenIndex, (unsigned long) parenCount);
1984 JS_COUNT_OPERATION(gData->cx, JSOW_JUMP * (1 + parenCount));
1985 if (btincr > 0) {
1986 ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;
1988 JS_COUNT_OPERATION(gData->cx, JSOW_ALLOCATION);
1989 btincr = ((btincr+btsize-1)/btsize)*btsize;
1990 gData->backTrackStack = jsheap_grow(gData->pool, gData->backTrackStack, btsize, btincr);
1991 if (!gData->backTrackStack) {
1992 js_ReportOutOfScriptQuota(gData->cx);
1993 gData->ok = FALSE;
1994 return NULL;
1996 gData->backTrackStackSize = btsize + btincr;
1997 result = (REBackTrackData *) ((char *)gData->backTrackStack + offset);
1999 gData->backTrackSP = result;
2000 result->sz = gData->cursz;
2001 gData->cursz = sz;
2003 result->backtrack_op = op;
2004 result->backtrack_pc = target;
2005 result->cp = cp;
2006 result->parenCount = parenCount;
2007 result->parenIndex = parenIndex;
2009 result->saveStateStackTop = gData->stateStackTop;
2010 assert(gData->stateStackTop);
2011 memcpy(result + 1, gData->stateStack,
2012 sizeof(REProgState) * result->saveStateStackTop);
2014 if (parenCount != 0) {
2015 memcpy((char *)(result + 1) +
2016 sizeof(REProgState) * result->saveStateStackTop,
2017 &x->parens[parenIndex],
2018 sizeof(RECapture) * parenCount);
2019 for (i = 0; i != parenCount; i++)
2020 x->parens[parenIndex + i].index = -1;
2023 return result;
2026 static inline REMatchState *
2027 FlatNIMatcher(REGlobalData *gData, REMatchState *x, WCHAR *matchChars,
2028 size_t length)
2030 size_t i;
2031 assert(gData->cpend >= x->cp);
2032 if (length > (size_t)(gData->cpend - x->cp))
2033 return NULL;
2034 for (i = 0; i != length; i++) {
2035 if (toupperW(matchChars[i]) != toupperW(x->cp[i]))
2036 return NULL;
2038 x->cp += length;
2039 return x;
2043 * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
2044 * 2. If E is not a character then go to step 6.
2045 * 3. Let ch be E's character.
2046 * 4. Let A be a one-element RECharSet containing the character ch.
2047 * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
2048 * 6. E must be an integer. Let n be that integer.
2049 * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
2050 * 8. Return an internal Matcher closure that takes two arguments, a State x
2051 * and a Continuation c, and performs the following:
2052 * 1. Let cap be x's captures internal array.
2053 * 2. Let s be cap[n].
2054 * 3. If s is undefined, then call c(x) and return its result.
2055 * 4. Let e be x's endIndex.
2056 * 5. Let len be s's length.
2057 * 6. Let f be e+len.
2058 * 7. If f>InputLength, return failure.
2059 * 8. If there exists an integer i between 0 (inclusive) and len (exclusive)
2060 * such that Canonicalize(s[i]) is not the same character as
2061 * Canonicalize(Input [e+i]), then return failure.
2062 * 9. Let y be the State (f, cap).
2063 * 10. Call c(y) and return its result.
2065 static REMatchState *
2066 BackrefMatcher(REGlobalData *gData, REMatchState *x, size_t parenIndex)
2068 size_t len, i;
2069 const WCHAR *parenContent;
2070 RECapture *cap = &x->parens[parenIndex];
2072 if (cap->index == -1)
2073 return x;
2075 len = cap->length;
2076 if (x->cp + len > gData->cpend)
2077 return NULL;
2079 parenContent = &gData->cpbegin[cap->index];
2080 if (gData->regexp->flags & JSREG_FOLD) {
2081 for (i = 0; i < len; i++) {
2082 if (toupperW(parenContent[i]) != toupperW(x->cp[i]))
2083 return NULL;
2085 } else {
2086 for (i = 0; i < len; i++) {
2087 if (parenContent[i] != x->cp[i])
2088 return NULL;
2091 x->cp += len;
2092 return x;
2095 /* Add a single character to the RECharSet */
2096 static void
2097 AddCharacterToCharSet(RECharSet *cs, WCHAR c)
2099 UINT byteIndex = (UINT)(c >> 3);
2100 assert(c <= cs->length);
2101 cs->u.bits[byteIndex] |= 1 << (c & 0x7);
2105 /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
2106 static void
2107 AddCharacterRangeToCharSet(RECharSet *cs, UINT c1, UINT c2)
2109 UINT i;
2111 UINT byteIndex1 = c1 >> 3;
2112 UINT byteIndex2 = c2 >> 3;
2114 assert(c2 <= cs->length && c1 <= c2);
2116 c1 &= 0x7;
2117 c2 &= 0x7;
2119 if (byteIndex1 == byteIndex2) {
2120 cs->u.bits[byteIndex1] |= ((BYTE)0xFF >> (7 - (c2 - c1))) << c1;
2121 } else {
2122 cs->u.bits[byteIndex1] |= 0xFF << c1;
2123 for (i = byteIndex1 + 1; i < byteIndex2; i++)
2124 cs->u.bits[i] = 0xFF;
2125 cs->u.bits[byteIndex2] |= (BYTE)0xFF >> (7 - c2);
2129 /* Compile the source of the class into a RECharSet */
2130 static BOOL
2131 ProcessCharSet(REGlobalData *gData, RECharSet *charSet)
2133 const WCHAR *src, *end;
2134 BOOL inRange = FALSE;
2135 WCHAR rangeStart = 0;
2136 UINT byteLength, n;
2137 WCHAR c, thisCh;
2138 INT nDigits, i;
2140 assert(!charSet->converted);
2142 * Assert that startIndex and length points to chars inside [] inside
2143 * source string.
2145 assert(1 <= charSet->u.src.startIndex);
2146 assert(charSet->u.src.startIndex
2147 < SysStringLen(gData->regexp->source));
2148 assert(charSet->u.src.length <= SysStringLen(gData->regexp->source)
2149 - 1 - charSet->u.src.startIndex);
2151 charSet->converted = TRUE;
2152 src = gData->regexp->source + charSet->u.src.startIndex;
2154 end = src + charSet->u.src.length;
2156 assert(src[-1] == '[' && end[0] == ']');
2158 byteLength = (charSet->length >> 3) + 1;
2159 charSet->u.bits = heap_alloc(byteLength);
2160 if (!charSet->u.bits) {
2161 JS_ReportOutOfMemory(gData->cx);
2162 gData->ok = FALSE;
2163 return FALSE;
2165 memset(charSet->u.bits, 0, byteLength);
2167 if (src == end)
2168 return TRUE;
2170 if (*src == '^') {
2171 assert(charSet->sense == FALSE);
2172 ++src;
2173 } else {
2174 assert(charSet->sense == TRUE);
2177 while (src != end) {
2178 switch (*src) {
2179 case '\\':
2180 ++src;
2181 c = *src++;
2182 switch (c) {
2183 case 'b':
2184 thisCh = 0x8;
2185 break;
2186 case 'f':
2187 thisCh = 0xC;
2188 break;
2189 case 'n':
2190 thisCh = 0xA;
2191 break;
2192 case 'r':
2193 thisCh = 0xD;
2194 break;
2195 case 't':
2196 thisCh = 0x9;
2197 break;
2198 case 'v':
2199 thisCh = 0xB;
2200 break;
2201 case 'c':
2202 if (src < end && JS_ISWORD(*src)) {
2203 thisCh = (WCHAR)(*src++ & 0x1F);
2204 } else {
2205 --src;
2206 thisCh = '\\';
2208 break;
2209 case 'x':
2210 nDigits = 2;
2211 goto lexHex;
2212 case 'u':
2213 nDigits = 4;
2214 lexHex:
2215 n = 0;
2216 for (i = 0; (i < nDigits) && (src < end); i++) {
2217 UINT digit;
2218 c = *src++;
2219 if (!isASCIIHexDigit(c, &digit)) {
2221 * Back off to accepting the original '\'
2222 * as a literal
2224 src -= i + 1;
2225 n = '\\';
2226 break;
2228 n = (n << 4) | digit;
2230 thisCh = (WCHAR)n;
2231 break;
2232 case '0':
2233 case '1':
2234 case '2':
2235 case '3':
2236 case '4':
2237 case '5':
2238 case '6':
2239 case '7':
2241 * This is a non-ECMA extension - decimal escapes (in this
2242 * case, octal!) are supposed to be an error inside class
2243 * ranges, but supported here for backwards compatibility.
2245 n = JS7_UNDEC(c);
2246 c = *src;
2247 if ('0' <= c && c <= '7') {
2248 src++;
2249 n = 8 * n + JS7_UNDEC(c);
2250 c = *src;
2251 if ('0' <= c && c <= '7') {
2252 src++;
2253 i = 8 * n + JS7_UNDEC(c);
2254 if (i <= 0377)
2255 n = i;
2256 else
2257 src--;
2260 thisCh = (WCHAR)n;
2261 break;
2263 case 'd':
2264 AddCharacterRangeToCharSet(charSet, '0', '9');
2265 continue; /* don't need range processing */
2266 case 'D':
2267 AddCharacterRangeToCharSet(charSet, 0, '0' - 1);
2268 AddCharacterRangeToCharSet(charSet,
2269 (WCHAR)('9' + 1),
2270 (WCHAR)charSet->length);
2271 continue;
2272 case 's':
2273 for (i = (INT)charSet->length; i >= 0; i--)
2274 if (isspaceW(i))
2275 AddCharacterToCharSet(charSet, (WCHAR)i);
2276 continue;
2277 case 'S':
2278 for (i = (INT)charSet->length; i >= 0; i--)
2279 if (!isspaceW(i))
2280 AddCharacterToCharSet(charSet, (WCHAR)i);
2281 continue;
2282 case 'w':
2283 for (i = (INT)charSet->length; i >= 0; i--)
2284 if (JS_ISWORD(i))
2285 AddCharacterToCharSet(charSet, (WCHAR)i);
2286 continue;
2287 case 'W':
2288 for (i = (INT)charSet->length; i >= 0; i--)
2289 if (!JS_ISWORD(i))
2290 AddCharacterToCharSet(charSet, (WCHAR)i);
2291 continue;
2292 default:
2293 thisCh = c;
2294 break;
2297 break;
2299 default:
2300 thisCh = *src++;
2301 break;
2304 if (inRange) {
2305 if (gData->regexp->flags & JSREG_FOLD) {
2306 int i;
2308 assert(rangeStart <= thisCh);
2309 for (i = rangeStart; i <= thisCh; i++) {
2310 WCHAR uch, dch;
2312 AddCharacterToCharSet(charSet, i);
2313 uch = toupperW(i);
2314 dch = tolowerW(i);
2315 if (i != uch)
2316 AddCharacterToCharSet(charSet, uch);
2317 if (i != dch)
2318 AddCharacterToCharSet(charSet, dch);
2320 } else {
2321 AddCharacterRangeToCharSet(charSet, rangeStart, thisCh);
2323 inRange = FALSE;
2324 } else {
2325 if (gData->regexp->flags & JSREG_FOLD) {
2326 AddCharacterToCharSet(charSet, toupperW(thisCh));
2327 AddCharacterToCharSet(charSet, tolowerW(thisCh));
2328 } else {
2329 AddCharacterToCharSet(charSet, thisCh);
2331 if (src < end - 1) {
2332 if (*src == '-') {
2333 ++src;
2334 inRange = TRUE;
2335 rangeStart = thisCh;
2340 return TRUE;
2343 static BOOL
2344 ReallocStateStack(REGlobalData *gData)
2346 size_t limit = gData->stateStackLimit;
2347 size_t sz = sizeof(REProgState) * limit;
2349 gData->stateStack = jsheap_grow(gData->pool, gData->stateStack, sz, sz);
2350 if (!gData->stateStack) {
2351 js_ReportOutOfScriptQuota(gData->cx);
2352 gData->ok = FALSE;
2353 return FALSE;
2355 gData->stateStackLimit = limit + limit;
2356 return TRUE;
2359 #define PUSH_STATE_STACK(data) \
2360 do { \
2361 ++(data)->stateStackTop; \
2362 if ((data)->stateStackTop == (data)->stateStackLimit && \
2363 !ReallocStateStack((data))) { \
2364 return NULL; \
2366 }while(0)
2369 * Apply the current op against the given input to see if it's going to match
2370 * or fail. Return false if we don't get a match, true if we do. If updatecp is
2371 * true, then update the current state's cp. Always update startpc to the next
2372 * op.
2374 static inline REMatchState *
2375 SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op,
2376 jsbytecode **startpc, BOOL updatecp)
2378 REMatchState *result = NULL;
2379 WCHAR matchCh;
2380 size_t parenIndex;
2381 size_t offset, length, index;
2382 jsbytecode *pc = *startpc; /* pc has already been incremented past op */
2383 WCHAR *source;
2384 const WCHAR *startcp = x->cp;
2385 WCHAR ch;
2386 RECharSet *charSet;
2388 const char *opname = reop_names[op];
2389 TRACE("\n%06d: %*s%s\n", pc - gData->regexp->program,
2390 (int)gData->stateStackTop * 2, "", opname);
2392 switch (op) {
2393 case REOP_EMPTY:
2394 result = x;
2395 break;
2396 case REOP_BOL:
2397 if (x->cp != gData->cpbegin) {
2398 if (/*!gData->cx->regExpStatics.multiline && FIXME !!! */
2399 !(gData->regexp->flags & JSREG_MULTILINE)) {
2400 break;
2402 if (!RE_IS_LINE_TERM(x->cp[-1]))
2403 break;
2405 result = x;
2406 break;
2407 case REOP_EOL:
2408 if (x->cp != gData->cpend) {
2409 if (/*!gData->cx->regExpStatics.multiline &&*/
2410 !(gData->regexp->flags & JSREG_MULTILINE)) {
2411 break;
2413 if (!RE_IS_LINE_TERM(*x->cp))
2414 break;
2416 result = x;
2417 break;
2418 case REOP_WBDRY:
2419 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2420 !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2421 result = x;
2423 break;
2424 case REOP_WNONBDRY:
2425 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2426 (x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2427 result = x;
2429 break;
2430 case REOP_DOT:
2431 if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
2432 result = x;
2433 result->cp++;
2435 break;
2436 case REOP_DIGIT:
2437 if (x->cp != gData->cpend && JS7_ISDEC(*x->cp)) {
2438 result = x;
2439 result->cp++;
2441 break;
2442 case REOP_NONDIGIT:
2443 if (x->cp != gData->cpend && !JS7_ISDEC(*x->cp)) {
2444 result = x;
2445 result->cp++;
2447 break;
2448 case REOP_ALNUM:
2449 if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
2450 result = x;
2451 result->cp++;
2453 break;
2454 case REOP_NONALNUM:
2455 if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
2456 result = x;
2457 result->cp++;
2459 break;
2460 case REOP_SPACE:
2461 if (x->cp != gData->cpend && isspaceW(*x->cp)) {
2462 result = x;
2463 result->cp++;
2465 break;
2466 case REOP_NONSPACE:
2467 if (x->cp != gData->cpend && !isspaceW(*x->cp)) {
2468 result = x;
2469 result->cp++;
2471 break;
2472 case REOP_BACKREF:
2473 pc = ReadCompactIndex(pc, &parenIndex);
2474 assert(parenIndex < gData->regexp->parenCount);
2475 result = BackrefMatcher(gData, x, parenIndex);
2476 break;
2477 case REOP_FLAT:
2478 pc = ReadCompactIndex(pc, &offset);
2479 assert(offset < SysStringLen(gData->regexp->source));
2480 pc = ReadCompactIndex(pc, &length);
2481 assert(1 <= length);
2482 assert(length <= SysStringLen(gData->regexp->source) - offset);
2483 if (length <= (size_t)(gData->cpend - x->cp)) {
2484 source = gData->regexp->source + offset;
2485 TRACE("%s\n", debugstr_wn(source, length));
2486 for (index = 0; index != length; index++) {
2487 if (source[index] != x->cp[index])
2488 return NULL;
2490 x->cp += length;
2491 result = x;
2493 break;
2494 case REOP_FLAT1:
2495 matchCh = *pc++;
2496 TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
2497 if (x->cp != gData->cpend && *x->cp == matchCh) {
2498 result = x;
2499 result->cp++;
2501 break;
2502 case REOP_FLATi:
2503 pc = ReadCompactIndex(pc, &offset);
2504 assert(offset < SysStringLen(gData->regexp->source));
2505 pc = ReadCompactIndex(pc, &length);
2506 assert(1 <= length);
2507 assert(length <= SysStringLen(gData->regexp->source) - offset);
2508 source = gData->regexp->source;
2509 result = FlatNIMatcher(gData, x, source + offset, length);
2510 break;
2511 case REOP_FLAT1i:
2512 matchCh = *pc++;
2513 if (x->cp != gData->cpend && toupperW(*x->cp) == toupperW(matchCh)) {
2514 result = x;
2515 result->cp++;
2517 break;
2518 case REOP_UCFLAT1:
2519 matchCh = GET_ARG(pc);
2520 TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
2521 pc += ARG_LEN;
2522 if (x->cp != gData->cpend && *x->cp == matchCh) {
2523 result = x;
2524 result->cp++;
2526 break;
2527 case REOP_UCFLAT1i:
2528 matchCh = GET_ARG(pc);
2529 pc += ARG_LEN;
2530 if (x->cp != gData->cpend && toupperW(*x->cp) == toupperW(matchCh)) {
2531 result = x;
2532 result->cp++;
2534 break;
2535 case REOP_CLASS:
2536 pc = ReadCompactIndex(pc, &index);
2537 assert(index < gData->regexp->classCount);
2538 if (x->cp != gData->cpend) {
2539 charSet = &gData->regexp->classList[index];
2540 assert(charSet->converted);
2541 ch = *x->cp;
2542 index = ch >> 3;
2543 if (charSet->length != 0 &&
2544 ch <= charSet->length &&
2545 (charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2546 result = x;
2547 result->cp++;
2550 break;
2551 case REOP_NCLASS:
2552 pc = ReadCompactIndex(pc, &index);
2553 assert(index < gData->regexp->classCount);
2554 if (x->cp != gData->cpend) {
2555 charSet = &gData->regexp->classList[index];
2556 assert(charSet->converted);
2557 ch = *x->cp;
2558 index = ch >> 3;
2559 if (charSet->length == 0 ||
2560 ch > charSet->length ||
2561 !(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2562 result = x;
2563 result->cp++;
2566 break;
2568 default:
2569 assert(FALSE);
2571 if (result) {
2572 if (!updatecp)
2573 x->cp = startcp;
2574 *startpc = pc;
2575 TRACE(" *\n");
2576 return result;
2578 x->cp = startcp;
2579 return NULL;
2582 static inline REMatchState *
2583 ExecuteREBytecode(REGlobalData *gData, REMatchState *x)
2585 REMatchState *result = NULL;
2586 REBackTrackData *backTrackData;
2587 jsbytecode *nextpc, *testpc;
2588 REOp nextop;
2589 RECapture *cap;
2590 REProgState *curState;
2591 const WCHAR *startcp;
2592 size_t parenIndex, k;
2593 size_t parenSoFar = 0;
2595 WCHAR matchCh1, matchCh2;
2596 RECharSet *charSet;
2598 BOOL anchor;
2599 jsbytecode *pc = gData->regexp->program;
2600 REOp op = (REOp) *pc++;
2603 * If the first node is a simple match, step the index into the string
2604 * until that match is made, or fail if it can't be found at all.
2606 if (REOP_IS_SIMPLE(op) && !(gData->regexp->flags & JSREG_STICKY)) {
2607 anchor = FALSE;
2608 while (x->cp <= gData->cpend) {
2609 nextpc = pc; /* reset back to start each time */
2610 result = SimpleMatch(gData, x, op, &nextpc, TRUE);
2611 if (result) {
2612 anchor = TRUE;
2613 x = result;
2614 pc = nextpc; /* accept skip to next opcode */
2615 op = (REOp) *pc++;
2616 assert(op < REOP_LIMIT);
2617 break;
2619 gData->skipped++;
2620 x->cp++;
2622 if (!anchor)
2623 goto bad;
2626 for (;;) {
2627 const char *opname = reop_names[op];
2628 TRACE("\n%06d: %*s%s\n", pc - gData->regexp->program,
2629 (int)gData->stateStackTop * 2, "", opname);
2631 if (REOP_IS_SIMPLE(op)) {
2632 result = SimpleMatch(gData, x, op, &pc, TRUE);
2633 } else {
2634 curState = &gData->stateStack[gData->stateStackTop];
2635 switch (op) {
2636 case REOP_END:
2637 goto good;
2638 case REOP_ALTPREREQ2:
2639 nextpc = pc + GET_OFFSET(pc); /* start of next op */
2640 pc += ARG_LEN;
2641 matchCh2 = GET_ARG(pc);
2642 pc += ARG_LEN;
2643 k = GET_ARG(pc);
2644 pc += ARG_LEN;
2646 if (x->cp != gData->cpend) {
2647 if (*x->cp == matchCh2)
2648 goto doAlt;
2650 charSet = &gData->regexp->classList[k];
2651 if (!charSet->converted && !ProcessCharSet(gData, charSet))
2652 goto bad;
2653 matchCh1 = *x->cp;
2654 k = matchCh1 >> 3;
2655 if ((charSet->length == 0 ||
2656 matchCh1 > charSet->length ||
2657 !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^
2658 charSet->sense) {
2659 goto doAlt;
2662 result = NULL;
2663 break;
2665 case REOP_ALTPREREQ:
2666 nextpc = pc + GET_OFFSET(pc); /* start of next op */
2667 pc += ARG_LEN;
2668 matchCh1 = GET_ARG(pc);
2669 pc += ARG_LEN;
2670 matchCh2 = GET_ARG(pc);
2671 pc += ARG_LEN;
2672 if (x->cp == gData->cpend ||
2673 (*x->cp != matchCh1 && *x->cp != matchCh2)) {
2674 result = NULL;
2675 break;
2677 /* else false thru... */
2679 case REOP_ALT:
2680 doAlt:
2681 nextpc = pc + GET_OFFSET(pc); /* start of next alternate */
2682 pc += ARG_LEN; /* start of this alternate */
2683 curState->parenSoFar = parenSoFar;
2684 PUSH_STATE_STACK(gData);
2685 op = (REOp) *pc++;
2686 startcp = x->cp;
2687 if (REOP_IS_SIMPLE(op)) {
2688 if (!SimpleMatch(gData, x, op, &pc, TRUE)) {
2689 op = (REOp) *nextpc++;
2690 pc = nextpc;
2691 continue;
2693 result = x;
2694 op = (REOp) *pc++;
2696 nextop = (REOp) *nextpc++;
2697 if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0))
2698 goto bad;
2699 continue;
2702 * Occurs at (successful) end of REOP_ALT,
2704 case REOP_JUMP:
2706 * If we have not gotten a result here, it is because of an
2707 * empty match. Do the same thing REOP_EMPTY would do.
2709 if (!result)
2710 result = x;
2712 --gData->stateStackTop;
2713 pc += GET_OFFSET(pc);
2714 op = (REOp) *pc++;
2715 continue;
2718 * Occurs at last (successful) end of REOP_ALT,
2720 case REOP_ENDALT:
2722 * If we have not gotten a result here, it is because of an
2723 * empty match. Do the same thing REOP_EMPTY would do.
2725 if (!result)
2726 result = x;
2728 --gData->stateStackTop;
2729 op = (REOp) *pc++;
2730 continue;
2732 case REOP_LPAREN:
2733 pc = ReadCompactIndex(pc, &parenIndex);
2734 TRACE("[ %lu ]\n", (unsigned long) parenIndex);
2735 assert(parenIndex < gData->regexp->parenCount);
2736 if (parenIndex + 1 > parenSoFar)
2737 parenSoFar = parenIndex + 1;
2738 x->parens[parenIndex].index = x->cp - gData->cpbegin;
2739 x->parens[parenIndex].length = 0;
2740 op = (REOp) *pc++;
2741 continue;
2743 case REOP_RPAREN:
2745 ptrdiff_t delta;
2747 pc = ReadCompactIndex(pc, &parenIndex);
2748 assert(parenIndex < gData->regexp->parenCount);
2749 cap = &x->parens[parenIndex];
2750 delta = x->cp - (gData->cpbegin + cap->index);
2751 cap->length = (delta < 0) ? 0 : (size_t) delta;
2752 op = (REOp) *pc++;
2754 if (!result)
2755 result = x;
2756 continue;
2758 case REOP_ASSERT:
2759 nextpc = pc + GET_OFFSET(pc); /* start of term after ASSERT */
2760 pc += ARG_LEN; /* start of ASSERT child */
2761 op = (REOp) *pc++;
2762 testpc = pc;
2763 if (REOP_IS_SIMPLE(op) &&
2764 !SimpleMatch(gData, x, op, &testpc, FALSE)) {
2765 result = NULL;
2766 break;
2768 curState->u.assertion.top =
2769 (char *)gData->backTrackSP - (char *)gData->backTrackStack;
2770 curState->u.assertion.sz = gData->cursz;
2771 curState->index = x->cp - gData->cpbegin;
2772 curState->parenSoFar = parenSoFar;
2773 PUSH_STATE_STACK(gData);
2774 if (!PushBackTrackState(gData, REOP_ASSERTTEST,
2775 nextpc, x, x->cp, 0, 0)) {
2776 goto bad;
2778 continue;
2780 case REOP_ASSERT_NOT:
2781 nextpc = pc + GET_OFFSET(pc);
2782 pc += ARG_LEN;
2783 op = (REOp) *pc++;
2784 testpc = pc;
2785 if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ &&
2786 SimpleMatch(gData, x, op, &testpc, FALSE) &&
2787 *testpc == REOP_ASSERTNOTTEST) {
2788 result = NULL;
2789 break;
2791 curState->u.assertion.top
2792 = (char *)gData->backTrackSP -
2793 (char *)gData->backTrackStack;
2794 curState->u.assertion.sz = gData->cursz;
2795 curState->index = x->cp - gData->cpbegin;
2796 curState->parenSoFar = parenSoFar;
2797 PUSH_STATE_STACK(gData);
2798 if (!PushBackTrackState(gData, REOP_ASSERTNOTTEST,
2799 nextpc, x, x->cp, 0, 0)) {
2800 goto bad;
2802 continue;
2804 case REOP_ASSERTTEST:
2805 --gData->stateStackTop;
2806 --curState;
2807 x->cp = gData->cpbegin + curState->index;
2808 gData->backTrackSP =
2809 (REBackTrackData *) ((char *)gData->backTrackStack +
2810 curState->u.assertion.top);
2811 gData->cursz = curState->u.assertion.sz;
2812 if (result)
2813 result = x;
2814 break;
2816 case REOP_ASSERTNOTTEST:
2817 --gData->stateStackTop;
2818 --curState;
2819 x->cp = gData->cpbegin + curState->index;
2820 gData->backTrackSP =
2821 (REBackTrackData *) ((char *)gData->backTrackStack +
2822 curState->u.assertion.top);
2823 gData->cursz = curState->u.assertion.sz;
2824 result = (!result) ? x : NULL;
2825 break;
2826 case REOP_STAR:
2827 curState->u.quantifier.min = 0;
2828 curState->u.quantifier.max = (UINT)-1;
2829 goto quantcommon;
2830 case REOP_PLUS:
2831 curState->u.quantifier.min = 1;
2832 curState->u.quantifier.max = (UINT)-1;
2833 goto quantcommon;
2834 case REOP_OPT:
2835 curState->u.quantifier.min = 0;
2836 curState->u.quantifier.max = 1;
2837 goto quantcommon;
2838 case REOP_QUANT:
2839 pc = ReadCompactIndex(pc, &k);
2840 curState->u.quantifier.min = k;
2841 pc = ReadCompactIndex(pc, &k);
2842 /* max is k - 1 to use one byte for (UINT)-1 sentinel. */
2843 curState->u.quantifier.max = k - 1;
2844 assert(curState->u.quantifier.min <= curState->u.quantifier.max);
2845 quantcommon:
2846 if (curState->u.quantifier.max == 0) {
2847 pc = pc + GET_OFFSET(pc);
2848 op = (REOp) *pc++;
2849 result = x;
2850 continue;
2852 /* Step over <next> */
2853 nextpc = pc + ARG_LEN;
2854 op = (REOp) *nextpc++;
2855 startcp = x->cp;
2856 if (REOP_IS_SIMPLE(op)) {
2857 if (!SimpleMatch(gData, x, op, &nextpc, TRUE)) {
2858 if (curState->u.quantifier.min == 0)
2859 result = x;
2860 else
2861 result = NULL;
2862 pc = pc + GET_OFFSET(pc);
2863 break;
2865 op = (REOp) *nextpc++;
2866 result = x;
2868 curState->index = startcp - gData->cpbegin;
2869 curState->continue_op = REOP_REPEAT;
2870 curState->continue_pc = pc;
2871 curState->parenSoFar = parenSoFar;
2872 PUSH_STATE_STACK(gData);
2873 if (curState->u.quantifier.min == 0 &&
2874 !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp,
2875 0, 0)) {
2876 goto bad;
2878 pc = nextpc;
2879 continue;
2881 case REOP_ENDCHILD: /* marks the end of a quantifier child */
2882 pc = curState[-1].continue_pc;
2883 op = (REOp) curState[-1].continue_op;
2885 if (!result)
2886 result = x;
2887 continue;
2889 case REOP_REPEAT:
2890 --curState;
2891 do {
2892 --gData->stateStackTop;
2893 if (!result) {
2894 /* Failed, see if we have enough children. */
2895 if (curState->u.quantifier.min == 0)
2896 goto repeatDone;
2897 goto break_switch;
2899 if (curState->u.quantifier.min == 0 &&
2900 x->cp == gData->cpbegin + curState->index) {
2901 /* matched an empty string, that'll get us nowhere */
2902 result = NULL;
2903 goto break_switch;
2905 if (curState->u.quantifier.min != 0)
2906 curState->u.quantifier.min--;
2907 if (curState->u.quantifier.max != (UINT) -1)
2908 curState->u.quantifier.max--;
2909 if (curState->u.quantifier.max == 0)
2910 goto repeatDone;
2911 nextpc = pc + ARG_LEN;
2912 nextop = (REOp) *nextpc;
2913 startcp = x->cp;
2914 if (REOP_IS_SIMPLE(nextop)) {
2915 nextpc++;
2916 if (!SimpleMatch(gData, x, nextop, &nextpc, TRUE)) {
2917 if (curState->u.quantifier.min == 0)
2918 goto repeatDone;
2919 result = NULL;
2920 goto break_switch;
2922 result = x;
2924 curState->index = startcp - gData->cpbegin;
2925 PUSH_STATE_STACK(gData);
2926 if (curState->u.quantifier.min == 0 &&
2927 !PushBackTrackState(gData, REOP_REPEAT,
2928 pc, x, startcp,
2929 curState->parenSoFar,
2930 parenSoFar -
2931 curState->parenSoFar)) {
2932 goto bad;
2934 } while (*nextpc == REOP_ENDCHILD);
2935 pc = nextpc;
2936 op = (REOp) *pc++;
2937 parenSoFar = curState->parenSoFar;
2938 continue;
2940 repeatDone:
2941 result = x;
2942 pc += GET_OFFSET(pc);
2943 goto break_switch;
2945 case REOP_MINIMALSTAR:
2946 curState->u.quantifier.min = 0;
2947 curState->u.quantifier.max = (UINT)-1;
2948 goto minimalquantcommon;
2949 case REOP_MINIMALPLUS:
2950 curState->u.quantifier.min = 1;
2951 curState->u.quantifier.max = (UINT)-1;
2952 goto minimalquantcommon;
2953 case REOP_MINIMALOPT:
2954 curState->u.quantifier.min = 0;
2955 curState->u.quantifier.max = 1;
2956 goto minimalquantcommon;
2957 case REOP_MINIMALQUANT:
2958 pc = ReadCompactIndex(pc, &k);
2959 curState->u.quantifier.min = k;
2960 pc = ReadCompactIndex(pc, &k);
2961 /* See REOP_QUANT comments about k - 1. */
2962 curState->u.quantifier.max = k - 1;
2963 assert(curState->u.quantifier.min
2964 <= curState->u.quantifier.max);
2965 minimalquantcommon:
2966 curState->index = x->cp - gData->cpbegin;
2967 curState->parenSoFar = parenSoFar;
2968 PUSH_STATE_STACK(gData);
2969 if (curState->u.quantifier.min != 0) {
2970 curState->continue_op = REOP_MINIMALREPEAT;
2971 curState->continue_pc = pc;
2972 /* step over <next> */
2973 pc += OFFSET_LEN;
2974 op = (REOp) *pc++;
2975 } else {
2976 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
2977 pc, x, x->cp, 0, 0)) {
2978 goto bad;
2980 --gData->stateStackTop;
2981 pc = pc + GET_OFFSET(pc);
2982 op = (REOp) *pc++;
2984 continue;
2986 case REOP_MINIMALREPEAT:
2987 --gData->stateStackTop;
2988 --curState;
2990 TRACE("{%d,%d}\n", curState->u.quantifier.min, curState->u.quantifier.max);
2991 #define PREPARE_REPEAT() \
2992 do { \
2993 curState->index = x->cp - gData->cpbegin; \
2994 curState->continue_op = REOP_MINIMALREPEAT; \
2995 curState->continue_pc = pc; \
2996 pc += ARG_LEN; \
2997 for (k = curState->parenSoFar; k < parenSoFar; k++) \
2998 x->parens[k].index = -1; \
2999 PUSH_STATE_STACK(gData); \
3000 op = (REOp) *pc++; \
3001 assert(op < REOP_LIMIT); \
3002 }while(0)
3004 if (!result) {
3005 TRACE(" -\n");
3007 * Non-greedy failure - try to consume another child.
3009 if (curState->u.quantifier.max == (UINT) -1 ||
3010 curState->u.quantifier.max > 0) {
3011 PREPARE_REPEAT();
3012 continue;
3014 /* Don't need to adjust pc since we're going to pop. */
3015 break;
3017 if (curState->u.quantifier.min == 0 &&
3018 x->cp == gData->cpbegin + curState->index) {
3019 /* Matched an empty string, that'll get us nowhere. */
3020 result = NULL;
3021 break;
3023 if (curState->u.quantifier.min != 0)
3024 curState->u.quantifier.min--;
3025 if (curState->u.quantifier.max != (UINT) -1)
3026 curState->u.quantifier.max--;
3027 if (curState->u.quantifier.min != 0) {
3028 PREPARE_REPEAT();
3029 continue;
3031 curState->index = x->cp - gData->cpbegin;
3032 curState->parenSoFar = parenSoFar;
3033 PUSH_STATE_STACK(gData);
3034 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
3035 pc, x, x->cp,
3036 curState->parenSoFar,
3037 parenSoFar - curState->parenSoFar)) {
3038 goto bad;
3040 --gData->stateStackTop;
3041 pc = pc + GET_OFFSET(pc);
3042 op = (REOp) *pc++;
3043 assert(op < REOP_LIMIT);
3044 continue;
3045 default:
3046 assert(FALSE);
3047 result = NULL;
3049 break_switch:;
3053 * If the match failed and there's a backtrack option, take it.
3054 * Otherwise this is a complete and utter failure.
3056 if (!result) {
3057 if (gData->cursz == 0)
3058 return NULL;
3060 /* Potentially detect explosive regex here. */
3061 gData->backTrackCount++;
3062 if (gData->backTrackLimit &&
3063 gData->backTrackCount >= gData->backTrackLimit) {
3064 JS_ReportErrorNumber(gData->cx, js_GetErrorMessage, NULL,
3065 JSMSG_REGEXP_TOO_COMPLEX);
3066 gData->ok = FALSE;
3067 return NULL;
3070 backTrackData = gData->backTrackSP;
3071 gData->cursz = backTrackData->sz;
3072 gData->backTrackSP =
3073 (REBackTrackData *) ((char *)backTrackData - backTrackData->sz);
3074 x->cp = backTrackData->cp;
3075 pc = backTrackData->backtrack_pc;
3076 op = (REOp) backTrackData->backtrack_op;
3077 assert(op < REOP_LIMIT);
3078 gData->stateStackTop = backTrackData->saveStateStackTop;
3079 assert(gData->stateStackTop);
3081 memcpy(gData->stateStack, backTrackData + 1,
3082 sizeof(REProgState) * backTrackData->saveStateStackTop);
3083 curState = &gData->stateStack[gData->stateStackTop - 1];
3085 if (backTrackData->parenCount) {
3086 memcpy(&x->parens[backTrackData->parenIndex],
3087 (char *)(backTrackData + 1) +
3088 sizeof(REProgState) * backTrackData->saveStateStackTop,
3089 sizeof(RECapture) * backTrackData->parenCount);
3090 parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
3091 } else {
3092 for (k = curState->parenSoFar; k < parenSoFar; k++)
3093 x->parens[k].index = -1;
3094 parenSoFar = curState->parenSoFar;
3097 TRACE("\tBT_Pop: %ld,%ld\n",
3098 (unsigned long) backTrackData->parenIndex,
3099 (unsigned long) backTrackData->parenCount);
3100 continue;
3102 x = result;
3105 * Continue with the expression.
3107 op = (REOp)*pc++;
3108 assert(op < REOP_LIMIT);
3111 bad:
3112 TRACE("\n");
3113 return NULL;
3115 good:
3116 TRACE("\n");
3117 return x;
3120 static REMatchState *MatchRegExp(REGlobalData *gData, REMatchState *x)
3122 REMatchState *result;
3123 const WCHAR *cp = x->cp;
3124 const WCHAR *cp2;
3125 UINT j;
3128 * Have to include the position beyond the last character
3129 * in order to detect end-of-input/line condition.
3131 for (cp2 = cp; cp2 <= gData->cpend; cp2++) {
3132 gData->skipped = cp2 - cp;
3133 x->cp = cp2;
3134 for (j = 0; j < gData->regexp->parenCount; j++)
3135 x->parens[j].index = -1;
3136 result = ExecuteREBytecode(gData, x);
3137 if (!gData->ok || result || (gData->regexp->flags & JSREG_STICKY))
3138 return result;
3139 gData->backTrackSP = gData->backTrackStack;
3140 gData->cursz = 0;
3141 gData->stateStackTop = 0;
3142 cp2 = cp + gData->skipped;
3144 return NULL;
3147 #define MIN_BACKTRACK_LIMIT 400000
3149 static REMatchState *InitMatch(script_ctx_t *cx, REGlobalData *gData, JSRegExp *re, size_t length)
3151 REMatchState *result;
3152 UINT i;
3154 gData->backTrackStackSize = INITIAL_BACKTRACK;
3155 gData->backTrackStack = jsheap_alloc(gData->pool, INITIAL_BACKTRACK);
3156 if (!gData->backTrackStack)
3157 goto bad;
3159 gData->backTrackSP = gData->backTrackStack;
3160 gData->cursz = 0;
3161 gData->backTrackCount = 0;
3162 gData->backTrackLimit = 0;
3164 gData->stateStackLimit = INITIAL_STATESTACK;
3165 gData->stateStack = jsheap_alloc(gData->pool, sizeof(REProgState) * INITIAL_STATESTACK);
3166 if (!gData->stateStack)
3167 goto bad;
3169 gData->stateStackTop = 0;
3170 gData->cx = cx;
3171 gData->regexp = re;
3172 gData->ok = TRUE;
3174 result = jsheap_alloc(gData->pool, offsetof(REMatchState, parens) + re->parenCount * sizeof(RECapture));
3175 if (!result)
3176 goto bad;
3178 for (i = 0; i < re->classCount; i++) {
3179 if (!re->classList[i].converted &&
3180 !ProcessCharSet(gData, &re->classList[i])) {
3181 return NULL;
3185 return result;
3187 bad:
3188 js_ReportOutOfScriptQuota(cx);
3189 gData->ok = FALSE;
3190 return NULL;
3193 static void
3194 js_DestroyRegExp(JSRegExp *re)
3196 if (re->classList) {
3197 UINT i;
3198 for (i = 0; i < re->classCount; i++) {
3199 if (re->classList[i].converted)
3200 heap_free(re->classList[i].u.bits);
3201 re->classList[i].u.bits = NULL;
3203 heap_free(re->classList);
3205 heap_free(re);
3208 static JSRegExp *
3209 js_NewRegExp(script_ctx_t *cx, BSTR str, UINT flags, BOOL flat)
3211 JSRegExp *re;
3212 jsheap_t *mark;
3213 CompilerState state;
3214 size_t resize;
3215 jsbytecode *endPC;
3216 UINT i;
3217 size_t len;
3219 re = NULL;
3220 mark = jsheap_mark(&cx->tmp_heap);
3221 len = SysStringLen(str);
3223 state.context = cx;
3224 state.cp = str;
3225 if (!state.cp)
3226 goto out;
3227 state.cpbegin = state.cp;
3228 state.cpend = state.cp + len;
3229 state.flags = flags;
3230 state.parenCount = 0;
3231 state.classCount = 0;
3232 state.progLength = 0;
3233 state.treeDepth = 0;
3234 state.classBitmapsMem = 0;
3235 for (i = 0; i < CLASS_CACHE_SIZE; i++)
3236 state.classCache[i].start = NULL;
3238 if (len != 0 && flat) {
3239 state.result = NewRENode(&state, REOP_FLAT);
3240 if (!state.result)
3241 goto out;
3242 state.result->u.flat.chr = *state.cpbegin;
3243 state.result->u.flat.length = len;
3244 state.result->kid = (void *) state.cpbegin;
3245 /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
3246 state.progLength += 1 + GetCompactIndexWidth(0)
3247 + GetCompactIndexWidth(len);
3248 } else {
3249 if (!ParseRegExp(&state))
3250 goto out;
3252 resize = offsetof(JSRegExp, program) + state.progLength + 1;
3253 re = heap_alloc(resize);
3254 if (!re)
3255 goto out;
3257 assert(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT);
3258 re->classCount = state.classCount;
3259 if (re->classCount) {
3260 re->classList = heap_alloc(re->classCount * sizeof(RECharSet));
3261 if (!re->classList) {
3262 js_DestroyRegExp(re);
3263 re = NULL;
3264 goto out;
3266 for (i = 0; i < re->classCount; i++)
3267 re->classList[i].converted = FALSE;
3268 } else {
3269 re->classList = NULL;
3271 endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
3272 if (!endPC) {
3273 js_DestroyRegExp(re);
3274 re = NULL;
3275 goto out;
3277 *endPC++ = REOP_END;
3279 * Check whether size was overestimated and shrink using realloc.
3280 * This is safe since no pointers to newly parsed regexp or its parts
3281 * besides re exist here.
3283 if ((size_t)(endPC - re->program) != state.progLength + 1) {
3284 JSRegExp *tmp;
3285 assert((size_t)(endPC - re->program) < state.progLength + 1);
3286 resize = offsetof(JSRegExp, program) + (endPC - re->program);
3287 tmp = heap_realloc(re, resize);
3288 if (tmp)
3289 re = tmp;
3292 re->flags = flags;
3293 re->parenCount = state.parenCount;
3294 re->source = str;
3296 out:
3297 jsheap_clear(mark);
3298 return re;
3301 static HRESULT do_regexp_match_next(RegExpInstance *regexp, const WCHAR *str, DWORD len,
3302 const WCHAR **cp, match_result_t **parens, DWORD *parens_size, DWORD *parens_cnt, match_result_t *ret)
3304 REMatchState *x, *result;
3305 REGlobalData gData;
3306 DWORD matchlen;
3308 gData.cpbegin = *cp;
3309 gData.cpend = str + len;
3310 gData.start = *cp-str;
3311 gData.skipped = 0;
3312 gData.pool = &regexp->dispex.ctx->tmp_heap;
3314 x = InitMatch(NULL, &gData, regexp->jsregexp, gData.cpend - gData.cpbegin);
3315 if(!x) {
3316 WARN("InitMatch failed\n");
3317 return E_FAIL;
3320 x->cp = *cp;
3321 result = MatchRegExp(&gData, x);
3322 if(!gData.ok) {
3323 WARN("MatchRegExp failed\n");
3324 return E_FAIL;
3327 if(!result)
3328 return S_FALSE;
3330 if(parens) {
3331 DWORD i;
3333 if(regexp->jsregexp->parenCount > *parens_size) {
3334 match_result_t *new_parens;
3336 if(*parens)
3337 new_parens = heap_realloc(*parens, sizeof(match_result_t)*regexp->jsregexp->parenCount);
3338 else
3339 new_parens = heap_alloc(sizeof(match_result_t)*regexp->jsregexp->parenCount);
3340 if(!new_parens)
3341 return E_OUTOFMEMORY;
3343 *parens = new_parens;
3346 *parens_cnt = regexp->jsregexp->parenCount;
3348 for(i=0; i < regexp->jsregexp->parenCount; i++) {
3349 (*parens)[i].str = *cp + result->parens[i].index;
3350 (*parens)[i].len = result->parens[i].length;
3354 matchlen = (result->cp-*cp) - gData.skipped;
3355 *cp = result->cp;
3356 ret->str = result->cp-matchlen;
3357 ret->len = matchlen;
3359 return S_OK;
3362 HRESULT regexp_match_next(DispatchEx *dispex, BOOL gcheck, const WCHAR *str, DWORD len,
3363 const WCHAR **cp, match_result_t **parens, DWORD *parens_size, DWORD *parens_cnt, match_result_t *ret)
3365 RegExpInstance *regexp = (RegExpInstance*)dispex;
3366 jsheap_t *mark;
3367 HRESULT hres;
3369 if(gcheck && !(regexp->jsregexp->flags & JSREG_GLOB))
3370 return S_FALSE;
3372 mark = jsheap_mark(&regexp->dispex.ctx->tmp_heap);
3374 hres = do_regexp_match_next(regexp, str, len, cp, parens, parens_size, parens_cnt, ret);
3376 jsheap_clear(mark);
3377 return hres;
3380 HRESULT regexp_match(DispatchEx *dispex, const WCHAR *str, DWORD len, BOOL gflag, match_result_t **match_result,
3381 DWORD *result_cnt)
3383 RegExpInstance *This = (RegExpInstance*)dispex;
3384 match_result_t *ret = NULL, cres;
3385 const WCHAR *cp = str;
3386 DWORD i=0, ret_size = 0;
3387 jsheap_t *mark;
3388 HRESULT hres;
3390 mark = jsheap_mark(&This->dispex.ctx->tmp_heap);
3392 while(1) {
3393 hres = do_regexp_match_next(This, str, len, &cp, NULL, NULL, NULL, &cres);
3394 if(hres == S_FALSE) {
3395 hres = S_OK;
3396 break;
3399 if(FAILED(hres))
3400 break;
3402 if(ret_size == i) {
3403 if(ret)
3404 ret = heap_realloc(ret, (ret_size <<= 1) * sizeof(match_result_t));
3405 else
3406 ret = heap_alloc((ret_size=4) * sizeof(match_result_t));
3407 if(!ret) {
3408 hres = E_OUTOFMEMORY;
3409 break;
3413 ret[i++] = cres;
3415 if(!gflag && !(This->jsregexp->flags & JSREG_GLOB)) {
3416 hres = S_OK;
3417 break;
3421 jsheap_clear(mark);
3422 if(FAILED(hres)) {
3423 heap_free(ret);
3424 return hres;
3427 *match_result = ret;
3428 *result_cnt = i;
3429 return S_OK;
3432 static HRESULT RegExp_source(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3433 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3435 FIXME("\n");
3436 return E_NOTIMPL;
3439 static HRESULT RegExp_global(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3440 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3442 FIXME("\n");
3443 return E_NOTIMPL;
3446 static HRESULT RegExp_ignoreCase(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3447 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3449 FIXME("\n");
3450 return E_NOTIMPL;
3453 static HRESULT RegExp_multiline(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3454 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3456 FIXME("\n");
3457 return E_NOTIMPL;
3460 static HRESULT RegExp_lastIndex(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3461 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3463 FIXME("\n");
3464 return E_NOTIMPL;
3467 static HRESULT RegExp_toString(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3468 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3470 FIXME("\n");
3471 return E_NOTIMPL;
3474 static HRESULT RegExp_toLocaleString(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3475 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3477 FIXME("\n");
3478 return E_NOTIMPL;
3481 static HRESULT RegExp_hasOwnProperty(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3482 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3484 FIXME("\n");
3485 return E_NOTIMPL;
3488 static HRESULT RegExp_propertyIsEnumerable(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3489 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3491 FIXME("\n");
3492 return E_NOTIMPL;
3495 static HRESULT RegExp_isPrototypeOf(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3496 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3498 FIXME("\n");
3499 return E_NOTIMPL;
3502 static HRESULT RegExp_exec(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3503 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3505 FIXME("\n");
3506 return E_NOTIMPL;
3509 static HRESULT RegExp_test(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3510 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3512 FIXME("\n");
3513 return E_NOTIMPL;
3516 static HRESULT RegExp_value(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3517 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3519 FIXME("\n");
3520 return E_NOTIMPL;
3523 static void RegExp_destructor(DispatchEx *dispex)
3525 RegExpInstance *This = (RegExpInstance*)dispex;
3527 if(This->jsregexp)
3528 js_DestroyRegExp(This->jsregexp);
3529 SysFreeString(This->str);
3530 heap_free(This);
3533 static const builtin_prop_t RegExp_props[] = {
3534 {execW, RegExp_exec, PROPF_METHOD},
3535 {globalW, RegExp_global, 0},
3536 {hasOwnPropertyW, RegExp_hasOwnProperty, PROPF_METHOD},
3537 {ignoreCaseW, RegExp_ignoreCase, 0},
3538 {isPrototypeOfW, RegExp_isPrototypeOf, PROPF_METHOD},
3539 {lastIndexW, RegExp_lastIndex, 0},
3540 {multilineW, RegExp_multiline, 0},
3541 {propertyIsEnumerableW, RegExp_propertyIsEnumerable, PROPF_METHOD},
3542 {sourceW, RegExp_source, 0},
3543 {testW, RegExp_test, PROPF_METHOD},
3544 {toLocaleStringW, RegExp_toLocaleString, PROPF_METHOD},
3545 {toStringW, RegExp_toString, PROPF_METHOD}
3548 static const builtin_info_t RegExp_info = {
3549 JSCLASS_REGEXP,
3550 {NULL, RegExp_value, 0},
3551 sizeof(RegExp_props)/sizeof(*RegExp_props),
3552 RegExp_props,
3553 RegExp_destructor,
3554 NULL
3557 static HRESULT alloc_regexp(script_ctx_t *ctx, BOOL use_constr, RegExpInstance **ret)
3559 RegExpInstance *regexp;
3560 HRESULT hres;
3562 regexp = heap_alloc_zero(sizeof(RegExpInstance));
3563 if(!regexp)
3564 return E_OUTOFMEMORY;
3566 if(use_constr)
3567 hres = init_dispex_from_constr(&regexp->dispex, ctx, &RegExp_info, ctx->regexp_constr);
3568 else
3569 hres = init_dispex(&regexp->dispex, ctx, &RegExp_info, NULL);
3571 if(FAILED(hres)) {
3572 heap_free(regexp);
3573 return hres;
3576 *ret = regexp;
3577 return S_OK;
3580 static HRESULT create_regexp(script_ctx_t *ctx, const WCHAR *exp, int len, DWORD flags, DispatchEx **ret)
3582 RegExpInstance *regexp;
3583 HRESULT hres;
3585 TRACE("%s %x\n", debugstr_w(exp), flags);
3587 hres = alloc_regexp(ctx, TRUE, &regexp);
3588 if(FAILED(hres))
3589 return hres;
3591 if(len == -1)
3592 regexp->str = SysAllocString(exp);
3593 else
3594 regexp->str = SysAllocStringLen(exp, len);
3595 if(!regexp->str) {
3596 jsdisp_release(&regexp->dispex);
3597 return E_OUTOFMEMORY;
3600 regexp->jsregexp = js_NewRegExp(ctx, regexp->str, flags, FALSE);
3601 if(!regexp->jsregexp) {
3602 WARN("js_NewRegExp failed\n");
3603 jsdisp_release(&regexp->dispex);
3604 return E_FAIL;
3607 *ret = &regexp->dispex;
3608 return S_OK;
3611 static HRESULT regexp_constructor(script_ctx_t *ctx, DISPPARAMS *dp, VARIANT *retv)
3613 const WCHAR *opt = emptyW, *src;
3614 DispatchEx *ret;
3615 VARIANT *arg;
3616 HRESULT hres;
3618 if(!arg_cnt(dp)) {
3619 FIXME("no args\n");
3620 return E_NOTIMPL;
3623 arg = get_arg(dp,0);
3624 if(V_VT(arg) == VT_DISPATCH) {
3625 DispatchEx *obj;
3627 obj = iface_to_jsdisp((IUnknown*)V_DISPATCH(arg));
3628 if(obj) {
3629 if(is_class(obj, JSCLASS_REGEXP)) {
3630 RegExpInstance *regexp = (RegExpInstance*)obj;
3632 hres = create_regexp(ctx, regexp->str, -1, regexp->jsregexp->flags, &ret);
3633 jsdisp_release(obj);
3634 if(FAILED(hres))
3635 return hres;
3637 V_VT(retv) = VT_DISPATCH;
3638 V_DISPATCH(retv) = (IDispatch*)_IDispatchEx_(ret);
3639 return S_OK;
3642 jsdisp_release(obj);
3646 if(V_VT(arg) != VT_BSTR) {
3647 FIXME("vt arg0 = %d\n", V_VT(arg));
3648 return E_NOTIMPL;
3651 src = V_BSTR(arg);
3653 if(arg_cnt(dp) >= 2) {
3654 arg = get_arg(dp,1);
3655 if(V_VT(arg) != VT_BSTR) {
3656 FIXME("unimplemented for vt %d\n", V_VT(arg));
3657 return E_NOTIMPL;
3660 opt = V_BSTR(arg);
3663 hres = create_regexp_str(ctx, src, -1, opt, strlenW(opt), &ret);
3664 if(FAILED(hres))
3665 return hres;
3667 V_VT(retv) = VT_DISPATCH;
3668 V_DISPATCH(retv) = (IDispatch*)_IDispatchEx_(ret);
3669 return S_OK;
3672 static HRESULT RegExpConstr_value(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
3673 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3675 TRACE("\n");
3677 switch(flags) {
3678 case DISPATCH_CONSTRUCT:
3679 return regexp_constructor(dispex->ctx, dp, retv);
3680 default:
3681 FIXME("unimplemented flags: %x\n", flags);
3682 return E_NOTIMPL;
3685 return S_OK;
3688 HRESULT create_regexp_constr(script_ctx_t *ctx, DispatchEx **ret)
3690 RegExpInstance *regexp;
3691 HRESULT hres;
3693 hres = alloc_regexp(ctx, FALSE, &regexp);
3694 if(FAILED(hres))
3695 return hres;
3697 hres = create_builtin_function(ctx, RegExpConstr_value, NULL, PROPF_CONSTR, &regexp->dispex, ret);
3699 jsdisp_release(&regexp->dispex);
3700 return hres;
3703 HRESULT create_regexp_str(script_ctx_t *ctx, const WCHAR *exp, DWORD exp_len, const WCHAR *opt,
3704 DWORD opt_len, DispatchEx **ret)
3706 const WCHAR *p;
3707 DWORD flags = 0;
3709 if(opt) {
3710 for (p = opt; p < opt+opt_len; p++) {
3711 switch (*p) {
3712 case 'g':
3713 flags |= JSREG_GLOB;
3714 break;
3715 case 'i':
3716 flags |= JSREG_FOLD;
3717 break;
3718 case 'm':
3719 flags |= JSREG_MULTILINE;
3720 break;
3721 case 'y':
3722 flags |= JSREG_STICKY;
3723 break;
3724 default:
3725 WARN("wrong flag %c\n", *p);
3726 return E_FAIL;
3731 return create_regexp(ctx, exp, exp_len, flags, ret);