Make paren indexing uniform. (r=dmandelin, b=605754)
[mozilla-central.git] / js / src / jsregexpinlines.h
blobff015c7fbcfccc7f1f51aaca9a0f8fdf58872943
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sw=4 et tw=99 ft=cpp:
4 * ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the
15 * License.
17 * The Original Code is Mozilla SpiderMonkey JavaScript 1.9 code, released
18 * June 12, 2009.
20 * The Initial Developer of the Original Code is
21 * the Mozilla Corporation.
23 * Contributor(s):
24 * Chris Leary <cdleary@mozilla.com>
26 * Alternatively, the contents of this file may be used under the terms of
27 * either of the GNU General Public License Version 2 or later (the "GPL"),
28 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
40 #ifndef jsregexpinlines_h___
41 #define jsregexpinlines_h___
43 #include "jsregexp.h"
44 #include "jscntxt.h"
45 #include "jsobjinlines.h"
46 #include "assembler/wtf/Platform.h"
48 #if ENABLE_YARR_JIT
49 #include "yarr/yarr/RegexJIT.h"
50 #else
51 #include "yarr/pcre/pcre.h"
52 #endif
54 namespace js {
57 * res = RegExp statics.
60 extern Class regexp_statics_class;
62 static inline JSObject *
63 regexp_statics_construct(JSContext *cx, JSObject *parent)
65 JSObject *obj = NewObject<WithProto::Given>(cx, &regexp_statics_class, NULL, parent);
66 if (!obj)
67 return NULL;
68 RegExpStatics *res = cx->create<RegExpStatics>();
69 if (!res)
70 return NULL;
71 obj->setPrivate(static_cast<void *>(res));
72 return obj;
75 /* Defined in the inlines header to avoid Yarr dependency includes in main header. */
76 class RegExp
78 jsrefcount refCount;
79 JSString *source;
80 #if ENABLE_YARR_JIT
81 JSC::Yarr::RegexCodeBlock compiled;
82 #else
83 JSRegExp *compiled;
84 #endif
85 unsigned parenCount;
86 uint32 flags;
88 RegExp(JSString *source, uint32 flags)
89 : refCount(1), source(source), compiled(), parenCount(0), flags(flags) {}
90 bool compileHelper(JSContext *cx, UString &pattern);
91 bool compile(JSContext *cx);
92 static const uint32 allFlags = JSREG_FOLD | JSREG_GLOB | JSREG_MULTILINE | JSREG_STICKY;
93 void handlePCREError(JSContext *cx, int error);
94 void handleYarrError(JSContext *cx, int error);
95 static inline bool initArena(JSContext *cx);
96 static inline void checkMatchPairs(JSString *input, int *buf, size_t matchItemCount);
97 static JSObject *createResult(JSContext *cx, JSString *input, int *buf, size_t matchItemCount);
98 inline bool executeInternal(JSContext *cx, RegExpStatics *res, JSString *input,
99 size_t *lastIndex, bool test, Value *rval);
101 public:
102 ~RegExp() {
103 #if !ENABLE_YARR_JIT
104 if (compiled)
105 jsRegExpFree(compiled);
106 #endif
109 static bool isMetaChar(jschar c);
110 static bool hasMetaChars(const jschar *chars, size_t length);
113 * Parse regexp flags. Report an error and return false if an invalid
114 * sequence of flags is encountered (repeat/invalid flag).
116 static bool parseFlags(JSContext *cx, JSString *flagStr, uint32 &flagsOut);
119 * Execute regexp on |input| at |*lastIndex|.
121 * On match: Update |*lastIndex| and RegExp class statics.
122 * Return true if test is true. Place an array in |*rval| if test is false.
123 * On mismatch: Make |*rval| null.
125 bool execute(JSContext *cx, RegExpStatics *res, JSString *input, size_t *lastIndex, bool test,
126 Value *rval) {
127 JS_ASSERT(res);
128 return executeInternal(cx, res, input, lastIndex, test, rval);
131 bool executeNoStatics(JSContext *cx, JSString *input, size_t *lastIndex, bool test,
132 Value *rval) {
133 return executeInternal(cx, NULL, input, lastIndex, test, rval);
136 /* Factories. */
137 static RegExp *create(JSContext *cx, JSString *source, uint32 flags);
138 static RegExp *createFlagged(JSContext *cx, JSString *source, JSString *flags);
140 * Create an object with new regular expression internals.
141 * @note The context's regexp statics flags are OR'd into the provided flags,
142 * so this function is really meant for object creation during code
143 * execution, as opposed to during something like XDR.
145 static JSObject *createObject(JSContext *cx, RegExpStatics *res, const jschar *chars,
146 size_t length, uint32 flags);
147 static JSObject *createObjectNoStatics(JSContext *cx, const jschar *chars, size_t length,
148 uint32 flags);
149 static RegExp *extractFrom(JSObject *obj);
150 static RegExp *clone(JSContext *cx, const RegExp &other);
152 /* Mutators. */
153 void incref(JSContext *cx) { JS_ATOMIC_INCREMENT(&refCount); }
154 void decref(JSContext *cx);
156 /* Accessors. */
157 JSString *getSource() const { return source; }
158 size_t getParenCount() const { return parenCount; }
159 bool ignoreCase() const { return flags & JSREG_FOLD; }
160 bool global() const { return flags & JSREG_GLOB; }
161 bool multiline() const { return flags & JSREG_MULTILINE; }
162 bool sticky() const { return flags & JSREG_STICKY; }
164 const uint32 &getFlags() const { JS_ASSERT((flags & allFlags) == flags); return flags; }
165 uint32 flagCount() const;
168 class RegExpMatchBuilder
170 JSContext * const cx;
171 JSObject * const array;
173 public:
174 RegExpMatchBuilder(JSContext *cx, JSObject *array) : cx(cx), array(array) {}
176 bool append(int index, JSString *str) {
177 JS_ASSERT(str);
178 return append(INT_TO_JSID(index), StringValue(str));
181 bool append(jsid id, Value val) {
182 return !!js_DefineProperty(cx, array, id, &val, js::PropertyStub, js::PropertyStub,
183 JSPROP_ENUMERATE);
186 bool appendIndex(int index) {
187 return append(ATOM_TO_JSID(cx->runtime->atomState.indexAtom), Int32Value(index));
190 /* Sets the input attribute of the match array. */
191 bool appendInput(JSString *str) {
192 JS_ASSERT(str);
193 return append(ATOM_TO_JSID(cx->runtime->atomState.inputAtom), StringValue(str));
197 /* RegExp inlines. */
199 inline bool
200 RegExp::initArena(JSContext *cx)
202 if (cx->regExpPool.first.next)
203 return true;
206 * The regular expression arena pool is special... we want to hang on to it
207 * until a GC is performed so rapid subsequent regexp executions don't
208 * thrash malloc/freeing arena chunks.
210 * Stick a timestamp at the base of that pool.
212 int64 *timestamp;
213 JS_ARENA_ALLOCATE_CAST(timestamp, int64 *, &cx->regExpPool, sizeof *timestamp);
214 if (!timestamp)
215 return false;
216 *timestamp = JS_Now();
217 return true;
220 inline void
221 RegExp::checkMatchPairs(JSString *input, int *buf, size_t matchItemCount)
223 #if DEBUG
224 size_t inputLength = input->length();
225 int largestStartSeen = 0;
226 for (size_t i = 0; i < matchItemCount; i += 2) {
227 int start = buf[i];
228 int limit = buf[i + 1];
229 JS_ASSERT(limit >= start); /* Limit index must be larger than the start index. */
230 if (start == -1)
231 continue;
232 JS_ASSERT(start >= 0);
233 JS_ASSERT(size_t(limit) <= inputLength);
234 /* Test the monotonically increasing nature of left parens. */
235 JS_ASSERT(start >= largestStartSeen);
236 largestStartSeen = start;
238 #endif
241 inline JSObject *
242 RegExp::createResult(JSContext *cx, JSString *input, int *buf, size_t matchItemCount)
245 * Create the result array for a match. Array contents:
246 * 0: matched string
247 * 1..pairCount-1: paren matches
249 JSObject *array = js_NewSlowArrayObject(cx);
250 if (!array)
251 return NULL;
253 RegExpMatchBuilder builder(cx, array);
254 for (size_t i = 0; i < matchItemCount; i += 2) {
255 int start = buf[i];
256 int end = buf[i + 1];
258 JSString *captured;
259 if (start >= 0) {
260 JS_ASSERT(start <= end);
261 JS_ASSERT(unsigned(end) <= input->length());
262 captured = js_NewDependentString(cx, input, start, end - start);
263 if (!(captured && builder.append(i / 2, captured)))
264 return NULL;
265 } else {
266 /* Missing parenthesized match. */
267 JS_ASSERT(i != 0); /* Since we had a match, first pair must be present. */
268 if (!builder.append(INT_TO_JSID(i / 2), UndefinedValue()))
269 return NULL;
273 if (!builder.appendIndex(buf[0]) ||
274 !builder.appendInput(input))
275 return NULL;
277 return array;
280 inline bool
281 RegExp::executeInternal(JSContext *cx, RegExpStatics *res, JSString *input,
282 size_t *lastIndex, bool test, Value *rval)
284 #if !ENABLE_YARR_JIT
285 JS_ASSERT(compiled);
286 #endif
287 const size_t pairCount = parenCount + 1;
288 const size_t bufCount = pairCount * 3; /* Should be x2, but PCRE has... needs. */
289 const size_t matchItemCount = pairCount * 2;
291 if (!initArena(cx))
292 return false;
294 AutoArenaAllocator aaa(&cx->regExpPool);
295 int *buf = aaa.alloc<int>(bufCount);
296 if (!buf)
297 return false;
300 * The JIT regexp procedure doesn't always initialize matchPair values.
301 * Maybe we can make this faster by ensuring it does?
303 for (int *it = buf; it != buf + matchItemCount; ++it)
304 *it = -1;
306 const jschar *chars = input->chars();
307 size_t len = input->length();
310 * inputOffset emulates sticky mode by matching from this offset into the char buf and
311 * subtracting the delta off at the end.
313 size_t inputOffset = 0;
315 if (sticky()) {
316 /* Sticky matches at the last index for the regexp object. */
317 chars += *lastIndex;
318 len -= *lastIndex;
319 inputOffset = *lastIndex;
322 #if ENABLE_YARR_JIT
323 int result = JSC::Yarr::executeRegex(cx, compiled, chars, *lastIndex - inputOffset, len, buf,
324 bufCount);
325 #else
326 int result = jsRegExpExecute(cx, compiled, chars, len, *lastIndex - inputOffset, buf,
327 bufCount) < 0 ? -1 : buf[0];
328 #endif
329 if (result == -1) {
330 *rval = NullValue();
331 return true;
335 * Adjust buf for the inputOffset. Use of sticky is rare and the matchItemCount is small, so
336 * just do another pass.
338 if (JS_UNLIKELY(inputOffset)) {
339 for (size_t i = 0; i < matchItemCount; ++i)
340 buf[i] = buf[i] < 0 ? -1 : buf[i] + inputOffset;
343 /* Make sure the populated contents of |buf| are sane values against |input|. */
344 checkMatchPairs(input, buf, matchItemCount);
346 if (res)
347 res->updateFromMatch(cx, input, buf, matchItemCount);
349 *lastIndex = buf[1];
351 if (test) {
352 *rval = BooleanValue(true);
353 return true;
356 JSObject *array = createResult(cx, input, buf, matchItemCount);
357 if (!array)
358 return false;
360 *rval = ObjectValue(*array);
361 return true;
364 inline RegExp *
365 RegExp::create(JSContext *cx, JSString *source, uint32 flags)
367 RegExp *self;
368 void *mem = cx->malloc(sizeof(*self));
369 if (!mem)
370 return NULL;
371 self = new (mem) RegExp(source, flags);
372 if (!self->compile(cx)) {
373 cx->destroy<RegExp>(self);
374 return NULL;
376 return self;
379 inline JSObject *
380 RegExp::createObject(JSContext *cx, RegExpStatics *res, const jschar *chars, size_t length,
381 uint32 flags)
383 uint32 staticsFlags = res->getFlags();
384 return createObjectNoStatics(cx, chars, length, flags | staticsFlags);
387 inline JSObject *
388 RegExp::createObjectNoStatics(JSContext *cx, const jschar *chars, size_t length, uint32 flags)
390 JS_ASSERT((flags & allFlags) == flags);
391 JSString *str = js_NewStringCopyN(cx, chars, length);
392 if (!str)
393 return NULL;
394 RegExp *re = RegExp::create(cx, str, flags);
395 if (!re)
396 return NULL;
397 JSObject *obj = NewBuiltinClassInstance(cx, &js_RegExpClass);
398 if (!obj) {
399 re->decref(cx);
400 return NULL;
402 obj->setPrivate(re);
403 obj->zeroRegExpLastIndex();
404 return obj;
#ifdef ANDROID
/*
 * Android-only heuristic: report the Yarr JIT as unusable when neither the
 * tracing JIT nor the method JIT is enabled on |cx|. Always false when the
 * build lacks either JIT.
 */
static bool
YarrJITIsBroken(JSContext *cx)
{
#if defined(JS_TRACER) && defined(JS_METHODJIT)
    /* FIXME/bug 604774: dead code walking.
     *
     * If both JITs are disabled, assume they were disabled because
     * we're running on a blacklisted device.
     */
    return !(cx->traceJitEnabled || cx->methodJitEnabled);
#else
    return false;
#endif
}
#endif  /* ANDROID */
424 inline bool
425 RegExp::compileHelper(JSContext *cx, UString &pattern)
427 #if ENABLE_YARR_JIT
428 bool fellBack = false;
429 int error = 0;
430 jitCompileRegex(*cx->runtime->regExpAllocator, compiled, pattern, parenCount, error, fellBack, ignoreCase(), multiline()
431 #ifdef ANDROID
432 /* Temporary gross hack to work around buggy kernels. */
433 , YarrJITIsBroken(cx)
434 #endif
436 if (!error)
437 return true;
438 if (fellBack)
439 handlePCREError(cx, error);
440 else
441 handleYarrError(cx, error);
442 return false;
443 #else
444 int error = 0;
445 compiled = jsRegExpCompile(pattern.chars(), pattern.length(),
446 ignoreCase() ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase,
447 multiline() ? JSRegExpMultiline : JSRegExpSingleLine,
448 &parenCount, &error);
449 if (!error)
450 return true;
451 handlePCREError(cx, error);
452 return false;
453 #endif
456 inline bool
457 RegExp::compile(JSContext *cx)
459 if (!sticky())
460 return compileHelper(cx, *source);
462 * The sticky case we implement hackily by prepending a caret onto the front
463 * and relying on |::execute| to pseudo-slice the string when it sees a sticky regexp.
465 static const jschar prefix[] = {'^', '(', '?', ':'};
466 static const jschar postfix[] = {')'};
468 JSCharBuffer cb(cx);
469 if (!cb.reserve(JS_ARRAY_LENGTH(prefix) + source->length() + JS_ARRAY_LENGTH(postfix)))
470 return false;
471 JS_ALWAYS_TRUE(cb.append(prefix, JS_ARRAY_LENGTH(prefix)));
472 JS_ALWAYS_TRUE(cb.append(source->chars(), source->length()));
473 JS_ALWAYS_TRUE(cb.append(postfix, JS_ARRAY_LENGTH(postfix)));
475 JSString *fakeySource = js_NewStringFromCharBuffer(cx, cb);
476 if (!fakeySource)
477 return false;
478 return compileHelper(cx, *fakeySource);
481 inline bool
482 RegExp::isMetaChar(jschar c)
484 switch (c) {
485 /* Taken from the PatternCharacter production in 15.10.1. */
486 case '^': case '$': case '\\': case '.': case '*': case '+':
487 case '?': case '(': case ')': case '[': case ']': case '{':
488 case '}': case '|':
489 return true;
490 default:
491 return false;
495 inline bool
496 RegExp::hasMetaChars(const jschar *chars, size_t length)
498 for (size_t i = 0; i < length; ++i) {
499 if (isMetaChar(chars[i]))
500 return true;
502 return false;
505 inline uint32
506 RegExp::flagCount() const
508 uint32 nflags = 0;
509 for (uint32 tmpFlags = flags; tmpFlags != 0; tmpFlags &= tmpFlags - 1)
510 nflags++;
511 return nflags;
514 inline void
515 RegExp::decref(JSContext *cx)
517 if (JS_ATOMIC_DECREMENT(&refCount) == 0)
518 cx->destroy<RegExp>(this);
521 inline RegExp *
522 RegExp::extractFrom(JSObject *obj)
524 JS_ASSERT_IF(obj, obj->isRegExp());
525 return static_cast<RegExp *>(obj->getPrivate());
528 inline RegExp *
529 RegExp::clone(JSContext *cx, const RegExp &other)
531 return create(cx, other.source, other.flags);
534 /* RegExpStatics inlines. */
536 inline RegExpStatics *
537 RegExpStatics::extractFrom(JSObject *global)
539 Value resVal = global->getReservedSlot(JSRESERVED_GLOBAL_REGEXP_STATICS);
540 RegExpStatics *res = static_cast<RegExpStatics *>(resVal.toObject().getPrivate());
541 return res;
544 inline bool
545 RegExpStatics::createDependent(JSContext *cx, size_t start, size_t end, Value *out) const
547 JS_ASSERT(start <= end);
548 JS_ASSERT(end <= matchPairsInput->length());
549 JSString *str = js_NewDependentString(cx, matchPairsInput, start, end - start);
550 if (!str)
551 return false;
552 *out = StringValue(str);
553 return true;
556 inline bool
557 RegExpStatics::createPendingInput(JSContext *cx, Value *out) const
559 out->setString(pendingInput ? pendingInput : cx->runtime->emptyString);
560 return true;
563 inline bool
564 RegExpStatics::makeMatch(JSContext *cx, size_t checkValidIndex, size_t pairNum, Value *out) const
566 if (checkValidIndex / 2 >= pairCount() || matchPairs[checkValidIndex] < 0) {
567 out->setString(cx->runtime->emptyString);
568 return true;
570 return createDependent(cx, get(pairNum, 0), get(pairNum, 1), out);
573 inline bool
574 RegExpStatics::createLastParen(JSContext *cx, Value *out) const
576 if (pairCount() <= 1) {
577 out->setString(cx->runtime->emptyString);
578 return true;
580 size_t num = pairCount() - 1;
581 int start = get(num, 0);
582 int end = get(num, 1);
583 if (start == -1) {
584 out->setString(cx->runtime->emptyString);
585 return true;
587 JS_ASSERT(start >= 0 && end >= 0);
588 JS_ASSERT(end >= start);
589 return createDependent(cx, start, end, out);
592 inline bool
593 RegExpStatics::createLeftContext(JSContext *cx, Value *out) const
595 if (!pairCount()) {
596 out->setString(cx->runtime->emptyString);
597 return true;
599 if (matchPairs[0] < 0) {
600 *out = UndefinedValue();
601 return true;
603 return createDependent(cx, 0, matchPairs[0], out);
606 inline bool
607 RegExpStatics::createRightContext(JSContext *cx, Value *out) const
609 if (!pairCount()) {
610 out->setString(cx->runtime->emptyString);
611 return true;
613 if (matchPairs[1] < 0) {
614 *out = UndefinedValue();
615 return true;
617 return createDependent(cx, matchPairs[1], matchPairsInput->length(), out);
620 inline void
621 RegExpStatics::getParen(size_t pairNum, JSSubString *out) const
623 checkParenNum(pairNum);
624 if (!pairIsPresent(pairNum)) {
625 *out = js_EmptySubString;
626 return;
628 out->chars = matchPairsInput->chars() + getCrash(pairNum, 0);
629 out->length = getParenLength(pairNum);
632 inline void
633 RegExpStatics::getLastMatch(JSSubString *out) const
635 if (!pairCountCrash()) {
636 *out = js_EmptySubString;
637 return;
639 JS_CRASH_UNLESS(matchPairsInput);
640 out->chars = matchPairsInput->chars() + getCrash(0, 0);
641 JS_CRASH_UNLESS(getCrash(0, 1) >= getCrash(0, 0));
642 out->length = get(0, 1) - get(0, 0);
645 inline void
646 RegExpStatics::getLastParen(JSSubString *out) const
648 size_t pairCount = pairCountCrash();
649 /* Note: the first pair is the whole match. */
650 if (pairCount <= 1) {
651 *out = js_EmptySubString;
652 return;
654 getParen(pairCount - 1, out);
657 inline void
658 RegExpStatics::getLeftContext(JSSubString *out) const
660 if (!pairCountCrash()) {
661 *out = js_EmptySubString;
662 return;
664 out->chars = matchPairsInput->chars();
665 out->length = getCrash(0, 0);
668 inline void
669 RegExpStatics::getRightContext(JSSubString *out) const
671 if (!pairCountCrash()) {
672 *out = js_EmptySubString;
673 return;
675 out->chars = matchPairsInput->chars() + getCrash(0, 1);
676 JS_CRASH_UNLESS(get(0, 1) <= int(matchPairsInput->length()));
677 out->length = matchPairsInput->length() - get(0, 1);
682 #endif /* jsregexpinlines_h___ */