Merge mozilla-central and tracemonkey. (a=blockers)
[mozilla-central.git] / js / src / jsregexpinlines.h
blob30c72d9d401bd2701117be0d628b65ee4496381f
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sw=4 et tw=99 ft=cpp:
4 * ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the
15 * License.
17 * The Original Code is Mozilla SpiderMonkey JavaScript 1.9 code, released
18 * June 12, 2009.
20 * The Initial Developer of the Original Code is
21 * the Mozilla Corporation.
23 * Contributor(s):
24 * Chris Leary <cdleary@mozilla.com>
26 * Alternatively, the contents of this file may be used under the terms of
27 * either of the GNU General Public License Version 2 or later (the "GPL"),
28 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
40 #ifndef jsregexpinlines_h___
41 #define jsregexpinlines_h___
43 #include "jsregexp.h"
44 #include "jscntxt.h"
45 #include "jsobjinlines.h"
46 #include "assembler/wtf/Platform.h"
48 #if ENABLE_YARR_JIT
49 #include "yarr/yarr/RegexJIT.h"
50 #else
51 #include "yarr/pcre/pcre.h"
52 #endif
54 namespace js {
/* res = RegExp statics. */
60 extern Class regexp_statics_class;
62 static inline JSObject *
63 regexp_statics_construct(JSContext *cx, JSObject *parent)
65 JSObject *obj = NewObject<WithProto::Given>(cx, &regexp_statics_class, NULL, parent);
66 if (!obj)
67 return NULL;
68 RegExpStatics *res = cx->create<RegExpStatics>();
69 if (!res)
70 return NULL;
71 obj->setPrivate(static_cast<void *>(res));
72 return obj;
/*
 * The "meat" of the builtin regular expression objects: it contains the
 * mini-program that represents the source of the regular expression. Excepting
 * refcounts, this is an immutable data structure after compilation.
 *
 * Non-atomic refcounting is used, so single-thread invariants must be
 * maintained: we check that regexp operations are performed in a single
 * compartment.
 *
 * Note: defined in the inlines header to avoid Yarr dependency includes in
 * the main header.
 *
 * Note: refCount cannot overflow because that would require more referring
 * regexp objects than there is space for in addressable memory.
 */
90 class RegExp
92 #if ENABLE_YARR_JIT
93 JSC::Yarr::RegexCodeBlock compiled;
94 #else
95 JSRegExp *compiled;
96 #endif
97 JSLinearString *source;
98 size_t refCount;
99 unsigned parenCount; /* Must be |unsigned| to interface with YARR. */
100 uint32 flags;
101 #ifdef DEBUG
102 public:
103 JSCompartment *compartment;
105 private:
106 #endif
108 RegExp(JSLinearString *source, uint32 flags, JSCompartment *compartment)
109 : compiled(), source(source), refCount(1), parenCount(0), flags(flags)
110 #ifdef DEBUG
111 , compartment(compartment)
112 #endif
115 ~RegExp() {
116 #if !ENABLE_YARR_JIT
117 if (compiled)
118 jsRegExpFree(compiled);
119 #endif
122 /* Constructor/destructor are hidden; called by cx->create/destroy. */
123 friend struct ::JSContext;
125 bool compileHelper(JSContext *cx, JSLinearString &pattern);
126 bool compile(JSContext *cx);
127 static const uint32 allFlags = JSREG_FOLD | JSREG_GLOB | JSREG_MULTILINE | JSREG_STICKY;
128 void handlePCREError(JSContext *cx, int error);
129 void handleYarrError(JSContext *cx, int error);
130 static inline bool initArena(JSContext *cx);
131 static inline void checkMatchPairs(JSString *input, int *buf, size_t matchItemCount);
132 static JSObject *createResult(JSContext *cx, JSString *input, int *buf, size_t matchItemCount);
133 inline bool executeInternal(JSContext *cx, RegExpStatics *res, JSString *input,
134 size_t *lastIndex, bool test, Value *rval);
136 public:
137 static inline bool isMetaChar(jschar c);
138 static inline bool hasMetaChars(const jschar *chars, size_t length);
141 * Parse regexp flags. Report an error and return false if an invalid
142 * sequence of flags is encountered (repeat/invalid flag).
144 * N.B. flagStr must be rooted.
146 static bool parseFlags(JSContext *cx, JSString *flagStr, uintN *flagsOut);
149 * Execute regexp on |input| at |*lastIndex|.
151 * On match: Update |*lastIndex| and RegExp class statics.
152 * Return true if test is true. Place an array in |*rval| if test is false.
153 * On mismatch: Make |*rval| null.
155 bool execute(JSContext *cx, RegExpStatics *res, JSString *input, size_t *lastIndex, bool test,
156 Value *rval) {
157 JS_ASSERT(res);
158 return executeInternal(cx, res, input, lastIndex, test, rval);
161 bool executeNoStatics(JSContext *cx, JSString *input, size_t *lastIndex, bool test,
162 Value *rval) {
163 return executeInternal(cx, NULL, input, lastIndex, test, rval);
166 /* Factories */
168 static AlreadyIncRefed<RegExp> create(JSContext *cx, JSString *source, uint32 flags);
170 /* Would overload |create|, but |0| resolves ambiguously against pointer and uint. */
171 static AlreadyIncRefed<RegExp> createFlagged(JSContext *cx, JSString *source, JSString *flags);
174 * Create an object with new regular expression internals.
175 * @note The context's regexp statics flags are OR'd into the provided flags,
176 * so this function is really meant for object creation during code
177 * execution, as opposed to during something like XDR.
179 static JSObject *createObject(JSContext *cx, RegExpStatics *res, const jschar *chars,
180 size_t length, uint32 flags);
181 static JSObject *createObjectNoStatics(JSContext *cx, const jschar *chars, size_t length,
182 uint32 flags);
183 static RegExp *extractFrom(JSObject *obj);
185 /* Mutators */
187 void incref(JSContext *cx);
188 void decref(JSContext *cx);
190 /* Accessors */
192 JSLinearString *getSource() const { return source; }
193 size_t getParenCount() const { return parenCount; }
194 bool ignoreCase() const { return flags & JSREG_FOLD; }
195 bool global() const { return flags & JSREG_GLOB; }
196 bool multiline() const { return flags & JSREG_MULTILINE; }
197 bool sticky() const { return flags & JSREG_STICKY; }
199 const uint32 &getFlags() const {
200 JS_ASSERT((flags & allFlags) == flags);
201 return flags;
205 class RegExpMatchBuilder
207 JSContext * const cx;
208 JSObject * const array;
210 public:
211 RegExpMatchBuilder(JSContext *cx, JSObject *array) : cx(cx), array(array) {}
213 bool append(int index, JSString *str) {
214 JS_ASSERT(str);
215 return append(INT_TO_JSID(index), StringValue(str));
218 bool append(jsid id, Value val) {
219 return !!js_DefineProperty(cx, array, id, &val, js::PropertyStub, js::StrictPropertyStub,
220 JSPROP_ENUMERATE);
223 bool appendIndex(int index) {
224 return append(ATOM_TO_JSID(cx->runtime->atomState.indexAtom), Int32Value(index));
227 /* Sets the input attribute of the match array. */
228 bool appendInput(JSString *str) {
229 JS_ASSERT(str);
230 return append(ATOM_TO_JSID(cx->runtime->atomState.inputAtom), StringValue(str));
234 /* RegExp inlines. */
236 inline bool
237 RegExp::initArena(JSContext *cx)
239 if (cx->regExpPool.first.next)
240 return true;
243 * The regular expression arena pool is special... we want to hang on to it
244 * until a GC is performed so rapid subsequent regexp executions don't
245 * thrash malloc/freeing arena chunks.
247 * Stick a timestamp at the base of that pool.
249 int64 *timestamp;
250 JS_ARENA_ALLOCATE_CAST(timestamp, int64 *, &cx->regExpPool, sizeof *timestamp);
251 if (!timestamp)
252 return false;
253 *timestamp = JS_Now();
254 return true;
257 inline void
258 RegExp::checkMatchPairs(JSString *input, int *buf, size_t matchItemCount)
260 #if DEBUG
261 size_t inputLength = input->length();
262 for (size_t i = 0; i < matchItemCount; i += 2) {
263 int start = buf[i];
264 int limit = buf[i + 1];
265 JS_ASSERT(limit >= start); /* Limit index must be larger than the start index. */
266 if (start == -1)
267 continue;
268 JS_ASSERT(start >= 0);
269 JS_ASSERT(size_t(limit) <= inputLength);
271 #endif
274 inline JSObject *
275 RegExp::createResult(JSContext *cx, JSString *input, int *buf, size_t matchItemCount)
278 * Create the result array for a match. Array contents:
279 * 0: matched string
280 * 1..pairCount-1: paren matches
282 JSObject *array = NewSlowEmptyArray(cx);
283 if (!array)
284 return NULL;
286 RegExpMatchBuilder builder(cx, array);
287 for (size_t i = 0; i < matchItemCount; i += 2) {
288 int start = buf[i];
289 int end = buf[i + 1];
291 JSString *captured;
292 if (start >= 0) {
293 JS_ASSERT(start <= end);
294 JS_ASSERT(unsigned(end) <= input->length());
295 captured = js_NewDependentString(cx, input, start, end - start);
296 if (!(captured && builder.append(i / 2, captured)))
297 return NULL;
298 } else {
299 /* Missing parenthesized match. */
300 JS_ASSERT(i != 0); /* Since we had a match, first pair must be present. */
301 if (!builder.append(INT_TO_JSID(i / 2), UndefinedValue()))
302 return NULL;
306 if (!builder.appendIndex(buf[0]) ||
307 !builder.appendInput(input))
308 return NULL;
310 return array;
313 inline bool
314 RegExp::executeInternal(JSContext *cx, RegExpStatics *res, JSString *inputstr,
315 size_t *lastIndex, bool test, Value *rval)
317 #if !ENABLE_YARR_JIT
318 JS_ASSERT(compiled);
319 #endif
320 const size_t pairCount = parenCount + 1;
321 const size_t bufCount = pairCount * 3; /* Should be x2, but PCRE has... needs. */
322 const size_t matchItemCount = pairCount * 2;
324 if (!initArena(cx))
325 return false;
327 AutoArenaAllocator aaa(&cx->regExpPool);
328 int *buf = aaa.alloc<int>(bufCount);
329 if (!buf)
330 return false;
333 * The JIT regexp procedure doesn't always initialize matchPair values.
334 * Maybe we can make this faster by ensuring it does?
336 for (int *it = buf; it != buf + matchItemCount; ++it)
337 *it = -1;
339 JSLinearString *input = inputstr->ensureLinear(cx);
340 if (!input)
341 return false;
343 size_t len = input->length();
344 const jschar *chars = input->chars();
347 * inputOffset emulates sticky mode by matching from this offset into the char buf and
348 * subtracting the delta off at the end.
350 size_t inputOffset = 0;
352 if (sticky()) {
353 /* Sticky matches at the last index for the regexp object. */
354 chars += *lastIndex;
355 len -= *lastIndex;
356 inputOffset = *lastIndex;
359 #if ENABLE_YARR_JIT
360 int result = JSC::Yarr::executeRegex(cx, compiled, chars, *lastIndex - inputOffset, len, buf,
361 bufCount);
362 #else
363 int result = jsRegExpExecute(cx, compiled, chars, len, *lastIndex - inputOffset, buf,
364 bufCount);
365 #endif
366 if (result == -1) {
367 *rval = NullValue();
368 return true;
371 if (result < 0) {
372 #if ENABLE_YARR_JIT
373 handleYarrError(cx, result);
374 #else
375 handlePCREError(cx, result);
376 #endif
377 return false;
381 * Adjust buf for the inputOffset. Use of sticky is rare and the matchItemCount is small, so
382 * just do another pass.
384 if (JS_UNLIKELY(inputOffset)) {
385 for (size_t i = 0; i < matchItemCount; ++i)
386 buf[i] = buf[i] < 0 ? -1 : buf[i] + inputOffset;
389 /* Make sure the populated contents of |buf| are sane values against |input|. */
390 checkMatchPairs(input, buf, matchItemCount);
392 if (res)
393 res->updateFromMatch(cx, input, buf, matchItemCount);
395 *lastIndex = buf[1];
397 if (test) {
398 *rval = BooleanValue(true);
399 return true;
402 JSObject *array = createResult(cx, input, buf, matchItemCount);
403 if (!array)
404 return false;
406 *rval = ObjectValue(*array);
407 return true;
410 inline AlreadyIncRefed<RegExp>
411 RegExp::create(JSContext *cx, JSString *source, uint32 flags)
413 typedef AlreadyIncRefed<RegExp> RetType;
414 JSLinearString *flatSource = source->ensureLinear(cx);
415 if (!flatSource)
416 return RetType(NULL);
417 RegExp *self = cx->create<RegExp>(flatSource, flags, cx->compartment);
418 if (!self)
419 return RetType(NULL);
420 if (!self->compile(cx)) {
421 cx->destroy<RegExp>(self);
422 return RetType(NULL);
424 return RetType(self);
427 inline JSObject *
428 RegExp::createObject(JSContext *cx, RegExpStatics *res, const jschar *chars, size_t length,
429 uint32 flags)
431 uint32 staticsFlags = res->getFlags();
432 return createObjectNoStatics(cx, chars, length, flags | staticsFlags);
435 inline JSObject *
436 RegExp::createObjectNoStatics(JSContext *cx, const jschar *chars, size_t length, uint32 flags)
438 JS_ASSERT((flags & allFlags) == flags);
439 JSString *str = js_NewStringCopyN(cx, chars, length);
440 if (!str)
441 return NULL;
442 AlreadyIncRefed<RegExp> re = RegExp::create(cx, str, flags);
443 if (!re)
444 return NULL;
445 JSObject *obj = NewBuiltinClassInstance(cx, &js_RegExpClass);
446 if (!obj) {
447 re->decref(cx);
448 return NULL;
450 obj->setPrivate(re.get());
451 obj->zeroRegExpLastIndex();
452 return obj;
#ifdef ANDROID
/*
 * Android-only heuristic: reports true when both the trace JIT and the method
 * JIT are disabled on |cx|. Always false unless both JS_TRACER and
 * JS_METHODJIT are defined.
 */
static bool
YarrJITIsBroken(JSContext *cx)
{
#if defined(JS_TRACER) && defined(JS_METHODJIT)
    /* FIXME/bug 604774: dead code walking.
     *
     * If both JITs are disabled, assume they were disabled because
     * we're running on a blacklisted device.
     */
    return !(cx->traceJitEnabled || cx->methodJitEnabled);
#else
    return false;
#endif
}
#endif  /* ANDROID */
472 inline bool
473 RegExp::compileHelper(JSContext *cx, JSLinearString &pattern)
475 #if ENABLE_YARR_JIT
476 bool fellBack = false;
477 int error = 0;
478 jitCompileRegex(*cx->compartment->regExpAllocator, compiled, pattern, parenCount, error, fellBack, ignoreCase(), multiline()
479 #ifdef ANDROID
480 /* Temporary gross hack to work around buggy kernels. */
481 , YarrJITIsBroken(cx)
482 #endif
484 if (!error)
485 return true;
486 if (fellBack)
487 handlePCREError(cx, error);
488 else
489 handleYarrError(cx, error);
490 return false;
491 #else
492 int error = 0;
493 compiled = jsRegExpCompile(pattern.chars(), pattern.length(),
494 ignoreCase() ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase,
495 multiline() ? JSRegExpMultiline : JSRegExpSingleLine,
496 &parenCount, &error);
497 if (!error)
498 return true;
499 handlePCREError(cx, error);
500 return false;
501 #endif
504 inline bool
505 RegExp::compile(JSContext *cx)
507 /* Flatten source early for the rest of compilation. */
508 if (!source->ensureLinear(cx))
509 return false;
511 if (!sticky())
512 return compileHelper(cx, *source);
515 * The sticky case we implement hackily by prepending a caret onto the front
516 * and relying on |::execute| to pseudo-slice the string when it sees a sticky regexp.
518 static const jschar prefix[] = {'^', '(', '?', ':'};
519 static const jschar postfix[] = {')'};
521 StringBuffer sb(cx);
522 if (!sb.reserve(JS_ARRAY_LENGTH(prefix) + source->length() + JS_ARRAY_LENGTH(postfix)))
523 return false;
524 JS_ALWAYS_TRUE(sb.append(prefix, JS_ARRAY_LENGTH(prefix)));
525 JS_ALWAYS_TRUE(sb.append(source->chars(), source->length()));
526 JS_ALWAYS_TRUE(sb.append(postfix, JS_ARRAY_LENGTH(postfix)));
528 JSLinearString *fakeySource = sb.finishString();
529 if (!fakeySource)
530 return false;
531 return compileHelper(cx, *fakeySource);
534 inline bool
535 RegExp::isMetaChar(jschar c)
537 switch (c) {
538 /* Taken from the PatternCharacter production in 15.10.1. */
539 case '^': case '$': case '\\': case '.': case '*': case '+':
540 case '?': case '(': case ')': case '[': case ']': case '{':
541 case '}': case '|':
542 return true;
543 default:
544 return false;
548 inline bool
549 RegExp::hasMetaChars(const jschar *chars, size_t length)
551 for (size_t i = 0; i < length; ++i) {
552 if (isMetaChar(chars[i]))
553 return true;
555 return false;
558 inline void
559 RegExp::incref(JSContext *cx)
561 #ifdef DEBUG
562 assertSameCompartment(cx, compartment);
563 #endif
564 ++refCount;
567 inline void
568 RegExp::decref(JSContext *cx)
570 #ifdef DEBUG
571 assertSameCompartment(cx, compartment);
572 #endif
573 if (--refCount == 0)
574 cx->destroy<RegExp>(this);
577 inline RegExp *
578 RegExp::extractFrom(JSObject *obj)
580 JS_ASSERT_IF(obj, obj->isRegExp());
581 RegExp *re = static_cast<RegExp *>(obj->getPrivate());
582 #ifdef DEBUG
583 if (re)
584 CompartmentChecker::check(obj->getCompartment(), re->compartment);
585 #endif
586 return re;
589 /* RegExpStatics inlines. */
591 inline RegExpStatics *
592 RegExpStatics::extractFrom(JSObject *global)
594 Value resVal = global->getReservedSlot(JSRESERVED_GLOBAL_REGEXP_STATICS);
595 RegExpStatics *res = static_cast<RegExpStatics *>(resVal.toObject().getPrivate());
596 return res;
599 inline bool
600 RegExpStatics::createDependent(JSContext *cx, size_t start, size_t end, Value *out) const
602 JS_ASSERT(start <= end);
603 JS_ASSERT(end <= matchPairsInput->length());
604 JSString *str = js_NewDependentString(cx, matchPairsInput, start, end - start);
605 if (!str)
606 return false;
607 *out = StringValue(str);
608 return true;
611 inline bool
612 RegExpStatics::createPendingInput(JSContext *cx, Value *out) const
614 out->setString(pendingInput ? pendingInput : cx->runtime->emptyString);
615 return true;
618 inline bool
619 RegExpStatics::makeMatch(JSContext *cx, size_t checkValidIndex, size_t pairNum, Value *out) const
621 if (checkValidIndex / 2 >= pairCount() || matchPairs[checkValidIndex] < 0) {
622 out->setString(cx->runtime->emptyString);
623 return true;
625 return createDependent(cx, get(pairNum, 0), get(pairNum, 1), out);
628 inline bool
629 RegExpStatics::createLastParen(JSContext *cx, Value *out) const
631 if (pairCount() <= 1) {
632 out->setString(cx->runtime->emptyString);
633 return true;
635 size_t num = pairCount() - 1;
636 int start = get(num, 0);
637 int end = get(num, 1);
638 if (start == -1) {
639 out->setString(cx->runtime->emptyString);
640 return true;
642 JS_ASSERT(start >= 0 && end >= 0);
643 JS_ASSERT(end >= start);
644 return createDependent(cx, start, end, out);
647 inline bool
648 RegExpStatics::createLeftContext(JSContext *cx, Value *out) const
650 if (!pairCount()) {
651 out->setString(cx->runtime->emptyString);
652 return true;
654 if (matchPairs[0] < 0) {
655 *out = UndefinedValue();
656 return true;
658 return createDependent(cx, 0, matchPairs[0], out);
661 inline bool
662 RegExpStatics::createRightContext(JSContext *cx, Value *out) const
664 if (!pairCount()) {
665 out->setString(cx->runtime->emptyString);
666 return true;
668 if (matchPairs[1] < 0) {
669 *out = UndefinedValue();
670 return true;
672 return createDependent(cx, matchPairs[1], matchPairsInput->length(), out);
675 inline void
676 RegExpStatics::getParen(size_t pairNum, JSSubString *out) const
678 checkParenNum(pairNum);
679 if (!pairIsPresent(pairNum)) {
680 *out = js_EmptySubString;
681 return;
683 out->chars = matchPairsInput->chars() + get(pairNum, 0);
684 out->length = getParenLength(pairNum);
687 inline void
688 RegExpStatics::getLastMatch(JSSubString *out) const
690 if (!pairCount()) {
691 *out = js_EmptySubString;
692 return;
694 JS_ASSERT(matchPairsInput);
695 out->chars = matchPairsInput->chars() + get(0, 0);
696 JS_ASSERT(get(0, 1) >= get(0, 0));
697 out->length = get(0, 1) - get(0, 0);
700 inline void
701 RegExpStatics::getLastParen(JSSubString *out) const
703 size_t pc = pairCount();
704 /* Note: the first pair is the whole match. */
705 if (pc <= 1) {
706 *out = js_EmptySubString;
707 return;
709 getParen(pc - 1, out);
712 inline void
713 RegExpStatics::getLeftContext(JSSubString *out) const
715 if (!pairCount()) {
716 *out = js_EmptySubString;
717 return;
719 out->chars = matchPairsInput->chars();
720 out->length = get(0, 0);
723 inline void
724 RegExpStatics::getRightContext(JSSubString *out) const
726 if (!pairCount()) {
727 *out = js_EmptySubString;
728 return;
730 out->chars = matchPairsInput->chars() + get(0, 1);
731 JS_ASSERT(get(0, 1) <= int(matchPairsInput->length()));
732 out->length = matchPairsInput->length() - get(0, 1);
737 #endif /* jsregexpinlines_h___ */