1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sw=4 et tw=99 ft=cpp:
4 * ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the
17 * The Original Code is Mozilla SpiderMonkey JavaScript 1.9 code, released
20 * The Initial Developer of the Original Code is
21 * the Mozilla Corporation.
24 * Chris Leary <cdleary@mozilla.com>
26 * Alternatively, the contents of this file may be used under the terms of
27 * either of the GNU General Public License Version 2 or later (the "GPL"),
28 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
40 #ifndef jsregexpinlines_h___
41 #define jsregexpinlines_h___
45 #include "jsobjinlines.h"
46 #include "assembler/wtf/Platform.h"
49 #include "yarr/yarr/RegexJIT.h"
51 #include "yarr/pcre/pcre.h"
/*
 * res = RegExp statics.
 */
60 extern Class regexp_statics_class
;
62 static inline JSObject
*
63 regexp_statics_construct(JSContext
*cx
, JSObject
*parent
)
65 JSObject
*obj
= NewObject
<WithProto::Given
>(cx
, ®exp_statics_class
, NULL
, parent
);
68 RegExpStatics
*res
= cx
->create
<RegExpStatics
>();
71 obj
->setPrivate(static_cast<void *>(res
));
76 * The "meat" of the builtin regular expression objects: it contains the
77 * mini-program that represents the source of the regular expression. Excepting
78 * refcounts, this is an immutable datastructure after compilation.
80 * Non-atomic refcounting is used, so single-thread invariants must be
81 * maintained: we check regexp operations are performed in a single
84 * Note: defined in the inlines header to avoid Yarr dependency includes in
87 * Note: refCount cannot overflow because that would require more referring
88 * regexp objects than there is space for in addressable memory.
93 JSC::Yarr::RegexCodeBlock compiled
;
97 JSLinearString
*source
;
99 unsigned parenCount
; /* Must be |unsigned| to interface with YARR. */
103 JSCompartment
*compartment
;
108 RegExp(JSLinearString
*source
, uint32 flags
, JSCompartment
*compartment
)
109 : compiled(), source(source
), refCount(1), parenCount(0), flags(flags
)
111 , compartment(compartment
)
118 jsRegExpFree(compiled
);
122 /* Constructor/destructor are hidden; called by cx->create/destroy. */
123 friend struct ::JSContext
;
125 bool compileHelper(JSContext
*cx
, JSLinearString
&pattern
);
126 bool compile(JSContext
*cx
);
127 static const uint32 allFlags
= JSREG_FOLD
| JSREG_GLOB
| JSREG_MULTILINE
| JSREG_STICKY
;
128 void handlePCREError(JSContext
*cx
, int error
);
129 void handleYarrError(JSContext
*cx
, int error
);
130 static inline bool initArena(JSContext
*cx
);
131 static inline void checkMatchPairs(JSString
*input
, int *buf
, size_t matchItemCount
);
132 static JSObject
*createResult(JSContext
*cx
, JSString
*input
, int *buf
, size_t matchItemCount
);
133 inline bool executeInternal(JSContext
*cx
, RegExpStatics
*res
, JSString
*input
,
134 size_t *lastIndex
, bool test
, Value
*rval
);
137 static inline bool isMetaChar(jschar c
);
138 static inline bool hasMetaChars(const jschar
*chars
, size_t length
);
141 * Parse regexp flags. Report an error and return false if an invalid
142 * sequence of flags is encountered (repeat/invalid flag).
144 * N.B. flagStr must be rooted.
146 static bool parseFlags(JSContext
*cx
, JSString
*flagStr
, uintN
*flagsOut
);
149 * Execute regexp on |input| at |*lastIndex|.
151 * On match: Update |*lastIndex| and RegExp class statics.
152 * Return true if test is true. Place an array in |*rval| if test is false.
153 * On mismatch: Make |*rval| null.
155 bool execute(JSContext
*cx
, RegExpStatics
*res
, JSString
*input
, size_t *lastIndex
, bool test
,
158 return executeInternal(cx
, res
, input
, lastIndex
, test
, rval
);
161 bool executeNoStatics(JSContext
*cx
, JSString
*input
, size_t *lastIndex
, bool test
,
163 return executeInternal(cx
, NULL
, input
, lastIndex
, test
, rval
);
168 static AlreadyIncRefed
<RegExp
> create(JSContext
*cx
, JSString
*source
, uint32 flags
);
170 /* Would overload |create|, but |0| resolves ambiguously against pointer and uint. */
171 static AlreadyIncRefed
<RegExp
> createFlagged(JSContext
*cx
, JSString
*source
, JSString
*flags
);
174 * Create an object with new regular expression internals.
175 * @note The context's regexp statics flags are OR'd into the provided flags,
176 * so this function is really meant for object creation during code
177 * execution, as opposed to during something like XDR.
179 static JSObject
*createObject(JSContext
*cx
, RegExpStatics
*res
, const jschar
*chars
,
180 size_t length
, uint32 flags
);
181 static JSObject
*createObjectNoStatics(JSContext
*cx
, const jschar
*chars
, size_t length
,
183 static RegExp
*extractFrom(JSObject
*obj
);
187 void incref(JSContext
*cx
);
188 void decref(JSContext
*cx
);
192 JSLinearString
*getSource() const { return source
; }
193 size_t getParenCount() const { return parenCount
; }
194 bool ignoreCase() const { return flags
& JSREG_FOLD
; }
195 bool global() const { return flags
& JSREG_GLOB
; }
196 bool multiline() const { return flags
& JSREG_MULTILINE
; }
197 bool sticky() const { return flags
& JSREG_STICKY
; }
199 const uint32
&getFlags() const {
200 JS_ASSERT((flags
& allFlags
) == flags
);
205 class RegExpMatchBuilder
207 JSContext
* const cx
;
208 JSObject
* const array
;
211 RegExpMatchBuilder(JSContext
*cx
, JSObject
*array
) : cx(cx
), array(array
) {}
213 bool append(int index
, JSString
*str
) {
215 return append(INT_TO_JSID(index
), StringValue(str
));
218 bool append(jsid id
, Value val
) {
219 return !!js_DefineProperty(cx
, array
, id
, &val
, js::PropertyStub
, js::StrictPropertyStub
,
223 bool appendIndex(int index
) {
224 return append(ATOM_TO_JSID(cx
->runtime
->atomState
.indexAtom
), Int32Value(index
));
227 /* Sets the input attribute of the match array. */
228 bool appendInput(JSString
*str
) {
230 return append(ATOM_TO_JSID(cx
->runtime
->atomState
.inputAtom
), StringValue(str
));
/* RegExp inlines. */
237 RegExp::initArena(JSContext
*cx
)
239 if (cx
->regExpPool
.first
.next
)
243 * The regular expression arena pool is special... we want to hang on to it
244 * until a GC is performed so rapid subsequent regexp executions don't
245 * thrash malloc/freeing arena chunks.
247 * Stick a timestamp at the base of that pool.
250 JS_ARENA_ALLOCATE_CAST(timestamp
, int64
*, &cx
->regExpPool
, sizeof *timestamp
);
253 *timestamp
= JS_Now();
258 RegExp::checkMatchPairs(JSString
*input
, int *buf
, size_t matchItemCount
)
261 size_t inputLength
= input
->length();
262 for (size_t i
= 0; i
< matchItemCount
; i
+= 2) {
264 int limit
= buf
[i
+ 1];
265 JS_ASSERT(limit
>= start
); /* Limit index must be larger than the start index. */
268 JS_ASSERT(start
>= 0);
269 JS_ASSERT(size_t(limit
) <= inputLength
);
275 RegExp::createResult(JSContext
*cx
, JSString
*input
, int *buf
, size_t matchItemCount
)
278 * Create the result array for a match. Array contents:
280 * 1..pairCount-1: paren matches
282 JSObject
*array
= NewSlowEmptyArray(cx
);
286 RegExpMatchBuilder
builder(cx
, array
);
287 for (size_t i
= 0; i
< matchItemCount
; i
+= 2) {
289 int end
= buf
[i
+ 1];
293 JS_ASSERT(start
<= end
);
294 JS_ASSERT(unsigned(end
) <= input
->length());
295 captured
= js_NewDependentString(cx
, input
, start
, end
- start
);
296 if (!(captured
&& builder
.append(i
/ 2, captured
)))
299 /* Missing parenthesized match. */
300 JS_ASSERT(i
!= 0); /* Since we had a match, first pair must be present. */
301 if (!builder
.append(INT_TO_JSID(i
/ 2), UndefinedValue()))
306 if (!builder
.appendIndex(buf
[0]) ||
307 !builder
.appendInput(input
))
314 RegExp::executeInternal(JSContext
*cx
, RegExpStatics
*res
, JSString
*inputstr
,
315 size_t *lastIndex
, bool test
, Value
*rval
)
320 const size_t pairCount
= parenCount
+ 1;
321 const size_t bufCount
= pairCount
* 3; /* Should be x2, but PCRE has... needs. */
322 const size_t matchItemCount
= pairCount
* 2;
327 AutoArenaAllocator
aaa(&cx
->regExpPool
);
328 int *buf
= aaa
.alloc
<int>(bufCount
);
333 * The JIT regexp procedure doesn't always initialize matchPair values.
334 * Maybe we can make this faster by ensuring it does?
336 for (int *it
= buf
; it
!= buf
+ matchItemCount
; ++it
)
339 JSLinearString
*input
= inputstr
->ensureLinear(cx
);
343 size_t len
= input
->length();
344 const jschar
*chars
= input
->chars();
347 * inputOffset emulates sticky mode by matching from this offset into the char buf and
348 * subtracting the delta off at the end.
350 size_t inputOffset
= 0;
353 /* Sticky matches at the last index for the regexp object. */
356 inputOffset
= *lastIndex
;
360 int result
= JSC::Yarr::executeRegex(cx
, compiled
, chars
, *lastIndex
- inputOffset
, len
, buf
,
363 int result
= jsRegExpExecute(cx
, compiled
, chars
, len
, *lastIndex
- inputOffset
, buf
,
373 handleYarrError(cx
, result
);
375 handlePCREError(cx
, result
);
381 * Adjust buf for the inputOffset. Use of sticky is rare and the matchItemCount is small, so
382 * just do another pass.
384 if (JS_UNLIKELY(inputOffset
)) {
385 for (size_t i
= 0; i
< matchItemCount
; ++i
)
386 buf
[i
] = buf
[i
] < 0 ? -1 : buf
[i
] + inputOffset
;
389 /* Make sure the populated contents of |buf| are sane values against |input|. */
390 checkMatchPairs(input
, buf
, matchItemCount
);
393 res
->updateFromMatch(cx
, input
, buf
, matchItemCount
);
398 *rval
= BooleanValue(true);
402 JSObject
*array
= createResult(cx
, input
, buf
, matchItemCount
);
406 *rval
= ObjectValue(*array
);
410 inline AlreadyIncRefed
<RegExp
>
411 RegExp::create(JSContext
*cx
, JSString
*source
, uint32 flags
)
413 typedef AlreadyIncRefed
<RegExp
> RetType
;
414 JSLinearString
*flatSource
= source
->ensureLinear(cx
);
416 return RetType(NULL
);
417 RegExp
*self
= cx
->create
<RegExp
>(flatSource
, flags
, cx
->compartment
);
419 return RetType(NULL
);
420 if (!self
->compile(cx
)) {
421 cx
->destroy
<RegExp
>(self
);
422 return RetType(NULL
);
424 return RetType(self
);
428 RegExp::createObject(JSContext
*cx
, RegExpStatics
*res
, const jschar
*chars
, size_t length
,
431 uint32 staticsFlags
= res
->getFlags();
432 return createObjectNoStatics(cx
, chars
, length
, flags
| staticsFlags
);
436 RegExp::createObjectNoStatics(JSContext
*cx
, const jschar
*chars
, size_t length
, uint32 flags
)
438 JS_ASSERT((flags
& allFlags
) == flags
);
439 JSString
*str
= js_NewStringCopyN(cx
, chars
, length
);
442 AlreadyIncRefed
<RegExp
> re
= RegExp::create(cx
, str
, flags
);
445 JSObject
*obj
= NewBuiltinClassInstance(cx
, &js_RegExpClass
);
450 obj
->setPrivate(re
.get());
451 obj
->zeroRegExpLastIndex();
/*
 * Heuristic used by compileHelper to decide whether the YARR JIT should be
 * avoided on this device.
 *
 * NOTE(review): the preprocessor guard around this helper was lost in the
 * source as received; ANDROID is assumed here -- confirm against the build.
 */
#ifdef ANDROID
static bool
YarrJITIsBroken(JSContext *cx)
{
#if defined(JS_TRACER) && defined(JS_METHODJIT)
    /* FIXME/bug 604774: dead code walking.
     *
     * If both JITs are disabled, assume they were disabled because
     * we're running on a blacklisted device.
     */
    return !cx->traceJitEnabled && !cx->methodJitEnabled;
#else
    return false;
#endif
}
#endif  /* ANDROID */
473 RegExp::compileHelper(JSContext
*cx
, JSLinearString
&pattern
)
476 bool fellBack
= false;
478 jitCompileRegex(*cx
->compartment
->regExpAllocator
, compiled
, pattern
, parenCount
, error
, fellBack
, ignoreCase(), multiline()
480 /* Temporary gross hack to work around buggy kernels. */
481 , YarrJITIsBroken(cx
)
487 handlePCREError(cx
, error
);
489 handleYarrError(cx
, error
);
493 compiled
= jsRegExpCompile(pattern
.chars(), pattern
.length(),
494 ignoreCase() ? JSRegExpIgnoreCase
: JSRegExpDoNotIgnoreCase
,
495 multiline() ? JSRegExpMultiline
: JSRegExpSingleLine
,
496 &parenCount
, &error
);
499 handlePCREError(cx
, error
);
505 RegExp::compile(JSContext
*cx
)
507 /* Flatten source early for the rest of compilation. */
508 if (!source
->ensureLinear(cx
))
512 return compileHelper(cx
, *source
);
515 * The sticky case we implement hackily by prepending a caret onto the front
516 * and relying on |::execute| to pseudo-slice the string when it sees a sticky regexp.
518 static const jschar prefix
[] = {'^', '(', '?', ':'};
519 static const jschar postfix
[] = {')'};
522 if (!sb
.reserve(JS_ARRAY_LENGTH(prefix
) + source
->length() + JS_ARRAY_LENGTH(postfix
)))
524 JS_ALWAYS_TRUE(sb
.append(prefix
, JS_ARRAY_LENGTH(prefix
)));
525 JS_ALWAYS_TRUE(sb
.append(source
->chars(), source
->length()));
526 JS_ALWAYS_TRUE(sb
.append(postfix
, JS_ARRAY_LENGTH(postfix
)));
528 JSLinearString
*fakeySource
= sb
.finishString();
531 return compileHelper(cx
, *fakeySource
);
535 RegExp::isMetaChar(jschar c
)
538 /* Taken from the PatternCharacter production in 15.10.1. */
539 case '^': case '$': case '\\': case '.': case '*': case '+':
540 case '?': case '(': case ')': case '[': case ']': case '{':
549 RegExp::hasMetaChars(const jschar
*chars
, size_t length
)
551 for (size_t i
= 0; i
< length
; ++i
) {
552 if (isMetaChar(chars
[i
]))
559 RegExp::incref(JSContext
*cx
)
562 assertSameCompartment(cx
, compartment
);
568 RegExp::decref(JSContext
*cx
)
571 assertSameCompartment(cx
, compartment
);
574 cx
->destroy
<RegExp
>(this);
578 RegExp::extractFrom(JSObject
*obj
)
580 JS_ASSERT_IF(obj
, obj
->isRegExp());
581 RegExp
*re
= static_cast<RegExp
*>(obj
->getPrivate());
584 CompartmentChecker::check(obj
->getCompartment(), re
->compartment
);
/* RegExpStatics inlines. */
591 inline RegExpStatics
*
592 RegExpStatics::extractFrom(JSObject
*global
)
594 Value resVal
= global
->getReservedSlot(JSRESERVED_GLOBAL_REGEXP_STATICS
);
595 RegExpStatics
*res
= static_cast<RegExpStatics
*>(resVal
.toObject().getPrivate());
600 RegExpStatics::createDependent(JSContext
*cx
, size_t start
, size_t end
, Value
*out
) const
602 JS_ASSERT(start
<= end
);
603 JS_ASSERT(end
<= matchPairsInput
->length());
604 JSString
*str
= js_NewDependentString(cx
, matchPairsInput
, start
, end
- start
);
607 *out
= StringValue(str
);
612 RegExpStatics::createPendingInput(JSContext
*cx
, Value
*out
) const
614 out
->setString(pendingInput
? pendingInput
: cx
->runtime
->emptyString
);
619 RegExpStatics::makeMatch(JSContext
*cx
, size_t checkValidIndex
, size_t pairNum
, Value
*out
) const
621 if (checkValidIndex
/ 2 >= pairCount() || matchPairs
[checkValidIndex
] < 0) {
622 out
->setString(cx
->runtime
->emptyString
);
625 return createDependent(cx
, get(pairNum
, 0), get(pairNum
, 1), out
);
629 RegExpStatics::createLastParen(JSContext
*cx
, Value
*out
) const
631 if (pairCount() <= 1) {
632 out
->setString(cx
->runtime
->emptyString
);
635 size_t num
= pairCount() - 1;
636 int start
= get(num
, 0);
637 int end
= get(num
, 1);
639 out
->setString(cx
->runtime
->emptyString
);
642 JS_ASSERT(start
>= 0 && end
>= 0);
643 JS_ASSERT(end
>= start
);
644 return createDependent(cx
, start
, end
, out
);
648 RegExpStatics::createLeftContext(JSContext
*cx
, Value
*out
) const
651 out
->setString(cx
->runtime
->emptyString
);
654 if (matchPairs
[0] < 0) {
655 *out
= UndefinedValue();
658 return createDependent(cx
, 0, matchPairs
[0], out
);
662 RegExpStatics::createRightContext(JSContext
*cx
, Value
*out
) const
665 out
->setString(cx
->runtime
->emptyString
);
668 if (matchPairs
[1] < 0) {
669 *out
= UndefinedValue();
672 return createDependent(cx
, matchPairs
[1], matchPairsInput
->length(), out
);
676 RegExpStatics::getParen(size_t pairNum
, JSSubString
*out
) const
678 checkParenNum(pairNum
);
679 if (!pairIsPresent(pairNum
)) {
680 *out
= js_EmptySubString
;
683 out
->chars
= matchPairsInput
->chars() + get(pairNum
, 0);
684 out
->length
= getParenLength(pairNum
);
688 RegExpStatics::getLastMatch(JSSubString
*out
) const
691 *out
= js_EmptySubString
;
694 JS_ASSERT(matchPairsInput
);
695 out
->chars
= matchPairsInput
->chars() + get(0, 0);
696 JS_ASSERT(get(0, 1) >= get(0, 0));
697 out
->length
= get(0, 1) - get(0, 0);
701 RegExpStatics::getLastParen(JSSubString
*out
) const
703 size_t pc
= pairCount();
704 /* Note: the first pair is the whole match. */
706 *out
= js_EmptySubString
;
709 getParen(pc
- 1, out
);
713 RegExpStatics::getLeftContext(JSSubString
*out
) const
716 *out
= js_EmptySubString
;
719 out
->chars
= matchPairsInput
->chars();
720 out
->length
= get(0, 0);
724 RegExpStatics::getRightContext(JSSubString
*out
) const
727 *out
= js_EmptySubString
;
730 out
->chars
= matchPairsInput
->chars() + get(0, 1);
731 JS_ASSERT(get(0, 1) <= int(matchPairsInput
->length()));
732 out
->length
= matchPairsInput
->length() - get(0, 1);
737 #endif /* jsregexpinlines_h___ */