1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sw=4 et tw=99 ft=cpp:
4 * ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the
17 * The Original Code is Mozilla SpiderMonkey JavaScript 1.9 code, released
20 * The Initial Developer of the Original Code is
21 * the Mozilla Corporation.
24 * Chris Leary <cdleary@mozilla.com>
26 * Alternatively, the contents of this file may be used under the terms of
27 * either of the GNU General Public License Version 2 or later (the "GPL"),
28 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
40 #ifndef jsregexpinlines_h___
41 #define jsregexpinlines_h___
45 #include "jsobjinlines.h"
46 #include "assembler/wtf/Platform.h"
49 #include "yarr/yarr/RegexJIT.h"
51 #include "yarr/pcre/pcre.h"
57 * res = RegExp statics.
/* External declaration of a Class object named for RegExp statics; presumably the class of the holder object built by regexp_statics_construct - confirm. */
60 extern Class regexp_statics_class
;
/*
 * Build a holder object for RegExp statics.
 *
 * Visible steps: allocate a new object of regexp_statics_class with a NULL
 * prototype argument and |parent| as parent, create a RegExpStatics through
 * |cx|, and store that pointer as the private data of the new object.
 *
 * Fix: the class argument had been corrupted into a registered-trademark
 * sign followed by exp_statics_class (a mis-decoded ampersand-reg entity).
 * It is restored to the address-of expression on regexp_statics_class.
 *
 * NOTE(review): brace lines, any failure checks and the return statement are
 * not visible in this view and are intentionally not reconstructed here.
 */
62 static inline JSObject
*
63 regexp_statics_construct(JSContext
*cx
, JSObject
*parent
)
65 JSObject
*obj
= NewObject
<WithProto::Given
>(cx
, &regexp_statics_class
, NULL
, parent
);
68 RegExpStatics
*res
= cx
->create
<RegExpStatics
>();
/* The statics live in the private slot of the holder object. */
71 obj
->setPrivate(static_cast<void *>(res
));
75 /* Defined in the inlines header to avoid Yarr dependency includes in main header. */
/* Compiled form of the regexp: value-initialized by the constructor, handed to jitCompileRegex in compileHelper and to JSC::Yarr::executeRegex in executeInternal. */
81 JSC::Yarr::RegexCodeBlock compiled
;
88 RegExp(JSString
*source
, uint32 flags
)
89 : refCount(1), source(source
), compiled(), parenCount(0), flags(flags
) {}
/* Compiles |pattern|, recording the result in |compiled| and |parenCount|. */
90 bool compileHelper(JSContext
*cx
, UString
&pattern
);
/* Compiles this regexp from its |source| string by way of compileHelper. */
91 bool compile(JSContext
*cx
);
/* Union of the four flag bits; getFlags and createObjectNoStatics assert that no other bit is set. */
92 static const uint32 allFlags
= JSREG_FOLD
| JSREG_GLOB
| JSREG_MULTILINE
| JSREG_STICKY
;
/* Error handlers invoked from compileHelper with the |error| code produced while compiling. */
93 void handlePCREError(JSContext
*cx
, int error
);
94 void handleYarrError(JSContext
*cx
, int error
);
/* Sets up cx->regExpPool by stamping a timestamp at its base. */
95 static inline bool initArena(JSContext
*cx
);
/* Assertion-only sanity checks over the start/limit pairs stored in |buf|. */
96 static inline void checkMatchPairs(JSString
*input
, int *buf
, size_t matchItemCount
);
/* Builds the match result array object from the pairs stored in |buf|. */
97 static JSObject
*createResult(JSContext
*cx
, JSString
*input
, int *buf
, size_t matchItemCount
);
/* Shared implementation behind execute and executeNoStatics; the latter passes NULL for |res|. */
98 inline bool executeInternal(JSContext
*cx
, RegExpStatics
*res
, JSString
*input
,
99 size_t *lastIndex
, bool test
, Value
*rval
);
/* Releases |compiled| through jsRegExpFree. NOTE(review): the header of the enclosing function is not visible in this view - presumably the destructor; confirm. */
105 jsRegExpFree(compiled
);
/* Tests whether the single character |c| is a regexp metacharacter. */
109 static bool isMetaChar(jschar c
);
/* Scans |length| characters at |chars| with isMetaChar. */
110 static bool hasMetaChars(const jschar
*chars
, size_t length
);
/* The two lines below are the body of the original block comment documenting parseFlags; the parsed flags are delivered through |flagsOut|. */
113 * Parse regexp flags. Report an error and return false if an invalid
114 * sequence of flags is encountered (repeat/invalid flag).
116 static bool parseFlags(JSContext
*cx
, JSString
*flagStr
, uint32
&flagsOut
);
/* The four lines below are the body of the original block comment documenting execute. */
119 * Execute regexp on |input| at |*lastIndex|.
121 * On match: Update |*lastIndex| and RegExp class statics.
122 * Return true if test is true. Place an array in |*rval| if test is false.
123 * On mismatch: Make |*rval| null.
/* Public entry point that forwards every argument, including |res|, to executeInternal. NOTE(review): the tail of the parameter list (the |rval| parameter) is not visible in this view. */
125 bool execute(JSContext
*cx
, RegExpStatics
*res
, JSString
*input
, size_t *lastIndex
, bool test
,
128 return executeInternal(cx
, res
, input
, lastIndex
, test
, rval
);
/* Variant of execute that passes NULL as the statics argument of executeInternal, so no RegExpStatics object is supplied. NOTE(review): the tail of the parameter list (the |rval| parameter) is not visible in this view. */
131 bool executeNoStatics(JSContext
*cx
, JSString
*input
, size_t *lastIndex
, bool test
,
133 return executeInternal(cx
, NULL
, input
, lastIndex
, test
, rval
);
/* Factory: allocates a RegExp for |source| and |flags| and compiles it. */
137 static RegExp
*create(JSContext
*cx
, JSString
*source
, uint32 flags
);
/* Factory taking the flags as a string rather than a bit mask. */
138 static RegExp
*createFlagged(JSContext
*cx
, JSString
*source
, JSString
*flags
);
/* The four lines below are the body of the original block comment documenting createObject. */
140 * Create an object with new regular expression internals.
141 * @note The context's regexp statics flags are OR'd into the provided flags,
142 * so this function is really meant for object creation during code
143 * execution, as opposed to during something like XDR.
145 static JSObject
*createObject(JSContext
*cx
, RegExpStatics
*res
, const jschar
*chars
,
146 size_t length
, uint32 flags
);
/* Same as createObject but without consulting any RegExpStatics. NOTE(review): the final parameter line of this declaration is not visible in this view. */
147 static JSObject
*createObjectNoStatics(JSContext
*cx
, const jschar
*chars
, size_t length
,
/* Returns the RegExp stored as the private data of |obj|. */
149 static RegExp
*extractFrom(JSObject
*obj
);
/* Creates a new RegExp from the source and flags of |other|. */
150 static RegExp
*clone(JSContext
*cx
, const RegExp
&other
);
153 void incref(JSContext
*cx
) { JS_ATOMIC_INCREMENT(&refCount
); }
154 void decref(JSContext
*cx
);
157 JSString
*getSource() const { return source
; }
158 size_t getParenCount() const { return parenCount
; }
159 bool ignoreCase() const { return flags
& JSREG_FOLD
; }
160 bool global() const { return flags
& JSREG_GLOB
; }
161 bool multiline() const { return flags
& JSREG_MULTILINE
; }
162 bool sticky() const { return flags
& JSREG_STICKY
; }
164 const uint32
&getFlags() const { JS_ASSERT((flags
& allFlags
) == flags
); return flags
; }
165 uint32
flagCount() const;
/*
 * Small helper bound to one context and one array object. Every append
 * variant funnels into append(jsid, Value), which defines a property on
 * |array| through js_DefineProperty and converts the result to bool.
 * NOTE(review): the brace lines and the tail of the js_DefineProperty call
 * are not visible in this view.
 */
168 class RegExpMatchBuilder
170 JSContext
* const cx
;
171 JSObject
* const array
;
/* Stores the context and the array that later appends define properties on. */
174 RegExpMatchBuilder(JSContext
*cx
, JSObject
*array
) : cx(cx
), array(array
) {}
/* Defines the string |str| under the integer id |index|. */
176 bool append(int index
, JSString
*str
) {
178 return append(INT_TO_JSID(index
), StringValue(str
));
/* Core primitive: defines |val| under |id| on the array, with js::PropertyStub passed twice as the accessor arguments. */
181 bool append(jsid id
, Value val
) {
182 return !!js_DefineProperty(cx
, array
, id
, &val
, js::PropertyStub
, js::PropertyStub
,
/* Defines |index| as an Int32 value under the id of the runtime indexAtom. */
186 bool appendIndex(int index
) {
187 return append(ATOM_TO_JSID(cx
->runtime
->atomState
.indexAtom
), Int32Value(index
));
190 /* Sets the input attribute of the match array. */
191 bool appendInput(JSString
*str
) {
193 return append(ATOM_TO_JSID(cx
->runtime
->atomState
.inputAtom
), StringValue(str
));
197 /* RegExp inlines. */
/*
 * Prepares cx->regExpPool for use: allocates room for one int64 from the
 * pool and stores the current JS_Now() value there.
 * NOTE(review): the statement controlled by the first test, the declaration
 * of |timestamp|, any allocation-failure check and the return statements are
 * not visible in this view.
 */
200 RegExp::initArena(JSContext
*cx
)
/* A non-null first.next means the pool already holds an arena; presumably nothing more is needed in that case - confirm. */
202 if (cx
->regExpPool
.first
.next
)
206 * The regular expression arena pool is special... we want to hang on to it
207 * until a GC is performed so rapid subsequent regexp executions don't
208 * thrash malloc/freeing arena chunks.
210 * Stick a timestamp at the base of that pool.
213 JS_ARENA_ALLOCATE_CAST(timestamp
, int64
*, &cx
->regExpPool
, sizeof *timestamp
);
216 *timestamp
= JS_Now();
/*
 * Assertion-only validation of the match pairs in |buf|. The buffer is
 * walked two entries at a time; for each pair the visible assertions require
 * limit >= start, start >= 0, limit <= input length, and that starts never
 * decrease from one pair to the next.
 * NOTE(review): the declaration of |start| (presumably buf[i]) and any skip
 * of absent pairs are not visible in this view.
 */
221 RegExp::checkMatchPairs(JSString
*input
, int *buf
, size_t matchItemCount
)
224 size_t inputLength
= input
->length();
225 int largestStartSeen
= 0;
226 for (size_t i
= 0; i
< matchItemCount
; i
+= 2) {
/* The odd slot of each pair holds the limit index. */
228 int limit
= buf
[i
+ 1];
229 JS_ASSERT(limit
>= start
); /* Limit index must be larger than the start index. */
232 JS_ASSERT(start
>= 0);
233 JS_ASSERT(size_t(limit
) <= inputLength
);
234 /* Test the monotonically increasing nature of left parens. */
235 JS_ASSERT(start
>= largestStartSeen
);
236 largestStartSeen
= start
;
/*
 * Builds the array object returned for a successful match. A slow array is
 * created and filled through a RegExpMatchBuilder: element i / 2 receives a
 * dependent string over input[start, end) for each present pair, or the
 * undefined value for a missing parenthesized match; afterwards the index
 * property (buf[0]) and the input property are appended.
 * NOTE(review): the declarations of |start| and |captured|, the test that
 * separates present from missing pairs, the failure returns and the final
 * return are not visible in this view.
 */
242 RegExp::createResult(JSContext
*cx
, JSString
*input
, int *buf
, size_t matchItemCount
)
245 * Create the result array for a match. Array contents:
247 * 1..pairCount-1: paren matches
249 JSObject
*array
= js_NewSlowArrayObject(cx
);
253 RegExpMatchBuilder
builder(cx
, array
);
/* One iteration per start/end pair. */
254 for (size_t i
= 0; i
< matchItemCount
; i
+= 2) {
256 int end
= buf
[i
+ 1];
260 JS_ASSERT(start
<= end
);
261 JS_ASSERT(unsigned(end
) <= input
->length());
/* The captured text shares storage with |input| as a dependent string. */
262 captured
= js_NewDependentString(cx
, input
, start
, end
- start
);
263 if (!(captured
&& builder
.append(i
/ 2, captured
)))
266 /* Missing parenthesized match. */
267 JS_ASSERT(i
!= 0); /* Since we had a match, first pair must be present. */
268 if (!builder
.append(INT_TO_JSID(i
/ 2), UndefinedValue()))
/* buf[0] is the start of the whole match and becomes the index property. */
273 if (!builder
.appendIndex(buf
[0]) ||
274 !builder
.appendInput(input
))
/*
 * Runs the compiled regexp against |input| starting from |*lastIndex|.
 *
 * Visible flow: size a scratch buffer from parenCount, allocate it from
 * cx->regExpPool, run the matcher (the Yarr call and the jsRegExpExecute call
 * both appear; presumably they are build-time alternatives - confirm),
 * shift the recorded offsets by inputOffset when it is non-zero, sanity
 * check the pairs, update |res| from the match, then store either a true
 * boolean or the result array in |*rval|.
 * NOTE(review): brace lines, preprocessor lines, the sticky test, the body
 * of the buffer clearing loop, failure handling, the |test| branch and the
 * |*lastIndex| update are not visible in this view.
 */
281 RegExp::executeInternal(JSContext
*cx
, RegExpStatics
*res
, JSString
*input
,
282 size_t *lastIndex
, bool test
, Value
*rval
)
/* One pair for the whole match plus one per paren. */
287 const size_t pairCount
= parenCount
+ 1;
288 const size_t bufCount
= pairCount
* 3; /* Should be x2, but PCRE has... needs. */
289 const size_t matchItemCount
= pairCount
* 2;
/* Scratch space comes from the regexp arena pool of the context. */
294 AutoArenaAllocator
aaa(&cx
->regExpPool
);
295 int *buf
= aaa
.alloc
<int>(bufCount
);
300 * The JIT regexp procedure doesn't always initialize matchPair values.
301 * Maybe we can make this faster by ensuring it does?
/* Walks the first matchItemCount entries of |buf|; the loop body is not visible here. */
303 for (int *it
= buf
; it
!= buf
+ matchItemCount
; ++it
)
306 const jschar
*chars
= input
->chars();
307 size_t len
= input
->length();
310 * inputOffset emulates sticky mode by matching from this offset into the char buf and
311 * subtracting the delta off at the end.
313 size_t inputOffset
= 0;
316 /* Sticky matches at the last index for the regexp object. */
319 inputOffset
= *lastIndex
;
/* The match is started at *lastIndex - inputOffset in both matcher calls. */
323 int result
= JSC::Yarr::executeRegex(cx
, compiled
, chars
, *lastIndex
- inputOffset
, len
, buf
,
326 int result
= jsRegExpExecute(cx
, compiled
, chars
, len
, *lastIndex
- inputOffset
, buf
,
327 bufCount
) < 0 ? -1 : buf
[0];
335 * Adjust buf for the inputOffset. Use of sticky is rare and the matchItemCount is small, so
336 * just do another pass.
/* Negative entries are normalized to -1; every other entry has inputOffset added. */
338 if (JS_UNLIKELY(inputOffset
)) {
339 for (size_t i
= 0; i
< matchItemCount
; ++i
)
340 buf
[i
] = buf
[i
] < 0 ? -1 : buf
[i
] + inputOffset
;
343 /* Make sure the populated contents of |buf| are sane values against |input|. */
344 checkMatchPairs(input
, buf
, matchItemCount
);
/* NOTE(review): executeNoStatics passes NULL for |res|; the guard that must precede this call is not visible here. */
347 res
->updateFromMatch(cx
, input
, buf
, matchItemCount
);
352 *rval
= BooleanValue(true);
356 JSObject
*array
= createResult(cx
, input
, buf
, matchItemCount
);
360 *rval
= ObjectValue(*array
);
/*
 * Allocates raw storage for a RegExp through cx->malloc, constructs the
 * object in place with |source| and |flags|, and compiles it. When compile
 * fails the object is torn down again through cx->destroy.
 * NOTE(review): the declaration of |self|, the allocation-failure check and
 * the return statements are not visible in this view.
 */
365 RegExp::create(JSContext
*cx
, JSString
*source
, uint32 flags
)
368 void *mem
= cx
->malloc(sizeof(*self
));
/* Placement new into the storage obtained above. */
371 self
= new (mem
) RegExp(source
, flags
);
372 if (!self
->compile(cx
)) {
373 cx
->destroy
<RegExp
>(self
);
/*
 * Creates a regexp object whose flags are the union of |flags| and the flags
 * currently reported by |res|, delegating the actual work to
 * createObjectNoStatics.
 * NOTE(review): the tail of the parameter list (the |flags| parameter) is
 * not visible in this view.
 */
380 RegExp::createObject(JSContext
*cx
, RegExpStatics
*res
, const jschar
*chars
, size_t length
,
383 uint32 staticsFlags
= res
->getFlags();
384 return createObjectNoStatics(cx
, chars
, length
, flags
| staticsFlags
);
/*
 * Creates a regexp object from |length| characters at |chars|. Visible
 * steps: assert that |flags| holds only bits from allFlags, copy the
 * characters into a new string, create the RegExp from it, create a new
 * instance of js_RegExpClass, and zero its lastIndex.
 * NOTE(review): failure checks, the step that attaches |re| to |obj| and the
 * return statement are not visible in this view.
 */
388 RegExp::createObjectNoStatics(JSContext
*cx
, const jschar
*chars
, size_t length
, uint32 flags
)
390 JS_ASSERT((flags
& allFlags
) == flags
);
391 JSString
*str
= js_NewStringCopyN(cx
, chars
, length
);
394 RegExp
*re
= RegExp::create(cx
, str
, flags
);
397 JSObject
*obj
= NewBuiltinClassInstance(cx
, &js_RegExpClass
);
403 obj
->zeroRegExpLastIndex();
/*
 * Heuristic consulted by compileHelper. In builds where both JS_TRACER and
 * JS_METHODJIT are defined it reports true exactly when neither
 * traceJitEnabled nor methodJitEnabled is set on |cx|.
 * NOTE(review): the return type line, the alternative preprocessor branch
 * and the closing lines are not visible in this view.
 */
409 YarrJITIsBroken(JSContext
*cx
)
411 #if defined(JS_TRACER) && defined(JS_METHODJIT)
412 /* FIXME/bug 604774: dead code walking.
414 * If both JITs are disabled, assume they were disabled because
415 * we're running on a blacklisted device.
417 return !cx
->traceJitEnabled
&& !cx
->methodJitEnabled
;
// Compiles |pattern| into |compiled| and fills in |parenCount|.
//
// Two compile paths are visible: a jitCompileRegex call that uses the runtime
// regExpAllocator and reports through |error| and |fellBack|, and a
// jsRegExpCompile call over the raw characters of |pattern|. Errors are
// forwarded to handlePCREError or handleYarrError.
// NOTE(review): the declaration of |error|, the preprocessor lines that
// select between the two paths, the tests around the error handlers and the
// return statements are not visible in this view.
425 RegExp::compileHelper(JSContext
*cx
, UString
&pattern
)
428 bool fellBack
= false;
430 jitCompileRegex(*cx
->runtime
->regExpAllocator
, compiled
, pattern
, parenCount
, error
, fellBack
, ignoreCase(), multiline()
432 /* Temporary gross hack to work around buggy kernels. */
433 , YarrJITIsBroken(cx
)
439 handlePCREError(cx
, error
);
441 handleYarrError(cx
, error
);
// Second path: case and line handling are chosen from ignoreCase() and multiline().
445 compiled
= jsRegExpCompile(pattern
.chars(), pattern
.length(),
446 ignoreCase() ? JSRegExpIgnoreCase
: JSRegExpDoNotIgnoreCase
,
447 multiline() ? JSRegExpMultiline
: JSRegExpSingleLine
,
448 &parenCount
, &error
);
451 handlePCREError(cx
, error
);
/*
 * Compiles this regexp. One visible path hands |source| straight to
 * compileHelper. The other wraps the source as ^(?: source ) in a character
 * buffer, turns that buffer into a new string and compiles the wrapped
 * pattern instead.
 * NOTE(review): the test that chooses between the two paths (presumably the
 * sticky flag, going by the surviving comment text), the declaration of |cb|
 * and the failure returns are not visible in this view.
 */
457 RegExp::compile(JSContext
*cx
)
460 return compileHelper(cx
, *source
);
462 * The sticky case we implement hackily by prepending a caret onto the front
463 * and relying on |::execute| to pseudo-slice the string when it sees a sticky regexp.
465 static const jschar prefix
[] = {'^', '(', '?', ':'};
466 static const jschar postfix
[] = {')'};
/* Capacity for prefix + source + postfix is reserved up front, so the three appends are asserted to succeed. */
469 if (!cb
.reserve(JS_ARRAY_LENGTH(prefix
) + source
->length() + JS_ARRAY_LENGTH(postfix
)))
471 JS_ALWAYS_TRUE(cb
.append(prefix
, JS_ARRAY_LENGTH(prefix
)));
472 JS_ALWAYS_TRUE(cb
.append(source
->chars(), source
->length()));
473 JS_ALWAYS_TRUE(cb
.append(postfix
, JS_ARRAY_LENGTH(postfix
)));
475 JSString
*fakeySource
= js_NewStringFromCharBuffer(cx
, cb
);
478 return compileHelper(cx
, *fakeySource
);
/*
 * Classifies |c| against a fixed list of regexp metacharacters; twelve case
 * labels are visible below.
 * NOTE(review): the switch header, any further case labels and the return
 * statements are not visible in this view.
 */
482 RegExp::isMetaChar(jschar c
)
485 /* Taken from the PatternCharacter production in 15.10.1. */
486 case '^': case '$': case '\\': case '.': case '*': case '+':
487 case '?': case '(': case ')': case '[': case ']': case '{':
/*
 * Tests each of the |length| characters at |chars| with isMetaChar, in order
 * from index zero.
 * NOTE(review): the return statements are not visible in this view;
 * presumably true on the first metacharacter and false otherwise - confirm.
 */
496 RegExp::hasMetaChars(const jschar
*chars
, size_t length
)
498 for (size_t i
= 0; i
< length
; ++i
) {
499 if (isMetaChar(chars
[i
]))
/*
 * Iterates over the set bits of |flags|: each step clears the lowest set bit
 * of the working copy, so the loop runs once per set flag.
 * NOTE(review): the counter declaration, the loop body and the return
 * statement are not visible in this view.
 */
506 RegExp::flagCount() const
509 for (uint32 tmpFlags
= flags
; tmpFlags
!= 0; tmpFlags
&= tmpFlags
- 1)
/* Drops one reference atomically; when the decrement yields zero this object is destroyed through cx->destroy. */
515 RegExp::decref(JSContext
*cx
)
517 if (JS_ATOMIC_DECREMENT(&refCount
) == 0)
518 cx
->destroy
<RegExp
>(this);
/*
 * Returns the private data of |obj| cast to RegExp pointer.
 * NOTE(review): the assertion tolerates a null |obj|, yet the return
 * statement dereferences |obj| unconditionally - confirm that callers never
 * pass null.
 */
522 RegExp::extractFrom(JSObject
*obj
)
524 JS_ASSERT_IF(obj
, obj
->isRegExp());
525 return static_cast<RegExp
*>(obj
->getPrivate());
/* Produces a new RegExp by calling create with the source and flags of |other|. */
529 RegExp::clone(JSContext
*cx
, const RegExp
&other
)
531 return create(cx
, other
.source
, other
.flags
);
534 /* RegExpStatics inlines. */
/*
 * Locates the RegExpStatics of a global object: reads the reserved slot
 * JSRESERVED_GLOBAL_REGEXP_STATICS of |global| and takes the private data of
 * the object stored there.
 * NOTE(review): the return statement is not visible in this view.
 */
536 inline RegExpStatics
*
537 RegExpStatics::extractFrom(JSObject
*global
)
539 Value resVal
= global
->getReservedSlot(JSRESERVED_GLOBAL_REGEXP_STATICS
);
540 RegExpStatics
*res
= static_cast<RegExpStatics
*>(resVal
.toObject().getPrivate());
/*
 * Stores in |*out| a dependent string covering [start, end) of
 * matchPairsInput. Asserts start <= end and end <= input length.
 * NOTE(review): the failure check on |str| and the return statements are not
 * visible in this view.
 */
545 RegExpStatics::createDependent(JSContext
*cx
, size_t start
, size_t end
, Value
*out
) const
547 JS_ASSERT(start
<= end
);
548 JS_ASSERT(end
<= matchPairsInput
->length());
549 JSString
*str
= js_NewDependentString(cx
, matchPairsInput
, start
, end
- start
);
552 *out
= StringValue(str
);
/* Sets |*out| to pendingInput when it is non-null, otherwise to the empty string of the runtime. */
557 RegExpStatics::createPendingInput(JSContext
*cx
, Value
*out
) const
559 out
->setString(pendingInput
? pendingInput
: cx
->runtime
->emptyString
);
/*
 * Produces the string for pair |pairNum|. When checkValidIndex / 2 is not
 * below pairCount(), or the entry matchPairs[checkValidIndex] is negative,
 * |*out| becomes the empty string; otherwise the work is delegated to
 * createDependent over the start and end of the pair.
 * NOTE(review): the return inside the first branch is not visible in this
 * view.
 */
564 RegExpStatics::makeMatch(JSContext
*cx
, size_t checkValidIndex
, size_t pairNum
, Value
*out
) const
566 if (checkValidIndex
/ 2 >= pairCount() || matchPairs
[checkValidIndex
] < 0) {
567 out
->setString(cx
->runtime
->emptyString
);
570 return createDependent(cx
, get(pairNum
, 0), get(pairNum
, 1), out
);
/*
 * Produces the string for the last pair. With at most one pair |*out|
 * becomes the empty string. Otherwise the start and end of pair
 * pairCount() - 1 are read and handed to createDependent.
 * NOTE(review): the test guarding the second empty-string assignment
 * (presumably an absent pair) and the returns inside both branches are not
 * visible in this view.
 */
574 RegExpStatics::createLastParen(JSContext
*cx
, Value
*out
) const
576 if (pairCount() <= 1) {
577 out
->setString(cx
->runtime
->emptyString
);
580 size_t num
= pairCount() - 1;
581 int start
= get(num
, 0);
582 int end
= get(num
, 1);
584 out
->setString(cx
->runtime
->emptyString
);
587 JS_ASSERT(start
>= 0 && end
>= 0);
588 JS_ASSERT(end
>= start
);
589 return createDependent(cx
, start
, end
, out
);
/*
 * Produces the text that precedes the last match: a dependent string over
 * [0, matchPairs[0]). A negative matchPairs[0] yields the undefined value
 * instead.
 * NOTE(review): the test guarding the empty-string assignment (presumably no
 * recorded pairs) and the returns inside both branches are not visible in
 * this view.
 */
593 RegExpStatics::createLeftContext(JSContext
*cx
, Value
*out
) const
596 out
->setString(cx
->runtime
->emptyString
);
599 if (matchPairs
[0] < 0) {
600 *out
= UndefinedValue();
603 return createDependent(cx
, 0, matchPairs
[0], out
);
/*
 * Produces the text that follows the last match: a dependent string from
 * matchPairs[1] to the end of matchPairsInput. A negative matchPairs[1]
 * yields the undefined value instead.
 * NOTE(review): the test guarding the empty-string assignment (presumably no
 * recorded pairs) and the returns inside both branches are not visible in
 * this view.
 */
607 RegExpStatics::createRightContext(JSContext
*cx
, Value
*out
) const
610 out
->setString(cx
->runtime
->emptyString
);
613 if (matchPairs
[1] < 0) {
614 *out
= UndefinedValue();
617 return createDependent(cx
, matchPairs
[1], matchPairsInput
->length(), out
);
/*
 * Fills |*out| with the substring for pair |pairNum|. An absent pair yields
 * js_EmptySubString; otherwise chars points into matchPairsInput at the
 * start of the pair and length comes from getParenLength.
 * NOTE(review): the return inside the absent-pair branch is not visible in
 * this view.
 */
621 RegExpStatics::getParen(size_t pairNum
, JSSubString
*out
) const
623 checkParenNum(pairNum
);
624 if (!pairIsPresent(pairNum
)) {
625 *out
= js_EmptySubString
;
628 out
->chars
= matchPairsInput
->chars() + getCrash(pairNum
, 0);
629 out
->length
= getParenLength(pairNum
);
/*
 * Fills |*out| with the substring of the whole last match (pair zero). With
 * no recorded pairs the result is js_EmptySubString. Crash checks require a
 * non-null matchPairsInput and an end that is not before the start.
 * NOTE(review): the return inside the no-pairs branch is not visible in this
 * view.
 */
633 RegExpStatics::getLastMatch(JSSubString
*out
) const
635 if (!pairCountCrash()) {
636 *out
= js_EmptySubString
;
639 JS_CRASH_UNLESS(matchPairsInput
);
640 out
->chars
= matchPairsInput
->chars() + getCrash(0, 0);
641 JS_CRASH_UNLESS(getCrash(0, 1) >= getCrash(0, 0));
642 out
->length
= get(0, 1) - get(0, 0);
/*
 * Fills |*out| with the substring of the last pair by delegating to
 * getParen. With at most one pair the result is js_EmptySubString.
 * NOTE(review): the return inside the first branch is not visible in this
 * view.
 */
646 RegExpStatics::getLastParen(JSSubString
*out
) const
648 size_t pairCount
= pairCountCrash();
649 /* Note: the first pair is the whole match. */
650 if (pairCount
<= 1) {
651 *out
= js_EmptySubString
;
654 getParen(pairCount
- 1, out
);
/*
 * Fills |*out| with the text before the last match: chars points at the
 * beginning of matchPairsInput and length is the start of pair zero. With no
 * recorded pairs the result is js_EmptySubString.
 * NOTE(review): the return inside the no-pairs branch is not visible in this
 * view.
 */
658 RegExpStatics::getLeftContext(JSSubString
*out
) const
660 if (!pairCountCrash()) {
661 *out
= js_EmptySubString
;
664 out
->chars
= matchPairsInput
->chars();
665 out
->length
= getCrash(0, 0);
/*
 * Fills |*out| with the text after the last match: chars points into
 * matchPairsInput at the end of pair zero and length is the remainder of the
 * input. With no recorded pairs the result is js_EmptySubString. A crash
 * check requires the end of pair zero to lie within the input.
 * NOTE(review): the return inside the no-pairs branch is not visible in this
 * view.
 */
669 RegExpStatics::getRightContext(JSSubString
*out
) const
671 if (!pairCountCrash()) {
672 *out
= js_EmptySubString
;
675 out
->chars
= matchPairsInput
->chars() + getCrash(0, 1);
676 JS_CRASH_UNLESS(get(0, 1) <= int(matchPairsInput
->length()));
677 out
->length
= matchPairsInput
->length() - get(0, 1);
682 #endif /* jsregexpinlines_h___ */