1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "vm/RegExpObject.h"
9 #include "mozilla/MemoryReporting.h"
10 #include "mozilla/PodOperations.h"
12 #include <type_traits>
14 #include "builtin/RegExp.h"
15 #include "builtin/SelfHostingDefines.h" // REGEXP_*_FLAG
16 #include "frontend/FrontendContext.h" // AutoReportFrontendContext
17 #include "frontend/TokenStream.h"
18 #include "gc/HashUtil.h"
19 #include "irregexp/RegExpAPI.h"
20 #include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_*
21 #include "js/friend/StackLimits.h" // js::ReportOverRecursed
22 #include "js/Object.h" // JS::GetBuiltinClass
23 #include "js/Printer.h" // js::GenericPrinter
24 #include "js/RegExp.h"
25 #include "js/RegExpFlags.h" // JS::RegExpFlags
26 #include "util/StringBuffer.h"
27 #include "util/Unicode.h"
28 #include "vm/JSONPrinter.h" // js::JSONPrinter
29 #include "vm/MatchPairs.h"
30 #include "vm/PlainObject.h"
31 #include "vm/RegExpStatics.h"
32 #include "vm/StringType.h"
34 #include "vm/JSContext-inl.h"
35 #include "vm/JSObject-inl.h"
36 #include "vm/NativeObject-inl.h"
37 #include "vm/Shape-inl.h"
41 using JS::AutoStableStringChars
;
42 using JS::CompileOptions
;
44 using JS::RegExpFlags
;
45 using mozilla::DebugOnly
;
46 using mozilla::PodCopy
;
48 using JS::AutoCheckCannotGC
;
50 static_assert(RegExpFlag::HasIndices
== REGEXP_HASINDICES_FLAG
,
51 "self-hosted JS and /d flag bits must agree");
52 static_assert(RegExpFlag::Global
== REGEXP_GLOBAL_FLAG
,
53 "self-hosted JS and /g flag bits must agree");
54 static_assert(RegExpFlag::IgnoreCase
== REGEXP_IGNORECASE_FLAG
,
55 "self-hosted JS and /i flag bits must agree");
56 static_assert(RegExpFlag::Multiline
== REGEXP_MULTILINE_FLAG
,
57 "self-hosted JS and /m flag bits must agree");
58 static_assert(RegExpFlag::DotAll
== REGEXP_DOTALL_FLAG
,
59 "self-hosted JS and /s flag bits must agree");
60 static_assert(RegExpFlag::Unicode
== REGEXP_UNICODE_FLAG
,
61 "self-hosted JS and /u flag bits must agree");
62 static_assert(RegExpFlag::UnicodeSets
== REGEXP_UNICODESETS_FLAG
,
63 "self-hosted JS and /v flag bits must agree");
64 static_assert(RegExpFlag::Sticky
== REGEXP_STICKY_FLAG
,
65 "self-hosted JS and /y flag bits must agree");
// Allocates a RegExpObject of kind |newKind| with prototype |proto| (the
// declaration defaults |proto| to nullptr), makes sure the initial custom
// shape for RegExpObject has been set up, and asserts that the lastIndex
// property lives in RegExpObject::lastIndexSlot().
67 RegExpObject
* js::RegExpAlloc(JSContext
* cx
, NewObjectKind newKind
,
68 HandleObject proto
/* = nullptr */) {
69 Rooted
<RegExpObject
*> regexp(
70 cx
, NewObjectWithClassProtoAndKind
<RegExpObject
>(cx
, proto
, newKind
));
75 if (!SharedShape::ensureInitialCustomShape
<RegExpObject
>(cx
, regexp
)) {
79 MOZ_ASSERT(regexp
->lookupPure(cx
->names().lastIndex
)->slot() ==
80 RegExpObject::lastIndexSlot());
// Sizes this object's pair storage to copyFrom.pairCount() through
// allocOrExpandArray() and then PodCopy()s |copyFrom|'s pairs into it.
// |copyFrom| must hold at least one pair (asserted).
87 bool VectorMatchPairs::initArrayFrom(VectorMatchPairs
& copyFrom
) {
88 MOZ_ASSERT(copyFrom
.pairCount() > 0);
90 if (!allocOrExpandArray(copyFrom
.pairCount())) {
94 PodCopy(pairs_
, copyFrom
.pairs_
, pairCount_
);
// Resizes vec_ to |pairCount| entries without initializing them
// (resizeUninitialized) and records the new count in pairCount_.
99 bool VectorMatchPairs::allocOrExpandArray(size_t pairCount
) {
100 if (!vec_
.resizeUninitialized(pairCount
)) {
105 pairCount_
= pairCount
;
// Returns the RegExpShared already attached to |regexp| when hasShared() is
// true; otherwise defers to createShared(cx, regexp).
112 RegExpShared
* RegExpObject::getShared(JSContext
* cx
,
113 Handle
<RegExpObject
*> regexp
) {
114 if (regexp
->hasShared()) {
115 return regexp
->getShared();
118 return createShared(cx
, regexp
);
// Compares |native| against each built-in flag getter (regexp_hasIndices,
// regexp_global, regexp_ignoreCase, regexp_multiline, regexp_dotAll,
// regexp_sticky, regexp_unicode, regexp_unicodeSets) and, on a match, stores
// the corresponding RegExpFlag bit in |*mask|.
// NOTE(review): the return statements are not visible in this view;
// presumably true on a match and false otherwise - confirm.
122 bool RegExpObject::isOriginalFlagGetter(JSNative native
, RegExpFlags
* mask
) {
123 if (native
== regexp_hasIndices
) {
124 *mask
= RegExpFlag::HasIndices
;
127 if (native
== regexp_global
) {
128 *mask
= RegExpFlag::Global
;
131 if (native
== regexp_ignoreCase
) {
132 *mask
= RegExpFlag::IgnoreCase
;
135 if (native
== regexp_multiline
) {
136 *mask
= RegExpFlag::Multiline
;
139 if (native
== regexp_dotAll
) {
140 *mask
= RegExpFlag::DotAll
;
143 if (native
== regexp_sticky
) {
144 *mask
= RegExpFlag::Sticky
;
147 if (native
== regexp_unicode
) {
148 *mask
= RegExpFlag::Unicode
;
151 if (native
== regexp_unicodeSets
) {
152 *mask
= RegExpFlag::UnicodeSets
;
// Finish hook listed as the last entry of RegExpObjectClassSpec. The visible
// part looks up |exec| on the prototype and asserts that it is a data
// property whose slot is not a fixed slot.
159 static bool FinishRegExpClassInit(JSContext
* cx
, JS::HandleObject ctor
,
160 JS::HandleObject proto
) {
162 // Assert RegExp.prototype.exec is usually stored in a dynamic slot. The
163 // optimization in InlinableNativeIRGenerator::tryAttachIntrinsicRegExpExec
165 Handle
<NativeObject
*> nproto
= proto
.as
<NativeObject
>();
166 auto prop
= nproto
->lookupPure(cx
->names().exec
);
167 MOZ_ASSERT(prop
->isDataProperty());
168 MOZ_ASSERT(!nproto
->isFixedSlot(prop
->slot()));
// ClassSpec referenced by both RegExpObject::class_ and
// RegExpObject::protoClass_: generic constructor/prototype creation hooks,
// the static and prototype property tables, and FinishRegExpClassInit as the
// final entry.
173 static const ClassSpec RegExpObjectClassSpec
= {
174 GenericCreateConstructor
<js::regexp_construct
, 2, gc::AllocKind::FUNCTION
>,
175 GenericCreatePrototype
<RegExpObject
>,
177 js::regexp_static_props
,
179 js::regexp_properties
,
180 FinishRegExpClassInit
};
// JSClass for RegExp instances: reserves RegExpObject::RESERVED_SLOTS slots,
// sets the cached-proto flag for JSProto_RegExp, has no class ops, and points
// at RegExpObjectClassSpec.
182 const JSClass
RegExpObject::class_
= {
184 JSCLASS_HAS_RESERVED_SLOTS(RegExpObject::RESERVED_SLOTS
) |
185 JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp
),
186 JS_NULL_CLASS_OPS
, &RegExpObjectClassSpec
};
188 const JSClass
RegExpObject::protoClass_
= {
189 "RegExp.prototype", JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp
),
190 JS_NULL_CLASS_OPS
, &RegExpObjectClassSpec
};
// Creates a RegExpObject from a raw character buffer: atomizes
// |chars|/|length| into |source| and forwards to the atom-taking create()
// overload. Only CharT == char16_t is accepted (static_assert).
192 template <typename CharT
>
193 RegExpObject
* RegExpObject::create(JSContext
* cx
, const CharT
* chars
,
194 size_t length
, RegExpFlags flags
,
195 NewObjectKind newKind
) {
196 static_assert(std::is_same_v
<CharT
, char16_t
>,
197 "this code may need updating if/when CharT encodes UTF-8");
199 Rooted
<JSAtom
*> source(cx
, AtomizeChars(cx
, chars
, length
));
204 return create(cx
, source
, flags
, newKind
);
207 template RegExpObject
* RegExpObject::create(JSContext
* cx
,
208 const char16_t
* chars
,
209 size_t length
, RegExpFlags flags
,
210 NewObjectKind newKind
);
// Allocates a RegExpObject with RegExpAlloc(cx, newKind) and initializes it
// from |source| and |flags| with lastIndex zeroed. No pattern syntax check is
// performed in the visible body.
// NOTE(review): per the name, callers presumably pass a pattern that has
// already been syntax-checked - confirm.
212 RegExpObject
* RegExpObject::createSyntaxChecked(JSContext
* cx
,
213 Handle
<JSAtom
*> source
,
215 NewObjectKind newKind
) {
216 Rooted
<RegExpObject
*> regexp(cx
, RegExpAlloc(cx
, newKind
));
221 regexp
->initAndZeroLastIndex(source
, flags
, cx
);
// Creates a RegExpObject for the atom |source| with |flags|. The pattern is
// first validated with irregexp::CheckPatternSyntax(), using a dummy token
// stream for error reporting and a LifoAllocScope over the context's temp
// LifoAlloc. The object is then allocated with RegExpAlloc() and initialized
// with lastIndex zeroed. The new object has no RegExpShared attached
// (asserted).
226 RegExpObject
* RegExpObject::create(JSContext
* cx
, Handle
<JSAtom
*> source
,
227 RegExpFlags flags
, NewObjectKind newKind
) {
228 Rooted
<RegExpObject
*> regexp(cx
);
230 AutoReportFrontendContext
fc(cx
);
231 CompileOptions
dummyOptions(cx
);
232 frontend::DummyTokenStream
dummyTokenStream(&fc
, dummyOptions
);
234 LifoAllocScope
allocScope(&cx
->tempLifoAlloc());
235 if (!irregexp::CheckPatternSyntax(cx
, cx
->stackLimitForCurrentPrincipal(),
236 dummyTokenStream
, source
, flags
)) {
240 regexp
= RegExpAlloc(cx
, newKind
);
245 regexp
->initAndZeroLastIndex(source
, flags
, cx
);
247 MOZ_ASSERT(!regexp
->hasShared());
// Obtains the RegExpShared for |regexp|'s source and flags from the current
// zone's regExps() table and attaches it to |regexp|. |regexp| must not have
// a RegExpShared on entry and has one on exit (both asserted).
253 RegExpShared
* RegExpObject::createShared(JSContext
* cx
,
254 Handle
<RegExpObject
*> regexp
) {
255 MOZ_ASSERT(!regexp
->hasShared());
256 Rooted
<JSAtom
*> source(cx
, regexp
->getSource());
257 RegExpShared
* shared
=
258 cx
->zone()->regExps().get(cx
, source
, regexp
->getFlags());
263 regexp
->setShared(shared
);
265 MOZ_ASSERT(regexp
->hasShared());
// Adds the lastIndex property to the still-empty |self| (asserted) through
// addPropertyInReservedSlot with only the Writable property flag, and returns
// the resulting shared shape. LAST_INDEX_SLOT is required to be 0.
270 SharedShape
* RegExpObject::assignInitialShape(JSContext
* cx
,
271 Handle
<RegExpObject
*> self
) {
272 MOZ_ASSERT(self
->empty());
274 static_assert(LAST_INDEX_SLOT
== 0);
276 /* The lastIndex property alone is writable but non-configurable. */
277 if (!NativeObject::addPropertyInReservedSlot(cx
, self
, cx
->names().lastIndex
,
279 {PropertyFlag::Writable
})) {
283 return self
->sharedShape();
// (Re-)initializes this object from |source| and |flags|.
// NOTE(review): the statements of the body are not visible in this view; per
// the name, lastIndex is expected to be left unchanged - confirm.
286 void RegExpObject::initIgnoringLastIndex(JSAtom
* source
, RegExpFlags flags
) {
287 // If this is a re-initialization with an existing RegExpShared, 'flags'
288 // may not match getShared()->flags, so forget the RegExpShared.
// Initializes this object through initIgnoringLastIndex(source, flags).
// NOTE(review): per the name, lastIndex is then reset to zero; that statement
// is not visible in this view - confirm.
295 void RegExpObject::initAndZeroLastIndex(JSAtom
* source
, RegExpFlags flags
,
297 initIgnoringLastIndex(source
, flags
);
// Walks the single-bit masks 1, 2, 4, ... of the raw flag byte (the loop ends
// once the uint8_t mask shifts out to 0) and reports each recognised flag
// through |known(name, flagLetter)|. |unknown| takes a uint8_t at the call
// sites in this file; presumably it is invoked for set bits without a
// matching case (that call is not visible in this view).
// NOTE(review): no case for RegExpFlag::UnicodeSets is visible below, although
// RegExpObject::toString() appends 'v' for that flag - confirm whether /v
// should be reported through |known| here as well.
301 template <typename KnownF
, typename UnknownF
>
302 void ForEachRegExpFlag(JS::RegExpFlags flags
, KnownF known
, UnknownF unknown
) {
303 uint8_t raw
= flags
.value();
305 for (uint8_t i
= 1; i
; i
= i
<< 1) {
310 case RegExpFlag::HasIndices
:
311 known("HasIndices", "d");
313 case RegExpFlag::Global
:
314 known("Global", "g");
316 case RegExpFlag::IgnoreCase
:
317 known("IgnoreCase", "i");
319 case RegExpFlag::Multiline
:
320 known("Multiline", "m");
322 case RegExpFlag::DotAll
:
323 known("DotAll", "s");
325 case RegExpFlag::Unicode
:
326 known("Unicode", "u");
328 case RegExpFlag::Sticky
:
329 known("Sticky", "y");
// Streams |flags| to |os| using two callbacks: the first writes the flag
// letter it is given, the second writes '?' and ignores its argument.
338 std::ostream
& JS::operator<<(std::ostream
& os
, RegExpFlags flags
) {
340 flags
, [&](const char* name
, const char* c
) { os
<< c
; },
341 [&](uint8_t value
) { os
<< '?'; });
345 #if defined(DEBUG) || defined(JS_JITSPEW)
// Dumps this object's own fields to |json| as three properties: "source"
// (the source atom), "flags" (an inline list of flag names, with values
// reaching the second callback printed as Unknown(xx)) and "lastIndex".
346 void RegExpObject::dumpOwnFields(js::JSONPrinter
& json
) const {
348 js::GenericPrinter
& out
= json
.beginStringProperty("source");
349 getSource()->dumpPropertyName(out
);
350 json
.endStringProperty();
353 json
.beginInlineListProperty("flags");
356 [&](const char* name
, const char* c
) { json
.value("%s", name
); },
357 [&](uint8_t value
) { json
.value("Unknown(%02x)", value
); });
358 json
.endInlineList();
361 js::GenericPrinter
& out
= json
.beginStringProperty("lastIndex");
362 getLastIndex().dumpStringContent(out
);
363 json
.endStringProperty();
// Writes the source characters (without quotes) to |out|, then the flag
// letters of getFlags(). The second callback is empty, so values passed to
// it produce no output.
367 void RegExpObject::dumpOwnStringContent(js::GenericPrinter
& out
) const {
370 getSource()->dumpCharsNoQuote(out
);
375 getFlags(), [&](const char* name
, const char* c
) { out
.put(c
); },
376 [&](uint8_t value
) {});
378 #endif /* defined(DEBUG) || defined(JS_JITSPEW) */
// Latin-1 overload: true when |c| is '\n' or '\r'.
380 static MOZ_ALWAYS_INLINE
bool IsRegExpLineTerminator(const JS::Latin1Char c
) {
381 return c
== '\n' || c
== '\r';
// Two-byte overload: true when |c| is '\n', '\r', U+2028 or U+2029.
384 static MOZ_ALWAYS_INLINE
bool IsRegExpLineTerminator(const char16_t c
) {
385 return c
== '\n' || c
== '\r' || c
== 0x2028 || c
== 0x2029;
// Latin-1 overload: appends the escape letter for line terminator |c| to
// |sb| ('n' or 'r'). The leading backslash is not appended here. The
// MOZ_CRASH covers a character that is not a line terminator.
388 static MOZ_ALWAYS_INLINE
bool AppendEscapedLineTerminator(
389 StringBuffer
& sb
, const JS::Latin1Char c
) {
392 if (!sb
.append('n')) {
397 if (!sb
.append('r')) {
402 MOZ_CRASH("Bad LineTerminator");
// Two-byte overload: appends the escape text for a line terminator to |sb|
// ('n', 'r', "u2028" or "u2029"). The leading backslash is not appended here.
// The MOZ_CRASH covers a character that is not a line terminator.
407 static MOZ_ALWAYS_INLINE
bool AppendEscapedLineTerminator(StringBuffer
& sb
,
411 if (!sb
.append('n')) {
416 if (!sb
.append('r')) {
421 if (!sb
.append("u2028")) {
426 if (!sb
.append("u2029")) {
431 MOZ_CRASH("Bad LineTerminator");
// Prepares |sb| when the first character needing an escape is found at |it|:
// switches |sb| to two-byte storage when CharT is char16_t, reserves
// oldLen + 1 characters, and copies the prefix [oldChars, it) into |sb| with
// an infallible append.
436 template <typename CharT
>
437 static MOZ_ALWAYS_INLINE
bool SetupBuffer(StringBuffer
& sb
,
438 const CharT
* oldChars
, size_t oldLen
,
440 if constexpr (std::is_same_v
<CharT
, char16_t
>) {
441 if (!sb
.ensureTwoByteChars()) {
446 if (!sb
.reserve(oldLen
+ 1)) {
450 sb
.infallibleAppend(oldChars
, size_t(it
- oldChars
));
454 // Note: leaves the string buffer empty if no escaping need be performed.
// Scans [oldChars, oldChars + oldLen) once, tracking bracket state
// (inBrackets) and whether the previous character was a backslash. A '/'
// that needs escaping gets a backslash in front of it, and a line terminator
// is replaced by its escape sequence (with a backslash added unless one
// precedes it already). |sb| is only set up via SetupBuffer() at the first
// character that needs escaping; once |sb| is non-empty every other
// character is appended unchanged.
455 template <typename CharT
>
456 static bool EscapeRegExpPattern(StringBuffer
& sb
, const CharT
* oldChars
,
458 bool inBrackets
= false;
459 bool previousCharacterWasBackslash
= false;
461 for (const CharT
* it
= oldChars
; it
< oldChars
+ oldLen
; ++it
) {
463 if (!previousCharacterWasBackslash
) {
468 } else if (ch
== '/') {
469 // There's a forward slash that needs escaping.
471 // This is the first char we've seen that needs escaping,
472 // copy everything up to this point.
473 if (!SetupBuffer(sb
, oldChars
, oldLen
, it
)) {
477 if (!sb
.append('\\')) {
480 } else if (ch
== '[') {
485 if (IsRegExpLineTerminator(ch
)) {
486 // There's LineTerminator that needs escaping.
488 // This is the first char we've seen that needs escaping,
489 // copy everything up to this point.
490 if (!SetupBuffer(sb
, oldChars
, oldLen
, it
)) {
494 if (!previousCharacterWasBackslash
) {
495 if (!sb
.append('\\')) {
499 if (!AppendEscapedLineTerminator(sb
, ch
)) {
502 } else if (!sb
.empty()) {
503 if (!sb
.append(ch
)) {
508 if (previousCharacterWasBackslash
) {
509 previousCharacterWasBackslash
= false;
510 } else if (ch
== '\\') {
511 previousCharacterWasBackslash
= true;
518 // ES6 draft rev32 21.2.3.2.4.
// Returns |src| escaped for use as regexp source text. An empty |src| yields
// cx->names().emptyRegExp_. Otherwise the Latin-1 or two-byte
// ::EscapeRegExpPattern helper fills |sb| and the result is
// sb.finishString().
// NOTE(review): the handling of a failed escape and of the case where nothing
// needed escaping (|sb| left empty) is not visible in this view.
519 JSLinearString
* js::EscapeRegExpPattern(JSContext
* cx
, Handle
<JSAtom
*> src
) {
521 if (src
->length() == 0) {
522 return cx
->names().emptyRegExp_
;
525 // We may never need to use |sb|. Start using it lazily.
526 JSStringBuilder
sb(cx
);
527 bool escapeFailed
= false;
528 if (src
->hasLatin1Chars()) {
529 JS::AutoCheckCannotGC nogc
;
531 !::EscapeRegExpPattern(sb
, src
->latin1Chars(nogc
), src
->length());
533 JS::AutoCheckCannotGC nogc
;
535 !::EscapeRegExpPattern(sb
, src
->twoByteChars(nogc
), src
->length());
545 return sb
.finishString();
548 // ES6 draft rev32 21.2.5.14. Optimized for RegExpObject.
// Builds the string "/" + escaped source + "/" + flag letters for |obj|.
// Space for the escaped source plus the two slashes is reserved up front,
// which is why the slashes use infallibleAppend. Flag letters are appended in
// the order d, g, i, m, s, u, v, y.
549 JSLinearString
* RegExpObject::toString(JSContext
* cx
,
550 Handle
<RegExpObject
*> obj
) {
552 Rooted
<JSAtom
*> src(cx
, obj
->getSource());
556 Rooted
<JSLinearString
*> escapedSrc(cx
, EscapeRegExpPattern(cx
, src
));
559 JSStringBuilder
sb(cx
);
560 size_t len
= escapedSrc
->length();
561 if (!sb
.reserve(len
+ 2)) {
564 sb
.infallibleAppend('/');
565 if (!sb
.append(escapedSrc
)) {
568 sb
.infallibleAppend('/');
571 if (obj
->hasIndices() && !sb
.append('d')) {
574 if (obj
->global() && !sb
.append('g')) {
577 if (obj
->ignoreCase() && !sb
.append('i')) {
580 if (obj
->multiline() && !sb
.append('m')) {
583 if (obj
->dotAll() && !sb
.append('s')) {
586 if (obj
->unicode() && !sb
.append('u')) {
589 if (obj
->unicodeSets() && !sb
.append('v')) {
592 if (obj
->sticky() && !sb
.append('y')) {
596 return sb
.finishString();
// Character predicate used by HasRegExpMetaChars(); the set of characters is
// the SyntaxCharacter production cited below (the test itself is not visible
// in this view).
599 template <typename CharT
>
600 static MOZ_ALWAYS_INLINE
bool IsRegExpMetaChar(CharT ch
) {
602 /* ES 2016 draft Mar 25, 2016 21.2.1 SyntaxCharacter. */
// Linear scan of chars[0 .. length) that tests each character with
// IsRegExpMetaChar. Explicit instantiations for Latin1Char and char16_t
// follow the definition.
// NOTE(review): the return statements are not visible in this view;
// presumably true as soon as a meta character is found - confirm.
623 template <typename CharT
>
624 bool js::HasRegExpMetaChars(const CharT
* chars
, size_t length
) {
625 for (size_t i
= 0; i
< length
; ++i
) {
626 if (IsRegExpMetaChar
<CharT
>(chars
[i
])) {
633 template bool js::HasRegExpMetaChars
<Latin1Char
>(const Latin1Char
* chars
,
636 template bool js::HasRegExpMetaChars
<char16_t
>(const char16_t
* chars
,
// Runs HasRegExpMetaChars() over |str|'s Latin-1 or two-byte characters,
// whichever representation the string uses. The raw character pointers are
// obtained under an AutoCheckCannotGC guard.
639 bool js::StringHasRegExpMetaChars(JSLinearString
* str
) {
640 AutoCheckCannotGC nogc
;
641 if (str
->hasLatin1Chars()) {
642 return HasRegExpMetaChars(str
->latin1Chars(nogc
), str
->length());
645 return HasRegExpMetaChars(str
->twoByteChars(nogc
), str
->length());
650 RegExpShared::RegExpShared(JSAtom
* source
, RegExpFlags flags
)
651 : CellWithTenuredGCPointer(source
), pairCount_(0), flags(flags
) {}
// Traces the GC edges held by this RegExpShared: the cell-header edge
// (labelled as the source), the pattern atom when kind() is Atom, the jitCode
// of each entry in compilationArray, and the groups template. All edges are
// traced as nullable.
653 void RegExpShared::traceChildren(JSTracer
* trc
) {
654 TraceNullableCellHeaderEdge(trc
, this, "RegExpShared source");
655 if (kind() == RegExpShared::Kind::Atom
) {
656 TraceNullableEdge(trc
, &patternAtom_
, "RegExpShared pattern atom");
658 for (auto& comp
: compilationArray
) {
659 TraceNullableEdge(trc
, &comp
.jitCode
, "RegExpShared code");
661 TraceNullableEdge(trc
, &groupsTemplate_
, "RegExpShared groups template");
// Clears the jitCode pointer of every entry in compilationArray and frees
// the |tables| storage.
665 void RegExpShared::discardJitCode() {
666 for (auto& comp
: compilationArray
) {
667 comp
.jitCode
= nullptr;
670 // We can also purge the tables used by JIT code.
671 tables
.clearAndFree();
// Releases the malloc-owned data of this cell: the bytecode of the entries in
// compilationArray (accounted as RegExpSharedBytecode), the named-capture
// index array when present (numNamedCaptures() * sizeof(uint32_t) bytes,
// accounted as RegExpSharedNamedCaptureData), and finally |tables| by an
// explicit destructor call.
674 void RegExpShared::finalize(JS::GCContext
* gcx
) {
675 for (auto& comp
: compilationArray
) {
677 size_t length
= comp
.byteCodeLength();
678 gcx
->free_(this, comp
.byteCode
, length
, MemoryUse::RegExpSharedBytecode
);
681 if (namedCaptureIndices_
) {
682 size_t length
= numNamedCaptures() * sizeof(uint32_t);
683 gcx
->free_(this, namedCaptureIndices_
, length
,
684 MemoryUse::RegExpSharedNamedCaptureData
);
686 tables
.~JitCodeTables();
// Works out which kind of code |re| needs for |input| and compiles it through
// irregexp::CompilePattern() when required. CodeKind::Any resolves to
// Bytecode, or to Jitcode when |re| is marked for tier-up or |input| is
// longer than 1000 characters. A Jitcode request falls back to Bytecode when
// native regexp codegen is disabled. Whether a compile is needed depends on
// the kind of |re| (Unparsed / RegExp) and, for RegExp, on isCompiled() for
// the input's character width and the chosen code kind.
690 bool RegExpShared::compileIfNecessary(JSContext
* cx
,
691 MutableHandleRegExpShared re
,
692 Handle
<JSLinearString
*> input
,
693 RegExpShared::CodeKind codeKind
) {
694 if (codeKind
== RegExpShared::CodeKind::Any
) {
695 // We start by interpreting regexps, then compile them once they are
696 // sufficiently hot. For very long input strings, we tier up eagerly.
697 codeKind
= RegExpShared::CodeKind::Bytecode
;
698 if (re
->markedForTierUp() || input
->length() > 1000) {
699 codeKind
= RegExpShared::CodeKind::Jitcode
;
703 // Fall back to bytecode if native codegen is not available.
704 if (!IsNativeRegExpEnabled() && codeKind
== RegExpShared::CodeKind::Jitcode
) {
705 codeKind
= RegExpShared::CodeKind::Bytecode
;
708 bool needsCompile
= false;
709 if (re
->kind() == RegExpShared::Kind::Unparsed
) {
712 if (re
->kind() == RegExpShared::Kind::RegExp
) {
713 if (!re
->isCompiled(input
->hasLatin1Chars(), codeKind
)) {
718 return irregexp::CompilePattern(cx
, re
, input
, codeKind
);
// Runs |re| against |input| starting at |start|, writing match pairs to
// |matches|. Steps visible here: compile on demand (CodeKind::Any), size
// |matches| to re->pairCount(), hand Atom-kind patterns to executeAtom(), and
// otherwise call irregexp::Execute(). When Execute reports an error with no
// exception pending but an interrupt pending, the interrupt is handled and
// the match is retried, forcing Jitcode first, for a limited number of
// retries (maxInterruptRetries). Running out of retries is reported with
// ReportOverRecursed().
724 RegExpRunStatus
RegExpShared::execute(JSContext
* cx
,
725 MutableHandleRegExpShared re
,
726 Handle
<JSLinearString
*> input
,
727 size_t start
, VectorMatchPairs
* matches
) {
730 // TODO: Add tracelogger support
732 /* Compile the code at point-of-use. */
733 if (!compileIfNecessary(cx
, re
, input
, RegExpShared::CodeKind::Any
)) {
734 return RegExpRunStatus::Error
;
738 * Ensure sufficient memory for output vector.
739 * No need to initialize it. The RegExp engine fills them in on a match.
741 if (!matches
->allocOrExpandArray(re
->pairCount())) {
742 ReportOutOfMemory(cx
);
743 return RegExpRunStatus::Error
;
746 if (re
->kind() == RegExpShared::Kind::Atom
) {
747 return RegExpShared::executeAtom(re
, input
, start
, matches
);
// NOTE(review): the allocation below repeats the identical
// allocOrExpandArray(re->pairCount()) call made a few lines above; nothing
// visible in between changes the pair count, so it looks redundant - confirm
// before removing.
751 * Ensure sufficient memory for output vector.
752 * No need to initialize it. The RegExp engine fills them in on a match.
754 if (!matches
->allocOrExpandArray(re
->pairCount())) {
755 ReportOutOfMemory(cx
);
756 return RegExpRunStatus::Error
;
759 uint32_t interruptRetries
= 0;
760 const uint32_t maxInterruptRetries
= 4;
762 DebugOnly
<bool> alreadyThrowing
= cx
->isExceptionPending();
763 RegExpRunStatus result
= irregexp::Execute(cx
, re
, input
, start
, matches
);
765 // Check if we must simulate the interruption
766 if (js::irregexp::IsolateShouldSimulateInterrupt(cx
->isolate
)) {
767 js::irregexp::IsolateClearShouldSimulateInterrupt(cx
->isolate
);
768 cx
->requestInterrupt(InterruptReason::CallbackUrgent
);
771 if (result
== RegExpRunStatus::Error
) {
772 /* Execute can return RegExpRunStatus::Error:
774 * 1. If the native stack overflowed
775 * 2. If the backtrack stack overflowed
776 * 3. If an interrupt was requested during execution.
778 * In the first two cases, we want to throw an error. In the
779 * third case, we want to handle the interrupt and try again.
780 * We cap the number of times we will retry.
782 if (cx
->isExceptionPending()) {
783 // If this regexp is being executed by recovery instructions
784 // while bailing out to handle an exception, there may already
785 // be an exception pending. If so, just return that exception
786 // instead of reporting a new one.
787 MOZ_ASSERT(alreadyThrowing
);
788 return RegExpRunStatus::Error
;
790 if (cx
->hasAnyPendingInterrupt()) {
791 if (!CheckForInterrupt(cx
)) {
792 return RegExpRunStatus::Error
;
794 if (interruptRetries
++ < maxInterruptRetries
) {
795 // The initial execution may have been interpreted, or the
796 // interrupt may have triggered a GC that discarded jitcode.
797 // To maximize the chance of succeeding before being
798 // interrupted again, we want to ensure we are compiled.
799 if (!compileIfNecessary(cx
, re
, input
,
800 RegExpShared::CodeKind::Jitcode
)) {
801 return RegExpRunStatus::Error
;
806 // If we have run out of retries, this regexp takes too long to execute.
807 ReportOverRecursed(cx
);
808 return RegExpRunStatus::Error
;
811 MOZ_ASSERT(result
== RegExpRunStatus::Success
||
812 result
== RegExpRunStatus::Success_NotFound
);
817 MOZ_CRASH("Unreachable");
// Switches a still-Unparsed RegExpShared (asserted) to Kind::Atom and records
// |pattern| as the pattern atom.
820 void RegExpShared::useAtomMatch(Handle
<JSAtom
*> pattern
) {
821 MOZ_ASSERT(kind() == RegExpShared::Kind::Unparsed
);
822 kind_
= RegExpShared::Kind::Atom
;
823 patternAtom_
= pattern
;
// Switches a still-Unparsed RegExpShared (asserted) to Kind::RegExp, records
// |pairCount|, and seeds ticks_ from jit::JitOptions.regexpWarmUpThreshold.
827 void RegExpShared::useRegExpMatch(size_t pairCount
) {
828 MOZ_ASSERT(kind() == RegExpShared::Kind::Unparsed
);
829 kind_
= RegExpShared::Kind::RegExp
;
830 pairCount_
= pairCount
;
831 ticks_
= jit::JitOptions
.regexpWarmUpThreshold
;
// Stores the named-capture data on |re|: the capture count, the groups
// template object and the capture index array. The array's size
// (numNamedCaptures * sizeof(uint32_t)) is reported as
// RegExpSharedNamedCaptureData cell memory. Both the groups template and the
// index array must be unset on entry (asserted).
835 void RegExpShared::InitializeNamedCaptures(JSContext
* cx
, HandleRegExpShared re
,
836 uint32_t numNamedCaptures
,
837 Handle
<PlainObject
*> templateObject
,
838 uint32_t* captureIndices
) {
839 MOZ_ASSERT(!re
->groupsTemplate_
);
840 MOZ_ASSERT(!re
->namedCaptureIndices_
);
842 re
->numNamedCaptures_
= numNamedCaptures
;
843 re
->groupsTemplate_
= templateObject
;
844 re
->namedCaptureIndices_
= captureIndices
;
846 uint32_t arraySize
= numNamedCaptures
* sizeof(uint32_t);
847 js::AddCellMemory(re
, arraySize
, MemoryUse::RegExpSharedNamedCaptureData
);
// Only valid for Kind::RegExp (asserted).
// NOTE(review): the tick bookkeeping itself is not visible in this view.
850 void RegExpShared::tierUpTick() {
851 MOZ_ASSERT(kind() == RegExpShared::Kind::RegExp
);
// Has early-outs for the cases where native regexp codegen is disabled and
// where this is not a Kind::RegExp.
// NOTE(review): the returned values and the remaining condition are not
// visible in this view.
857 bool RegExpShared::markedForTierUp() const {
858 if (!IsNativeRegExpEnabled()) {
861 if (kind() != RegExpShared::Kind::RegExp
) {
867 // When either unicode flag is set and if |index| points to a trail surrogate,
868 // step back to the corresponding lead surrogate.
// Only in-range, non-zero indices into two-byte strings are examined; every
// other input takes the early-out below.
869 static size_t StepBackToLeadSurrogate(const JSLinearString
* input
,
871 // |index| must be a position within a two-byte string, otherwise it can't
872 // point to the trail surrogate of a surrogate pair.
873 if (index
== 0 || index
>= input
->length() || input
->hasLatin1Chars()) {
878 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad
880 * Let listIndex be the index into Input of the character that was obtained
881 * from element index of str.
883 * In the spec, pattern match is performed with decoded Unicode code points,
884 * but our implementation performs it with UTF-16 encoded strings. In step 2,
885 * we should decrement lastIndex (index) if it points to a trail surrogate
886 * that has a corresponding lead surrogate.
888 * var r = /\uD83D\uDC38/ug;
890 * var str = "\uD83D\uDC38";
891 * var result = r.exec(str); // pattern match starts from index 0
892 * print(result.index); // prints 0
894 * Note: This doesn't match the current spec text and result in different
895 * values for `result.index` under certain conditions. However, the spec will
896 * change to match our implementation's behavior.
897 * See https://github.com/tc39/ecma262/issues/128.
899 JS::AutoCheckCannotGC nogc
;
900 const auto* chars
= input
->twoByteChars(nogc
);
// index == 0 and index >= length were rejected by the early-out above, so
// both chars[index] and chars[index - 1] are in bounds here.
901 if (unicode::IsTrailSurrogate(chars
[index
]) &&
902 unicode::IsLeadSurrogate(chars
[index
- 1])) {
// Matches an Atom-kind regexp (exactly one match pair, asserted) by plain
// string comparison against re->patternAtom(). With either unicode flag set,
// |start| is first adjusted by StepBackToLeadSurrogate(). Two paths are
// visible: an anchored check with HasSubstringAt() at |start| (after an
// overflow-safe bounds test), and a forward search with StringFindPattern().
// On success (*matches)[0] receives the start and limit of the match.
// NOTE(review): the condition that selects between the two paths is not
// visible in this view.
908 static RegExpRunStatus
ExecuteAtomImpl(RegExpShared
* re
, JSLinearString
* input
,
909 size_t start
, MatchPairs
* matches
) {
910 MOZ_ASSERT(re
->pairCount() == 1);
911 size_t length
= input
->length();
912 size_t searchLength
= re
->patternAtom()->length();
914 if (re
->unicode() || re
->unicodeSets()) {
915 start
= StepBackToLeadSurrogate(input
, start
);
919 // First part checks size_t overflow.
920 if (searchLength
+ start
< searchLength
|| searchLength
+ start
> length
) {
921 return RegExpRunStatus::Success_NotFound
;
923 if (!HasSubstringAt(input
, re
->patternAtom(), start
)) {
924 return RegExpRunStatus::Success_NotFound
;
927 (*matches
)[0].start
= start
;
928 (*matches
)[0].limit
= start
+ searchLength
;
929 matches
->checkAgainst(input
->length());
930 return RegExpRunStatus::Success
;
933 int res
= StringFindPattern(input
, re
->patternAtom(), start
);
935 return RegExpRunStatus::Success_NotFound
;
938 (*matches
)[0].start
= res
;
939 (*matches
)[0].limit
= res
+ searchLength
;
940 matches
->checkAgainst(input
->length());
941 return RegExpRunStatus::Success
;
// Raw-pointer entry point that forwards to ExecuteAtomImpl() under an
// AutoUnsafeCallWithABI guard.
944 RegExpRunStatus
js::ExecuteRegExpAtomRaw(RegExpShared
* re
,
945 JSLinearString
* input
, size_t start
,
946 MatchPairs
* matchPairs
) {
947 AutoUnsafeCallWithABI unsafe
;
948 return ExecuteAtomImpl(re
, input
, start
, matchPairs
);
// Handle-based wrapper that forwards to ExecuteAtomImpl().
952 RegExpRunStatus
RegExpShared::executeAtom(MutableHandleRegExpShared re
,
953 Handle
<JSLinearString
*> input
,
955 VectorMatchPairs
* matches
) {
956 return ExecuteAtomImpl(re
, input
, start
, matches
);
// Memory reporting: accumulates into |n| the bytecode of every compilation
// that has some, the storage of the |tables| container itself, and each
// table it holds.
959 size_t RegExpShared::sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf
) {
962 for (const auto& compilation
: compilationArray
) {
963 if (compilation
.byteCode
) {
964 n
+= mallocSizeOf(compilation
.byteCode
);
968 n
+= tables
.sizeOfExcludingThis(mallocSizeOf
);
969 for (size_t i
= 0; i
< tables
.length(); i
++) {
970 n
+= mallocSizeOf(tables
[i
].get());
// Starts with both optimizable shapes set to nullptr and then visits every
// entry of matchResultShapes_ (the loop body is not visible in this view).
978 RegExpRealm::RegExpRealm()
979 : optimizableRegExpPrototypeShape_(nullptr),
980 optimizableRegExpInstanceShape_(nullptr) {
981 for (auto& shape
: matchResultShapes_
) {
// Builds and caches the shape for match-result arrays of the given |kind|.
// A fresh dense array serves as template object: for ResultShapeKind::Indices
// only |groups| is defined on it; otherwise |index|, |input| and |groups| are
// defined in that order, followed by |indices| for
// ResultShapeKind::WithIndices. Every property is defined as an enumerable
// data property holding undefined, and the asserts pin each one to its
// expected slot. The template's shared shape is stored in
// matchResultShapes_[kind], which must be unset on entry (asserted), and
// returned.
986 SharedShape
* RegExpRealm::createMatchResultShape(JSContext
* cx
,
987 ResultShapeKind kind
) {
988 MOZ_ASSERT(!matchResultShapes_
[kind
]);
990 /* Create template array object */
991 Rooted
<ArrayObject
*> templateObject(cx
, NewDenseEmptyArray(cx
));
992 if (!templateObject
) {
996 if (kind
== ResultShapeKind::Indices
) {
997 /* The |indices| array only has a |groups| property. */
998 if (!NativeDefineDataProperty(cx
, templateObject
, cx
->names().groups
,
999 UndefinedHandleValue
, JSPROP_ENUMERATE
)) {
1002 MOZ_ASSERT(templateObject
->getLastProperty().slot() == IndicesGroupsSlot
);
1004 matchResultShapes_
[kind
].set(templateObject
->sharedShape());
1005 return matchResultShapes_
[kind
];
1008 /* Set dummy index property */
1009 if (!NativeDefineDataProperty(cx
, templateObject
, cx
->names().index
,
1010 UndefinedHandleValue
, JSPROP_ENUMERATE
)) {
1013 MOZ_ASSERT(templateObject
->getLastProperty().slot() ==
1014 MatchResultObjectIndexSlot
);
1016 /* Set dummy input property */
1017 if (!NativeDefineDataProperty(cx
, templateObject
, cx
->names().input
,
1018 UndefinedHandleValue
, JSPROP_ENUMERATE
)) {
1021 MOZ_ASSERT(templateObject
->getLastProperty().slot() ==
1022 MatchResultObjectInputSlot
);
1024 /* Set dummy groups property */
1025 if (!NativeDefineDataProperty(cx
, templateObject
, cx
->names().groups
,
1026 UndefinedHandleValue
, JSPROP_ENUMERATE
)) {
1029 MOZ_ASSERT(templateObject
->getLastProperty().slot() ==
1030 MatchResultObjectGroupsSlot
);
1032 if (kind
== ResultShapeKind::WithIndices
) {
1033 /* Set dummy indices property */
1034 if (!NativeDefineDataProperty(cx
, templateObject
, cx
->names().indices
,
1035 UndefinedHandleValue
, JSPROP_ENUMERATE
)) {
1038 MOZ_ASSERT(templateObject
->getLastProperty().slot() ==
1039 MatchResultObjectIndicesSlot
);
1043 if (kind
== ResultShapeKind::Normal
) {
1044 MOZ_ASSERT(templateObject
->numFixedSlots() == 0);
1045 MOZ_ASSERT(templateObject
->numDynamicSlots() ==
1046 MatchResultObjectNumDynamicSlots
);
1047 MOZ_ASSERT(templateObject
->slotSpan() == MatchResultObjectSlotSpan
);
1051 matchResultShapes_
[kind
].set(templateObject
->sharedShape());
1053 return matchResultShapes_
[kind
];
// Traces regExpStatics when it is set, every entry of matchResultShapes_, and
// the two optimizable shapes. The shape edges are traced as nullable.
1056 void RegExpRealm::trace(JSTracer
* trc
) {
1057 if (regExpStatics
) {
1058 regExpStatics
->trace(trc
);
1061 for (auto& shape
: matchResultShapes_
) {
1062 TraceNullableEdge(trc
, &shape
, "RegExpRealm::matchResultShapes_");
1065 TraceNullableEdge(trc
, &optimizableRegExpPrototypeShape_
,
1066 "RegExpRealm::optimizableRegExpPrototypeShape_");
1068 TraceNullableEdge(trc
, &optimizableRegExpInstanceShape_
,
1069 "RegExpRealm::optimizableRegExpInstanceShape_");
// Looks up Key(source, flags) in set_ through a DependentAddPtr. The visible
// remainder allocates a new RegExpShared cell for (source, flags) and adds it
// to set_ under the same key.
// NOTE(review): the hit path and the return statements are not visible in
// this view; presumably an existing entry is returned without allocating.
1072 RegExpShared
* RegExpZone::get(JSContext
* cx
, Handle
<JSAtom
*> source
,
1073 RegExpFlags flags
) {
1074 DependentAddPtr
<Set
> p(cx
, set_
, Key(source
, flags
));
1079 auto* shared
= cx
->newCell
<RegExpShared
>(source
, flags
);
1084 if (!p
.add(cx
, set_
, Key(source
, flags
), shared
)) {
// Memory reporting: the size of this RegExpZone itself plus the storage of
// set_, both measured with |mallocSizeOf|.
1091 size_t RegExpZone::sizeOfIncludingThis(
1092 mozilla::MallocSizeOf mallocSizeOf
) const {
1093 return mallocSizeOf(this) + set_
.sizeOfExcludingThis(mallocSizeOf
);
1096 RegExpZone::RegExpZone(Zone
* zone
) : set_(zone
, zone
) {}
// Clones |regex|: creates a new RegExpObject with the same shared shape and
// alloc kind (default heap), obtains |regex|'s RegExpShared, initializes the
// clone from that shared's source and flags with lastIndex zeroed, and
// attaches the same RegExpShared to the clone. |regex| must be tenured with
// RegExpObject::AllocKind (asserted).
1100 JSObject
* js::CloneRegExpObject(JSContext
* cx
, Handle
<RegExpObject
*> regex
) {
1101 constexpr gc::AllocKind allocKind
= RegExpObject::AllocKind
;
1102 static_assert(gc::GetGCKindSlots(allocKind
) == RegExpObject::RESERVED_SLOTS
);
1103 MOZ_ASSERT(regex
->asTenured().getAllocKind() == allocKind
);
1105 Rooted
<SharedShape
*> shape(cx
, regex
->sharedShape());
1106 Rooted
<RegExpObject
*> clone(cx
, NativeObject::create
<RegExpObject
>(
1107 cx
, allocKind
, gc::Heap::Default
, shape
));
1112 RegExpShared
* shared
= RegExpObject::getShared(cx
, regex
);
1117 clone
->initAndZeroLastIndex(shared
->getSource(), shared
->getFlags(), cx
);
1118 clone
->setShared(shared
);
// Parses |length| flag characters from |chars| into |*flagsOut|, which starts
// as RegExpFlag::NoFlags. |*invalidFlag| is set to the offending character
// when a character is not a recognised flag, repeats a flag that is already
// set, or combines /u with /v.
// NOTE(review): the statements that accumulate the flag and return are not
// visible in this view.
1123 template <typename CharT
>
1124 static bool ParseRegExpFlags(const CharT
* chars
, size_t length
,
1125 RegExpFlags
* flagsOut
, char16_t
* invalidFlag
) {
1126 *flagsOut
= RegExpFlag::NoFlags
;
1128 for (size_t i
= 0; i
< length
; i
++) {
1130 if (!JS::MaybeParseRegExpFlag(chars
[i
], &flag
) || *flagsOut
& flag
) {
1131 *invalidFlag
= chars
[i
];
1135 // /u and /v flags are mutually exclusive.
1136 if (((*flagsOut
& RegExpFlag::Unicode
) &&
1137 (flag
& RegExpFlag::UnicodeSets
)) ||
1138 ((*flagsOut
& RegExpFlag::UnicodeSets
) &&
1139 (flag
& RegExpFlag::Unicode
))) {
1140 *invalidFlag
= chars
[i
];
// Linearizes |flagStr| and parses it with the file-local ::ParseRegExpFlags
// helper for the string's character width. The error path visible below
// converts the invalid flag character to UTF-8 and reports
// JSMSG_BAD_REGEXP_FLAG with it.
1150 bool js::ParseRegExpFlags(JSContext
* cx
, JSString
* flagStr
,
1151 RegExpFlags
* flagsOut
) {
1152 JSLinearString
* linear
= flagStr
->ensureLinear(cx
);
1157 size_t len
= linear
->length();
1160 char16_t invalidFlag
;
1161 if (linear
->hasLatin1Chars()) {
1162 AutoCheckCannotGC nogc
;
1163 ok
= ::ParseRegExpFlags(linear
->latin1Chars(nogc
), len
, flagsOut
,
1166 AutoCheckCannotGC nogc
;
1167 ok
= ::ParseRegExpFlags(linear
->twoByteChars(nogc
), len
, flagsOut
,
1172 JS::TwoByteChars
range(&invalidFlag
, 1);
1173 UniqueChars
utf8(JS::CharsToNewUTF8CharsZ(cx
, range
).c_str());
1177 JS_ReportErrorNumberUTF8(cx
, GetErrorMessage
, nullptr,
1178 JSMSG_BAD_REGEXP_FLAG
, utf8
.get());
// ubi::Node size of a RegExpShared: the arena thing size for
// AllocKind::REGEXP_SHARED plus what sizeOfExcludingThis() reports.
1185 JS::ubi::Node::Size
JS::ubi::Concrete
<RegExpShared
>::size(
1186 mozilla::MallocSizeOf mallocSizeOf
) const {
1187 return js::gc::Arena::thingSize(gc::AllocKind::REGEXP_SHARED
) +
1188 get().sizeOfExcludingThis(mallocSizeOf
);
1192 * Regular Expressions.
// Public API: inflates |bytes| (|length| chars) to two-byte characters with
// InflateString() and creates a RegExpObject of kind GenericObject from them
// with the given |flags|.
1194 JS_PUBLIC_API JSObject
* JS::NewRegExpObject(JSContext
* cx
, const char* bytes
,
1195 size_t length
, RegExpFlags flags
) {
1199 UniqueTwoByteChars
chars(InflateString(cx
, bytes
, length
));
1204 return RegExpObject::create(cx
, chars
.get(), length
, flags
, GenericObject
);
// Public API: creates a RegExpObject of kind GenericObject directly from the
// two-byte |chars| with the given |flags|.
1207 JS_PUBLIC_API JSObject
* JS::NewUCRegExpObject(JSContext
* cx
,
1208 const char16_t
* chars
,
1210 RegExpFlags flags
) {
1214 return RegExpObject::create(cx
, chars
, length
, flags
, GenericObject
);
// Public API: |obj| is used as a GlobalObject whose RegExpStatics are
// fetched.
// NOTE(review): the statement that stores |input| into the statics is not
// visible in this view.
1217 JS_PUBLIC_API
bool JS::SetRegExpInput(JSContext
* cx
, HandleObject obj
,
1218 HandleString input
) {
1223 Handle
<GlobalObject
*> global
= obj
.as
<GlobalObject
>();
1224 RegExpStatics
* res
= GlobalObject::getRegExpStatics(cx
, global
);
// Public API: |obj| is used as a GlobalObject whose RegExpStatics are
// fetched.
// NOTE(review): the statement that clears the statics is not visible in this
// view.
1233 JS_PUBLIC_API
bool JS::ClearRegExpStatics(JSContext
* cx
, HandleObject obj
) {
1238 Handle
<GlobalObject
*> global
= obj
.as
<GlobalObject
>();
1239 RegExpStatics
* res
= GlobalObject::getRegExpStatics(cx
, global
);
// Public API: copies |chars| (|length| units) into a new linear string and
// runs the RegExpObject |reobj| on it through ExecuteRegExpLegacy(), passing
// the RegExpStatics of the global object |obj|.
1248 JS_PUBLIC_API
bool JS::ExecuteRegExp(JSContext
* cx
, HandleObject obj
,
1249 HandleObject reobj
, const char16_t
* chars
,
1250 size_t length
, size_t* indexp
, bool test
,
1251 MutableHandleValue rval
) {
1255 Handle
<GlobalObject
*> global
= obj
.as
<GlobalObject
>();
1256 RegExpStatics
* res
= GlobalObject::getRegExpStatics(cx
, global
);
1261 Rooted
<JSLinearString
*> input(cx
, NewStringCopyN
<CanGC
>(cx
, chars
, length
));
1266 return ExecuteRegExpLegacy(cx
, res
, reobj
.as
<RegExpObject
>(), input
, indexp
,
// Public API: copies |chars| (|length| units) into a new linear string and
// runs the RegExpObject |obj| on it through ExecuteRegExpLegacy(), passing
// nullptr instead of a RegExpStatics.
1270 JS_PUBLIC_API
bool JS::ExecuteRegExpNoStatics(JSContext
* cx
, HandleObject obj
,
1271 const char16_t
* chars
,
1272 size_t length
, size_t* indexp
,
1274 MutableHandleValue rval
) {
1278 Rooted
<JSLinearString
*> input(cx
, NewStringCopyN
<CanGC
>(cx
, chars
, length
));
1283 return ExecuteRegExpLegacy(cx
, nullptr, obj
.as
<RegExpObject
>(), input
, indexp
,
// Public API: asks GetBuiltinClass() for |obj|'s class and sets |*isRegExp|
// to whether it is ESClass::RegExp.
1287 JS_PUBLIC_API
bool JS::ObjectIsRegExp(JSContext
* cx
, HandleObject obj
,
1292 if (!GetBuiltinClass(cx
, obj
, &cls
)) {
1296 *isRegExp
= cls
== ESClass::RegExp
;
// Public API: returns the flags of the RegExpShared obtained from
// RegExpToShared(cx, obj). RegExpFlag::NoFlags is the fallback return value,
// presumably for the case where no RegExpShared could be obtained (that
// condition is not visible in this view).
1300 JS_PUBLIC_API RegExpFlags
JS::GetRegExpFlags(JSContext
* cx
, HandleObject obj
) {
1304 RegExpShared
* shared
= RegExpToShared(cx
, obj
);
1306 return RegExpFlag::NoFlags
;
1308 return shared
->getFlags();
// Public API: returns the source of the RegExpShared obtained from
// RegExpToShared(cx, obj).
1311 JS_PUBLIC_API JSString
* JS::GetRegExpSource(JSContext
* cx
, HandleObject obj
) {
1315 RegExpShared
* shared
= RegExpToShared(cx
, obj
);
1319 return shared
->getSource();
// Public API: checks |chars| (|length| units) as a regexp pattern with
// |flags| without creating an object, via irregexp::CheckPatternSyntax() over
// the context's temp LifoAlloc and a dummy token stream. |error| is set to
// undefined after the check. The visible failure path converts the frontend
// error to a runtime error, treats out-of-memory and over-recursion
// separately, then copies the pending exception into |error| and clears it.
// NOTE(review): the success branch and the return statements are not visible
// in this view.
1322 JS_PUBLIC_API
bool JS::CheckRegExpSyntax(JSContext
* cx
, const char16_t
* chars
,
1323 size_t length
, RegExpFlags flags
,
1324 MutableHandleValue error
) {
1328 AutoReportFrontendContext
fc(cx
);
1329 CompileOptions
dummyOptions(cx
);
1330 frontend::DummyTokenStream
dummyTokenStream(&fc
, dummyOptions
);
1332 LifoAllocScope
allocScope(&cx
->tempLifoAlloc());
1334 mozilla::Range
<const char16_t
> source(chars
, length
);
1335 bool success
= irregexp::CheckPatternSyntax(
1336 cx
->tempLifoAlloc(), cx
->stackLimitForCurrentPrincipal(),
1337 dummyTokenStream
, source
, flags
);
1338 error
.set(UndefinedValue());
1340 if (!fc
.convertToRuntimeErrorAndClear()) {
1343 // We can fail because of OOM or over-recursion even if the syntax is valid.
1344 if (cx
->isThrowingOutOfMemory() || cx
->isThrowingOverRecursed()) {
1348 if (!cx
->getPendingException(error
)) {
1351 cx
->clearPendingException();