Bug 1885337 - Part 1: Implement to/from hex methods. r=dminor
[gecko.git] / js / src / vm / RegExpObject.cpp
blob29806c21d4b10aef47a2d5868a9d5773c02a0f5e
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "vm/RegExpObject.h"
9 #include "mozilla/MemoryReporting.h"
10 #include "mozilla/PodOperations.h"
12 #include <type_traits>
14 #include "builtin/RegExp.h"
15 #include "builtin/SelfHostingDefines.h" // REGEXP_*_FLAG
16 #include "frontend/FrontendContext.h" // AutoReportFrontendContext
17 #include "frontend/TokenStream.h"
18 #include "gc/HashUtil.h"
19 #include "irregexp/RegExpAPI.h"
20 #include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_*
21 #include "js/friend/StackLimits.h" // js::ReportOverRecursed
22 #include "js/Object.h" // JS::GetBuiltinClass
23 #include "js/Printer.h" // js::GenericPrinter
24 #include "js/RegExp.h"
25 #include "js/RegExpFlags.h" // JS::RegExpFlags
26 #include "util/StringBuffer.h"
27 #include "util/Unicode.h"
28 #include "vm/JSONPrinter.h" // js::JSONPrinter
29 #include "vm/MatchPairs.h"
30 #include "vm/PlainObject.h"
31 #include "vm/RegExpStatics.h"
32 #include "vm/StringType.h"
34 #include "vm/JSContext-inl.h"
35 #include "vm/JSObject-inl.h"
36 #include "vm/NativeObject-inl.h"
37 #include "vm/Shape-inl.h"
39 using namespace js;
41 using JS::AutoStableStringChars;
42 using JS::CompileOptions;
43 using JS::RegExpFlag;
44 using JS::RegExpFlags;
45 using mozilla::DebugOnly;
46 using mozilla::PodCopy;
48 using JS::AutoCheckCannotGC;
50 static_assert(RegExpFlag::HasIndices == REGEXP_HASINDICES_FLAG,
51 "self-hosted JS and /d flag bits must agree");
52 static_assert(RegExpFlag::Global == REGEXP_GLOBAL_FLAG,
53 "self-hosted JS and /g flag bits must agree");
54 static_assert(RegExpFlag::IgnoreCase == REGEXP_IGNORECASE_FLAG,
55 "self-hosted JS and /i flag bits must agree");
56 static_assert(RegExpFlag::Multiline == REGEXP_MULTILINE_FLAG,
57 "self-hosted JS and /m flag bits must agree");
58 static_assert(RegExpFlag::DotAll == REGEXP_DOTALL_FLAG,
59 "self-hosted JS and /s flag bits must agree");
60 static_assert(RegExpFlag::Unicode == REGEXP_UNICODE_FLAG,
61 "self-hosted JS and /u flag bits must agree");
62 static_assert(RegExpFlag::UnicodeSets == REGEXP_UNICODESETS_FLAG,
63 "self-hosted JS and /v flag bits must agree");
64 static_assert(RegExpFlag::Sticky == REGEXP_STICKY_FLAG,
65 "self-hosted JS and /y flag bits must agree");
67 RegExpObject* js::RegExpAlloc(JSContext* cx, NewObjectKind newKind,
68 HandleObject proto /* = nullptr */) {
69 Rooted<RegExpObject*> regexp(
70 cx, NewObjectWithClassProtoAndKind<RegExpObject>(cx, proto, newKind));
71 if (!regexp) {
72 return nullptr;
75 if (!SharedShape::ensureInitialCustomShape<RegExpObject>(cx, regexp)) {
76 return nullptr;
79 MOZ_ASSERT(regexp->lookupPure(cx->names().lastIndex)->slot() ==
80 RegExpObject::lastIndexSlot());
82 return regexp;
85 /* MatchPairs */
87 bool VectorMatchPairs::initArrayFrom(VectorMatchPairs& copyFrom) {
88 MOZ_ASSERT(copyFrom.pairCount() > 0);
90 if (!allocOrExpandArray(copyFrom.pairCount())) {
91 return false;
94 PodCopy(pairs_, copyFrom.pairs_, pairCount_);
96 return true;
99 bool VectorMatchPairs::allocOrExpandArray(size_t pairCount) {
100 if (!vec_.resizeUninitialized(pairCount)) {
101 return false;
104 pairs_ = &vec_[0];
105 pairCount_ = pairCount;
106 return true;
109 /* RegExpObject */
111 /* static */
112 RegExpShared* RegExpObject::getShared(JSContext* cx,
113 Handle<RegExpObject*> regexp) {
114 if (regexp->hasShared()) {
115 return regexp->getShared();
118 return createShared(cx, regexp);
121 /* static */
122 bool RegExpObject::isOriginalFlagGetter(JSNative native, RegExpFlags* mask) {
123 if (native == regexp_hasIndices) {
124 *mask = RegExpFlag::HasIndices;
125 return true;
127 if (native == regexp_global) {
128 *mask = RegExpFlag::Global;
129 return true;
131 if (native == regexp_ignoreCase) {
132 *mask = RegExpFlag::IgnoreCase;
133 return true;
135 if (native == regexp_multiline) {
136 *mask = RegExpFlag::Multiline;
137 return true;
139 if (native == regexp_dotAll) {
140 *mask = RegExpFlag::DotAll;
141 return true;
143 if (native == regexp_sticky) {
144 *mask = RegExpFlag::Sticky;
145 return true;
147 if (native == regexp_unicode) {
148 *mask = RegExpFlag::Unicode;
149 return true;
151 if (native == regexp_unicodeSets) {
152 *mask = RegExpFlag::UnicodeSets;
153 return true;
156 return false;
159 static bool FinishRegExpClassInit(JSContext* cx, JS::HandleObject ctor,
160 JS::HandleObject proto) {
161 #ifdef DEBUG
162 // Assert RegExp.prototype.exec is usually stored in a dynamic slot. The
163 // optimization in InlinableNativeIRGenerator::tryAttachIntrinsicRegExpExec
164 // depends on this.
165 Handle<NativeObject*> nproto = proto.as<NativeObject>();
166 auto prop = nproto->lookupPure(cx->names().exec);
167 MOZ_ASSERT(prop->isDataProperty());
168 MOZ_ASSERT(!nproto->isFixedSlot(prop->slot()));
169 #endif
170 return true;
173 static const ClassSpec RegExpObjectClassSpec = {
174 GenericCreateConstructor<js::regexp_construct, 2, gc::AllocKind::FUNCTION>,
175 GenericCreatePrototype<RegExpObject>,
176 nullptr,
177 js::regexp_static_props,
178 js::regexp_methods,
179 js::regexp_properties,
180 FinishRegExpClassInit};
182 const JSClass RegExpObject::class_ = {
183 "RegExp",
184 JSCLASS_HAS_RESERVED_SLOTS(RegExpObject::RESERVED_SLOTS) |
185 JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp),
186 JS_NULL_CLASS_OPS, &RegExpObjectClassSpec};
188 const JSClass RegExpObject::protoClass_ = {
189 "RegExp.prototype", JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp),
190 JS_NULL_CLASS_OPS, &RegExpObjectClassSpec};
192 template <typename CharT>
193 RegExpObject* RegExpObject::create(JSContext* cx, const CharT* chars,
194 size_t length, RegExpFlags flags,
195 NewObjectKind newKind) {
196 static_assert(std::is_same_v<CharT, char16_t>,
197 "this code may need updating if/when CharT encodes UTF-8");
199 Rooted<JSAtom*> source(cx, AtomizeChars(cx, chars, length));
200 if (!source) {
201 return nullptr;
204 return create(cx, source, flags, newKind);
207 template RegExpObject* RegExpObject::create(JSContext* cx,
208 const char16_t* chars,
209 size_t length, RegExpFlags flags,
210 NewObjectKind newKind);
212 RegExpObject* RegExpObject::createSyntaxChecked(JSContext* cx,
213 Handle<JSAtom*> source,
214 RegExpFlags flags,
215 NewObjectKind newKind) {
216 Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx, newKind));
217 if (!regexp) {
218 return nullptr;
221 regexp->initAndZeroLastIndex(source, flags, cx);
223 return regexp;
226 RegExpObject* RegExpObject::create(JSContext* cx, Handle<JSAtom*> source,
227 RegExpFlags flags, NewObjectKind newKind) {
228 Rooted<RegExpObject*> regexp(cx);
230 AutoReportFrontendContext fc(cx);
231 CompileOptions dummyOptions(cx);
232 frontend::DummyTokenStream dummyTokenStream(&fc, dummyOptions);
234 LifoAllocScope allocScope(&cx->tempLifoAlloc());
235 if (!irregexp::CheckPatternSyntax(cx, cx->stackLimitForCurrentPrincipal(),
236 dummyTokenStream, source, flags)) {
237 return nullptr;
240 regexp = RegExpAlloc(cx, newKind);
241 if (!regexp) {
242 return nullptr;
245 regexp->initAndZeroLastIndex(source, flags, cx);
247 MOZ_ASSERT(!regexp->hasShared());
249 return regexp;
252 /* static */
253 RegExpShared* RegExpObject::createShared(JSContext* cx,
254 Handle<RegExpObject*> regexp) {
255 MOZ_ASSERT(!regexp->hasShared());
256 Rooted<JSAtom*> source(cx, regexp->getSource());
257 RegExpShared* shared =
258 cx->zone()->regExps().get(cx, source, regexp->getFlags());
259 if (!shared) {
260 return nullptr;
263 regexp->setShared(shared);
265 MOZ_ASSERT(regexp->hasShared());
267 return shared;
270 SharedShape* RegExpObject::assignInitialShape(JSContext* cx,
271 Handle<RegExpObject*> self) {
272 MOZ_ASSERT(self->empty());
274 static_assert(LAST_INDEX_SLOT == 0);
276 /* The lastIndex property alone is writable but non-configurable. */
277 if (!NativeObject::addPropertyInReservedSlot(cx, self, cx->names().lastIndex,
278 LAST_INDEX_SLOT,
279 {PropertyFlag::Writable})) {
280 return nullptr;
283 return self->sharedShape();
286 void RegExpObject::initIgnoringLastIndex(JSAtom* source, RegExpFlags flags) {
287 // If this is a re-initialization with an existing RegExpShared, 'flags'
288 // may not match getShared()->flags, so forget the RegExpShared.
289 clearShared();
291 setSource(source);
292 setFlags(flags);
295 void RegExpObject::initAndZeroLastIndex(JSAtom* source, RegExpFlags flags,
296 JSContext* cx) {
297 initIgnoringLastIndex(source, flags);
298 zeroLastIndex(cx);
301 template <typename KnownF, typename UnknownF>
302 void ForEachRegExpFlag(JS::RegExpFlags flags, KnownF known, UnknownF unknown) {
303 uint8_t raw = flags.value();
305 for (uint8_t i = 1; i; i = i << 1) {
306 if (!(raw & i)) {
307 continue;
309 switch (raw & i) {
310 case RegExpFlag::HasIndices:
311 known("HasIndices", "d");
312 break;
313 case RegExpFlag::Global:
314 known("Global", "g");
315 break;
316 case RegExpFlag::IgnoreCase:
317 known("IgnoreCase", "i");
318 break;
319 case RegExpFlag::Multiline:
320 known("Multiline", "m");
321 break;
322 case RegExpFlag::DotAll:
323 known("DotAll", "s");
324 break;
325 case RegExpFlag::Unicode:
326 known("Unicode", "u");
327 break;
328 case RegExpFlag::Sticky:
329 known("Sticky", "y");
330 break;
331 default:
332 unknown(i);
333 break;
338 std::ostream& JS::operator<<(std::ostream& os, RegExpFlags flags) {
339 ForEachRegExpFlag(
340 flags, [&](const char* name, const char* c) { os << c; },
341 [&](uint8_t value) { os << '?'; });
342 return os;
#if defined(DEBUG) || defined(JS_JITSPEW)
// Writes this object's "source", "flags" and "lastIndex" as JSON properties.
void RegExpObject::dumpOwnFields(js::JSONPrinter& json) const {
  js::GenericPrinter& sourceOut = json.beginStringProperty("source");
  getSource()->dumpPropertyName(sourceOut);
  json.endStringProperty();

  json.beginInlineListProperty("flags");
  ForEachRegExpFlag(
      getFlags(),
      [&json](const char* name, const char* letter) {
        json.value("%s", name);
      },
      [&json](uint8_t bit) { json.value("Unknown(%02x)", bit); });
  json.endInlineList();

  js::GenericPrinter& lastIndexOut = json.beginStringProperty("lastIndex");
  getLastIndex().dumpStringContent(lastIndexOut);
  json.endStringProperty();
}

// Writes this object in literal form: "/", the source characters, "/", then
// the letter of every flag ForEachRegExpFlag recognizes. Unrecognized flag
// bits are skipped.
void RegExpObject::dumpOwnStringContent(js::GenericPrinter& out) const {
  out.put("/");
  getSource()->dumpCharsNoQuote(out);
  out.put("/");

  ForEachRegExpFlag(
      getFlags(),
      [&out](const char* name, const char* letter) { out.put(letter); },
      [](uint8_t bit) {});
}
#endif /* defined(DEBUG) || defined(JS_JITSPEW) */
380 static MOZ_ALWAYS_INLINE bool IsRegExpLineTerminator(const JS::Latin1Char c) {
381 return c == '\n' || c == '\r';
384 static MOZ_ALWAYS_INLINE bool IsRegExpLineTerminator(const char16_t c) {
385 return c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029;
388 static MOZ_ALWAYS_INLINE bool AppendEscapedLineTerminator(
389 StringBuffer& sb, const JS::Latin1Char c) {
390 switch (c) {
391 case '\n':
392 if (!sb.append('n')) {
393 return false;
395 break;
396 case '\r':
397 if (!sb.append('r')) {
398 return false;
400 break;
401 default:
402 MOZ_CRASH("Bad LineTerminator");
404 return true;
407 static MOZ_ALWAYS_INLINE bool AppendEscapedLineTerminator(StringBuffer& sb,
408 const char16_t c) {
409 switch (c) {
410 case '\n':
411 if (!sb.append('n')) {
412 return false;
414 break;
415 case '\r':
416 if (!sb.append('r')) {
417 return false;
419 break;
420 case 0x2028:
421 if (!sb.append("u2028")) {
422 return false;
424 break;
425 case 0x2029:
426 if (!sb.append("u2029")) {
427 return false;
429 break;
430 default:
431 MOZ_CRASH("Bad LineTerminator");
433 return true;
436 template <typename CharT>
437 static MOZ_ALWAYS_INLINE bool SetupBuffer(StringBuffer& sb,
438 const CharT* oldChars, size_t oldLen,
439 const CharT* it) {
440 if constexpr (std::is_same_v<CharT, char16_t>) {
441 if (!sb.ensureTwoByteChars()) {
442 return false;
446 if (!sb.reserve(oldLen + 1)) {
447 return false;
450 sb.infallibleAppend(oldChars, size_t(it - oldChars));
451 return true;
454 // Note: leaves the string buffer empty if no escaping need be performed.
455 template <typename CharT>
456 static bool EscapeRegExpPattern(StringBuffer& sb, const CharT* oldChars,
457 size_t oldLen) {
458 bool inBrackets = false;
459 bool previousCharacterWasBackslash = false;
461 for (const CharT* it = oldChars; it < oldChars + oldLen; ++it) {
462 CharT ch = *it;
463 if (!previousCharacterWasBackslash) {
464 if (inBrackets) {
465 if (ch == ']') {
466 inBrackets = false;
468 } else if (ch == '/') {
469 // There's a forward slash that needs escaping.
470 if (sb.empty()) {
471 // This is the first char we've seen that needs escaping,
472 // copy everything up to this point.
473 if (!SetupBuffer(sb, oldChars, oldLen, it)) {
474 return false;
477 if (!sb.append('\\')) {
478 return false;
480 } else if (ch == '[') {
481 inBrackets = true;
485 if (IsRegExpLineTerminator(ch)) {
486 // There's LineTerminator that needs escaping.
487 if (sb.empty()) {
488 // This is the first char we've seen that needs escaping,
489 // copy everything up to this point.
490 if (!SetupBuffer(sb, oldChars, oldLen, it)) {
491 return false;
494 if (!previousCharacterWasBackslash) {
495 if (!sb.append('\\')) {
496 return false;
499 if (!AppendEscapedLineTerminator(sb, ch)) {
500 return false;
502 } else if (!sb.empty()) {
503 if (!sb.append(ch)) {
504 return false;
508 if (previousCharacterWasBackslash) {
509 previousCharacterWasBackslash = false;
510 } else if (ch == '\\') {
511 previousCharacterWasBackslash = true;
515 return true;
518 // ES6 draft rev32 21.2.3.2.4.
519 JSLinearString* js::EscapeRegExpPattern(JSContext* cx, Handle<JSAtom*> src) {
520 // Step 2.
521 if (src->length() == 0) {
522 return cx->names().emptyRegExp_;
525 // We may never need to use |sb|. Start using it lazily.
526 JSStringBuilder sb(cx);
527 bool escapeFailed = false;
528 if (src->hasLatin1Chars()) {
529 JS::AutoCheckCannotGC nogc;
530 escapeFailed =
531 !::EscapeRegExpPattern(sb, src->latin1Chars(nogc), src->length());
532 } else {
533 JS::AutoCheckCannotGC nogc;
534 escapeFailed =
535 !::EscapeRegExpPattern(sb, src->twoByteChars(nogc), src->length());
537 if (escapeFailed) {
538 return nullptr;
541 // Step 3.
542 if (sb.empty()) {
543 return src;
545 return sb.finishString();
548 // ES6 draft rev32 21.2.5.14. Optimized for RegExpObject.
549 JSLinearString* RegExpObject::toString(JSContext* cx,
550 Handle<RegExpObject*> obj) {
551 // Steps 3-4.
552 Rooted<JSAtom*> src(cx, obj->getSource());
553 if (!src) {
554 return nullptr;
556 Rooted<JSLinearString*> escapedSrc(cx, EscapeRegExpPattern(cx, src));
558 // Step 7.
559 JSStringBuilder sb(cx);
560 size_t len = escapedSrc->length();
561 if (!sb.reserve(len + 2)) {
562 return nullptr;
564 sb.infallibleAppend('/');
565 if (!sb.append(escapedSrc)) {
566 return nullptr;
568 sb.infallibleAppend('/');
570 // Steps 5-7.
571 if (obj->hasIndices() && !sb.append('d')) {
572 return nullptr;
574 if (obj->global() && !sb.append('g')) {
575 return nullptr;
577 if (obj->ignoreCase() && !sb.append('i')) {
578 return nullptr;
580 if (obj->multiline() && !sb.append('m')) {
581 return nullptr;
583 if (obj->dotAll() && !sb.append('s')) {
584 return nullptr;
586 if (obj->unicode() && !sb.append('u')) {
587 return nullptr;
589 if (obj->unicodeSets() && !sb.append('v')) {
590 return nullptr;
592 if (obj->sticky() && !sb.append('y')) {
593 return nullptr;
596 return sb.finishString();
599 template <typename CharT>
600 static MOZ_ALWAYS_INLINE bool IsRegExpMetaChar(CharT ch) {
601 switch (ch) {
602 /* ES 2016 draft Mar 25, 2016 21.2.1 SyntaxCharacter. */
603 case '^':
604 case '$':
605 case '\\':
606 case '.':
607 case '*':
608 case '+':
609 case '?':
610 case '(':
611 case ')':
612 case '[':
613 case ']':
614 case '{':
615 case '}':
616 case '|':
617 return true;
618 default:
619 return false;
623 template <typename CharT>
624 bool js::HasRegExpMetaChars(const CharT* chars, size_t length) {
625 for (size_t i = 0; i < length; ++i) {
626 if (IsRegExpMetaChar<CharT>(chars[i])) {
627 return true;
630 return false;
633 template bool js::HasRegExpMetaChars<Latin1Char>(const Latin1Char* chars,
634 size_t length);
636 template bool js::HasRegExpMetaChars<char16_t>(const char16_t* chars,
637 size_t length);
639 bool js::StringHasRegExpMetaChars(JSLinearString* str) {
640 AutoCheckCannotGC nogc;
641 if (str->hasLatin1Chars()) {
642 return HasRegExpMetaChars(str->latin1Chars(nogc), str->length());
645 return HasRegExpMetaChars(str->twoByteChars(nogc), str->length());
648 /* RegExpShared */
650 RegExpShared::RegExpShared(JSAtom* source, RegExpFlags flags)
651 : CellWithTenuredGCPointer(source), pairCount_(0), flags(flags) {}
653 void RegExpShared::traceChildren(JSTracer* trc) {
654 TraceNullableCellHeaderEdge(trc, this, "RegExpShared source");
655 if (kind() == RegExpShared::Kind::Atom) {
656 TraceNullableEdge(trc, &patternAtom_, "RegExpShared pattern atom");
657 } else {
658 for (auto& comp : compilationArray) {
659 TraceNullableEdge(trc, &comp.jitCode, "RegExpShared code");
661 TraceNullableEdge(trc, &groupsTemplate_, "RegExpShared groups template");
665 void RegExpShared::discardJitCode() {
666 for (auto& comp : compilationArray) {
667 comp.jitCode = nullptr;
670 // We can also purge the tables used by JIT code.
671 tables.clearAndFree();
674 void RegExpShared::finalize(JS::GCContext* gcx) {
675 for (auto& comp : compilationArray) {
676 if (comp.byteCode) {
677 size_t length = comp.byteCodeLength();
678 gcx->free_(this, comp.byteCode, length, MemoryUse::RegExpSharedBytecode);
681 if (namedCaptureIndices_) {
682 size_t length = numNamedCaptures() * sizeof(uint32_t);
683 gcx->free_(this, namedCaptureIndices_, length,
684 MemoryUse::RegExpSharedNamedCaptureData);
686 tables.~JitCodeTables();
689 /* static */
690 bool RegExpShared::compileIfNecessary(JSContext* cx,
691 MutableHandleRegExpShared re,
692 Handle<JSLinearString*> input,
693 RegExpShared::CodeKind codeKind) {
694 if (codeKind == RegExpShared::CodeKind::Any) {
695 // We start by interpreting regexps, then compile them once they are
696 // sufficiently hot. For very long input strings, we tier up eagerly.
697 codeKind = RegExpShared::CodeKind::Bytecode;
698 if (re->markedForTierUp() || input->length() > 1000) {
699 codeKind = RegExpShared::CodeKind::Jitcode;
703 // Fall back to bytecode if native codegen is not available.
704 if (!IsNativeRegExpEnabled() && codeKind == RegExpShared::CodeKind::Jitcode) {
705 codeKind = RegExpShared::CodeKind::Bytecode;
708 bool needsCompile = false;
709 if (re->kind() == RegExpShared::Kind::Unparsed) {
710 needsCompile = true;
712 if (re->kind() == RegExpShared::Kind::RegExp) {
713 if (!re->isCompiled(input->hasLatin1Chars(), codeKind)) {
714 needsCompile = true;
717 if (needsCompile) {
718 return irregexp::CompilePattern(cx, re, input, codeKind);
720 return true;
723 /* static */
724 RegExpRunStatus RegExpShared::execute(JSContext* cx,
725 MutableHandleRegExpShared re,
726 Handle<JSLinearString*> input,
727 size_t start, VectorMatchPairs* matches) {
728 MOZ_ASSERT(matches);
730 // TODO: Add tracelogger support
732 /* Compile the code at point-of-use. */
733 if (!compileIfNecessary(cx, re, input, RegExpShared::CodeKind::Any)) {
734 return RegExpRunStatus::Error;
738 * Ensure sufficient memory for output vector.
739 * No need to initialize it. The RegExp engine fills them in on a match.
741 if (!matches->allocOrExpandArray(re->pairCount())) {
742 ReportOutOfMemory(cx);
743 return RegExpRunStatus::Error;
746 if (re->kind() == RegExpShared::Kind::Atom) {
747 return RegExpShared::executeAtom(re, input, start, matches);
751 * Ensure sufficient memory for output vector.
752 * No need to initialize it. The RegExp engine fills them in on a match.
754 if (!matches->allocOrExpandArray(re->pairCount())) {
755 ReportOutOfMemory(cx);
756 return RegExpRunStatus::Error;
759 uint32_t interruptRetries = 0;
760 const uint32_t maxInterruptRetries = 4;
761 do {
762 DebugOnly<bool> alreadyThrowing = cx->isExceptionPending();
763 RegExpRunStatus result = irregexp::Execute(cx, re, input, start, matches);
764 #ifdef DEBUG
765 // Check if we must simulate the interruption
766 if (js::irregexp::IsolateShouldSimulateInterrupt(cx->isolate)) {
767 js::irregexp::IsolateClearShouldSimulateInterrupt(cx->isolate);
768 cx->requestInterrupt(InterruptReason::CallbackUrgent);
770 #endif
771 if (result == RegExpRunStatus::Error) {
772 /* Execute can return RegExpRunStatus::Error:
774 * 1. If the native stack overflowed
775 * 2. If the backtrack stack overflowed
776 * 3. If an interrupt was requested during execution.
778 * In the first two cases, we want to throw an error. In the
779 * third case, we want to handle the interrupt and try again.
780 * We cap the number of times we will retry.
782 if (cx->isExceptionPending()) {
783 // If this regexp is being executed by recovery instructions
784 // while bailing out to handle an exception, there may already
785 // be an exception pending. If so, just return that exception
786 // instead of reporting a new one.
787 MOZ_ASSERT(alreadyThrowing);
788 return RegExpRunStatus::Error;
790 if (cx->hasAnyPendingInterrupt()) {
791 if (!CheckForInterrupt(cx)) {
792 return RegExpRunStatus::Error;
794 if (interruptRetries++ < maxInterruptRetries) {
795 // The initial execution may have been interpreted, or the
796 // interrupt may have triggered a GC that discarded jitcode.
797 // To maximize the chance of succeeding before being
798 // interrupted again, we want to ensure we are compiled.
799 if (!compileIfNecessary(cx, re, input,
800 RegExpShared::CodeKind::Jitcode)) {
801 return RegExpRunStatus::Error;
803 continue;
806 // If we have run out of retries, this regexp takes too long to execute.
807 ReportOverRecursed(cx);
808 return RegExpRunStatus::Error;
811 MOZ_ASSERT(result == RegExpRunStatus::Success ||
812 result == RegExpRunStatus::Success_NotFound);
814 return result;
815 } while (true);
817 MOZ_CRASH("Unreachable");
820 void RegExpShared::useAtomMatch(Handle<JSAtom*> pattern) {
821 MOZ_ASSERT(kind() == RegExpShared::Kind::Unparsed);
822 kind_ = RegExpShared::Kind::Atom;
823 patternAtom_ = pattern;
824 pairCount_ = 1;
827 void RegExpShared::useRegExpMatch(size_t pairCount) {
828 MOZ_ASSERT(kind() == RegExpShared::Kind::Unparsed);
829 kind_ = RegExpShared::Kind::RegExp;
830 pairCount_ = pairCount;
831 ticks_ = jit::JitOptions.regexpWarmUpThreshold;
834 /* static */
835 void RegExpShared::InitializeNamedCaptures(JSContext* cx, HandleRegExpShared re,
836 uint32_t numNamedCaptures,
837 Handle<PlainObject*> templateObject,
838 uint32_t* captureIndices) {
839 MOZ_ASSERT(!re->groupsTemplate_);
840 MOZ_ASSERT(!re->namedCaptureIndices_);
842 re->numNamedCaptures_ = numNamedCaptures;
843 re->groupsTemplate_ = templateObject;
844 re->namedCaptureIndices_ = captureIndices;
846 uint32_t arraySize = numNamedCaptures * sizeof(uint32_t);
847 js::AddCellMemory(re, arraySize, MemoryUse::RegExpSharedNamedCaptureData);
850 void RegExpShared::tierUpTick() {
851 MOZ_ASSERT(kind() == RegExpShared::Kind::RegExp);
852 if (ticks_ > 0) {
853 ticks_--;
857 bool RegExpShared::markedForTierUp() const {
858 if (!IsNativeRegExpEnabled()) {
859 return false;
861 if (kind() != RegExpShared::Kind::RegExp) {
862 return false;
864 return ticks_ == 0;
867 // When either unicode flag is set and if |index| points to a trail surrogate,
868 // step back to the corresponding lead surrogate.
869 static size_t StepBackToLeadSurrogate(const JSLinearString* input,
870 size_t index) {
871 // |index| must be a position within a two-byte string, otherwise it can't
872 // point to the trail surrogate of a surrogate pair.
873 if (index == 0 || index >= input->length() || input->hasLatin1Chars()) {
874 return index;
878 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad
879 * 21.2.2.2 step 2.
880 * Let listIndex be the index into Input of the character that was obtained
881 * from element index of str.
883 * In the spec, pattern match is performed with decoded Unicode code points,
884 * but our implementation performs it with UTF-16 encoded strings. In step 2,
885 * we should decrement lastIndex (index) if it points to a trail surrogate
886 * that has a corresponding lead surrogate.
888 * var r = /\uD83D\uDC38/ug;
889 * r.lastIndex = 1;
890 * var str = "\uD83D\uDC38";
891 * var result = r.exec(str); // pattern match starts from index 0
892 * print(result.index); // prints 0
894 * Note: This doesn't match the current spec text and result in different
895 * values for `result.index` under certain conditions. However, the spec will
896 * change to match our implementation's behavior.
897 * See https://github.com/tc39/ecma262/issues/128.
899 JS::AutoCheckCannotGC nogc;
900 const auto* chars = input->twoByteChars(nogc);
901 if (unicode::IsTrailSurrogate(chars[index]) &&
902 unicode::IsLeadSurrogate(chars[index - 1])) {
903 index--;
905 return index;
908 static RegExpRunStatus ExecuteAtomImpl(RegExpShared* re, JSLinearString* input,
909 size_t start, MatchPairs* matches) {
910 MOZ_ASSERT(re->pairCount() == 1);
911 size_t length = input->length();
912 size_t searchLength = re->patternAtom()->length();
914 if (re->unicode() || re->unicodeSets()) {
915 start = StepBackToLeadSurrogate(input, start);
918 if (re->sticky()) {
919 // First part checks size_t overflow.
920 if (searchLength + start < searchLength || searchLength + start > length) {
921 return RegExpRunStatus::Success_NotFound;
923 if (!HasSubstringAt(input, re->patternAtom(), start)) {
924 return RegExpRunStatus::Success_NotFound;
927 (*matches)[0].start = start;
928 (*matches)[0].limit = start + searchLength;
929 matches->checkAgainst(input->length());
930 return RegExpRunStatus::Success;
933 int res = StringFindPattern(input, re->patternAtom(), start);
934 if (res == -1) {
935 return RegExpRunStatus::Success_NotFound;
938 (*matches)[0].start = res;
939 (*matches)[0].limit = res + searchLength;
940 matches->checkAgainst(input->length());
941 return RegExpRunStatus::Success;
944 RegExpRunStatus js::ExecuteRegExpAtomRaw(RegExpShared* re,
945 JSLinearString* input, size_t start,
946 MatchPairs* matchPairs) {
947 AutoUnsafeCallWithABI unsafe;
948 return ExecuteAtomImpl(re, input, start, matchPairs);
951 /* static */
952 RegExpRunStatus RegExpShared::executeAtom(MutableHandleRegExpShared re,
953 Handle<JSLinearString*> input,
954 size_t start,
955 VectorMatchPairs* matches) {
956 return ExecuteAtomImpl(re, input, start, matches);
959 size_t RegExpShared::sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) {
960 size_t n = 0;
962 for (const auto& compilation : compilationArray) {
963 if (compilation.byteCode) {
964 n += mallocSizeOf(compilation.byteCode);
968 n += tables.sizeOfExcludingThis(mallocSizeOf);
969 for (size_t i = 0; i < tables.length(); i++) {
970 n += mallocSizeOf(tables[i].get());
973 return n;
976 /* RegExpRealm */
978 RegExpRealm::RegExpRealm()
979 : optimizableRegExpPrototypeShape_(nullptr),
980 optimizableRegExpInstanceShape_(nullptr) {
981 for (auto& shape : matchResultShapes_) {
982 shape = nullptr;
986 SharedShape* RegExpRealm::createMatchResultShape(JSContext* cx,
987 ResultShapeKind kind) {
988 MOZ_ASSERT(!matchResultShapes_[kind]);
990 /* Create template array object */
991 Rooted<ArrayObject*> templateObject(cx, NewDenseEmptyArray(cx));
992 if (!templateObject) {
993 return nullptr;
996 if (kind == ResultShapeKind::Indices) {
997 /* The |indices| array only has a |groups| property. */
998 if (!NativeDefineDataProperty(cx, templateObject, cx->names().groups,
999 UndefinedHandleValue, JSPROP_ENUMERATE)) {
1000 return nullptr;
1002 MOZ_ASSERT(templateObject->getLastProperty().slot() == IndicesGroupsSlot);
1004 matchResultShapes_[kind].set(templateObject->sharedShape());
1005 return matchResultShapes_[kind];
1008 /* Set dummy index property */
1009 if (!NativeDefineDataProperty(cx, templateObject, cx->names().index,
1010 UndefinedHandleValue, JSPROP_ENUMERATE)) {
1011 return nullptr;
1013 MOZ_ASSERT(templateObject->getLastProperty().slot() ==
1014 MatchResultObjectIndexSlot);
1016 /* Set dummy input property */
1017 if (!NativeDefineDataProperty(cx, templateObject, cx->names().input,
1018 UndefinedHandleValue, JSPROP_ENUMERATE)) {
1019 return nullptr;
1021 MOZ_ASSERT(templateObject->getLastProperty().slot() ==
1022 MatchResultObjectInputSlot);
1024 /* Set dummy groups property */
1025 if (!NativeDefineDataProperty(cx, templateObject, cx->names().groups,
1026 UndefinedHandleValue, JSPROP_ENUMERATE)) {
1027 return nullptr;
1029 MOZ_ASSERT(templateObject->getLastProperty().slot() ==
1030 MatchResultObjectGroupsSlot);
1032 if (kind == ResultShapeKind::WithIndices) {
1033 /* Set dummy indices property */
1034 if (!NativeDefineDataProperty(cx, templateObject, cx->names().indices,
1035 UndefinedHandleValue, JSPROP_ENUMERATE)) {
1036 return nullptr;
1038 MOZ_ASSERT(templateObject->getLastProperty().slot() ==
1039 MatchResultObjectIndicesSlot);
1042 #ifdef DEBUG
1043 if (kind == ResultShapeKind::Normal) {
1044 MOZ_ASSERT(templateObject->numFixedSlots() == 0);
1045 MOZ_ASSERT(templateObject->numDynamicSlots() ==
1046 MatchResultObjectNumDynamicSlots);
1047 MOZ_ASSERT(templateObject->slotSpan() == MatchResultObjectSlotSpan);
1049 #endif
1051 matchResultShapes_[kind].set(templateObject->sharedShape());
1053 return matchResultShapes_[kind];
1056 void RegExpRealm::trace(JSTracer* trc) {
1057 if (regExpStatics) {
1058 regExpStatics->trace(trc);
1061 for (auto& shape : matchResultShapes_) {
1062 TraceNullableEdge(trc, &shape, "RegExpRealm::matchResultShapes_");
1065 TraceNullableEdge(trc, &optimizableRegExpPrototypeShape_,
1066 "RegExpRealm::optimizableRegExpPrototypeShape_");
1068 TraceNullableEdge(trc, &optimizableRegExpInstanceShape_,
1069 "RegExpRealm::optimizableRegExpInstanceShape_");
1072 RegExpShared* RegExpZone::get(JSContext* cx, Handle<JSAtom*> source,
1073 RegExpFlags flags) {
1074 DependentAddPtr<Set> p(cx, set_, Key(source, flags));
1075 if (p) {
1076 return *p;
1079 auto* shared = cx->newCell<RegExpShared>(source, flags);
1080 if (!shared) {
1081 return nullptr;
1084 if (!p.add(cx, set_, Key(source, flags), shared)) {
1085 return nullptr;
1088 return shared;
1091 size_t RegExpZone::sizeOfIncludingThis(
1092 mozilla::MallocSizeOf mallocSizeOf) const {
1093 return mallocSizeOf(this) + set_.sizeOfExcludingThis(mallocSizeOf);
1096 RegExpZone::RegExpZone(Zone* zone) : set_(zone, zone) {}
1098 /* Functions */
1100 JSObject* js::CloneRegExpObject(JSContext* cx, Handle<RegExpObject*> regex) {
1101 constexpr gc::AllocKind allocKind = RegExpObject::AllocKind;
1102 static_assert(gc::GetGCKindSlots(allocKind) == RegExpObject::RESERVED_SLOTS);
1103 MOZ_ASSERT(regex->asTenured().getAllocKind() == allocKind);
1105 Rooted<SharedShape*> shape(cx, regex->sharedShape());
1106 Rooted<RegExpObject*> clone(cx, NativeObject::create<RegExpObject>(
1107 cx, allocKind, gc::Heap::Default, shape));
1108 if (!clone) {
1109 return nullptr;
1112 RegExpShared* shared = RegExpObject::getShared(cx, regex);
1113 if (!shared) {
1114 return nullptr;
1117 clone->initAndZeroLastIndex(shared->getSource(), shared->getFlags(), cx);
1118 clone->setShared(shared);
1120 return clone;
1123 template <typename CharT>
1124 static bool ParseRegExpFlags(const CharT* chars, size_t length,
1125 RegExpFlags* flagsOut, char16_t* invalidFlag) {
1126 *flagsOut = RegExpFlag::NoFlags;
1128 for (size_t i = 0; i < length; i++) {
1129 uint8_t flag;
1130 if (!JS::MaybeParseRegExpFlag(chars[i], &flag) || *flagsOut & flag) {
1131 *invalidFlag = chars[i];
1132 return false;
1135 // /u and /v flags are mutually exclusive.
1136 if (((*flagsOut & RegExpFlag::Unicode) &&
1137 (flag & RegExpFlag::UnicodeSets)) ||
1138 ((*flagsOut & RegExpFlag::UnicodeSets) &&
1139 (flag & RegExpFlag::Unicode))) {
1140 *invalidFlag = chars[i];
1141 return false;
1144 *flagsOut |= flag;
1147 return true;
1150 bool js::ParseRegExpFlags(JSContext* cx, JSString* flagStr,
1151 RegExpFlags* flagsOut) {
1152 JSLinearString* linear = flagStr->ensureLinear(cx);
1153 if (!linear) {
1154 return false;
1157 size_t len = linear->length();
1159 bool ok;
1160 char16_t invalidFlag;
1161 if (linear->hasLatin1Chars()) {
1162 AutoCheckCannotGC nogc;
1163 ok = ::ParseRegExpFlags(linear->latin1Chars(nogc), len, flagsOut,
1164 &invalidFlag);
1165 } else {
1166 AutoCheckCannotGC nogc;
1167 ok = ::ParseRegExpFlags(linear->twoByteChars(nogc), len, flagsOut,
1168 &invalidFlag);
1171 if (!ok) {
1172 JS::TwoByteChars range(&invalidFlag, 1);
1173 UniqueChars utf8(JS::CharsToNewUTF8CharsZ(cx, range).c_str());
1174 if (!utf8) {
1175 return false;
1177 JS_ReportErrorNumberUTF8(cx, GetErrorMessage, nullptr,
1178 JSMSG_BAD_REGEXP_FLAG, utf8.get());
1179 return false;
1182 return true;
1185 JS::ubi::Node::Size JS::ubi::Concrete<RegExpShared>::size(
1186 mozilla::MallocSizeOf mallocSizeOf) const {
1187 return js::gc::Arena::thingSize(gc::AllocKind::REGEXP_SHARED) +
1188 get().sizeOfExcludingThis(mallocSizeOf);
/*
 * Regular Expressions.
 */
1194 JS_PUBLIC_API JSObject* JS::NewRegExpObject(JSContext* cx, const char* bytes,
1195 size_t length, RegExpFlags flags) {
1196 AssertHeapIsIdle();
1197 CHECK_THREAD(cx);
1199 UniqueTwoByteChars chars(InflateString(cx, bytes, length));
1200 if (!chars) {
1201 return nullptr;
1204 return RegExpObject::create(cx, chars.get(), length, flags, GenericObject);
1207 JS_PUBLIC_API JSObject* JS::NewUCRegExpObject(JSContext* cx,
1208 const char16_t* chars,
1209 size_t length,
1210 RegExpFlags flags) {
1211 AssertHeapIsIdle();
1212 CHECK_THREAD(cx);
1214 return RegExpObject::create(cx, chars, length, flags, GenericObject);
1217 JS_PUBLIC_API bool JS::SetRegExpInput(JSContext* cx, HandleObject obj,
1218 HandleString input) {
1219 AssertHeapIsIdle();
1220 CHECK_THREAD(cx);
1221 cx->check(input);
1223 Handle<GlobalObject*> global = obj.as<GlobalObject>();
1224 RegExpStatics* res = GlobalObject::getRegExpStatics(cx, global);
1225 if (!res) {
1226 return false;
1229 res->reset(input);
1230 return true;
1233 JS_PUBLIC_API bool JS::ClearRegExpStatics(JSContext* cx, HandleObject obj) {
1234 AssertHeapIsIdle();
1235 CHECK_THREAD(cx);
1236 MOZ_ASSERT(obj);
1238 Handle<GlobalObject*> global = obj.as<GlobalObject>();
1239 RegExpStatics* res = GlobalObject::getRegExpStatics(cx, global);
1240 if (!res) {
1241 return false;
1244 res->clear();
1245 return true;
1248 JS_PUBLIC_API bool JS::ExecuteRegExp(JSContext* cx, HandleObject obj,
1249 HandleObject reobj, const char16_t* chars,
1250 size_t length, size_t* indexp, bool test,
1251 MutableHandleValue rval) {
1252 AssertHeapIsIdle();
1253 CHECK_THREAD(cx);
1255 Handle<GlobalObject*> global = obj.as<GlobalObject>();
1256 RegExpStatics* res = GlobalObject::getRegExpStatics(cx, global);
1257 if (!res) {
1258 return false;
1261 Rooted<JSLinearString*> input(cx, NewStringCopyN<CanGC>(cx, chars, length));
1262 if (!input) {
1263 return false;
1266 return ExecuteRegExpLegacy(cx, res, reobj.as<RegExpObject>(), input, indexp,
1267 test, rval);
1270 JS_PUBLIC_API bool JS::ExecuteRegExpNoStatics(JSContext* cx, HandleObject obj,
1271 const char16_t* chars,
1272 size_t length, size_t* indexp,
1273 bool test,
1274 MutableHandleValue rval) {
1275 AssertHeapIsIdle();
1276 CHECK_THREAD(cx);
1278 Rooted<JSLinearString*> input(cx, NewStringCopyN<CanGC>(cx, chars, length));
1279 if (!input) {
1280 return false;
1283 return ExecuteRegExpLegacy(cx, nullptr, obj.as<RegExpObject>(), input, indexp,
1284 test, rval);
1287 JS_PUBLIC_API bool JS::ObjectIsRegExp(JSContext* cx, HandleObject obj,
1288 bool* isRegExp) {
1289 cx->check(obj);
1291 ESClass cls;
1292 if (!GetBuiltinClass(cx, obj, &cls)) {
1293 return false;
1296 *isRegExp = cls == ESClass::RegExp;
1297 return true;
1300 JS_PUBLIC_API RegExpFlags JS::GetRegExpFlags(JSContext* cx, HandleObject obj) {
1301 AssertHeapIsIdle();
1302 CHECK_THREAD(cx);
1304 RegExpShared* shared = RegExpToShared(cx, obj);
1305 if (!shared) {
1306 return RegExpFlag::NoFlags;
1308 return shared->getFlags();
1311 JS_PUBLIC_API JSString* JS::GetRegExpSource(JSContext* cx, HandleObject obj) {
1312 AssertHeapIsIdle();
1313 CHECK_THREAD(cx);
1315 RegExpShared* shared = RegExpToShared(cx, obj);
1316 if (!shared) {
1317 return nullptr;
1319 return shared->getSource();
1322 JS_PUBLIC_API bool JS::CheckRegExpSyntax(JSContext* cx, const char16_t* chars,
1323 size_t length, RegExpFlags flags,
1324 MutableHandleValue error) {
1325 AssertHeapIsIdle();
1326 CHECK_THREAD(cx);
1328 AutoReportFrontendContext fc(cx);
1329 CompileOptions dummyOptions(cx);
1330 frontend::DummyTokenStream dummyTokenStream(&fc, dummyOptions);
1332 LifoAllocScope allocScope(&cx->tempLifoAlloc());
1334 mozilla::Range<const char16_t> source(chars, length);
1335 bool success = irregexp::CheckPatternSyntax(
1336 cx->tempLifoAlloc(), cx->stackLimitForCurrentPrincipal(),
1337 dummyTokenStream, source, flags);
1338 error.set(UndefinedValue());
1339 if (!success) {
1340 if (!fc.convertToRuntimeErrorAndClear()) {
1341 return false;
1343 // We can fail because of OOM or over-recursion even if the syntax is valid.
1344 if (cx->isThrowingOutOfMemory() || cx->isThrowingOverRecursed()) {
1345 return false;
1348 if (!cx->getPendingException(error)) {
1349 return false;
1351 cx->clearPendingException();
1353 return true;