1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "vm/StringType-inl.h"
9 #include "mozilla/DebugOnly.h"
10 #include "mozilla/HashFunctions.h"
11 #include "mozilla/Latin1.h"
12 #include "mozilla/MathAlgorithms.h"
13 #include "mozilla/MemoryReporting.h"
14 #include "mozilla/PodOperations.h"
15 #include "mozilla/RangedPtr.h"
16 #include "mozilla/TextUtils.h"
17 #include "mozilla/Utf8.h"
18 #include "mozilla/Vector.h"
20 #include <algorithm> // std::{all_of,copy_n,enable_if,is_const,move}
21 #include <iterator> // std::size
22 #include <type_traits> // std::is_same, std::is_unsigned
24 #include "jsfriendapi.h"
27 #include "builtin/Boolean.h"
28 #ifdef ENABLE_RECORD_TUPLE
29 # include "builtin/RecordObject.h"
31 #include "gc/AllocKind.h"
32 #include "gc/MaybeRooted.h"
33 #include "gc/Nursery.h"
34 #include "js/CharacterEncoding.h"
35 #include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_*
36 #include "js/Printer.h" // js::GenericPrinter
37 #include "js/PropertyAndElement.h" // JS_DefineElement
38 #include "js/SourceText.h" // JS::SourceText
39 #include "js/StableStringChars.h"
40 #include "js/UbiNode.h"
41 #include "util/Identifier.h" // js::IsIdentifierNameOrPrivateName
42 #include "util/Unicode.h"
43 #include "vm/GeckoProfiler.h"
44 #include "vm/JSONPrinter.h" // js::JSONPrinter
45 #include "vm/StaticStrings.h"
46 #include "vm/ToSource.h" // js::ValueToSource
48 #include "gc/Marking-inl.h"
49 #include "vm/GeckoProfiler-inl.h"
50 #ifdef ENABLE_RECORD_TUPLE
51 # include "vm/RecordType.h"
52 # include "vm/TupleType.h"
57 using mozilla::AsWritableChars
;
58 using mozilla::ConvertLatin1toUtf16
;
59 using mozilla::IsAsciiDigit
;
60 using mozilla::IsUtf16Latin1
;
61 using mozilla::LossyConvertUtf16toLatin1
;
62 using mozilla::PodCopy
;
63 using mozilla::RangedPtr
;
64 using mozilla::RoundUpPow2
;
67 using JS::AutoCheckCannotGC
;
68 using JS::AutoStableStringChars
;
70 using UniqueLatin1Chars
= UniquePtr
<Latin1Char
[], JS::FreePolicy
>;
72 size_t JSString::sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf
) {
73 // JSRope: do nothing, we'll count all children chars when we hit the leaf
79 MOZ_ASSERT(isLinear());
81 // JSDependentString: do nothing, we'll count the chars when we hit the base
87 // JSExternalString: Ask the embedding to tell us what's going on.
89 // Our callback isn't supposed to cause GC.
90 JS::AutoSuppressGCAnalysis nogc
;
91 JSExternalString
& external
= asExternal();
92 if (external
.hasLatin1Chars()) {
93 return asExternal().callbacks()->sizeOfBuffer(external
.latin1Chars(),
96 return asExternal().callbacks()->sizeOfBuffer(external
.twoByteChars(),
101 // JSExtensibleString: count the full capacity, not just the used space.
102 if (isExtensible()) {
103 JSExtensibleString
& extensible
= asExtensible();
104 return extensible
.hasLatin1Chars()
105 ? mallocSizeOf(extensible
.rawLatin1Chars())
106 : mallocSizeOf(extensible
.rawTwoByteChars());
109 // JSInlineString, JSFatInlineString, js::ThinInlineAtom, js::FatInlineAtom:
110 // the chars are inline.
115 // Chars in the nursery are owned by the nursery.
116 if (!ownsMallocedChars()) {
120 // Everything else: measure the space for the chars.
121 JSLinearString
& linear
= asLinear();
122 return linear
.hasLatin1Chars() ? mallocSizeOf(linear
.rawLatin1Chars())
123 : mallocSizeOf(linear
.rawTwoByteChars());
126 JS::ubi::Node::Size
JS::ubi::Concrete
<JSString
>::size(
127 mozilla::MallocSizeOf mallocSizeOf
) const {
128 JSString
& str
= get();
131 if (str
.isInline()) {
132 size
= str
.isFatInline() ? sizeof(js::FatInlineAtom
)
133 : sizeof(js::ThinInlineAtom
);
135 size
= sizeof(js::NormalAtom
);
138 size
= str
.isFatInline() ? sizeof(JSFatInlineString
) : sizeof(JSString
);
141 if (IsInsideNursery(&str
)) {
142 size
+= Nursery::nurseryCellHeaderSize();
145 size
+= str
.sizeOfExcludingThis(mallocSizeOf
);
150 const char16_t
JS::ubi::Concrete
<JSString
>::concreteTypeName
[] = u
"JSString";
152 mozilla::Maybe
<std::tuple
<size_t, size_t>> JSString::encodeUTF8Partial(
153 const JS::AutoRequireNoGC
& nogc
, mozilla::Span
<char> buffer
) const {
154 mozilla::Vector
<const JSString
*, 16, SystemAllocPolicy
> stack
;
155 const JSString
* current
= this;
156 char16_t pendingLeadSurrogate
= 0; // U+0000 means no pending lead surrogate
157 size_t totalRead
= 0;
158 size_t totalWritten
= 0;
160 if (current
->isRope()) {
161 JSRope
& rope
= current
->asRope();
162 if (!stack
.append(rope
.rightChild())) {
164 return mozilla::Nothing();
166 current
= rope
.leftChild();
170 JSLinearString
& linear
= current
->asLinear();
171 if (MOZ_LIKELY(linear
.hasLatin1Chars())) {
172 if (MOZ_UNLIKELY(pendingLeadSurrogate
)) {
173 if (buffer
.Length() < 3) {
174 return mozilla::Some(std::make_tuple(totalRead
, totalWritten
));
179 buffer
= buffer
.From(3);
180 totalRead
+= 1; // pendingLeadSurrogate
182 pendingLeadSurrogate
= 0;
184 auto src
= mozilla::AsChars(
185 mozilla::Span(linear
.latin1Chars(nogc
), linear
.length()));
188 std::tie(read
, written
) =
189 mozilla::ConvertLatin1toUtf8Partial(src
, buffer
);
190 buffer
= buffer
.From(written
);
192 totalWritten
+= written
;
193 if (read
< src
.Length()) {
194 return mozilla::Some(std::make_tuple(totalRead
, totalWritten
));
197 auto src
= mozilla::Span(linear
.twoByteChars(nogc
), linear
.length());
198 if (MOZ_UNLIKELY(pendingLeadSurrogate
)) {
200 if (!src
.IsEmpty()) {
203 if (unicode::IsTrailSurrogate(first
)) {
204 // Got a surrogate pair
205 if (buffer
.Length() < 4) {
206 return mozilla::Some(std::make_tuple(totalRead
, totalWritten
));
208 uint32_t astral
= unicode::UTF16Decode(pendingLeadSurrogate
, first
);
209 buffer
[0] = char(0b1111'0000 | (astral
>> 18));
210 buffer
[1] = char(0b1000'0000 | ((astral
>> 12) & 0b11'1111));
211 buffer
[2] = char(0b1000'0000 | ((astral
>> 6) & 0b11'1111));
212 buffer
[3] = char(0b1000'0000 | (astral
& 0b11'1111));
214 buffer
= buffer
.From(4);
215 totalRead
+= 2; // both pendingLeadSurrogate and first!
218 // unpaired surrogate
219 if (buffer
.Length() < 3) {
220 return mozilla::Some(std::make_tuple(totalRead
, totalWritten
));
225 buffer
= buffer
.From(3);
226 totalRead
+= 1; // pendingLeadSurrogate
229 pendingLeadSurrogate
= 0;
231 if (!src
.IsEmpty()) {
232 char16_t last
= src
[src
.Length() - 1];
233 if (unicode::IsLeadSurrogate(last
)) {
234 src
= src
.To(src
.Length() - 1);
235 pendingLeadSurrogate
= last
;
237 MOZ_ASSERT(!pendingLeadSurrogate
);
241 std::tie(read
, written
) =
242 mozilla::ConvertUtf16toUtf8Partial(src
, buffer
);
243 buffer
= buffer
.From(written
);
245 totalWritten
+= written
;
246 if (read
< src
.Length()) {
247 return mozilla::Some(std::make_tuple(totalRead
, totalWritten
));
254 current
= stack
.popCopy();
256 if (MOZ_UNLIKELY(pendingLeadSurrogate
)) {
257 if (buffer
.Length() < 3) {
258 return mozilla::Some(std::make_tuple(totalRead
, totalWritten
));
263 // No need to update buffer and pendingLeadSurrogate anymore
267 return mozilla::Some(std::make_tuple(totalRead
, totalWritten
));
270 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
271 template <typename CharT
>
273 void JSString::dumpCharsNoQuote(const CharT
* s
, size_t n
,
274 js::GenericPrinter
& out
) {
275 for (size_t i
= 0; i
< n
; i
++) {
279 } else if (c
== '\'') {
281 } else if (c
== '`') {
283 } else if (c
== '\\') {
285 } else if (c
== '\r') {
287 } else if (c
== '\n') {
289 } else if (c
== '\t') {
291 } else if (c
>= 32 && c
< 127) {
292 out
.putChar((char)s
[i
]);
293 } else if (c
<= 255) {
294 out
.printf("\\x%02x", unsigned(c
));
296 out
.printf("\\u%04x", unsigned(c
));
302 template void JSString::dumpCharsNoQuote(const Latin1Char
* s
, size_t n
,
303 js::GenericPrinter
& out
);
306 template void JSString::dumpCharsNoQuote(const char16_t
* s
, size_t n
,
307 js::GenericPrinter
& out
);
309 void JSString::dump() const {
310 js::Fprinter
out(stderr
);
314 void JSString::dump(js::GenericPrinter
& out
) const {
315 js::JSONPrinter
json(out
);
320 void JSString::dump(js::JSONPrinter
& json
) const {
326 const char* RepresentationToString(const JSString
* s
) {
332 if (s
->isDependent()) {
333 return "JSDependentString";
335 if (s
->isExternal()) {
336 return "JSExternalString";
338 if (s
->isExtensible()) {
339 return "JSExtensibleString";
343 if (s
->isFatInline()) {
344 return "JSFatInlineString";
346 return "JSThinInlineString";
349 return "JSLinearString";
359 template <typename KnownF
, typename UnknownF
>
360 void ForEachStringFlag(const JSString
* str
, uint32_t flags
, KnownF known
,
362 for (uint32_t i
= js::Bit(3); i
< js::Bit(16); i
= i
<< 1) {
367 case JSString::ATOM_BIT
:
370 case JSString::LINEAR_BIT
:
373 case JSString::DEPENDENT_BIT
:
374 known("DEPENDENT_BIT");
376 case JSString::INLINE_CHARS_BIT
:
379 case JSString::LINEAR_IS_EXTENSIBLE_BIT
:
380 static_assert(JSString::LINEAR_IS_EXTENSIBLE_BIT
==
381 JSString::INLINE_IS_FAT_BIT
);
382 if (str
->isLinear()) {
383 if (str
->isInline()) {
385 } else if (!str
->isAtom()) {
394 case JSString::LINEAR_IS_EXTERNAL_BIT
:
395 static_assert(JSString::LINEAR_IS_EXTERNAL_BIT
==
396 JSString::ATOM_IS_PERMANENT_BIT
);
399 } else if (str
->isLinear()) {
405 case JSString::LATIN1_CHARS_BIT
:
406 known("LATIN1_CHARS_BIT");
408 case JSString::ATOM_IS_INDEX_BIT
:
409 known("ATOM_IS_INDEX_BIT");
411 case JSString::INDEX_VALUE_BIT
:
412 known("INDEX_VALUE_BIT");
414 case JSString::IN_STRING_TO_ATOM_CACHE
:
415 known("IN_STRING_TO_ATOM_CACHE");
417 case JSString::FLATTEN_VISIT_RIGHT
:
419 known("FLATTEN_VISIT_RIGHT");
421 known("NON_DEDUP_BIT");
424 case JSString::FLATTEN_FINISH_NODE
:
425 static_assert(JSString::FLATTEN_FINISH_NODE
==
426 JSString::PINNED_ATOM_BIT
);
428 known("FLATTEN_FINISH_NODE");
429 } else if (str
->isAtom()) {
430 known("PINNED_ATOM_BIT");
442 void JSString::dumpFields(js::JSONPrinter
& json
) const {
443 dumpCommonFields(json
);
444 dumpCharsFields(json
);
447 void JSString::dumpCommonFields(js::JSONPrinter
& json
) const {
448 json
.formatProperty("address", "(%s*)0x%p", RepresentationToString(this),
451 json
.beginInlineListProperty("flags");
453 this, flags(), [&](const char* name
) { json
.value("%s", name
); },
454 [&](uint32_t value
) { json
.value("Unknown(%08x)", value
); });
455 json
.endInlineList();
457 if (hasIndexValue()) {
458 json
.property("indexValue", getIndexValue());
461 json
.boolProperty("isTenured", isTenured());
463 json
.property("length", length());
466 void JSString::dumpCharsFields(js::JSONPrinter
& json
) const {
468 const JSLinearString
* linear
= &asLinear();
470 AutoCheckCannotGC nogc
;
471 if (hasLatin1Chars()) {
472 const Latin1Char
* chars
= linear
->latin1Chars(nogc
);
474 json
.formatProperty("chars", "(JS::Latin1Char*)0x%p", chars
);
476 js::GenericPrinter
& out
= json
.beginStringProperty("value");
477 dumpCharsNoQuote(chars
, length(), out
);
478 json
.endStringProperty();
480 const char16_t
* chars
= linear
->twoByteChars(nogc
);
482 json
.formatProperty("chars", "(char16_t*)0x%p", chars
);
484 js::GenericPrinter
& out
= json
.beginStringProperty("value");
485 dumpCharsNoQuote(chars
, length(), out
);
486 json
.endStringProperty();
489 js::GenericPrinter
& out
= json
.beginStringProperty("value");
490 dumpCharsNoQuote(out
);
491 json
.endStringProperty();
495 void JSString::dumpRepresentation() const {
496 js::Fprinter
out(stderr
);
497 dumpRepresentation(out
);
500 void JSString::dumpRepresentation(js::GenericPrinter
& out
) const {
501 js::JSONPrinter
json(out
);
502 dumpRepresentation(json
);
506 void JSString::dumpRepresentation(js::JSONPrinter
& json
) const {
508 dumpRepresentationFields(json
);
512 void JSString::dumpRepresentationFields(js::JSONPrinter
& json
) const {
513 dumpCommonFields(json
);
516 asAtom().dumpOwnRepresentationFields(json
);
517 } else if (isLinear()) {
518 asLinear().dumpOwnRepresentationFields(json
);
521 asDependent().dumpOwnRepresentationFields(json
);
522 } else if (isExternal()) {
523 asExternal().dumpOwnRepresentationFields(json
);
524 } else if (isExtensible()) {
525 asExtensible().dumpOwnRepresentationFields(json
);
526 } else if (isInline()) {
527 asInline().dumpOwnRepresentationFields(json
);
529 } else if (isRope()) {
530 asRope().dumpOwnRepresentationFields(json
);
531 // Rope already shows the chars.
535 dumpCharsFields(json
);
538 void JSString::dumpStringContent(js::GenericPrinter
& out
) const {
539 dumpCharsSingleQuote(out
);
541 out
.printf(" @ (%s*)0x%p", RepresentationToString(this), this);
544 void JSString::dumpPropertyName(js::GenericPrinter
& out
) const {
545 dumpCharsNoQuote(out
);
548 void JSString::dumpChars(js::GenericPrinter
& out
) const {
550 dumpCharsNoQuote(out
);
554 void JSString::dumpCharsSingleQuote(js::GenericPrinter
& out
) const {
556 dumpCharsNoQuote(out
);
560 void JSString::dumpCharsNoQuote(js::GenericPrinter
& out
) const {
562 const JSLinearString
* linear
= &asLinear();
564 AutoCheckCannotGC nogc
;
565 if (hasLatin1Chars()) {
566 dumpCharsNoQuote(linear
->latin1Chars(nogc
), length(), out
);
568 dumpCharsNoQuote(linear
->twoByteChars(nogc
), length(), out
);
570 } else if (isRope()) {
571 JSRope
* rope
= &asRope();
572 rope
->leftChild()->dumpCharsNoQuote(out
);
573 rope
->rightChild()->dumpCharsNoQuote(out
);
577 bool JSString::equals(const char* s
) {
578 JSLinearString
* linear
= ensureLinear(nullptr);
580 // This is DEBUG-only code.
581 fprintf(stderr
, "OOM in JSString::equals!\n");
585 return StringEqualsAscii(linear
, s
);
587 #endif /* defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) */
589 JSExtensibleString
& JSLinearString::makeExtensible(size_t capacity
) {
590 MOZ_ASSERT(!isDependent());
591 MOZ_ASSERT(!isInline());
592 MOZ_ASSERT(!isAtom());
593 MOZ_ASSERT(!isExternal());
594 MOZ_ASSERT(capacity
>= length());
595 js::RemoveCellMemory(this, allocSize(), js::MemoryUse::StringContents
);
596 setLengthAndFlags(length(), flags() | EXTENSIBLE_FLAGS
);
597 d
.s
.u3
.capacity
= capacity
;
598 js::AddCellMemory(this, allocSize(), js::MemoryUse::StringContents
);
599 return asExtensible();
602 template <typename CharT
>
603 static MOZ_ALWAYS_INLINE
bool AllocCharsForFlatten(JSString
* str
, size_t length
,
607 * Grow by 12.5% if the buffer is very large. Otherwise, round up to the
608 * next power of 2. This is similar to what we do with arrays; see
609 * JSObject::ensureDenseArrayElements.
611 static const size_t DOUBLING_MAX
= 1024 * 1024;
613 length
> DOUBLING_MAX
? length
+ (length
/ 8) : RoundUpPow2(length
);
615 static_assert(JSString::MAX_LENGTH
* sizeof(CharT
) <= UINT32_MAX
);
617 str
->zone()->pod_arena_malloc
<CharT
>(js::StringBufferArena
, *capacity
);
618 return *chars
!= nullptr;
621 UniqueLatin1Chars
JSRope::copyLatin1Chars(JSContext
* maybecx
,
622 arena_id_t destArenaId
) const {
623 return copyCharsInternal
<Latin1Char
>(maybecx
, destArenaId
);
626 UniqueTwoByteChars
JSRope::copyTwoByteChars(JSContext
* maybecx
,
627 arena_id_t destArenaId
) const {
628 return copyCharsInternal
<char16_t
>(maybecx
, destArenaId
);
631 // Allocate chars for a string. If parameters and conditions allow, this will
632 // try to allocate in the nursery, but this may always fall back to a malloc
633 // allocation. The return value will record where the allocation happened.
634 template <typename CharT
>
635 static MOZ_ALWAYS_INLINE
JSString::OwnedChars
<CharT
> AllocChars(JSContext
* cx
,
638 if (heap
== gc::Heap::Default
&& cx
->zone()->allocNurseryStrings()) {
639 MOZ_ASSERT(cx
->nursery().isEnabled());
640 auto [buffer
, isMalloced
] = cx
->nursery().allocateBuffer(
641 cx
->zone(), length
* sizeof(CharT
), js::StringBufferArena
);
643 ReportOutOfMemory(cx
);
644 return {nullptr, 0, false, false};
647 return {static_cast<CharT
*>(buffer
), length
, isMalloced
, isMalloced
};
650 auto buffer
= cx
->make_pod_arena_array
<CharT
>(js::StringBufferArena
, length
);
652 ReportOutOfMemory(cx
);
653 return {nullptr, 0, false, false};
656 return {std::move(buffer
), length
, true};
659 template <typename CharT
>
660 UniquePtr
<CharT
[], JS::FreePolicy
> JSRope::copyCharsInternal(
661 JSContext
* maybecx
, arena_id_t destArenaId
) const {
662 // Left-leaning ropes are far more common than right-leaning ropes, so
663 // perform a non-destructive traversal of the rope, right node first,
664 // splatting each node's characters into a contiguous buffer.
668 UniquePtr
<CharT
[], JS::FreePolicy
> out
;
670 out
.reset(maybecx
->pod_arena_malloc
<CharT
>(destArenaId
, n
));
672 out
.reset(js_pod_arena_malloc
<CharT
>(destArenaId
, n
));
679 Vector
<const JSString
*, 8, SystemAllocPolicy
> nodeStack
;
680 const JSString
* str
= this;
681 CharT
* end
= out
.get() + str
->length();
684 if (!nodeStack
.append(str
->asRope().leftChild())) {
686 ReportOutOfMemory(maybecx
);
690 str
= str
->asRope().rightChild();
692 end
-= str
->length();
693 CopyChars(end
, str
->asLinear());
694 if (nodeStack
.empty()) {
697 str
= nodeStack
.popCopy();
700 MOZ_ASSERT(end
== out
.get());
705 template <typename CharT
>
706 void AddStringToHash(uint32_t* hash
, const CharT
* chars
, size_t len
) {
707 // It's tempting to use |HashString| instead of this loop, but that's
708 // slightly different than our existing implementation for non-ropes. We
709 // want to pretend we have a contiguous set of chars so we need to
710 // accumulate char by char rather than generate a new hash for substring
711 // and then accumulate that.
712 for (size_t i
= 0; i
< len
; i
++) {
713 *hash
= mozilla::AddToHash(*hash
, chars
[i
]);
717 void AddStringToHash(uint32_t* hash
, const JSString
* str
) {
718 AutoCheckCannotGC nogc
;
719 const auto& s
= str
->asLinear();
720 if (s
.hasLatin1Chars()) {
721 AddStringToHash(hash
, s
.latin1Chars(nogc
), s
.length());
723 AddStringToHash(hash
, s
.twoByteChars(nogc
), s
.length());
727 bool JSRope::hash(uint32_t* outHash
) const {
728 Vector
<const JSString
*, 8, SystemAllocPolicy
> nodeStack
;
729 const JSString
* str
= this;
735 if (!nodeStack
.append(str
->asRope().rightChild())) {
738 str
= str
->asRope().leftChild();
740 AddStringToHash(outHash
, str
);
741 if (nodeStack
.empty()) {
744 str
= nodeStack
.popCopy();
751 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
752 void JSRope::dumpOwnRepresentationFields(js::JSONPrinter
& json
) const {
753 json
.beginObjectProperty("leftChild");
754 leftChild()->dumpRepresentationFields(json
);
757 json
.beginObjectProperty("rightChild");
758 rightChild()->dumpRepresentationFields(json
);
766 void CopyChars(char16_t
* dest
, const JSLinearString
& str
) {
767 AutoCheckCannotGC nogc
;
768 if (str
.hasTwoByteChars()) {
769 PodCopy(dest
, str
.twoByteChars(nogc
), str
.length());
771 CopyAndInflateChars(dest
, str
.latin1Chars(nogc
), str
.length());
776 void CopyChars(Latin1Char
* dest
, const JSLinearString
& str
) {
777 AutoCheckCannotGC nogc
;
778 if (str
.hasLatin1Chars()) {
779 PodCopy(dest
, str
.latin1Chars(nogc
), str
.length());
782 * When we flatten a TwoByte rope, we turn child ropes (including Latin1
783 * ropes) into TwoByte dependent strings. If one of these strings is
784 * also part of another Latin1 rope tree, we can have a Latin1 rope with
785 * a TwoByte descendent and we end up here when we flatten it. Although
786 * the chars are stored as TwoByte, we know they must be in the Latin1
787 * range, so we can safely deflate here.
789 size_t len
= str
.length();
790 const char16_t
* chars
= str
.twoByteChars(nogc
);
791 auto src
= Span(chars
, len
);
792 MOZ_ASSERT(IsUtf16Latin1(src
));
793 LossyConvertUtf16toLatin1(src
, AsWritableChars(Span(dest
, len
)));
799 template <typename CharT
>
800 static constexpr uint32_t StringFlagsForCharType(uint32_t baseFlags
) {
801 if constexpr (std::is_same_v
<CharT
, char16_t
>) {
805 return baseFlags
| JSString::LATIN1_CHARS_BIT
;
808 static bool UpdateNurseryBuffersOnTransfer(js::Nursery
& nursery
, JSString
* from
,
809 JSString
* to
, void* buffer
,
811 // Update the list of buffers associated with nursery cells when |buffer| is
812 // moved from string |from| to string |to|, depending on whether those strings
813 // are in the nursery or not.
815 if (from
->isTenured() && !to
->isTenured()) {
816 // Tenured leftmost child is giving its chars buffer to the
817 // nursery-allocated root node.
818 if (!nursery
.registerMallocedBuffer(buffer
, size
)) {
821 } else if (!from
->isTenured() && to
->isTenured()) {
822 // Leftmost child is giving its nursery-held chars buffer to a
824 nursery
.removeMallocedBuffer(buffer
, size
);
830 static bool CanReuseLeftmostBuffer(JSString
* leftmostChild
, size_t wholeLength
,
831 bool hasTwoByteChars
) {
832 if (!leftmostChild
->isExtensible()) {
836 JSExtensibleString
& str
= leftmostChild
->asExtensible();
837 return str
.capacity() >= wholeLength
&&
838 str
.hasTwoByteChars() == hasTwoByteChars
;
841 JSLinearString
* JSRope::flatten(JSContext
* maybecx
) {
842 mozilla::Maybe
<AutoGeckoProfilerEntry
> entry
;
844 entry
.emplace(maybecx
, "JSRope::flatten");
847 JSLinearString
* str
= flattenInternal();
848 if (!str
&& maybecx
) {
849 ReportOutOfMemory(maybecx
);
855 JSLinearString
* JSRope::flattenInternal() {
856 if (zone()->needsIncrementalBarrier()) {
857 return flattenInternal
<WithIncrementalBarrier
>();
860 return flattenInternal
<NoBarrier
>();
863 template <JSRope::UsingBarrier usingBarrier
>
864 JSLinearString
* JSRope::flattenInternal() {
865 if (hasTwoByteChars()) {
866 return flattenInternal
<usingBarrier
, char16_t
>(this);
869 return flattenInternal
<usingBarrier
, Latin1Char
>(this);
872 template <JSRope::UsingBarrier usingBarrier
, typename CharT
>
874 JSLinearString
* JSRope::flattenInternal(JSRope
* root
) {
876 * Consider the DAG of JSRopes rooted at |root|, with non-JSRopes as
877 * its leaves. Mutate the root JSRope into a JSExtensibleString containing
878 * the full flattened text that the root represents, and mutate all other
879 * JSRopes in the interior of the DAG into JSDependentStrings that refer to
880 * this new JSExtensibleString.
882 * If the leftmost leaf of our DAG is a JSExtensibleString, consider
883 * stealing its buffer for use in our new root, and transforming it into a
884 * JSDependentString too. Do not mutate any of the other leaves.
886 * Perform a depth-first dag traversal, splatting each node's characters
887 * into a contiguous buffer. Visit each rope node three times:
888 * 1. record position in the buffer and recurse into left child;
889 * 2. recurse into the right child;
890 * 3. transform the node into a dependent string.
891 * To avoid maintaining a stack, tree nodes are mutated to indicate how many
892 * times they have been visited. Since ropes can be dags, a node may be
893 * encountered multiple times during traversal. However, step 3 above leaves
894 * a valid dependent string, so everything works out.
896 * While ropes avoid all sorts of quadratic cases with string concatenation,
897 * they can't help when ropes are immediately flattened. One idiomatic case
898 * that we'd like to keep linear (and has traditionally been linear in SM
899 * and other JS engines) is:
906 * Two behaviors accomplish this:
908 * - When the leftmost non-rope in the DAG we're flattening is a
909 * JSExtensibleString with sufficient capacity to hold the entire
910 * flattened string, we just flatten the DAG into its buffer. Then, when
911 * we transform the root of the DAG from a JSRope into a
912 * JSExtensibleString, we steal that buffer, and change the victim from a
913 * JSExtensibleString to a JSDependentString. In this case, the left-hand
914 * side of the string never needs to be copied.
916 * - Otherwise, we round up the total flattened size and create a fresh
917 * JSExtensibleString with that much capacity. If this in turn becomes the
918 * leftmost leaf of a subsequent flatten, we will hopefully be able to
919 * fill it, as in the case above.
921 * Note that, even though the code for creating JSDependentStrings avoids
922 * creating dependents of dependents, we can create that situation here: the
923 * JSExtensibleStrings we transform into JSDependentStrings might have
924 * JSDependentStrings pointing to them already. Stealing the buffer doesn't
925 * change its address, only its owning JSExtensibleString, so all chars()
926 * pointers in the JSDependentStrings are still valid.
928 * This chain of dependent strings could be problematic if the base string
929 * moves, either because it was initially allocated in the nursery or it
930 * gets deduplicated, because you might have a dependent ->
931 * tenured dependent -> nursery base string, and the store buffer would
932 * only capture the latter edge. Prevent this case from happening by
933 * marking the root as nondeduplicatable if the extensible string
934 * optimization applied.
936 const size_t wholeLength
= root
->length();
937 size_t wholeCapacity
;
940 AutoCheckCannotGC nogc
;
942 Nursery
& nursery
= root
->runtimeFromMainThread()->gc
.nursery();
944 /* Find the left most string, containing the first string. */
945 JSRope
* leftmostRope
= root
;
946 while (leftmostRope
->leftChild()->isRope()) {
947 leftmostRope
= &leftmostRope
->leftChild()->asRope();
949 JSString
* leftmostChild
= leftmostRope
->leftChild();
951 bool reuseLeftmostBuffer
= CanReuseLeftmostBuffer(
952 leftmostChild
, wholeLength
, std::is_same_v
<CharT
, char16_t
>);
954 if (reuseLeftmostBuffer
) {
955 JSExtensibleString
& left
= leftmostChild
->asExtensible();
956 wholeCapacity
= left
.capacity();
957 wholeChars
= const_cast<CharT
*>(left
.nonInlineChars
<CharT
>(nogc
));
959 // Nursery::registerMallocedBuffer is fallible, so attempt it first before
960 // doing anything irreversible.
961 if (!UpdateNurseryBuffersOnTransfer(nursery
, &left
, root
, wholeChars
,
962 wholeCapacity
* sizeof(CharT
))) {
966 // If we can't reuse the leftmost child's buffer, allocate a new one.
967 if (!AllocCharsForFlatten(root
, wholeLength
, &wholeChars
, &wholeCapacity
)) {
971 if (!root
->isTenured()) {
972 if (!nursery
.registerMallocedBuffer(wholeChars
,
973 wholeCapacity
* sizeof(CharT
))) {
981 CharT
* pos
= wholeChars
;
983 JSRope
* parent
= nullptr;
984 uint32_t parentFlag
= 0;
987 MOZ_ASSERT_IF(str
!= root
, parent
&& parentFlag
);
988 MOZ_ASSERT(!str
->asRope().isBeingFlattened());
990 ropeBarrierDuringFlattening
<usingBarrier
>(str
);
992 JSString
& left
= *str
->d
.s
.u2
.left
;
993 str
->d
.s
.u2
.parent
= parent
;
994 str
->setFlagBit(parentFlag
);
999 /* Return to this node when 'left' done, then goto visit_right_child. */
1001 parentFlag
= FLATTEN_VISIT_RIGHT
;
1002 str
= &left
.asRope();
1003 goto first_visit_node
;
1005 if (!(reuseLeftmostBuffer
&& pos
== wholeChars
)) {
1006 CopyChars(pos
, left
.asLinear());
1008 pos
+= left
.length();
1011 visit_right_child
: {
1012 JSString
& right
= *str
->d
.s
.u3
.right
;
1013 if (right
.isRope()) {
1014 /* Return to this node when 'right' done, then goto finish_node. */
1016 parentFlag
= FLATTEN_FINISH_NODE
;
1017 str
= &right
.asRope();
1018 goto first_visit_node
;
1020 CopyChars(pos
, right
.asLinear());
1021 pos
+= right
.length();
1029 MOZ_ASSERT(pos
>= wholeChars
);
1030 CharT
* chars
= pos
- str
->length();
1031 JSRope
* strParent
= str
->d
.s
.u2
.parent
;
1032 str
->setNonInlineChars(chars
);
1034 MOZ_ASSERT(str
->asRope().isBeingFlattened());
1035 mozilla::DebugOnly
<bool> visitRight
= str
->flags() & FLATTEN_VISIT_RIGHT
;
1036 bool finishNode
= str
->flags() & FLATTEN_FINISH_NODE
;
1037 MOZ_ASSERT(visitRight
!= finishNode
);
1039 // This also clears the flags related to flattening.
1040 str
->setLengthAndFlags(str
->length(),
1041 StringFlagsForCharType
<CharT
>(INIT_DEPENDENT_FLAGS
));
1043 reinterpret_cast<JSLinearString
*>(root
); /* will be true on exit */
1045 // Every interior (rope) node in the rope's tree will be visited during
1046 // the traversal and post-barriered here, so earlier additions of
1047 // dependent.base -> root pointers are handled by this barrier as well.
1049 // The only time post-barriers need do anything is when the root is in
1050 // the nursery. Note that the root was a rope but will be an extensible
1051 // string when we return, so it will not point to any strings and need
1052 // not be barriered.
1053 if (str
->isTenured() && !root
->isTenured()) {
1054 root
->storeBuffer()->putWholeCell(str
);
1061 MOZ_ASSERT(visitRight
);
1062 goto visit_right_child
;
1066 // We traversed all the way back up to the root so we're finished.
1067 MOZ_ASSERT(str
== root
);
1068 MOZ_ASSERT(pos
== wholeChars
+ wholeLength
);
1070 root
->setLengthAndFlags(wholeLength
,
1071 StringFlagsForCharType
<CharT
>(EXTENSIBLE_FLAGS
));
1072 root
->setNonInlineChars(wholeChars
);
1073 root
->d
.s
.u3
.capacity
= wholeCapacity
;
1074 AddCellMemory(root
, root
->asLinear().allocSize(), MemoryUse::StringContents
);
1076 if (reuseLeftmostBuffer
) {
1077 // Remove memory association for left node we're about to make into a
1078 // dependent string.
1079 JSString
& left
= *leftmostChild
;
1080 RemoveCellMemory(&left
, left
.allocSize(), MemoryUse::StringContents
);
1082 uint32_t flags
= INIT_DEPENDENT_FLAGS
;
1083 if (left
.inStringToAtomCache()) {
1084 flags
|= IN_STRING_TO_ATOM_CACHE
;
1086 left
.setLengthAndFlags(left
.length(), StringFlagsForCharType
<CharT
>(flags
));
1087 left
.d
.s
.u3
.base
= &root
->asLinear();
1088 if (left
.isTenured() && !root
->isTenured()) {
1089 // leftmost child -> root is a tenured -> nursery edge. Put the leftmost
1090 // child in the store buffer and prevent the root's chars from moving or
1091 // being freed (because the leftmost child may have a tenured dependent
1092 // string that cannot be updated.)
1093 root
->storeBuffer()->putWholeCell(&left
);
1094 root
->setNonDeduplicatable();
1098 return &root
->asLinear();
1101 template <JSRope::UsingBarrier usingBarrier
>
1103 inline void JSRope::ropeBarrierDuringFlattening(JSRope
* rope
) {
1104 MOZ_ASSERT(!rope
->isBeingFlattened());
1105 if constexpr (usingBarrier
) {
1106 gc::PreWriteBarrierDuringFlattening(rope
->leftChild());
1107 gc::PreWriteBarrierDuringFlattening(rope
->rightChild());
1111 template <AllowGC allowGC
>
1112 static JSLinearString
* EnsureLinear(
1114 typename MaybeRooted
<JSString
*, allowGC
>::HandleType string
) {
1115 JSLinearString
* linear
= string
->ensureLinear(cx
);
1116 // Don't report an exception if GC is not allowed, just return nullptr.
1117 if (!linear
&& !allowGC
) {
1118 cx
->recoverFromOutOfMemory();
1123 template <AllowGC allowGC
>
1124 JSString
* js::ConcatStrings(
1125 JSContext
* cx
, typename MaybeRooted
<JSString
*, allowGC
>::HandleType left
,
1126 typename MaybeRooted
<JSString
*, allowGC
>::HandleType right
, gc::Heap heap
) {
1127 MOZ_ASSERT_IF(!left
->isAtom(), cx
->isInsideCurrentZone(left
));
1128 MOZ_ASSERT_IF(!right
->isAtom(), cx
->isInsideCurrentZone(right
));
1130 size_t leftLen
= left
->length();
1135 size_t rightLen
= right
->length();
1136 if (rightLen
== 0) {
1140 size_t wholeLength
= leftLen
+ rightLen
;
1141 if (MOZ_UNLIKELY(wholeLength
> JSString::MAX_LENGTH
)) {
1142 // Don't report an exception if GC is not allowed, just return nullptr.
1144 js::ReportOversizedAllocation(cx
, JSMSG_ALLOC_OVERFLOW
);
1149 bool isLatin1
= left
->hasLatin1Chars() && right
->hasLatin1Chars();
1150 bool canUseInline
= isLatin1
1151 ? JSInlineString::lengthFits
<Latin1Char
>(wholeLength
)
1152 : JSInlineString::lengthFits
<char16_t
>(wholeLength
);
1154 Latin1Char
* latin1Buf
= nullptr; // initialize to silence GCC warning
1155 char16_t
* twoByteBuf
= nullptr; // initialize to silence GCC warning
1156 JSInlineString
* str
=
1158 ? AllocateInlineString
<allowGC
>(cx
, wholeLength
, &latin1Buf
, heap
)
1159 : AllocateInlineString
<allowGC
>(cx
, wholeLength
, &twoByteBuf
, heap
);
1164 AutoCheckCannotGC nogc
;
1165 JSLinearString
* leftLinear
= EnsureLinear
<allowGC
>(cx
, left
);
1169 JSLinearString
* rightLinear
= EnsureLinear
<allowGC
>(cx
, right
);
1175 PodCopy(latin1Buf
, leftLinear
->latin1Chars(nogc
), leftLen
);
1176 PodCopy(latin1Buf
+ leftLen
, rightLinear
->latin1Chars(nogc
), rightLen
);
1178 if (leftLinear
->hasTwoByteChars()) {
1179 PodCopy(twoByteBuf
, leftLinear
->twoByteChars(nogc
), leftLen
);
1181 CopyAndInflateChars(twoByteBuf
, leftLinear
->latin1Chars(nogc
), leftLen
);
1183 if (rightLinear
->hasTwoByteChars()) {
1184 PodCopy(twoByteBuf
+ leftLen
, rightLinear
->twoByteChars(nogc
),
1187 CopyAndInflateChars(twoByteBuf
+ leftLen
,
1188 rightLinear
->latin1Chars(nogc
), rightLen
);
1195 return JSRope::new_
<allowGC
>(cx
, left
, right
, wholeLength
, heap
);
1198 template JSString
* js::ConcatStrings
<CanGC
>(JSContext
* cx
, HandleString left
,
1199 HandleString right
, gc::Heap heap
);
1201 template JSString
* js::ConcatStrings
<NoGC
>(JSContext
* cx
, JSString
* const& left
,
1202 JSString
* const& right
,
#if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
// Emits the fields specific to dependent strings: the "baseOffset" property
// followed by a nested "base" object filled in by the base string's own
// representation dump.
void JSDependentString::dumpOwnRepresentationFields(
    js::JSONPrinter& json) const {
  json.property("baseOffset", baseOffset());

  json.beginObjectProperty("base");
  base()->dumpRepresentationFields(json);
  json.endObject();
}
#endif
1215 bool js::EqualChars(const JSLinearString
* str1
, const JSLinearString
* str2
) {
1216 // Assert this isn't called for strings the caller should handle with a fast
1218 MOZ_ASSERT(str1
->length() == str2
->length());
1219 MOZ_ASSERT(str1
!= str2
);
1220 MOZ_ASSERT(!str1
->isAtom() || !str2
->isAtom());
1222 size_t len
= str1
->length();
1224 AutoCheckCannotGC nogc
;
1225 if (str1
->hasTwoByteChars()) {
1226 if (str2
->hasTwoByteChars()) {
1227 return EqualChars(str1
->twoByteChars(nogc
), str2
->twoByteChars(nogc
),
1231 return EqualChars(str2
->latin1Chars(nogc
), str1
->twoByteChars(nogc
), len
);
1234 if (str2
->hasLatin1Chars()) {
1235 return EqualChars(str1
->latin1Chars(nogc
), str2
->latin1Chars(nogc
), len
);
1238 return EqualChars(str1
->latin1Chars(nogc
), str2
->twoByteChars(nogc
), len
);
1241 bool js::HasSubstringAt(JSLinearString
* text
, JSLinearString
* pat
,
1243 MOZ_ASSERT(start
+ pat
->length() <= text
->length());
1245 size_t patLen
= pat
->length();
1247 AutoCheckCannotGC nogc
;
1248 if (text
->hasLatin1Chars()) {
1249 const Latin1Char
* textChars
= text
->latin1Chars(nogc
) + start
;
1250 if (pat
->hasLatin1Chars()) {
1251 return EqualChars(textChars
, pat
->latin1Chars(nogc
), patLen
);
1254 return EqualChars(textChars
, pat
->twoByteChars(nogc
), patLen
);
1257 const char16_t
* textChars
= text
->twoByteChars(nogc
) + start
;
1258 if (pat
->hasTwoByteChars()) {
1259 return EqualChars(textChars
, pat
->twoByteChars(nogc
), patLen
);
1262 return EqualChars(pat
->latin1Chars(nogc
), textChars
, patLen
);
1265 bool js::EqualStrings(JSContext
* cx
, JSString
* str1
, JSString
* str2
,
1271 if (str1
->length() != str2
->length()) {
1275 if (str1
->isAtom() && str2
->isAtom()) {
1280 JSLinearString
* linear1
= str1
->ensureLinear(cx
);
1284 JSLinearString
* linear2
= str2
->ensureLinear(cx
);
1289 *result
= EqualChars(linear1
, linear2
);
1293 bool js::EqualStrings(const JSLinearString
* str1
, const JSLinearString
* str2
) {
1297 if (str1
->length() != str2
->length()) {
1300 if (str1
->isAtom() && str2
->isAtom()) {
1303 return EqualChars(str1
, str2
);
1306 int32_t js::CompareChars(const char16_t
* s1
, size_t len1
, JSLinearString
* s2
) {
1307 AutoCheckCannotGC nogc
;
1308 return s2
->hasLatin1Chars()
1309 ? CompareChars(s1
, len1
, s2
->latin1Chars(nogc
), s2
->length())
1310 : CompareChars(s1
, len1
, s2
->twoByteChars(nogc
), s2
->length());
1313 static int32_t CompareStringsImpl(const JSLinearString
* str1
,
1314 const JSLinearString
* str2
) {
1315 size_t len1
= str1
->length();
1316 size_t len2
= str2
->length();
1318 AutoCheckCannotGC nogc
;
1319 if (str1
->hasLatin1Chars()) {
1320 const Latin1Char
* chars1
= str1
->latin1Chars(nogc
);
1321 return str2
->hasLatin1Chars()
1322 ? CompareChars(chars1
, len1
, str2
->latin1Chars(nogc
), len2
)
1323 : CompareChars(chars1
, len1
, str2
->twoByteChars(nogc
), len2
);
1326 const char16_t
* chars1
= str1
->twoByteChars(nogc
);
1327 return str2
->hasLatin1Chars()
1328 ? CompareChars(chars1
, len1
, str2
->latin1Chars(nogc
), len2
)
1329 : CompareChars(chars1
, len1
, str2
->twoByteChars(nogc
), len2
);
1332 bool js::CompareStrings(JSContext
* cx
, JSString
* str1
, JSString
* str2
,
1342 JSLinearString
* linear1
= str1
->ensureLinear(cx
);
1347 JSLinearString
* linear2
= str2
->ensureLinear(cx
);
1352 *result
= CompareStringsImpl(linear1
, linear2
);
1356 int32_t js::CompareStrings(const JSLinearString
* str1
,
1357 const JSLinearString
* str2
) {
1364 return CompareStringsImpl(str1
, str2
);
1367 bool js::StringIsAscii(JSLinearString
* str
) {
1368 JS::AutoCheckCannotGC nogc
;
1369 if (str
->hasLatin1Chars()) {
1370 return mozilla::IsAscii(
1371 AsChars(Span(str
->latin1Chars(nogc
), str
->length())));
1373 return mozilla::IsAscii(Span(str
->twoByteChars(nogc
), str
->length()));
1376 bool js::StringEqualsAscii(JSLinearString
* str
, const char* asciiBytes
) {
1377 return StringEqualsAscii(str
, asciiBytes
, strlen(asciiBytes
));
1380 bool js::StringEqualsAscii(JSLinearString
* str
, const char* asciiBytes
,
1382 MOZ_ASSERT(JS::StringIsASCII(Span(asciiBytes
, length
)));
1384 if (length
!= str
->length()) {
1388 const Latin1Char
* latin1
= reinterpret_cast<const Latin1Char
*>(asciiBytes
);
1390 AutoCheckCannotGC nogc
;
1391 return str
->hasLatin1Chars()
1392 ? EqualChars(latin1
, str
->latin1Chars(nogc
), length
)
1393 : EqualChars(latin1
, str
->twoByteChars(nogc
), length
);
1396 template <typename CharT
>
1397 bool js::CheckStringIsIndex(const CharT
* s
, size_t length
, uint32_t* indexp
) {
1398 MOZ_ASSERT(length
> 0);
1399 MOZ_ASSERT(length
<= UINT32_CHAR_BUFFER_LENGTH
);
1400 MOZ_ASSERT(IsAsciiDigit(*s
),
1401 "caller's fast path must have checked first char");
1403 RangedPtr
<const CharT
> cp(s
, length
);
1404 const RangedPtr
<const CharT
> end(s
+ length
, s
, length
);
1406 uint32_t index
= AsciiDigitToNumber(*cp
++);
1407 uint32_t oldIndex
= 0;
1411 // Consume remaining characters only if the first character isn't '0'.
1412 while (cp
< end
&& IsAsciiDigit(*cp
)) {
1414 c
= AsciiDigitToNumber(*cp
);
1415 index
= 10 * index
+ c
;
1420 // It's not an integer index if there are characters after the number.
1425 // Look out for "4294967295" and larger-number strings that fit in
1426 // UINT32_CHAR_BUFFER_LENGTH: only unsigned 32-bit integers less than or equal
1427 // to MAX_ARRAY_INDEX shall pass.
1428 if (oldIndex
< MAX_ARRAY_INDEX
/ 10 ||
1429 (oldIndex
== MAX_ARRAY_INDEX
/ 10 && c
<= (MAX_ARRAY_INDEX
% 10))) {
1430 MOZ_ASSERT(index
<= MAX_ARRAY_INDEX
);
1438 template bool js::CheckStringIsIndex(const Latin1Char
* s
, size_t length
,
1440 template bool js::CheckStringIsIndex(const char16_t
* s
, size_t length
,
1443 template <typename CharT
>
1444 static uint32_t AtomCharsToIndex(const CharT
* s
, size_t length
) {
1445 // Chars are known to be a valid index value (as determined by
1446 // CheckStringIsIndex) that didn't fit in the "index value" bits in the
1449 MOZ_ASSERT(length
> 0);
1450 MOZ_ASSERT(length
<= UINT32_CHAR_BUFFER_LENGTH
);
1452 RangedPtr
<const CharT
> cp(s
, length
);
1453 const RangedPtr
<const CharT
> end(s
+ length
, s
, length
);
1455 MOZ_ASSERT(IsAsciiDigit(*cp
));
1456 uint32_t index
= AsciiDigitToNumber(*cp
++);
1457 MOZ_ASSERT(index
!= 0);
1460 MOZ_ASSERT(IsAsciiDigit(*cp
));
1461 index
= 10 * index
+ AsciiDigitToNumber(*cp
);
1465 MOZ_ASSERT(index
<= MAX_ARRAY_INDEX
);
1469 uint32_t JSAtom::getIndexSlow() const {
1470 MOZ_ASSERT(isIndex());
1471 MOZ_ASSERT(!hasIndexValue());
1473 size_t len
= length();
1475 AutoCheckCannotGC nogc
;
1476 return hasLatin1Chars() ? AtomCharsToIndex(latin1Chars(nogc
), len
)
1477 : AtomCharsToIndex(twoByteChars(nogc
), len
);
1480 // Prevent the actual owner of the string's characters from being deduplicated
1481 // (and thus freeing its characters, which would invalidate the ASSC's chars
1482 // pointer). Intermediate dependent strings on the chain can be deduplicated,
1483 // since the base will be updated to the root base during tenuring anyway and
1484 // the intermediates won't matter.
1485 void PreventRootBaseDeduplication(JSLinearString
* s
) {
1486 while (s
->hasBase()) {
1489 if (!s
->isTenured()) {
1490 s
->setNonDeduplicatable();
1494 bool AutoStableStringChars::init(JSContext
* cx
, JSString
* s
) {
1495 Rooted
<JSLinearString
*> linearString(cx
, s
->ensureLinear(cx
));
1496 if (!linearString
) {
1500 MOZ_ASSERT(state_
== Uninitialized
);
1502 // Inline and nursery-allocated chars may move during a GC, so copy them
1503 // out into a temporary malloced buffer. Note that we cannot update the
1504 // string itself with a malloced buffer, because there may be dependent
1505 // strings that are using the original chars.
1506 if (linearString
->hasMovableChars()) {
1507 return linearString
->hasTwoByteChars() ? copyTwoByteChars(cx
, linearString
)
1508 : copyLatin1Chars(cx
, linearString
);
1511 if (linearString
->hasLatin1Chars()) {
1513 latin1Chars_
= linearString
->rawLatin1Chars();
1516 twoByteChars_
= linearString
->rawTwoByteChars();
1519 PreventRootBaseDeduplication(linearString
);
1525 bool AutoStableStringChars::initTwoByte(JSContext
* cx
, JSString
* s
) {
1526 Rooted
<JSLinearString
*> linearString(cx
, s
->ensureLinear(cx
));
1527 if (!linearString
) {
1531 MOZ_ASSERT(state_
== Uninitialized
);
1533 if (linearString
->hasLatin1Chars()) {
1534 return copyAndInflateLatin1Chars(cx
, linearString
);
1537 // Copy movable chars since they may be moved by GC (see above).
1538 if (linearString
->hasMovableChars()) {
1539 return copyTwoByteChars(cx
, linearString
);
1543 twoByteChars_
= linearString
->rawTwoByteChars();
1545 PreventRootBaseDeduplication(linearString
);
1551 template <typename T
>
1552 T
* AutoStableStringChars::allocOwnChars(JSContext
* cx
, size_t count
) {
1555 sizeof(JS::Latin1Char
) * JSFatInlineString::MAX_LENGTH_LATIN1
&&
1557 sizeof(char16_t
) * JSFatInlineString::MAX_LENGTH_TWO_BYTE
,
1558 "InlineCapacity too small to hold fat inline strings");
1560 static_assert((JSString::MAX_LENGTH
&
1561 mozilla::tl::MulOverflowMask
<sizeof(T
)>::value
) == 0,
1562 "Size calculation can overflow");
1563 MOZ_ASSERT(count
<= JSString::MAX_LENGTH
);
1564 size_t size
= sizeof(T
) * count
;
1566 ownChars_
.emplace(cx
);
1567 if (!ownChars_
->resize(size
)) {
1572 return reinterpret_cast<T
*>(ownChars_
->begin());
1575 bool AutoStableStringChars::copyAndInflateLatin1Chars(
1576 JSContext
* cx
, Handle
<JSLinearString
*> linearString
) {
1577 size_t length
= linearString
->length();
1578 char16_t
* chars
= allocOwnChars
<char16_t
>(cx
, length
);
1583 // Copy |src[0..length]| to |dest[0..length]| when copying doesn't narrow and
1584 // therefore can't lose information.
1585 auto src
= AsChars(Span(linearString
->rawLatin1Chars(), length
));
1586 auto dest
= Span(chars
, length
);
1587 ConvertLatin1toUtf16(src
, dest
);
1590 twoByteChars_
= chars
;
1595 bool AutoStableStringChars::copyLatin1Chars(
1596 JSContext
* cx
, Handle
<JSLinearString
*> linearString
) {
1597 size_t length
= linearString
->length();
1598 JS::Latin1Char
* chars
= allocOwnChars
<JS::Latin1Char
>(cx
, length
);
1603 PodCopy(chars
, linearString
->rawLatin1Chars(), length
);
1606 latin1Chars_
= chars
;
1611 bool AutoStableStringChars::copyTwoByteChars(
1612 JSContext
* cx
, Handle
<JSLinearString
*> linearString
) {
1613 size_t length
= linearString
->length();
1614 char16_t
* chars
= allocOwnChars
<char16_t
>(cx
, length
);
1619 PodCopy(chars
, linearString
->rawTwoByteChars(), length
);
1622 twoByteChars_
= chars
;
1628 bool JS::SourceText
<char16_t
>::initMaybeBorrowed(
1629 JSContext
* cx
, JS::AutoStableStringChars
& linearChars
) {
1630 MOZ_ASSERT(linearChars
.isTwoByte(),
1631 "AutoStableStringChars must be initialized with char16_t");
1633 const char16_t
* chars
= linearChars
.twoByteChars();
1634 size_t length
= linearChars
.length();
1635 JS::SourceOwnership ownership
= linearChars
.maybeGiveOwnershipToCaller()
1636 ? JS::SourceOwnership::TakeOwnership
1637 : JS::SourceOwnership::Borrowed
;
1638 return initImpl(cx
, chars
, length
, ownership
);
1642 bool JS::SourceText
<char16_t
>::initMaybeBorrowed(
1643 JS::FrontendContext
* fc
, JS::AutoStableStringChars
& linearChars
) {
1644 MOZ_ASSERT(linearChars
.isTwoByte(),
1645 "AutoStableStringChars must be initialized with char16_t");
1647 const char16_t
* chars
= linearChars
.twoByteChars();
1648 size_t length
= linearChars
.length();
1649 JS::SourceOwnership ownership
= linearChars
.maybeGiveOwnershipToCaller()
1650 ? JS::SourceOwnership::TakeOwnership
1651 : JS::SourceOwnership::Borrowed
;
1652 return initImpl(fc
, chars
, length
, ownership
);
#if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
// Prints a "JSAtom* (address) = " prefix followed by the generic JSString
// dump of this atom.
void JSAtom::dump(js::GenericPrinter& out) {
  out.printf("JSAtom* (%p) = ", static_cast<void*>(this));
  this->JSString::dump(out);
}

// Convenience overload that dumps to stderr.
void JSAtom::dump() {
  Fprinter err(stderr);
  dump(err);
}

// Emits the field specific to external strings: the callbacks pointer.
void JSExternalString::dumpOwnRepresentationFields(
    js::JSONPrinter& json) const {
  json.formatProperty("callbacks", "(JSExternalStringCallbacks*)0x%p",
                      callbacks());
}
#endif /* defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) */
1673 JSLinearString
* js::NewDependentString(JSContext
* cx
, JSString
* baseArg
,
1674 size_t start
, size_t length
,
1677 return cx
->emptyString();
1680 JSLinearString
* base
= baseArg
->ensureLinear(cx
);
1685 if (start
== 0 && length
== base
->length()) {
1690 if (base
->hasTwoByteChars()) {
1691 AutoCheckCannotGC nogc
;
1692 const char16_t
* chars
= base
->twoByteChars(nogc
) + start
;
1693 if (JSLinearString
* staticStr
= cx
->staticStrings().lookup(chars
, length
)) {
1696 useInline
= JSInlineString::lengthFits
<char16_t
>(length
);
1698 AutoCheckCannotGC nogc
;
1699 const Latin1Char
* chars
= base
->latin1Chars(nogc
) + start
;
1700 if (JSLinearString
* staticStr
= cx
->staticStrings().lookup(chars
, length
)) {
1703 useInline
= JSInlineString::lengthFits
<Latin1Char
>(length
);
1707 Rooted
<JSLinearString
*> rootedBase(cx
, base
);
1709 // Do not create a dependent string that would fit into an inline string.
1710 // First, that could create a string dependent on an inline base string's
1711 // chars, which would be an awkward moving-GC hazard. Second, this makes
1712 // it more likely to have a very short string keep a very long string alive.
1713 if (base
->hasTwoByteChars()) {
1714 return NewInlineString
<char16_t
>(cx
, rootedBase
, start
, length
, heap
);
1716 return NewInlineString
<Latin1Char
>(cx
, rootedBase
, start
, length
, heap
);
1719 return JSDependentString::new_(cx
, base
, start
, length
, heap
);
1722 static constexpr bool CanStoreCharsAsLatin1(const JS::Latin1Char
* s
,
1727 static inline bool CanStoreCharsAsLatin1(const char16_t
* s
, size_t length
) {
1728 return IsUtf16Latin1(Span(s
, length
));
1732 * Copy |src[0..length]| to |dest[0..length]| when copying *does* narrow, but
1733 * the user guarantees every runtime |src[i]| value can be stored without change
1734 * of value in |dest[i]|.
1736 static inline void FillFromCompatible(unsigned char* dest
, const char16_t
* src
,
1738 LossyConvertUtf16toLatin1(Span(src
, length
),
1739 AsWritableChars(Span(dest
, length
)));
1742 template <AllowGC allowGC
>
1743 static MOZ_ALWAYS_INLINE JSInlineString
* NewInlineStringDeflated(
1744 JSContext
* cx
, const mozilla::Range
<const char16_t
>& chars
,
1745 gc::Heap heap
= gc::Heap::Default
) {
1746 size_t len
= chars
.length();
1747 Latin1Char
* storage
;
1748 JSInlineString
* str
= AllocateInlineString
<allowGC
>(cx
, len
, &storage
, heap
);
1753 MOZ_ASSERT(CanStoreCharsAsLatin1(chars
.begin().get(), len
));
1754 FillFromCompatible(storage
, chars
.begin().get(), len
);
1758 template <AllowGC allowGC
>
1759 static JSLinearString
* NewStringDeflated(JSContext
* cx
, const char16_t
* s
,
1760 size_t n
, gc::Heap heap
) {
1761 if (JSLinearString
* str
= TryEmptyOrStaticString(cx
, s
, n
)) {
1765 if (JSInlineString::lengthFits
<Latin1Char
>(n
)) {
1766 return NewInlineStringDeflated
<allowGC
>(
1767 cx
, mozilla::Range
<const char16_t
>(s
, n
), heap
);
1770 JS::Rooted
<JSString::OwnedChars
<Latin1Char
>> news(
1771 cx
, AllocChars
<Latin1Char
>(cx
, n
, heap
));
1774 cx
->recoverFromOutOfMemory();
1779 MOZ_ASSERT(CanStoreCharsAsLatin1(s
, n
));
1780 FillFromCompatible(news
.data(), s
, n
);
1782 return JSLinearString::new_
<allowGC
, Latin1Char
>(cx
, &news
, heap
);
1785 static MOZ_ALWAYS_INLINE JSAtom
* NewInlineAtomDeflated(JSContext
* cx
,
1786 const char16_t
* chars
,
1788 js::HashNumber hash
) {
1789 Latin1Char
* storage
;
1790 JSAtom
* str
= AllocateInlineAtom(cx
, length
, &storage
, hash
);
1795 MOZ_ASSERT(CanStoreCharsAsLatin1(chars
, length
));
1796 FillFromCompatible(storage
, chars
, length
);
1800 static JSAtom
* NewAtomDeflatedValidLength(JSContext
* cx
, const char16_t
* s
,
1801 size_t n
, js::HashNumber hash
) {
1802 if (JSAtom::lengthFitsInline
<Latin1Char
>(n
)) {
1803 return NewInlineAtomDeflated(cx
, s
, n
, hash
);
1806 auto news
= cx
->make_pod_arena_array
<Latin1Char
>(js::StringBufferArena
, n
);
1808 cx
->recoverFromOutOfMemory();
1812 MOZ_ASSERT(CanStoreCharsAsLatin1(s
, n
));
1813 FillFromCompatible(news
.get(), s
, n
);
1815 return JSAtom::newValidLength(cx
, std::move(news
), n
, hash
);
1818 template <AllowGC allowGC
, typename CharT
>
1819 JSLinearString
* js::NewStringDontDeflate(
1820 JSContext
* cx
, UniquePtr
<CharT
[], JS::FreePolicy
> chars
, size_t length
,
1822 if (JSLinearString
* str
= TryEmptyOrStaticString(cx
, chars
.get(), length
)) {
1826 if (JSInlineString::lengthFits
<CharT
>(length
)) {
1827 // |chars.get()| is safe because 1) |NewInlineString| necessarily *copies*,
1828 // and 2) |chars| frees its contents only when this function returns.
1829 return NewInlineString
<allowGC
>(
1830 cx
, mozilla::Range
<const CharT
>(chars
.get(), length
), heap
);
1833 JS::Rooted
<JSString::OwnedChars
<CharT
>> ownedChars(cx
, std::move(chars
),
1835 return JSLinearString::new_
<allowGC
, CharT
>(cx
, &ownedChars
, heap
);
1838 template JSLinearString
* js::NewStringDontDeflate
<CanGC
>(
1839 JSContext
* cx
, UniqueTwoByteChars chars
, size_t length
, gc::Heap heap
);
1841 template JSLinearString
* js::NewStringDontDeflate
<NoGC
>(
1842 JSContext
* cx
, UniqueTwoByteChars chars
, size_t length
, gc::Heap heap
);
1844 template JSLinearString
* js::NewStringDontDeflate
<CanGC
>(
1845 JSContext
* cx
, UniqueLatin1Chars chars
, size_t length
, gc::Heap heap
);
1847 template JSLinearString
* js::NewStringDontDeflate
<NoGC
>(JSContext
* cx
,
1848 UniqueLatin1Chars chars
,
1852 template <AllowGC allowGC
, typename CharT
>
1853 JSLinearString
* js::NewString(JSContext
* cx
,
1854 UniquePtr
<CharT
[], JS::FreePolicy
> chars
,
1855 size_t length
, gc::Heap heap
) {
1856 if constexpr (std::is_same_v
<CharT
, char16_t
>) {
1857 if (CanStoreCharsAsLatin1(chars
.get(), length
)) {
1858 // Deflating copies from |chars.get()| and lets |chars| be freed on
1860 return NewStringDeflated
<allowGC
>(cx
, chars
.get(), length
, heap
);
1864 return NewStringDontDeflate
<allowGC
>(cx
, std::move(chars
), length
, heap
);
1867 template JSLinearString
* js::NewString
<CanGC
>(JSContext
* cx
,
1868 UniqueTwoByteChars chars
,
1869 size_t length
, gc::Heap heap
);
1871 template JSLinearString
* js::NewString
<NoGC
>(JSContext
* cx
,
1872 UniqueTwoByteChars chars
,
1873 size_t length
, gc::Heap heap
);
1875 template JSLinearString
* js::NewString
<CanGC
>(JSContext
* cx
,
1876 UniqueLatin1Chars chars
,
1877 size_t length
, gc::Heap heap
);
1879 template JSLinearString
* js::NewString
<NoGC
>(JSContext
* cx
,
1880 UniqueLatin1Chars chars
,
1881 size_t length
, gc::Heap heap
);
1885 template <AllowGC allowGC
, typename CharT
>
1886 JSLinearString
* NewStringCopyNDontDeflateNonStaticValidLength(JSContext
* cx
,
1890 if (JSInlineString::lengthFits
<CharT
>(n
)) {
1891 return NewInlineString
<allowGC
>(cx
, mozilla::Range
<const CharT
>(s
, n
),
1895 Rooted
<JSString::OwnedChars
<CharT
>> news(cx
,
1896 ::AllocChars
<CharT
>(cx
, n
, heap
));
1899 cx
->recoverFromOutOfMemory();
1904 PodCopy(news
.data(), s
, n
);
1906 return JSLinearString::newValidLength
<allowGC
, CharT
>(cx
, &news
, heap
);
1909 template JSLinearString
* NewStringCopyNDontDeflateNonStaticValidLength
<CanGC
>(
1910 JSContext
* cx
, const char16_t
* s
, size_t n
, gc::Heap heap
);
1912 template JSLinearString
* NewStringCopyNDontDeflateNonStaticValidLength
<CanGC
>(
1913 JSContext
* cx
, const Latin1Char
* s
, size_t n
, gc::Heap heap
);
1915 template <AllowGC allowGC
, typename CharT
>
1916 JSLinearString
* NewStringCopyNDontDeflate(JSContext
* cx
, const CharT
* s
,
1917 size_t n
, gc::Heap heap
) {
1918 if (JSLinearString
* str
= TryEmptyOrStaticString(cx
, s
, n
)) {
1922 if (MOZ_UNLIKELY(!JSLinearString::validateLength(cx
, n
))) {
1926 return NewStringCopyNDontDeflateNonStaticValidLength
<allowGC
>(cx
, s
, n
, heap
);
1929 template JSLinearString
* NewStringCopyNDontDeflate
<CanGC
>(JSContext
* cx
,
1934 template JSLinearString
* NewStringCopyNDontDeflate
<NoGC
>(JSContext
* cx
,
1939 template JSLinearString
* NewStringCopyNDontDeflate
<CanGC
>(JSContext
* cx
,
1940 const Latin1Char
* s
,
1944 template JSLinearString
* NewStringCopyNDontDeflate
<NoGC
>(JSContext
* cx
,
1945 const Latin1Char
* s
,
1949 JSLinearString
* NewLatin1StringZ(JSContext
* cx
, UniqueChars chars
,
1951 size_t length
= strlen(chars
.get());
1952 UniqueLatin1Chars
latin1(reinterpret_cast<Latin1Char
*>(chars
.release()));
1953 return NewString
<CanGC
>(cx
, std::move(latin1
), length
, heap
);
1956 template <AllowGC allowGC
, typename CharT
>
1957 JSLinearString
* NewStringCopyN(JSContext
* cx
, const CharT
* s
, size_t n
,
1959 if constexpr (std::is_same_v
<CharT
, char16_t
>) {
1960 if (CanStoreCharsAsLatin1(s
, n
)) {
1961 return NewStringDeflated
<allowGC
>(cx
, s
, n
, heap
);
1965 return NewStringCopyNDontDeflate
<allowGC
>(cx
, s
, n
, heap
);
1968 template JSLinearString
* NewStringCopyN
<CanGC
>(JSContext
* cx
, const char16_t
* s
,
1969 size_t n
, gc::Heap heap
);
1971 template JSLinearString
* NewStringCopyN
<NoGC
>(JSContext
* cx
, const char16_t
* s
,
1972 size_t n
, gc::Heap heap
);
1974 template JSLinearString
* NewStringCopyN
<CanGC
>(JSContext
* cx
,
1975 const Latin1Char
* s
, size_t n
,
1978 template JSLinearString
* NewStringCopyN
<NoGC
>(JSContext
* cx
,
1979 const Latin1Char
* s
, size_t n
,
1982 template <typename CharT
>
1983 JSAtom
* NewAtomCopyNDontDeflateValidLength(JSContext
* cx
, const CharT
* s
,
1984 size_t n
, js::HashNumber hash
) {
1985 if constexpr (std::is_same_v
<CharT
, char16_t
>) {
1986 MOZ_ASSERT(!CanStoreCharsAsLatin1(s
, n
));
1989 if (JSAtom::lengthFitsInline
<CharT
>(n
)) {
1990 return NewInlineAtom(cx
, s
, n
, hash
);
1993 auto news
= cx
->make_pod_arena_array
<CharT
>(js::StringBufferArena
, n
);
1995 cx
->recoverFromOutOfMemory();
1999 PodCopy(news
.get(), s
, n
);
2001 return JSAtom::newValidLength(cx
, std::move(news
), n
, hash
);
2004 template JSAtom
* NewAtomCopyNDontDeflateValidLength(JSContext
* cx
,
2005 const char16_t
* s
, size_t n
,
2006 js::HashNumber hash
);
2008 template JSAtom
* NewAtomCopyNDontDeflateValidLength(JSContext
* cx
,
2009 const Latin1Char
* s
,
2011 js::HashNumber hash
);
2013 template <typename CharT
>
2014 JSAtom
* NewAtomCopyNMaybeDeflateValidLength(JSContext
* cx
, const CharT
* s
,
2015 size_t n
, js::HashNumber hash
) {
2016 if constexpr (std::is_same_v
<CharT
, char16_t
>) {
2017 if (CanStoreCharsAsLatin1(s
, n
)) {
2018 return NewAtomDeflatedValidLength(cx
, s
, n
, hash
);
2022 return NewAtomCopyNDontDeflateValidLength(cx
, s
, n
, hash
);
2025 template JSAtom
* NewAtomCopyNMaybeDeflateValidLength(JSContext
* cx
,
2028 js::HashNumber hash
);
2030 template JSAtom
* NewAtomCopyNMaybeDeflateValidLength(JSContext
* cx
,
2031 const Latin1Char
* s
,
2033 js::HashNumber hash
);
2035 JSLinearString
* NewStringCopyUTF8N(JSContext
* cx
, const JS::UTF8Chars
& utf8
,
2036 JS::SmallestEncoding encoding
,
2038 if (encoding
== JS::SmallestEncoding::ASCII
) {
2039 return NewStringCopyN
<js::CanGC
>(cx
, utf8
.begin().get(), utf8
.length(),
2044 if (encoding
== JS::SmallestEncoding::Latin1
) {
2045 UniqueLatin1Chars
latin1(
2046 UTF8CharsToNewLatin1CharsZ(cx
, utf8
, &length
, js::StringBufferArena
)
2052 return NewString
<js::CanGC
>(cx
, std::move(latin1
), length
, heap
);
2055 MOZ_ASSERT(encoding
== JS::SmallestEncoding::UTF16
);
2057 UniqueTwoByteChars
utf16(
2058 UTF8CharsToNewTwoByteCharsZ(cx
, utf8
, &length
, js::StringBufferArena
)
2064 return NewString
<js::CanGC
>(cx
, std::move(utf16
), length
, heap
);
2067 JSLinearString
* NewStringCopyUTF8N(JSContext
* cx
, const JS::UTF8Chars
& utf8
,
2069 JS::SmallestEncoding encoding
= JS::FindSmallestEncoding(utf8
);
2070 return NewStringCopyUTF8N(cx
, utf8
, encoding
, heap
);
2073 template <typename CharT
>
2074 MOZ_ALWAYS_INLINE JSExternalString
* ExternalStringCache::lookupExternalImpl(
2075 const CharT
* chars
, size_t len
) const {
2076 AutoCheckCannotGC nogc
;
2078 for (size_t i
= 0; i
< NumEntries
; i
++) {
2079 JSExternalString
* str
= externalEntries_
[i
];
2080 if (!str
|| str
->length() != len
) {
2084 if constexpr (std::is_same_v
<CharT
, JS::Latin1Char
>) {
2085 if (!str
->hasLatin1Chars()) {
2089 if (!str
->hasTwoByteChars()) {
2094 const CharT
* strChars
= str
->nonInlineChars
<CharT
>(nogc
);
2095 if (chars
== strChars
) {
2096 // Note that we don't need an incremental barrier here or below.
2097 // The cache is purged on GC so any string we get from the cache
2098 // must have been allocated after the GC started.
2102 // Compare the chars. Don't do this for long strings as it will be
2103 // faster to allocate a new external string.
2104 static const size_t MaxLengthForCharComparison
= 100;
2105 if (len
<= MaxLengthForCharComparison
&& EqualChars(chars
, strChars
, len
)) {
2113 MOZ_ALWAYS_INLINE JSExternalString
* ExternalStringCache::lookupExternal(
2114 const JS::Latin1Char
* chars
, size_t len
) const {
2115 return lookupExternalImpl(chars
, len
);
2117 MOZ_ALWAYS_INLINE JSExternalString
* ExternalStringCache::lookupExternal(
2118 const char16_t
* chars
, size_t len
) const {
2119 return lookupExternalImpl(chars
, len
);
2122 MOZ_ALWAYS_INLINE
void ExternalStringCache::putExternal(JSExternalString
* str
) {
2123 for (size_t i
= NumEntries
- 1; i
> 0; i
--) {
2124 externalEntries_
[i
] = externalEntries_
[i
- 1];
2126 externalEntries_
[0] = str
;
2129 template <typename CharT
>
2130 MOZ_ALWAYS_INLINE JSInlineString
* ExternalStringCache::lookupInlineImpl(
2131 const CharT
* chars
, size_t len
) const {
2132 MOZ_ASSERT(CanStoreCharsAsLatin1(chars
, len
));
2133 MOZ_ASSERT(JSThinInlineString::lengthFits
<Latin1Char
>(len
));
2135 AutoCheckCannotGC nogc
;
2137 for (size_t i
= 0; i
< NumEntries
; i
++) {
2138 JSInlineString
* str
= inlineEntries_
[i
];
2139 if (!str
|| str
->length() != len
) {
2143 const JS::Latin1Char
* strChars
= str
->latin1Chars(nogc
);
2144 if (EqualChars(chars
, strChars
, len
)) {
2152 MOZ_ALWAYS_INLINE JSInlineString
* ExternalStringCache::lookupInline(
2153 const JS::Latin1Char
* chars
, size_t len
) const {
2154 return lookupInlineImpl(chars
, len
);
2156 MOZ_ALWAYS_INLINE JSInlineString
* ExternalStringCache::lookupInline(
2157 const char16_t
* chars
, size_t len
) const {
2158 return lookupInlineImpl(chars
, len
);
2161 MOZ_ALWAYS_INLINE
void ExternalStringCache::putInline(JSInlineString
* str
) {
2162 MOZ_ASSERT(str
->hasLatin1Chars());
2164 for (size_t i
= NumEntries
- 1; i
> 0; i
--) {
2165 inlineEntries_
[i
] = inlineEntries_
[i
- 1];
2167 inlineEntries_
[0] = str
;
2170 } /* namespace js */
2172 template <AllowGC allowGC
>
2173 static MOZ_ALWAYS_INLINE JSInlineString
* NewInlineStringMaybeDeflated(
2174 JSContext
* cx
, const mozilla::Range
<const JS::Latin1Char
>& chars
,
2175 gc::Heap heap
= gc::Heap::Default
) {
2176 return NewInlineString
<allowGC
>(cx
, chars
, heap
);
2179 template <AllowGC allowGC
>
2180 static MOZ_ALWAYS_INLINE JSInlineString
* NewInlineStringMaybeDeflated(
2181 JSContext
* cx
, const mozilla::Range
<const char16_t
>& chars
,
2182 gc::Heap heap
= gc::Heap::Default
) {
2183 return NewInlineStringDeflated
<allowGC
>(cx
, chars
, heap
);
2188 template <typename CharT
>
2189 JSString
* NewMaybeExternalString(JSContext
* cx
, const CharT
* s
, size_t n
,
2190 const JSExternalStringCallbacks
* callbacks
,
2191 bool* allocatedExternal
, gc::Heap heap
) {
2192 if (JSString
* str
= TryEmptyOrStaticString(cx
, s
, n
)) {
2193 *allocatedExternal
= false;
2197 ExternalStringCache
& cache
= cx
->zone()->externalStringCache();
2199 if (JSThinInlineString::lengthFits
<Latin1Char
>(n
) &&
2200 CanStoreCharsAsLatin1(s
, n
)) {
2201 *allocatedExternal
= false;
2202 if (JSInlineString
* str
= cache
.lookupInline(s
, n
)) {
2205 JSInlineString
* str
= NewInlineStringMaybeDeflated
<AllowGC::CanGC
>(
2206 cx
, mozilla::Range
<const CharT
>(s
, n
), heap
);
2210 cache
.putInline(str
);
2214 if (JSExternalString
* str
= cache
.lookupExternal(s
, n
)) {
2215 *allocatedExternal
= false;
2219 JSExternalString
* str
= JSExternalString::new_(cx
, s
, n
, callbacks
);
2224 *allocatedExternal
= true;
2225 cache
.putExternal(str
);
2229 template JSString
* NewMaybeExternalString(
2230 JSContext
* cx
, const JS::Latin1Char
* s
, size_t n
,
2231 const JSExternalStringCallbacks
* callbacks
, bool* allocatedExternal
,
2234 template JSString
* NewMaybeExternalString(
2235 JSContext
* cx
, const char16_t
* s
, size_t n
,
2236 const JSExternalStringCallbacks
* callbacks
, bool* allocatedExternal
,
2239 } /* namespace js */
2241 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
2242 void JSExtensibleString::dumpOwnRepresentationFields(
2243 js::JSONPrinter
& json
) const {
2244 json
.property("capacity", capacity());
2247 void JSInlineString::dumpOwnRepresentationFields(js::JSONPrinter
& json
) const {}
// Writes the field specific to linear strings into |json|: a boolean
// "charsInNursery" property saying whether the pointer returned by
// nonInlineCharsRaw() lies inside the runtime's nursery.
2249 void JSLinearString::dumpOwnRepresentationFields(js::JSONPrinter
& json
) const {
2251 // Include whether the chars are in the nursery even for tenured
2252 // strings, which should always be false. For investigating bugs, it's
2253 // better to not assume that.
2254 js::Nursery
& nursery
= runtimeFromMainThread()->gc
.nursery();
2255 bool inNursery
= nursery
.isInside(nonInlineCharsRaw());
// Report the result under the property name "charsInNursery".
2256 json
.boolProperty("charsInNursery", inNursery
);
2261 struct RepresentativeExternalString
: public JSExternalStringCallbacks
{
2262 void finalize(JS::Latin1Char
* chars
) const override
{
2263 // Constant chars, nothing to do.
2265 void finalize(char16_t
* chars
) const override
{
2266 // Constant chars, nothing to do.
2268 size_t sizeOfBuffer(const JS::Latin1Char
* chars
,
2269 mozilla::MallocSizeOf mallocSizeOf
) const override
{
2270 // This string's buffer is not heap-allocated, so its malloc size is 0.
2273 size_t sizeOfBuffer(const char16_t
* chars
,
2274 mozilla::MallocSizeOf mallocSizeOf
) const override
{
2275 // This string's buffer is not heap-allocated, so its malloc size is 0.
2280 static const RepresentativeExternalString RepresentativeExternalStringCallbacks
;
2282 template <typename CheckString
, typename CharT
>
2283 static bool FillWithRepresentatives(JSContext
* cx
, Handle
<ArrayObject
*> array
,
2284 uint32_t* index
, const CharT
* chars
,
2285 size_t len
, size_t inlineStringMaxLength
,
2286 size_t inlineAtomMaxLength
,
2287 const CheckString
& check
, gc::Heap heap
) {
2288 auto AppendString
= [&check
](JSContext
* cx
, Handle
<ArrayObject
*> array
,
2289 uint32_t* index
, HandleString s
) {
2290 MOZ_ASSERT(check(s
));
2291 (void)check
; // silence clang -Wunused-lambda-capture in opt builds
2292 RootedValue
val(cx
, StringValue(s
));
2293 return JS_DefineElement(cx
, array
, (*index
)++, val
, 0);
2296 MOZ_ASSERT(len
> inlineStringMaxLength
);
2297 MOZ_ASSERT(len
> inlineAtomMaxLength
);
2300 RootedString
atom1(cx
, AtomizeChars(cx
, chars
, len
));
2301 if (!atom1
|| !AppendString(cx
, array
, index
, atom1
)) {
2304 MOZ_ASSERT(atom1
->isAtom());
2306 // Thin inline atom.
2307 RootedString
atom2(cx
, AtomizeChars(cx
, chars
, 2));
2308 if (!atom2
|| !AppendString(cx
, array
, index
, atom2
)) {
2311 MOZ_ASSERT(atom2
->isAtom());
2312 MOZ_ASSERT(atom2
->isInline());
2315 RootedString
atom3(cx
, AtomizeChars(cx
, chars
, inlineAtomMaxLength
));
2316 if (!atom3
|| !AppendString(cx
, array
, index
, atom3
)) {
2319 MOZ_ASSERT(atom3
->isAtom());
2320 MOZ_ASSERT_IF(inlineStringMaxLength
< inlineAtomMaxLength
,
2321 atom3
->isFatInline());
2323 // Normal linear string; maybe nursery.
2324 RootedString
linear1(cx
, NewStringCopyN
<CanGC
>(cx
, chars
, len
, heap
));
2325 if (!linear1
|| !AppendString(cx
, array
, index
, linear1
)) {
2328 MOZ_ASSERT(linear1
->isLinear());
2330 // Inline string; maybe nursery.
2331 RootedString
linear2(cx
, NewStringCopyN
<CanGC
>(cx
, chars
, 3, heap
));
2332 if (!linear2
|| !AppendString(cx
, array
, index
, linear2
)) {
2335 MOZ_ASSERT(linear2
->isLinear());
2336 MOZ_ASSERT(linear2
->isInline());
2338 // Fat inline string; maybe nursery.
2339 RootedString
linear3(
2340 cx
, NewStringCopyN
<CanGC
>(cx
, chars
, inlineStringMaxLength
, heap
));
2341 if (!linear3
|| !AppendString(cx
, array
, index
, linear3
)) {
2344 MOZ_ASSERT(linear3
->isLinear());
2345 MOZ_ASSERT(linear3
->isFatInline());
2347 // Rope; maybe nursery.
2348 RootedString
rope(cx
, ConcatStrings
<CanGC
>(cx
, atom1
, atom3
, heap
));
2349 if (!rope
|| !AppendString(cx
, array
, index
, rope
)) {
2352 MOZ_ASSERT(rope
->isRope());
2354 // Dependent; maybe nursery.
2355 RootedString
dep(cx
, NewDependentString(cx
, atom1
, 0, len
- 2, heap
));
2356 if (!dep
|| !AppendString(cx
, array
, index
, dep
)) {
2359 MOZ_ASSERT(dep
->isDependent());
2361 // Extensible; maybe nursery.
2362 RootedString
temp1(cx
, NewStringCopyN
<CanGC
>(cx
, chars
, len
, heap
));
2366 RootedString
extensible(cx
, ConcatStrings
<CanGC
>(cx
, temp1
, atom3
, heap
));
2367 if (!extensible
|| !extensible
->ensureLinear(cx
)) {
2370 if (!AppendString(cx
, array
, index
, extensible
)) {
2373 MOZ_ASSERT(extensible
->isExtensible());
2375 RootedString
external1(cx
), external2(cx
);
2376 if constexpr (std::is_same_v
<CharT
, char16_t
>) {
2377 external1
= JS_NewExternalUCString(cx
, (const char16_t
*)chars
, len
,
2378 &RepresentativeExternalStringCallbacks
);
2379 if (!external1
|| !AppendString(cx
, array
, index
, external1
)) {
2382 MOZ_ASSERT(external1
->isExternal());
2384 external2
= JS_NewExternalUCString(cx
, (const char16_t
*)chars
, 2,
2385 &RepresentativeExternalStringCallbacks
);
2386 if (!external2
|| !AppendString(cx
, array
, index
, external2
)) {
2389 MOZ_ASSERT(external2
->isExternal());
2392 JS_NewExternalStringLatin1(cx
, (const Latin1Char
*)chars
, len
,
2393 &RepresentativeExternalStringCallbacks
);
2394 if (!external1
|| !AppendString(cx
, array
, index
, external1
)) {
2397 MOZ_ASSERT(external1
->isExternal());
2400 JS_NewExternalStringLatin1(cx
, (const Latin1Char
*)chars
, 2,
2401 &RepresentativeExternalStringCallbacks
);
2402 if (!external2
|| !AppendString(cx
, array
, index
, external2
)) {
2405 MOZ_ASSERT(external2
->isExternal());
2408 // Assert the strings still have the types we expect after creating the
2411 MOZ_ASSERT(atom1
->isAtom());
2412 MOZ_ASSERT(atom2
->isAtom());
2413 MOZ_ASSERT(atom3
->isAtom());
2414 MOZ_ASSERT(atom2
->isInline());
2415 MOZ_ASSERT_IF(inlineStringMaxLength
< inlineAtomMaxLength
,
2416 atom3
->isFatInline());
2418 MOZ_ASSERT(linear1
->isLinear());
2419 MOZ_ASSERT(linear2
->isLinear());
2420 MOZ_ASSERT(linear3
->isLinear());
2421 MOZ_ASSERT(linear2
->isInline());
2422 MOZ_ASSERT(linear3
->isFatInline());
2424 MOZ_ASSERT(rope
->isRope());
2425 MOZ_ASSERT(dep
->isDependent());
2426 MOZ_ASSERT(extensible
->isExtensible());
2427 MOZ_ASSERT(external1
->isExternal());
2428 MOZ_ASSERT(external2
->isExternal());
2433 bool JSString::fillWithRepresentatives(JSContext
* cx
,
2434 Handle
<ArrayObject
*> array
) {
2437 auto CheckTwoByte
= [](JSString
* str
) { return str
->hasTwoByteChars(); };
2438 auto CheckLatin1
= [](JSString
* str
) { return str
->hasLatin1Chars(); };
2440 static const char16_t twoByteChars
[] =
2441 u
"\u1234abc\0def\u5678ghijklmasdfa\0xyz0123456789";
2442 static const Latin1Char latin1Chars
[] = "abc\0defghijklmasdfa\0xyz0123456789";
2444 // Create strings using both the default heap and forcing the tenured heap. If
2445 // nursery strings are available, this is a best effort at creating them in
2446 // the default heap case. Since nursery strings may be disabled or a GC may
2447 // occur during this process, there may be duplicate representatives in the
2450 if (!FillWithRepresentatives(cx
, array
, &index
, twoByteChars
,
2451 std::size(twoByteChars
) - 1,
2452 JSFatInlineString::MAX_LENGTH_TWO_BYTE
,
2453 js::FatInlineAtom::MAX_LENGTH_TWO_BYTE
,
2454 CheckTwoByte
, gc::Heap::Tenured
)) {
2457 if (!FillWithRepresentatives(cx
, array
, &index
, latin1Chars
,
2458 std::size(latin1Chars
) - 1,
2459 JSFatInlineString::MAX_LENGTH_LATIN1
,
2460 js::FatInlineAtom::MAX_LENGTH_LATIN1
,
2461 CheckLatin1
, gc::Heap::Tenured
)) {
2464 if (!FillWithRepresentatives(cx
, array
, &index
, twoByteChars
,
2465 std::size(twoByteChars
) - 1,
2466 JSFatInlineString::MAX_LENGTH_TWO_BYTE
,
2467 js::FatInlineAtom::MAX_LENGTH_TWO_BYTE
,
2468 CheckTwoByte
, gc::Heap::Default
)) {
2471 if (!FillWithRepresentatives(cx
, array
, &index
, latin1Chars
,
2472 std::size(latin1Chars
) - 1,
2473 JSFatInlineString::MAX_LENGTH_LATIN1
,
2474 js::FatInlineAtom::MAX_LENGTH_LATIN1
,
2475 CheckLatin1
, gc::Heap::Default
)) {
2482 // * Fat inline atom.
2483 // * Normal linear string
2485 // * Fat inline string
2486 // * Rope; maybe nursery.
2489 // * External with original len
2490 // * External with len==2
2491 static constexpr uint32_t StringTypes
= 11;
2494 static constexpr uint32_t CharTypes
= 2;
2497 static constexpr uint32_t HeapType
= 2;
2498 MOZ_ASSERT(index
== StringTypes
* CharTypes
* HeapType
);
2504 /*** Conversions ************************************************************/
2506 UniqueChars
js::EncodeLatin1(JSContext
* cx
, JSString
* str
) {
2507 JSLinearString
* linear
= str
->ensureLinear(cx
);
2512 JS::AutoCheckCannotGC nogc
;
2513 if (linear
->hasTwoByteChars()) {
2514 JS::Latin1CharsZ chars
=
2515 JS::LossyTwoByteCharsToNewLatin1CharsZ(cx
, linear
->twoByteRange(nogc
));
2516 return UniqueChars(chars
.c_str());
2519 size_t len
= str
->length();
2520 Latin1Char
* buf
= cx
->pod_malloc
<Latin1Char
>(len
+ 1);
2525 PodCopy(buf
, linear
->latin1Chars(nogc
), len
);
2528 return UniqueChars(reinterpret_cast<char*>(buf
));
2531 UniqueChars
js::EncodeAscii(JSContext
* cx
, JSString
* str
) {
2532 JSLinearString
* linear
= str
->ensureLinear(cx
);
2537 MOZ_ASSERT(StringIsAscii(linear
));
2538 return EncodeLatin1(cx
, linear
);
2541 UniqueChars
js::IdToPrintableUTF8(JSContext
* cx
, HandleId id
,
2542 IdToPrintableBehavior behavior
) {
2543 // ToString(<symbol>) throws a TypeError, therefore require that callers
2544 // request source representation when |id| is a property key.
2545 MOZ_ASSERT_IF(behavior
== IdToPrintableBehavior::IdIsIdentifier
,
2546 id
.isAtom() && IsIdentifierNameOrPrivateName(id
.toAtom()));
2548 RootedValue
v(cx
, IdToValue(id
));
2550 if (behavior
== IdToPrintableBehavior::IdIsPropertyKey
) {
2551 str
= ValueToSource(cx
, v
);
2553 str
= ToString
<CanGC
>(cx
, v
);
2558 return StringToNewUTF8CharsZ(cx
, *str
);
// Slow path for converting a non-string value to a string. Dispatches on the
// type of the value; |allowGC| selects the handle type of |arg| and is
// forwarded to the Int32 and Number conversions.
2561 template <AllowGC allowGC
>
2562 JSString
* js::ToStringSlow(
2563 JSContext
* cx
, typename MaybeRooted
<Value
, allowGC
>::HandleType arg
) {
2564 /* As with ToObjectSlow, callers must verify that |arg| isn't a string. */
2565 MOZ_ASSERT(!arg
.isString());
// Non-primitive values are converted with ToPrimitive, using a string hint.
2568 if (!v
.isPrimitive()) {
2572 RootedValue
v2(cx
, v
);
2573 if (!ToPrimitive(cx
, JSTYPE_STRING
, &v2
)) {
2582 } else if (v
.isInt32()) {
2583 str
= Int32ToString
<allowGC
>(cx
, v
.toInt32());
2584 } else if (v
.isDouble()) {
2585 str
= NumberToString
<allowGC
>(cx
, v
.toDouble());
2586 } else if (v
.isBoolean()) {
2587 str
= BooleanToString(cx
, v
.toBoolean());
2588 } else if (v
.isNull()) {
2589 str
= cx
->names().null
;
2590 } else if (v
.isSymbol()) {
// Symbols have no string conversion here: JSMSG_SYMBOL_TO_STRING is reported.
2592 JS_ReportErrorNumberASCII(cx
, GetErrorMessage
, nullptr,
2593 JSMSG_SYMBOL_TO_STRING
);
2596 } else if (v
.isBigInt()) {
// The BigInt is rooted and converted with the CanGC variant of
// BigInt::toString, passing 10 (presumably the radix -- confirm).
2600 RootedBigInt
i(cx
, v
.toBigInt());
2601 str
= BigInt::toString
<CanGC
>(cx
, i
, 10);
2603 #ifdef ENABLE_RECORD_TUPLE
2604 else if (v
.isExtendedPrimitive()) {
// Tuples and records are returned in source form, via TupleToSource and
// RecordToSource respectively.
2609 Rooted
<TupleType
*> tup(cx
, &TupleType::thisTupleValue(v
));
2610 return TupleToSource(cx
, tup
);
2612 Rooted
<RecordType
*> rec(cx
);
2613 MOZ_ALWAYS_TRUE(RecordObject::maybeUnbox(&v
.getObjectPayload(), &rec
));
2614 return RecordToSource(cx
, rec
);
// The only type left at this point is asserted to be undefined.
2618 MOZ_ASSERT(v
.isUndefined());
2619 str
= cx
->names().undefined
;
2624 template JSString
* js::ToStringSlow
<CanGC
>(JSContext
* cx
, HandleValue arg
);
2626 template JSString
* js::ToStringSlow
<NoGC
>(JSContext
* cx
, const Value
& arg
);
2628 JS_PUBLIC_API JSString
* js::ToStringSlow(JSContext
* cx
, HandleValue v
) {
2629 return ToStringSlow
<CanGC
>(cx
, v
);