Backed out changeset 2450366cf7ca (bug 1891629) for causing win msix mochitest failures
[gecko.git] / js / src / vm / StringType.cpp
blobb735b91b71e13089b6fa130983bb48d04e5e8f96
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "vm/StringType-inl.h"
9 #include "mozilla/DebugOnly.h"
10 #include "mozilla/HashFunctions.h"
11 #include "mozilla/Latin1.h"
12 #include "mozilla/MathAlgorithms.h"
13 #include "mozilla/MemoryReporting.h"
14 #include "mozilla/PodOperations.h"
15 #include "mozilla/RangedPtr.h"
16 #include "mozilla/TextUtils.h"
17 #include "mozilla/Utf8.h"
18 #include "mozilla/Vector.h"
20 #include <algorithm> // std::{all_of,copy_n,enable_if,is_const,move}
21 #include <iterator> // std::size
22 #include <type_traits> // std::is_same, std::is_unsigned
24 #include "jsfriendapi.h"
25 #include "jsnum.h"
27 #include "builtin/Boolean.h"
28 #ifdef ENABLE_RECORD_TUPLE
29 # include "builtin/RecordObject.h"
30 #endif
31 #include "gc/AllocKind.h"
32 #include "gc/MaybeRooted.h"
33 #include "gc/Nursery.h"
34 #include "js/CharacterEncoding.h"
35 #include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_*
36 #include "js/Printer.h" // js::GenericPrinter
37 #include "js/PropertyAndElement.h" // JS_DefineElement
38 #include "js/SourceText.h" // JS::SourceText
39 #include "js/StableStringChars.h"
40 #include "js/UbiNode.h"
41 #include "util/Identifier.h" // js::IsIdentifierNameOrPrivateName
42 #include "util/Unicode.h"
43 #include "vm/GeckoProfiler.h"
44 #include "vm/JSONPrinter.h" // js::JSONPrinter
45 #include "vm/StaticStrings.h"
46 #include "vm/ToSource.h" // js::ValueToSource
48 #include "gc/Marking-inl.h"
49 #include "vm/GeckoProfiler-inl.h"
50 #ifdef ENABLE_RECORD_TUPLE
51 # include "vm/RecordType.h"
52 # include "vm/TupleType.h"
53 #endif
55 using namespace js;
57 using mozilla::AsWritableChars;
58 using mozilla::ConvertLatin1toUtf16;
59 using mozilla::IsAsciiDigit;
60 using mozilla::IsUtf16Latin1;
61 using mozilla::LossyConvertUtf16toLatin1;
62 using mozilla::PodCopy;
63 using mozilla::RangedPtr;
64 using mozilla::RoundUpPow2;
65 using mozilla::Span;
67 using JS::AutoCheckCannotGC;
68 using JS::AutoStableStringChars;
70 using UniqueLatin1Chars = UniquePtr<Latin1Char[], JS::FreePolicy>;
72 size_t JSString::sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) {
73 // JSRope: do nothing, we'll count all children chars when we hit the leaf
74 // strings.
75 if (isRope()) {
76 return 0;
79 MOZ_ASSERT(isLinear());
81 // JSDependentString: do nothing, we'll count the chars when we hit the base
82 // string.
83 if (isDependent()) {
84 return 0;
87 // JSExternalString: Ask the embedding to tell us what's going on.
88 if (isExternal()) {
89 // Our callback isn't supposed to cause GC.
90 JS::AutoSuppressGCAnalysis nogc;
91 JSExternalString& external = asExternal();
92 if (external.hasLatin1Chars()) {
93 return asExternal().callbacks()->sizeOfBuffer(external.latin1Chars(),
94 mallocSizeOf);
95 } else {
96 return asExternal().callbacks()->sizeOfBuffer(external.twoByteChars(),
97 mallocSizeOf);
101 // JSExtensibleString: count the full capacity, not just the used space.
102 if (isExtensible()) {
103 JSExtensibleString& extensible = asExtensible();
104 return extensible.hasLatin1Chars()
105 ? mallocSizeOf(extensible.rawLatin1Chars())
106 : mallocSizeOf(extensible.rawTwoByteChars());
109 // JSInlineString, JSFatInlineString, js::ThinInlineAtom, js::FatInlineAtom:
110 // the chars are inline.
111 if (isInline()) {
112 return 0;
115 // Chars in the nursery are owned by the nursery.
116 if (!ownsMallocedChars()) {
117 return 0;
120 // Everything else: measure the space for the chars.
121 JSLinearString& linear = asLinear();
122 return linear.hasLatin1Chars() ? mallocSizeOf(linear.rawLatin1Chars())
123 : mallocSizeOf(linear.rawTwoByteChars());
126 JS::ubi::Node::Size JS::ubi::Concrete<JSString>::size(
127 mozilla::MallocSizeOf mallocSizeOf) const {
128 JSString& str = get();
129 size_t size;
130 if (str.isAtom()) {
131 if (str.isInline()) {
132 size = str.isFatInline() ? sizeof(js::FatInlineAtom)
133 : sizeof(js::ThinInlineAtom);
134 } else {
135 size = sizeof(js::NormalAtom);
137 } else {
138 size = str.isFatInline() ? sizeof(JSFatInlineString) : sizeof(JSString);
141 if (IsInsideNursery(&str)) {
142 size += Nursery::nurseryCellHeaderSize();
145 size += str.sizeOfExcludingThis(mallocSizeOf);
147 return size;
// The concrete type name string for JS::ubi::Concrete<JSString>.
150 const char16_t JS::ubi::Concrete<JSString>::concreteTypeName[] = u"JSString";
// Encodes as much of this string as fits into |buffer| as UTF-8.
//
// Ropes are walked iteratively, left child first, with the pending right
// children kept on an explicit stack. Returns Nothing() only when growing that
// stack fails (OOM). Otherwise returns Some((totalRead, totalWritten)): the
// number of code units consumed from the string and the number of bytes
// written to |buffer|. The function returns early, with the totals so far, as
// soon as |buffer| cannot hold the next piece of output.
//
// A lead surrogate that ends a two-byte leaf is held back in
// |pendingLeadSurrogate| so it can be combined with a trail surrogate that
// starts the next leaf. When it stays unpaired, the bytes EF BF BD (U+FFFD)
// are written in its place.
152 mozilla::Maybe<std::tuple<size_t, size_t>> JSString::encodeUTF8Partial(
153 const JS::AutoRequireNoGC& nogc, mozilla::Span<char> buffer) const {
154 mozilla::Vector<const JSString*, 16, SystemAllocPolicy> stack;
155 const JSString* current = this;
156 char16_t pendingLeadSurrogate = 0; // U+0000 means no pending lead surrogate
157 size_t totalRead = 0;
158 size_t totalWritten = 0;
159 for (;;) {
// Rope node: remember the right child for later and descend to the left.
160 if (current->isRope()) {
161 JSRope& rope = current->asRope();
162 if (!stack.append(rope.rightChild())) {
163 // OOM
164 return mozilla::Nothing();
166 current = rope.leftChild();
167 continue;
170 JSLinearString& linear = current->asLinear();
171 if (MOZ_LIKELY(linear.hasLatin1Chars())) {
// A Latin1 leaf cannot supply a trail surrogate, so a pending lead surrogate
// is unpaired here and is replaced by EF BF BD.
172 if (MOZ_UNLIKELY(pendingLeadSurrogate)) {
173 if (buffer.Length() < 3) {
174 return mozilla::Some(std::make_tuple(totalRead, totalWritten));
176 buffer[0] = '\xEF';
177 buffer[1] = '\xBF';
178 buffer[2] = '\xBD';
179 buffer = buffer.From(3);
180 totalRead += 1; // pendingLeadSurrogate
181 totalWritten += 3;
182 pendingLeadSurrogate = 0;
184 auto src = mozilla::AsChars(
185 mozilla::Span(linear.latin1Chars(nogc), linear.length()));
186 size_t read;
187 size_t written;
188 std::tie(read, written) =
189 mozilla::ConvertLatin1toUtf8Partial(src, buffer);
190 buffer = buffer.From(written);
191 totalRead += read;
192 totalWritten += written;
// Fewer units read than available: the conversion stopped early, so return.
193 if (read < src.Length()) {
194 return mozilla::Some(std::make_tuple(totalRead, totalWritten));
196 } else {
197 auto src = mozilla::Span(linear.twoByteChars(nogc), linear.length());
// Resolve a lead surrogate carried over from the previous leaf against the
// first code unit of this leaf (0 is used when this leaf is empty).
198 if (MOZ_UNLIKELY(pendingLeadSurrogate)) {
199 char16_t first = 0;
200 if (!src.IsEmpty()) {
201 first = src[0];
203 if (unicode::IsTrailSurrogate(first)) {
204 // Got a surrogate pair
205 if (buffer.Length() < 4) {
206 return mozilla::Some(std::make_tuple(totalRead, totalWritten));
208 uint32_t astral = unicode::UTF16Decode(pendingLeadSurrogate, first);
209 buffer[0] = char(0b1111'0000 | (astral >> 18));
210 buffer[1] = char(0b1000'0000 | ((astral >> 12) & 0b11'1111));
211 buffer[2] = char(0b1000'0000 | ((astral >> 6) & 0b11'1111));
212 buffer[3] = char(0b1000'0000 | (astral & 0b11'1111));
213 src = src.From(1);
214 buffer = buffer.From(4);
215 totalRead += 2; // both pendingLeadSurrogate and first!
216 totalWritten += 4;
217 } else {
218 // unpaired surrogate
219 if (buffer.Length() < 3) {
220 return mozilla::Some(std::make_tuple(totalRead, totalWritten));
222 buffer[0] = '\xEF';
223 buffer[1] = '\xBF';
224 buffer[2] = '\xBD';
225 buffer = buffer.From(3);
226 totalRead += 1; // pendingLeadSurrogate
227 totalWritten += 3;
229 pendingLeadSurrogate = 0;
// If this leaf ends with a lead surrogate, trim it from |src| and carry it
// over so it can be matched against the start of the next leaf.
231 if (!src.IsEmpty()) {
232 char16_t last = src[src.Length() - 1];
233 if (unicode::IsLeadSurrogate(last)) {
234 src = src.To(src.Length() - 1);
235 pendingLeadSurrogate = last;
236 } else {
237 MOZ_ASSERT(!pendingLeadSurrogate);
239 size_t read;
240 size_t written;
241 std::tie(read, written) =
242 mozilla::ConvertUtf16toUtf8Partial(src, buffer);
243 buffer = buffer.From(written);
244 totalRead += read;
245 totalWritten += written;
246 if (read < src.Length()) {
247 return mozilla::Some(std::make_tuple(totalRead, totalWritten));
// Leaf done: continue with the most recently deferred right child, if any.
251 if (stack.empty()) {
252 break;
254 current = stack.popCopy();
// A lead surrogate still pending after the last leaf is unpaired.
256 if (MOZ_UNLIKELY(pendingLeadSurrogate)) {
257 if (buffer.Length() < 3) {
258 return mozilla::Some(std::make_tuple(totalRead, totalWritten));
260 buffer[0] = '\xEF';
261 buffer[1] = '\xBF';
262 buffer[2] = '\xBD';
263 // No need to update buffer and pendingLeadSurrogate anymore
264 totalRead += 1;
265 totalWritten += 3;
267 return mozilla::Some(std::make_tuple(totalRead, totalWritten));
270 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
271 template <typename CharT>
272 /*static */
273 void JSString::dumpCharsNoQuote(const CharT* s, size_t n,
274 js::GenericPrinter& out) {
275 for (size_t i = 0; i < n; i++) {
276 char16_t c = s[i];
277 if (c == '"') {
278 out.put("\\\"");
279 } else if (c == '\'') {
280 out.put("\\'");
281 } else if (c == '`') {
282 out.put("\\`");
283 } else if (c == '\\') {
284 out.put("\\\\");
285 } else if (c == '\r') {
286 out.put("\\r");
287 } else if (c == '\n') {
288 out.put("\\n");
289 } else if (c == '\t') {
290 out.put("\\t");
291 } else if (c >= 32 && c < 127) {
292 out.putChar((char)s[i]);
293 } else if (c <= 255) {
294 out.printf("\\x%02x", unsigned(c));
295 } else {
296 out.printf("\\u%04x", unsigned(c));
// Explicit instantiations of the dumpCharsNoQuote template for the two
// character types used in this file: Latin1Char and char16_t.
301 /* static */
302 template void JSString::dumpCharsNoQuote(const Latin1Char* s, size_t n,
303 js::GenericPrinter& out);
305 /* static */
306 template void JSString::dumpCharsNoQuote(const char16_t* s, size_t n,
307 js::GenericPrinter& out);
309 void JSString::dump() const {
310 js::Fprinter out(stderr);
311 dump(out);
314 void JSString::dump(js::GenericPrinter& out) const {
315 js::JSONPrinter json(out);
316 dump(json);
317 out.put("\n");
320 void JSString::dump(js::JSONPrinter& json) const {
321 json.beginObject();
322 dumpFields(json);
323 json.endObject();
326 const char* RepresentationToString(const JSString* s) {
327 if (s->isAtom()) {
328 return "JSAtom";
331 if (s->isLinear()) {
332 if (s->isDependent()) {
333 return "JSDependentString";
335 if (s->isExternal()) {
336 return "JSExternalString";
338 if (s->isExtensible()) {
339 return "JSExtensibleString";
342 if (s->isInline()) {
343 if (s->isFatInline()) {
344 return "JSFatInlineString";
346 return "JSThinInlineString";
349 return "JSLinearString";
352 if (s->isRope()) {
353 return "JSRope";
356 return "JSString";
359 template <typename KnownF, typename UnknownF>
360 void ForEachStringFlag(const JSString* str, uint32_t flags, KnownF known,
361 UnknownF unknown) {
362 for (uint32_t i = js::Bit(3); i < js::Bit(16); i = i << 1) {
363 if (!(flags & i)) {
364 continue;
366 switch (i) {
367 case JSString::ATOM_BIT:
368 known("ATOM_BIT");
369 break;
370 case JSString::LINEAR_BIT:
371 known("LINEAR_BIT");
372 break;
373 case JSString::DEPENDENT_BIT:
374 known("DEPENDENT_BIT");
375 break;
376 case JSString::INLINE_CHARS_BIT:
377 known("INLINE_BIT");
378 break;
379 case JSString::LINEAR_IS_EXTENSIBLE_BIT:
380 static_assert(JSString::LINEAR_IS_EXTENSIBLE_BIT ==
381 JSString::INLINE_IS_FAT_BIT);
382 if (str->isLinear()) {
383 if (str->isInline()) {
384 known("FAT");
385 } else if (!str->isAtom()) {
386 known("EXTENSIBLE");
387 } else {
388 unknown(i);
390 } else {
391 unknown(i);
393 break;
394 case JSString::LINEAR_IS_EXTERNAL_BIT:
395 static_assert(JSString::LINEAR_IS_EXTERNAL_BIT ==
396 JSString::ATOM_IS_PERMANENT_BIT);
397 if (str->isAtom()) {
398 known("PERMANENT");
399 } else if (str->isLinear()) {
400 known("EXTERNAL");
401 } else {
402 unknown(i);
404 break;
405 case JSString::LATIN1_CHARS_BIT:
406 known("LATIN1_CHARS_BIT");
407 break;
408 case JSString::ATOM_IS_INDEX_BIT:
409 known("ATOM_IS_INDEX_BIT");
410 break;
411 case JSString::INDEX_VALUE_BIT:
412 known("INDEX_VALUE_BIT");
413 break;
414 case JSString::IN_STRING_TO_ATOM_CACHE:
415 known("IN_STRING_TO_ATOM_CACHE");
416 break;
417 case JSString::FLATTEN_VISIT_RIGHT:
418 if (str->isRope()) {
419 known("FLATTEN_VISIT_RIGHT");
420 } else {
421 known("NON_DEDUP_BIT");
423 break;
424 case JSString::FLATTEN_FINISH_NODE:
425 static_assert(JSString::FLATTEN_FINISH_NODE ==
426 JSString::PINNED_ATOM_BIT);
427 if (str->isRope()) {
428 known("FLATTEN_FINISH_NODE");
429 } else if (str->isAtom()) {
430 known("PINNED_ATOM_BIT");
431 } else {
432 unknown(i);
434 break;
435 default:
436 unknown(i);
437 break;
442 void JSString::dumpFields(js::JSONPrinter& json) const {
443 dumpCommonFields(json);
444 dumpCharsFields(json);
447 void JSString::dumpCommonFields(js::JSONPrinter& json) const {
448 json.formatProperty("address", "(%s*)0x%p", RepresentationToString(this),
449 this);
451 json.beginInlineListProperty("flags");
452 ForEachStringFlag(
453 this, flags(), [&](const char* name) { json.value("%s", name); },
454 [&](uint32_t value) { json.value("Unknown(%08x)", value); });
455 json.endInlineList();
457 if (hasIndexValue()) {
458 json.property("indexValue", getIndexValue());
461 json.boolProperty("isTenured", isTenured());
463 json.property("length", length());
466 void JSString::dumpCharsFields(js::JSONPrinter& json) const {
467 if (isLinear()) {
468 const JSLinearString* linear = &asLinear();
470 AutoCheckCannotGC nogc;
471 if (hasLatin1Chars()) {
472 const Latin1Char* chars = linear->latin1Chars(nogc);
474 json.formatProperty("chars", "(JS::Latin1Char*)0x%p", chars);
476 js::GenericPrinter& out = json.beginStringProperty("value");
477 dumpCharsNoQuote(chars, length(), out);
478 json.endStringProperty();
479 } else {
480 const char16_t* chars = linear->twoByteChars(nogc);
482 json.formatProperty("chars", "(char16_t*)0x%p", chars);
484 js::GenericPrinter& out = json.beginStringProperty("value");
485 dumpCharsNoQuote(chars, length(), out);
486 json.endStringProperty();
488 } else {
489 js::GenericPrinter& out = json.beginStringProperty("value");
490 dumpCharsNoQuote(out);
491 json.endStringProperty();
495 void JSString::dumpRepresentation() const {
496 js::Fprinter out(stderr);
497 dumpRepresentation(out);
500 void JSString::dumpRepresentation(js::GenericPrinter& out) const {
501 js::JSONPrinter json(out);
502 dumpRepresentation(json);
503 out.put("\n");
506 void JSString::dumpRepresentation(js::JSONPrinter& json) const {
507 json.beginObject();
508 dumpRepresentationFields(json);
509 json.endObject();
512 void JSString::dumpRepresentationFields(js::JSONPrinter& json) const {
513 dumpCommonFields(json);
515 if (isAtom()) {
516 asAtom().dumpOwnRepresentationFields(json);
517 } else if (isLinear()) {
518 asLinear().dumpOwnRepresentationFields(json);
520 if (isDependent()) {
521 asDependent().dumpOwnRepresentationFields(json);
522 } else if (isExternal()) {
523 asExternal().dumpOwnRepresentationFields(json);
524 } else if (isExtensible()) {
525 asExtensible().dumpOwnRepresentationFields(json);
526 } else if (isInline()) {
527 asInline().dumpOwnRepresentationFields(json);
529 } else if (isRope()) {
530 asRope().dumpOwnRepresentationFields(json);
531 // Rope already shows the chars.
532 return;
535 dumpCharsFields(json);
538 void JSString::dumpStringContent(js::GenericPrinter& out) const {
539 dumpCharsSingleQuote(out);
541 out.printf(" @ (%s*)0x%p", RepresentationToString(this), this);
544 void JSString::dumpPropertyName(js::GenericPrinter& out) const {
545 dumpCharsNoQuote(out);
548 void JSString::dumpChars(js::GenericPrinter& out) const {
549 out.putChar('"');
550 dumpCharsNoQuote(out);
551 out.putChar('"');
554 void JSString::dumpCharsSingleQuote(js::GenericPrinter& out) const {
555 out.putChar('\'');
556 dumpCharsNoQuote(out);
557 out.putChar('\'');
560 void JSString::dumpCharsNoQuote(js::GenericPrinter& out) const {
561 if (isLinear()) {
562 const JSLinearString* linear = &asLinear();
564 AutoCheckCannotGC nogc;
565 if (hasLatin1Chars()) {
566 dumpCharsNoQuote(linear->latin1Chars(nogc), length(), out);
567 } else {
568 dumpCharsNoQuote(linear->twoByteChars(nogc), length(), out);
570 } else if (isRope()) {
571 JSRope* rope = &asRope();
572 rope->leftChild()->dumpCharsNoQuote(out);
573 rope->rightChild()->dumpCharsNoQuote(out);
577 bool JSString::equals(const char* s) {
578 JSLinearString* linear = ensureLinear(nullptr);
579 if (!linear) {
580 // This is DEBUG-only code.
581 fprintf(stderr, "OOM in JSString::equals!\n");
582 return false;
585 return StringEqualsAscii(linear, s);
587 #endif /* defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) */
589 JSExtensibleString& JSLinearString::makeExtensible(size_t capacity) {
590 MOZ_ASSERT(!isDependent());
591 MOZ_ASSERT(!isInline());
592 MOZ_ASSERT(!isAtom());
593 MOZ_ASSERT(!isExternal());
594 MOZ_ASSERT(capacity >= length());
595 js::RemoveCellMemory(this, allocSize(), js::MemoryUse::StringContents);
596 setLengthAndFlags(length(), flags() | EXTENSIBLE_FLAGS);
597 d.s.u3.capacity = capacity;
598 js::AddCellMemory(this, allocSize(), js::MemoryUse::StringContents);
599 return asExtensible();
602 template <typename CharT>
603 static MOZ_ALWAYS_INLINE bool AllocCharsForFlatten(JSString* str, size_t length,
604 CharT** chars,
605 size_t* capacity) {
607 * Grow by 12.5% if the buffer is very large. Otherwise, round up to the
608 * next power of 2. This is similar to what we do with arrays; see
609 * JSObject::ensureDenseArrayElements.
611 static const size_t DOUBLING_MAX = 1024 * 1024;
612 *capacity =
613 length > DOUBLING_MAX ? length + (length / 8) : RoundUpPow2(length);
615 static_assert(JSString::MAX_LENGTH * sizeof(CharT) <= UINT32_MAX);
616 *chars =
617 str->zone()->pod_arena_malloc<CharT>(js::StringBufferArena, *capacity);
618 return *chars != nullptr;
621 UniqueLatin1Chars JSRope::copyLatin1Chars(JSContext* maybecx,
622 arena_id_t destArenaId) const {
623 return copyCharsInternal<Latin1Char>(maybecx, destArenaId);
626 UniqueTwoByteChars JSRope::copyTwoByteChars(JSContext* maybecx,
627 arena_id_t destArenaId) const {
628 return copyCharsInternal<char16_t>(maybecx, destArenaId);
631 // Allocate chars for a string. If parameters and conditions allow, this will
632 // try to allocate in the nursery, but this may always fall back to a malloc
633 // allocation. The return value will record where the allocation happened.
634 template <typename CharT>
635 static MOZ_ALWAYS_INLINE JSString::OwnedChars<CharT> AllocChars(JSContext* cx,
636 size_t length,
637 gc::Heap heap) {
638 if (heap == gc::Heap::Default && cx->zone()->allocNurseryStrings()) {
639 MOZ_ASSERT(cx->nursery().isEnabled());
640 auto [buffer, isMalloced] = cx->nursery().allocateBuffer(
641 cx->zone(), length * sizeof(CharT), js::StringBufferArena);
642 if (!buffer) {
643 ReportOutOfMemory(cx);
644 return {nullptr, 0, false, false};
647 return {static_cast<CharT*>(buffer), length, isMalloced, isMalloced};
650 auto buffer = cx->make_pod_arena_array<CharT>(js::StringBufferArena, length);
651 if (!buffer) {
652 ReportOutOfMemory(cx);
653 return {nullptr, 0, false, false};
656 return {std::move(buffer), length, true};
659 template <typename CharT>
660 UniquePtr<CharT[], JS::FreePolicy> JSRope::copyCharsInternal(
661 JSContext* maybecx, arena_id_t destArenaId) const {
662 // Left-leaning ropes are far more common than right-leaning ropes, so
663 // perform a non-destructive traversal of the rope, right node first,
664 // splatting each node's characters into a contiguous buffer.
666 size_t n = length();
668 UniquePtr<CharT[], JS::FreePolicy> out;
669 if (maybecx) {
670 out.reset(maybecx->pod_arena_malloc<CharT>(destArenaId, n));
671 } else {
672 out.reset(js_pod_arena_malloc<CharT>(destArenaId, n));
675 if (!out) {
676 return nullptr;
679 Vector<const JSString*, 8, SystemAllocPolicy> nodeStack;
680 const JSString* str = this;
681 CharT* end = out.get() + str->length();
682 while (true) {
683 if (str->isRope()) {
684 if (!nodeStack.append(str->asRope().leftChild())) {
685 if (maybecx) {
686 ReportOutOfMemory(maybecx);
688 return nullptr;
690 str = str->asRope().rightChild();
691 } else {
692 end -= str->length();
693 CopyChars(end, str->asLinear());
694 if (nodeStack.empty()) {
695 break;
697 str = nodeStack.popCopy();
700 MOZ_ASSERT(end == out.get());
702 return out;
705 template <typename CharT>
706 void AddStringToHash(uint32_t* hash, const CharT* chars, size_t len) {
707 // It's tempting to use |HashString| instead of this loop, but that's
708 // slightly different than our existing implementation for non-ropes. We
709 // want to pretend we have a contiguous set of chars so we need to
710 // accumulate char by char rather than generate a new hash for substring
711 // and then accumulate that.
712 for (size_t i = 0; i < len; i++) {
713 *hash = mozilla::AddToHash(*hash, chars[i]);
717 void AddStringToHash(uint32_t* hash, const JSString* str) {
718 AutoCheckCannotGC nogc;
719 const auto& s = str->asLinear();
720 if (s.hasLatin1Chars()) {
721 AddStringToHash(hash, s.latin1Chars(nogc), s.length());
722 } else {
723 AddStringToHash(hash, s.twoByteChars(nogc), s.length());
727 bool JSRope::hash(uint32_t* outHash) const {
728 Vector<const JSString*, 8, SystemAllocPolicy> nodeStack;
729 const JSString* str = this;
731 *outHash = 0;
733 while (true) {
734 if (str->isRope()) {
735 if (!nodeStack.append(str->asRope().rightChild())) {
736 return false;
738 str = str->asRope().leftChild();
739 } else {
740 AddStringToHash(outHash, str);
741 if (nodeStack.empty()) {
742 break;
744 str = nodeStack.popCopy();
748 return true;
751 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
752 void JSRope::dumpOwnRepresentationFields(js::JSONPrinter& json) const {
753 json.beginObjectProperty("leftChild");
754 leftChild()->dumpRepresentationFields(json);
755 json.endObject();
757 json.beginObjectProperty("rightChild");
758 rightChild()->dumpRepresentationFields(json);
759 json.endObject();
761 #endif
763 namespace js {
765 template <>
766 void CopyChars(char16_t* dest, const JSLinearString& str) {
767 AutoCheckCannotGC nogc;
768 if (str.hasTwoByteChars()) {
769 PodCopy(dest, str.twoByteChars(nogc), str.length());
770 } else {
771 CopyAndInflateChars(dest, str.latin1Chars(nogc), str.length());
775 template <>
776 void CopyChars(Latin1Char* dest, const JSLinearString& str) {
777 AutoCheckCannotGC nogc;
778 if (str.hasLatin1Chars()) {
779 PodCopy(dest, str.latin1Chars(nogc), str.length());
780 } else {
782 * When we flatten a TwoByte rope, we turn child ropes (including Latin1
783 * ropes) into TwoByte dependent strings. If one of these strings is
784 * also part of another Latin1 rope tree, we can have a Latin1 rope with
785 * a TwoByte descendent and we end up here when we flatten it. Although
786 * the chars are stored as TwoByte, we know they must be in the Latin1
787 * range, so we can safely deflate here.
789 size_t len = str.length();
790 const char16_t* chars = str.twoByteChars(nogc);
791 auto src = Span(chars, len);
792 MOZ_ASSERT(IsUtf16Latin1(src));
793 LossyConvertUtf16toLatin1(src, AsWritableChars(Span(dest, len)));
797 } /* namespace js */
799 template <typename CharT>
800 static constexpr uint32_t StringFlagsForCharType(uint32_t baseFlags) {
801 if constexpr (std::is_same_v<CharT, char16_t>) {
802 return baseFlags;
805 return baseFlags | JSString::LATIN1_CHARS_BIT;
808 static bool UpdateNurseryBuffersOnTransfer(js::Nursery& nursery, JSString* from,
809 JSString* to, void* buffer,
810 size_t size) {
811 // Update the list of buffers associated with nursery cells when |buffer| is
812 // moved from string |from| to string |to|, depending on whether those strings
813 // are in the nursery or not.
815 if (from->isTenured() && !to->isTenured()) {
816 // Tenured leftmost child is giving its chars buffer to the
817 // nursery-allocated root node.
818 if (!nursery.registerMallocedBuffer(buffer, size)) {
819 return false;
821 } else if (!from->isTenured() && to->isTenured()) {
822 // Leftmost child is giving its nursery-held chars buffer to a
823 // tenured string.
824 nursery.removeMallocedBuffer(buffer, size);
827 return true;
830 static bool CanReuseLeftmostBuffer(JSString* leftmostChild, size_t wholeLength,
831 bool hasTwoByteChars) {
832 if (!leftmostChild->isExtensible()) {
833 return false;
836 JSExtensibleString& str = leftmostChild->asExtensible();
837 return str.capacity() >= wholeLength &&
838 str.hasTwoByteChars() == hasTwoByteChars;
841 JSLinearString* JSRope::flatten(JSContext* maybecx) {
842 mozilla::Maybe<AutoGeckoProfilerEntry> entry;
843 if (maybecx) {
844 entry.emplace(maybecx, "JSRope::flatten");
847 JSLinearString* str = flattenInternal();
848 if (!str && maybecx) {
849 ReportOutOfMemory(maybecx);
852 return str;
855 JSLinearString* JSRope::flattenInternal() {
856 if (zone()->needsIncrementalBarrier()) {
857 return flattenInternal<WithIncrementalBarrier>();
860 return flattenInternal<NoBarrier>();
863 template <JSRope::UsingBarrier usingBarrier>
864 JSLinearString* JSRope::flattenInternal() {
865 if (hasTwoByteChars()) {
866 return flattenInternal<usingBarrier, char16_t>(this);
869 return flattenInternal<usingBarrier, Latin1Char>(this);
// Flattens the rope DAG rooted at |root| in place and returns |root| as a
// linear string, or nullptr on failure. The two failure points visible here
// are registering a malloced buffer with the nursery and allocating a fresh
// character buffer; both happen before any rope node is mutated.
//
// |usingBarrier| is forwarded to ropeBarrierDuringFlattening for every rope
// node on its first visit. |CharT| is the character type of the flattened
// buffer.
//
// The traversal is written with labels and gotos instead of recursion: the
// parent pointer and the FLATTEN_VISIT_RIGHT / FLATTEN_FINISH_NODE flag stored
// in each visited node record where to resume once a child has been handled.
872 template <JSRope::UsingBarrier usingBarrier, typename CharT>
873 /* static */
874 JSLinearString* JSRope::flattenInternal(JSRope* root) {
876 * Consider the DAG of JSRopes rooted at |root|, with non-JSRopes as
877 * its leaves. Mutate the root JSRope into a JSExtensibleString containing
878 * the full flattened text that the root represents, and mutate all other
879 * JSRopes in the interior of the DAG into JSDependentStrings that refer to
880 * this new JSExtensibleString.
882 * If the leftmost leaf of our DAG is a JSExtensibleString, consider
883 * stealing its buffer for use in our new root, and transforming it into a
884 * JSDependentString too. Do not mutate any of the other leaves.
886 * Perform a depth-first dag traversal, splatting each node's characters
887 * into a contiguous buffer. Visit each rope node three times:
888 * 1. record position in the buffer and recurse into left child;
889 * 2. recurse into the right child;
890 * 3. transform the node into a dependent string.
891 * To avoid maintaining a stack, tree nodes are mutated to indicate how many
892 * times they have been visited. Since ropes can be dags, a node may be
893 * encountered multiple times during traversal. However, step 3 above leaves
894 * a valid dependent string, so everything works out.
896 * While ropes avoid all sorts of quadratic cases with string concatenation,
897 * they can't help when ropes are immediately flattened. One idiomatic case
898 * that we'd like to keep linear (and has traditionally been linear in SM
899 * and other JS engines) is:
901 * while (...) {
902 * s += ...
903 * s.flatten
906 * Two behaviors accomplish this:
908 * - When the leftmost non-rope in the DAG we're flattening is a
909 * JSExtensibleString with sufficient capacity to hold the entire
910 * flattened string, we just flatten the DAG into its buffer. Then, when
911 * we transform the root of the DAG from a JSRope into a
912 * JSExtensibleString, we steal that buffer, and change the victim from a
913 * JSExtensibleString to a JSDependentString. In this case, the left-hand
914 * side of the string never needs to be copied.
916 * - Otherwise, we round up the total flattened size and create a fresh
917 * JSExtensibleString with that much capacity. If this in turn becomes the
918 * leftmost leaf of a subsequent flatten, we will hopefully be able to
919 * fill it, as in the case above.
921 * Note that, even though the code for creating JSDependentStrings avoids
922 * creating dependents of dependents, we can create that situation here: the
923 * JSExtensibleStrings we transform into JSDependentStrings might have
924 * JSDependentStrings pointing to them already. Stealing the buffer doesn't
925 * change its address, only its owning JSExtensibleString, so all chars()
926 * pointers in the JSDependentStrings are still valid.
928 * This chain of dependent strings could be problematic if the base string
929 * moves, either because it was initially allocated in the nursery or it
930 * gets deduplicated, because you might have a dependent ->
931 * tenured dependent -> nursery base string, and the store buffer would
932 * only capture the latter edge. Prevent this case from happening by
933 * marking the root as nondeduplicatable if the extensible string
934 * optimization applied.
936 const size_t wholeLength = root->length();
937 size_t wholeCapacity;
938 CharT* wholeChars;
940 AutoCheckCannotGC nogc;
942 Nursery& nursery = root->runtimeFromMainThread()->gc.nursery();
944 /* Find the left most string, containing the first string. */
945 JSRope* leftmostRope = root;
946 while (leftmostRope->leftChild()->isRope()) {
947 leftmostRope = &leftmostRope->leftChild()->asRope();
949 JSString* leftmostChild = leftmostRope->leftChild();
951 bool reuseLeftmostBuffer = CanReuseLeftmostBuffer(
952 leftmostChild, wholeLength, std::is_same_v<CharT, char16_t>);
// Either take over the leftmost child's buffer or allocate a new one.
954 if (reuseLeftmostBuffer) {
955 JSExtensibleString& left = leftmostChild->asExtensible();
956 wholeCapacity = left.capacity();
957 wholeChars = const_cast<CharT*>(left.nonInlineChars<CharT>(nogc));
959 // Nursery::registerMallocedBuffer is fallible, so attempt it first before
960 // doing anything irreversible.
961 if (!UpdateNurseryBuffersOnTransfer(nursery, &left, root, wholeChars,
962 wholeCapacity * sizeof(CharT))) {
963 return nullptr;
965 } else {
966 // If we can't reuse the leftmost child's buffer, allocate a new one.
967 if (!AllocCharsForFlatten(root, wholeLength, &wholeChars, &wholeCapacity)) {
968 return nullptr;
971 if (!root->isTenured()) {
972 if (!nursery.registerMallocedBuffer(wholeChars,
973 wholeCapacity * sizeof(CharT))) {
974 js_free(wholeChars);
975 return nullptr;
980 JSRope* str = root;
981 CharT* pos = wholeChars;
983 JSRope* parent = nullptr;
984 uint32_t parentFlag = 0;
// First visit of a rope node: apply the barrier, save the way back (parent
// pointer plus resume flag) inside the node, then handle the left child.
986 first_visit_node: {
987 MOZ_ASSERT_IF(str != root, parent && parentFlag);
988 MOZ_ASSERT(!str->asRope().isBeingFlattened());
990 ropeBarrierDuringFlattening<usingBarrier>(str);
992 JSString& left = *str->d.s.u2.left;
993 str->d.s.u2.parent = parent;
994 str->setFlagBit(parentFlag);
995 parent = nullptr;
996 parentFlag = 0;
998 if (left.isRope()) {
999 /* Return to this node when 'left' done, then goto visit_right_child. */
1000 parent = str;
1001 parentFlag = FLATTEN_VISIT_RIGHT;
1002 str = &left.asRope();
1003 goto first_visit_node;
// The copy is skipped only for the very first leaf when its buffer is the one
// being reused, because its chars are already at the start of |wholeChars|.
1005 if (!(reuseLeftmostBuffer && pos == wholeChars)) {
1006 CopyChars(pos, left.asLinear());
1008 pos += left.length();
// Second visit: descend into a rope right child, or copy a linear one.
1011 visit_right_child: {
1012 JSString& right = *str->d.s.u3.right;
1013 if (right.isRope()) {
1014 /* Return to this node when 'right' done, then goto finish_node. */
1015 parent = str;
1016 parentFlag = FLATTEN_FINISH_NODE;
1017 str = &right.asRope();
1018 goto first_visit_node;
1020 CopyChars(pos, right.asLinear());
1021 pos += right.length();
// Third visit: turn a non-root rope into a dependent string whose base is
// |root|, then resume in its parent according to the flag saved earlier.
1024 finish_node: {
1025 if (str == root) {
1026 goto finish_root;
1029 MOZ_ASSERT(pos >= wholeChars);
1030 CharT* chars = pos - str->length();
1031 JSRope* strParent = str->d.s.u2.parent;
1032 str->setNonInlineChars(chars);
1034 MOZ_ASSERT(str->asRope().isBeingFlattened());
1035 mozilla::DebugOnly<bool> visitRight = str->flags() & FLATTEN_VISIT_RIGHT;
1036 bool finishNode = str->flags() & FLATTEN_FINISH_NODE;
1037 MOZ_ASSERT(visitRight != finishNode);
1039 // This also clears the flags related to flattening.
1040 str->setLengthAndFlags(str->length(),
1041 StringFlagsForCharType<CharT>(INIT_DEPENDENT_FLAGS));
1042 str->d.s.u3.base =
1043 reinterpret_cast<JSLinearString*>(root); /* will be true on exit */
1045 // Every interior (rope) node in the rope's tree will be visited during
1046 // the traversal and post-barriered here, so earlier additions of
1047 // dependent.base -> root pointers are handled by this barrier as well.
1049 // The only time post-barriers need do anything is when the root is in
1050 // the nursery. Note that the root was a rope but will be an extensible
1051 // string when we return, so it will not point to any strings and need
1052 // not be barriered.
1053 if (str->isTenured() && !root->isTenured()) {
1054 root->storeBuffer()->putWholeCell(str);
1057 str = strParent;
1058 if (finishNode) {
1059 goto finish_node;
1061 MOZ_ASSERT(visitRight);
1062 goto visit_right_child;
1065 finish_root:
1066 // We traversed all the way back up to the root so we're finished.
1067 MOZ_ASSERT(str == root);
1068 MOZ_ASSERT(pos == wholeChars + wholeLength);
1070 root->setLengthAndFlags(wholeLength,
1071 StringFlagsForCharType<CharT>(EXTENSIBLE_FLAGS));
1072 root->setNonInlineChars(wholeChars);
1073 root->d.s.u3.capacity = wholeCapacity;
1074 AddCellMemory(root, root->asLinear().allocSize(), MemoryUse::StringContents);
1076 if (reuseLeftmostBuffer) {
1077 // Remove memory association for left node we're about to make into a
1078 // dependent string.
1079 JSString& left = *leftmostChild;
1080 RemoveCellMemory(&left, left.allocSize(), MemoryUse::StringContents);
// Keep IN_STRING_TO_ATOM_CACHE if it was set; the other flags are replaced.
1082 uint32_t flags = INIT_DEPENDENT_FLAGS;
1083 if (left.inStringToAtomCache()) {
1084 flags |= IN_STRING_TO_ATOM_CACHE;
1086 left.setLengthAndFlags(left.length(), StringFlagsForCharType<CharT>(flags));
1087 left.d.s.u3.base = &root->asLinear();
1088 if (left.isTenured() && !root->isTenured()) {
1089 // leftmost child -> root is a tenured -> nursery edge. Put the leftmost
1090 // child in the store buffer and prevent the root's chars from moving or
1091 // being freed (because the leftmost child may have a tenured dependent
1092 // string that cannot be updated.)
1093 root->storeBuffer()->putWholeCell(&left);
1094 root->setNonDeduplicatable();
1098 return &root->asLinear();
1101 template <JSRope::UsingBarrier usingBarrier>
1102 /* static */
1103 inline void JSRope::ropeBarrierDuringFlattening(JSRope* rope) {
1104 MOZ_ASSERT(!rope->isBeingFlattened());
1105 if constexpr (usingBarrier) {
1106 gc::PreWriteBarrierDuringFlattening(rope->leftChild());
1107 gc::PreWriteBarrierDuringFlattening(rope->rightChild());
1111 template <AllowGC allowGC>
1112 static JSLinearString* EnsureLinear(
1113 JSContext* cx,
1114 typename MaybeRooted<JSString*, allowGC>::HandleType string) {
1115 JSLinearString* linear = string->ensureLinear(cx);
1116 // Don't report an exception if GC is not allowed, just return nullptr.
1117 if (!linear && !allowGC) {
1118 cx->recoverFromOutOfMemory();
1120 return linear;
1123 template <AllowGC allowGC>
1124 JSString* js::ConcatStrings(
1125 JSContext* cx, typename MaybeRooted<JSString*, allowGC>::HandleType left,
1126 typename MaybeRooted<JSString*, allowGC>::HandleType right, gc::Heap heap) {
1127 MOZ_ASSERT_IF(!left->isAtom(), cx->isInsideCurrentZone(left));
1128 MOZ_ASSERT_IF(!right->isAtom(), cx->isInsideCurrentZone(right));
1130 size_t leftLen = left->length();
1131 if (leftLen == 0) {
1132 return right;
1135 size_t rightLen = right->length();
1136 if (rightLen == 0) {
1137 return left;
1140 size_t wholeLength = leftLen + rightLen;
1141 if (MOZ_UNLIKELY(wholeLength > JSString::MAX_LENGTH)) {
1142 // Don't report an exception if GC is not allowed, just return nullptr.
1143 if (allowGC) {
1144 js::ReportOversizedAllocation(cx, JSMSG_ALLOC_OVERFLOW);
1146 return nullptr;
1149 bool isLatin1 = left->hasLatin1Chars() && right->hasLatin1Chars();
1150 bool canUseInline = isLatin1
1151 ? JSInlineString::lengthFits<Latin1Char>(wholeLength)
1152 : JSInlineString::lengthFits<char16_t>(wholeLength);
1153 if (canUseInline) {
1154 Latin1Char* latin1Buf = nullptr; // initialize to silence GCC warning
1155 char16_t* twoByteBuf = nullptr; // initialize to silence GCC warning
1156 JSInlineString* str =
1157 isLatin1
1158 ? AllocateInlineString<allowGC>(cx, wholeLength, &latin1Buf, heap)
1159 : AllocateInlineString<allowGC>(cx, wholeLength, &twoByteBuf, heap);
1160 if (!str) {
1161 return nullptr;
1164 AutoCheckCannotGC nogc;
1165 JSLinearString* leftLinear = EnsureLinear<allowGC>(cx, left);
1166 if (!leftLinear) {
1167 return nullptr;
1169 JSLinearString* rightLinear = EnsureLinear<allowGC>(cx, right);
1170 if (!rightLinear) {
1171 return nullptr;
1174 if (isLatin1) {
1175 PodCopy(latin1Buf, leftLinear->latin1Chars(nogc), leftLen);
1176 PodCopy(latin1Buf + leftLen, rightLinear->latin1Chars(nogc), rightLen);
1177 } else {
1178 if (leftLinear->hasTwoByteChars()) {
1179 PodCopy(twoByteBuf, leftLinear->twoByteChars(nogc), leftLen);
1180 } else {
1181 CopyAndInflateChars(twoByteBuf, leftLinear->latin1Chars(nogc), leftLen);
1183 if (rightLinear->hasTwoByteChars()) {
1184 PodCopy(twoByteBuf + leftLen, rightLinear->twoByteChars(nogc),
1185 rightLen);
1186 } else {
1187 CopyAndInflateChars(twoByteBuf + leftLen,
1188 rightLinear->latin1Chars(nogc), rightLen);
1192 return str;
1195 return JSRope::new_<allowGC>(cx, left, right, wholeLength, heap);
1198 template JSString* js::ConcatStrings<CanGC>(JSContext* cx, HandleString left,
1199 HandleString right, gc::Heap heap);
1201 template JSString* js::ConcatStrings<NoGC>(JSContext* cx, JSString* const& left,
1202 JSString* const& right,
1203 gc::Heap heap);
1205 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
1206 void JSDependentString::dumpOwnRepresentationFields(
1207 js::JSONPrinter& json) const {
1208 json.property("baseOffset", baseOffset());
1209 json.beginObjectProperty("base");
1210 base()->dumpRepresentationFields(json);
1211 json.endObject();
1213 #endif
1215 bool js::EqualChars(const JSLinearString* str1, const JSLinearString* str2) {
1216 // Assert this isn't called for strings the caller should handle with a fast
1217 // path.
1218 MOZ_ASSERT(str1->length() == str2->length());
1219 MOZ_ASSERT(str1 != str2);
1220 MOZ_ASSERT(!str1->isAtom() || !str2->isAtom());
1222 size_t len = str1->length();
1224 AutoCheckCannotGC nogc;
1225 if (str1->hasTwoByteChars()) {
1226 if (str2->hasTwoByteChars()) {
1227 return EqualChars(str1->twoByteChars(nogc), str2->twoByteChars(nogc),
1228 len);
1231 return EqualChars(str2->latin1Chars(nogc), str1->twoByteChars(nogc), len);
1234 if (str2->hasLatin1Chars()) {
1235 return EqualChars(str1->latin1Chars(nogc), str2->latin1Chars(nogc), len);
1238 return EqualChars(str1->latin1Chars(nogc), str2->twoByteChars(nogc), len);
1241 bool js::HasSubstringAt(JSLinearString* text, JSLinearString* pat,
1242 size_t start) {
1243 MOZ_ASSERT(start + pat->length() <= text->length());
1245 size_t patLen = pat->length();
1247 AutoCheckCannotGC nogc;
1248 if (text->hasLatin1Chars()) {
1249 const Latin1Char* textChars = text->latin1Chars(nogc) + start;
1250 if (pat->hasLatin1Chars()) {
1251 return EqualChars(textChars, pat->latin1Chars(nogc), patLen);
1254 return EqualChars(textChars, pat->twoByteChars(nogc), patLen);
1257 const char16_t* textChars = text->twoByteChars(nogc) + start;
1258 if (pat->hasTwoByteChars()) {
1259 return EqualChars(textChars, pat->twoByteChars(nogc), patLen);
1262 return EqualChars(pat->latin1Chars(nogc), textChars, patLen);
1265 bool js::EqualStrings(JSContext* cx, JSString* str1, JSString* str2,
1266 bool* result) {
1267 if (str1 == str2) {
1268 *result = true;
1269 return true;
1271 if (str1->length() != str2->length()) {
1272 *result = false;
1273 return true;
1275 if (str1->isAtom() && str2->isAtom()) {
1276 *result = false;
1277 return true;
1280 JSLinearString* linear1 = str1->ensureLinear(cx);
1281 if (!linear1) {
1282 return false;
1284 JSLinearString* linear2 = str2->ensureLinear(cx);
1285 if (!linear2) {
1286 return false;
1289 *result = EqualChars(linear1, linear2);
1290 return true;
1293 bool js::EqualStrings(const JSLinearString* str1, const JSLinearString* str2) {
1294 if (str1 == str2) {
1295 return true;
1297 if (str1->length() != str2->length()) {
1298 return false;
1300 if (str1->isAtom() && str2->isAtom()) {
1301 return false;
1303 return EqualChars(str1, str2);
1306 int32_t js::CompareChars(const char16_t* s1, size_t len1, JSLinearString* s2) {
1307 AutoCheckCannotGC nogc;
1308 return s2->hasLatin1Chars()
1309 ? CompareChars(s1, len1, s2->latin1Chars(nogc), s2->length())
1310 : CompareChars(s1, len1, s2->twoByteChars(nogc), s2->length());
1313 static int32_t CompareStringsImpl(const JSLinearString* str1,
1314 const JSLinearString* str2) {
1315 size_t len1 = str1->length();
1316 size_t len2 = str2->length();
1318 AutoCheckCannotGC nogc;
1319 if (str1->hasLatin1Chars()) {
1320 const Latin1Char* chars1 = str1->latin1Chars(nogc);
1321 return str2->hasLatin1Chars()
1322 ? CompareChars(chars1, len1, str2->latin1Chars(nogc), len2)
1323 : CompareChars(chars1, len1, str2->twoByteChars(nogc), len2);
1326 const char16_t* chars1 = str1->twoByteChars(nogc);
1327 return str2->hasLatin1Chars()
1328 ? CompareChars(chars1, len1, str2->latin1Chars(nogc), len2)
1329 : CompareChars(chars1, len1, str2->twoByteChars(nogc), len2);
1332 bool js::CompareStrings(JSContext* cx, JSString* str1, JSString* str2,
1333 int32_t* result) {
1334 MOZ_ASSERT(str1);
1335 MOZ_ASSERT(str2);
1337 if (str1 == str2) {
1338 *result = 0;
1339 return true;
1342 JSLinearString* linear1 = str1->ensureLinear(cx);
1343 if (!linear1) {
1344 return false;
1347 JSLinearString* linear2 = str2->ensureLinear(cx);
1348 if (!linear2) {
1349 return false;
1352 *result = CompareStringsImpl(linear1, linear2);
1353 return true;
1356 int32_t js::CompareStrings(const JSLinearString* str1,
1357 const JSLinearString* str2) {
1358 MOZ_ASSERT(str1);
1359 MOZ_ASSERT(str2);
1361 if (str1 == str2) {
1362 return 0;
1364 return CompareStringsImpl(str1, str2);
1367 bool js::StringIsAscii(JSLinearString* str) {
1368 JS::AutoCheckCannotGC nogc;
1369 if (str->hasLatin1Chars()) {
1370 return mozilla::IsAscii(
1371 AsChars(Span(str->latin1Chars(nogc), str->length())));
1373 return mozilla::IsAscii(Span(str->twoByteChars(nogc), str->length()));
1376 bool js::StringEqualsAscii(JSLinearString* str, const char* asciiBytes) {
1377 return StringEqualsAscii(str, asciiBytes, strlen(asciiBytes));
1380 bool js::StringEqualsAscii(JSLinearString* str, const char* asciiBytes,
1381 size_t length) {
1382 MOZ_ASSERT(JS::StringIsASCII(Span(asciiBytes, length)));
1384 if (length != str->length()) {
1385 return false;
1388 const Latin1Char* latin1 = reinterpret_cast<const Latin1Char*>(asciiBytes);
1390 AutoCheckCannotGC nogc;
1391 return str->hasLatin1Chars()
1392 ? EqualChars(latin1, str->latin1Chars(nogc), length)
1393 : EqualChars(latin1, str->twoByteChars(nogc), length);
1396 template <typename CharT>
1397 bool js::CheckStringIsIndex(const CharT* s, size_t length, uint32_t* indexp) {
1398 MOZ_ASSERT(length > 0);
1399 MOZ_ASSERT(length <= UINT32_CHAR_BUFFER_LENGTH);
1400 MOZ_ASSERT(IsAsciiDigit(*s),
1401 "caller's fast path must have checked first char");
1403 RangedPtr<const CharT> cp(s, length);
1404 const RangedPtr<const CharT> end(s + length, s, length);
1406 uint32_t index = AsciiDigitToNumber(*cp++);
1407 uint32_t oldIndex = 0;
1408 uint32_t c = 0;
1410 if (index != 0) {
1411 // Consume remaining characters only if the first character isn't '0'.
1412 while (cp < end && IsAsciiDigit(*cp)) {
1413 oldIndex = index;
1414 c = AsciiDigitToNumber(*cp);
1415 index = 10 * index + c;
1416 cp++;
1420 // It's not an integer index if there are characters after the number.
1421 if (cp != end) {
1422 return false;
1425 // Look out for "4294967295" and larger-number strings that fit in
1426 // UINT32_CHAR_BUFFER_LENGTH: only unsigned 32-bit integers less than or equal
1427 // to MAX_ARRAY_INDEX shall pass.
1428 if (oldIndex < MAX_ARRAY_INDEX / 10 ||
1429 (oldIndex == MAX_ARRAY_INDEX / 10 && c <= (MAX_ARRAY_INDEX % 10))) {
1430 MOZ_ASSERT(index <= MAX_ARRAY_INDEX);
1431 *indexp = index;
1432 return true;
1435 return false;
1438 template bool js::CheckStringIsIndex(const Latin1Char* s, size_t length,
1439 uint32_t* indexp);
1440 template bool js::CheckStringIsIndex(const char16_t* s, size_t length,
1441 uint32_t* indexp);
1443 template <typename CharT>
1444 static uint32_t AtomCharsToIndex(const CharT* s, size_t length) {
1445 // Chars are known to be a valid index value (as determined by
1446 // CheckStringIsIndex) that didn't fit in the "index value" bits in the
1447 // header.
1449 MOZ_ASSERT(length > 0);
1450 MOZ_ASSERT(length <= UINT32_CHAR_BUFFER_LENGTH);
1452 RangedPtr<const CharT> cp(s, length);
1453 const RangedPtr<const CharT> end(s + length, s, length);
1455 MOZ_ASSERT(IsAsciiDigit(*cp));
1456 uint32_t index = AsciiDigitToNumber(*cp++);
1457 MOZ_ASSERT(index != 0);
1459 while (cp < end) {
1460 MOZ_ASSERT(IsAsciiDigit(*cp));
1461 index = 10 * index + AsciiDigitToNumber(*cp);
1462 cp++;
1465 MOZ_ASSERT(index <= MAX_ARRAY_INDEX);
1466 return index;
1469 uint32_t JSAtom::getIndexSlow() const {
1470 MOZ_ASSERT(isIndex());
1471 MOZ_ASSERT(!hasIndexValue());
1473 size_t len = length();
1475 AutoCheckCannotGC nogc;
1476 return hasLatin1Chars() ? AtomCharsToIndex(latin1Chars(nogc), len)
1477 : AtomCharsToIndex(twoByteChars(nogc), len);
1480 // Prevent the actual owner of the string's characters from being deduplicated
1481 // (and thus freeing its characters, which would invalidate the ASSC's chars
1482 // pointer). Intermediate dependent strings on the chain can be deduplicated,
1483 // since the base will be updated to the root base during tenuring anyway and
1484 // the intermediates won't matter.
1485 void PreventRootBaseDeduplication(JSLinearString* s) {
1486 while (s->hasBase()) {
1487 s = s->base();
1489 if (!s->isTenured()) {
1490 s->setNonDeduplicatable();
1494 bool AutoStableStringChars::init(JSContext* cx, JSString* s) {
1495 Rooted<JSLinearString*> linearString(cx, s->ensureLinear(cx));
1496 if (!linearString) {
1497 return false;
1500 MOZ_ASSERT(state_ == Uninitialized);
1502 // Inline and nursery-allocated chars may move during a GC, so copy them
1503 // out into a temporary malloced buffer. Note that we cannot update the
1504 // string itself with a malloced buffer, because there may be dependent
1505 // strings that are using the original chars.
1506 if (linearString->hasMovableChars()) {
1507 return linearString->hasTwoByteChars() ? copyTwoByteChars(cx, linearString)
1508 : copyLatin1Chars(cx, linearString);
1511 if (linearString->hasLatin1Chars()) {
1512 state_ = Latin1;
1513 latin1Chars_ = linearString->rawLatin1Chars();
1514 } else {
1515 state_ = TwoByte;
1516 twoByteChars_ = linearString->rawTwoByteChars();
1519 PreventRootBaseDeduplication(linearString);
1521 s_ = linearString;
1522 return true;
1525 bool AutoStableStringChars::initTwoByte(JSContext* cx, JSString* s) {
1526 Rooted<JSLinearString*> linearString(cx, s->ensureLinear(cx));
1527 if (!linearString) {
1528 return false;
1531 MOZ_ASSERT(state_ == Uninitialized);
1533 if (linearString->hasLatin1Chars()) {
1534 return copyAndInflateLatin1Chars(cx, linearString);
1537 // Copy movable chars since they may be moved by GC (see above).
1538 if (linearString->hasMovableChars()) {
1539 return copyTwoByteChars(cx, linearString);
1542 state_ = TwoByte;
1543 twoByteChars_ = linearString->rawTwoByteChars();
1545 PreventRootBaseDeduplication(linearString);
1547 s_ = linearString;
1548 return true;
1551 template <typename T>
1552 T* AutoStableStringChars::allocOwnChars(JSContext* cx, size_t count) {
1553 static_assert(
1554 InlineCapacity >=
1555 sizeof(JS::Latin1Char) * JSFatInlineString::MAX_LENGTH_LATIN1 &&
1556 InlineCapacity >=
1557 sizeof(char16_t) * JSFatInlineString::MAX_LENGTH_TWO_BYTE,
1558 "InlineCapacity too small to hold fat inline strings");
1560 static_assert((JSString::MAX_LENGTH &
1561 mozilla::tl::MulOverflowMask<sizeof(T)>::value) == 0,
1562 "Size calculation can overflow");
1563 MOZ_ASSERT(count <= JSString::MAX_LENGTH);
1564 size_t size = sizeof(T) * count;
1566 ownChars_.emplace(cx);
1567 if (!ownChars_->resize(size)) {
1568 ownChars_.reset();
1569 return nullptr;
1572 return reinterpret_cast<T*>(ownChars_->begin());
1575 bool AutoStableStringChars::copyAndInflateLatin1Chars(
1576 JSContext* cx, Handle<JSLinearString*> linearString) {
1577 size_t length = linearString->length();
1578 char16_t* chars = allocOwnChars<char16_t>(cx, length);
1579 if (!chars) {
1580 return false;
1583 // Copy |src[0..length]| to |dest[0..length]| when copying doesn't narrow and
1584 // therefore can't lose information.
1585 auto src = AsChars(Span(linearString->rawLatin1Chars(), length));
1586 auto dest = Span(chars, length);
1587 ConvertLatin1toUtf16(src, dest);
1589 state_ = TwoByte;
1590 twoByteChars_ = chars;
1591 s_ = linearString;
1592 return true;
1595 bool AutoStableStringChars::copyLatin1Chars(
1596 JSContext* cx, Handle<JSLinearString*> linearString) {
1597 size_t length = linearString->length();
1598 JS::Latin1Char* chars = allocOwnChars<JS::Latin1Char>(cx, length);
1599 if (!chars) {
1600 return false;
1603 PodCopy(chars, linearString->rawLatin1Chars(), length);
1605 state_ = Latin1;
1606 latin1Chars_ = chars;
1607 s_ = linearString;
1608 return true;
1611 bool AutoStableStringChars::copyTwoByteChars(
1612 JSContext* cx, Handle<JSLinearString*> linearString) {
1613 size_t length = linearString->length();
1614 char16_t* chars = allocOwnChars<char16_t>(cx, length);
1615 if (!chars) {
1616 return false;
1619 PodCopy(chars, linearString->rawTwoByteChars(), length);
1621 state_ = TwoByte;
1622 twoByteChars_ = chars;
1623 s_ = linearString;
1624 return true;
1627 template <>
1628 bool JS::SourceText<char16_t>::initMaybeBorrowed(
1629 JSContext* cx, JS::AutoStableStringChars& linearChars) {
1630 MOZ_ASSERT(linearChars.isTwoByte(),
1631 "AutoStableStringChars must be initialized with char16_t");
1633 const char16_t* chars = linearChars.twoByteChars();
1634 size_t length = linearChars.length();
1635 JS::SourceOwnership ownership = linearChars.maybeGiveOwnershipToCaller()
1636 ? JS::SourceOwnership::TakeOwnership
1637 : JS::SourceOwnership::Borrowed;
1638 return initImpl(cx, chars, length, ownership);
1641 template <>
1642 bool JS::SourceText<char16_t>::initMaybeBorrowed(
1643 JS::FrontendContext* fc, JS::AutoStableStringChars& linearChars) {
1644 MOZ_ASSERT(linearChars.isTwoByte(),
1645 "AutoStableStringChars must be initialized with char16_t");
1647 const char16_t* chars = linearChars.twoByteChars();
1648 size_t length = linearChars.length();
1649 JS::SourceOwnership ownership = linearChars.maybeGiveOwnershipToCaller()
1650 ? JS::SourceOwnership::TakeOwnership
1651 : JS::SourceOwnership::Borrowed;
1652 return initImpl(fc, chars, length, ownership);
1655 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
1656 void JSAtom::dump(js::GenericPrinter& out) {
1657 out.printf("JSAtom* (%p) = ", (void*)this);
1658 this->JSString::dump(out);
1661 void JSAtom::dump() {
1662 Fprinter out(stderr);
1663 dump(out);
1666 void JSExternalString::dumpOwnRepresentationFields(
1667 js::JSONPrinter& json) const {
1668 json.formatProperty("callbacks", "(JSExternalStringCallbacks*)0x%p",
1669 callbacks());
1671 #endif /* defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) */
1673 JSLinearString* js::NewDependentString(JSContext* cx, JSString* baseArg,
1674 size_t start, size_t length,
1675 gc::Heap heap) {
1676 if (length == 0) {
1677 return cx->emptyString();
1680 JSLinearString* base = baseArg->ensureLinear(cx);
1681 if (!base) {
1682 return nullptr;
1685 if (start == 0 && length == base->length()) {
1686 return base;
1689 bool useInline;
1690 if (base->hasTwoByteChars()) {
1691 AutoCheckCannotGC nogc;
1692 const char16_t* chars = base->twoByteChars(nogc) + start;
1693 if (JSLinearString* staticStr = cx->staticStrings().lookup(chars, length)) {
1694 return staticStr;
1696 useInline = JSInlineString::lengthFits<char16_t>(length);
1697 } else {
1698 AutoCheckCannotGC nogc;
1699 const Latin1Char* chars = base->latin1Chars(nogc) + start;
1700 if (JSLinearString* staticStr = cx->staticStrings().lookup(chars, length)) {
1701 return staticStr;
1703 useInline = JSInlineString::lengthFits<Latin1Char>(length);
1706 if (useInline) {
1707 Rooted<JSLinearString*> rootedBase(cx, base);
1709 // Do not create a dependent string that would fit into an inline string.
1710 // First, that could create a string dependent on an inline base string's
1711 // chars, which would be an awkward moving-GC hazard. Second, this makes
1712 // it more likely to have a very short string keep a very long string alive.
1713 if (base->hasTwoByteChars()) {
1714 return NewInlineString<char16_t>(cx, rootedBase, start, length, heap);
1716 return NewInlineString<Latin1Char>(cx, rootedBase, start, length, heap);
1719 return JSDependentString::new_(cx, base, start, length, heap);
1722 static constexpr bool CanStoreCharsAsLatin1(const JS::Latin1Char* s,
1723 size_t length) {
1724 return true;
1727 static inline bool CanStoreCharsAsLatin1(const char16_t* s, size_t length) {
1728 return IsUtf16Latin1(Span(s, length));
1732 * Copy |src[0..length]| to |dest[0..length]| when copying *does* narrow, but
1733 * the user guarantees every runtime |src[i]| value can be stored without change
1734 * of value in |dest[i]|.
1736 static inline void FillFromCompatible(unsigned char* dest, const char16_t* src,
1737 size_t length) {
1738 LossyConvertUtf16toLatin1(Span(src, length),
1739 AsWritableChars(Span(dest, length)));
1742 template <AllowGC allowGC>
1743 static MOZ_ALWAYS_INLINE JSInlineString* NewInlineStringDeflated(
1744 JSContext* cx, const mozilla::Range<const char16_t>& chars,
1745 gc::Heap heap = gc::Heap::Default) {
1746 size_t len = chars.length();
1747 Latin1Char* storage;
1748 JSInlineString* str = AllocateInlineString<allowGC>(cx, len, &storage, heap);
1749 if (!str) {
1750 return nullptr;
1753 MOZ_ASSERT(CanStoreCharsAsLatin1(chars.begin().get(), len));
1754 FillFromCompatible(storage, chars.begin().get(), len);
1755 return str;
1758 template <AllowGC allowGC>
1759 static JSLinearString* NewStringDeflated(JSContext* cx, const char16_t* s,
1760 size_t n, gc::Heap heap) {
1761 if (JSLinearString* str = TryEmptyOrStaticString(cx, s, n)) {
1762 return str;
1765 if (JSInlineString::lengthFits<Latin1Char>(n)) {
1766 return NewInlineStringDeflated<allowGC>(
1767 cx, mozilla::Range<const char16_t>(s, n), heap);
1770 JS::Rooted<JSString::OwnedChars<Latin1Char>> news(
1771 cx, AllocChars<Latin1Char>(cx, n, heap));
1772 if (!news) {
1773 if (!allowGC) {
1774 cx->recoverFromOutOfMemory();
1776 return nullptr;
1779 MOZ_ASSERT(CanStoreCharsAsLatin1(s, n));
1780 FillFromCompatible(news.data(), s, n);
1782 return JSLinearString::new_<allowGC, Latin1Char>(cx, &news, heap);
1785 static MOZ_ALWAYS_INLINE JSAtom* NewInlineAtomDeflated(JSContext* cx,
1786 const char16_t* chars,
1787 size_t length,
1788 js::HashNumber hash) {
1789 Latin1Char* storage;
1790 JSAtom* str = AllocateInlineAtom(cx, length, &storage, hash);
1791 if (!str) {
1792 return nullptr;
1795 MOZ_ASSERT(CanStoreCharsAsLatin1(chars, length));
1796 FillFromCompatible(storage, chars, length);
1797 return str;
1800 static JSAtom* NewAtomDeflatedValidLength(JSContext* cx, const char16_t* s,
1801 size_t n, js::HashNumber hash) {
1802 if (JSAtom::lengthFitsInline<Latin1Char>(n)) {
1803 return NewInlineAtomDeflated(cx, s, n, hash);
1806 auto news = cx->make_pod_arena_array<Latin1Char>(js::StringBufferArena, n);
1807 if (!news) {
1808 cx->recoverFromOutOfMemory();
1809 return nullptr;
1812 MOZ_ASSERT(CanStoreCharsAsLatin1(s, n));
1813 FillFromCompatible(news.get(), s, n);
1815 return JSAtom::newValidLength(cx, std::move(news), n, hash);
1818 template <AllowGC allowGC, typename CharT>
1819 JSLinearString* js::NewStringDontDeflate(
1820 JSContext* cx, UniquePtr<CharT[], JS::FreePolicy> chars, size_t length,
1821 gc::Heap heap) {
1822 if (JSLinearString* str = TryEmptyOrStaticString(cx, chars.get(), length)) {
1823 return str;
1826 if (JSInlineString::lengthFits<CharT>(length)) {
1827 // |chars.get()| is safe because 1) |NewInlineString| necessarily *copies*,
1828 // and 2) |chars| frees its contents only when this function returns.
1829 return NewInlineString<allowGC>(
1830 cx, mozilla::Range<const CharT>(chars.get(), length), heap);
1833 JS::Rooted<JSString::OwnedChars<CharT>> ownedChars(cx, std::move(chars),
1834 length, true);
1835 return JSLinearString::new_<allowGC, CharT>(cx, &ownedChars, heap);
1838 template JSLinearString* js::NewStringDontDeflate<CanGC>(
1839 JSContext* cx, UniqueTwoByteChars chars, size_t length, gc::Heap heap);
1841 template JSLinearString* js::NewStringDontDeflate<NoGC>(
1842 JSContext* cx, UniqueTwoByteChars chars, size_t length, gc::Heap heap);
1844 template JSLinearString* js::NewStringDontDeflate<CanGC>(
1845 JSContext* cx, UniqueLatin1Chars chars, size_t length, gc::Heap heap);
1847 template JSLinearString* js::NewStringDontDeflate<NoGC>(JSContext* cx,
1848 UniqueLatin1Chars chars,
1849 size_t length,
1850 gc::Heap heap);
1852 template <AllowGC allowGC, typename CharT>
1853 JSLinearString* js::NewString(JSContext* cx,
1854 UniquePtr<CharT[], JS::FreePolicy> chars,
1855 size_t length, gc::Heap heap) {
1856 if constexpr (std::is_same_v<CharT, char16_t>) {
1857 if (CanStoreCharsAsLatin1(chars.get(), length)) {
1858 // Deflating copies from |chars.get()| and lets |chars| be freed on
1859 // return.
1860 return NewStringDeflated<allowGC>(cx, chars.get(), length, heap);
1864 return NewStringDontDeflate<allowGC>(cx, std::move(chars), length, heap);
1867 template JSLinearString* js::NewString<CanGC>(JSContext* cx,
1868 UniqueTwoByteChars chars,
1869 size_t length, gc::Heap heap);
1871 template JSLinearString* js::NewString<NoGC>(JSContext* cx,
1872 UniqueTwoByteChars chars,
1873 size_t length, gc::Heap heap);
1875 template JSLinearString* js::NewString<CanGC>(JSContext* cx,
1876 UniqueLatin1Chars chars,
1877 size_t length, gc::Heap heap);
1879 template JSLinearString* js::NewString<NoGC>(JSContext* cx,
1880 UniqueLatin1Chars chars,
1881 size_t length, gc::Heap heap);
1883 namespace js {
1885 template <AllowGC allowGC, typename CharT>
1886 JSLinearString* NewStringCopyNDontDeflateNonStaticValidLength(JSContext* cx,
1887 const CharT* s,
1888 size_t n,
1889 gc::Heap heap) {
1890 if (JSInlineString::lengthFits<CharT>(n)) {
1891 return NewInlineString<allowGC>(cx, mozilla::Range<const CharT>(s, n),
1892 heap);
1895 Rooted<JSString::OwnedChars<CharT>> news(cx,
1896 ::AllocChars<CharT>(cx, n, heap));
1897 if (!news) {
1898 if (!allowGC) {
1899 cx->recoverFromOutOfMemory();
1901 return nullptr;
1904 PodCopy(news.data(), s, n);
1906 return JSLinearString::newValidLength<allowGC, CharT>(cx, &news, heap);
1909 template JSLinearString* NewStringCopyNDontDeflateNonStaticValidLength<CanGC>(
1910 JSContext* cx, const char16_t* s, size_t n, gc::Heap heap);
1912 template JSLinearString* NewStringCopyNDontDeflateNonStaticValidLength<CanGC>(
1913 JSContext* cx, const Latin1Char* s, size_t n, gc::Heap heap);
1915 template <AllowGC allowGC, typename CharT>
1916 JSLinearString* NewStringCopyNDontDeflate(JSContext* cx, const CharT* s,
1917 size_t n, gc::Heap heap) {
1918 if (JSLinearString* str = TryEmptyOrStaticString(cx, s, n)) {
1919 return str;
1922 if (MOZ_UNLIKELY(!JSLinearString::validateLength(cx, n))) {
1923 return nullptr;
1926 return NewStringCopyNDontDeflateNonStaticValidLength<allowGC>(cx, s, n, heap);
1929 template JSLinearString* NewStringCopyNDontDeflate<CanGC>(JSContext* cx,
1930 const char16_t* s,
1931 size_t n,
1932 gc::Heap heap);
1934 template JSLinearString* NewStringCopyNDontDeflate<NoGC>(JSContext* cx,
1935 const char16_t* s,
1936 size_t n,
1937 gc::Heap heap);
1939 template JSLinearString* NewStringCopyNDontDeflate<CanGC>(JSContext* cx,
1940 const Latin1Char* s,
1941 size_t n,
1942 gc::Heap heap);
1944 template JSLinearString* NewStringCopyNDontDeflate<NoGC>(JSContext* cx,
1945 const Latin1Char* s,
1946 size_t n,
1947 gc::Heap heap);
1949 JSLinearString* NewLatin1StringZ(JSContext* cx, UniqueChars chars,
1950 gc::Heap heap) {
1951 size_t length = strlen(chars.get());
1952 UniqueLatin1Chars latin1(reinterpret_cast<Latin1Char*>(chars.release()));
1953 return NewString<CanGC>(cx, std::move(latin1), length, heap);
1956 template <AllowGC allowGC, typename CharT>
1957 JSLinearString* NewStringCopyN(JSContext* cx, const CharT* s, size_t n,
1958 gc::Heap heap) {
1959 if constexpr (std::is_same_v<CharT, char16_t>) {
1960 if (CanStoreCharsAsLatin1(s, n)) {
1961 return NewStringDeflated<allowGC>(cx, s, n, heap);
1965 return NewStringCopyNDontDeflate<allowGC>(cx, s, n, heap);
1968 template JSLinearString* NewStringCopyN<CanGC>(JSContext* cx, const char16_t* s,
1969 size_t n, gc::Heap heap);
1971 template JSLinearString* NewStringCopyN<NoGC>(JSContext* cx, const char16_t* s,
1972 size_t n, gc::Heap heap);
1974 template JSLinearString* NewStringCopyN<CanGC>(JSContext* cx,
1975 const Latin1Char* s, size_t n,
1976 gc::Heap heap);
1978 template JSLinearString* NewStringCopyN<NoGC>(JSContext* cx,
1979 const Latin1Char* s, size_t n,
1980 gc::Heap heap);
1982 template <typename CharT>
1983 JSAtom* NewAtomCopyNDontDeflateValidLength(JSContext* cx, const CharT* s,
1984 size_t n, js::HashNumber hash) {
1985 if constexpr (std::is_same_v<CharT, char16_t>) {
1986 MOZ_ASSERT(!CanStoreCharsAsLatin1(s, n));
1989 if (JSAtom::lengthFitsInline<CharT>(n)) {
1990 return NewInlineAtom(cx, s, n, hash);
1993 auto news = cx->make_pod_arena_array<CharT>(js::StringBufferArena, n);
1994 if (!news) {
1995 cx->recoverFromOutOfMemory();
1996 return nullptr;
1999 PodCopy(news.get(), s, n);
2001 return JSAtom::newValidLength(cx, std::move(news), n, hash);
2004 template JSAtom* NewAtomCopyNDontDeflateValidLength(JSContext* cx,
2005 const char16_t* s, size_t n,
2006 js::HashNumber hash);
2008 template JSAtom* NewAtomCopyNDontDeflateValidLength(JSContext* cx,
2009 const Latin1Char* s,
2010 size_t n,
2011 js::HashNumber hash);
2013 template <typename CharT>
2014 JSAtom* NewAtomCopyNMaybeDeflateValidLength(JSContext* cx, const CharT* s,
2015 size_t n, js::HashNumber hash) {
2016 if constexpr (std::is_same_v<CharT, char16_t>) {
2017 if (CanStoreCharsAsLatin1(s, n)) {
2018 return NewAtomDeflatedValidLength(cx, s, n, hash);
2022 return NewAtomCopyNDontDeflateValidLength(cx, s, n, hash);
2025 template JSAtom* NewAtomCopyNMaybeDeflateValidLength(JSContext* cx,
2026 const char16_t* s,
2027 size_t n,
2028 js::HashNumber hash);
2030 template JSAtom* NewAtomCopyNMaybeDeflateValidLength(JSContext* cx,
2031 const Latin1Char* s,
2032 size_t n,
2033 js::HashNumber hash);
2035 JSLinearString* NewStringCopyUTF8N(JSContext* cx, const JS::UTF8Chars& utf8,
2036 JS::SmallestEncoding encoding,
2037 gc::Heap heap) {
2038 if (encoding == JS::SmallestEncoding::ASCII) {
2039 return NewStringCopyN<js::CanGC>(cx, utf8.begin().get(), utf8.length(),
2040 heap);
2043 size_t length;
2044 if (encoding == JS::SmallestEncoding::Latin1) {
2045 UniqueLatin1Chars latin1(
2046 UTF8CharsToNewLatin1CharsZ(cx, utf8, &length, js::StringBufferArena)
2047 .get());
2048 if (!latin1) {
2049 return nullptr;
2052 return NewString<js::CanGC>(cx, std::move(latin1), length, heap);
2055 MOZ_ASSERT(encoding == JS::SmallestEncoding::UTF16);
2057 UniqueTwoByteChars utf16(
2058 UTF8CharsToNewTwoByteCharsZ(cx, utf8, &length, js::StringBufferArena)
2059 .get());
2060 if (!utf16) {
2061 return nullptr;
2064 return NewString<js::CanGC>(cx, std::move(utf16), length, heap);
2067 JSLinearString* NewStringCopyUTF8N(JSContext* cx, const JS::UTF8Chars& utf8,
2068 gc::Heap heap) {
2069 JS::SmallestEncoding encoding = JS::FindSmallestEncoding(utf8);
2070 return NewStringCopyUTF8N(cx, utf8, encoding, heap);
2073 template <typename CharT>
2074 MOZ_ALWAYS_INLINE JSExternalString* ExternalStringCache::lookupExternalImpl(
2075 const CharT* chars, size_t len) const {
2076 AutoCheckCannotGC nogc;
2078 for (size_t i = 0; i < NumEntries; i++) {
2079 JSExternalString* str = externalEntries_[i];
2080 if (!str || str->length() != len) {
2081 continue;
2084 if constexpr (std::is_same_v<CharT, JS::Latin1Char>) {
2085 if (!str->hasLatin1Chars()) {
2086 continue;
2088 } else {
2089 if (!str->hasTwoByteChars()) {
2090 continue;
2094 const CharT* strChars = str->nonInlineChars<CharT>(nogc);
2095 if (chars == strChars) {
2096 // Note that we don't need an incremental barrier here or below.
2097 // The cache is purged on GC so any string we get from the cache
2098 // must have been allocated after the GC started.
2099 return str;
2102 // Compare the chars. Don't do this for long strings as it will be
2103 // faster to allocate a new external string.
2104 static const size_t MaxLengthForCharComparison = 100;
2105 if (len <= MaxLengthForCharComparison && EqualChars(chars, strChars, len)) {
2106 return str;
2110 return nullptr;
2113 MOZ_ALWAYS_INLINE JSExternalString* ExternalStringCache::lookupExternal(
2114 const JS::Latin1Char* chars, size_t len) const {
2115 return lookupExternalImpl(chars, len);
2117 MOZ_ALWAYS_INLINE JSExternalString* ExternalStringCache::lookupExternal(
2118 const char16_t* chars, size_t len) const {
2119 return lookupExternalImpl(chars, len);
2122 MOZ_ALWAYS_INLINE void ExternalStringCache::putExternal(JSExternalString* str) {
2123 for (size_t i = NumEntries - 1; i > 0; i--) {
2124 externalEntries_[i] = externalEntries_[i - 1];
2126 externalEntries_[0] = str;
2129 template <typename CharT>
2130 MOZ_ALWAYS_INLINE JSInlineString* ExternalStringCache::lookupInlineImpl(
2131 const CharT* chars, size_t len) const {
2132 MOZ_ASSERT(CanStoreCharsAsLatin1(chars, len));
2133 MOZ_ASSERT(JSThinInlineString::lengthFits<Latin1Char>(len));
2135 AutoCheckCannotGC nogc;
2137 for (size_t i = 0; i < NumEntries; i++) {
2138 JSInlineString* str = inlineEntries_[i];
2139 if (!str || str->length() != len) {
2140 continue;
2143 const JS::Latin1Char* strChars = str->latin1Chars(nogc);
2144 if (EqualChars(chars, strChars, len)) {
2145 return str;
2149 return nullptr;
2152 MOZ_ALWAYS_INLINE JSInlineString* ExternalStringCache::lookupInline(
2153 const JS::Latin1Char* chars, size_t len) const {
2154 return lookupInlineImpl(chars, len);
2156 MOZ_ALWAYS_INLINE JSInlineString* ExternalStringCache::lookupInline(
2157 const char16_t* chars, size_t len) const {
2158 return lookupInlineImpl(chars, len);
2161 MOZ_ALWAYS_INLINE void ExternalStringCache::putInline(JSInlineString* str) {
2162 MOZ_ASSERT(str->hasLatin1Chars());
2164 for (size_t i = NumEntries - 1; i > 0; i--) {
2165 inlineEntries_[i] = inlineEntries_[i - 1];
2167 inlineEntries_[0] = str;
2170 } /* namespace js */
2172 template <AllowGC allowGC>
2173 static MOZ_ALWAYS_INLINE JSInlineString* NewInlineStringMaybeDeflated(
2174 JSContext* cx, const mozilla::Range<const JS::Latin1Char>& chars,
2175 gc::Heap heap = gc::Heap::Default) {
2176 return NewInlineString<allowGC>(cx, chars, heap);
2179 template <AllowGC allowGC>
2180 static MOZ_ALWAYS_INLINE JSInlineString* NewInlineStringMaybeDeflated(
2181 JSContext* cx, const mozilla::Range<const char16_t>& chars,
2182 gc::Heap heap = gc::Heap::Default) {
2183 return NewInlineStringDeflated<allowGC>(cx, chars, heap);
2186 namespace js {
2188 template <typename CharT>
2189 JSString* NewMaybeExternalString(JSContext* cx, const CharT* s, size_t n,
2190 const JSExternalStringCallbacks* callbacks,
2191 bool* allocatedExternal, gc::Heap heap) {
2192 if (JSString* str = TryEmptyOrStaticString(cx, s, n)) {
2193 *allocatedExternal = false;
2194 return str;
2197 ExternalStringCache& cache = cx->zone()->externalStringCache();
2199 if (JSThinInlineString::lengthFits<Latin1Char>(n) &&
2200 CanStoreCharsAsLatin1(s, n)) {
2201 *allocatedExternal = false;
2202 if (JSInlineString* str = cache.lookupInline(s, n)) {
2203 return str;
2205 JSInlineString* str = NewInlineStringMaybeDeflated<AllowGC::CanGC>(
2206 cx, mozilla::Range<const CharT>(s, n), heap);
2207 if (!str) {
2208 return nullptr;
2210 cache.putInline(str);
2211 return str;
2214 if (JSExternalString* str = cache.lookupExternal(s, n)) {
2215 *allocatedExternal = false;
2216 return str;
2219 JSExternalString* str = JSExternalString::new_(cx, s, n, callbacks);
2220 if (!str) {
2221 return nullptr;
2224 *allocatedExternal = true;
2225 cache.putExternal(str);
2226 return str;
2229 template JSString* NewMaybeExternalString(
2230 JSContext* cx, const JS::Latin1Char* s, size_t n,
2231 const JSExternalStringCallbacks* callbacks, bool* allocatedExternal,
2232 gc::Heap heap);
2234 template JSString* NewMaybeExternalString(
2235 JSContext* cx, const char16_t* s, size_t n,
2236 const JSExternalStringCallbacks* callbacks, bool* allocatedExternal,
2237 gc::Heap heap);
2239 } /* namespace js */
2241 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
2242 void JSExtensibleString::dumpOwnRepresentationFields(
2243 js::JSONPrinter& json) const {
2244 json.property("capacity", capacity());
2247 void JSInlineString::dumpOwnRepresentationFields(js::JSONPrinter& json) const {}
2249 void JSLinearString::dumpOwnRepresentationFields(js::JSONPrinter& json) const {
2250 if (!isInline()) {
2251 // Include whether the chars are in the nursery even for tenured
2252 // strings, which should always be false. For investigating bugs, it's
2253 // better to not assume that.
2254 js::Nursery& nursery = runtimeFromMainThread()->gc.nursery();
2255 bool inNursery = nursery.isInside(nonInlineCharsRaw());
2256 json.boolProperty("charsInNursery", inNursery);
2259 #endif
2261 struct RepresentativeExternalString : public JSExternalStringCallbacks {
2262 void finalize(JS::Latin1Char* chars) const override {
2263 // Constant chars, nothing to do.
2265 void finalize(char16_t* chars) const override {
2266 // Constant chars, nothing to do.
2268 size_t sizeOfBuffer(const JS::Latin1Char* chars,
2269 mozilla::MallocSizeOf mallocSizeOf) const override {
2270 // This string's buffer is not heap-allocated, so its malloc size is 0.
2271 return 0;
2273 size_t sizeOfBuffer(const char16_t* chars,
2274 mozilla::MallocSizeOf mallocSizeOf) const override {
2275 // This string's buffer is not heap-allocated, so its malloc size is 0.
2276 return 0;
2280 static const RepresentativeExternalString RepresentativeExternalStringCallbacks;
2282 template <typename CheckString, typename CharT>
2283 static bool FillWithRepresentatives(JSContext* cx, Handle<ArrayObject*> array,
2284 uint32_t* index, const CharT* chars,
2285 size_t len, size_t inlineStringMaxLength,
2286 size_t inlineAtomMaxLength,
2287 const CheckString& check, gc::Heap heap) {
2288 auto AppendString = [&check](JSContext* cx, Handle<ArrayObject*> array,
2289 uint32_t* index, HandleString s) {
2290 MOZ_ASSERT(check(s));
2291 (void)check; // silence clang -Wunused-lambda-capture in opt builds
2292 RootedValue val(cx, StringValue(s));
2293 return JS_DefineElement(cx, array, (*index)++, val, 0);
2296 MOZ_ASSERT(len > inlineStringMaxLength);
2297 MOZ_ASSERT(len > inlineAtomMaxLength);
2299 // Normal atom.
2300 RootedString atom1(cx, AtomizeChars(cx, chars, len));
2301 if (!atom1 || !AppendString(cx, array, index, atom1)) {
2302 return false;
2304 MOZ_ASSERT(atom1->isAtom());
2306 // Thin inline atom.
2307 RootedString atom2(cx, AtomizeChars(cx, chars, 2));
2308 if (!atom2 || !AppendString(cx, array, index, atom2)) {
2309 return false;
2311 MOZ_ASSERT(atom2->isAtom());
2312 MOZ_ASSERT(atom2->isInline());
2314 // Fat inline atom.
2315 RootedString atom3(cx, AtomizeChars(cx, chars, inlineAtomMaxLength));
2316 if (!atom3 || !AppendString(cx, array, index, atom3)) {
2317 return false;
2319 MOZ_ASSERT(atom3->isAtom());
2320 MOZ_ASSERT_IF(inlineStringMaxLength < inlineAtomMaxLength,
2321 atom3->isFatInline());
2323 // Normal linear string; maybe nursery.
2324 RootedString linear1(cx, NewStringCopyN<CanGC>(cx, chars, len, heap));
2325 if (!linear1 || !AppendString(cx, array, index, linear1)) {
2326 return false;
2328 MOZ_ASSERT(linear1->isLinear());
2330 // Inline string; maybe nursery.
2331 RootedString linear2(cx, NewStringCopyN<CanGC>(cx, chars, 3, heap));
2332 if (!linear2 || !AppendString(cx, array, index, linear2)) {
2333 return false;
2335 MOZ_ASSERT(linear2->isLinear());
2336 MOZ_ASSERT(linear2->isInline());
2338 // Fat inline string; maybe nursery.
2339 RootedString linear3(
2340 cx, NewStringCopyN<CanGC>(cx, chars, inlineStringMaxLength, heap));
2341 if (!linear3 || !AppendString(cx, array, index, linear3)) {
2342 return false;
2344 MOZ_ASSERT(linear3->isLinear());
2345 MOZ_ASSERT(linear3->isFatInline());
2347 // Rope; maybe nursery.
2348 RootedString rope(cx, ConcatStrings<CanGC>(cx, atom1, atom3, heap));
2349 if (!rope || !AppendString(cx, array, index, rope)) {
2350 return false;
2352 MOZ_ASSERT(rope->isRope());
2354 // Dependent; maybe nursery.
2355 RootedString dep(cx, NewDependentString(cx, atom1, 0, len - 2, heap));
2356 if (!dep || !AppendString(cx, array, index, dep)) {
2357 return false;
2359 MOZ_ASSERT(dep->isDependent());
2361 // Extensible; maybe nursery.
2362 RootedString temp1(cx, NewStringCopyN<CanGC>(cx, chars, len, heap));
2363 if (!temp1) {
2364 return false;
2366 RootedString extensible(cx, ConcatStrings<CanGC>(cx, temp1, atom3, heap));
2367 if (!extensible || !extensible->ensureLinear(cx)) {
2368 return false;
2370 if (!AppendString(cx, array, index, extensible)) {
2371 return false;
2373 MOZ_ASSERT(extensible->isExtensible());
2375 RootedString external1(cx), external2(cx);
2376 if constexpr (std::is_same_v<CharT, char16_t>) {
2377 external1 = JS_NewExternalUCString(cx, (const char16_t*)chars, len,
2378 &RepresentativeExternalStringCallbacks);
2379 if (!external1 || !AppendString(cx, array, index, external1)) {
2380 return false;
2382 MOZ_ASSERT(external1->isExternal());
2384 external2 = JS_NewExternalUCString(cx, (const char16_t*)chars, 2,
2385 &RepresentativeExternalStringCallbacks);
2386 if (!external2 || !AppendString(cx, array, index, external2)) {
2387 return false;
2389 MOZ_ASSERT(external2->isExternal());
2390 } else {
2391 external1 =
2392 JS_NewExternalStringLatin1(cx, (const Latin1Char*)chars, len,
2393 &RepresentativeExternalStringCallbacks);
2394 if (!external1 || !AppendString(cx, array, index, external1)) {
2395 return false;
2397 MOZ_ASSERT(external1->isExternal());
2399 external2 =
2400 JS_NewExternalStringLatin1(cx, (const Latin1Char*)chars, 2,
2401 &RepresentativeExternalStringCallbacks);
2402 if (!external2 || !AppendString(cx, array, index, external2)) {
2403 return false;
2405 MOZ_ASSERT(external2->isExternal());
2408 // Assert the strings still have the types we expect after creating the
2409 // other strings.
2411 MOZ_ASSERT(atom1->isAtom());
2412 MOZ_ASSERT(atom2->isAtom());
2413 MOZ_ASSERT(atom3->isAtom());
2414 MOZ_ASSERT(atom2->isInline());
2415 MOZ_ASSERT_IF(inlineStringMaxLength < inlineAtomMaxLength,
2416 atom3->isFatInline());
2418 MOZ_ASSERT(linear1->isLinear());
2419 MOZ_ASSERT(linear2->isLinear());
2420 MOZ_ASSERT(linear3->isLinear());
2421 MOZ_ASSERT(linear2->isInline());
2422 MOZ_ASSERT(linear3->isFatInline());
2424 MOZ_ASSERT(rope->isRope());
2425 MOZ_ASSERT(dep->isDependent());
2426 MOZ_ASSERT(extensible->isExtensible());
2427 MOZ_ASSERT(external1->isExternal());
2428 MOZ_ASSERT(external2->isExternal());
2429 return true;
2432 /* static */
2433 bool JSString::fillWithRepresentatives(JSContext* cx,
2434 Handle<ArrayObject*> array) {
2435 uint32_t index = 0;
2437 auto CheckTwoByte = [](JSString* str) { return str->hasTwoByteChars(); };
2438 auto CheckLatin1 = [](JSString* str) { return str->hasLatin1Chars(); };
2440 static const char16_t twoByteChars[] =
2441 u"\u1234abc\0def\u5678ghijklmasdfa\0xyz0123456789";
2442 static const Latin1Char latin1Chars[] = "abc\0defghijklmasdfa\0xyz0123456789";
2444 // Create strings using both the default heap and forcing the tenured heap. If
2445 // nursery strings are available, this is a best effort at creating them in
2446 // the default heap case. Since nursery strings may be disabled or a GC may
2447 // occur during this process, there may be duplicate representatives in the
2448 // final list.
2450 if (!FillWithRepresentatives(cx, array, &index, twoByteChars,
2451 std::size(twoByteChars) - 1,
2452 JSFatInlineString::MAX_LENGTH_TWO_BYTE,
2453 js::FatInlineAtom::MAX_LENGTH_TWO_BYTE,
2454 CheckTwoByte, gc::Heap::Tenured)) {
2455 return false;
2457 if (!FillWithRepresentatives(cx, array, &index, latin1Chars,
2458 std::size(latin1Chars) - 1,
2459 JSFatInlineString::MAX_LENGTH_LATIN1,
2460 js::FatInlineAtom::MAX_LENGTH_LATIN1,
2461 CheckLatin1, gc::Heap::Tenured)) {
2462 return false;
2464 if (!FillWithRepresentatives(cx, array, &index, twoByteChars,
2465 std::size(twoByteChars) - 1,
2466 JSFatInlineString::MAX_LENGTH_TWO_BYTE,
2467 js::FatInlineAtom::MAX_LENGTH_TWO_BYTE,
2468 CheckTwoByte, gc::Heap::Default)) {
2469 return false;
2471 if (!FillWithRepresentatives(cx, array, &index, latin1Chars,
2472 std::size(latin1Chars) - 1,
2473 JSFatInlineString::MAX_LENGTH_LATIN1,
2474 js::FatInlineAtom::MAX_LENGTH_LATIN1,
2475 CheckLatin1, gc::Heap::Default)) {
2476 return false;
2479 #ifdef DEBUG
2480 // * Normal atom
2481 // * Inline atom.
2482 // * Fat inline atom.
2483 // * Normal linear string
2484 // * Inline string
2485 // * Fat inline string
2486 // * Rope; maybe nursery.
2487 // * Dependent
2488 // * Extensible
2489 // * External with original len
2490 // * External with len==2
2491 static constexpr uint32_t StringTypes = 11;
2492 // * Latin1
2493 // * TwoByte
2494 static constexpr uint32_t CharTypes = 2;
2495 // * Tenured
2496 // * Default
2497 static constexpr uint32_t HeapType = 2;
2498 MOZ_ASSERT(index == StringTypes * CharTypes * HeapType);
2499 #endif
2501 return true;
2504 /*** Conversions ************************************************************/
2506 UniqueChars js::EncodeLatin1(JSContext* cx, JSString* str) {
2507 JSLinearString* linear = str->ensureLinear(cx);
2508 if (!linear) {
2509 return nullptr;
2512 JS::AutoCheckCannotGC nogc;
2513 if (linear->hasTwoByteChars()) {
2514 JS::Latin1CharsZ chars =
2515 JS::LossyTwoByteCharsToNewLatin1CharsZ(cx, linear->twoByteRange(nogc));
2516 return UniqueChars(chars.c_str());
2519 size_t len = str->length();
2520 Latin1Char* buf = cx->pod_malloc<Latin1Char>(len + 1);
2521 if (!buf) {
2522 return nullptr;
2525 PodCopy(buf, linear->latin1Chars(nogc), len);
2526 buf[len] = '\0';
2528 return UniqueChars(reinterpret_cast<char*>(buf));
2531 UniqueChars js::EncodeAscii(JSContext* cx, JSString* str) {
2532 JSLinearString* linear = str->ensureLinear(cx);
2533 if (!linear) {
2534 return nullptr;
2537 MOZ_ASSERT(StringIsAscii(linear));
2538 return EncodeLatin1(cx, linear);
2541 UniqueChars js::IdToPrintableUTF8(JSContext* cx, HandleId id,
2542 IdToPrintableBehavior behavior) {
2543 // ToString(<symbol>) throws a TypeError, therefore require that callers
2544 // request source representation when |id| is a property key.
2545 MOZ_ASSERT_IF(behavior == IdToPrintableBehavior::IdIsIdentifier,
2546 id.isAtom() && IsIdentifierNameOrPrivateName(id.toAtom()));
2548 RootedValue v(cx, IdToValue(id));
2549 JSString* str;
2550 if (behavior == IdToPrintableBehavior::IdIsPropertyKey) {
2551 str = ValueToSource(cx, v);
2552 } else {
2553 str = ToString<CanGC>(cx, v);
2555 if (!str) {
2556 return nullptr;
2558 return StringToNewUTF8CharsZ(cx, *str);
2561 template <AllowGC allowGC>
2562 JSString* js::ToStringSlow(
2563 JSContext* cx, typename MaybeRooted<Value, allowGC>::HandleType arg) {
2564 /* As with ToObjectSlow, callers must verify that |arg| isn't a string. */
2565 MOZ_ASSERT(!arg.isString());
2567 Value v = arg;
2568 if (!v.isPrimitive()) {
2569 if (!allowGC) {
2570 return nullptr;
2572 RootedValue v2(cx, v);
2573 if (!ToPrimitive(cx, JSTYPE_STRING, &v2)) {
2574 return nullptr;
2576 v = v2;
2579 JSString* str;
2580 if (v.isString()) {
2581 str = v.toString();
2582 } else if (v.isInt32()) {
2583 str = Int32ToString<allowGC>(cx, v.toInt32());
2584 } else if (v.isDouble()) {
2585 str = NumberToString<allowGC>(cx, v.toDouble());
2586 } else if (v.isBoolean()) {
2587 str = BooleanToString(cx, v.toBoolean());
2588 } else if (v.isNull()) {
2589 str = cx->names().null;
2590 } else if (v.isSymbol()) {
2591 if (allowGC) {
2592 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
2593 JSMSG_SYMBOL_TO_STRING);
2595 return nullptr;
2596 } else if (v.isBigInt()) {
2597 if (!allowGC) {
2598 return nullptr;
2600 RootedBigInt i(cx, v.toBigInt());
2601 str = BigInt::toString<CanGC>(cx, i, 10);
2603 #ifdef ENABLE_RECORD_TUPLE
2604 else if (v.isExtendedPrimitive()) {
2605 if (!allowGC) {
2606 return nullptr;
2608 if (IsTuple(v)) {
2609 Rooted<TupleType*> tup(cx, &TupleType::thisTupleValue(v));
2610 return TupleToSource(cx, tup);
2612 Rooted<RecordType*> rec(cx);
2613 MOZ_ALWAYS_TRUE(RecordObject::maybeUnbox(&v.getObjectPayload(), &rec));
2614 return RecordToSource(cx, rec);
2616 #endif
2617 else {
2618 MOZ_ASSERT(v.isUndefined());
2619 str = cx->names().undefined;
2621 return str;
2624 template JSString* js::ToStringSlow<CanGC>(JSContext* cx, HandleValue arg);
2626 template JSString* js::ToStringSlow<NoGC>(JSContext* cx, const Value& arg);
2628 JS_PUBLIC_API JSString* js::ToStringSlow(JSContext* cx, HandleValue v) {
2629 return ToStringSlow<CanGC>(cx, v);