1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #ifndef vm_StringType_h
8 #define vm_StringType_h
10 #include "mozilla/Maybe.h"
11 #include "mozilla/MemoryReporting.h"
12 #include "mozilla/Range.h"
13 #include "mozilla/Span.h"
14 #include "mozilla/TextUtils.h"
16 #include <string_view> // std::basic_string_view
18 #include "jstypes.h" // js::Bit
21 #include "gc/MaybeRooted.h"
22 #include "gc/Nursery.h"
23 #include "gc/RelocationOverlay.h"
24 #include "gc/StoreBuffer.h"
25 #include "js/CharacterEncoding.h"
26 #include "js/RootingAPI.h"
27 #include "js/shadow/String.h" // JS::shadow::String
28 #include "js/String.h" // JS::MaxStringLength
29 #include "js/UniquePtr.h"
30 #include "util/Text.h"
32 class JSDependentString
;
33 class JSExtensibleString
;
34 class JSExternalString
;
39 class JS_PUBLIC_API AutoStableStringChars
;
50 class ParserAtomsTable
;
51 class TaggedParserAtomIndex
;
52 class WellKnownParserAtoms
;
53 struct CompilationAtomCache
;
54 } // namespace frontend
60 /* The buffer length required to contain any unsigned 32-bit integer. */
// This is the number of decimal digits in 4294967295 (UINT32_MAX). The "- 1"
// drops the string literal's NUL terminator from sizeof, so the value does
// not include room for a terminator.
61 static const size_t UINT32_CHAR_BUFFER_LENGTH
= sizeof("4294967295") - 1;
63 // Maximum array index. This value is defined in the spec (ES2021 draft, 6.1.7):
65 // An array index is an integer index whose numeric value i is in the range
66 // +0𝔽 ≤ i < 𝔽(2^32 - 1).
// The spec's upper bound is exclusive, so the largest valid index is one less
// than 2^32 - 1, which is the value stored here.
67 const uint32_t MAX_ARRAY_INDEX
= 4294967294u; // 2^32-2 (= UINT32_MAX-1)
69 // Returns true if the characters of `s` store an unsigned 32-bit integer value
70 // less than or equal to MAX_ARRAY_INDEX, initializing `*indexp` to that value
71 // if so. Leading '0' isn't allowed except 0 itself.
//
// `s` is read as `length` characters of type CharT. `*indexp` is only
// described as initialized on a true return.
// NOTE(review): the state of `*indexp` on a false return is not specified
// here -- confirm against the definition, which is not visible in this part
// of the file.
72 template <typename CharT
>
73 bool CheckStringIsIndex(const CharT
* s
, size_t length
, uint32_t* indexp
);
79 * [SMDOC] JavaScript Strings
81 * Conceptually, a JS string is just an array of chars and a length. This array
82 * of chars may or may not be null-terminated and, if it is, the null character
83 * is not included in the length.
85 * To improve performance of common operations, the following optimizations are
86 * made which affect the engine's representation of strings:
88 * - The plain vanilla representation is a "linear" string which consists of a
89 * string header in the GC heap and a malloc'd char array.
91 * - To avoid copying a substring of an existing "base" string, a "dependent"
92 * string (JSDependentString) can be created which points into the base
93 * string's char array.
95 * - To avoid O(n^2) char buffer copying, a "rope" node (JSRope) can be created
96 * to represent a delayed string concatenation. Concatenation (called
97 * flattening) is performed if and when a linear char array is requested. In
98 * general, ropes form a binary dag whose internal nodes are JSRope string
99 * headers with no associated char array and whose leaf nodes are linear strings.
102 * - To avoid copying the leftmost string when flattening, we may produce an
103 * "extensible" string, which tracks not only its actual length but also its
104 * buffer's overall size. If such an "extensible" string appears as the
105 * leftmost string in a subsequent flatten, and its buffer has enough unused
106 * space, we can simply flatten the rest of the ropes into its buffer,
107 * leaving its text in place. We then transfer ownership of its buffer to the
108 * flattened rope, and mutate the donor extensible string into a dependent
109 * string referencing its original buffer.
111 * (The term "extensible" does not imply that we ever 'realloc' the buffer.
112 * Extensible strings may have dependent strings pointing into them, and the
113 * JSAPI hands out pointers to linear strings' buffers, so resizing with
114 * 'realloc' is generally not possible.)
116 * - To avoid allocating small char arrays, short strings can be stored inline
117 * in the string header (JSInlineString). These come in two flavours:
118 * JSThinInlineString, which is the same size as JSString; and
119 * JSFatInlineString, which has a larger header and so can fit more chars.
121 * - To avoid O(n) string equality comparisons, strings can be
122 * canonicalized to "atoms" (JSAtom) such that there is a single atom with a
123 * given (length,chars).
125 * - To avoid copying all strings created through the JSAPI, an "external"
126 * string (JSExternalString) can be created whose chars are managed by the embedding.
129 * - To avoid using two bytes per character for every string, string
130 * characters are stored as Latin1 instead of TwoByte if all characters are
131 * representable in Latin1.
133 * - To avoid slow conversions from strings to integer indexes, we cache 16 bit
134 * unsigned indexes on strings representing such numbers.
136 * Although all strings share the same basic memory layout, we can conceptually
137 * arrange them into a hierarchy of operations/invariants and represent this
138 * hierarchy in C++ with classes:
140 * C++ type operations+fields / invariants+properties
141 * ========================== =========================================
142 * JSString (abstract) get(Latin1|TwoByte)CharsZ, get(Latin1|TwoByte)Chars, length / -
144 * | JSRope leftChild, rightChild / -
146 * JSLinearString latin1Chars, twoByteChars / -
148 * +-- JSDependentString base / -
150 * +-- JSExternalString - / char array memory managed by embedding
152 * +-- JSExtensibleString - / tracks total buffer capacity (including current text)
154 * +-- JSInlineString (abstract) - / chars stored in header
156 * | +-- JSThinInlineString - / header is normal
158 * | +-- JSFatInlineString - / header is fat
160 * JSAtom (abstract) - / string equality === pointer equality
162 * | +-- js::NormalAtom JSLinearString + atom hash code / -
164 * | | +-- js::ThinInlineAtom
165 * | | possibly larger JSThinInlineString + atom hash code / -
167 * | +-- js::FatInlineAtom JSFatInlineString w/atom hash code / -
169 * js::PropertyName - / chars don't contain an index (uint32_t)
171 * Classes marked with (abstract) above are not literally C++ Abstract Base
172 * Classes (since there are no virtual functions, pure or not, in this
173 * hierarchy), but have the same meaning: there are no strings with this type as
174 * its most-derived type.
176 * Atoms can additionally be permanent, i.e. unable to be collected, and can
177 * be combined with other string types to create additional most-derived types
178 * that satisfy the invariants of more than one of the abovementioned
179 * most-derived types. Furthermore, each atom stores a hash number (based on its
180 * chars). This hash number is used as key in the atoms table and when the atom
181 * is used as key in a JS Map/Set.
183 * Derived string types can be queried from ancestor types via isX() and
184 * retrieved with asX() debug-only-checked casts.
186 * The ensureX() operations mutate 'this' in place to effectively make the type
187 * be at least X (e.g., ensureLinear will change a JSRope to be a JSLinearString).
191 class JSString
: public js::gc::CellWithLengthAndFlags
{
193 using Base
= js::gc::CellWithLengthAndFlags
;
// Capacity, in characters, of the inline character arrays for each encoding:
// the number of Latin1 (resp. char16_t) code units that fit in two
// pointer-sized words. These constants size inlineStorageLatin1 and
// inlineStorageTwoByte.
195 static const size_t NUM_INLINE_CHARS_LATIN1
=
196 2 * sizeof(void*) / sizeof(JS::Latin1Char
);
197 static const size_t NUM_INLINE_CHARS_TWO_BYTE
=
198 2 * sizeof(void*) / sizeof(char16_t
);
201 // String length and flags are stored in the cell header.
// Both accessors forward directly to the corresponding header-field getters.
203 size_t length() const { return headerLengthField(); }
205 uint32_t flags() const { return headerFlagsField(); }
207 // Class for temporarily holding character data that will be used for JSString
208 // contents. The data may be allocated in the nursery, the malloc heap, or in
209 // externally owned memory (perhaps on the stack). The class instance must be
210 // passed to the JSString constructor as a MutableHandle, so that if a GC
211 // occurs between the construction of the content and the construction of the
212 // JSString Cell to hold it, the contents can be transparently moved to the
213 // malloc heap before the nursery is reset.
214 template <typename CharT
>
216 mozilla::Span
<CharT
> chars_
;
221 // needsFree: the chars pointer should be passed to js_free() if OwnedChars
222 // dies while still possessing ownership.
224 // isMalloced: the chars pointer does not point into the nursery.
226 // These are not quite the same, since you might have non-nursery characters
227 // that are owned by something else. needsFree implies isMalloced.
228 OwnedChars(CharT
* chars
, size_t length
, bool isMalloced
, bool needsFree
);
229 OwnedChars(js::UniquePtr
<CharT
[], JS::FreePolicy
>&& chars
, size_t length
,
231 OwnedChars(OwnedChars
&&);
232 OwnedChars(const OwnedChars
&) = delete;
// Destruction goes through reset(), so anything still held is discarded.
233 ~OwnedChars() { reset(); }
// True when the held span is non-empty.
235 explicit operator bool() const { return !chars_
.empty(); }
// The held characters as a span (returned by value).
236 mozilla::Span
<CharT
> span() const { return chars_
; }
// Pointer to the first held character, as reported by the span.
237 CharT
* data() const { return chars_
.data(); }
// Number of characters held (not bytes).
238 size_t length() const { return chars_
.Length(); }
// Size of the held characters in bytes: length() scaled by sizeof(CharT).
239 size_t size() const { return length() * sizeof(CharT
); }
// Whether the chars pointer is known not to point into the nursery.
240 bool isMalloced() const { return isMalloced_
; }
242 // Return the data and release ownership to the caller.
243 inline CharT
* release();
244 // Discard any owned data.
246 // Move any nursery data into the malloc heap.
247 inline void ensureNonNursery();
249 // If we GC with a live OwnedChars, copy the data out of the nursery to a
250 // safely malloced location.
251 void trace(JSTracer
* trc
) { ensureNonNursery(); }
255 /* Fields only apply to string types commented on the right. */
257 // Note: 32-bit length and flags fields are inherited from
258 // CellWithLengthAndFlags.
262 /* JS(Fat)InlineString */
263 JS::Latin1Char inlineStorageLatin1
[NUM_INLINE_CHARS_LATIN1
];
264 char16_t inlineStorageTwoByte
[NUM_INLINE_CHARS_TWO_BYTE
];
268 const JS::Latin1Char
* nonInlineCharsLatin1
; /* JSLinearString, except
269 JS(Fat)InlineString */
270 const char16_t
* nonInlineCharsTwoByte
; /* JSLinearString, except
271 JS(Fat)InlineString */
272 JSString
* left
; /* JSRope */
273 JSRope
* parent
; /* Used in flattening */
276 JSLinearString
* base
; /* JSDependentString */
277 JSString
* right
; /* JSRope */
278 size_t capacity
; /* JSLinearString (extensible) */
279 const JSExternalStringCallbacks
*
280 externalCallbacks
; /* JSExternalString */
287 /* Flags exposed only for jits */
292 * The first word of a JSString stores flags, index, and (on some
293 * platforms) the length. The flags store both the string's type and its
294 * character encoding.
296 * If LATIN1_CHARS_BIT is set, the string's characters are stored as Latin1
297 * instead of TwoByte. This flag can also be set for ropes, if both the
298 * left and right nodes are Latin1. Flattening will result in a Latin1
299 * string in this case.
301 * The other flags store the string's type. Instead of using a dense index
302 * to represent the most-derived type, string types are encoded to allow
303 * single-op tests for hot queries (isRope, isDependent, isAtom) which, in
304 * view of subtyping, would require slower (isX() || isY() || isZ()).
306 * The string type encoding can be summarized as follows. The "instance
307 * encoding" entry for a type specifies the flag bits used to create a
308 * string instance of that type. Abstract types have no instances and thus
309 * have no such entry. The "subtype predicate" entry for a type specifies
310 * the predicate used to query whether a JSString instance is subtype
311 * (reflexively) of that type.
313 * String Instance Subtype
314 * type encoding predicate
315 * -----------------------------------------
316 * Rope 000000 000 xxxx0x xxx
317 * Linear 000010 000 xxxx1x xxx
318 * Dependent 000110 000 xxx1xx xxx
319 * External 100010 000 100010 xxx
320 * Extensible 010010 000 010010 xxx
321 * Inline 001010 000 xx1xxx xxx
322 * FatInline 011010 000 x11xxx xxx
323 * JSAtom - xxxxx1 xxx
324 * NormalAtom 000011 000 xx0xx1 xxx
325 * PermanentAtom 100011 000 1xxxx1 xxx
326 * ThinInlineAtom 001011 000 x01xx1 xxx
327 * FatInlineAtom 011011 000 x11xx1 xxx
329 * |||||| ||\- [0] reserved (FORWARD_BIT)
330 * |||||| |\-- [1] reserved
331 * |||||| \--- [2] reserved
332 * |||||\----- [3] IsAtom
333 * ||||\------ [4] IsLinear
334 * |||\------- [5] IsDependent
335 * ||\-------- [6] IsInline
336 * |\--------- [7] FatInlineAtom/Extensible
337 * \---------- [8] External/Permanent
339 * Bits 0..2 are reserved for use by the GC (see
340 * gc::CellFlagBitsReservedForGC). In particular, bit 0 is currently used for
341 * FORWARD_BIT for forwarded nursery cells. The other 2 bits are currently
344 * Note that the first 4 flag bits 3..6 (from right to left in the previous
345 * table) have the following meaning and can be used for some hot queries:
347 * Bit 3: IsAtom (Atom, PermanentAtom)
350 * Bit 6: IsInline (Inline, FatInline, ThinInlineAtom, FatInlineAtom)
352 * If INDEX_VALUE_BIT is set, bits 16 and up will also hold an integer index.
355 // The low bits of flag word are reserved by GC.
356 static_assert(js::gc::CellFlagBitsReservedForGC
<= 3,
357 "JSString::flags must reserve enough bits for Cell");
// Single-bit type flags. Bits 0..2 are left to the GC, so the string type
// bits start at bit 3.
359 static const uint32_t ATOM_BIT
= js::Bit(3);
360 static const uint32_t LINEAR_BIT
= js::Bit(4);
361 static const uint32_t DEPENDENT_BIT
= js::Bit(5);
362 static const uint32_t INLINE_CHARS_BIT
= js::Bit(6);
// Composite encodings. Bit 7 is shared: together with LINEAR_BIT it encodes an
// extensible string, and together with INLINE_CHARS_BIT it forms the fat
// inline mask. Bit 8 together with LINEAR_BIT encodes an external string.
364 static const uint32_t EXTENSIBLE_FLAGS
= LINEAR_BIT
| js::Bit(7);
365 static const uint32_t EXTERNAL_FLAGS
= LINEAR_BIT
| js::Bit(8);
367 static const uint32_t FAT_INLINE_MASK
= INLINE_CHARS_BIT
| js::Bit(7);
369 /* Initial flags for various types of strings. */
// Every initial value except the rope's includes LINEAR_BIT; a rope starts
// with no type bits set at all.
370 static const uint32_t INIT_THIN_INLINE_FLAGS
= LINEAR_BIT
| INLINE_CHARS_BIT
;
371 static const uint32_t INIT_FAT_INLINE_FLAGS
= LINEAR_BIT
| FAT_INLINE_MASK
;
372 static const uint32_t INIT_ROPE_FLAGS
= 0;
373 static const uint32_t INIT_LINEAR_FLAGS
= LINEAR_BIT
;
374 static const uint32_t INIT_DEPENDENT_FLAGS
= LINEAR_BIT
| DEPENDENT_BIT
;
// Mask selecting the string type bits.
// NOTE(review): assuming js::BitMask(n) yields the low n bits, this is bits
// 3..8, matching the type-encoding table -- confirm against js::BitMask.
376 static const uint32_t TYPE_FLAGS_MASK
= js::BitMask(9) - js::BitMask(3);
// Compile-time check that no type bit overlaps the GC-reserved header bits.
377 static_assert((TYPE_FLAGS_MASK
& js::gc::HeaderWord::RESERVED_MASK
) == 0,
378 "GC reserved bits must not be used for Strings");
380 static const uint32_t LATIN1_CHARS_BIT
= js::Bit(9);
382 // Whether this atom's characters store a uint32 index value less than or
383 // equal to MAX_ARRAY_INDEX. Not used for non-atomized strings.
384 // See JSLinearString::isIndex.
385 static const uint32_t ATOM_IS_INDEX_BIT
= js::Bit(10);
387 static const uint32_t INDEX_VALUE_BIT
= js::Bit(11);
388 static const uint32_t INDEX_VALUE_SHIFT
= 16;
390 // NON_DEDUP_BIT is used in string deduplication during tenuring.
391 static const uint32_t NON_DEDUP_BIT
= js::Bit(12);
393 // If IN_STRING_TO_ATOM_CACHE is set, this string had an entry in the
394 // StringToAtomCache at some point. Note that GC can purge the cache without
395 // clearing this bit.
396 static const uint32_t IN_STRING_TO_ATOM_CACHE
= js::Bit(13);
398 // Flags used during rope flattening that indicate what action to perform when
399 // returning to the rope's parent rope.
400 static const uint32_t FLATTEN_VISIT_RIGHT
= js::Bit(14);
401 static const uint32_t FLATTEN_FINISH_NODE
= js::Bit(15);
// Union of the two flattening flags.
402 static const uint32_t FLATTEN_MASK
=
403 FLATTEN_VISIT_RIGHT
| FLATTEN_FINISH_NODE
;
// NOTE(review): PINNED_ATOM_BIT uses bit 15, the same bit as
// FLATTEN_FINISH_NODE. Presumably this is safe because flattening flags apply
// to ropes and pinning applies to atoms -- confirm.
405 static const uint32_t PINNED_ATOM_BIT
= js::Bit(15);
// A permanent atom has all three of these bits set: the atom bit, the pinned
// bit, and bit 8.
406 static const uint32_t PERMANENT_ATOM_MASK
=
407 ATOM_BIT
| PINNED_ATOM_BIT
| js::Bit(8);
409 static const uint32_t MAX_LENGTH
= JS::MaxStringLength
;
411 static const JS::Latin1Char MAX_LATIN1_CHAR
= 0xff;
414 * Helper function to validate that a string of a given length is
415 * representable by a JSString. An allocation overflow is reported if false
418 static inline bool validateLength(JSContext
* maybecx
, size_t length
);
420 template <js::AllowGC allowGC
>
421 static inline bool validateLengthInternal(JSContext
* maybecx
, size_t length
);
// Offsets of the flags and length fields, forwarded from the header's own
// offset helpers.
423 static constexpr size_t offsetOfFlags() { return offsetOfHeaderFlags(); }
424 static constexpr size_t offsetOfLength() { return offsetOfHeaderLength(); }
426 bool sameLengthAndFlags(const JSString
& other
) const {
427 return length() == other
.length() && flags() == other
.flags();
430 static void staticAsserts() {
431 static_assert(JSString::MAX_LENGTH
< UINT32_MAX
,
432 "Length must fit in 32 bits");
434 sizeof(JSString
) == (offsetof(JSString
, d
.inlineStorageLatin1
) +
435 NUM_INLINE_CHARS_LATIN1
* sizeof(char)),
436 "Inline Latin1 chars must fit in a JSString");
438 sizeof(JSString
) == (offsetof(JSString
, d
.inlineStorageTwoByte
) +
439 NUM_INLINE_CHARS_TWO_BYTE
* sizeof(char16_t
)),
440 "Inline char16_t chars must fit in a JSString");
442 /* Ensure js::shadow::String has the same layout. */
443 using JS::shadow::String
;
445 JSString::offsetOfRawHeaderFlagsField() == offsetof(String
, flags_
),
446 "shadow::String flags offset must match JSString");
447 #if JS_BITS_PER_WORD == 32
448 static_assert(JSString::offsetOfLength() == offsetof(String
, length_
),
449 "shadow::String length offset must match JSString");
451 static_assert(offsetof(JSString
, d
.s
.u2
.nonInlineCharsLatin1
) ==
452 offsetof(String
, nonInlineCharsLatin1
),
453 "shadow::String nonInlineChars offset must match JSString");
454 static_assert(offsetof(JSString
, d
.s
.u2
.nonInlineCharsTwoByte
) ==
455 offsetof(String
, nonInlineCharsTwoByte
),
456 "shadow::String nonInlineChars offset must match JSString");
458 offsetof(JSString
, d
.s
.u3
.externalCallbacks
) ==
459 offsetof(String
, externalCallbacks
),
460 "shadow::String externalCallbacks offset must match JSString");
461 static_assert(offsetof(JSString
, d
.inlineStorageLatin1
) ==
462 offsetof(String
, inlineStorageLatin1
),
463 "shadow::String inlineStorage offset must match JSString");
464 static_assert(offsetof(JSString
, d
.inlineStorageTwoByte
) ==
465 offsetof(String
, inlineStorageTwoByte
),
466 "shadow::String inlineStorage offset must match JSString");
467 static_assert(ATOM_BIT
== String::ATOM_BIT
,
468 "shadow::String::ATOM_BIT must match JSString::ATOM_BIT");
469 static_assert(LINEAR_BIT
== String::LINEAR_BIT
,
470 "shadow::String::LINEAR_BIT must match JSString::LINEAR_BIT");
471 static_assert(INLINE_CHARS_BIT
== String::INLINE_CHARS_BIT
,
472 "shadow::String::INLINE_CHARS_BIT must match "
473 "JSString::INLINE_CHARS_BIT");
474 static_assert(LATIN1_CHARS_BIT
== String::LATIN1_CHARS_BIT
,
475 "shadow::String::LATIN1_CHARS_BIT must match "
476 "JSString::LATIN1_CHARS_BIT");
478 TYPE_FLAGS_MASK
== String::TYPE_FLAGS_MASK
,
479 "shadow::String::TYPE_FLAGS_MASK must match JSString::TYPE_FLAGS_MASK");
481 EXTERNAL_FLAGS
== String::EXTERNAL_FLAGS
,
482 "shadow::String::EXTERNAL_FLAGS must match JSString::EXTERNAL_FLAGS");
485 /* Avoid silly compile errors in JSRope::flatten */
488 friend class js::gc::RelocationOverlay
;
491 template <typename CharT
>
492 MOZ_ALWAYS_INLINE
void setNonInlineChars(const CharT
* chars
);
494 template <typename CharT
>
495 static MOZ_ALWAYS_INLINE
void checkStringCharsArena(const CharT
* chars
) {
497 js::AssertJSStringBufferInCorrectArena(chars
);
501 // Get the non-inline chars union arm matching the given character type
502 template <typename CharT
>
503 MOZ_ALWAYS_INLINE
const CharT
* nonInlineCharsRaw() const;
// True when the string's length is zero.
507 bool empty() const { return length() == 0; }
509 inline bool getChar(JSContext
* cx
, size_t index
, char16_t
* code
);
510 inline bool getCodePoint(JSContext
* cx
, size_t index
, char32_t
* codePoint
);
512 /* Strings have either Latin1 or TwoByte chars. */
// The two queries are exact complements: both test LATIN1_CHARS_BIT.
513 bool hasLatin1Chars() const { return flags() & LATIN1_CHARS_BIT
; }
514 bool hasTwoByteChars() const { return !(flags() & LATIN1_CHARS_BIT
); }
516 /* Strings might contain cached indexes. */
// True when INDEX_VALUE_BIT is set in the flags.
517 bool hasIndexValue() const { return flags() & INDEX_VALUE_BIT
; }
518 uint32_t getIndexValue() const {
519 MOZ_ASSERT(hasIndexValue());
520 MOZ_ASSERT(isLinear());
521 return flags() >> INDEX_VALUE_SHIFT
;
524 inline size_t allocSize() const;
526 /* Fallible conversions to more-derived string types. */
528 inline JSLinearString
* ensureLinear(JSContext
* cx
);
530 /* Type query and debug-checked casts */
// A string is a rope exactly when LINEAR_BIT is clear in its flags.
533 bool isRope() const { return !(flags() & LINEAR_BIT
); }
536 JSRope
& asRope() const {
537 MOZ_ASSERT(isRope());
538 return *(JSRope
*)this;
// True when LINEAR_BIT is set in the flags.
542 bool isLinear() const { return flags() & LINEAR_BIT
; }
545 JSLinearString
& asLinear() const {
546 MOZ_ASSERT(JSString::isLinear());
547 return *(JSLinearString
*)this;
// True when DEPENDENT_BIT is set in the flags.
551 bool isDependent() const { return flags() & DEPENDENT_BIT
; }
554 JSDependentString
& asDependent() const {
555 MOZ_ASSERT(isDependent());
556 return *(JSDependentString
*)this;
560 bool isExtensible() const {
561 return (flags() & TYPE_FLAGS_MASK
) == EXTENSIBLE_FLAGS
;
565 JSExtensibleString
& asExtensible() const {
566 MOZ_ASSERT(isExtensible());
567 return *(JSExtensibleString
*)this;
// True when INLINE_CHARS_BIT is set. FAT_INLINE_MASK includes this bit, so
// the test holds for fat inline strings as well as thin ones.
571 bool isInline() const { return flags() & INLINE_CHARS_BIT
; }
574 JSInlineString
& asInline() const {
575 MOZ_ASSERT(isInline());
576 return *(JSInlineString
*)this;
580 bool isFatInline() const {
581 return (flags() & FAT_INLINE_MASK
) == FAT_INLINE_MASK
;
584 /* For hot code, prefer other type queries. */
585 bool isExternal() const {
586 return (flags() & TYPE_FLAGS_MASK
) == EXTERNAL_FLAGS
;
590 JSExternalString
& asExternal() const {
591 MOZ_ASSERT(isExternal());
592 return *(JSExternalString
*)this;
// True when ATOM_BIT is set in the flags.
596 bool isAtom() const { return flags() & ATOM_BIT
; }
599 bool isPermanentAtom() const {
600 return (flags() & PERMANENT_ATOM_MASK
) == PERMANENT_ATOM_MASK
;
604 JSAtom
& asAtom() const {
605 MOZ_ASSERT(isAtom());
606 return *(JSAtom
*)this;
// NON_DEDUP_BIT bookkeeping: set the bit, clear the bit, or test that it is
// clear.
610 void setNonDeduplicatable() { setFlagBit(NON_DEDUP_BIT
); }
613 void clearNonDeduplicatable() { clearFlagBit(NON_DEDUP_BIT
); }
// NOTE(review): this only reads flags(), which is const, so it looks like it
// could be declared const like the other flag queries -- confirm no caller
// depends on the non-const signature.
616 bool isDeduplicatable() { return !(flags() & NON_DEDUP_BIT
); }
618 void setInStringToAtomCache() {
619 MOZ_ASSERT(!isAtom());
620 setFlagBit(IN_STRING_TO_ATOM_CACHE
);
// Reports whether IN_STRING_TO_ATOM_CACHE is set. The bit is not cleared when
// the GC purges the cache, so a true result means the string had a cache
// entry at some point, not necessarily that it has one now.
622 bool inStringToAtomCache() const { return flags() & IN_STRING_TO_ATOM_CACHE
; }
624 // Fills |array| with various strings that represent the different string
625 // kinds and character encodings.
626 static bool fillWithRepresentatives(JSContext
* cx
,
627 JS::Handle
<js::ArrayObject
*> array
);
629 /* Only called by the GC for dependent strings. */
// A string has a base exactly when it is a dependent string.
631 inline bool hasBase() const { return isDependent(); }
633 inline JSLinearString
* base() const;
635 // The base may be forwarded and becomes a relocation overlay.
636 // The return value can be a relocation overlay when the base is forwarded,
637 // or the return value can be the actual base when it is not forwarded.
638 inline JSLinearString
* nurseryBaseOrRelocOverlay() const;
640 inline bool canOwnDependentChars() const;
642 inline void setBase(JSLinearString
* newBase
);
644 void traceBase(JSTracer
* trc
);
646 /* Only called by the GC for strings with the AllocKind::STRING kind. */
648 inline void finalize(JS::GCContext
* gcx
);
650 /* Gets the number of bytes that the chars take on the heap. */
652 size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf
);
654 bool hasOutOfLineChars() const {
655 return isLinear() && !isInline() && !isDependent() && !isExternal();
658 inline bool ownsMallocedChars() const;
660 /* Encode as many scalar values of the string as UTF-8 as can fit
661 * into the caller-provided buffer replacing unpaired surrogates
662 * with the REPLACEMENT CHARACTER.
664 * Returns the number of code units read and the number of code units
667 * The semantics of this method match the semantics of
668 * TextEncoder.encodeInto().
670 * This function doesn't modify the representation -- rope, linear,
671 * flat, atom, etc. -- of this string. If this string is a rope,
672 * it also doesn't modify the representation of left or right halves
673 * of this string, or of those halves, and so on.
675 * Returns mozilla::Nothing on OOM.
677 mozilla::Maybe
<std::tuple
<size_t, size_t>> encodeUTF8Partial(
678 const JS::AutoRequireNoGC
& nogc
, mozilla::Span
<char> buffer
) const;
681 // To help avoid writing Spectre-unsafe code, we only allow MacroAssembler
682 // to call the method below.
683 friend class js::jit::MacroAssembler
;
684 static size_t offsetOfNonInlineChars() {
686 offsetof(JSString
, d
.s
.u2
.nonInlineCharsTwoByte
) ==
687 offsetof(JSString
, d
.s
.u2
.nonInlineCharsLatin1
),
688 "nonInlineCharsTwoByte and nonInlineCharsLatin1 must have same offset");
689 return offsetof(JSString
, d
.s
.u2
.nonInlineCharsTwoByte
);
693 static const JS::TraceKind TraceKind
= JS::TraceKind::String
;
695 JS::Zone
* zone() const {
697 // Allow permanent atoms to be accessed across zones and runtimes.
698 if (isPermanentAtom()) {
699 return zoneFromAnyThread();
701 return asTenured().zone();
703 return nurseryZone();
706 void setLengthAndFlags(uint32_t len
, uint32_t flags
) {
707 setHeaderLengthAndFlags(len
, flags
);
// Thin forwarding wrappers over the header's flag-bit setter and clearer.
709 void setFlagBit(uint32_t flag
) { setHeaderFlagBit(flag
); }
710 void clearFlagBit(uint32_t flag
) { clearHeaderFlagBit(flag
); }
// Empty body: this performs no work.
712 void fixupAfterMovingGC() {}
714 js::gc::AllocKind
getAllocKind() const {
715 using js::gc::AllocKind
;
719 kind
= AllocKind::FAT_INLINE_ATOM
;
721 kind
= AllocKind::ATOM
;
723 } else if (isFatInline()) {
724 kind
= AllocKind::FAT_INLINE_STRING
;
725 } else if (isExternal()) {
726 kind
= AllocKind::EXTERNAL_STRING
;
728 kind
= AllocKind::STRING
;
730 MOZ_ASSERT_IF(isTenured(), kind
== asTenured().getAllocKind());
734 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
735 void dump(); // Debugger-friendly stderr dump.
736 void dump(js::GenericPrinter
& out
);
737 void dumpNoNewline(js::GenericPrinter
& out
);
738 void dumpCharsNoNewline(js::GenericPrinter
& out
);
739 void dumpRepresentation(js::GenericPrinter
& out
, int indent
) const;
740 void dumpRepresentationHeader(js::GenericPrinter
& out
,
741 const char* subclass
) const;
742 void dumpCharsNoQuote(js::GenericPrinter
& out
);
744 template <typename CharT
>
745 static void dumpChars(const CharT
* s
, size_t len
, js::GenericPrinter
& out
);
747 template <typename CharT
>
748 static void dumpCharsNoQuote(const CharT
* s
, size_t len
,
749 js::GenericPrinter
& out
);
751 bool equals(const char* s
);
754 void traceChildren(JSTracer
* trc
);
756 // Override base class implementation to tell GC about permanent atoms.
// The answer is simply whether this string is a permanent atom.
757 bool isPermanentAndMayBeShared() const { return isPermanentAtom(); }
759 static void addCellAddressToStoreBuffer(js::gc::StoreBuffer
* buffer
,
760 js::gc::Cell
** cellp
) {
761 buffer
->putCell(reinterpret_cast<JSString
**>(cellp
));
764 static void removeCellAddressFromStoreBuffer(js::gc::StoreBuffer
* buffer
,
765 js::gc::Cell
** cellp
) {
766 buffer
->unputCell(reinterpret_cast<JSString
**>(cellp
));
770 JSString(const JSString
& other
) = delete;
771 void operator=(const JSString
& other
) = delete;
774 JSString() = default;
779 template <typename Wrapper
, typename CharT
>
780 class WrappedPtrOperations
<JSString::OwnedChars
<CharT
>, Wrapper
> {
781 const JSString::OwnedChars
<CharT
>& get() const {
782 return static_cast<const Wrapper
*>(this)->get();
786 explicit operator bool() const { return !!get(); }
787 mozilla::Span
<CharT
> span() const { return get().span(); }
788 CharT
* data() const { return get().data(); }
789 size_t length() const { return get().length(); }
790 size_t size() const { return get().size(); }
791 bool isMalloced() const { return get().isMalloced(); }
794 template <typename Wrapper
, typename CharT
>
795 class MutableWrappedPtrOperations
<JSString::OwnedChars
<CharT
>, Wrapper
>
796 : public WrappedPtrOperations
<JSString::OwnedChars
<CharT
>, Wrapper
> {
797 JSString::OwnedChars
<CharT
>& get() {
798 return static_cast<Wrapper
*>(this)->get();
802 CharT
* release() { return get().release(); }
803 void reset() { get().reset(); }
804 void ensureNonNursery() { get().ensureNonNursery(); }
809 class JSRope
: public JSString
{
810 friend class js::gc::CellAllocator
;
812 template <typename CharT
>
813 js::UniquePtr
<CharT
[], JS::FreePolicy
> copyCharsInternal(
814 JSContext
* cx
, arena_id_t destArenaId
) const;
816 enum UsingBarrier
: bool { NoBarrier
= false, WithIncrementalBarrier
= true };
818 friend class JSString
;
819 JSLinearString
* flatten(JSContext
* maybecx
);
821 JSLinearString
* flattenInternal();
822 template <UsingBarrier usingBarrier
>
823 JSLinearString
* flattenInternal();
825 template <UsingBarrier usingBarrier
, typename CharT
>
826 static JSLinearString
* flattenInternal(JSRope
* root
);
828 template <UsingBarrier usingBarrier
>
829 static void ropeBarrierDuringFlattening(JSRope
* rope
);
831 JSRope(JSString
* left
, JSString
* right
, size_t length
);
834 template <js::AllowGC allowGC
>
835 static inline JSRope
* new_(
837 typename
js::MaybeRooted
<JSString
*, allowGC
>::HandleType left
,
838 typename
js::MaybeRooted
<JSString
*, allowGC
>::HandleType right
,
839 size_t length
, js::gc::Heap
= js::gc::Heap::Default
);
841 js::UniquePtr
<JS::Latin1Char
[], JS::FreePolicy
> copyLatin1Chars(
842 JSContext
* maybecx
, arena_id_t destArenaId
) const;
843 JS::UniqueTwoByteChars
copyTwoByteChars(JSContext
* maybecx
,
844 arena_id_t destArenaId
) const;
846 template <typename CharT
>
847 js::UniquePtr
<CharT
[], JS::FreePolicy
> copyChars(
848 JSContext
* maybecx
, arena_id_t destArenaId
) const;
850 // Hash function specific for ropes that avoids allocating a temporary
851 // string. There are still allocations internally so it's technically
854 // Returns the same value as if this were a linear string being hashed.
855 [[nodiscard
]] bool hash(uint32_t* outhHash
) const;
857 // The process of flattening a rope temporarily overwrites the left pointer of
858 // interior nodes in the rope DAG with the parent pointer.
// A rope counts as being flattened while either FLATTEN_MASK bit is set.
859 bool isBeingFlattened() const { return flags() & FLATTEN_MASK
; }
861 JSString
* leftChild() const {
862 MOZ_ASSERT(isRope());
863 MOZ_ASSERT(!isBeingFlattened()); // Flattening overwrites this field.
867 JSString
* rightChild() const {
868 MOZ_ASSERT(isRope());
872 void traceChildren(JSTracer
* trc
);
874 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
875 void dumpRepresentation(js::GenericPrinter
& out
, int indent
) const;
879 // To help avoid writing Spectre-unsafe code, we only allow MacroAssembler
880 // to call the methods below.
881 friend class js::jit::MacroAssembler
;
// Offsets, within a JSRope, of the left and right child pointers
// (d.s.u2.left and d.s.u3.right respectively).
883 static size_t offsetOfLeft() { return offsetof(JSRope
, d
.s
.u2
.left
); }
884 static size_t offsetOfRight() { return offsetof(JSRope
, d
.s
.u3
.right
); }
// JSRope must be exactly as large as JSString, i.e. it adds no storage of
// its own.
887 static_assert(sizeof(JSRope
) == sizeof(JSString
),
888 "string subclasses must be binary-compatible with JSString");
891 * There are optimized entry points for some string allocation functions.
893 * The meaning of suffix:
894 * * "MaybeDeflate": for char16_t variant, characters can fit Latin1
895 * * "DontDeflate": for char16_t variant, characters don't fit Latin1
896 * * "NonStatic": characters don't match StaticStrings
897 * * "ValidLength": length fits JSString::MAX_LENGTH
900 class JSLinearString
: public JSString
{
901 friend class JSString
;
902 friend class JS::AutoStableStringChars
;
903 friend class js::gc::TenuringTracer
;
904 friend class js::gc::CellAllocator
;
906 /* Vacuous and therefore unimplemented. */
907 JSLinearString
* ensureLinear(JSContext
* cx
) = delete;
908 bool isLinear() const = delete;
909 JSLinearString
& asLinear() const = delete;
911 JSLinearString(const char16_t
* chars
, size_t length
);
912 JSLinearString(const JS::Latin1Char
* chars
, size_t length
);
913 template <typename CharT
>
914 explicit inline JSLinearString(JS::MutableHandle
<OwnedChars
<CharT
>> chars
);
917 // Used to construct subclasses that do a full initialization themselves.
918 JSLinearString() = default;
920 /* Returns void pointer to latin1/twoByte chars, for finalizers. */
922 void* nonInlineCharsRaw() const {
923 MOZ_ASSERT(!isInline());
925 offsetof(JSLinearString
, d
.s
.u2
.nonInlineCharsTwoByte
) ==
926 offsetof(JSLinearString
, d
.s
.u2
.nonInlineCharsLatin1
),
927 "nonInlineCharsTwoByte and nonInlineCharsLatin1 must have same offset");
928 return (void*)d
.s
.u2
.nonInlineCharsTwoByte
;
931 MOZ_ALWAYS_INLINE
const JS::Latin1Char
* rawLatin1Chars() const;
932 MOZ_ALWAYS_INLINE
const char16_t
* rawTwoByteChars() const;
935 template <js::AllowGC allowGC
, typename CharT
>
936 static inline JSLinearString
* new_(JSContext
* cx
,
937 JS::MutableHandle
<OwnedChars
<CharT
>> chars
,
940 template <js::AllowGC allowGC
, typename CharT
>
941 static inline JSLinearString
* newValidLength(
942 JSContext
* cx
, JS::MutableHandle
<OwnedChars
<CharT
>> chars
,
945 // Convert a plain linear string to an extensible string. For testing. The
946 // caller must ensure that it is a plain or extensible string already, and
947 // that `capacity` is adequate.
948 JSExtensibleString
& makeExtensible(size_t capacity
);
950 template <typename CharT
>
951 MOZ_ALWAYS_INLINE
const CharT
* nonInlineChars(
952 const JS::AutoRequireNoGC
& nogc
) const;
955 const JS::Latin1Char
* nonInlineLatin1Chars(
956 const JS::AutoRequireNoGC
& nogc
) const {
957 MOZ_ASSERT(!isInline());
958 MOZ_ASSERT(hasLatin1Chars());
959 return d
.s
.u2
.nonInlineCharsLatin1
;
963 const char16_t
* nonInlineTwoByteChars(const JS::AutoRequireNoGC
& nogc
) const {
964 MOZ_ASSERT(!isInline());
965 MOZ_ASSERT(hasTwoByteChars());
966 return d
.s
.u2
.nonInlineCharsTwoByte
;
969 template <typename CharT
>
970 MOZ_ALWAYS_INLINE
const CharT
* chars(const JS::AutoRequireNoGC
& nogc
) const;
973 const JS::Latin1Char
* latin1Chars(const JS::AutoRequireNoGC
& nogc
) const {
974 return rawLatin1Chars();
978 const char16_t
* twoByteChars(const JS::AutoRequireNoGC
& nogc
) const {
979 return rawTwoByteChars();
982 mozilla::Range
<const JS::Latin1Char
> latin1Range(
983 const JS::AutoRequireNoGC
& nogc
) const {
984 MOZ_ASSERT(JSString::isLinear());
985 return mozilla::Range
<const JS::Latin1Char
>(latin1Chars(nogc
), length());
988 mozilla::Range
<const char16_t
> twoByteRange(
989 const JS::AutoRequireNoGC
& nogc
) const {
990 MOZ_ASSERT(JSString::isLinear());
991 return mozilla::Range
<const char16_t
>(twoByteChars(nogc
), length());
995 char16_t
latin1OrTwoByteChar(size_t index
) const {
996 MOZ_ASSERT(JSString::isLinear());
997 MOZ_ASSERT(index
< length());
998 JS::AutoCheckCannotGC nogc
;
999 return hasLatin1Chars() ? latin1Chars(nogc
)[index
]
1000 : twoByteChars(nogc
)[index
];
1003 bool isIndexSlow(uint32_t* indexp
) const {
1004 MOZ_ASSERT(JSString::isLinear());
1005 size_t len
= length();
1006 if (len
== 0 || len
> js::UINT32_CHAR_BUFFER_LENGTH
) {
1009 JS::AutoCheckCannotGC nogc
;
1010 if (hasLatin1Chars()) {
1011 const JS::Latin1Char
* s
= latin1Chars(nogc
);
1012 return mozilla::IsAsciiDigit(*s
) &&
1013 js::CheckStringIsIndex(s
, len
, indexp
);
1015 const char16_t
* s
= twoByteChars(nogc
);
1016 return mozilla::IsAsciiDigit(*s
) && js::CheckStringIsIndex(s
, len
, indexp
);
1019 // Returns true if this string's characters store an unsigned 32-bit integer
1020 // value less than or equal to MAX_ARRAY_INDEX, initializing *indexp to that
1021 // value if so. Leading '0' isn't allowed except 0 itself.
1022 // (Thus if calling isIndex returns true, js::IndexToString(cx, *indexp) will
1023 // be a string equal to this string.)
1024 inline bool isIndex(uint32_t* indexp
) const;
1026 // Return whether the characters of this string can be moved by minor or
1028 inline bool hasMovableChars() const;
1030 void maybeInitializeIndexValue(uint32_t index
, bool allowAtom
= false) {
1031 MOZ_ASSERT(JSString::isLinear());
1032 MOZ_ASSERT_IF(hasIndexValue(), getIndexValue() == index
);
1033 MOZ_ASSERT_IF(!allowAtom
, !isAtom());
1035 if (hasIndexValue() || index
> UINT16_MAX
) {
1039 mozilla::DebugOnly
<uint32_t> containedIndex
;
1040 MOZ_ASSERT(isIndexSlow(&containedIndex
));
1041 MOZ_ASSERT(index
== containedIndex
);
1043 setFlagBit((index
<< INDEX_VALUE_SHIFT
) | INDEX_VALUE_BIT
);
1044 MOZ_ASSERT(getIndexValue() == index
);
1048 * Returns a property name represented by this string, or null on failure.
1049 * You must verify that this is not an index per isIndex before calling
1052 inline js::PropertyName
* toPropertyName(JSContext
* cx
);
1054 // Make sure chars are not in the nursery, mallocing and copying if necessary.
1055 // Should only be called during minor GC on a string that has been promoted
1056 // to the tenured heap and may still point to nursery-allocated chars.
1057 template <typename CharT
>
1058 inline size_t maybeMallocCharsOnPromotion(js::Nursery
* nursery
);
1060 inline void finalize(JS::GCContext
* gcx
);
1061 inline size_t allocSize() const;
1063 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
1064 void dumpRepresentationChars(js::GenericPrinter
& out
, int indent
) const;
1065 void dumpRepresentation(js::GenericPrinter
& out
, int indent
) const;
1068 // Make a partially-initialized string safe for finalization.
1069 inline void disownCharsBecauseError();
1072 static_assert(sizeof(JSLinearString
) == sizeof(JSString
),
1073 "string subclasses must be binary-compatible with JSString");
1075 class JSDependentString
: public JSLinearString
{
1076 friend class JSString
;
1077 friend class js::gc::CellAllocator
;
1079 JSDependentString(JSLinearString
* base
, size_t start
, size_t length
);
1081 // For JIT string allocation.
1082 JSDependentString() = default;
1084 /* Vacuous and therefore unimplemented. */
1085 bool isDependent() const = delete;
1086 JSDependentString
& asDependent() const = delete;
1088 /* The offset of this string's chars in base->chars(). */
1089 MOZ_ALWAYS_INLINE
size_t baseOffset() const {
1090 MOZ_ASSERT(JSString::isDependent());
1091 JS::AutoCheckCannotGC nogc
;
1093 if (hasTwoByteChars()) {
1094 offset
= twoByteChars(nogc
) - base()->twoByteChars(nogc
);
1096 offset
= latin1Chars(nogc
) - base()->latin1Chars(nogc
);
1098 MOZ_ASSERT(offset
< base()->length());
1103 // This will always return a dependent string, and will assert if the chars
1104 // could fit into an inline string.
1105 static inline JSLinearString
* new_(JSContext
* cx
, JSLinearString
* base
,
1106 size_t start
, size_t length
,
1109 template <typename T
>
1110 void relocateNonInlineChars(T chars
, size_t offset
) {
1111 setNonInlineChars(chars
+ offset
);
1114 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
1115 void dumpRepresentation(js::GenericPrinter
& out
, int indent
) const;
1119 // To help avoid writing Spectre-unsafe code, we only allow MacroAssembler
1120 // to call the method below.
1121 friend class js::jit::MacroAssembler
;
1123 inline static size_t offsetOfBase() {
1124 return offsetof(JSDependentString
, d
.s
.u3
.base
);
1128 static_assert(sizeof(JSDependentString
) == sizeof(JSString
),
1129 "string subclasses must be binary-compatible with JSString");
1131 class JSExtensibleString
: public JSLinearString
{
1132 /* Vacuous and therefore unimplemented. */
1133 bool isExtensible() const = delete;
1134 JSExtensibleString
& asExtensible() const = delete;
1138 size_t capacity() const {
1139 MOZ_ASSERT(JSString::isExtensible());
1140 return d
.s
.u3
.capacity
;
1143 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
1144 void dumpRepresentation(js::GenericPrinter
& out
, int indent
) const;
1148 static_assert(sizeof(JSExtensibleString
) == sizeof(JSString
),
1149 "string subclasses must be binary-compatible with JSString");
1151 class JSInlineString
: public JSLinearString
{
1154 const JS::Latin1Char
* latin1Chars(const JS::AutoRequireNoGC
& nogc
) const {
1155 MOZ_ASSERT(JSString::isInline());
1156 MOZ_ASSERT(hasLatin1Chars());
1157 return d
.inlineStorageLatin1
;
1161 const char16_t
* twoByteChars(const JS::AutoRequireNoGC
& nogc
) const {
1162 MOZ_ASSERT(JSString::isInline());
1163 MOZ_ASSERT(hasTwoByteChars());
1164 return d
.inlineStorageTwoByte
;
1167 template <typename CharT
>
1168 static bool lengthFits(size_t length
);
1170 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
1171 void dumpRepresentation(js::GenericPrinter
& out
, int indent
) const;
1175 // To help avoid writing Spectre-unsafe code, we only allow MacroAssembler
1176 // to call the method below.
1177 friend class js::jit::MacroAssembler
;
1178 static size_t offsetOfInlineStorage() {
1179 return offsetof(JSInlineString
, d
.inlineStorageTwoByte
);
1183 static_assert(sizeof(JSInlineString
) == sizeof(JSString
),
1184 "string subclasses must be binary-compatible with JSString");
1187 * On 32-bit platforms, JSThinInlineString can store 8 Latin1 characters or 4
1188 * TwoByte characters inline. On 64-bit platforms, these numbers are 16 and 8,
1191 class JSThinInlineString
: public JSInlineString
{
1192 friend class js::gc::CellAllocator
;
1194 // The constructors return a mutable pointer to the data, because the first
1195 // thing any creator will do is copy in the string value. This also
1196 // conveniently allows doing overload resolution on CharT.
1197 explicit JSThinInlineString(size_t length
, JS::Latin1Char
** chars
);
1198 explicit JSThinInlineString(size_t length
, char16_t
** chars
);
1200 // For JIT string allocation.
1201 JSThinInlineString() = default;
1204 static constexpr size_t InlineBytes
= NUM_INLINE_CHARS_LATIN1
;
1206 static const size_t MAX_LENGTH_LATIN1
= NUM_INLINE_CHARS_LATIN1
;
1207 static const size_t MAX_LENGTH_TWO_BYTE
= NUM_INLINE_CHARS_TWO_BYTE
;
1209 template <js::AllowGC allowGC
>
1210 static inline JSThinInlineString
* new_(JSContext
* cx
, js::gc::Heap heap
);
1212 template <typename CharT
>
1213 static bool lengthFits(size_t length
);
1216 static_assert(sizeof(JSThinInlineString
) == sizeof(JSString
),
1217 "string subclasses must be binary-compatible with JSString");
1220 * On both 32-bit and 64-bit platforms, MAX_LENGTH_TWO_BYTE is 12 and
1221 * MAX_LENGTH_LATIN1 is 24. This is deliberate, in order to minimize potential
1222 * performance differences between 32-bit and 64-bit platforms.
1224 * There are still some differences due to NUM_INLINE_CHARS_* being different.
1225 * E.g. TwoByte strings of length 5--8 will be JSFatInlineStrings on 32-bit
1226 * platforms and JSThinInlineStrings on 64-bit platforms. But the more
1227 * significant transition from inline strings to non-inline strings occurs at
1228 * length 12 (for TwoByte strings) and 24 (Latin1 strings) on both 32-bit and
1231 class JSFatInlineString
: public JSInlineString
{
1232 friend class js::gc::CellAllocator
;
1234 static const size_t INLINE_EXTENSION_CHARS_LATIN1
=
1235 24 - NUM_INLINE_CHARS_LATIN1
;
1236 static const size_t INLINE_EXTENSION_CHARS_TWO_BYTE
=
1237 12 - NUM_INLINE_CHARS_TWO_BYTE
;
1239 // The constructors return a mutable pointer to the data, because the first
1240 // thing any creator will do is copy in the string value. This also
1241 // conveniently allows doing overload resolution on CharT.
1242 explicit JSFatInlineString(size_t length
, JS::Latin1Char
** chars
);
1243 explicit JSFatInlineString(size_t length
, char16_t
** chars
);
1245 // For JIT string allocation.
1246 JSFatInlineString() = default;
1248 protected: /* to fool clang into not warning this is unused */
1250 char inlineStorageExtensionLatin1
[INLINE_EXTENSION_CHARS_LATIN1
];
1251 char16_t inlineStorageExtensionTwoByte
[INLINE_EXTENSION_CHARS_TWO_BYTE
];
1255 template <js::AllowGC allowGC
>
1256 static inline JSFatInlineString
* new_(JSContext
* cx
, js::gc::Heap heap
);
1258 static const size_t MAX_LENGTH_LATIN1
=
1259 JSString::NUM_INLINE_CHARS_LATIN1
+ INLINE_EXTENSION_CHARS_LATIN1
;
1261 static const size_t MAX_LENGTH_TWO_BYTE
=
1262 JSString::NUM_INLINE_CHARS_TWO_BYTE
+ INLINE_EXTENSION_CHARS_TWO_BYTE
;
1264 template <typename CharT
>
1265 static bool lengthFits(size_t length
);
1267 // Only called by the GC for strings with the AllocKind::FAT_INLINE_STRING
1269 MOZ_ALWAYS_INLINE
void finalize(JS::GCContext
* gcx
);
1272 static_assert(sizeof(JSFatInlineString
) % js::gc::CellAlignBytes
== 0,
1273 "fat inline strings shouldn't waste space up to the next cell "
1276 class JSExternalString
: public JSLinearString
{
1277 friend class js::gc::CellAllocator
;
1279 JSExternalString(const JS::Latin1Char
* chars
, size_t length
,
1280 const JSExternalStringCallbacks
* callbacks
);
1281 JSExternalString(const char16_t
* chars
, size_t length
,
1282 const JSExternalStringCallbacks
* callbacks
);
1284 /* Vacuous and therefore unimplemented. */
1285 bool isExternal() const = delete;
1286 JSExternalString
& asExternal() const = delete;
1288 template <typename CharT
>
1289 static inline JSExternalString
* newImpl(
1290 JSContext
* cx
, const CharT
* chars
, size_t length
,
1291 const JSExternalStringCallbacks
* callbacks
);
1294 static inline JSExternalString
* new_(
1295 JSContext
* cx
, const JS::Latin1Char
* chars
, size_t length
,
1296 const JSExternalStringCallbacks
* callbacks
);
1297 static inline JSExternalString
* new_(
1298 JSContext
* cx
, const char16_t
* chars
, size_t length
,
1299 const JSExternalStringCallbacks
* callbacks
);
1301 const JSExternalStringCallbacks
* callbacks() const {
1302 MOZ_ASSERT(JSString::isExternal());
1303 return d
.s
.u3
.externalCallbacks
;
1306 // External chars are never allocated inline or in the nursery, so we can
1307 // safely expose this without requiring an AutoCheckCannotGC argument.
1308 const JS::Latin1Char
* latin1Chars() const { return rawLatin1Chars(); }
1309 const char16_t
* twoByteChars() const { return rawTwoByteChars(); }
1311 // Only called by the GC for strings with the AllocKind::EXTERNAL_STRING
1313 inline void finalize(JS::GCContext
* gcx
);
1315 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
1316 void dumpRepresentation(js::GenericPrinter
& out
, int indent
) const;
1320 static_assert(sizeof(JSExternalString
) == sizeof(JSString
),
1321 "string subclasses must be binary-compatible with JSString");
1323 class JSAtom
: public JSLinearString
{
1324 /* Vacuous and therefore unimplemented. */
1325 bool isAtom() const = delete;
1326 JSAtom
& asAtom() const = delete;
1329 template <typename CharT
>
1330 static inline JSAtom
* newValidLength(
1331 JSContext
* cx
, js::UniquePtr
<CharT
[], JS::FreePolicy
> chars
,
1332 size_t length
, js::HashNumber hash
);
1334 /* Returns the PropertyName for this. isIndex() must be false. */
1335 inline js::PropertyName
* asPropertyName();
1338 bool isPermanent() const { return JSString::isPermanentAtom(); }
1341 void makePermanent() {
1342 MOZ_ASSERT(JSString::isAtom());
1343 setFlagBit(PERMANENT_ATOM_MASK
);
1346 MOZ_ALWAYS_INLINE
bool isIndex() const {
1347 MOZ_ASSERT(JSString::isAtom());
1348 mozilla::DebugOnly
<uint32_t> index
;
1349 MOZ_ASSERT(!!(flags() & ATOM_IS_INDEX_BIT
) == isIndexSlow(&index
));
1350 return flags() & ATOM_IS_INDEX_BIT
;
1352 MOZ_ALWAYS_INLINE
bool isIndex(uint32_t* index
) const {
1353 MOZ_ASSERT(JSString::isAtom());
1357 *index
= hasIndexValue() ? getIndexValue() : getIndexSlow();
1361 uint32_t getIndexSlow() const;
1363 void setIsIndex(uint32_t index
) {
1364 MOZ_ASSERT(JSString::isAtom());
1365 setFlagBit(ATOM_IS_INDEX_BIT
);
1366 maybeInitializeIndexValue(index
, /* allowAtom = */ true);
1369 MOZ_ALWAYS_INLINE
bool isPinned() const { return flags() & PINNED_ATOM_BIT
; }
1372 MOZ_ASSERT(!isPinned());
1373 setFlagBit(PINNED_ATOM_BIT
);
1376 inline js::HashNumber
hash() const;
1377 inline void initHash(js::HashNumber hash
);
1379 template <typename CharT
>
1380 static bool lengthFitsInline(size_t length
);
1382 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
1383 void dump(js::GenericPrinter
& out
);
1390 class NormalAtom
: public JSAtom
{
1391 friend class gc::CellAllocator
;
1394 static constexpr size_t ExtensionBytes
=
1395 js::gc::CellAlignBytes
- sizeof(js::HashNumber
);
1397 char inlineStorage_
[ExtensionBytes
];
1400 // For subclasses to call.
1401 explicit NormalAtom(js::HashNumber hash
) : hash_(hash
) {}
1403 // Out of line atoms, mimicking JSLinearString constructors.
1404 NormalAtom(const char16_t
* chars
, size_t length
, js::HashNumber hash
);
1405 NormalAtom(const JS::Latin1Char
* chars
, size_t length
, js::HashNumber hash
);
1408 HashNumber
hash() const { return hash_
; }
1409 void initHash(HashNumber hash
) { hash_
= hash
; }
1411 static constexpr size_t offsetOfHash() { return offsetof(NormalAtom
, hash_
); }
1414 static_assert(sizeof(NormalAtom
) ==
1415 js::RoundUp(sizeof(JSString
) + sizeof(js::HashNumber
),
1416 js::gc::CellAlignBytes
),
1417 "NormalAtom must have size of a string + HashNumber, "
1418 "aligned to gc::CellAlignBytes");
1420 class ThinInlineAtom
: public NormalAtom
{
1421 friend class gc::CellAllocator
;
1424 static constexpr size_t MAX_LENGTH_LATIN1
=
1425 NUM_INLINE_CHARS_LATIN1
+ ExtensionBytes
/ sizeof(JS::Latin1Char
);
1426 static constexpr size_t MAX_LENGTH_TWO_BYTE
=
1427 NUM_INLINE_CHARS_TWO_BYTE
+ ExtensionBytes
/ sizeof(char16_t
);
1430 // Fat and Thin inline atoms are the same size. Only use fat.
1431 static constexpr bool EverInstantiated
= false;
1433 static constexpr bool EverInstantiated
= true;
1437 // Mimicking JSThinInlineString constructors.
1439 ThinInlineAtom(size_t length
, JS::Latin1Char
** chars
,
1440 js::HashNumber hash
) = delete;
1441 ThinInlineAtom(size_t length
, char16_t
** chars
, js::HashNumber hash
) = delete;
1443 ThinInlineAtom(size_t length
, JS::Latin1Char
** chars
, js::HashNumber hash
);
1444 ThinInlineAtom(size_t length
, char16_t
** chars
, js::HashNumber hash
);
1448 template <typename CharT
>
1449 static bool lengthFits(size_t length
) {
1450 if constexpr (sizeof(CharT
) == sizeof(JS::Latin1Char
)) {
1451 return length
<= MAX_LENGTH_LATIN1
;
1453 return length
<= MAX_LENGTH_TWO_BYTE
;
1458 // FatInlineAtom is basically a JSFatInlineString, except it has a hash value in
1459 // the last word that reduces the inline char storage.
1460 class FatInlineAtom
: public JSAtom
{
1461 friend class gc::CellAllocator
;
1463 // The space available for storing inline characters. It's the same amount of
1464 // space as a JSFatInlineString, except we take the hash value out of it.
1465 static constexpr size_t InlineBytes
= sizeof(JSFatInlineString
) -
1466 sizeof(JSString::Base
) -
1467 sizeof(js::HashNumber
);
1469 static constexpr size_t ExtensionBytes
=
1470 InlineBytes
- JSThinInlineString::InlineBytes
;
1473 static constexpr size_t MAX_LENGTH_LATIN1
=
1474 InlineBytes
/ sizeof(JS::Latin1Char
);
1475 static constexpr size_t MAX_LENGTH_TWO_BYTE
= InlineBytes
/ sizeof(char16_t
);
1477 protected: // Silence Clang unused-field warning.
1478 char inlineStorage_
[ExtensionBytes
];
1481 // Mimicking JSFatInlineString constructors.
1482 explicit FatInlineAtom(size_t length
, JS::Latin1Char
** chars
,
1483 js::HashNumber hash
);
1484 explicit FatInlineAtom(size_t length
, char16_t
** chars
, js::HashNumber hash
);
1487 HashNumber
hash() const { return hash_
; }
1488 void initHash(HashNumber hash
) { hash_
= hash
; }
1490 inline void finalize(JS::GCContext
* gcx
);
1492 static constexpr size_t offsetOfHash() {
1494 sizeof(FatInlineAtom
) ==
1495 js::RoundUp(sizeof(JSThinInlineString
) +
1496 FatInlineAtom::ExtensionBytes
+ sizeof(HashNumber
),
1497 gc::CellAlignBytes
),
1498 "FatInlineAtom must have size of a thin inline string + "
1499 "extension bytes if any + HashNumber, "
1500 "aligned to gc::CellAlignBytes");
1502 return offsetof(FatInlineAtom
, hash_
);
1505 template <typename CharT
>
1506 static bool lengthFits(size_t length
) {
1507 return length
* sizeof(CharT
) <= InlineBytes
;
1511 static_assert(sizeof(FatInlineAtom
) == sizeof(JSFatInlineString
),
1512 "FatInlineAtom must be the same size as a fat inline string");
// When an algorithm does not need a string represented as a single linear
// array of characters, this range utility may be used to traverse the string
// as a sequence of linear arrays of characters. This avoids flattening ropes.
1517 template <size_t Size
= 16>
1518 class StringSegmentRange
{
1519 // If malloc() shows up in any profiles from this vector, we can add a new
1520 // StackAllocPolicy which stashes a reusable freed-at-gc buffer in the cx.
1521 using StackVector
= JS::GCVector
<JSString
*, Size
>;
1522 Rooted
<StackVector
> stack
;
1523 Rooted
<JSLinearString
*> cur
;
1525 bool settle(JSString
* str
) {
1526 while (str
->isRope()) {
1527 JSRope
& rope
= str
->asRope();
1528 if (!stack
.append(rope
.rightChild())) {
1531 str
= rope
.leftChild();
1533 cur
= &str
->asLinear();
1538 explicit StringSegmentRange(JSContext
* cx
)
1539 : stack(cx
, StackVector(cx
)), cur(cx
) {}
1541 [[nodiscard
]] bool init(JSString
* str
) {
1542 MOZ_ASSERT(stack
.empty());
1546 bool empty() const { return cur
== nullptr; }
1548 JSLinearString
* front() const {
1549 MOZ_ASSERT(!cur
->isRope());
1553 [[nodiscard
]] bool popFront() {
1554 MOZ_ASSERT(!empty());
1555 if (stack
.empty()) {
1559 return settle(stack
.popCopy());
1565 inline js::HashNumber
JSAtom::hash() const {
1566 if (isFatInline()) {
1567 return static_cast<const js::FatInlineAtom
*>(this)->hash();
1569 return static_cast<const js::NormalAtom
*>(this)->hash();
1572 inline void JSAtom::initHash(js::HashNumber hash
) {
1573 if (isFatInline()) {
1574 return static_cast<js::FatInlineAtom
*>(this)->initHash(hash
);
1576 return static_cast<js::NormalAtom
*>(this)->initHash(hash
);
1582 * Represents an atomized string which does not contain an index (that is, an
1583 * unsigned 32-bit value). Thus for any PropertyName propname,
1584 * ToString(ToUint32(propname)) never equals propname.
1586 * To more concretely illustrate the utility of PropertyName, consider that it
1587 * is used to partition, in a type-safe manner, the ways to refer to a
1588 * property, as follows:
1590 * - uint32_t indexes,
1591 * - PropertyName strings which don't encode uint32_t indexes,
1593 * - JS::PropertyKey::isVoid.
1595 class PropertyName
: public JSAtom
{
1597 /* Vacuous and therefore unimplemented. */
1598 PropertyName
* asPropertyName() = delete;
1601 static_assert(sizeof(PropertyName
) == sizeof(JSString
),
1602 "string subclasses must be binary-compatible with JSString");
1604 static MOZ_ALWAYS_INLINE jsid
NameToId(PropertyName
* name
) {
1605 return JS::PropertyKey::NonIntAtom(name
);
1608 using PropertyNameVector
= JS::GCVector
<PropertyName
*>;
1610 template <typename CharT
>
1611 void CopyChars(CharT
* dest
, const JSLinearString
& str
);
1613 static inline UniqueChars
StringToNewUTF8CharsZ(JSContext
* cx
, JSString
& str
) {
1614 JS::AutoCheckCannotGC nogc
;
1616 JSLinearString
* linear
= str
.ensureLinear(cx
);
1622 linear
->hasLatin1Chars()
1623 ? JS::CharsToNewUTF8CharsZ(cx
, linear
->latin1Range(nogc
)).c_str()
1624 : JS::CharsToNewUTF8CharsZ(cx
, linear
->twoByteRange(nogc
)).c_str());
1628 * Allocate a string with the given contents. If |allowGC == CanGC|, this may
1631 template <js::AllowGC allowGC
, typename CharT
>
1632 extern JSLinearString
* NewString(JSContext
* cx
,
1633 UniquePtr
<CharT
[], JS::FreePolicy
> chars
,
1635 js::gc::Heap heap
= js::gc::Heap::Default
);
1637 /* Like NewString, but doesn't try to deflate to Latin1. */
1638 template <js::AllowGC allowGC
, typename CharT
>
1639 extern JSLinearString
* NewStringDontDeflate(
1640 JSContext
* cx
, UniquePtr
<CharT
[], JS::FreePolicy
> chars
, size_t length
,
1641 js::gc::Heap heap
= js::gc::Heap::Default
);
1643 /* This may return a static string/atom or an inline string. */
1644 extern JSLinearString
* NewDependentString(
1645 JSContext
* cx
, JSString
* base
, size_t start
, size_t length
,
1646 js::gc::Heap heap
= js::gc::Heap::Default
);
1648 /* Take ownership of an array of Latin1Chars. */
1649 extern JSLinearString
* NewLatin1StringZ(
1650 JSContext
* cx
, UniqueChars chars
,
1651 js::gc::Heap heap
= js::gc::Heap::Default
);
1653 /* Copy a counted string and GC-allocate a descriptor for it. */
1654 template <js::AllowGC allowGC
, typename CharT
>
1655 extern JSLinearString
* NewStringCopyN(
1656 JSContext
* cx
, const CharT
* s
, size_t n
,
1657 js::gc::Heap heap
= js::gc::Heap::Default
);
1659 template <js::AllowGC allowGC
>
1660 inline JSLinearString
* NewStringCopyN(
1661 JSContext
* cx
, const char* s
, size_t n
,
1662 js::gc::Heap heap
= js::gc::Heap::Default
) {
1663 return NewStringCopyN
<allowGC
>(cx
, reinterpret_cast<const Latin1Char
*>(s
), n
,
1667 template <typename CharT
>
1668 extern JSAtom
* NewAtomCopyNMaybeDeflateValidLength(JSContext
* cx
,
1669 const CharT
* s
, size_t n
,
1670 js::HashNumber hash
);
1672 template <typename CharT
>
1673 extern JSAtom
* NewAtomCopyNDontDeflateValidLength(JSContext
* cx
, const CharT
* s
,
1675 js::HashNumber hash
);
1677 /* Copy a counted string and GC-allocate a descriptor for it. */
1678 template <js::AllowGC allowGC
, typename CharT
>
1679 inline JSLinearString
* NewStringCopy(
1680 JSContext
* cx
, mozilla::Span
<const CharT
> s
,
1681 js::gc::Heap heap
= js::gc::Heap::Default
) {
1682 return NewStringCopyN
<allowGC
>(cx
, s
.data(), s
.size(), heap
);
1685 /* Copy a counted string and GC-allocate a descriptor for it. */
1686 template <js::AllowGC allowGC
, typename CharT
>
1687 inline JSLinearString
* NewStringCopy(
1688 JSContext
* cx
, std::basic_string_view
<CharT
> s
,
1689 js::gc::Heap heap
= js::gc::Heap::Default
) {
1690 return NewStringCopyN
<allowGC
>(cx
, s
.data(), s
.size(), heap
);
1693 /* Like NewStringCopyN, but doesn't try to deflate to Latin1. */
1694 template <js::AllowGC allowGC
, typename CharT
>
1695 extern JSLinearString
* NewStringCopyNDontDeflate(
1696 JSContext
* cx
, const CharT
* s
, size_t n
,
1697 js::gc::Heap heap
= js::gc::Heap::Default
);
1699 template <js::AllowGC allowGC
, typename CharT
>
1700 extern JSLinearString
* NewStringCopyNDontDeflateNonStaticValidLength(
1701 JSContext
* cx
, const CharT
* s
, size_t n
,
1702 js::gc::Heap heap
= js::gc::Heap::Default
);
1704 /* Copy a C string and GC-allocate a descriptor for it. */
1705 template <js::AllowGC allowGC
>
1706 inline JSLinearString
* NewStringCopyZ(
1707 JSContext
* cx
, const char16_t
* s
,
1708 js::gc::Heap heap
= js::gc::Heap::Default
) {
1709 return NewStringCopyN
<allowGC
>(cx
, s
, js_strlen(s
), heap
);
1712 template <js::AllowGC allowGC
>
1713 inline JSLinearString
* NewStringCopyZ(
1714 JSContext
* cx
, const char* s
, js::gc::Heap heap
= js::gc::Heap::Default
) {
1715 return NewStringCopyN
<allowGC
>(cx
, s
, strlen(s
), heap
);
1718 extern JSLinearString
* NewStringCopyUTF8N(
1719 JSContext
* cx
, const JS::UTF8Chars
& utf8
, JS::SmallestEncoding encoding
,
1720 js::gc::Heap heap
= js::gc::Heap::Default
);
1722 extern JSLinearString
* NewStringCopyUTF8N(
1723 JSContext
* cx
, const JS::UTF8Chars
& utf8
,
1724 js::gc::Heap heap
= js::gc::Heap::Default
);
1726 inline JSLinearString
* NewStringCopyUTF8Z(
1727 JSContext
* cx
, const JS::ConstUTF8CharsZ utf8
,
1728 js::gc::Heap heap
= js::gc::Heap::Default
) {
1729 return NewStringCopyUTF8N(
1730 cx
, JS::UTF8Chars(utf8
.c_str(), strlen(utf8
.c_str())), heap
);
1733 template <typename CharT
>
1734 JSString
* NewMaybeExternalString(JSContext
* cx
, const CharT
* s
, size_t n
,
1735 const JSExternalStringCallbacks
* callbacks
,
1736 bool* allocatedExternal
,
1737 js::gc::Heap heap
= js::gc::Heap::Default
);
1739 static_assert(sizeof(HashNumber
) == 4);
1741 template <AllowGC allowGC
>
1742 extern JSString
* ConcatStrings(
1743 JSContext
* cx
, typename MaybeRooted
<JSString
*, allowGC
>::HandleType left
,
1744 typename MaybeRooted
<JSString
*, allowGC
>::HandleType right
,
1745 js::gc::Heap heap
= js::gc::Heap::Default
);
1748 * Test if strings are equal. The caller can call the function even if str1
1749 * or str2 are not GC-allocated things.
1751 extern bool EqualStrings(JSContext
* cx
, JSString
* str1
, JSString
* str2
,
1754 /* Use the infallible method instead! */
1755 extern bool EqualStrings(JSContext
* cx
, JSLinearString
* str1
,
1756 JSLinearString
* str2
, bool* result
) = delete;
1758 /* EqualStrings is infallible on linear strings. */
1759 extern bool EqualStrings(const JSLinearString
* str1
,
1760 const JSLinearString
* str2
);
1763 * Compare two strings that are known to be the same length.
1764 * Exposed for the JITs; for ordinary uses, EqualStrings() is more sensible.
1766 * The caller must have checked for the following cases that can be handled
1767 * efficiently without requiring a character comparison:
1769 * - str1->length() != str2->length()
1770 * - str1->isAtom() && str2->isAtom()
1772 extern bool EqualChars(const JSLinearString
* str1
, const JSLinearString
* str2
);
1775 * Return less than, equal to, or greater than zero depending on whether
1776 * `s1[0..len1]` is less than, equal to, or greater than `s2`.
1778 extern int32_t CompareChars(const char16_t
* s1
, size_t len1
,
1779 JSLinearString
* s2
);
1782 * Compare two strings, like CompareChars, but store the result in `*result`.
1783 * This flattens the strings and therefore can fail.
1785 extern bool CompareStrings(JSContext
* cx
, JSString
* str1
, JSString
* str2
,
1789 * Compare two strings, like CompareChars.
1791 extern int32_t CompareStrings(const JSLinearString
* str1
,
1792 const JSLinearString
* str2
);
1795 * Return true if the string contains only ASCII characters.
1797 extern bool StringIsAscii(JSLinearString
* str
);
1800 * Return true if the string matches the given sequence of ASCII bytes.
1802 extern bool StringEqualsAscii(JSLinearString
* str
, const char* asciiBytes
);
1804 * Return true if the string matches the given sequence of ASCII
1805 * bytes. The sequence of ASCII bytes must have length "length". The
1806 * length should not include the trailing null, if any.
1808 extern bool StringEqualsAscii(JSLinearString
* str
, const char* asciiBytes
,
1812 bool StringEqualsLiteral(JSLinearString
* str
, const char (&asciiBytes
)[N
]) {
1813 MOZ_ASSERT(asciiBytes
[N
- 1] == '\0');
1814 return StringEqualsAscii(str
, asciiBytes
, N
- 1);
1817 extern int StringFindPattern(JSLinearString
* text
, JSLinearString
* pat
,
1821 * Return true if the string contains a pattern at |start|.
1823 * Precondition: `text` is long enough that this might be true;
1824 * that is, it has at least `start + pat->length()` characters.
1826 extern bool HasSubstringAt(JSLinearString
* text
, JSLinearString
* pat
,
1830 * Computes |str|'s substring for the range [beginInt, beginInt + lengthInt).
1831 * Negative, overlarge, swapped, etc. |beginInt| and |lengthInt| are forbidden
1832 * and constitute API misuse.
1834 JSString
* SubstringKernel(JSContext
* cx
, HandleString str
, int32_t beginInt
,
1837 inline js::HashNumber
HashStringChars(JSLinearString
* str
) {
1838 JS::AutoCheckCannotGC nogc
;
1839 size_t len
= str
->length();
1840 return str
->hasLatin1Chars()
1841 ? mozilla::HashString(str
->latin1Chars(nogc
), len
)
1842 : mozilla::HashString(str
->twoByteChars(nogc
), len
);
1845 /*** Conversions ************************************************************/
1848 * Convert a string to a printable C string.
1850 * Asserts if the input contains any non-ASCII characters.
1852 UniqueChars
EncodeAscii(JSContext
* cx
, JSString
* str
);
1855 * Convert a string to a printable C string.
1857 UniqueChars
EncodeLatin1(JSContext
* cx
, JSString
* str
);
1859 enum class IdToPrintableBehavior
: bool {
1861 * Request the printable representation of an identifier.
1866 * Request the printable representation of a property key.
1872 * Convert a jsid to a printable C string encoded in UTF-8.
1874 extern UniqueChars
IdToPrintableUTF8(JSContext
* cx
, HandleId id
,
1875 IdToPrintableBehavior behavior
);
1878 * Convert a non-string value to a string, returning null after reporting an
1879 * error, otherwise returning a new string reference.
1881 template <AllowGC allowGC
>
1882 extern JSString
* ToStringSlow(
1883 JSContext
* cx
, typename MaybeRooted
<Value
, allowGC
>::HandleType arg
);
1886 * Convert the given value to a string. This method includes an inline
1887 * fast-path for the case where the value is already a string; if the value is
1888 * known not to be a string, use ToStringSlow instead.
1890 template <AllowGC allowGC
>
1891 static MOZ_ALWAYS_INLINE JSString
* ToString(JSContext
* cx
, JS::HandleValue v
) {
1893 return v
.toString();
1895 return ToStringSlow
<allowGC
>(cx
, v
);
1899 * This function implements E-262-3 section 9.8, toString. Convert the given
1900 * value to a string of characters appended to the given buffer. On error, the
1901 * passed buffer may have partial results appended.
1903 inline bool ValueToStringBuffer(JSContext
* cx
, const Value
& v
,
1906 } /* namespace js */
1908 MOZ_ALWAYS_INLINE
bool JSString::getChar(JSContext
* cx
, size_t index
,
1910 MOZ_ASSERT(index
< length());
1913 * Optimization for one level deep ropes.
1914 * This is common for the following pattern:
1917 * text = text.substr(0, x) + "bla" + text.substr(x)
1918 * test.charCodeAt(x + 1)
1921 * Note: keep this in sync with MacroAssembler::loadStringChar and
1922 * CanAttachStringChar.
1926 JSRope
* rope
= &asRope();
1927 if (uint32_t(index
) < rope
->leftChild()->length()) {
1928 str
= rope
->leftChild();
1930 str
= rope
->rightChild();
1931 index
-= rope
->leftChild()->length();
1937 if (!str
->ensureLinear(cx
)) {
1941 *code
= str
->asLinear().latin1OrTwoByteChar(index
);
1945 MOZ_ALWAYS_INLINE
bool JSString::getCodePoint(JSContext
* cx
, size_t index
,
1947 // C++ implementation of https://tc39.es/ecma262/#sec-codepointat
1948 size_t size
= length();
1949 MOZ_ASSERT(index
< size
);
1952 if (!getChar(cx
, index
, &first
)) {
1955 if (!js::unicode::IsLeadSurrogate(first
) || index
+ 1 == size
) {
1961 if (!getChar(cx
, index
+ 1, &second
)) {
1964 if (!js::unicode::IsTrailSurrogate(second
)) {
1969 *code
= js::unicode::UTF16Decode(first
, second
);
1973 MOZ_ALWAYS_INLINE JSLinearString
* JSString::ensureLinear(JSContext
* cx
) {
1974 return isLinear() ? &asLinear() : asRope().flatten(cx
);
1977 inline JSLinearString
* JSString::base() const {
1978 MOZ_ASSERT(hasBase());
1979 MOZ_ASSERT(!d
.s
.u3
.base
->isInline());
1983 inline JSLinearString
* JSString::nurseryBaseOrRelocOverlay() const {
1984 MOZ_ASSERT(hasBase());
1988 inline bool JSString::canOwnDependentChars() const {
1989 // A string that could own the malloced chars used by another (dependent)
1990 // string. It will not have a base and must be linear and non-inline.
1991 return isLinear() && !isInline() && !hasBase();
1994 inline void JSString::setBase(JSLinearString
* newBase
) {
1995 MOZ_ASSERT(hasBase());
1996 MOZ_ASSERT(!newBase
->isInline());
1997 d
.s
.u3
.base
= newBase
;
2001 MOZ_ALWAYS_INLINE
const char16_t
* JSLinearString::nonInlineChars(
2002 const JS::AutoRequireNoGC
& nogc
) const {
2003 return nonInlineTwoByteChars(nogc
);
2007 MOZ_ALWAYS_INLINE
const JS::Latin1Char
* JSLinearString::nonInlineChars(
2008 const JS::AutoRequireNoGC
& nogc
) const {
2009 return nonInlineLatin1Chars(nogc
);
2013 MOZ_ALWAYS_INLINE
const char16_t
* JSLinearString::chars(
2014 const JS::AutoRequireNoGC
& nogc
) const {
2015 return rawTwoByteChars();
2019 MOZ_ALWAYS_INLINE
const JS::Latin1Char
* JSLinearString::chars(
2020 const JS::AutoRequireNoGC
& nogc
) const {
2021 return rawLatin1Chars();
2025 MOZ_ALWAYS_INLINE
js::UniquePtr
<JS::Latin1Char
[], JS::FreePolicy
>
2026 JSRope::copyChars
<JS::Latin1Char
>(JSContext
* maybecx
,
2027 arena_id_t destArenaId
) const {
2028 return copyLatin1Chars(maybecx
, destArenaId
);
2032 MOZ_ALWAYS_INLINE
JS::UniqueTwoByteChars
JSRope::copyChars
<char16_t
>(
2033 JSContext
* maybecx
, arena_id_t destArenaId
) const {
2034 return copyTwoByteChars(maybecx
, destArenaId
);
2038 MOZ_ALWAYS_INLINE
bool JSThinInlineString::lengthFits
<JS::Latin1Char
>(
2040 return length
<= MAX_LENGTH_LATIN1
;
2044 MOZ_ALWAYS_INLINE
bool JSThinInlineString::lengthFits
<char16_t
>(size_t length
) {
2045 return length
<= MAX_LENGTH_TWO_BYTE
;
2049 MOZ_ALWAYS_INLINE
bool JSFatInlineString::lengthFits
<JS::Latin1Char
>(
2052 (INLINE_EXTENSION_CHARS_LATIN1
* sizeof(char)) % js::gc::CellAlignBytes
==
2054 "fat inline strings' Latin1 characters don't exactly "
2055 "fill subsequent cells and thus are wasteful");
2056 static_assert(MAX_LENGTH_LATIN1
==
2057 (sizeof(JSFatInlineString
) -
2058 offsetof(JSFatInlineString
, d
.inlineStorageLatin1
)) /
2060 "MAX_LENGTH_LATIN1 must be one less than inline Latin1 "
2063 return length
<= MAX_LENGTH_LATIN1
;
2067 MOZ_ALWAYS_INLINE
bool JSFatInlineString::lengthFits
<char16_t
>(size_t length
) {
2068 static_assert((INLINE_EXTENSION_CHARS_TWO_BYTE
* sizeof(char16_t
)) %
2069 js::gc::CellAlignBytes
==
2071 "fat inline strings' char16_t characters don't exactly "
2072 "fill subsequent cells and thus are wasteful");
2073 static_assert(MAX_LENGTH_TWO_BYTE
==
2074 (sizeof(JSFatInlineString
) -
2075 offsetof(JSFatInlineString
, d
.inlineStorageTwoByte
)) /
2077 "MAX_LENGTH_TWO_BYTE must be one less than inline "
2078 "char16_t storage count");
2080 return length
<= MAX_LENGTH_TWO_BYTE
;
2084 MOZ_ALWAYS_INLINE
bool JSInlineString::lengthFits
<JS::Latin1Char
>(
2086 // If it fits in a fat inline string, it fits in any inline string.
2087 return JSFatInlineString::lengthFits
<JS::Latin1Char
>(length
);
2091 MOZ_ALWAYS_INLINE
bool JSInlineString::lengthFits
<char16_t
>(size_t length
) {
2092 // If it fits in a fat inline string, it fits in any inline string.
2093 return JSFatInlineString::lengthFits
<char16_t
>(length
);
2097 MOZ_ALWAYS_INLINE
bool js::ThinInlineAtom::lengthFits
<JS::Latin1Char
>(
2099 return length
<= MAX_LENGTH_LATIN1
;
2103 MOZ_ALWAYS_INLINE
bool js::ThinInlineAtom::lengthFits
<char16_t
>(size_t length
) {
2104 return length
<= MAX_LENGTH_TWO_BYTE
;
2108 MOZ_ALWAYS_INLINE
bool js::FatInlineAtom::lengthFits
<JS::Latin1Char
>(
2110 return length
<= MAX_LENGTH_LATIN1
;
2114 MOZ_ALWAYS_INLINE
bool js::FatInlineAtom::lengthFits
<char16_t
>(size_t length
) {
2115 return length
<= MAX_LENGTH_TWO_BYTE
;
2119 MOZ_ALWAYS_INLINE
bool JSAtom::lengthFitsInline
<JS::Latin1Char
>(size_t length
) {
2120 // If it fits in a fat inline atom, it fits in any inline atom.
2121 return js::FatInlineAtom::lengthFits
<JS::Latin1Char
>(length
);
2125 MOZ_ALWAYS_INLINE
bool JSAtom::lengthFitsInline
<char16_t
>(size_t length
) {
2126 // If it fits in a fat inline atom, it fits in any inline atom.
2127 return js::FatInlineAtom::lengthFits
<char16_t
>(length
);
2131 MOZ_ALWAYS_INLINE
void JSString::setNonInlineChars(const char16_t
* chars
) {
2132 // Check that the new buffer is located in the StringBufferArena
2133 checkStringCharsArena(chars
);
2134 d
.s
.u2
.nonInlineCharsTwoByte
= chars
;
2138 MOZ_ALWAYS_INLINE
void JSString::setNonInlineChars(
2139 const JS::Latin1Char
* chars
) {
2140 // Check that the new buffer is located in the StringBufferArena
2141 checkStringCharsArena(chars
);
2142 d
.s
.u2
.nonInlineCharsLatin1
= chars
;
2145 MOZ_ALWAYS_INLINE
const JS::Latin1Char
* JSLinearString::rawLatin1Chars() const {
2146 MOZ_ASSERT(JSString::isLinear());
2147 MOZ_ASSERT(hasLatin1Chars());
2148 return isInline() ? d
.inlineStorageLatin1
: d
.s
.u2
.nonInlineCharsLatin1
;
2151 MOZ_ALWAYS_INLINE
const char16_t
* JSLinearString::rawTwoByteChars() const {
2152 MOZ_ASSERT(JSString::isLinear());
2153 MOZ_ASSERT(hasTwoByteChars());
2154 return isInline() ? d
.inlineStorageTwoByte
: d
.s
.u2
.nonInlineCharsTwoByte
;
2157 inline js::PropertyName
* JSAtom::asPropertyName() {
2158 MOZ_ASSERT(!isIndex());
2159 return static_cast<js::PropertyName
*>(this);
2162 inline bool JSLinearString::isIndex(uint32_t* indexp
) const {
2163 MOZ_ASSERT(JSString::isLinear());
2166 return asAtom().isIndex(indexp
);
2169 if (JSString::hasIndexValue()) {
2170 *indexp
= getIndexValue();
2174 return isIndexSlow(indexp
);
2180 inline JSString
* Cell::as
<JSString
>() {
2181 MOZ_ASSERT(is
<JSString
>());
2182 return reinterpret_cast<JSString
*>(this);
2186 inline JSString
* TenuredCell::as
<JSString
>() {
2187 MOZ_ASSERT(is
<JSString
>());
2188 return reinterpret_cast<JSString
*>(this);
2191 // StringRelocationOverlay assists with updating the string chars
2192 // pointers of dependent strings when their base strings are
2193 // deduplicated. It stores:
2194 // - nursery chars of a root base (root base is a non-dependent base), or
2195 // - nursery base of a dependent string
2196 // StringRelocationOverlay exploits the fact that the 3rd word of a JSString's
2197 // RelocationOverlay is not utilized and can be used to store extra information.
2198 class StringRelocationOverlay
: public RelocationOverlay
{
2200 // nursery chars of a root base
2201 const JS::Latin1Char
* nurseryCharsLatin1
;
2202 const char16_t
* nurseryCharsTwoByte
;
2204 // The nursery base can be forwarded, which becomes a string relocation
2205 // overlay, or it is not yet forwarded and is simply the base.
2206 JSLinearString
* nurseryBaseOrRelocOverlay
;
2210 explicit StringRelocationOverlay(Cell
* dst
) : RelocationOverlay(dst
) {
2211 static_assert(sizeof(JSString
) >= sizeof(StringRelocationOverlay
));
2214 static const StringRelocationOverlay
* fromCell(const Cell
* cell
) {
2215 return static_cast<const StringRelocationOverlay
*>(cell
);
2218 static StringRelocationOverlay
* fromCell(Cell
* cell
) {
2219 return static_cast<StringRelocationOverlay
*>(cell
);
2222 void setNext(StringRelocationOverlay
* next
) {
2223 MOZ_ASSERT(isForwarded());
2227 StringRelocationOverlay
* next() const {
2228 MOZ_ASSERT(isForwarded());
2229 return (StringRelocationOverlay
*)next_
;
2232 template <typename CharT
>
2233 MOZ_ALWAYS_INLINE
const CharT
* savedNurseryChars() const;
2235 const MOZ_ALWAYS_INLINE
JS::Latin1Char
* savedNurseryCharsLatin1() const {
2236 return nurseryCharsLatin1
;
2239 const MOZ_ALWAYS_INLINE char16_t
* savedNurseryCharsTwoByte() const {
2240 return nurseryCharsTwoByte
;
2243 JSLinearString
* savedNurseryBaseOrRelocOverlay() const {
2244 return nurseryBaseOrRelocOverlay
;
2247 // Transform a nursery string to a StringRelocationOverlay that is forwarded
2248 // to a tenured string.
2249 inline static StringRelocationOverlay
* forwardCell(JSString
* src
, Cell
* dst
) {
2250 MOZ_ASSERT(!src
->isForwarded());
2251 MOZ_ASSERT(!dst
->isForwarded());
2253 JS::AutoCheckCannotGC nogc
;
2254 StringRelocationOverlay
* overlay
;
2256 // Initialize the overlay, and remember the nursery base string if there is
2257 // one, or nursery non-inlined chars if it can be the root base of other
2260 // The non-inlined chars of a tenured dependent string should point to the
2261 // tenured root base's one with an offset. For example, a dependent string
2262 // may start from the 3rd char of its root base. During tenuring, offsets
2263 // of dependent strings can be computed from the nursery non-inlined chars
2264 // remembered in overlays.
2265 if (src
->hasBase()) {
2266 auto nurseryBaseOrRelocOverlay
= src
->nurseryBaseOrRelocOverlay();
2267 overlay
= new (src
) StringRelocationOverlay(dst
);
2268 overlay
->nurseryBaseOrRelocOverlay
= nurseryBaseOrRelocOverlay
;
2269 } else if (src
->canOwnDependentChars()) {
2270 if (src
->hasTwoByteChars()) {
2271 auto nurseryCharsTwoByte
= src
->asLinear().twoByteChars(nogc
);
2272 overlay
= new (src
) StringRelocationOverlay(dst
);
2273 overlay
->nurseryCharsTwoByte
= nurseryCharsTwoByte
;
2275 auto nurseryCharsLatin1
= src
->asLinear().latin1Chars(nogc
);
2276 overlay
= new (src
) StringRelocationOverlay(dst
);
2277 overlay
->nurseryCharsLatin1
= nurseryCharsLatin1
;
2280 overlay
= new (src
) StringRelocationOverlay(dst
);
2288 MOZ_ALWAYS_INLINE
const JS::Latin1Char
*
2289 StringRelocationOverlay::savedNurseryChars() const {
2290 return savedNurseryCharsLatin1();
2294 MOZ_ALWAYS_INLINE
const char16_t
* StringRelocationOverlay::savedNurseryChars()
2296 return savedNurseryCharsTwoByte();
2302 #endif /* vm_StringType_h */