Bug 1867190 - Add prefs for PHC probabilities r=glandium
[gecko.git] / js / src / vm / StringType.h
blobcae4ec86a5e90688798059ab027a13c0c31ce39b
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #ifndef vm_StringType_h
8 #define vm_StringType_h
10 #include "mozilla/Maybe.h"
11 #include "mozilla/MemoryReporting.h"
12 #include "mozilla/Range.h"
13 #include "mozilla/Span.h"
14 #include "mozilla/TextUtils.h"
16 #include <string_view> // std::basic_string_view
18 #include "jstypes.h" // js::Bit
20 #include "gc/Cell.h"
21 #include "gc/MaybeRooted.h"
22 #include "gc/Nursery.h"
23 #include "gc/RelocationOverlay.h"
24 #include "gc/StoreBuffer.h"
25 #include "js/CharacterEncoding.h"
26 #include "js/RootingAPI.h"
27 #include "js/shadow/String.h" // JS::shadow::String
28 #include "js/String.h" // JS::MaxStringLength
29 #include "js/UniquePtr.h"
30 #include "util/Text.h"
32 class JSDependentString;
33 class JSExtensibleString;
34 class JSExternalString;
35 class JSInlineString;
36 class JSRope;
38 namespace JS {
39 class JS_PUBLIC_API AutoStableStringChars;
40 } // namespace JS
42 namespace js {
44 class ArrayObject;
45 class GenericPrinter;
46 class PropertyName;
47 class StringBuffer;
49 namespace frontend {
50 class ParserAtomsTable;
51 class TaggedParserAtomIndex;
52 class WellKnownParserAtoms;
53 struct CompilationAtomCache;
54 } // namespace frontend
56 namespace jit {
57 class MacroAssembler;
58 } // namespace jit
/*
 * The buffer length required to contain any unsigned 32-bit integer written
 * in decimal. sizeof counts the literal's terminating NUL, hence the "- 1":
 * the value is the number of digits only.
 */
static constexpr size_t UINT32_CHAR_BUFFER_LENGTH = sizeof("4294967295") - 1;

// Maximum array index. This value is defined in the spec (ES2021 draft, 6.1.7):
//
//   An array index is an integer index whose numeric value i is in the range
//   +0𝔽 ≤ i < 𝔽(2^32 - 1).
constexpr uint32_t MAX_ARRAY_INDEX = 4294967294u;  // 2^32-2 (= UINT32_MAX-1)
69 // Returns true if the characters of `s` store an unsigned 32-bit integer value
70 // less than or equal to MAX_ARRAY_INDEX, initializing `*indexp` to that value
71 // if so. Leading '0' isn't allowed except 0 itself.
72 template <typename CharT>
73 bool CheckStringIsIndex(const CharT* s, size_t length, uint32_t* indexp);
75 } /* namespace js */
77 // clang-format off
79 * [SMDOC] JavaScript Strings
81 * Conceptually, a JS string is just an array of chars and a length. This array
82 * of chars may or may not be null-terminated and, if it is, the null character
83 * is not included in the length.
85 * To improve performance of common operations, the following optimizations are
86 * made which affect the engine's representation of strings:
88 * - The plain vanilla representation is a "linear" string which consists of a
89 * string header in the GC heap and a malloc'd char array.
91 * - To avoid copying a substring of an existing "base" string, a "dependent"
92 * string (JSDependentString) can be created which points into the base
93 * string's char array.
95 * - To avoid O(n^2) char buffer copying, a "rope" node (JSRope) can be created
96 * to represent a delayed string concatenation. Concatenation (called
97 * flattening) is performed if and when a linear char array is requested. In
98 * general, ropes form a binary dag whose internal nodes are JSRope string
99 * headers with no associated char array and whose leaf nodes are linear
100 * strings.
102 * - To avoid copying the leftmost string when flattening, we may produce an
103 * "extensible" string, which tracks not only its actual length but also its
104 * buffer's overall size. If such an "extensible" string appears as the
105 * leftmost string in a subsequent flatten, and its buffer has enough unused
106 * space, we can simply flatten the rest of the ropes into its buffer,
107 * leaving its text in place. We then transfer ownership of its buffer to the
108 * flattened rope, and mutate the donor extensible string into a dependent
109 * string referencing its original buffer.
111 * (The term "extensible" does not imply that we ever 'realloc' the buffer.
112 * Extensible strings may have dependent strings pointing into them, and the
113 * JSAPI hands out pointers to linear strings' buffers, so resizing with
114 * 'realloc' is generally not possible.)
116 * - To avoid allocating small char arrays, short strings can be stored inline
117 * in the string header (JSInlineString). These come in two flavours:
118 * JSThinInlineString, which is the same size as JSString; and
119 * JSFatInlineString, which has a larger header and so can fit more chars.
121 * - To avoid O(n) string equality comparisons, strings can be
122 * canonicalized to "atoms" (JSAtom) such that there is a single atom with a
123 * given (length,chars).
125 * - To avoid copying all strings created through the JSAPI, an "external"
126 * string (JSExternalString) can be created whose chars are managed by the
127 * JSAPI client.
129 * - To avoid using two bytes per character for every string, string
130 * characters are stored as Latin1 instead of TwoByte if all characters are
131 * representable in Latin1.
133 * - To avoid slow conversions from strings to integer indexes, we cache 16 bit
134 * unsigned indexes on strings representing such numbers.
136 * Although all strings share the same basic memory layout, we can conceptually
137 * arrange them into a hierarchy of operations/invariants and represent this
138 * hierarchy in C++ with classes:
140 * C++ type operations+fields / invariants+properties
141 * ========================== =========================================
142 * JSString (abstract) get(Latin1|TwoByte)CharsZ, get(Latin1|TwoByte)Chars, length / -
143 * | \
144 * | JSRope leftChild, rightChild / -
146 * JSLinearString latin1Chars, twoByteChars / -
148 * +-- JSDependentString base / -
150 * +-- JSExternalString - / char array memory managed by embedding
152 * +-- JSExtensibleString - / tracks total buffer capacity (including current text)
154 * +-- JSInlineString (abstract) - / chars stored in header
155 * | |
156 * | +-- JSThinInlineString - / header is normal
157 * | |
158 * | +-- JSFatInlineString - / header is fat
160 * JSAtom (abstract) - / string equality === pointer equality
161 * | |
162 * | +-- js::NormalAtom JSLinearString + atom hash code / -
163 * | | |
164 * | | +-- js::ThinInlineAtom
165 * | | possibly larger JSThinInlineString + atom hash code / -
166 * | |
167 * | +-- js::FatInlineAtom JSFatInlineString w/atom hash code / -
169 * js::PropertyName - / chars don't contain an index (uint32_t)
171 * Classes marked with (abstract) above are not literally C++ Abstract Base
172 * Classes (since there are no virtual functions, pure or not, in this
173 * hierarchy), but have the same meaning: there are no strings with this type as
174 * its most-derived type.
176 * Atoms can additionally be permanent, i.e. unable to be collected, and can
177 * be combined with other string types to create additional most-derived types
178 * that satisfy the invariants of more than one of the abovementioned
179 * most-derived types. Furthermore, each atom stores a hash number (based on its
180 * chars). This hash number is used as key in the atoms table and when the atom
181 * is used as key in a JS Map/Set.
183 * Derived string types can be queried from ancestor types via isX() and
184 * retrieved with asX() debug-only-checked casts.
186 * The ensureX() operations mutate 'this' in place to effectively make the type
187 * be at least X (e.g., ensureLinear will change a JSRope to be a JSLinearString).
189 // clang-format on
191 class JSString : public js::gc::CellWithLengthAndFlags {
192 protected:
193 using Base = js::gc::CellWithLengthAndFlags;
195 static const size_t NUM_INLINE_CHARS_LATIN1 =
196 2 * sizeof(void*) / sizeof(JS::Latin1Char);
197 static const size_t NUM_INLINE_CHARS_TWO_BYTE =
198 2 * sizeof(void*) / sizeof(char16_t);
200 public:
201 // String length and flags are stored in the cell header.
202 MOZ_ALWAYS_INLINE
203 size_t length() const { return headerLengthField(); }
204 MOZ_ALWAYS_INLINE
205 uint32_t flags() const { return headerFlagsField(); }
207 // Class for temporarily holding character data that will be used for JSString
208 // contents. The data may be allocated in the nursery, the malloc heap, or in
209 // externally owned memory (perhaps on the stack). The class instance must be
210 // passed to the JSString constructor as a MutableHandle, so that if a GC
211 // occurs between the construction of the content and the construction of the
212 // JSString Cell to hold it, the contents can be transparently moved to the
213 // malloc heap before the nursery is reset.
214 template <typename CharT>
215 class OwnedChars {
216 mozilla::Span<CharT> chars_;
217 bool needsFree_;
218 bool isMalloced_;
220 public:
221 // needsFree: the chars pointer should be passed to js_free() if OwnedChars
222 // dies while still possessing ownership.
224 // isMalloced: the chars pointer does not point into the nursery.
226 // These are not quite the same, since you might have non-nursery characters
227 // that are owned by something else. needsFree implies isMalloced.
228 OwnedChars(CharT* chars, size_t length, bool isMalloced, bool needsFree);
229 OwnedChars(js::UniquePtr<CharT[], JS::FreePolicy>&& chars, size_t length,
230 bool isMalloced);
231 OwnedChars(OwnedChars&&);
232 OwnedChars(const OwnedChars&) = delete;
233 ~OwnedChars() { reset(); }
235 explicit operator bool() const { return !chars_.empty(); }
236 mozilla::Span<CharT> span() const { return chars_; }
237 CharT* data() const { return chars_.data(); }
238 size_t length() const { return chars_.Length(); }
239 size_t size() const { return length() * sizeof(CharT); }
240 bool isMalloced() const { return isMalloced_; }
242 // Return the data and release ownership to the caller.
243 inline CharT* release();
244 // Discard any owned data.
245 inline void reset();
246 // Move any nursery data into the malloc heap.
247 inline void ensureNonNursery();
249 // If we GC with a live OwnedChars, copy the data out of the nursery to a
250 // safely malloced location.
251 void trace(JSTracer* trc) { ensureNonNursery(); }
254 protected:
255 /* Fields only apply to string types commented on the right. */
256 struct Data {
257 // Note: 32-bit length and flags fields are inherited from
258 // CellWithLengthAndFlags.
260 union {
261 union {
262 /* JS(Fat)InlineString */
263 JS::Latin1Char inlineStorageLatin1[NUM_INLINE_CHARS_LATIN1];
264 char16_t inlineStorageTwoByte[NUM_INLINE_CHARS_TWO_BYTE];
266 struct {
267 union {
268 const JS::Latin1Char* nonInlineCharsLatin1; /* JSLinearString, except
269 JS(Fat)InlineString */
270 const char16_t* nonInlineCharsTwoByte; /* JSLinearString, except
271 JS(Fat)InlineString */
272 JSString* left; /* JSRope */
273 JSRope* parent; /* Used in flattening */
274 } u2;
275 union {
276 JSLinearString* base; /* JSDependentString */
277 JSString* right; /* JSRope */
278 size_t capacity; /* JSLinearString (extensible) */
279 const JSExternalStringCallbacks*
280 externalCallbacks; /* JSExternalString */
281 } u3;
282 } s;
284 } d;
286 public:
287 /* Flags exposed only for jits */
290 * Flag Encoding
292 * The first word of a JSString stores flags, index, and (on some
293 * platforms) the length. The flags store both the string's type and its
294 * character encoding.
296 * If LATIN1_CHARS_BIT is set, the string's characters are stored as Latin1
297 * instead of TwoByte. This flag can also be set for ropes, if both the
298 * left and right nodes are Latin1. Flattening will result in a Latin1
299 * string in this case.
301 * The other flags store the string's type. Instead of using a dense index
302 * to represent the most-derived type, string types are encoded to allow
303 * single-op tests for hot queries (isRope, isDependent, isAtom) which, in
304 * view of subtyping, would require slower (isX() || isY() || isZ()).
306 * The string type encoding can be summarized as follows. The "instance
307 * encoding" entry for a type specifies the flag bits used to create a
308 * string instance of that type. Abstract types have no instances and thus
309 * have no such entry. The "subtype predicate" entry for a type specifies
310 * the predicate used to query whether a JSString instance is subtype
311 * (reflexively) of that type.
313 * String Instance Subtype
314 * type encoding predicate
315 * -----------------------------------------
316 * Rope 000000 000 xxxx0x xxx
317 * Linear 000010 000 xxxx1x xxx
318 * Dependent 000110 000 xxx1xx xxx
319 * External 100010 000 100010 xxx
320 * Extensible 010010 000 010010 xxx
321 * Inline 001010 000 xx1xxx xxx
322 * FatInline 011010 000 x11xxx xxx
323 * JSAtom - xxxxx1 xxx
324 * NormalAtom 000011 000 xx0xx1 xxx
325 * PermanentAtom 100011 000 1xxxx1 xxx
326 * ThinInlineAtom 001011 000 x01xx1 xxx
327 * FatInlineAtom 011011 000 x11xx1 xxx
328 * |||||| |||
329 * |||||| ||\- [0] reserved (FORWARD_BIT)
330 * |||||| |\-- [1] reserved
331 * |||||| \--- [2] reserved
332 * |||||\----- [3] IsAtom
333 * ||||\------ [4] IsLinear
334 * |||\------- [5] IsDependent
335 * ||\-------- [6] IsInline
336 * |\--------- [7] FatInline/Extensible
337 * \---------- [8] External/Permanent
339 * Bits 0..2 are reserved for use by the GC (see
340 * gc::CellFlagBitsReservedForGC). In particular, bit 0 is currently used for
341 * FORWARD_BIT for forwarded nursery cells. The other 2 bits are currently
342 * unused.
344 * Note that the first 4 flag bits 3..6 (from right to left in the previous
345 * table) have the following meaning and can be used for some hot queries:
347 * Bit 3: IsAtom (Atom, PermanentAtom)
348 * Bit 4: IsLinear
349 * Bit 5: IsDependent
350 * Bit 6: IsInline (Inline, FatInline, ThinInlineAtom, FatInlineAtom)
352 * If INDEX_VALUE_BIT is set, bits 16 and up will also hold an integer index.
355 // The low bits of flag word are reserved by GC.
356 static_assert(js::gc::CellFlagBitsReservedForGC <= 3,
357 "JSString::flags must reserve enough bits for Cell");
359 static const uint32_t ATOM_BIT = js::Bit(3);
360 static const uint32_t LINEAR_BIT = js::Bit(4);
361 static const uint32_t DEPENDENT_BIT = js::Bit(5);
362 static const uint32_t INLINE_CHARS_BIT = js::Bit(6);
364 static const uint32_t EXTENSIBLE_FLAGS = LINEAR_BIT | js::Bit(7);
365 static const uint32_t EXTERNAL_FLAGS = LINEAR_BIT | js::Bit(8);
367 static const uint32_t FAT_INLINE_MASK = INLINE_CHARS_BIT | js::Bit(7);
369 /* Initial flags for various types of strings. */
370 static const uint32_t INIT_THIN_INLINE_FLAGS = LINEAR_BIT | INLINE_CHARS_BIT;
371 static const uint32_t INIT_FAT_INLINE_FLAGS = LINEAR_BIT | FAT_INLINE_MASK;
372 static const uint32_t INIT_ROPE_FLAGS = 0;
373 static const uint32_t INIT_LINEAR_FLAGS = LINEAR_BIT;
374 static const uint32_t INIT_DEPENDENT_FLAGS = LINEAR_BIT | DEPENDENT_BIT;
376 static const uint32_t TYPE_FLAGS_MASK = js::BitMask(9) - js::BitMask(3);
377 static_assert((TYPE_FLAGS_MASK & js::gc::HeaderWord::RESERVED_MASK) == 0,
378 "GC reserved bits must not be used for Strings");
380 static const uint32_t LATIN1_CHARS_BIT = js::Bit(9);
382 // Whether this atom's characters store an uint32 index value less than or
383 // equal to MAX_ARRAY_INDEX. Not used for non-atomized strings.
384 // See JSLinearString::isIndex.
385 static const uint32_t ATOM_IS_INDEX_BIT = js::Bit(10);
387 static const uint32_t INDEX_VALUE_BIT = js::Bit(11);
388 static const uint32_t INDEX_VALUE_SHIFT = 16;
390 // NON_DEDUP_BIT is used in string deduplication during tenuring.
391 static const uint32_t NON_DEDUP_BIT = js::Bit(12);
393 // If IN_STRING_TO_ATOM_CACHE is set, this string had an entry in the
394 // StringToAtomCache at some point. Note that GC can purge the cache without
395 // clearing this bit.
396 static const uint32_t IN_STRING_TO_ATOM_CACHE = js::Bit(13);
398 // Flags used during rope flattening that indicate what action to perform when
399 // returning to the rope's parent rope.
400 static const uint32_t FLATTEN_VISIT_RIGHT = js::Bit(14);
401 static const uint32_t FLATTEN_FINISH_NODE = js::Bit(15);
402 static const uint32_t FLATTEN_MASK =
403 FLATTEN_VISIT_RIGHT | FLATTEN_FINISH_NODE;
405 static const uint32_t PINNED_ATOM_BIT = js::Bit(15);
406 static const uint32_t PERMANENT_ATOM_MASK =
407 ATOM_BIT | PINNED_ATOM_BIT | js::Bit(8);
409 static const uint32_t MAX_LENGTH = JS::MaxStringLength;
411 static const JS::Latin1Char MAX_LATIN1_CHAR = 0xff;
414 * Helper function to validate that a string of a given length is
415 * representable by a JSString. An allocation overflow is reported if false
416 * is returned.
418 static inline bool validateLength(JSContext* maybecx, size_t length);
420 template <js::AllowGC allowGC>
421 static inline bool validateLengthInternal(JSContext* maybecx, size_t length);
423 static constexpr size_t offsetOfFlags() { return offsetOfHeaderFlags(); }
424 static constexpr size_t offsetOfLength() { return offsetOfHeaderLength(); }
426 bool sameLengthAndFlags(const JSString& other) const {
427 return length() == other.length() && flags() == other.flags();
430 static void staticAsserts() {
431 static_assert(JSString::MAX_LENGTH < UINT32_MAX,
432 "Length must fit in 32 bits");
433 static_assert(
434 sizeof(JSString) == (offsetof(JSString, d.inlineStorageLatin1) +
435 NUM_INLINE_CHARS_LATIN1 * sizeof(char)),
436 "Inline Latin1 chars must fit in a JSString");
437 static_assert(
438 sizeof(JSString) == (offsetof(JSString, d.inlineStorageTwoByte) +
439 NUM_INLINE_CHARS_TWO_BYTE * sizeof(char16_t)),
440 "Inline char16_t chars must fit in a JSString");
442 /* Ensure js::shadow::String has the same layout. */
443 using JS::shadow::String;
444 static_assert(
445 JSString::offsetOfRawHeaderFlagsField() == offsetof(String, flags_),
446 "shadow::String flags offset must match JSString");
447 #if JS_BITS_PER_WORD == 32
448 static_assert(JSString::offsetOfLength() == offsetof(String, length_),
449 "shadow::String length offset must match JSString");
450 #endif
451 static_assert(offsetof(JSString, d.s.u2.nonInlineCharsLatin1) ==
452 offsetof(String, nonInlineCharsLatin1),
453 "shadow::String nonInlineChars offset must match JSString");
454 static_assert(offsetof(JSString, d.s.u2.nonInlineCharsTwoByte) ==
455 offsetof(String, nonInlineCharsTwoByte),
456 "shadow::String nonInlineChars offset must match JSString");
457 static_assert(
458 offsetof(JSString, d.s.u3.externalCallbacks) ==
459 offsetof(String, externalCallbacks),
460 "shadow::String externalCallbacks offset must match JSString");
461 static_assert(offsetof(JSString, d.inlineStorageLatin1) ==
462 offsetof(String, inlineStorageLatin1),
463 "shadow::String inlineStorage offset must match JSString");
464 static_assert(offsetof(JSString, d.inlineStorageTwoByte) ==
465 offsetof(String, inlineStorageTwoByte),
466 "shadow::String inlineStorage offset must match JSString");
467 static_assert(ATOM_BIT == String::ATOM_BIT,
468 "shadow::String::ATOM_BIT must match JSString::ATOM_BIT");
469 static_assert(LINEAR_BIT == String::LINEAR_BIT,
470 "shadow::String::LINEAR_BIT must match JSString::LINEAR_BIT");
471 static_assert(INLINE_CHARS_BIT == String::INLINE_CHARS_BIT,
472 "shadow::String::INLINE_CHARS_BIT must match "
473 "JSString::INLINE_CHARS_BIT");
474 static_assert(LATIN1_CHARS_BIT == String::LATIN1_CHARS_BIT,
475 "shadow::String::LATIN1_CHARS_BIT must match "
476 "JSString::LATIN1_CHARS_BIT");
477 static_assert(
478 TYPE_FLAGS_MASK == String::TYPE_FLAGS_MASK,
479 "shadow::String::TYPE_FLAGS_MASK must match JSString::TYPE_FLAGS_MASK");
480 static_assert(
481 EXTERNAL_FLAGS == String::EXTERNAL_FLAGS,
482 "shadow::String::EXTERNAL_FLAGS must match JSString::EXTERNAL_FLAGS");
485 /* Avoid silly compile errors in JSRope::flatten */
486 friend class JSRope;
488 friend class js::gc::RelocationOverlay;
490 protected:
491 template <typename CharT>
492 MOZ_ALWAYS_INLINE void setNonInlineChars(const CharT* chars);
494 template <typename CharT>
495 static MOZ_ALWAYS_INLINE void checkStringCharsArena(const CharT* chars) {
496 #ifdef MOZ_DEBUG
497 js::AssertJSStringBufferInCorrectArena(chars);
498 #endif
501 // Get correct non-inline chars union arm for given type
502 template <typename CharT>
503 MOZ_ALWAYS_INLINE const CharT* nonInlineCharsRaw() const;
505 public:
506 MOZ_ALWAYS_INLINE
507 bool empty() const { return length() == 0; }
509 inline bool getChar(JSContext* cx, size_t index, char16_t* code);
510 inline bool getCodePoint(JSContext* cx, size_t index, char32_t* codePoint);
512 /* Strings have either Latin1 or TwoByte chars. */
513 bool hasLatin1Chars() const { return flags() & LATIN1_CHARS_BIT; }
514 bool hasTwoByteChars() const { return !(flags() & LATIN1_CHARS_BIT); }
516 /* Strings might contain cached indexes. */
517 bool hasIndexValue() const { return flags() & INDEX_VALUE_BIT; }
518 uint32_t getIndexValue() const {
519 MOZ_ASSERT(hasIndexValue());
520 MOZ_ASSERT(isLinear());
521 return flags() >> INDEX_VALUE_SHIFT;
524 inline size_t allocSize() const;
526 /* Fallible conversions to more-derived string types. */
528 inline JSLinearString* ensureLinear(JSContext* cx);
530 /* Type query and debug-checked casts */
532 MOZ_ALWAYS_INLINE
533 bool isRope() const { return !(flags() & LINEAR_BIT); }
535 MOZ_ALWAYS_INLINE
536 JSRope& asRope() const {
537 MOZ_ASSERT(isRope());
538 return *(JSRope*)this;
541 MOZ_ALWAYS_INLINE
542 bool isLinear() const { return flags() & LINEAR_BIT; }
544 MOZ_ALWAYS_INLINE
545 JSLinearString& asLinear() const {
546 MOZ_ASSERT(JSString::isLinear());
547 return *(JSLinearString*)this;
550 MOZ_ALWAYS_INLINE
551 bool isDependent() const { return flags() & DEPENDENT_BIT; }
553 MOZ_ALWAYS_INLINE
554 JSDependentString& asDependent() const {
555 MOZ_ASSERT(isDependent());
556 return *(JSDependentString*)this;
559 MOZ_ALWAYS_INLINE
560 bool isExtensible() const {
561 return (flags() & TYPE_FLAGS_MASK) == EXTENSIBLE_FLAGS;
564 MOZ_ALWAYS_INLINE
565 JSExtensibleString& asExtensible() const {
566 MOZ_ASSERT(isExtensible());
567 return *(JSExtensibleString*)this;
570 MOZ_ALWAYS_INLINE
571 bool isInline() const { return flags() & INLINE_CHARS_BIT; }
573 MOZ_ALWAYS_INLINE
574 JSInlineString& asInline() const {
575 MOZ_ASSERT(isInline());
576 return *(JSInlineString*)this;
579 MOZ_ALWAYS_INLINE
580 bool isFatInline() const {
581 return (flags() & FAT_INLINE_MASK) == FAT_INLINE_MASK;
584 /* For hot code, prefer other type queries. */
585 bool isExternal() const {
586 return (flags() & TYPE_FLAGS_MASK) == EXTERNAL_FLAGS;
589 MOZ_ALWAYS_INLINE
590 JSExternalString& asExternal() const {
591 MOZ_ASSERT(isExternal());
592 return *(JSExternalString*)this;
595 MOZ_ALWAYS_INLINE
596 bool isAtom() const { return flags() & ATOM_BIT; }
598 MOZ_ALWAYS_INLINE
599 bool isPermanentAtom() const {
600 return (flags() & PERMANENT_ATOM_MASK) == PERMANENT_ATOM_MASK;
603 MOZ_ALWAYS_INLINE
604 JSAtom& asAtom() const {
605 MOZ_ASSERT(isAtom());
606 return *(JSAtom*)this;
609 MOZ_ALWAYS_INLINE
610 void setNonDeduplicatable() { setFlagBit(NON_DEDUP_BIT); }
612 MOZ_ALWAYS_INLINE
613 void clearNonDeduplicatable() { clearFlagBit(NON_DEDUP_BIT); }
615 MOZ_ALWAYS_INLINE
616 bool isDeduplicatable() { return !(flags() & NON_DEDUP_BIT); }
618 void setInStringToAtomCache() {
619 MOZ_ASSERT(!isAtom());
620 setFlagBit(IN_STRING_TO_ATOM_CACHE);
622 bool inStringToAtomCache() const { return flags() & IN_STRING_TO_ATOM_CACHE; }
624 // Fills |array| with various strings that represent the different string
625 // kinds and character encodings.
626 static bool fillWithRepresentatives(JSContext* cx,
627 JS::Handle<js::ArrayObject*> array);
629 /* Only called by the GC for dependent strings. */
631 inline bool hasBase() const { return isDependent(); }
633 inline JSLinearString* base() const;
635 // The base may be forwarded and becomes a relocation overlay.
636 // The return value can be a relocation overlay when the base is forwarded,
637 // or the return value can be the actual base when it is not forwarded.
638 inline JSLinearString* nurseryBaseOrRelocOverlay() const;
640 inline bool canOwnDependentChars() const;
642 inline void setBase(JSLinearString* newBase);
644 void traceBase(JSTracer* trc);
646 /* Only called by the GC for strings with the AllocKind::STRING kind. */
648 inline void finalize(JS::GCContext* gcx);
650 /* Gets the number of bytes that the chars take on the heap. */
652 size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf);
654 bool hasOutOfLineChars() const {
655 return isLinear() && !isInline() && !isDependent() && !isExternal();
658 inline bool ownsMallocedChars() const;
660 /* Encode as many scalar values of the string as UTF-8 as can fit
661 * into the caller-provided buffer replacing unpaired surrogates
662 * with the REPLACEMENT CHARACTER.
664 * Returns the number of code units read and the number of code units
665 * written.
667 * The semantics of this method match the semantics of
668 * TextEncoder.encodeInto().
670 * This function doesn't modify the representation -- rope, linear,
671 * flat, atom, etc. -- of this string. If this string is a rope,
672 * it also doesn't modify the representation of left or right halves
673 * of this string, or of those halves, and so on.
675 * Returns mozilla::Nothing on OOM.
677 mozilla::Maybe<std::tuple<size_t, size_t>> encodeUTF8Partial(
678 const JS::AutoRequireNoGC& nogc, mozilla::Span<char> buffer) const;
680 private:
681 // To help avoid writing Spectre-unsafe code, we only allow MacroAssembler
682 // to call the method below.
683 friend class js::jit::MacroAssembler;
684 static size_t offsetOfNonInlineChars() {
685 static_assert(
686 offsetof(JSString, d.s.u2.nonInlineCharsTwoByte) ==
687 offsetof(JSString, d.s.u2.nonInlineCharsLatin1),
688 "nonInlineCharsTwoByte and nonInlineCharsLatin1 must have same offset");
689 return offsetof(JSString, d.s.u2.nonInlineCharsTwoByte);
692 public:
693 static const JS::TraceKind TraceKind = JS::TraceKind::String;
695 JS::Zone* zone() const {
696 if (isTenured()) {
697 // Allow permanent atoms to be accessed across zones and runtimes.
698 if (isPermanentAtom()) {
699 return zoneFromAnyThread();
701 return asTenured().zone();
703 return nurseryZone();
706 void setLengthAndFlags(uint32_t len, uint32_t flags) {
707 setHeaderLengthAndFlags(len, flags);
709 void setFlagBit(uint32_t flag) { setHeaderFlagBit(flag); }
710 void clearFlagBit(uint32_t flag) { clearHeaderFlagBit(flag); }
712 void fixupAfterMovingGC() {}
714 js::gc::AllocKind getAllocKind() const {
715 using js::gc::AllocKind;
716 AllocKind kind;
717 if (isAtom()) {
718 if (isFatInline()) {
719 kind = AllocKind::FAT_INLINE_ATOM;
720 } else {
721 kind = AllocKind::ATOM;
723 } else if (isFatInline()) {
724 kind = AllocKind::FAT_INLINE_STRING;
725 } else if (isExternal()) {
726 kind = AllocKind::EXTERNAL_STRING;
727 } else {
728 kind = AllocKind::STRING;
730 MOZ_ASSERT_IF(isTenured(), kind == asTenured().getAllocKind());
731 return kind;
734 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
735 void dump(); // Debugger-friendly stderr dump.
736 void dump(js::GenericPrinter& out);
737 void dumpNoNewline(js::GenericPrinter& out);
738 void dumpCharsNoNewline(js::GenericPrinter& out);
739 void dumpRepresentation(js::GenericPrinter& out, int indent) const;
740 void dumpRepresentationHeader(js::GenericPrinter& out,
741 const char* subclass) const;
742 void dumpCharsNoQuote(js::GenericPrinter& out);
744 template <typename CharT>
745 static void dumpChars(const CharT* s, size_t len, js::GenericPrinter& out);
747 template <typename CharT>
748 static void dumpCharsNoQuote(const CharT* s, size_t len,
749 js::GenericPrinter& out);
751 bool equals(const char* s);
752 #endif
754 void traceChildren(JSTracer* trc);
756 // Override base class implementation to tell GC about permanent atoms.
757 bool isPermanentAndMayBeShared() const { return isPermanentAtom(); }
759 static void addCellAddressToStoreBuffer(js::gc::StoreBuffer* buffer,
760 js::gc::Cell** cellp) {
761 buffer->putCell(reinterpret_cast<JSString**>(cellp));
764 static void removeCellAddressFromStoreBuffer(js::gc::StoreBuffer* buffer,
765 js::gc::Cell** cellp) {
766 buffer->unputCell(reinterpret_cast<JSString**>(cellp));
769 private:
770 JSString(const JSString& other) = delete;
771 void operator=(const JSString& other) = delete;
773 protected:
774 JSString() = default;
777 namespace js {
779 template <typename Wrapper, typename CharT>
780 class WrappedPtrOperations<JSString::OwnedChars<CharT>, Wrapper> {
781 const JSString::OwnedChars<CharT>& get() const {
782 return static_cast<const Wrapper*>(this)->get();
785 public:
786 explicit operator bool() const { return !!get(); }
787 mozilla::Span<CharT> span() const { return get().span(); }
788 CharT* data() const { return get().data(); }
789 size_t length() const { return get().length(); }
790 size_t size() const { return get().size(); }
791 bool isMalloced() const { return get().isMalloced(); }
794 template <typename Wrapper, typename CharT>
795 class MutableWrappedPtrOperations<JSString::OwnedChars<CharT>, Wrapper>
796 : public WrappedPtrOperations<JSString::OwnedChars<CharT>, Wrapper> {
797 JSString::OwnedChars<CharT>& get() {
798 return static_cast<Wrapper*>(this)->get();
801 public:
802 CharT* release() { return get().release(); }
803 void reset() { get().reset(); }
804 void ensureNonNursery() { get().ensureNonNursery(); }
807 } /* namespace js */
809 class JSRope : public JSString {
810 friend class js::gc::CellAllocator;
812 template <typename CharT>
813 js::UniquePtr<CharT[], JS::FreePolicy> copyCharsInternal(
814 JSContext* cx, arena_id_t destArenaId) const;
816 enum UsingBarrier : bool { NoBarrier = false, WithIncrementalBarrier = true };
818 friend class JSString;
819 JSLinearString* flatten(JSContext* maybecx);
821 JSLinearString* flattenInternal();
822 template <UsingBarrier usingBarrier>
823 JSLinearString* flattenInternal();
825 template <UsingBarrier usingBarrier, typename CharT>
826 static JSLinearString* flattenInternal(JSRope* root);
828 template <UsingBarrier usingBarrier>
829 static void ropeBarrierDuringFlattening(JSRope* rope);
831 JSRope(JSString* left, JSString* right, size_t length);
833 public:
834 template <js::AllowGC allowGC>
835 static inline JSRope* new_(
836 JSContext* cx,
837 typename js::MaybeRooted<JSString*, allowGC>::HandleType left,
838 typename js::MaybeRooted<JSString*, allowGC>::HandleType right,
839 size_t length, js::gc::Heap = js::gc::Heap::Default);
841 js::UniquePtr<JS::Latin1Char[], JS::FreePolicy> copyLatin1Chars(
842 JSContext* maybecx, arena_id_t destArenaId) const;
843 JS::UniqueTwoByteChars copyTwoByteChars(JSContext* maybecx,
844 arena_id_t destArenaId) const;
846 template <typename CharT>
847 js::UniquePtr<CharT[], JS::FreePolicy> copyChars(
848 JSContext* maybecx, arena_id_t destArenaId) const;
850 // Hash function specific for ropes that avoids allocating a temporary
851 // string. There are still allocations internally so it's technically
852 // fallible.
854 // Returns the same value as if this were a linear string being hashed.
855 [[nodiscard]] bool hash(uint32_t* outhHash) const;
857 // The process of flattening a rope temporarily overwrites the left pointer of
858 // interior nodes in the rope DAG with the parent pointer.
859 bool isBeingFlattened() const { return flags() & FLATTEN_MASK; }
861 JSString* leftChild() const {
862 MOZ_ASSERT(isRope());
863 MOZ_ASSERT(!isBeingFlattened()); // Flattening overwrites this field.
864 return d.s.u2.left;
867 JSString* rightChild() const {
868 MOZ_ASSERT(isRope());
869 return d.s.u3.right;
872 void traceChildren(JSTracer* trc);
874 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
875 void dumpRepresentation(js::GenericPrinter& out, int indent) const;
876 #endif
878 private:
879 // To help avoid writing Spectre-unsafe code, we only allow MacroAssembler
880 // to call the methods below.
881 friend class js::jit::MacroAssembler;
883 static size_t offsetOfLeft() { return offsetof(JSRope, d.s.u2.left); }
884 static size_t offsetOfRight() { return offsetof(JSRope, d.s.u3.right); }
887 static_assert(sizeof(JSRope) == sizeof(JSString),
888 "string subclasses must be binary-compatible with JSString");
/*
 * There are optimized entry points for some string allocation functions.
 *
 * The meaning of suffix:
 *   * "MaybeDeflate": for char16_t variant, characters can fit Latin1
 *   * "DontDeflate": for char16_t variant, characters don't fit Latin1
 *   * "NonStatic": characters don't match StaticStrings
 *   * "ValidLength": length fits JSString::MAX_LENGTH
 */
900 class JSLinearString : public JSString {
901 friend class JSString;
902 friend class JS::AutoStableStringChars;
903 friend class js::gc::TenuringTracer;
904 friend class js::gc::CellAllocator;
906 /* Vacuous and therefore unimplemented. */
907 JSLinearString* ensureLinear(JSContext* cx) = delete;
908 bool isLinear() const = delete;
909 JSLinearString& asLinear() const = delete;
911 JSLinearString(const char16_t* chars, size_t length);
912 JSLinearString(const JS::Latin1Char* chars, size_t length);
913 template <typename CharT>
914 explicit inline JSLinearString(JS::MutableHandle<OwnedChars<CharT>> chars);
916 protected:
917 // Used to construct subclasses that do a full initialization themselves.
918 JSLinearString() = default;
920 /* Returns void pointer to latin1/twoByte chars, for finalizers. */
921 MOZ_ALWAYS_INLINE
922 void* nonInlineCharsRaw() const {
923 MOZ_ASSERT(!isInline());
924 static_assert(
925 offsetof(JSLinearString, d.s.u2.nonInlineCharsTwoByte) ==
926 offsetof(JSLinearString, d.s.u2.nonInlineCharsLatin1),
927 "nonInlineCharsTwoByte and nonInlineCharsLatin1 must have same offset");
928 return (void*)d.s.u2.nonInlineCharsTwoByte;
931 MOZ_ALWAYS_INLINE const JS::Latin1Char* rawLatin1Chars() const;
932 MOZ_ALWAYS_INLINE const char16_t* rawTwoByteChars() const;
934 public:
935 template <js::AllowGC allowGC, typename CharT>
936 static inline JSLinearString* new_(JSContext* cx,
937 JS::MutableHandle<OwnedChars<CharT>> chars,
938 js::gc::Heap heap);
940 template <js::AllowGC allowGC, typename CharT>
941 static inline JSLinearString* newValidLength(
942 JSContext* cx, JS::MutableHandle<OwnedChars<CharT>> chars,
943 js::gc::Heap heap);
945 // Convert a plain linear string to an extensible string. For testing. The
946 // caller must ensure that it is a plain or extensible string already, and
947 // that `capacity` is adequate.
948 JSExtensibleString& makeExtensible(size_t capacity);
950 template <typename CharT>
951 MOZ_ALWAYS_INLINE const CharT* nonInlineChars(
952 const JS::AutoRequireNoGC& nogc) const;
954 MOZ_ALWAYS_INLINE
955 const JS::Latin1Char* nonInlineLatin1Chars(
956 const JS::AutoRequireNoGC& nogc) const {
957 MOZ_ASSERT(!isInline());
958 MOZ_ASSERT(hasLatin1Chars());
959 return d.s.u2.nonInlineCharsLatin1;
962 MOZ_ALWAYS_INLINE
963 const char16_t* nonInlineTwoByteChars(const JS::AutoRequireNoGC& nogc) const {
964 MOZ_ASSERT(!isInline());
965 MOZ_ASSERT(hasTwoByteChars());
966 return d.s.u2.nonInlineCharsTwoByte;
969 template <typename CharT>
970 MOZ_ALWAYS_INLINE const CharT* chars(const JS::AutoRequireNoGC& nogc) const;
972 MOZ_ALWAYS_INLINE
973 const JS::Latin1Char* latin1Chars(const JS::AutoRequireNoGC& nogc) const {
974 return rawLatin1Chars();
977 MOZ_ALWAYS_INLINE
978 const char16_t* twoByteChars(const JS::AutoRequireNoGC& nogc) const {
979 return rawTwoByteChars();
982 mozilla::Range<const JS::Latin1Char> latin1Range(
983 const JS::AutoRequireNoGC& nogc) const {
984 MOZ_ASSERT(JSString::isLinear());
985 return mozilla::Range<const JS::Latin1Char>(latin1Chars(nogc), length());
988 mozilla::Range<const char16_t> twoByteRange(
989 const JS::AutoRequireNoGC& nogc) const {
990 MOZ_ASSERT(JSString::isLinear());
991 return mozilla::Range<const char16_t>(twoByteChars(nogc), length());
994 MOZ_ALWAYS_INLINE
995 char16_t latin1OrTwoByteChar(size_t index) const {
996 MOZ_ASSERT(JSString::isLinear());
997 MOZ_ASSERT(index < length());
998 JS::AutoCheckCannotGC nogc;
999 return hasLatin1Chars() ? latin1Chars(nogc)[index]
1000 : twoByteChars(nogc)[index];
1003 bool isIndexSlow(uint32_t* indexp) const {
1004 MOZ_ASSERT(JSString::isLinear());
1005 size_t len = length();
1006 if (len == 0 || len > js::UINT32_CHAR_BUFFER_LENGTH) {
1007 return false;
1009 JS::AutoCheckCannotGC nogc;
1010 if (hasLatin1Chars()) {
1011 const JS::Latin1Char* s = latin1Chars(nogc);
1012 return mozilla::IsAsciiDigit(*s) &&
1013 js::CheckStringIsIndex(s, len, indexp);
1015 const char16_t* s = twoByteChars(nogc);
1016 return mozilla::IsAsciiDigit(*s) && js::CheckStringIsIndex(s, len, indexp);
1019 // Returns true if this string's characters store an unsigned 32-bit integer
1020 // value less than or equal to MAX_ARRAY_INDEX, initializing *indexp to that
1021 // value if so. Leading '0' isn't allowed except 0 itself.
1022 // (Thus if calling isIndex returns true, js::IndexToString(cx, *indexp) will
1023 // be a string equal to this string.)
1024 inline bool isIndex(uint32_t* indexp) const;
1026 // Return whether the characters of this string can be moved by minor or
1027 // compacting GC.
1028 inline bool hasMovableChars() const;
1030 void maybeInitializeIndexValue(uint32_t index, bool allowAtom = false) {
1031 MOZ_ASSERT(JSString::isLinear());
1032 MOZ_ASSERT_IF(hasIndexValue(), getIndexValue() == index);
1033 MOZ_ASSERT_IF(!allowAtom, !isAtom());
1035 if (hasIndexValue() || index > UINT16_MAX) {
1036 return;
1039 mozilla::DebugOnly<uint32_t> containedIndex;
1040 MOZ_ASSERT(isIndexSlow(&containedIndex));
1041 MOZ_ASSERT(index == containedIndex);
1043 setFlagBit((index << INDEX_VALUE_SHIFT) | INDEX_VALUE_BIT);
1044 MOZ_ASSERT(getIndexValue() == index);
1048 * Returns a property name represented by this string, or null on failure.
1049 * You must verify that this is not an index per isIndex before calling
1050 * this method.
1052 inline js::PropertyName* toPropertyName(JSContext* cx);
1054 // Make sure chars are not in the nursery, mallocing and copying if necessary.
1055 // Should only be called during minor GC on a string that has been promoted
1056 // to the tenured heap and may still point to nursery-allocated chars.
1057 template <typename CharT>
1058 inline size_t maybeMallocCharsOnPromotion(js::Nursery* nursery);
1060 inline void finalize(JS::GCContext* gcx);
1061 inline size_t allocSize() const;
1063 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
1064 void dumpRepresentationChars(js::GenericPrinter& out, int indent) const;
1065 void dumpRepresentation(js::GenericPrinter& out, int indent) const;
1066 #endif
1068 // Make a partially-initialized string safe for finalization.
1069 inline void disownCharsBecauseError();
1072 static_assert(sizeof(JSLinearString) == sizeof(JSString),
1073 "string subclasses must be binary-compatible with JSString");
1075 class JSDependentString : public JSLinearString {
1076 friend class JSString;
1077 friend class js::gc::CellAllocator;
1079 JSDependentString(JSLinearString* base, size_t start, size_t length);
1081 // For JIT string allocation.
1082 JSDependentString() = default;
1084 /* Vacuous and therefore unimplemented. */
1085 bool isDependent() const = delete;
1086 JSDependentString& asDependent() const = delete;
1088 /* The offset of this string's chars in base->chars(). */
1089 MOZ_ALWAYS_INLINE size_t baseOffset() const {
1090 MOZ_ASSERT(JSString::isDependent());
1091 JS::AutoCheckCannotGC nogc;
1092 size_t offset;
1093 if (hasTwoByteChars()) {
1094 offset = twoByteChars(nogc) - base()->twoByteChars(nogc);
1095 } else {
1096 offset = latin1Chars(nogc) - base()->latin1Chars(nogc);
1098 MOZ_ASSERT(offset < base()->length());
1099 return offset;
1102 public:
1103 // This will always return a dependent string, and will assert if the chars
1104 // could fit into an inline string.
1105 static inline JSLinearString* new_(JSContext* cx, JSLinearString* base,
1106 size_t start, size_t length,
1107 js::gc::Heap heap);
1109 template <typename T>
1110 void relocateNonInlineChars(T chars, size_t offset) {
1111 setNonInlineChars(chars + offset);
1114 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
1115 void dumpRepresentation(js::GenericPrinter& out, int indent) const;
1116 #endif
1118 private:
1119 // To help avoid writing Spectre-unsafe code, we only allow MacroAssembler
1120 // to call the method below.
1121 friend class js::jit::MacroAssembler;
1123 inline static size_t offsetOfBase() {
1124 return offsetof(JSDependentString, d.s.u3.base);
1128 static_assert(sizeof(JSDependentString) == sizeof(JSString),
1129 "string subclasses must be binary-compatible with JSString");
1131 class JSExtensibleString : public JSLinearString {
1132 /* Vacuous and therefore unimplemented. */
1133 bool isExtensible() const = delete;
1134 JSExtensibleString& asExtensible() const = delete;
1136 public:
1137 MOZ_ALWAYS_INLINE
1138 size_t capacity() const {
1139 MOZ_ASSERT(JSString::isExtensible());
1140 return d.s.u3.capacity;
1143 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
1144 void dumpRepresentation(js::GenericPrinter& out, int indent) const;
1145 #endif
1148 static_assert(sizeof(JSExtensibleString) == sizeof(JSString),
1149 "string subclasses must be binary-compatible with JSString");
1151 class JSInlineString : public JSLinearString {
1152 public:
1153 MOZ_ALWAYS_INLINE
1154 const JS::Latin1Char* latin1Chars(const JS::AutoRequireNoGC& nogc) const {
1155 MOZ_ASSERT(JSString::isInline());
1156 MOZ_ASSERT(hasLatin1Chars());
1157 return d.inlineStorageLatin1;
1160 MOZ_ALWAYS_INLINE
1161 const char16_t* twoByteChars(const JS::AutoRequireNoGC& nogc) const {
1162 MOZ_ASSERT(JSString::isInline());
1163 MOZ_ASSERT(hasTwoByteChars());
1164 return d.inlineStorageTwoByte;
1167 template <typename CharT>
1168 static bool lengthFits(size_t length);
1170 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
1171 void dumpRepresentation(js::GenericPrinter& out, int indent) const;
1172 #endif
1174 private:
1175 // To help avoid writing Spectre-unsafe code, we only allow MacroAssembler
1176 // to call the method below.
1177 friend class js::jit::MacroAssembler;
1178 static size_t offsetOfInlineStorage() {
1179 return offsetof(JSInlineString, d.inlineStorageTwoByte);
1183 static_assert(sizeof(JSInlineString) == sizeof(JSString),
1184 "string subclasses must be binary-compatible with JSString");
1187 * On 32-bit platforms, JSThinInlineString can store 8 Latin1 characters or 4
1188 * TwoByte characters inline. On 64-bit platforms, these numbers are 16 and 8,
1189 * respectively.
1191 class JSThinInlineString : public JSInlineString {
1192 friend class js::gc::CellAllocator;
1194 // The constructors return a mutable pointer to the data, because the first
1195 // thing any creator will do is copy in the string value. This also
1196 // conveniently allows doing overload resolution on CharT.
1197 explicit JSThinInlineString(size_t length, JS::Latin1Char** chars);
1198 explicit JSThinInlineString(size_t length, char16_t** chars);
1200 // For JIT string allocation.
1201 JSThinInlineString() = default;
1203 public:
1204 static constexpr size_t InlineBytes = NUM_INLINE_CHARS_LATIN1;
1206 static const size_t MAX_LENGTH_LATIN1 = NUM_INLINE_CHARS_LATIN1;
1207 static const size_t MAX_LENGTH_TWO_BYTE = NUM_INLINE_CHARS_TWO_BYTE;
1209 template <js::AllowGC allowGC>
1210 static inline JSThinInlineString* new_(JSContext* cx, js::gc::Heap heap);
1212 template <typename CharT>
1213 static bool lengthFits(size_t length);
1216 static_assert(sizeof(JSThinInlineString) == sizeof(JSString),
1217 "string subclasses must be binary-compatible with JSString");
1220 * On both 32-bit and 64-bit platforms, MAX_LENGTH_TWO_BYTE is 12 and
1221 * MAX_LENGTH_LATIN1 is 24. This is deliberate, in order to minimize potential
1222 * performance differences between 32-bit and 64-bit platforms.
1224 * There are still some differences due to NUM_INLINE_CHARS_* being different.
1225 * E.g. TwoByte strings of length 5--8 will be JSFatInlineStrings on 32-bit
1226 * platforms and JSThinInlineStrings on 64-bit platforms. But the more
1227 * significant transition from inline strings to non-inline strings occurs at
1228 * length 12 (for TwoByte strings) and 24 (Latin1 strings) on both 32-bit and
1229 * 64-bit platforms.
1231 class JSFatInlineString : public JSInlineString {
1232 friend class js::gc::CellAllocator;
1234 static const size_t INLINE_EXTENSION_CHARS_LATIN1 =
1235 24 - NUM_INLINE_CHARS_LATIN1;
1236 static const size_t INLINE_EXTENSION_CHARS_TWO_BYTE =
1237 12 - NUM_INLINE_CHARS_TWO_BYTE;
1239 // The constructors return a mutable pointer to the data, because the first
1240 // thing any creator will do is copy in the string value. This also
1241 // conveniently allows doing overload resolution on CharT.
1242 explicit JSFatInlineString(size_t length, JS::Latin1Char** chars);
1243 explicit JSFatInlineString(size_t length, char16_t** chars);
1245 // For JIT string allocation.
1246 JSFatInlineString() = default;
1248 protected: /* to fool clang into not warning this is unused */
1249 union {
1250 char inlineStorageExtensionLatin1[INLINE_EXTENSION_CHARS_LATIN1];
1251 char16_t inlineStorageExtensionTwoByte[INLINE_EXTENSION_CHARS_TWO_BYTE];
1254 public:
1255 template <js::AllowGC allowGC>
1256 static inline JSFatInlineString* new_(JSContext* cx, js::gc::Heap heap);
1258 static const size_t MAX_LENGTH_LATIN1 =
1259 JSString::NUM_INLINE_CHARS_LATIN1 + INLINE_EXTENSION_CHARS_LATIN1;
1261 static const size_t MAX_LENGTH_TWO_BYTE =
1262 JSString::NUM_INLINE_CHARS_TWO_BYTE + INLINE_EXTENSION_CHARS_TWO_BYTE;
1264 template <typename CharT>
1265 static bool lengthFits(size_t length);
1267 // Only called by the GC for strings with the AllocKind::FAT_INLINE_STRING
1268 // kind.
1269 MOZ_ALWAYS_INLINE void finalize(JS::GCContext* gcx);
1272 static_assert(sizeof(JSFatInlineString) % js::gc::CellAlignBytes == 0,
1273 "fat inline strings shouldn't waste space up to the next cell "
1274 "boundary");
1276 class JSExternalString : public JSLinearString {
1277 friend class js::gc::CellAllocator;
1279 JSExternalString(const JS::Latin1Char* chars, size_t length,
1280 const JSExternalStringCallbacks* callbacks);
1281 JSExternalString(const char16_t* chars, size_t length,
1282 const JSExternalStringCallbacks* callbacks);
1284 /* Vacuous and therefore unimplemented. */
1285 bool isExternal() const = delete;
1286 JSExternalString& asExternal() const = delete;
1288 template <typename CharT>
1289 static inline JSExternalString* newImpl(
1290 JSContext* cx, const CharT* chars, size_t length,
1291 const JSExternalStringCallbacks* callbacks);
1293 public:
1294 static inline JSExternalString* new_(
1295 JSContext* cx, const JS::Latin1Char* chars, size_t length,
1296 const JSExternalStringCallbacks* callbacks);
1297 static inline JSExternalString* new_(
1298 JSContext* cx, const char16_t* chars, size_t length,
1299 const JSExternalStringCallbacks* callbacks);
1301 const JSExternalStringCallbacks* callbacks() const {
1302 MOZ_ASSERT(JSString::isExternal());
1303 return d.s.u3.externalCallbacks;
1306 // External chars are never allocated inline or in the nursery, so we can
1307 // safely expose this without requiring an AutoCheckCannotGC argument.
1308 const JS::Latin1Char* latin1Chars() const { return rawLatin1Chars(); }
1309 const char16_t* twoByteChars() const { return rawTwoByteChars(); }
1311 // Only called by the GC for strings with the AllocKind::EXTERNAL_STRING
1312 // kind.
1313 inline void finalize(JS::GCContext* gcx);
1315 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
1316 void dumpRepresentation(js::GenericPrinter& out, int indent) const;
1317 #endif
1320 static_assert(sizeof(JSExternalString) == sizeof(JSString),
1321 "string subclasses must be binary-compatible with JSString");
1323 class JSAtom : public JSLinearString {
1324 /* Vacuous and therefore unimplemented. */
1325 bool isAtom() const = delete;
1326 JSAtom& asAtom() const = delete;
1328 public:
1329 template <typename CharT>
1330 static inline JSAtom* newValidLength(
1331 JSContext* cx, js::UniquePtr<CharT[], JS::FreePolicy> chars,
1332 size_t length, js::HashNumber hash);
1334 /* Returns the PropertyName for this. isIndex() must be false. */
1335 inline js::PropertyName* asPropertyName();
1337 MOZ_ALWAYS_INLINE
1338 bool isPermanent() const { return JSString::isPermanentAtom(); }
1340 MOZ_ALWAYS_INLINE
1341 void makePermanent() {
1342 MOZ_ASSERT(JSString::isAtom());
1343 setFlagBit(PERMANENT_ATOM_MASK);
1346 MOZ_ALWAYS_INLINE bool isIndex() const {
1347 MOZ_ASSERT(JSString::isAtom());
1348 mozilla::DebugOnly<uint32_t> index;
1349 MOZ_ASSERT(!!(flags() & ATOM_IS_INDEX_BIT) == isIndexSlow(&index));
1350 return flags() & ATOM_IS_INDEX_BIT;
1352 MOZ_ALWAYS_INLINE bool isIndex(uint32_t* index) const {
1353 MOZ_ASSERT(JSString::isAtom());
1354 if (!isIndex()) {
1355 return false;
1357 *index = hasIndexValue() ? getIndexValue() : getIndexSlow();
1358 return true;
1361 uint32_t getIndexSlow() const;
1363 void setIsIndex(uint32_t index) {
1364 MOZ_ASSERT(JSString::isAtom());
1365 setFlagBit(ATOM_IS_INDEX_BIT);
1366 maybeInitializeIndexValue(index, /* allowAtom = */ true);
1369 MOZ_ALWAYS_INLINE bool isPinned() const { return flags() & PINNED_ATOM_BIT; }
1371 void setPinned() {
1372 MOZ_ASSERT(!isPinned());
1373 setFlagBit(PINNED_ATOM_BIT);
1376 inline js::HashNumber hash() const;
1377 inline void initHash(js::HashNumber hash);
1379 template <typename CharT>
1380 static bool lengthFitsInline(size_t length);
1382 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
1383 void dump(js::GenericPrinter& out);
1384 void dump();
1385 #endif
1388 namespace js {
1390 class NormalAtom : public JSAtom {
1391 friend class gc::CellAllocator;
1393 protected:
1394 static constexpr size_t ExtensionBytes =
1395 js::gc::CellAlignBytes - sizeof(js::HashNumber);
1397 char inlineStorage_[ExtensionBytes];
1398 HashNumber hash_;
1400 // For subclasses to call.
1401 explicit NormalAtom(js::HashNumber hash) : hash_(hash) {}
1403 // Out of line atoms, mimicking JSLinearString constructors.
1404 NormalAtom(const char16_t* chars, size_t length, js::HashNumber hash);
1405 NormalAtom(const JS::Latin1Char* chars, size_t length, js::HashNumber hash);
1407 public:
1408 HashNumber hash() const { return hash_; }
1409 void initHash(HashNumber hash) { hash_ = hash; }
1411 static constexpr size_t offsetOfHash() { return offsetof(NormalAtom, hash_); }
1414 static_assert(sizeof(NormalAtom) ==
1415 js::RoundUp(sizeof(JSString) + sizeof(js::HashNumber),
1416 js::gc::CellAlignBytes),
1417 "NormalAtom must have size of a string + HashNumber, "
1418 "aligned to gc::CellAlignBytes");
1420 class ThinInlineAtom : public NormalAtom {
1421 friend class gc::CellAllocator;
1423 public:
1424 static constexpr size_t MAX_LENGTH_LATIN1 =
1425 NUM_INLINE_CHARS_LATIN1 + ExtensionBytes / sizeof(JS::Latin1Char);
1426 static constexpr size_t MAX_LENGTH_TWO_BYTE =
1427 NUM_INLINE_CHARS_TWO_BYTE + ExtensionBytes / sizeof(char16_t);
1429 #ifdef JS_64BIT
1430 // Fat and Thin inline atoms are the same size. Only use fat.
1431 static constexpr bool EverInstantiated = false;
1432 #else
1433 static constexpr bool EverInstantiated = true;
1434 #endif
1436 protected:
1437 // Mimicking JSThinInlineString constructors.
1438 #ifdef JS_64BIT
1439 ThinInlineAtom(size_t length, JS::Latin1Char** chars,
1440 js::HashNumber hash) = delete;
1441 ThinInlineAtom(size_t length, char16_t** chars, js::HashNumber hash) = delete;
1442 #else
1443 ThinInlineAtom(size_t length, JS::Latin1Char** chars, js::HashNumber hash);
1444 ThinInlineAtom(size_t length, char16_t** chars, js::HashNumber hash);
1445 #endif
1447 public:
1448 template <typename CharT>
1449 static bool lengthFits(size_t length) {
1450 if constexpr (sizeof(CharT) == sizeof(JS::Latin1Char)) {
1451 return length <= MAX_LENGTH_LATIN1;
1452 } else {
1453 return length <= MAX_LENGTH_TWO_BYTE;
1458 // FatInlineAtom is basically a JSFatInlineString, except it has a hash value in
1459 // the last word that reduces the inline char storage.
1460 class FatInlineAtom : public JSAtom {
1461 friend class gc::CellAllocator;
1463 // The space available for storing inline characters. It's the same amount of
1464 // space as a JSFatInlineString, except we take the hash value out of it.
1465 static constexpr size_t InlineBytes = sizeof(JSFatInlineString) -
1466 sizeof(JSString::Base) -
1467 sizeof(js::HashNumber);
1469 static constexpr size_t ExtensionBytes =
1470 InlineBytes - JSThinInlineString::InlineBytes;
1472 public:
1473 static constexpr size_t MAX_LENGTH_LATIN1 =
1474 InlineBytes / sizeof(JS::Latin1Char);
1475 static constexpr size_t MAX_LENGTH_TWO_BYTE = InlineBytes / sizeof(char16_t);
1477 protected: // Silence Clang unused-field warning.
1478 char inlineStorage_[ExtensionBytes];
1479 HashNumber hash_;
1481 // Mimicking JSFatInlineString constructors.
1482 explicit FatInlineAtom(size_t length, JS::Latin1Char** chars,
1483 js::HashNumber hash);
1484 explicit FatInlineAtom(size_t length, char16_t** chars, js::HashNumber hash);
1486 public:
1487 HashNumber hash() const { return hash_; }
1488 void initHash(HashNumber hash) { hash_ = hash; }
1490 inline void finalize(JS::GCContext* gcx);
1492 static constexpr size_t offsetOfHash() {
1493 static_assert(
1494 sizeof(FatInlineAtom) ==
1495 js::RoundUp(sizeof(JSThinInlineString) +
1496 FatInlineAtom::ExtensionBytes + sizeof(HashNumber),
1497 gc::CellAlignBytes),
1498 "FatInlineAtom must have size of a thin inline string + "
1499 "extension bytes if any + HashNumber, "
1500 "aligned to gc::CellAlignBytes");
1502 return offsetof(FatInlineAtom, hash_);
1505 template <typename CharT>
1506 static bool lengthFits(size_t length) {
1507 return length * sizeof(CharT) <= InlineBytes;
1511 static_assert(sizeof(FatInlineAtom) == sizeof(JSFatInlineString),
1512 "FatInlineAtom must be the same size as a fat inline string");
1514 // When an algorithm does not need a string represented as a single linear
1515 // array of characters, this range utility may be used to traverse the string a
1516 // sequence of linear arrays of characters. This avoids flattening ropes.
1517 template <size_t Size = 16>
1518 class StringSegmentRange {
1519 // If malloc() shows up in any profiles from this vector, we can add a new
1520 // StackAllocPolicy which stashes a reusable freed-at-gc buffer in the cx.
1521 using StackVector = JS::GCVector<JSString*, Size>;
1522 Rooted<StackVector> stack;
1523 Rooted<JSLinearString*> cur;
1525 bool settle(JSString* str) {
1526 while (str->isRope()) {
1527 JSRope& rope = str->asRope();
1528 if (!stack.append(rope.rightChild())) {
1529 return false;
1531 str = rope.leftChild();
1533 cur = &str->asLinear();
1534 return true;
1537 public:
1538 explicit StringSegmentRange(JSContext* cx)
1539 : stack(cx, StackVector(cx)), cur(cx) {}
1541 [[nodiscard]] bool init(JSString* str) {
1542 MOZ_ASSERT(stack.empty());
1543 return settle(str);
1546 bool empty() const { return cur == nullptr; }
1548 JSLinearString* front() const {
1549 MOZ_ASSERT(!cur->isRope());
1550 return cur;
1553 [[nodiscard]] bool popFront() {
1554 MOZ_ASSERT(!empty());
1555 if (stack.empty()) {
1556 cur = nullptr;
1557 return true;
1559 return settle(stack.popCopy());
1563 } // namespace js
1565 inline js::HashNumber JSAtom::hash() const {
1566 if (isFatInline()) {
1567 return static_cast<const js::FatInlineAtom*>(this)->hash();
1569 return static_cast<const js::NormalAtom*>(this)->hash();
1572 inline void JSAtom::initHash(js::HashNumber hash) {
1573 if (isFatInline()) {
1574 return static_cast<js::FatInlineAtom*>(this)->initHash(hash);
1576 return static_cast<js::NormalAtom*>(this)->initHash(hash);
1579 namespace js {
1582 * Represents an atomized string which does not contain an index (that is, an
1583 * unsigned 32-bit value). Thus for any PropertyName propname,
1584 * ToString(ToUint32(propname)) never equals propname.
1586 * To more concretely illustrate the utility of PropertyName, consider that it
1587 * is used to partition, in a type-safe manner, the ways to refer to a
1588 * property, as follows:
1590 * - uint32_t indexes,
1591 * - PropertyName strings which don't encode uint32_t indexes,
1592 * - Symbol, and
1593 * - JS::PropertyKey::isVoid.
1595 class PropertyName : public JSAtom {
1596 private:
1597 /* Vacuous and therefore unimplemented. */
1598 PropertyName* asPropertyName() = delete;
1601 static_assert(sizeof(PropertyName) == sizeof(JSString),
1602 "string subclasses must be binary-compatible with JSString");
1604 static MOZ_ALWAYS_INLINE jsid NameToId(PropertyName* name) {
1605 return JS::PropertyKey::NonIntAtom(name);
1608 using PropertyNameVector = JS::GCVector<PropertyName*>;
1610 template <typename CharT>
1611 void CopyChars(CharT* dest, const JSLinearString& str);
1613 static inline UniqueChars StringToNewUTF8CharsZ(JSContext* cx, JSString& str) {
1614 JS::AutoCheckCannotGC nogc;
1616 JSLinearString* linear = str.ensureLinear(cx);
1617 if (!linear) {
1618 return nullptr;
1621 return UniqueChars(
1622 linear->hasLatin1Chars()
1623 ? JS::CharsToNewUTF8CharsZ(cx, linear->latin1Range(nogc)).c_str()
1624 : JS::CharsToNewUTF8CharsZ(cx, linear->twoByteRange(nogc)).c_str());
1628 * Allocate a string with the given contents. If |allowGC == CanGC|, this may
1629 * trigger a GC.
1631 template <js::AllowGC allowGC, typename CharT>
1632 extern JSLinearString* NewString(JSContext* cx,
1633 UniquePtr<CharT[], JS::FreePolicy> chars,
1634 size_t length,
1635 js::gc::Heap heap = js::gc::Heap::Default);
1637 /* Like NewString, but doesn't try to deflate to Latin1. */
1638 template <js::AllowGC allowGC, typename CharT>
1639 extern JSLinearString* NewStringDontDeflate(
1640 JSContext* cx, UniquePtr<CharT[], JS::FreePolicy> chars, size_t length,
1641 js::gc::Heap heap = js::gc::Heap::Default);
1643 /* This may return a static string/atom or an inline string. */
1644 extern JSLinearString* NewDependentString(
1645 JSContext* cx, JSString* base, size_t start, size_t length,
1646 js::gc::Heap heap = js::gc::Heap::Default);
1648 /* Take ownership of an array of Latin1Chars. */
1649 extern JSLinearString* NewLatin1StringZ(
1650 JSContext* cx, UniqueChars chars,
1651 js::gc::Heap heap = js::gc::Heap::Default);
1653 /* Copy a counted string and GC-allocate a descriptor for it. */
1654 template <js::AllowGC allowGC, typename CharT>
1655 extern JSLinearString* NewStringCopyN(
1656 JSContext* cx, const CharT* s, size_t n,
1657 js::gc::Heap heap = js::gc::Heap::Default);
1659 template <js::AllowGC allowGC>
1660 inline JSLinearString* NewStringCopyN(
1661 JSContext* cx, const char* s, size_t n,
1662 js::gc::Heap heap = js::gc::Heap::Default) {
1663 return NewStringCopyN<allowGC>(cx, reinterpret_cast<const Latin1Char*>(s), n,
1664 heap);
1667 template <typename CharT>
1668 extern JSAtom* NewAtomCopyNMaybeDeflateValidLength(JSContext* cx,
1669 const CharT* s, size_t n,
1670 js::HashNumber hash);
1672 template <typename CharT>
1673 extern JSAtom* NewAtomCopyNDontDeflateValidLength(JSContext* cx, const CharT* s,
1674 size_t n,
1675 js::HashNumber hash);
1677 /* Copy a counted string and GC-allocate a descriptor for it. */
1678 template <js::AllowGC allowGC, typename CharT>
1679 inline JSLinearString* NewStringCopy(
1680 JSContext* cx, mozilla::Span<const CharT> s,
1681 js::gc::Heap heap = js::gc::Heap::Default) {
1682 return NewStringCopyN<allowGC>(cx, s.data(), s.size(), heap);
1685 /* Copy a counted string and GC-allocate a descriptor for it. */
1686 template <js::AllowGC allowGC, typename CharT>
1687 inline JSLinearString* NewStringCopy(
1688 JSContext* cx, std::basic_string_view<CharT> s,
1689 js::gc::Heap heap = js::gc::Heap::Default) {
1690 return NewStringCopyN<allowGC>(cx, s.data(), s.size(), heap);
1693 /* Like NewStringCopyN, but doesn't try to deflate to Latin1. */
1694 template <js::AllowGC allowGC, typename CharT>
1695 extern JSLinearString* NewStringCopyNDontDeflate(
1696 JSContext* cx, const CharT* s, size_t n,
1697 js::gc::Heap heap = js::gc::Heap::Default);
1699 template <js::AllowGC allowGC, typename CharT>
1700 extern JSLinearString* NewStringCopyNDontDeflateNonStaticValidLength(
1701 JSContext* cx, const CharT* s, size_t n,
1702 js::gc::Heap heap = js::gc::Heap::Default);
1704 /* Copy a C string and GC-allocate a descriptor for it. */
1705 template <js::AllowGC allowGC>
1706 inline JSLinearString* NewStringCopyZ(
1707 JSContext* cx, const char16_t* s,
1708 js::gc::Heap heap = js::gc::Heap::Default) {
1709 return NewStringCopyN<allowGC>(cx, s, js_strlen(s), heap);
1712 template <js::AllowGC allowGC>
1713 inline JSLinearString* NewStringCopyZ(
1714 JSContext* cx, const char* s, js::gc::Heap heap = js::gc::Heap::Default) {
1715 return NewStringCopyN<allowGC>(cx, s, strlen(s), heap);
1718 extern JSLinearString* NewStringCopyUTF8N(
1719 JSContext* cx, const JS::UTF8Chars& utf8, JS::SmallestEncoding encoding,
1720 js::gc::Heap heap = js::gc::Heap::Default);
1722 extern JSLinearString* NewStringCopyUTF8N(
1723 JSContext* cx, const JS::UTF8Chars& utf8,
1724 js::gc::Heap heap = js::gc::Heap::Default);
1726 inline JSLinearString* NewStringCopyUTF8Z(
1727 JSContext* cx, const JS::ConstUTF8CharsZ utf8,
1728 js::gc::Heap heap = js::gc::Heap::Default) {
1729 return NewStringCopyUTF8N(
1730 cx, JS::UTF8Chars(utf8.c_str(), strlen(utf8.c_str())), heap);
1733 template <typename CharT>
1734 JSString* NewMaybeExternalString(JSContext* cx, const CharT* s, size_t n,
1735 const JSExternalStringCallbacks* callbacks,
1736 bool* allocatedExternal,
1737 js::gc::Heap heap = js::gc::Heap::Default);
1739 static_assert(sizeof(HashNumber) == 4);
1741 template <AllowGC allowGC>
1742 extern JSString* ConcatStrings(
1743 JSContext* cx, typename MaybeRooted<JSString*, allowGC>::HandleType left,
1744 typename MaybeRooted<JSString*, allowGC>::HandleType right,
1745 js::gc::Heap heap = js::gc::Heap::Default);
1748 * Test if strings are equal. The caller can call the function even if str1
1749 * or str2 are not GC-allocated things.
1751 extern bool EqualStrings(JSContext* cx, JSString* str1, JSString* str2,
1752 bool* result);
1754 /* Use the infallible method instead! */
1755 extern bool EqualStrings(JSContext* cx, JSLinearString* str1,
1756 JSLinearString* str2, bool* result) = delete;
1758 /* EqualStrings is infallible on linear strings. */
1759 extern bool EqualStrings(const JSLinearString* str1,
1760 const JSLinearString* str2);
1763 * Compare two strings that are known to be the same length.
1764 * Exposed for the JITs; for ordinary uses, EqualStrings() is more sensible.
1766 * The caller must have checked for the following cases that can be handled
1767 * efficiently without requiring a character comparison:
1768 * - str1 == str2
1769 * - str1->length() != str2->length()
1770 * - str1->isAtom() && str2->isAtom()
1772 extern bool EqualChars(const JSLinearString* str1, const JSLinearString* str2);
1775 * Return less than, equal to, or greater than zero depending on whether
1776 * `s1[0..len1]` is less than, equal to, or greater than `s2`.
1778 extern int32_t CompareChars(const char16_t* s1, size_t len1,
1779 JSLinearString* s2);
1782 * Compare two strings, like CompareChars, but store the result in `*result`.
1783 * This flattens the strings and therefore can fail.
1785 extern bool CompareStrings(JSContext* cx, JSString* str1, JSString* str2,
1786 int32_t* result);
1789 * Compare two strings, like CompareChars.
1791 extern int32_t CompareStrings(const JSLinearString* str1,
1792 const JSLinearString* str2);
1795 * Return true if the string contains only ASCII characters.
1797 extern bool StringIsAscii(JSLinearString* str);
1800 * Return true if the string matches the given sequence of ASCII bytes.
1802 extern bool StringEqualsAscii(JSLinearString* str, const char* asciiBytes);
1804 * Return true if the string matches the given sequence of ASCII
1805 * bytes. The sequence of ASCII bytes must have length "length". The
1806 * length should not include the trailing null, if any.
1808 extern bool StringEqualsAscii(JSLinearString* str, const char* asciiBytes,
1809 size_t length);
1811 template <size_t N>
1812 bool StringEqualsLiteral(JSLinearString* str, const char (&asciiBytes)[N]) {
1813 MOZ_ASSERT(asciiBytes[N - 1] == '\0');
1814 return StringEqualsAscii(str, asciiBytes, N - 1);
1817 extern int StringFindPattern(JSLinearString* text, JSLinearString* pat,
1818 size_t start);
1821 * Return true if the string contains a pattern at |start|.
1823 * Precondition: `text` is long enough that this might be true;
1824 * that is, it has at least `start + pat->length()` characters.
1826 extern bool HasSubstringAt(JSLinearString* text, JSLinearString* pat,
1827 size_t start);
1830 * Computes |str|'s substring for the range [beginInt, beginInt + lengthInt).
1831 * Negative, overlarge, swapped, etc. |beginInt| and |lengthInt| are forbidden
1832 * and constitute API misuse.
1834 JSString* SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt,
1835 int32_t lengthInt);
1837 inline js::HashNumber HashStringChars(JSLinearString* str) {
1838 JS::AutoCheckCannotGC nogc;
1839 size_t len = str->length();
1840 return str->hasLatin1Chars()
1841 ? mozilla::HashString(str->latin1Chars(nogc), len)
1842 : mozilla::HashString(str->twoByteChars(nogc), len);
1845 /*** Conversions ************************************************************/
1848 * Convert a string to a printable C string.
1850 * Asserts if the input contains any non-ASCII characters.
1852 UniqueChars EncodeAscii(JSContext* cx, JSString* str);
1855 * Convert a string to a printable C string.
1857 UniqueChars EncodeLatin1(JSContext* cx, JSString* str);
/* Selects which printable form IdToPrintableUTF8 should produce. */
enum class IdToPrintableBehavior : bool {
  /*
   * Ask for the printable representation of an identifier.
   */
  IdIsIdentifier,

  /*
   * Ask for the printable representation of a property key.
   */
  IdIsPropertyKey
};
1872 * Convert a jsid to a printable C string encoded in UTF-8.
1874 extern UniqueChars IdToPrintableUTF8(JSContext* cx, HandleId id,
1875 IdToPrintableBehavior behavior);
1878 * Convert a non-string value to a string, returning null after reporting an
1879 * error, otherwise returning a new string reference.
1881 template <AllowGC allowGC>
1882 extern JSString* ToStringSlow(
1883 JSContext* cx, typename MaybeRooted<Value, allowGC>::HandleType arg);
1886 * Convert the given value to a string. This method includes an inline
1887 * fast-path for the case where the value is already a string; if the value is
1888 * known not to be a string, use ToStringSlow instead.
1890 template <AllowGC allowGC>
1891 static MOZ_ALWAYS_INLINE JSString* ToString(JSContext* cx, JS::HandleValue v) {
1892 if (v.isString()) {
1893 return v.toString();
1895 return ToStringSlow<allowGC>(cx, v);
1899 * This function implements E-262-3 section 9.8, toString. Convert the given
1900 * value to a string of characters appended to the given buffer. On error, the
1901 * passed buffer may have partial results appended.
1903 inline bool ValueToStringBuffer(JSContext* cx, const Value& v,
1904 StringBuffer& sb);
1906 } /* namespace js */
1908 MOZ_ALWAYS_INLINE bool JSString::getChar(JSContext* cx, size_t index,
1909 char16_t* code) {
1910 MOZ_ASSERT(index < length());
1913 * Optimization for one level deep ropes.
1914 * This is common for the following pattern:
1916 * while() {
1917 * text = text.substr(0, x) + "bla" + text.substr(x)
1918 * test.charCodeAt(x + 1)
1921 * Note: keep this in sync with MacroAssembler::loadStringChar and
1922 * CanAttachStringChar.
1924 JSString* str;
1925 if (isRope()) {
1926 JSRope* rope = &asRope();
1927 if (uint32_t(index) < rope->leftChild()->length()) {
1928 str = rope->leftChild();
1929 } else {
1930 str = rope->rightChild();
1931 index -= rope->leftChild()->length();
1933 } else {
1934 str = this;
1937 if (!str->ensureLinear(cx)) {
1938 return false;
1941 *code = str->asLinear().latin1OrTwoByteChar(index);
1942 return true;
1945 MOZ_ALWAYS_INLINE bool JSString::getCodePoint(JSContext* cx, size_t index,
1946 char32_t* code) {
1947 // C++ implementation of https://tc39.es/ecma262/#sec-codepointat
1948 size_t size = length();
1949 MOZ_ASSERT(index < size);
1951 char16_t first;
1952 if (!getChar(cx, index, &first)) {
1953 return false;
1955 if (!js::unicode::IsLeadSurrogate(first) || index + 1 == size) {
1956 *code = first;
1957 return true;
1960 char16_t second;
1961 if (!getChar(cx, index + 1, &second)) {
1962 return false;
1964 if (!js::unicode::IsTrailSurrogate(second)) {
1965 *code = first;
1966 return true;
1969 *code = js::unicode::UTF16Decode(first, second);
1970 return true;
1973 MOZ_ALWAYS_INLINE JSLinearString* JSString::ensureLinear(JSContext* cx) {
1974 return isLinear() ? &asLinear() : asRope().flatten(cx);
1977 inline JSLinearString* JSString::base() const {
1978 MOZ_ASSERT(hasBase());
1979 MOZ_ASSERT(!d.s.u3.base->isInline());
1980 return d.s.u3.base;
1983 inline JSLinearString* JSString::nurseryBaseOrRelocOverlay() const {
1984 MOZ_ASSERT(hasBase());
1985 return d.s.u3.base;
1988 inline bool JSString::canOwnDependentChars() const {
1989 // A string that could own the malloced chars used by another (dependent)
1990 // string. It will not have a base and must be linear and non-inline.
1991 return isLinear() && !isInline() && !hasBase();
1994 inline void JSString::setBase(JSLinearString* newBase) {
1995 MOZ_ASSERT(hasBase());
1996 MOZ_ASSERT(!newBase->isInline());
1997 d.s.u3.base = newBase;
2000 template <>
2001 MOZ_ALWAYS_INLINE const char16_t* JSLinearString::nonInlineChars(
2002 const JS::AutoRequireNoGC& nogc) const {
2003 return nonInlineTwoByteChars(nogc);
2006 template <>
2007 MOZ_ALWAYS_INLINE const JS::Latin1Char* JSLinearString::nonInlineChars(
2008 const JS::AutoRequireNoGC& nogc) const {
2009 return nonInlineLatin1Chars(nogc);
2012 template <>
2013 MOZ_ALWAYS_INLINE const char16_t* JSLinearString::chars(
2014 const JS::AutoRequireNoGC& nogc) const {
2015 return rawTwoByteChars();
2018 template <>
2019 MOZ_ALWAYS_INLINE const JS::Latin1Char* JSLinearString::chars(
2020 const JS::AutoRequireNoGC& nogc) const {
2021 return rawLatin1Chars();
2024 template <>
2025 MOZ_ALWAYS_INLINE js::UniquePtr<JS::Latin1Char[], JS::FreePolicy>
2026 JSRope::copyChars<JS::Latin1Char>(JSContext* maybecx,
2027 arena_id_t destArenaId) const {
2028 return copyLatin1Chars(maybecx, destArenaId);
2031 template <>
2032 MOZ_ALWAYS_INLINE JS::UniqueTwoByteChars JSRope::copyChars<char16_t>(
2033 JSContext* maybecx, arena_id_t destArenaId) const {
2034 return copyTwoByteChars(maybecx, destArenaId);
2037 template <>
2038 MOZ_ALWAYS_INLINE bool JSThinInlineString::lengthFits<JS::Latin1Char>(
2039 size_t length) {
2040 return length <= MAX_LENGTH_LATIN1;
2043 template <>
2044 MOZ_ALWAYS_INLINE bool JSThinInlineString::lengthFits<char16_t>(size_t length) {
2045 return length <= MAX_LENGTH_TWO_BYTE;
2048 template <>
2049 MOZ_ALWAYS_INLINE bool JSFatInlineString::lengthFits<JS::Latin1Char>(
2050 size_t length) {
2051 static_assert(
2052 (INLINE_EXTENSION_CHARS_LATIN1 * sizeof(char)) % js::gc::CellAlignBytes ==
2054 "fat inline strings' Latin1 characters don't exactly "
2055 "fill subsequent cells and thus are wasteful");
2056 static_assert(MAX_LENGTH_LATIN1 ==
2057 (sizeof(JSFatInlineString) -
2058 offsetof(JSFatInlineString, d.inlineStorageLatin1)) /
2059 sizeof(char),
2060 "MAX_LENGTH_LATIN1 must be one less than inline Latin1 "
2061 "storage count");
2063 return length <= MAX_LENGTH_LATIN1;
2066 template <>
2067 MOZ_ALWAYS_INLINE bool JSFatInlineString::lengthFits<char16_t>(size_t length) {
2068 static_assert((INLINE_EXTENSION_CHARS_TWO_BYTE * sizeof(char16_t)) %
2069 js::gc::CellAlignBytes ==
2071 "fat inline strings' char16_t characters don't exactly "
2072 "fill subsequent cells and thus are wasteful");
2073 static_assert(MAX_LENGTH_TWO_BYTE ==
2074 (sizeof(JSFatInlineString) -
2075 offsetof(JSFatInlineString, d.inlineStorageTwoByte)) /
2076 sizeof(char16_t),
2077 "MAX_LENGTH_TWO_BYTE must be one less than inline "
2078 "char16_t storage count");
2080 return length <= MAX_LENGTH_TWO_BYTE;
2083 template <>
2084 MOZ_ALWAYS_INLINE bool JSInlineString::lengthFits<JS::Latin1Char>(
2085 size_t length) {
2086 // If it fits in a fat inline string, it fits in any inline string.
2087 return JSFatInlineString::lengthFits<JS::Latin1Char>(length);
2090 template <>
2091 MOZ_ALWAYS_INLINE bool JSInlineString::lengthFits<char16_t>(size_t length) {
2092 // If it fits in a fat inline string, it fits in any inline string.
2093 return JSFatInlineString::lengthFits<char16_t>(length);
2096 template <>
2097 MOZ_ALWAYS_INLINE bool js::ThinInlineAtom::lengthFits<JS::Latin1Char>(
2098 size_t length) {
2099 return length <= MAX_LENGTH_LATIN1;
2102 template <>
2103 MOZ_ALWAYS_INLINE bool js::ThinInlineAtom::lengthFits<char16_t>(size_t length) {
2104 return length <= MAX_LENGTH_TWO_BYTE;
2107 template <>
2108 MOZ_ALWAYS_INLINE bool js::FatInlineAtom::lengthFits<JS::Latin1Char>(
2109 size_t length) {
2110 return length <= MAX_LENGTH_LATIN1;
2113 template <>
2114 MOZ_ALWAYS_INLINE bool js::FatInlineAtom::lengthFits<char16_t>(size_t length) {
2115 return length <= MAX_LENGTH_TWO_BYTE;
2118 template <>
2119 MOZ_ALWAYS_INLINE bool JSAtom::lengthFitsInline<JS::Latin1Char>(size_t length) {
2120 // If it fits in a fat inline atom, it fits in any inline atom.
2121 return js::FatInlineAtom::lengthFits<JS::Latin1Char>(length);
2124 template <>
2125 MOZ_ALWAYS_INLINE bool JSAtom::lengthFitsInline<char16_t>(size_t length) {
2126 // If it fits in a fat inline atom, it fits in any inline atom.
2127 return js::FatInlineAtom::lengthFits<char16_t>(length);
2130 template <>
2131 MOZ_ALWAYS_INLINE void JSString::setNonInlineChars(const char16_t* chars) {
2132 // Check that the new buffer is located in the StringBufferArena
2133 checkStringCharsArena(chars);
2134 d.s.u2.nonInlineCharsTwoByte = chars;
2137 template <>
2138 MOZ_ALWAYS_INLINE void JSString::setNonInlineChars(
2139 const JS::Latin1Char* chars) {
2140 // Check that the new buffer is located in the StringBufferArena
2141 checkStringCharsArena(chars);
2142 d.s.u2.nonInlineCharsLatin1 = chars;
2145 MOZ_ALWAYS_INLINE const JS::Latin1Char* JSLinearString::rawLatin1Chars() const {
2146 MOZ_ASSERT(JSString::isLinear());
2147 MOZ_ASSERT(hasLatin1Chars());
2148 return isInline() ? d.inlineStorageLatin1 : d.s.u2.nonInlineCharsLatin1;
2151 MOZ_ALWAYS_INLINE const char16_t* JSLinearString::rawTwoByteChars() const {
2152 MOZ_ASSERT(JSString::isLinear());
2153 MOZ_ASSERT(hasTwoByteChars());
2154 return isInline() ? d.inlineStorageTwoByte : d.s.u2.nonInlineCharsTwoByte;
2157 inline js::PropertyName* JSAtom::asPropertyName() {
2158 MOZ_ASSERT(!isIndex());
2159 return static_cast<js::PropertyName*>(this);
2162 inline bool JSLinearString::isIndex(uint32_t* indexp) const {
2163 MOZ_ASSERT(JSString::isLinear());
2165 if (isAtom()) {
2166 return asAtom().isIndex(indexp);
2169 if (JSString::hasIndexValue()) {
2170 *indexp = getIndexValue();
2171 return true;
2174 return isIndexSlow(indexp);
2177 namespace js {
2178 namespace gc {
2179 template <>
2180 inline JSString* Cell::as<JSString>() {
2181 MOZ_ASSERT(is<JSString>());
2182 return reinterpret_cast<JSString*>(this);
2185 template <>
2186 inline JSString* TenuredCell::as<JSString>() {
2187 MOZ_ASSERT(is<JSString>());
2188 return reinterpret_cast<JSString*>(this);
2191 // StringRelocationOverlay assists with updating the string chars
2192 // pointers of dependent strings when their base strings are
2193 // deduplicated. It stores:
2194 // - nursery chars of a root base (root base is a non-dependent base), or
2195 // - nursery base of a dependent string
2196 // StringRelocationOverlay exploits the fact that the 3rd word of a JSString's
2197 // RelocationOverlay is not utilized and can be used to store extra information.
2198 class StringRelocationOverlay : public RelocationOverlay {
2199 union {
2200 // nursery chars of a root base
2201 const JS::Latin1Char* nurseryCharsLatin1;
2202 const char16_t* nurseryCharsTwoByte;
2204 // The nursery base can be forwarded, which becomes a string relocation
2205 // overlay, or it is not yet forwarded and is simply the base.
2206 JSLinearString* nurseryBaseOrRelocOverlay;
2209 public:
2210 explicit StringRelocationOverlay(Cell* dst) : RelocationOverlay(dst) {
2211 static_assert(sizeof(JSString) >= sizeof(StringRelocationOverlay));
2214 static const StringRelocationOverlay* fromCell(const Cell* cell) {
2215 return static_cast<const StringRelocationOverlay*>(cell);
2218 static StringRelocationOverlay* fromCell(Cell* cell) {
2219 return static_cast<StringRelocationOverlay*>(cell);
2222 void setNext(StringRelocationOverlay* next) {
2223 MOZ_ASSERT(isForwarded());
2224 next_ = next;
2227 StringRelocationOverlay* next() const {
2228 MOZ_ASSERT(isForwarded());
2229 return (StringRelocationOverlay*)next_;
2232 template <typename CharT>
2233 MOZ_ALWAYS_INLINE const CharT* savedNurseryChars() const;
2235 const MOZ_ALWAYS_INLINE JS::Latin1Char* savedNurseryCharsLatin1() const {
2236 return nurseryCharsLatin1;
2239 const MOZ_ALWAYS_INLINE char16_t* savedNurseryCharsTwoByte() const {
2240 return nurseryCharsTwoByte;
2243 JSLinearString* savedNurseryBaseOrRelocOverlay() const {
2244 return nurseryBaseOrRelocOverlay;
2247 // Transform a nursery string to a StringRelocationOverlay that is forwarded
2248 // to a tenured string.
2249 inline static StringRelocationOverlay* forwardCell(JSString* src, Cell* dst) {
2250 MOZ_ASSERT(!src->isForwarded());
2251 MOZ_ASSERT(!dst->isForwarded());
2253 JS::AutoCheckCannotGC nogc;
2254 StringRelocationOverlay* overlay;
2256 // Initialize the overlay, and remember the nursery base string if there is
2257 // one, or nursery non-inlined chars if it can be the root base of other
2258 // strings.
2260 // The non-inlined chars of a tenured dependent string should point to the
2261 // tenured root base's one with an offset. For example, a dependent string
2262 // may start from the 3rd char of its root base. During tenuring, offsets
2263 // of dependent strings can be computed from the nursery non-inlined chars
2264 // remembered in overlays.
2265 if (src->hasBase()) {
2266 auto nurseryBaseOrRelocOverlay = src->nurseryBaseOrRelocOverlay();
2267 overlay = new (src) StringRelocationOverlay(dst);
2268 overlay->nurseryBaseOrRelocOverlay = nurseryBaseOrRelocOverlay;
2269 } else if (src->canOwnDependentChars()) {
2270 if (src->hasTwoByteChars()) {
2271 auto nurseryCharsTwoByte = src->asLinear().twoByteChars(nogc);
2272 overlay = new (src) StringRelocationOverlay(dst);
2273 overlay->nurseryCharsTwoByte = nurseryCharsTwoByte;
2274 } else {
2275 auto nurseryCharsLatin1 = src->asLinear().latin1Chars(nogc);
2276 overlay = new (src) StringRelocationOverlay(dst);
2277 overlay->nurseryCharsLatin1 = nurseryCharsLatin1;
2279 } else {
2280 overlay = new (src) StringRelocationOverlay(dst);
2283 return overlay;
2287 template <>
2288 MOZ_ALWAYS_INLINE const JS::Latin1Char*
2289 StringRelocationOverlay::savedNurseryChars() const {
2290 return savedNurseryCharsLatin1();
2293 template <>
2294 MOZ_ALWAYS_INLINE const char16_t* StringRelocationOverlay::savedNurseryChars()
2295 const {
2296 return savedNurseryCharsTwoByte();
2299 } // namespace gc
2300 } // namespace js
2302 #endif /* vm_StringType_h */