Bug 1848090 - When parsing JSON, Stop allocating in the nursery after two nursery...
[gecko.git] / js / src / util / StringBuffer.h
blob23c1ff61ae736c21e01ecdb6bd5833f1ce2eab32
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #ifndef util_StringBuffer_h
8 #define util_StringBuffer_h
10 #include "mozilla/CheckedInt.h"
11 #include "mozilla/MaybeOneOf.h"
12 #include "mozilla/Utf8.h"
14 #include "frontend/FrontendContext.h"
15 #include "js/Vector.h"
16 #include "vm/StringType.h"
18 namespace js {
20 class FrontendContext;
22 namespace frontend {
23 class ParserAtomsTable;
24 class TaggedParserAtomIndex;
25 } // namespace frontend
27 namespace detail {
29 // GrowEltsAggressively will multiply the space by a factor of 8 on overflow, to
30 // avoid very expensive memcpys for large strings (eg giant toJSON output for
31 // sessionstore.js). Drop back to the normal expansion policy once the buffer
32 // hits 128MB.
33 static constexpr size_t AggressiveLimit = 128 << 20;
35 template <size_t EltSize>
36 inline size_t GrowEltsAggressively(size_t aOldElts, size_t aIncr) {
37 mozilla::CheckedInt<size_t> required =
38 mozilla::CheckedInt<size_t>(aOldElts) + aIncr;
39 if (!(required * 2).isValid()) {
40 return 0;
42 required = mozilla::RoundUpPow2(required.value());
43 required *= 8;
44 if (!(required * EltSize).isValid() || required.value() > AggressiveLimit) {
45 // Fall back to doubling behavior if the aggressive growth fails or gets too
46 // big.
47 return mozilla::detail::GrowEltsByDoubling<EltSize>(aOldElts, aIncr);
49 return required.value();
52 } // namespace detail
54 class StringBufferAllocPolicy {
55 TempAllocPolicy impl_;
56 const arena_id_t& arenaId_;
58 public:
59 StringBufferAllocPolicy(FrontendContext* fc, const arena_id_t& arenaId)
60 : impl_(fc), arenaId_(arenaId) {}
62 StringBufferAllocPolicy(JSContext* cx, const arena_id_t& arenaId)
63 : impl_(cx), arenaId_(arenaId) {}
65 template <typename T>
66 T* maybe_pod_malloc(size_t numElems) {
67 return impl_.maybe_pod_arena_malloc<T>(arenaId_, numElems);
69 template <typename T>
70 T* maybe_pod_calloc(size_t numElems) {
71 return impl_.maybe_pod_arena_calloc<T>(arenaId_, numElems);
73 template <typename T>
74 T* maybe_pod_realloc(T* p, size_t oldSize, size_t newSize) {
75 return impl_.maybe_pod_arena_realloc<T>(arenaId_, p, oldSize, newSize);
77 template <typename T>
78 T* pod_malloc(size_t numElems) {
79 return impl_.pod_arena_malloc<T>(arenaId_, numElems);
81 template <typename T>
82 T* pod_calloc(size_t numElems) {
83 return impl_.pod_arena_calloc<T>(arenaId_, numElems);
85 template <typename T>
86 T* pod_realloc(T* p, size_t oldSize, size_t newSize) {
87 return impl_.pod_arena_realloc<T>(arenaId_, p, oldSize, newSize);
89 template <typename T>
90 void free_(T* p, size_t numElems = 0) {
91 impl_.free_(p, numElems);
93 void reportAllocOverflow() const { impl_.reportAllocOverflow(); }
94 bool checkSimulatedOOM() const { return impl_.checkSimulatedOOM(); }
96 // See ComputeGrowth in mfbt/Vector.h.
97 template <size_t EltSize>
98 static size_t computeGrowth(size_t aOldElts, size_t aIncr) {
99 return detail::GrowEltsAggressively<EltSize>(aOldElts, aIncr);
104 * String builder that eagerly checks for over-allocation past the maximum
105 * string length.
107 * Any operation which would exceed the maximum string length causes an
108 * exception report on the context and results in a failed return value.
110 * Well-sized extractions (which waste no more than 1/4 of their char
111 * buffer space) are guaranteed for strings built by this interface.
112 * See |extractWellSized|.
114 class StringBuffer {
115 protected:
116 template <typename CharT>
117 using BufferType = Vector<CharT, 64 / sizeof(CharT), StringBufferAllocPolicy>;
120 * The Vector's buffer may be either stolen or copied, so we need to use
121 * TempAllocPolicy and account for the memory manually when stealing.
123 using Latin1CharBuffer = BufferType<Latin1Char>;
124 using TwoByteCharBuffer = BufferType<char16_t>;
126 JSContext* maybeCx_ = nullptr;
129 * If Latin1 strings are enabled, cb starts out as a Latin1CharBuffer. When
130 * a TwoByte char is appended, inflateChars() constructs a TwoByteCharBuffer
131 * and copies the Latin1 chars.
133 mozilla::MaybeOneOf<Latin1CharBuffer, TwoByteCharBuffer> cb;
135 /* Number of reserve()'d chars, see inflateChars. */
136 size_t reserved_ = 0;
138 StringBuffer(const StringBuffer& other) = delete;
139 void operator=(const StringBuffer& other) = delete;
141 template <typename CharT>
142 MOZ_ALWAYS_INLINE bool isCharType() const {
143 return cb.constructed<BufferType<CharT>>();
146 MOZ_ALWAYS_INLINE bool isLatin1() const { return isCharType<Latin1Char>(); }
148 MOZ_ALWAYS_INLINE bool isTwoByte() const { return isCharType<char16_t>(); }
150 template <typename CharT>
151 MOZ_ALWAYS_INLINE BufferType<CharT>& chars() {
152 MOZ_ASSERT(isCharType<CharT>());
153 return cb.ref<BufferType<CharT>>();
156 template <typename CharT>
157 MOZ_ALWAYS_INLINE const BufferType<CharT>& chars() const {
158 MOZ_ASSERT(isCharType<CharT>());
159 return cb.ref<BufferType<CharT>>();
162 MOZ_ALWAYS_INLINE TwoByteCharBuffer& twoByteChars() {
163 return chars<char16_t>();
166 MOZ_ALWAYS_INLINE const TwoByteCharBuffer& twoByteChars() const {
167 return chars<char16_t>();
170 MOZ_ALWAYS_INLINE Latin1CharBuffer& latin1Chars() {
171 return chars<Latin1Char>();
174 MOZ_ALWAYS_INLINE const Latin1CharBuffer& latin1Chars() const {
175 return chars<Latin1Char>();
178 [[nodiscard]] bool inflateChars();
180 template <typename CharT>
181 JSLinearString* finishStringInternal(JSContext* cx, gc::Heap heap);
183 public:
184 explicit StringBuffer(JSContext* cx,
185 const arena_id_t& arenaId = js::MallocArena)
186 : maybeCx_(cx) {
187 MOZ_ASSERT(cx);
188 cb.construct<Latin1CharBuffer>(StringBufferAllocPolicy{cx, arenaId});
191 // This constructor should only be used if the methods related to the
192 // following are not used, because they require a JSContext:
193 // * JSString
194 // * JSAtom
195 // * mozilla::Utf8Unit
196 explicit StringBuffer(FrontendContext* fc,
197 const arena_id_t& arenaId = js::MallocArena) {
198 MOZ_ASSERT(fc);
199 cb.construct<Latin1CharBuffer>(StringBufferAllocPolicy{fc, arenaId});
202 void clear() {
203 if (isLatin1()) {
204 latin1Chars().clear();
205 } else {
206 twoByteChars().clear();
209 [[nodiscard]] bool reserve(size_t len) {
210 if (len > reserved_) {
211 reserved_ = len;
213 return isLatin1() ? latin1Chars().reserve(len)
214 : twoByteChars().reserve(len);
216 [[nodiscard]] bool resize(size_t len) {
217 return isLatin1() ? latin1Chars().resize(len) : twoByteChars().resize(len);
219 [[nodiscard]] bool growByUninitialized(size_t incr) {
220 return isLatin1() ? latin1Chars().growByUninitialized(incr)
221 : twoByteChars().growByUninitialized(incr);
223 void shrinkTo(size_t newLength) {
224 return isLatin1() ? latin1Chars().shrinkTo(newLength)
225 : twoByteChars().shrinkTo(newLength);
227 bool empty() const {
228 return isLatin1() ? latin1Chars().empty() : twoByteChars().empty();
230 size_t length() const {
231 return isLatin1() ? latin1Chars().length() : twoByteChars().length();
233 char16_t getChar(size_t idx) const {
234 return isLatin1() ? latin1Chars()[idx] : twoByteChars()[idx];
237 [[nodiscard]] bool ensureTwoByteChars() {
238 return isTwoByte() || inflateChars();
241 [[nodiscard]] bool append(const char16_t c) {
242 if (isLatin1()) {
243 if (c <= JSString::MAX_LATIN1_CHAR) {
244 return latin1Chars().append(Latin1Char(c));
246 if (!inflateChars()) {
247 return false;
250 return twoByteChars().append(c);
252 [[nodiscard]] bool append(Latin1Char c) {
253 return isLatin1() ? latin1Chars().append(c) : twoByteChars().append(c);
255 [[nodiscard]] bool append(char c) { return append(Latin1Char(c)); }
257 [[nodiscard]] inline bool append(const char16_t* begin, const char16_t* end);
259 [[nodiscard]] bool append(const char16_t* chars, size_t len) {
260 return append(chars, chars + len);
263 [[nodiscard]] bool append(const Latin1Char* begin, const Latin1Char* end) {
264 return isLatin1() ? latin1Chars().append(begin, end)
265 : twoByteChars().append(begin, end);
267 [[nodiscard]] bool append(const Latin1Char* chars, size_t len) {
268 return append(chars, chars + len);
272 * Interpret the provided count of UTF-8 code units as UTF-8, and append
273 * the represented code points to this. If the code units contain invalid
274 * UTF-8, leave the internal buffer in a consistent but unspecified state,
275 * report an error, and return false.
277 [[nodiscard]] bool append(const mozilla::Utf8Unit* units, size_t len);
279 [[nodiscard]] bool append(const JS::ConstCharPtr chars, size_t len) {
280 return append(chars.get(), chars.get() + len);
282 [[nodiscard]] bool appendN(Latin1Char c, size_t n) {
283 return isLatin1() ? latin1Chars().appendN(c, n)
284 : twoByteChars().appendN(c, n);
287 [[nodiscard]] inline bool append(JSString* str);
288 [[nodiscard]] inline bool append(JSLinearString* str);
289 [[nodiscard]] inline bool appendSubstring(JSString* base, size_t off,
290 size_t len);
291 [[nodiscard]] inline bool appendSubstring(JSLinearString* base, size_t off,
292 size_t len);
293 [[nodiscard]] bool append(const frontend::ParserAtomsTable& parserAtoms,
294 frontend::TaggedParserAtomIndex atom);
296 [[nodiscard]] bool append(const char* chars, size_t len) {
297 return append(reinterpret_cast<const Latin1Char*>(chars), len);
300 template <size_t ArrayLength>
301 [[nodiscard]] bool append(const char (&array)[ArrayLength]) {
302 return append(array, ArrayLength - 1); /* No trailing '\0'. */
305 /* Infallible variants usable when the corresponding space is reserved. */
306 void infallibleAppend(Latin1Char c) {
307 if (isLatin1()) {
308 latin1Chars().infallibleAppend(c);
309 } else {
310 twoByteChars().infallibleAppend(c);
313 void infallibleAppend(char c) { infallibleAppend(Latin1Char(c)); }
314 void infallibleAppend(const Latin1Char* chars, size_t len) {
315 if (isLatin1()) {
316 latin1Chars().infallibleAppend(chars, len);
317 } else {
318 twoByteChars().infallibleAppend(chars, len);
321 void infallibleAppend(const char* chars, size_t len) {
322 infallibleAppend(reinterpret_cast<const Latin1Char*>(chars), len);
325 void infallibleAppendSubstring(JSLinearString* base, size_t off, size_t len);
328 * Because inflation is fallible, these methods should only be used after
329 * calling ensureTwoByteChars().
331 void infallibleAppend(const char16_t* chars, size_t len) {
332 twoByteChars().infallibleAppend(chars, len);
334 void infallibleAppend(char16_t c) { twoByteChars().infallibleAppend(c); }
336 bool isUnderlyingBufferLatin1() const { return isLatin1(); }
338 template <typename CharT>
339 CharT* begin() {
340 return chars<CharT>().begin();
343 template <typename CharT>
344 CharT* end() {
345 return chars<CharT>().end();
348 template <typename CharT>
349 const CharT* begin() const {
350 return chars<CharT>().begin();
353 template <typename CharT>
354 const CharT* end() const {
355 return chars<CharT>().end();
358 char16_t* rawTwoByteBegin() { return begin<char16_t>(); }
359 char16_t* rawTwoByteEnd() { return end<char16_t>(); }
360 const char16_t* rawTwoByteBegin() const { return begin<char16_t>(); }
361 const char16_t* rawTwoByteEnd() const { return end<char16_t>(); }
363 Latin1Char* rawLatin1Begin() { return begin<Latin1Char>(); }
364 Latin1Char* rawLatin1End() { return end<Latin1Char>(); }
365 const Latin1Char* rawLatin1Begin() const { return begin<Latin1Char>(); }
366 const Latin1Char* rawLatin1End() const { return end<Latin1Char>(); }
368 /* Identical to finishString() except that an atom is created. */
369 JSAtom* finishAtom();
370 frontend::TaggedParserAtomIndex finishParserAtom(
371 frontend::ParserAtomsTable& parserAtoms, FrontendContext* fc);
374 * Creates a raw string from the characters in this buffer. The string is
375 * exactly the characters in this buffer (inflated to TwoByte), it is *not*
376 * null-terminated unless the last appended character was '\0'.
378 char16_t* stealChars();
381 // Like StringBuffer, but uses StringBufferArena for the characters.
382 class JSStringBuilder : public StringBuffer {
383 public:
384 explicit JSStringBuilder(JSContext* cx)
385 : StringBuffer(cx, js::StringBufferArena) {}
388 * Creates a string from the characters in this buffer, then (regardless
389 * whether string creation succeeded or failed) empties the buffer.
391 * Returns nullptr if string creation failed.
393 JSLinearString* finishString(gc::Heap heap = gc::Heap::Default);
396 inline bool StringBuffer::append(const char16_t* begin, const char16_t* end) {
397 MOZ_ASSERT(begin <= end);
398 if (isLatin1()) {
399 while (true) {
400 if (begin >= end) {
401 return true;
403 if (*begin > JSString::MAX_LATIN1_CHAR) {
404 break;
406 if (!latin1Chars().append(*begin)) {
407 return false;
409 ++begin;
411 if (!inflateChars()) {
412 return false;
415 return twoByteChars().append(begin, end);
418 inline bool StringBuffer::append(JSLinearString* str) {
419 JS::AutoCheckCannotGC nogc;
420 if (isLatin1()) {
421 if (str->hasLatin1Chars()) {
422 return latin1Chars().append(str->latin1Chars(nogc), str->length());
424 if (!inflateChars()) {
425 return false;
428 return str->hasLatin1Chars()
429 ? twoByteChars().append(str->latin1Chars(nogc), str->length())
430 : twoByteChars().append(str->twoByteChars(nogc), str->length());
433 inline void StringBuffer::infallibleAppendSubstring(JSLinearString* base,
434 size_t off, size_t len) {
435 MOZ_ASSERT(off + len <= base->length());
436 MOZ_ASSERT_IF(base->hasTwoByteChars(), isTwoByte());
438 JS::AutoCheckCannotGC nogc;
439 if (base->hasLatin1Chars()) {
440 infallibleAppend(base->latin1Chars(nogc) + off, len);
441 } else {
442 infallibleAppend(base->twoByteChars(nogc) + off, len);
446 inline bool StringBuffer::appendSubstring(JSLinearString* base, size_t off,
447 size_t len) {
448 MOZ_ASSERT(off + len <= base->length());
450 JS::AutoCheckCannotGC nogc;
451 if (isLatin1()) {
452 if (base->hasLatin1Chars()) {
453 return latin1Chars().append(base->latin1Chars(nogc) + off, len);
455 if (!inflateChars()) {
456 return false;
459 return base->hasLatin1Chars()
460 ? twoByteChars().append(base->latin1Chars(nogc) + off, len)
461 : twoByteChars().append(base->twoByteChars(nogc) + off, len);
464 inline bool StringBuffer::appendSubstring(JSString* base, size_t off,
465 size_t len) {
466 MOZ_ASSERT(maybeCx_);
468 JSLinearString* linear = base->ensureLinear(maybeCx_);
469 if (!linear) {
470 return false;
473 return appendSubstring(linear, off, len);
476 inline bool StringBuffer::append(JSString* str) {
477 MOZ_ASSERT(maybeCx_);
479 JSLinearString* linear = str->ensureLinear(maybeCx_);
480 if (!linear) {
481 return false;
484 return append(linear);
487 /* ES5 9.8 ToString, appending the result to the string buffer. */
488 extern bool ValueToStringBufferSlow(JSContext* cx, const Value& v,
489 StringBuffer& sb);
491 inline bool ValueToStringBuffer(JSContext* cx, const Value& v,
492 StringBuffer& sb) {
493 if (v.isString()) {
494 return sb.append(v.toString());
497 return ValueToStringBufferSlow(cx, v, sb);
500 /* ES5 9.8 ToString for booleans, appending the result to the string buffer. */
501 inline bool BooleanToStringBuffer(bool b, StringBuffer& sb) {
502 return b ? sb.append("true") : sb.append("false");
505 } /* namespace js */
507 #endif /* util_StringBuffer_h */