1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #ifndef util_StringBuffer_h
8 #define util_StringBuffer_h
10 #include "mozilla/CheckedInt.h"
11 #include "mozilla/MaybeOneOf.h"
12 #include "mozilla/Utf8.h"
14 #include "frontend/FrontendContext.h"
15 #include "js/Vector.h"
16 #include "vm/StringType.h"
20 class FrontendContext
;
23 class ParserAtomsTable
;
24 class TaggedParserAtomIndex
;
25 } // namespace frontend
29 // GrowEltsAggressively will multiply the space by a factor of 8 on overflow, to
30 // avoid very expensive memcpys for large strings (eg giant toJSON output for
31 // sessionstore.js). Drop back to the normal expansion policy once the buffer
// would grow past AggressiveLimit elements (128 << 20 == 134,217,728); the
// limit is compared against the element count in GrowEltsAggressively.
33 static constexpr size_t AggressiveLimit
= 128 << 20;
// Growth helper: computes the element capacity to use when a buffer holding
// aOldElts elements, each EltSize bytes wide, needs room for aIncr more.
// Returns the aggressively grown element count computed below, or, when that
// count overflows or exceeds AggressiveLimit, whatever
// mozilla::detail::GrowEltsByDoubling<EltSize>(aOldElts, aIncr) returns.
// NOTE(review): some lines of this function (e.g. the body of the first
// overflow check) are not visible in this excerpt.
35 template <size_t EltSize
>
36 inline size_t GrowEltsAggressively(size_t aOldElts
, size_t aIncr
) {
// Minimum element count needed; CheckedInt tracks overflow of the sum so it
// can be tested with isValid() instead of silently wrapping.
37 mozilla::CheckedInt
<size_t> required
=
38 mozilla::CheckedInt
<size_t>(aOldElts
) + aIncr
;
// Taken when doubling the required count would overflow size_t (the handling
// inside this branch is not visible here).
39 if (!(required
* 2).isValid()) {
// Round the required count up to a power of two.
42 required
= mozilla::RoundUpPow2(required
.value());
// Reject the aggressive size if its byte size (count * EltSize) overflows or
// if the count is larger than AggressiveLimit.
44 if (!(required
* EltSize
).isValid() || required
.value() > AggressiveLimit
) {
45 // Fall back to doubling behavior if the aggressive growth fails or gets too
// large (i.e. exceeds AggressiveLimit).
47 return mozilla::detail::GrowEltsByDoubling
<EltSize
>(aOldElts
, aIncr
);
49 return required
.value();
// Allocation policy that forwards every request to a wrapped TempAllocPolicy.
// Allocating calls go through the policy's *_arena_* variants with a fixed
// arena id; free_ and the reporting hooks are forwarded unchanged.
// NOTE(review): access specifiers and the `template <typename T>` lines that
// introduce T for the allocation methods are not visible in this excerpt.
54 class StringBufferAllocPolicy
{
// Wrapped policy that performs the actual allocation and error reporting.
55 TempAllocPolicy impl_
;
// Arena id passed to every arena allocation call. Stored as a reference, so
// the referenced arena_id_t object must outlive this policy.
56 const arena_id_t
& arenaId_
;
// Builds the policy from a FrontendContext, which is handed to impl_.
59 StringBufferAllocPolicy(FrontendContext
* fc
, const arena_id_t
& arenaId
)
60 : impl_(fc
), arenaId_(arenaId
) {}
// Builds the policy from a JSContext, which is handed to impl_.
62 StringBufferAllocPolicy(JSContext
* cx
, const arena_id_t
& arenaId
)
63 : impl_(cx
), arenaId_(arenaId
) {}
// Forwards to impl_.maybe_pod_arena_malloc<T> for numElems elements in
// arenaId_.
66 T
* maybe_pod_malloc(size_t numElems
) {
67 return impl_
.maybe_pod_arena_malloc
<T
>(arenaId_
, numElems
);
// Forwards to impl_.maybe_pod_arena_calloc<T> for numElems elements in
// arenaId_.
70 T
* maybe_pod_calloc(size_t numElems
) {
71 return impl_
.maybe_pod_arena_calloc
<T
>(arenaId_
, numElems
);
// Forwards to impl_.maybe_pod_arena_realloc<T>, passing p, oldSize and newSize
// through together with arenaId_.
74 T
* maybe_pod_realloc(T
* p
, size_t oldSize
, size_t newSize
) {
75 return impl_
.maybe_pod_arena_realloc
<T
>(arenaId_
, p
, oldSize
, newSize
);
// Forwards to impl_.pod_arena_malloc<T> for numElems elements in arenaId_.
78 T
* pod_malloc(size_t numElems
) {
79 return impl_
.pod_arena_malloc
<T
>(arenaId_
, numElems
);
// Forwards to impl_.pod_arena_calloc<T> for numElems elements in arenaId_.
82 T
* pod_calloc(size_t numElems
) {
83 return impl_
.pod_arena_calloc
<T
>(arenaId_
, numElems
);
// Forwards to impl_.pod_arena_realloc<T>, passing p, oldSize and newSize
// through together with arenaId_.
86 T
* pod_realloc(T
* p
, size_t oldSize
, size_t newSize
) {
87 return impl_
.pod_arena_realloc
<T
>(arenaId_
, p
, oldSize
, newSize
);
// Releases p through impl_.free_; numElems defaults to 0 and no arena id is
// passed for frees.
90 void free_(T
* p
, size_t numElems
= 0) {
91 impl_
.free_(p
, numElems
);
// Reporting / testing hooks, forwarded unchanged to the wrapped policy.
93 void reportAllocOverflow() const { impl_
.reportAllocOverflow(); }
94 bool checkSimulatedOOM() const { return impl_
.checkSimulatedOOM(); }
96 // See ComputeGrowth in mfbt/Vector.h.
// Delegates the capacity computation to detail::GrowEltsAggressively<EltSize>.
97 template <size_t EltSize
>
98 static size_t computeGrowth(size_t aOldElts
, size_t aIncr
) {
99 return detail::GrowEltsAggressively
<EltSize
>(aOldElts
, aIncr
);
104 * String builder that eagerly checks for over-allocation past the maximum
 * string length.
107 * Any operation which would exceed the maximum string length causes an
108 * exception report on the context and results in a failed return value.
110 * Well-sized extractions (which waste no more than 1/4 of their char
111 * buffer space) are guaranteed for strings built by this interface.
112 * See |extractWellSized|.
// Vector of CharT allocated through StringBufferAllocPolicy. The second
// template argument is 64 / sizeof(CharT), i.e. 64 bytes' worth of elements
// (presumably Vector's inline capacity -- confirm against js/Vector.h).
116 template <typename CharT
>
117 using BufferType
= Vector
<CharT
, 64 / sizeof(CharT
), StringBufferAllocPolicy
>;
120 * The Vector's buffer may be either stolen or copied, so we need to use
121 * TempAllocPolicy and account for the memory manually when stealing.
// The two concrete buffer types this class can hold: one for Latin1Char
// elements and one for char16_t elements.
123 using Latin1CharBuffer
= BufferType
<Latin1Char
>;
124 using TwoByteCharBuffer
= BufferType
<char16_t
>;
// Optional JSContext, nullptr by default. The JSString-based append() and
// appendSubstring() overloads assert that it is non-null and pass it to
// ensureLinear().
126 JSContext
* maybeCx_
= nullptr;
129 * If Latin1 strings are enabled, cb starts out as a Latin1CharBuffer. When
130 * a TwoByte char is appended, inflateChars() constructs a TwoByteCharBuffer
131 * and copies the Latin1 chars.
// Storage for the active character buffer: either a Latin1CharBuffer or a
// TwoByteCharBuffer. Both constructors visible in this file construct the
// Latin1 variant.
133 mozilla::MaybeOneOf
<Latin1CharBuffer
, TwoByteCharBuffer
> cb
;
135 /* Number of reserve()'d chars, see inflateChars. */
// reserve() compares its argument against this value.
136 size_t reserved_
= 0;
// Copy construction and copy assignment are both disabled.
138 StringBuffer(const StringBuffer
& other
) = delete;
139 void operator=(const StringBuffer
& other
) = delete;
// Returns true when cb currently holds a BufferType<CharT>, as reported by
// cb.constructed<>().
141 template <typename CharT
>
142 MOZ_ALWAYS_INLINE
bool isCharType() const {
143 return cb
.constructed
<BufferType
<CharT
>>();
// True when the active buffer is the Latin1Char buffer.
146 MOZ_ALWAYS_INLINE
bool isLatin1() const { return isCharType
<Latin1Char
>(); }
// True when the active buffer is the char16_t buffer.
148 MOZ_ALWAYS_INLINE
bool isTwoByte() const { return isCharType
<char16_t
>(); }
// Returns the active buffer as a mutable BufferType<CharT>. Asserts (via
// MOZ_ASSERT) that CharT matches the buffer type currently held by cb.
150 template <typename CharT
>
151 MOZ_ALWAYS_INLINE BufferType
<CharT
>& chars() {
152 MOZ_ASSERT(isCharType
<CharT
>());
153 return cb
.ref
<BufferType
<CharT
>>();
// Const overload of chars(): same assertion, read-only access.
156 template <typename CharT
>
157 MOZ_ALWAYS_INLINE
const BufferType
<CharT
>& chars() const {
158 MOZ_ASSERT(isCharType
<CharT
>());
159 return cb
.ref
<BufferType
<CharT
>>();
// Typed shorthands for chars<char16_t>() and chars<Latin1Char>(). Each one
// inherits the assertion in chars() that the requested type is the active one.
162 MOZ_ALWAYS_INLINE TwoByteCharBuffer
& twoByteChars() {
163 return chars
<char16_t
>();
// Const access to the two-byte buffer.
166 MOZ_ALWAYS_INLINE
const TwoByteCharBuffer
& twoByteChars() const {
167 return chars
<char16_t
>();
// Mutable access to the Latin1 buffer.
170 MOZ_ALWAYS_INLINE Latin1CharBuffer
& latin1Chars() {
171 return chars
<Latin1Char
>();
// Const access to the Latin1 buffer.
174 MOZ_ALWAYS_INLINE
const Latin1CharBuffer
& latin1Chars() const {
175 return chars
<Latin1Char
>();
// Switches the buffer from Latin1 to two-byte storage. Per the comment on cb
// in this file, it constructs a TwoByteCharBuffer and copies the Latin1 chars.
// Callers in this file treat a false return as failure. Defined outside this
// header excerpt.
178 [[nodiscard
]] bool inflateChars();
// Declaration only; the definition is not visible in this excerpt.
// NOTE(review): presumably the shared implementation behind the finish*
// methods, parameterized on the character type -- confirm at the definition.
180 template <typename CharT
>
181 JSLinearString
* finishStringInternal(JSContext
* cx
, gc::Heap heap
);
// Constructs a buffer bound to a JSContext. cb starts out as a
// Latin1CharBuffer whose allocations go through a StringBufferAllocPolicy
// built from cx and arenaId (default: js::MallocArena).
// NOTE(review): the member-initializer list of this constructor is not
// visible in this excerpt.
184 explicit StringBuffer(JSContext
* cx
,
185 const arena_id_t
& arenaId
= js::MallocArena
)
188 cb
.construct
<Latin1CharBuffer
>(StringBufferAllocPolicy
{cx
, arenaId
});
191 // This constructor should only be used if the methods related to the
192 // following are not used, because they require a JSContext:
195 // * mozilla::Utf8Unit
// Like the JSContext constructor, cb starts out as a Latin1CharBuffer, here
// with a StringBufferAllocPolicy built from fc and arenaId (default:
// js::MallocArena). maybeCx_ is not set in the visible part of this
// constructor, so it keeps its nullptr default unless set elsewhere.
196 explicit StringBuffer(FrontendContext
* fc
,
197 const arena_id_t
& arenaId
= js::MallocArena
) {
199 cb
.construct
<Latin1CharBuffer
>(StringBufferAllocPolicy
{fc
, arenaId
});
204 latin1Chars().clear();
206 twoByteChars().clear();
// Reserves capacity for len chars in whichever buffer is active and returns
// the result of that buffer's reserve() call.
// NOTE(review): the body of the `len > reserved_` branch is not visible in
// this excerpt; presumably it records len in reserved_ -- confirm.
209 [[nodiscard
]] bool reserve(size_t len
) {
210 if (len
> reserved_
) {
213 return isLatin1() ? latin1Chars().reserve(len
)
214 : twoByteChars().reserve(len
);
// Forwards resize(len) to whichever buffer is active and returns its result.
216 [[nodiscard
]] bool resize(size_t len
) {
217 return isLatin1() ? latin1Chars().resize(len
) : twoByteChars().resize(len
);
// Forwards growByUninitialized(incr) to whichever buffer is active and returns
// its result.
219 [[nodiscard
]] bool growByUninitialized(size_t incr
) {
220 return isLatin1() ? latin1Chars().growByUninitialized(incr
)
221 : twoByteChars().growByUninitialized(incr
);
// Forwards shrinkTo(newLength) to whichever buffer is active. The `return` of
// a void expression is intentional C++ and yields nothing.
223 void shrinkTo(size_t newLength
) {
224 return isLatin1() ? latin1Chars().shrinkTo(newLength
)
225 : twoByteChars().shrinkTo(newLength
);
228 return isLatin1() ? latin1Chars().empty() : twoByteChars().empty();
// Number of chars currently in the active buffer.
230 size_t length() const {
231 return isLatin1() ? latin1Chars().length() : twoByteChars().length();
// Returns the char at index idx of the active buffer. A Latin1 char is widened
// to char16_t by the return type. Any bounds checking is left to the buffer's
// operator[].
233 char16_t
getChar(size_t idx
) const {
234 return isLatin1() ? latin1Chars()[idx
] : twoByteChars()[idx
];
// Returns true if the buffer already is two-byte; otherwise returns the result
// of inflateChars(). Short-circuits, so inflateChars() only runs for Latin1
// buffers.
237 [[nodiscard
]] bool ensureTwoByteChars() {
238 return isTwoByte() || inflateChars();
// Appends one UTF-16 code unit. A value <= JSString::MAX_LATIN1_CHAR is
// narrowed to Latin1Char and appended to the Latin1 buffer; otherwise
// inflateChars() is called before appending to the two-byte buffer.
// NOTE(review): the enclosing isLatin1() test and the handling of a failed
// inflateChars() are not visible in this excerpt.
241 [[nodiscard
]] bool append(const char16_t c
) {
243 if (c
<= JSString::MAX_LATIN1_CHAR
) {
244 return latin1Chars().append(Latin1Char(c
));
246 if (!inflateChars()) {
250 return twoByteChars().append(c
);
// Appends one Latin1 char to whichever buffer is active. No inflation is
// needed: in the two-byte case c is appended to the char16_t buffer.
252 [[nodiscard
]] bool append(Latin1Char c
) {
253 return isLatin1() ? latin1Chars().append(c
) : twoByteChars().append(c
);
// Appends a plain char by converting it to Latin1Char first.
255 [[nodiscard
]] bool append(char c
) { return append(Latin1Char(c
)); }
// Appends the char16_t range [begin, end). Defined inline later in this file.
257 [[nodiscard
]] inline bool append(const char16_t
* begin
, const char16_t
* end
);
// Pointer + length convenience overload: forwards to the range overload with
// [chars, chars + len).
259 [[nodiscard
]] bool append(const char16_t
* chars
, size_t len
) {
260 return append(chars
, chars
+ len
);
// Appends the Latin1 range [begin, end) to whichever buffer is active and
// returns that buffer's append() result.
263 [[nodiscard
]] bool append(const Latin1Char
* begin
, const Latin1Char
* end
) {
264 return isLatin1() ? latin1Chars().append(begin
, end
)
265 : twoByteChars().append(begin
, end
);
// Pointer + length convenience overload: forwards to the range overload with
// [chars, chars + len).
267 [[nodiscard
]] bool append(const Latin1Char
* chars
, size_t len
) {
268 return append(chars
, chars
+ len
);
272 * Interpret the provided count of UTF-8 code units as UTF-8, and append
273 * the represented code points to this. If the code units contain invalid
274 * UTF-8, leave the internal buffer in a consistent but unspecified state,
275 * report an error, and return false.
277 [[nodiscard
]] bool append(const mozilla::Utf8Unit
* units
, size_t len
);
// Appends len chars starting at chars.get(), by forwarding the range
// [chars.get(), chars.get() + len) to a pointer-range append() overload.
// NOTE(review): which overload is chosen depends on the pointer type returned
// by JS::ConstCharPtr::get(), which is not visible here.
279 [[nodiscard
]] bool append(const JS::ConstCharPtr chars
, size_t len
) {
280 return append(chars
.get(), chars
.get() + len
);
// Forwards appendN(c, n) to whichever buffer is active and returns its result.
282 [[nodiscard
]] bool appendN(Latin1Char c
, size_t n
) {
283 return isLatin1() ? latin1Chars().appendN(c
, n
)
284 : twoByteChars().appendN(c
, n
);
287 [[nodiscard
]] inline bool append(JSString
* str
);
288 [[nodiscard
]] inline bool append(JSLinearString
* str
);
289 [[nodiscard
]] inline bool appendSubstring(JSString
* base
, size_t off
,
291 [[nodiscard
]] inline bool appendSubstring(JSLinearString
* base
, size_t off
,
293 [[nodiscard
]] bool append(const frontend::ParserAtomsTable
& parserAtoms
,
294 frontend::TaggedParserAtomIndex atom
);
// Appends len chars from a plain char pointer by reinterpreting it as a
// Latin1Char pointer and forwarding to the Latin1 pointer + length overload.
296 [[nodiscard
]] bool append(const char* chars
, size_t len
) {
297 return append(reinterpret_cast<const Latin1Char
*>(chars
), len
);
// Appends a char array, typically a string literal. ArrayLength is deduced
// from the array type and the final element is dropped, on the assumption
// that it is the terminating '\0'.
300 template <size_t ArrayLength
>
301 [[nodiscard
]] bool append(const char (&array
)[ArrayLength
]) {
302 return append(array
, ArrayLength
- 1); /* No trailing '\0'. */
305 /* Infallible variants usable when the corresponding space is reserved. */
// Appends one Latin1 char without a failure path.
// NOTE(review): the if/else that picks between the two calls below is not
// visible in this excerpt; presumably it is selected by isLatin1() -- confirm.
306 void infallibleAppend(Latin1Char c
) {
308 latin1Chars().infallibleAppend(c
);
310 twoByteChars().infallibleAppend(c
);
// Plain-char overload: converts to Latin1Char and forwards.
313 void infallibleAppend(char c
) { infallibleAppend(Latin1Char(c
)); }
// Appends len Latin1 chars starting at chars without a failure path.
// NOTE(review): as above, the branch selecting the target buffer is not
// visible in this excerpt.
314 void infallibleAppend(const Latin1Char
* chars
, size_t len
) {
316 latin1Chars().infallibleAppend(chars
, len
);
318 twoByteChars().infallibleAppend(chars
, len
);
// Plain-char pointer overload: reinterprets the pointer as Latin1Char and
// forwards to the Latin1 pointer + length overload.
321 void infallibleAppend(const char* chars
, size_t len
) {
322 infallibleAppend(reinterpret_cast<const Latin1Char
*>(chars
), len
);
325 void infallibleAppendSubstring(JSLinearString
* base
, size_t off
, size_t len
);
328 * Because inflation is fallible, these methods should only be used after
329 * calling ensureTwoByteChars().
// Two-byte infallible appends. Both go straight to twoByteChars(), which
// asserts that the buffer is already two-byte, so callers must have inflated
// the buffer beforehand.
331 void infallibleAppend(const char16_t
* chars
, size_t len
) {
332 twoByteChars().infallibleAppend(chars
, len
);
// Single code unit variant of the above.
334 void infallibleAppend(char16_t c
) { twoByteChars().infallibleAppend(c
); }
// Accessor that returns isLatin1(), i.e. whether the active buffer is Latin1.
336 bool isUnderlyingBufferLatin1() const { return isLatin1(); }
338 template <typename CharT
>
340 return chars
<CharT
>().begin();
343 template <typename CharT
>
345 return chars
<CharT
>().end();
// Read-only pointer to the first char of the active buffer. CharT must match
// the active buffer type, which chars<CharT>() asserts.
348 template <typename CharT
>
349 const CharT
* begin() const {
350 return chars
<CharT
>().begin();
// Read-only end pointer of the active buffer, as returned by the buffer's
// end(). The same CharT requirement as begin() applies.
353 template <typename CharT
>
354 const CharT
* end() const {
355 return chars
<CharT
>().end();
// Non-template shorthands for begin<CharT>() / end<CharT>(), in mutable and
// const flavors. The two-byte accessors require a two-byte buffer and the
// Latin1 accessors require a Latin1 buffer (asserted inside chars<CharT>()).
358 char16_t
* rawTwoByteBegin() { return begin
<char16_t
>(); }
359 char16_t
* rawTwoByteEnd() { return end
<char16_t
>(); }
360 const char16_t
* rawTwoByteBegin() const { return begin
<char16_t
>(); }
361 const char16_t
* rawTwoByteEnd() const { return end
<char16_t
>(); }
// Latin1 counterparts of the accessors above.
363 Latin1Char
* rawLatin1Begin() { return begin
<Latin1Char
>(); }
364 Latin1Char
* rawLatin1End() { return end
<Latin1Char
>(); }
365 const Latin1Char
* rawLatin1Begin() const { return begin
<Latin1Char
>(); }
366 const Latin1Char
* rawLatin1End() const { return end
<Latin1Char
>(); }
368 /* Identical to finishString() except that an atom is created. */
369 JSAtom
* finishAtom();
370 frontend::TaggedParserAtomIndex
finishParserAtom(
371 frontend::ParserAtomsTable
& parserAtoms
, FrontendContext
* fc
);
374 * Creates a raw string from the characters in this buffer. The string is
375 * exactly the characters in this buffer (inflated to TwoByte), it is *not*
376 * null-terminated unless the last appended character was '\0'.
378 char16_t
* stealChars();
381 // Like StringBuffer, but uses StringBufferArena for the characters.
382 class JSStringBuilder
: public StringBuffer
{
// Forwards cx to the StringBuffer constructor together with
// js::StringBufferArena as the arena id.
384 explicit JSStringBuilder(JSContext
* cx
)
385 : StringBuffer(cx
, js::StringBufferArena
) {}
388 * Creates a string from the characters in this buffer, then (regardless
389 * whether string creation succeeded or failed) empties the buffer.
391 * Returns nullptr if string creation failed.
// Declaration only; heap defaults to gc::Heap::Default. Defined elsewhere.
393 JSLinearString
* finishString(gc::Heap heap
= gc::Heap::Default
);
// Appends the char16_t range [begin, end); asserts begin <= end.
// From the visible lines: chars are tested against JSString::MAX_LATIN1_CHAR
// and appended one at a time to the Latin1 buffer, and once inflateChars()
// succeeds the remaining range is appended to the two-byte buffer in a single
// call.
// NOTE(review): the loop structure, the isLatin1() test and the failure
// returns are not visible in this excerpt -- confirm against the full file.
396 inline bool StringBuffer::append(const char16_t
* begin
, const char16_t
* end
) {
397 MOZ_ASSERT(begin
<= end
);
403 if (*begin
> JSString::MAX_LATIN1_CHAR
) {
406 if (!latin1Chars().append(*begin
)) {
411 if (!inflateChars()) {
415 return twoByteChars().append(begin
, end
);
// Appends all chars of a linear string.
// NOTE(review): the outer condition guarding the first branch is not visible
// in this excerpt; presumably it checks isLatin1() -- confirm.
418 inline bool StringBuffer::append(JSLinearString
* str
) {
// nogc is passed to the latin1Chars()/twoByteChars() accessors below.
419 JS::AutoCheckCannotGC nogc
;
// Latin1 source: append its chars directly to the Latin1 buffer.
421 if (str
->hasLatin1Chars()) {
422 return latin1Chars().append(str
->latin1Chars(nogc
), str
->length());
// Otherwise switch the buffer to two-byte storage first.
424 if (!inflateChars()) {
// Two-byte buffer: append from whichever representation the string has.
428 return str
->hasLatin1Chars()
429 ? twoByteChars().append(str
->latin1Chars(nogc
), str
->length())
430 : twoByteChars().append(str
->twoByteChars(nogc
), str
->length());
// Appends len chars of base starting at offset off, using the infallible
// append variants.
// NOTE(review): the `else` between the two infallibleAppend calls is not
// visible in this excerpt.
433 inline void StringBuffer::infallibleAppendSubstring(JSLinearString
* base
,
434 size_t off
, size_t len
) {
// The requested range must lie within base.
435 MOZ_ASSERT(off
+ len
<= base
->length());
// A two-byte source requires that this buffer already be two-byte, since
// this function never inflates.
436 MOZ_ASSERT_IF(base
->hasTwoByteChars(), isTwoByte());
438 JS::AutoCheckCannotGC nogc
;
439 if (base
->hasLatin1Chars()) {
440 infallibleAppend(base
->latin1Chars(nogc
) + off
, len
);
442 infallibleAppend(base
->twoByteChars(nogc
) + off
, len
);
// Appends len chars of a linear string starting at offset off.
// NOTE(review): the end of the signature and the outer condition guarding the
// first branch (presumably isLatin1()) are not visible in this excerpt.
446 inline bool StringBuffer::appendSubstring(JSLinearString
* base
, size_t off
,
// The requested range must lie within base.
448 MOZ_ASSERT(off
+ len
<= base
->length());
450 JS::AutoCheckCannotGC nogc
;
// Latin1 source: append the sub-range directly to the Latin1 buffer.
452 if (base
->hasLatin1Chars()) {
453 return latin1Chars().append(base
->latin1Chars(nogc
) + off
, len
);
// Otherwise switch the buffer to two-byte storage first.
455 if (!inflateChars()) {
// Two-byte buffer: append from whichever representation the string has.
459 return base
->hasLatin1Chars()
460 ? twoByteChars().append(base
->latin1Chars(nogc
) + off
, len
)
461 : twoByteChars().append(base
->twoByteChars(nogc
) + off
, len
);
// Appends a substring of an arbitrary JSString by linearizing it with
// ensureLinear(maybeCx_) and forwarding to the JSLinearString overload.
// Requires a JSContext: asserts that maybeCx_ is non-null.
// NOTE(review): the end of the signature and the handling of a failed
// ensureLinear() are not visible in this excerpt.
464 inline bool StringBuffer::appendSubstring(JSString
* base
, size_t off
,
466 MOZ_ASSERT(maybeCx_
);
468 JSLinearString
* linear
= base
->ensureLinear(maybeCx_
);
473 return appendSubstring(linear
, off
, len
);
// Appends an arbitrary JSString by linearizing it with ensureLinear(maybeCx_)
// and forwarding to the JSLinearString overload.
// Requires a JSContext: asserts that maybeCx_ is non-null.
// NOTE(review): the handling of a failed ensureLinear() is not visible in this
// excerpt.
476 inline bool StringBuffer::append(JSString
* str
) {
477 MOZ_ASSERT(maybeCx_
);
479 JSLinearString
* linear
= str
->ensureLinear(maybeCx_
);
484 return append(linear
);
487 /* ES5 9.8 ToString, appending the result to the string buffer. */
488 extern bool ValueToStringBufferSlow(JSContext
* cx
, const Value
& v
,
// Appends the string form of v to sb. One path appends v.toString() directly
// via sb.append(); the other defers to ValueToStringBufferSlow(cx, v, sb).
// NOTE(review): the end of the signature and the condition choosing between
// the two paths (presumably a check that v is a string) are not visible in
// this excerpt.
491 inline bool ValueToStringBuffer(JSContext
* cx
, const Value
& v
,
494 return sb
.append(v
.toString());
497 return ValueToStringBufferSlow(cx
, v
, sb
);
500 /* ES5 9.8 ToString for booleans, appending the result to the string buffer. */
// Appends the literal "true" or "false" to sb depending on b and returns the
// result of that append. Both literals go through the char-array overload of
// StringBuffer::append.
501 inline bool BooleanToStringBuffer(bool b
, StringBuffer
& sb
) {
502 return b
? sb
.append("true") : sb
.append("false");
507 #endif /* util_StringBuffer_h */