Merge mozilla-central to autoland. a=merge CLOSED TREE
[gecko.git] / js / public / String.h
blob3e003f291c4cd910e4b446ca2abbb055432f618c
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 /* JavaScript string operations. */
9 #ifndef js_String_h
10 #define js_String_h
12 #include "js/shadow/String.h" // JS::shadow::String
14 #include "mozilla/Assertions.h" // MOZ_ASSERT
15 #include "mozilla/Attributes.h" // MOZ_ALWAYS_INLINE
16 #include "mozilla/Likely.h" // MOZ_LIKELY
17 #include "mozilla/Maybe.h" // mozilla::Maybe
18 #include "mozilla/Range.h" // mozilla::Range
19 #include "mozilla/Span.h" // mozilla::Span
#include <algorithm>  // std::copy_n
#include <stddef.h>   // size_t
#include <stdint.h>   // uint32_t, uint64_t, INT32_MAX
#include <tuple>      // std::tuple
26 #include "jstypes.h" // JS_PUBLIC_API
28 #include "js/CharacterEncoding.h" // JS::UTF8Chars, JS::ConstUTF8CharsZ
29 #include "js/RootingAPI.h" // JS::Handle
30 #include "js/TypeDecls.h" // JS::Latin1Char
31 #include "js/UniquePtr.h" // JS::UniquePtr
32 #include "js/Utility.h" // JS::FreePolicy, JS::UniqueTwoByteChars
33 #include "js/Value.h" // JS::Value
35 struct JS_PUBLIC_API JSContext;
36 class JS_PUBLIC_API JSAtom;
37 class JSLinearString;
38 class JS_PUBLIC_API JSString;
40 namespace JS {
42 class JS_PUBLIC_API AutoRequireNoGC;
44 } // namespace JS
46 extern JS_PUBLIC_API JSString* JS_GetEmptyString(JSContext* cx);
48 // Don't want to export data, so provide accessors for non-inline Values.
49 extern JS_PUBLIC_API JS::Value JS_GetEmptyStringValue(JSContext* cx);
52 * String creation.
54 * NB: JS_NewUCString takes ownership of chars on success, avoiding a copy;
55 * but on error (signified by null return), it leaves chars owned by the
56 * caller. So the caller must free chars in the error case, if it has no use
57 * for them. In contrast, all the JS_New*StringCopy* functions do not take
58 * ownership of the character memory passed to them -- they copy it.
61 extern JS_PUBLIC_API JSString* JS_NewStringCopyN(JSContext* cx, const char* s,
62 size_t n);
64 extern JS_PUBLIC_API JSString* JS_NewStringCopyZ(JSContext* cx, const char* s);
66 extern JS_PUBLIC_API JSString* JS_NewStringCopyUTF8Z(
67 JSContext* cx, const JS::ConstUTF8CharsZ s);
69 extern JS_PUBLIC_API JSString* JS_NewStringCopyUTF8N(JSContext* cx,
70 const JS::UTF8Chars& s);
72 extern JS_PUBLIC_API JSString* JS_AtomizeStringN(JSContext* cx, const char* s,
73 size_t length);
75 extern JS_PUBLIC_API JSString* JS_AtomizeString(JSContext* cx, const char* s);
77 // Note: unlike the non-pinning JS_Atomize* functions, this can be called
78 // without entering a realm/zone.
79 extern JS_PUBLIC_API JSString* JS_AtomizeAndPinStringN(JSContext* cx,
80 const char* s,
81 size_t length);
83 // Note: unlike the non-pinning JS_Atomize* functions, this can be called
84 // without entering a realm/zone.
85 extern JS_PUBLIC_API JSString* JS_AtomizeAndPinString(JSContext* cx,
86 const char* s);
88 extern JS_PUBLIC_API JSString* JS_NewLatin1String(
89 JSContext* cx, js::UniquePtr<JS::Latin1Char[], JS::FreePolicy> chars,
90 size_t length);
92 extern JS_PUBLIC_API JSString* JS_NewUCString(JSContext* cx,
93 JS::UniqueTwoByteChars chars,
94 size_t length);
96 extern JS_PUBLIC_API JSString* JS_NewUCStringDontDeflate(
97 JSContext* cx, JS::UniqueTwoByteChars chars, size_t length);
99 extern JS_PUBLIC_API JSString* JS_NewUCStringCopyN(JSContext* cx,
100 const char16_t* s, size_t n);
102 extern JS_PUBLIC_API JSString* JS_NewUCStringCopyZ(JSContext* cx,
103 const char16_t* s);
105 extern JS_PUBLIC_API JSString* JS_AtomizeUCStringN(JSContext* cx,
106 const char16_t* s,
107 size_t length);
109 extern JS_PUBLIC_API JSString* JS_AtomizeUCString(JSContext* cx,
110 const char16_t* s);
112 extern JS_PUBLIC_API bool JS_CompareStrings(JSContext* cx, JSString* str1,
113 JSString* str2, int32_t* result);
115 [[nodiscard]] extern JS_PUBLIC_API bool JS_StringEqualsAscii(
116 JSContext* cx, JSString* str, const char* asciiBytes, bool* match);
118 // Same as above, but when the length of asciiBytes (excluding the
119 // trailing null, if any) is known.
120 [[nodiscard]] extern JS_PUBLIC_API bool JS_StringEqualsAscii(
121 JSContext* cx, JSString* str, const char* asciiBytes, size_t length,
122 bool* match);
124 template <size_t N>
125 [[nodiscard]] bool JS_StringEqualsLiteral(JSContext* cx, JSString* str,
126 const char (&asciiBytes)[N],
127 bool* match) {
128 MOZ_ASSERT(asciiBytes[N - 1] == '\0');
129 return JS_StringEqualsAscii(cx, str, asciiBytes, N - 1, match);
132 extern JS_PUBLIC_API size_t JS_PutEscapedString(JSContext* cx, char* buffer,
133 size_t size, JSString* str,
134 char quote);
137 * Extracting string characters and length.
139 * While getting the length of a string is infallible, getting the chars can
140 * fail. As indicated by the lack of a JSContext parameter, there are two
141 * special cases where getting the chars is infallible:
143 * The first case is for strings that have been atomized, e.g. directly by
144 * JS_AtomizeAndPinString or implicitly because it is stored in a jsid.
146 * The second case is "linear" strings that have been explicitly prepared in a
147 * fallible context by JS_EnsureLinearString. To catch errors, a separate opaque
148 * JSLinearString type is returned by JS_EnsureLinearString and expected by
149 * JS_Get{Latin1,TwoByte}StringCharsAndLength. Note, though, that this is purely
150 * a syntactic distinction: the input and output of JS_EnsureLinearString are
151 * the same actual GC-thing. If a JSString is known to be linear,
152 * JS_ASSERT_STRING_IS_LINEAR can be used to make a debug-checked cast. Example:
154 * // In a fallible context.
155 * JSLinearString* lstr = JS_EnsureLinearString(cx, str);
156 * if (!lstr) {
157 * return false;
159 * MOZ_ASSERT(lstr == JS_ASSERT_STRING_IS_LINEAR(str));
161 * // In an infallible context, for the same 'str'.
162 * AutoCheckCannotGC nogc;
163 * const char16_t* chars = JS::GetTwoByteLinearStringChars(nogc, lstr);
164 * MOZ_ASSERT(chars);
166 * Note: JS strings (including linear strings and atoms) are not
167 * null-terminated!
169 * Additionally, string characters are stored as either Latin1Char (8-bit)
170 * or char16_t (16-bit). Clients can use JS::StringHasLatin1Chars and can then
171 * call either the Latin1* or TwoByte* functions. Some functions like
172 * JS_CopyStringChars and JS_GetStringCharAt accept both Latin1 and TwoByte
173 * strings.
176 extern JS_PUBLIC_API size_t JS_GetStringLength(JSString* str);
178 extern JS_PUBLIC_API bool JS_StringIsLinear(JSString* str);
180 extern JS_PUBLIC_API const JS::Latin1Char* JS_GetLatin1StringCharsAndLength(
181 JSContext* cx, const JS::AutoRequireNoGC& nogc, JSString* str,
182 size_t* length);
184 extern JS_PUBLIC_API const char16_t* JS_GetTwoByteStringCharsAndLength(
185 JSContext* cx, const JS::AutoRequireNoGC& nogc, JSString* str,
186 size_t* length);
188 extern JS_PUBLIC_API bool JS_GetStringCharAt(JSContext* cx, JSString* str,
189 size_t index, char16_t* res);
191 extern JS_PUBLIC_API const char16_t* JS_GetTwoByteExternalStringChars(
192 JSString* str);
194 extern JS_PUBLIC_API bool JS_CopyStringChars(
195 JSContext* cx, const mozilla::Range<char16_t>& dest, JSString* str);
198 * Copies the string's characters to a null-terminated char16_t buffer.
200 * Returns nullptr on OOM.
202 extern JS_PUBLIC_API JS::UniqueTwoByteChars JS_CopyStringCharsZ(JSContext* cx,
203 JSString* str);
205 extern JS_PUBLIC_API JSLinearString* JS_EnsureLinearString(JSContext* cx,
206 JSString* str);
208 static MOZ_ALWAYS_INLINE JSLinearString* JS_ASSERT_STRING_IS_LINEAR(
209 JSString* str) {
210 MOZ_ASSERT(JS_StringIsLinear(str));
211 return reinterpret_cast<JSLinearString*>(str);
214 static MOZ_ALWAYS_INLINE JSString* JS_FORGET_STRING_LINEARNESS(
215 JSLinearString* str) {
216 return reinterpret_cast<JSString*>(str);
220 * Additional APIs that avoid fallibility when given a linear string.
223 extern JS_PUBLIC_API bool JS_LinearStringEqualsAscii(JSLinearString* str,
224 const char* asciiBytes);
225 extern JS_PUBLIC_API bool JS_LinearStringEqualsAscii(JSLinearString* str,
226 const char* asciiBytes,
227 size_t length);
229 template <size_t N>
230 bool JS_LinearStringEqualsLiteral(JSLinearString* str,
231 const char (&asciiBytes)[N]) {
232 MOZ_ASSERT(asciiBytes[N - 1] == '\0');
233 return JS_LinearStringEqualsAscii(str, asciiBytes, N - 1);
236 extern JS_PUBLIC_API size_t JS_PutEscapedLinearString(char* buffer, size_t size,
237 JSLinearString* str,
238 char quote);
241 * Create a dependent string, i.e., a string that owns no character storage,
242 * but that refers to a slice of another string's chars. Dependent strings
243 * are mutable by definition, so the thread safety comments above apply.
245 extern JS_PUBLIC_API JSString* JS_NewDependentString(JSContext* cx,
246 JS::Handle<JSString*> str,
247 size_t start,
248 size_t length);
251 * Concatenate two strings, possibly resulting in a rope.
252 * See above for thread safety comments.
254 extern JS_PUBLIC_API JSString* JS_ConcatStrings(JSContext* cx,
255 JS::Handle<JSString*> left,
256 JS::Handle<JSString*> right);
259 * For JS_DecodeBytes, set *dstlenp to the size of the destination buffer before
260 * the call; on return, *dstlenp contains the number of characters actually
261 * stored. To determine the necessary destination buffer size, make a sizing
262 * call that passes nullptr for dst.
264 * On errors, the functions report the error. In that case, *dstlenp contains
265 * the number of characters or bytes transferred so far. If cx is nullptr, no
266 * error is reported on failure, and the functions simply return false.
268 * NB: This function does not store an additional zero byte or char16_t after
269 * the transcoded string.
271 JS_PUBLIC_API bool JS_DecodeBytes(JSContext* cx, const char* src, size_t srclen,
272 char16_t* dst, size_t* dstlenp);
275 * Get number of bytes in the string encoding (without accounting for a
276 * terminating zero bytes. The function returns (size_t) -1 if the string
277 * can not be encoded into bytes and reports an error using cx accordingly.
279 JS_PUBLIC_API size_t JS_GetStringEncodingLength(JSContext* cx, JSString* str);
282 * Encode string into a buffer. The function does not stores an additional
283 * zero byte. The function returns (size_t) -1 if the string can not be
284 * encoded into bytes with no error reported. Otherwise it returns the number
285 * of bytes that are necessary to encode the string. If that exceeds the
286 * length parameter, the string will be cut and only length bytes will be
287 * written into the buffer.
289 [[nodiscard]] JS_PUBLIC_API bool JS_EncodeStringToBuffer(JSContext* cx,
290 JSString* str,
291 char* buffer,
292 size_t length);
295 * Encode as many scalar values of the string as UTF-8 as can fit
296 * into the caller-provided buffer replacing unpaired surrogates
297 * with the REPLACEMENT CHARACTER.
299 * If JS::StringHasLatin1Chars(str) returns true, the function
300 * is guaranteed to convert the entire string if
301 * buffer.Length() >= 2 * JS_GetStringLength(str). Otherwise,
302 * the function is guaranteed to convert the entire string if
303 * buffer.Length() >= 3 * JS_GetStringLength(str).
305 * This function does not alter the representation of |str| or
306 * any |JSString*| substring that is a constituent part of it.
307 * Returns mozilla::Nothing() on OOM, without reporting an error;
308 * some data may have been written to |buffer| when this happens.
310 * If there's no OOM, returns the number of code units read and
311 * the number of code units written.
313 * The semantics of this method match the semantics of
314 * TextEncoder.encodeInto().
316 * The function does not store an additional zero byte.
318 JS_PUBLIC_API mozilla::Maybe<std::tuple<size_t, size_t>>
319 JS_EncodeStringToUTF8BufferPartial(JSContext* cx, JSString* str,
320 mozilla::Span<char> buffer);
322 namespace JS {
/**
 * Maximum length of a JS string. The value is chosen so that the byte size of
 * a null-terminated TwoByte buffer of this length still fits in int32_t; the
 * static_assert below enforces that.
 */
static constexpr uint32_t MaxStringLength = (uint32_t(1) << 30) - 2;

static_assert(sizeof(char16_t) * (uint64_t(MaxStringLength) + 1) <= INT32_MAX,
              "size of null-terminated JSString char buffer must fit in "
              "INT32_MAX");
334 /** Compute the length of a string. */
335 MOZ_ALWAYS_INLINE size_t GetStringLength(JSString* s) {
336 return shadow::AsShadowString(s)->length();
339 /** Compute the length of a linear string. */
340 MOZ_ALWAYS_INLINE size_t GetLinearStringLength(JSLinearString* s) {
341 return shadow::AsShadowString(s)->length();
344 /** Return true iff the given linear string uses Latin-1 storage. */
345 MOZ_ALWAYS_INLINE bool LinearStringHasLatin1Chars(JSLinearString* s) {
346 return shadow::AsShadowString(s)->hasLatin1Chars();
349 /** Return true iff the given string uses Latin-1 storage. */
350 MOZ_ALWAYS_INLINE bool StringHasLatin1Chars(JSString* s) {
351 return shadow::AsShadowString(s)->hasLatin1Chars();
355 * Given a linear string known to use Latin-1 storage, return a pointer to that
356 * storage. This pointer remains valid only as long as no GC occurs.
358 MOZ_ALWAYS_INLINE const Latin1Char* GetLatin1LinearStringChars(
359 const AutoRequireNoGC& nogc, JSLinearString* linear) {
360 return shadow::AsShadowString(linear)->latin1LinearChars();
364 * Given a linear string known to use two-byte storage, return a pointer to that
365 * storage. This pointer remains valid only as long as no GC occurs.
367 MOZ_ALWAYS_INLINE const char16_t* GetTwoByteLinearStringChars(
368 const AutoRequireNoGC& nogc, JSLinearString* linear) {
369 return shadow::AsShadowString(linear)->twoByteLinearChars();
373 * Given an in-range index into the provided string, return the character at
374 * that index.
376 MOZ_ALWAYS_INLINE char16_t GetLinearStringCharAt(JSLinearString* linear,
377 size_t index) {
378 shadow::String* s = shadow::AsShadowString(linear);
379 MOZ_ASSERT(index < s->length());
381 return s->hasLatin1Chars() ? s->latin1LinearChars()[index]
382 : s->twoByteLinearChars()[index];
386 * Convert an atom to a linear string. All atoms are linear, so this
387 * operation is infallible.
389 MOZ_ALWAYS_INLINE JSLinearString* AtomToLinearString(JSAtom* atom) {
390 return reinterpret_cast<JSLinearString*>(atom);
394 * If the provided string uses externally-managed latin-1 storage, return true
395 * and set |*callbacks| to the external-string callbacks used to create it and
396 * |*chars| to a pointer to its latin1 storage. (These pointers remain valid
397 * as long as the provided string is kept alive.)
399 MOZ_ALWAYS_INLINE bool IsExternalStringLatin1(
400 JSString* str, const JSExternalStringCallbacks** callbacks,
401 const JS::Latin1Char** chars) {
402 shadow::String* s = shadow::AsShadowString(str);
404 if (!s->isExternal() || !s->hasLatin1Chars()) {
405 return false;
408 *callbacks = s->externalCallbacks;
409 *chars = s->nonInlineCharsLatin1;
410 return true;
414 * If the provided string uses externally-managed two-byte storage, return true
415 * and set |*callbacks| to the external-string callbacks used to create it and
416 * |*chars| to a pointer to its two-byte storage. (These pointers remain valid
417 * as long as the provided string is kept alive.)
419 MOZ_ALWAYS_INLINE bool IsExternalUCString(
420 JSString* str, const JSExternalStringCallbacks** callbacks,
421 const char16_t** chars) {
422 shadow::String* s = shadow::AsShadowString(str);
424 if (!s->isExternal() || s->hasLatin1Chars()) {
425 return false;
428 *callbacks = s->externalCallbacks;
429 *chars = s->nonInlineCharsTwoByte;
430 return true;
433 namespace detail {
435 extern JS_PUBLIC_API JSLinearString* StringToLinearStringSlow(JSContext* cx,
436 JSString* str);
438 } // namespace detail
440 /** Convert a string to a linear string. */
441 MOZ_ALWAYS_INLINE JSLinearString* StringToLinearString(JSContext* cx,
442 JSString* str) {
443 if (MOZ_LIKELY(shadow::AsShadowString(str)->isLinear())) {
444 return reinterpret_cast<JSLinearString*>(str);
447 return detail::StringToLinearStringSlow(cx, str);
450 /** Copy characters in |s[start..start + len]| to |dest[0..len]|. */
451 MOZ_ALWAYS_INLINE void CopyLinearStringChars(char16_t* dest, JSLinearString* s,
452 size_t len, size_t start = 0) {
453 #ifdef DEBUG
454 size_t stringLen = GetLinearStringLength(s);
455 MOZ_ASSERT(start <= stringLen);
456 MOZ_ASSERT(len <= stringLen - start);
457 #endif
459 shadow::String* str = shadow::AsShadowString(s);
461 if (str->hasLatin1Chars()) {
462 const Latin1Char* src = str->latin1LinearChars();
463 for (size_t i = 0; i < len; i++) {
464 dest[i] = src[start + i];
466 } else {
467 const char16_t* src = str->twoByteLinearChars();
468 std::copy_n(src + start, len, dest);
473 * Copy characters in |s[start..start + len]| to |dest[0..len]|, lossily
474 * truncating 16-bit values to |char| if necessary.
476 MOZ_ALWAYS_INLINE void LossyCopyLinearStringChars(char* dest, JSLinearString* s,
477 size_t len,
478 size_t start = 0) {
479 #ifdef DEBUG
480 size_t stringLen = GetLinearStringLength(s);
481 MOZ_ASSERT(start <= stringLen);
482 MOZ_ASSERT(len <= stringLen - start);
483 #endif
485 shadow::String* str = shadow::AsShadowString(s);
487 if (LinearStringHasLatin1Chars(s)) {
488 const Latin1Char* src = str->latin1LinearChars();
489 for (size_t i = 0; i < len; i++) {
490 dest[i] = char(src[start + i]);
492 } else {
493 const char16_t* src = str->twoByteLinearChars();
494 for (size_t i = 0; i < len; i++) {
495 dest[i] = char(src[start + i]);
501 * Copy characters in |s[start..start + len]| to |dest[0..len]|.
503 * This function is fallible. If you already have a linear string, use the
504 * infallible |JS::CopyLinearStringChars| above instead.
506 [[nodiscard]] inline bool CopyStringChars(JSContext* cx, char16_t* dest,
507 JSString* s, size_t len,
508 size_t start = 0) {
509 JSLinearString* linear = StringToLinearString(cx, s);
510 if (!linear) {
511 return false;
514 CopyLinearStringChars(dest, linear, len, start);
515 return true;
519 * Copy characters in |s[start..start + len]| to |dest[0..len]|, lossily
520 * truncating 16-bit values to |char| if necessary.
522 * This function is fallible. If you already have a linear string, use the
523 * infallible |JS::LossyCopyLinearStringChars| above instead.
525 [[nodiscard]] inline bool LossyCopyStringChars(JSContext* cx, char* dest,
526 JSString* s, size_t len,
527 size_t start = 0) {
528 JSLinearString* linear = StringToLinearString(cx, s);
529 if (!linear) {
530 return false;
533 LossyCopyLinearStringChars(dest, linear, len, start);
534 return true;
537 } // namespace JS
539 /** DO NOT USE, only present for Rust bindings as a temporary hack */
540 [[deprecated]] extern JS_PUBLIC_API bool JS_DeprecatedStringHasLatin1Chars(
541 JSString* str);
543 // JSString* is an aligned pointer, but this information isn't available in the
544 // public header. We specialize HasFreeLSB here so that JS::Result<JSString*>
545 // compiles.
547 namespace mozilla {
548 namespace detail {
549 template <>
550 struct HasFreeLSB<JSString*> {
551 static constexpr bool value = true;
553 } // namespace detail
554 } // namespace mozilla
556 #endif // js_String_h