1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 /* JavaScript string operations. */
12 #include "js/shadow/String.h" // JS::shadow::String
14 #include "mozilla/Assertions.h" // MOZ_ASSERT
15 #include "mozilla/Attributes.h" // MOZ_ALWAYS_INLINE
16 #include "mozilla/Likely.h" // MOZ_LIKELY
17 #include "mozilla/Maybe.h" // mozilla::Maybe
18 #include "mozilla/Range.h" // mozilla::Range
19 #include "mozilla/Span.h" // mozilla::Span
20 #include "mozilla/Tuple.h" // mozilla::Tuple
22 #include <algorithm> // std::copy_n
23 #include <stddef.h> // size_t
24 #include <stdint.h> // uint32_t, uint64_t, INT32_MAX
26 #include "jstypes.h" // JS_PUBLIC_API
28 #include "js/CharacterEncoding.h" // JS::UTF8Chars, JS::ConstUTF8CharsZ
29 #include "js/Id.h" // jsid, JSID_IS_STRING, JSID_TO_STRING
30 #include "js/RootingAPI.h" // JS::Handle
31 #include "js/TypeDecls.h" // JS::Latin1Char
32 #include "js/UniquePtr.h" // JS::UniquePtr
33 #include "js/Utility.h" // JS::FreePolicy, JS::UniqueTwoByteChars
34 #include "js/Value.h" // JS::Value
36 struct JS_PUBLIC_API JSContext
;
37 class JS_PUBLIC_API JSAtom
;
39 class JS_PUBLIC_API JSString
;
43 class JS_PUBLIC_API AutoRequireNoGC
;
47 extern JS_PUBLIC_API JSString
* JS_GetEmptyString(JSContext
* cx
);
49 // Don't want to export data, so provide accessors for non-inline Values.
50 extern JS_PUBLIC_API
JS::Value
JS_GetEmptyStringValue(JSContext
* cx
);
55 * NB: JS_NewUCString takes ownership of chars on success, avoiding a copy;
56 * but on error (signified by null return), it leaves chars owned by the
57 * caller. So the caller must free chars in the error case, if it has no use
58 * for them. In contrast, all the JS_New*StringCopy* functions do not take
59 * ownership of the character memory passed to them -- they copy it.
62 extern JS_PUBLIC_API JSString
* JS_NewStringCopyN(JSContext
* cx
, const char* s
,
65 extern JS_PUBLIC_API JSString
* JS_NewStringCopyZ(JSContext
* cx
, const char* s
);
67 extern JS_PUBLIC_API JSString
* JS_NewStringCopyUTF8Z(
68 JSContext
* cx
, const JS::ConstUTF8CharsZ s
);
70 extern JS_PUBLIC_API JSString
* JS_NewStringCopyUTF8N(JSContext
* cx
,
71 const JS::UTF8Chars s
);
73 extern JS_PUBLIC_API JSString
* JS_AtomizeAndPinJSString(JSContext
* cx
,
74 JS::HandleString str
);
76 extern JS_PUBLIC_API JSString
* JS_AtomizeStringN(JSContext
* cx
, const char* s
,
79 extern JS_PUBLIC_API JSString
* JS_AtomizeString(JSContext
* cx
, const char* s
);
81 extern JS_PUBLIC_API JSString
* JS_AtomizeAndPinStringN(JSContext
* cx
,
85 extern JS_PUBLIC_API JSString
* JS_AtomizeAndPinString(JSContext
* cx
,
88 extern JS_PUBLIC_API JSString
* JS_NewLatin1String(
89 JSContext
* cx
, js::UniquePtr
<JS::Latin1Char
[], JS::FreePolicy
> chars
,
92 extern JS_PUBLIC_API JSString
* JS_NewUCString(JSContext
* cx
,
93 JS::UniqueTwoByteChars chars
,
96 extern JS_PUBLIC_API JSString
* JS_NewUCStringDontDeflate(
97 JSContext
* cx
, JS::UniqueTwoByteChars chars
, size_t length
);
99 extern JS_PUBLIC_API JSString
* JS_NewUCStringCopyN(JSContext
* cx
,
100 const char16_t
* s
, size_t n
);
102 extern JS_PUBLIC_API JSString
* JS_NewUCStringCopyZ(JSContext
* cx
,
105 extern JS_PUBLIC_API JSString
* JS_AtomizeUCStringN(JSContext
* cx
,
109 extern JS_PUBLIC_API JSString
* JS_AtomizeUCString(JSContext
* cx
,
112 extern JS_PUBLIC_API JSString
* JS_AtomizeAndPinUCStringN(JSContext
* cx
,
116 extern JS_PUBLIC_API JSString
* JS_AtomizeAndPinUCString(JSContext
* cx
,
119 extern JS_PUBLIC_API
bool JS_CompareStrings(JSContext
* cx
, JSString
* str1
,
120 JSString
* str2
, int32_t* result
);
122 [[nodiscard
]] extern JS_PUBLIC_API
bool JS_StringEqualsAscii(
123 JSContext
* cx
, JSString
* str
, const char* asciiBytes
, bool* match
);
125 // Same as above, but when the length of asciiBytes (excluding the
126 // trailing null, if any) is known.
127 [[nodiscard
]] extern JS_PUBLIC_API
bool JS_StringEqualsAscii(
128 JSContext
* cx
, JSString
* str
, const char* asciiBytes
, size_t length
,
132 [[nodiscard
]] bool JS_StringEqualsLiteral(JSContext
* cx
, JSString
* str
,
133 const char (&asciiBytes
)[N
],
// Convenience wrapper for character-array arguments: the array extent N
// supplies the length, so the work is forwarded to the length-taking
// JS_StringEqualsAscii overload.
//
// The last array element must be the terminating NUL (asserted).
135 MOZ_ASSERT(asciiBytes
[N
- 1] == '\0');
// N - 1 leaves that terminator out of the compared length.
136 return JS_StringEqualsAscii(cx
, str
, asciiBytes
, N
- 1, match
);
139 extern JS_PUBLIC_API
size_t JS_PutEscapedString(JSContext
* cx
, char* buffer
,
140 size_t size
, JSString
* str
,
144 * Extracting string characters and length.
146 * While getting the length of a string is infallible, getting the chars can
147 * fail. As indicated by the lack of a JSContext parameter, there are two
148 * special cases where getting the chars is infallible:
150 * The first case is for strings that have been atomized, e.g. directly by
151 * JS_AtomizeAndPinString or implicitly because it is stored in a jsid.
153 * The second case is "linear" strings that have been explicitly prepared in a
154 * fallible context by JS_EnsureLinearString. To catch errors, a separate opaque
155 * JSLinearString type is returned by JS_EnsureLinearString and expected by
156 * JS_Get{Latin1,TwoByte}StringCharsAndLength. Note, though, that this is purely
157 * a syntactic distinction: the input and output of JS_EnsureLinearString are
158 * the same actual GC-thing. If a JSString is known to be linear,
159 * JS_ASSERT_STRING_IS_LINEAR can be used to make a debug-checked cast. Example:
161 * // In a fallible context.
162 * JSLinearString* lstr = JS_EnsureLinearString(cx, str);
166 * MOZ_ASSERT(lstr == JS_ASSERT_STRING_IS_LINEAR(str));
168 * // In an infallible context, for the same 'str'.
169 * AutoCheckCannotGC nogc;
170 * const char16_t* chars = JS::GetTwoByteLinearStringChars(nogc, lstr);
173 * Note: JS strings (including linear strings and atoms) are not
176 * Additionally, string characters are stored as either Latin1Char (8-bit)
177 * or char16_t (16-bit). Clients can use JS::StringHasLatin1Chars and can then
178 * call either the Latin1* or TwoByte* functions. Some functions like
179 * JS_CopyStringChars and JS_GetStringCharAt accept both Latin1 and TwoByte
183 extern JS_PUBLIC_API
size_t JS_GetStringLength(JSString
* str
);
185 extern JS_PUBLIC_API
bool JS_StringIsLinear(JSString
* str
);
187 extern JS_PUBLIC_API
const JS::Latin1Char
* JS_GetLatin1StringCharsAndLength(
188 JSContext
* cx
, const JS::AutoRequireNoGC
& nogc
, JSString
* str
,
191 extern JS_PUBLIC_API
const char16_t
* JS_GetTwoByteStringCharsAndLength(
192 JSContext
* cx
, const JS::AutoRequireNoGC
& nogc
, JSString
* str
,
195 extern JS_PUBLIC_API
bool JS_GetStringCharAt(JSContext
* cx
, JSString
* str
,
196 size_t index
, char16_t
* res
);
198 extern JS_PUBLIC_API
const char16_t
* JS_GetTwoByteExternalStringChars(
201 extern JS_PUBLIC_API
bool JS_CopyStringChars(JSContext
* cx
,
202 mozilla::Range
<char16_t
> dest
,
206 * Copies the string's characters to a null-terminated char16_t buffer.
208 * Returns nullptr on OOM.
210 extern JS_PUBLIC_API
JS::UniqueTwoByteChars
JS_CopyStringCharsZ(JSContext
* cx
,
213 extern JS_PUBLIC_API JSLinearString
* JS_EnsureLinearString(JSContext
* cx
,
216 static MOZ_ALWAYS_INLINE JSLinearString
* JSID_TO_LINEAR_STRING(jsid id
) {
// Hands back the string stored in |id| typed as JSLinearString*.
// Precondition (asserted): |id| holds a string.
217 MOZ_ASSERT(JSID_IS_STRING(id
));
// The cast only reinterprets the pointer returned by JSID_TO_STRING; no
// linearity check is performed here.
218 return reinterpret_cast<JSLinearString
*>(JSID_TO_STRING(id
));
221 static MOZ_ALWAYS_INLINE JSLinearString
* JS_ASSERT_STRING_IS_LINEAR(
// Assertion-checked cast from JSString* to JSLinearString*: linearity is
// verified with JS_StringIsLinear inside MOZ_ASSERT only.
223 MOZ_ASSERT(JS_StringIsLinear(str
));
// Same pointer, reinterpreted; nothing is converted or allocated.
224 return reinterpret_cast<JSLinearString
*>(str
);
227 static MOZ_ALWAYS_INLINE JSString
* JS_FORGET_STRING_LINEARNESS(
228 JSLinearString
* str
) {
// Inverse of the linear casts: returns the same pointer typed as a plain
// JSString*. No check is needed or performed.
229 return reinterpret_cast<JSString
*>(str
);
233 * Additional APIs that avoid fallibility when given a linear string.
236 extern JS_PUBLIC_API
bool JS_LinearStringEqualsAscii(JSLinearString
* str
,
237 const char* asciiBytes
);
238 extern JS_PUBLIC_API
bool JS_LinearStringEqualsAscii(JSLinearString
* str
,
239 const char* asciiBytes
,
243 bool JS_LinearStringEqualsLiteral(JSLinearString
* str
,
244 const char (&asciiBytes
)[N
]) {
// Character-array convenience wrapper around the length-taking
// JS_LinearStringEqualsAscii overload; the array extent N supplies the length.
//
// The last array element must be the terminating NUL (asserted).
245 MOZ_ASSERT(asciiBytes
[N
- 1] == '\0');
// N - 1 leaves that terminator out of the compared length.
246 return JS_LinearStringEqualsAscii(str
, asciiBytes
, N
- 1);
249 extern JS_PUBLIC_API
size_t JS_PutEscapedLinearString(char* buffer
, size_t size
,
254 * Create a dependent string, i.e., a string that owns no character storage,
255 * but that refers to a slice of another string's chars. Dependent strings
256 * are mutable by definition, so the thread safety comments above apply.
258 extern JS_PUBLIC_API JSString
* JS_NewDependentString(JSContext
* cx
,
259 JS::Handle
<JSString
*> str
,
264 * Concatenate two strings, possibly resulting in a rope.
265 * See above for thread safety comments.
267 extern JS_PUBLIC_API JSString
* JS_ConcatStrings(JSContext
* cx
,
268 JS::Handle
<JSString
*> left
,
269 JS::Handle
<JSString
*> right
);
272 * For JS_DecodeBytes, set *dstlenp to the size of the destination buffer before
273 * the call; on return, *dstlenp contains the number of characters actually
274 * stored. To determine the necessary destination buffer size, make a sizing
275 * call that passes nullptr for dst.
277 * On errors, the functions report the error. In that case, *dstlenp contains
278 * the number of characters or bytes transferred so far. If cx is nullptr, no
279 * error is reported on failure, and the functions simply return false.
281 * NB: This function does not store an additional zero byte or char16_t after
282 * the transcoded string.
284 JS_PUBLIC_API
bool JS_DecodeBytes(JSContext
* cx
, const char* src
, size_t srclen
,
285 char16_t
* dst
, size_t* dstlenp
);
288 * Get number of bytes in the string encoding (without accounting for a
289 * terminating zero byte). The function returns (size_t) -1 if the string
290 * cannot be encoded into bytes and reports an error using cx accordingly.
292 JS_PUBLIC_API
size_t JS_GetStringEncodingLength(JSContext
* cx
, JSString
* str
);
295 * Encode string into a buffer. The function does not store an additional
296 * zero byte. The function returns (size_t) -1 if the string can not be
297 * encoded into bytes with no error reported. Otherwise it returns the number
298 * of bytes that are necessary to encode the string. If that exceeds the
299 * length parameter, the string will be cut and only length bytes will be
300 * written into the buffer.
302 [[nodiscard
]] JS_PUBLIC_API
bool JS_EncodeStringToBuffer(JSContext
* cx
,
308 * Encode as many scalar values of the string as UTF-8 as can fit
309 * into the caller-provided buffer replacing unpaired surrogates
310 * with the REPLACEMENT CHARACTER.
312 * If JS::StringHasLatin1Chars(str) returns true, the function
313 * is guaranteed to convert the entire string if
314 * buffer.Length() >= 2 * JS_GetStringLength(str). Otherwise,
315 * the function is guaranteed to convert the entire string if
316 * buffer.Length() >= 3 * JS_GetStringLength(str).
318 * This function does not alter the representation of |str| or
319 * any |JSString*| substring that is a constituent part of it.
320 * Returns mozilla::Nothing() on OOM, without reporting an error;
321 * some data may have been written to |buffer| when this happens.
323 * If there's no OOM, returns the number of code units read and
324 * the number of code units written.
326 * The semantics of this method match the semantics of
327 * TextEncoder.encodeInto().
329 * The function does not store an additional zero byte.
331 JS_PUBLIC_API
mozilla::Maybe
<mozilla::Tuple
<size_t, size_t>>
332 JS_EncodeStringToUTF8BufferPartial(JSContext
* cx
, JSString
* str
,
333 mozilla::Span
<char> buffer
);
338 * Maximum length of a JS string. This is chosen so that the number of bytes
339 * allocated for a null-terminated TwoByte string still fits in int32_t.
341 static constexpr uint32_t MaxStringLength
// (1 << 30) - 2 == 1073741822. With one extra element for a terminator and
// sizeof(char16_t) == 2, that is (2^30 - 1) * 2 == 2^31 - 2 bytes.
= (1 << 30) - 2;
343 static_assert((uint64_t(MaxStringLength
) + 1) * sizeof(char16_t
) <= INT32_MAX
,
344 "size of null-terminated JSString char buffer must fit in "
347 /** Compute the length of a string. */
348 MOZ_ALWAYS_INLINE
size_t GetStringLength(JSString
* s
) {
// Inline read of the length through the shadow::String view of |s|.
349 return shadow::AsShadowString(s
)->length();
352 /** Compute the length of a linear string. */
353 MOZ_ALWAYS_INLINE
size_t GetLinearStringLength(JSLinearString
* s
) {
// Same shadow::String length read as GetStringLength, for callers that
// already hold a JSLinearString*.
354 return shadow::AsShadowString(s
)->length();
357 /** Return true iff the given linear string uses Latin-1 storage. */
358 MOZ_ALWAYS_INLINE
bool LinearStringHasLatin1Chars(JSLinearString
* s
) {
// Asks the shadow::String view of |s| which character storage is in use.
359 return shadow::AsShadowString(s
)->hasLatin1Chars();
362 /** Return true iff the given string uses Latin-1 storage. */
363 MOZ_ALWAYS_INLINE
bool StringHasLatin1Chars(JSString
* s
) {
// Same shadow::String query as LinearStringHasLatin1Chars, accepting any
// JSString*.
364 return shadow::AsShadowString(s
)->hasLatin1Chars();
368 * Given a linear string known to use Latin-1 storage, return a pointer to that
369 * storage. This pointer remains valid only as long as no GC occurs.
371 MOZ_ALWAYS_INLINE
const Latin1Char
* GetLatin1LinearStringChars(
372 const AutoRequireNoGC
& nogc
, JSLinearString
* linear
) {
// |nogc| is not read in the body. NOTE(review): presumably it is only a
// token proving the caller has ruled out GC while the pointer is in use --
// confirm.
373 return shadow::AsShadowString(linear
)->latin1LinearChars();
377 * Given a linear string known to use two-byte storage, return a pointer to that
378 * storage. This pointer remains valid only as long as no GC occurs.
380 MOZ_ALWAYS_INLINE
const char16_t
* GetTwoByteLinearStringChars(
381 const AutoRequireNoGC
& nogc
, JSLinearString
* linear
) {
// |nogc| is not read in the body. NOTE(review): presumably it is only a
// token proving the caller has ruled out GC while the pointer is in use --
// confirm.
382 return shadow::AsShadowString(linear
)->twoByteLinearChars();
386 * Given an in-range index into the provided string, return the character at
389 MOZ_ALWAYS_INLINE char16_t
GetLinearStringCharAt(JSLinearString
* linear
,
391 shadow::String
* s
= shadow::AsShadowString(linear
);
// An out-of-range index is a caller bug: it is asserted, not handled.
392 MOZ_ASSERT(index
< s
->length());
// Index into whichever storage the string uses; either element is returned
// as char16_t.
394 return s
->hasLatin1Chars() ? s
->latin1LinearChars()[index
]
395 : s
->twoByteLinearChars()[index
];
399 * Convert an atom to a linear string. All atoms are linear, so this
400 * operation is infallible.
402 MOZ_ALWAYS_INLINE JSLinearString
* AtomToLinearString(JSAtom
* atom
) {
// Pure pointer reinterpretation: no check and no allocation happen here.
403 return reinterpret_cast<JSLinearString
*>(atom
);
407 * If the provided string uses externally-managed storage, return true and set
408 * |*callbacks| to the external-string callbacks used to create it and |*chars|
409 * to a pointer to its two-byte storage. (These pointers remain valid as long
410 * as the provided string is kept alive.)
412 MOZ_ALWAYS_INLINE
bool IsExternalString(
413 JSString
* str
, const JSExternalStringCallbacks
** callbacks
,
414 const char16_t
** chars
) {
// View |str| through shadow::String to reach its external flag and fields.
415 shadow::String
* s
= shadow::AsShadowString(str
);
// Test whether the string is flagged as external.
417 if (!s
->isExternal()) {
// Report the callbacks and the two-byte character pointer recorded on the
// string through the out-parameters.
421 *callbacks
= s
->externalCallbacks
;
422 *chars
= s
->nonInlineCharsTwoByte
;
428 extern JS_PUBLIC_API JSLinearString
* StringToLinearStringSlow(JSContext
* cx
,
431 } // namespace detail
433 /** Convert a string to a linear string. */
434 MOZ_ALWAYS_INLINE JSLinearString
* StringToLinearString(JSContext
* cx
,
// Fast path, hinted as the likely case: the string is already linear, so the
// same pointer is returned reinterpreted as JSLinearString*.
436 if (MOZ_LIKELY(shadow::AsShadowString(str
)->isLinear())) {
437 return reinterpret_cast<JSLinearString
*>(str
);
// Otherwise defer to the out-of-line slow path, which is the only part that
// uses |cx|.
440 return detail::StringToLinearStringSlow(cx
, str
);
443 /** Copy characters in |s[start..start + len]| to |dest[0..len]|. */
444 MOZ_ALWAYS_INLINE
void CopyLinearStringChars(char16_t
* dest
, JSLinearString
* s
,
445 size_t len
, size_t start
= 0) {
// The requested range must lie inside the string (asserted, not handled).
// |start| is checked first, so |stringLen - start| in the second assertion
// cannot wrap around.
447 size_t stringLen
= GetLinearStringLength(s
);
448 MOZ_ASSERT(start
<= stringLen
);
449 MOZ_ASSERT(len
<= stringLen
- start
);
452 shadow::String
* str
= shadow::AsShadowString(s
);
// Latin-1 storage: element types differ, so each Latin1Char is assigned into
// the char16_t destination one at a time.
454 if (str
->hasLatin1Chars()) {
455 const Latin1Char
* src
= str
->latin1LinearChars();
456 for (size_t i
= 0; i
< len
; i
++) {
457 dest
[i
] = src
[start
+ i
];
// Two-byte storage: source and destination share the char16_t element type,
// so the range is copied directly with std::copy_n.
460 const char16_t
* src
= str
->twoByteLinearChars();
461 std::copy_n(src
+ start
, len
, dest
);
466 * Copy characters in |s[start..start + len]| to |dest[0..len]|, lossily
467 * truncating 16-bit values to |char| if necessary.
469 MOZ_ALWAYS_INLINE
void LossyCopyLinearStringChars(char* dest
, JSLinearString
* s
,
// The requested range must lie inside the string (asserted, not handled).
// |start| is checked first, so |stringLen - start| in the second assertion
// cannot wrap around.
473 size_t stringLen
= GetLinearStringLength(s
);
474 MOZ_ASSERT(start
<= stringLen
);
475 MOZ_ASSERT(len
<= stringLen
- start
);
478 shadow::String
* str
= shadow::AsShadowString(s
);
// Latin-1 storage: each element is converted to |char| one at a time.
480 if (LinearStringHasLatin1Chars(s
)) {
481 const Latin1Char
* src
= str
->latin1LinearChars();
482 for (size_t i
= 0; i
< len
; i
++) {
483 dest
[i
] = char(src
[start
+ i
]);
// Two-byte storage: the char(...) conversion below is where 16-bit values are
// lossily narrowed to |char|.
486 const char16_t
* src
= str
->twoByteLinearChars();
487 for (size_t i
= 0; i
< len
; i
++) {
488 dest
[i
] = char(src
[start
+ i
]);
494 * Copy characters in |s[start..start + len]| to |dest[0..len]|.
496 * This function is fallible. If you already have a linear string, use the
497 * infallible |JS::CopyLinearStringChars| above instead.
499 [[nodiscard
]] inline bool CopyStringChars(JSContext
* cx
, char16_t
* dest
,
500 JSString
* s
, size_t len
,
// Step 1: obtain a linear view of |s|; this is the step that uses |cx|.
502 JSLinearString
* linear
= StringToLinearString(cx
, s
);
// Step 2: reuse the linear-string copy for the actual character transfer.
507 CopyLinearStringChars(dest
, linear
, len
, start
);
512 * Copy characters in |s[start..start + len]| to |dest[0..len]|, lossily
513 * truncating 16-bit values to |char| if necessary.
515 * This function is fallible. If you already have a linear string, use the
516 * infallible |JS::LossyCopyLinearStringChars| above instead.
518 [[nodiscard
]] inline bool LossyCopyStringChars(JSContext
* cx
, char* dest
,
519 JSString
* s
, size_t len
,
// Step 1: obtain a linear view of |s|; this is the step that uses |cx|.
521 JSLinearString
* linear
= StringToLinearString(cx
, s
);
// Step 2: reuse the lossy linear-string copy for the actual character
// transfer.
526 LossyCopyLinearStringChars(dest
, linear
, len
, start
);
532 /** DO NOT USE, only present for Rust bindings as a temporary hack */
533 [[deprecated
]] extern JS_PUBLIC_API
bool JS_DeprecatedStringHasLatin1Chars(
536 #endif // js_String_h