1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 * SourceText encapsulates a count of char16_t (UTF-16) or Utf8Unit (UTF-8)
8 * code units (note: code *units*, not bytes or code points) and those code
9 * units ("source units"). (Latin-1 is not supported: all places where Latin-1
10 * must be compiled first convert to a supported encoding.)
12 * A SourceText either observes without owning, or takes ownership of, source
13 * units passed to |SourceText::init|. Thus SourceText can be used to
14 * efficiently avoid copying.
18 * 1) The passed-in source units must be allocated with js_malloc(),
19 * js_calloc(), or js_realloc() if |SourceText::init| is instructed to take
20 * ownership of the source units.
21 * 2) If |SourceText::init| merely borrows the source units, the user must
22 * keep them alive until associated JS compilation is complete.
23 * 3) Code that calls |SourceText::take{Chars,Units}()| must keep the source
24 * units alive until JS compilation completes. Normally only the JS engine
25 * should call |SourceText::take{Chars,Units}()|.
26 * 4) Use the appropriate SourceText parameterization depending on the source units encoding.
31 * size_t length = 512;
32 * char16_t* chars = js_pod_malloc<char16_t>(length);
 * if (!chars) {
34 * JS_ReportOutOfMemory(cx);
 *   return false;
 * }
37 * JS::SourceText<char16_t> srcBuf;
38 * if (!srcBuf.init(cx, chars, length, JS::SourceOwnership::TakeOwnership)) {
 *   return false;
 * }
41 * JS::Rooted<JSScript*> script(cx);
42 * if (!JS::Compile(cx, options, srcBuf, &script)) {
 *   return false;
 * }
47 #ifndef js_SourceText_h
48 #define js_SourceText_h
50 #include "mozilla/Assertions.h" // MOZ_ASSERT
51 #include "mozilla/Attributes.h" // MOZ_COLD, MOZ_IS_CLASS_INIT
52 #include "mozilla/Likely.h" // MOZ_UNLIKELY
54 #include <stddef.h> // size_t
55 #include <stdint.h> // UINT32_MAX
56 #include <type_traits> // std::conditional_t, std::is_same_v
58 #include "js/UniquePtr.h" // js::UniquePtr
59 #include "js/Utility.h" // JS::FreePolicy
69 MOZ_COLD
extern JS_PUBLIC_API
void ReportSourceTooLong(JSContext
* cx
);
73 enum class SourceOwnership
{
78 template <typename Unit
>
79 class SourceText final
{
81 static_assert(std::is_same_v
<Unit
, mozilla::Utf8Unit
> ||
82 std::is_same_v
<Unit
, char16_t
>,
83 "Unit must be either char16_t or Utf8Unit for "
86 /** |char16_t| or |Utf8Unit| source units of uncertain validity. */
87 const Unit
* units_
= nullptr;
89 /** The length in code units of |units_|. */
93 * Whether this owns |units_| or merely observes source units owned by some other object.
96 bool ownsUnits_
= false;
99 // A C++ character type that can represent the source units -- suitable for
100 // passing to C++ string functions.
102 std::conditional_t
<std::is_same_v
<Unit
, char16_t
>, char16_t
, char>;
106 * Construct a SourceText. It must be initialized using |init()| before it
107 * can be used as compilation source text.
109 SourceText() = default;
112 * Construct a SourceText from contents extracted from |other|. This
113 * SourceText will then act exactly as |other| would have acted, had it
114 * not been passed to this function. |other| will return to its default-
115 * constructed state and must have |init()| called on it to use it.
117 SourceText(SourceText
&& other
)
118 : units_(other
.units_
),
119 length_(other
.length_
),
120 ownsUnits_(other
.ownsUnits_
) {
121 other
.units_
= nullptr;
123 other
.ownsUnits_
= false;
128 js_free(const_cast<Unit
*>(units_
));
133 * Initialize this with source unit data: |char16_t| for UTF-16 source
134 * units, or |Utf8Unit| for UTF-8 source units.
136 * If |ownership == TakeOwnership|, *this function* takes ownership of
137 * |units|, *even if* this function fails, and you MUST NOT free |units|
138 * yourself. This single-owner-friendly approach reduces risk of leaks on failure.
141 * |units| may be null if |unitsLength == 0|; if so, this will silently be
142 * initialized using non-null, unowned units.
144 [[nodiscard
]] MOZ_IS_CLASS_INIT
bool init(JSContext
* cx
, const Unit
* units
,
146 SourceOwnership ownership
) {
147 MOZ_ASSERT_IF(units
== nullptr, unitsLength
== 0);
149 // Ideally we'd use |Unit| and not cast below, but the risk of a static
150 // initializer is too great.
151 static const CharT emptyString
[] = {'\0'};
153 // Initialize all fields *before* checking length. This ensures that
154 // if |ownership == SourceOwnership::TakeOwnership|, |units| will be
155 // freed when |this|'s destructor is called.
158 length_
= static_cast<uint32_t>(unitsLength
);
159 ownsUnits_
= ownership
== SourceOwnership::TakeOwnership
;
161 units_
= reinterpret_cast<const Unit
*>(emptyString
);
166 // IMPLEMENTATION DETAIL, DO NOT RELY ON: This limit is used so we can
167 // store offsets in |JSScript|s as |uint32_t|. It could be lifted
168 // fairly easily if desired, as the compiler uses |size_t| internally.
169 if (MOZ_UNLIKELY(unitsLength
> UINT32_MAX
)) {
170 detail::ReportSourceTooLong(cx
);
178 * Exactly identical to the |init()| overload above that accepts
179 * |const Unit*|, but instead takes character data: |const CharT*|.
181 * (We can't just write this to accept |const CharT*|, because then in the
182 * UTF-16 case this overload and the one above would be identical. So we
183 * use SFINAE to expose the |CharT| overload only if it's different.)
185 template <typename Char
,
186 typename
= std::enable_if_t
<std::is_same_v
<Char
, CharT
> &&
187 !std::is_same_v
<Char
, Unit
>>>
188 [[nodiscard
]] MOZ_IS_CLASS_INIT
bool init(JSContext
* cx
, const Char
* chars
,
190 SourceOwnership ownership
) {
191 return init(cx
, reinterpret_cast<const Unit
*>(chars
), charsLength
,
196 * Initialize this using source units transferred out of |data|.
198 [[nodiscard
]] bool init(JSContext
* cx
,
199 js::UniquePtr
<Unit
[], JS::FreePolicy
> data
,
201 return init(cx
, data
.release(), dataLength
, SourceOwnership::TakeOwnership
);
205 * Exactly identical to the |init()| overload above that accepts
206 * |UniquePtr<Unit[], JS::FreePolicy>|, but instead takes character data:
207 * |UniquePtr<CharT[], JS::FreePolicy>|.
209 * (We can't just duplicate the signature above with s/Unit/CharT/, because
210 * then in the UTF-16 case this overload and the one above would be identical.
211 * So we use SFINAE to expose the |CharT| overload only if it's different.)
213 template <typename Char
,
214 typename
= std::enable_if_t
<std::is_same_v
<Char
, CharT
> &&
215 !std::is_same_v
<Char
, Unit
>>>
216 [[nodiscard
]] bool init(JSContext
* cx
,
217 js::UniquePtr
<Char
[], JS::FreePolicy
> data
,
219 return init(cx
, data
.release(), dataLength
, SourceOwnership::TakeOwnership
);
223 * Access the encapsulated data using a code unit type.
225 * This function is useful for code that wants to interact with source text
226 * as *code units*, not as string data. This doesn't matter for UTF-16,
227 * but it's a crucial distinction for UTF-8. When UTF-8 source text is
228 * encapsulated, |Unit| being |mozilla::Utf8Unit| unambiguously indicates
229 * that the code units are UTF-8. In contrast |const char*| returned by
230 * |get()| below could hold UTF-8 (or its ASCII subset) or Latin-1 or (in
231 * particularly cursed embeddings) EBCDIC or some other legacy character
232 * set. Prefer this function to |get()| wherever possible.
234 const Unit
* units() const { return units_
; }
237 * Access the encapsulated data using a character type.
239 * This function is useful for interactions with character-centric actions
240 * like interacting with UniqueChars/UniqueTwoByteChars or printing out
241 * text in a debugger, that only work with |CharT|. But as |CharT| loses
242 * encoding specificity when UTF-8 source text is encapsulated, prefer
243 * |units()| to this function.
245 const CharT
* get() const { return reinterpret_cast<const CharT
*>(units_
); }
248 * Returns true if this owns the source units and will free them on
249 * destruction. If true, it is legal to call |take{Chars,Units}()|.
251 bool ownsUnits() const { return ownsUnits_
; }
254 * Count of the underlying source units -- code units, not bytes or code
257 uint32_t length() const { return length_
; }
260 * Retrieve and take ownership of the underlying source units. The caller
261 * is now responsible for calling js_free() on the returned value, *but
262 * only after JS script compilation has completed*.
264 * After underlying source units have been taken, this will continue to
265 * refer to the same data -- it just won't own the data. get() and
266 * length() will return the same values, but ownsUnits() will be false.
267 * The taken source units must be kept alive until after JS script
268 * compilation completes, as noted above, for this to be safe.
270 * The caller must check ownsUnits() before calling takeUnits(). Taking
271 * and then free'ing an unowned buffer will have dire consequences.
274 MOZ_ASSERT(ownsUnits_
);
276 return const_cast<Unit
*>(units_
);
280 * Akin to |takeUnits()| in all respects, but returns characters rather
283 CharT
* takeChars() { return reinterpret_cast<CharT
*>(takeUnits()); }
286 SourceText(const SourceText
&) = delete;
287 void operator=(const SourceText
&) = delete;
292 #endif /* js_SourceText_h */