Backed out changeset 2450366cf7ca (bug 1891629) for causing win msix mochitest failures
[gecko.git] / js / public / SourceText.h
blob88592be8752cd773704fd4374468f63f0e336f55
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
 * SourceText encapsulates a count of char16_t (UTF-16) or Utf8Unit (UTF-8)
 * code units (note: code *units*, not bytes or code points) and those code
 * units ("source units"). (Latin-1 is not supported: all places where Latin-1
 * must be compiled first convert to a supported encoding.)
 *
 * A SourceText either observes without owning, or takes ownership of, source
 * units passed to |SourceText::init|. Thus SourceText can be used to
 * efficiently avoid copying.
 *
 * Rules for use:
 *
 *  1) The passed-in source units must be allocated with js_malloc(),
 *     js_calloc(), or js_realloc() if |SourceText::init| is instructed to take
 *     ownership of the source units.
 *  2) If |SourceText::init| merely borrows the source units, the user must
 *     keep them alive until associated JS compilation is complete.
 *  3) Code that calls |SourceText::take{Chars,Units}()| must keep the source
 *     units alive until JS compilation completes. Normally only the JS engine
 *     should call |SourceText::take{Chars,Units}()|.
 *  4) Use the appropriate SourceText parameterization depending on the source
 *     units encoding.
 *
 * Example use:
 *
 *    size_t length = 512;
 *    char16_t* chars = js_pod_malloc<char16_t>(length);
 *    if (!chars) {
 *      JS_ReportOutOfMemory(cx);
 *      return false;
 *    }
 *    JS::SourceText<char16_t> srcBuf;
 *    if (!srcBuf.init(cx, chars, length, JS::SourceOwnership::TakeOwnership)) {
 *      return false;
 *    }
 *    JS::Rooted<JSScript*> script(cx);
 *    if (!JS::Compile(cx, options, srcBuf, &script)) {
 *      return false;
 *    }
 */

47 #ifndef js_SourceText_h
48 #define js_SourceText_h
50 #include "mozilla/Assertions.h" // MOZ_ASSERT
51 #include "mozilla/Attributes.h" // MOZ_COLD, MOZ_IS_CLASS_INIT
52 #include "mozilla/Likely.h" // MOZ_UNLIKELY
54 #include <stddef.h> // size_t
55 #include <stdint.h> // UINT32_MAX
56 #include <type_traits> // std::conditional_t, std::is_same_v
58 #include "js/UniquePtr.h" // js::UniquePtr
59 #include "js/Utility.h" // JS::FreePolicy
// Forward declarations: this header only needs these names (as a template
// argument and as a pointee type), not their definitions.

namespace mozilla {
// UTF-8 code unit type; one of the two permitted |SourceText<Unit>|
// parameterizations.
union Utf8Unit;
}  // namespace mozilla

namespace js {
class FrontendContext;
}  // namespace js

69 namespace JS {

// Forward declaration; this header only refers to it by reference, in the
// |SourceText::initMaybeBorrowed| declarations.
class JS_PUBLIC_API AutoStableStringChars;

// Make |js::FrontendContext| nameable as |JS::FrontendContext|.
using FrontendContext = js::FrontendContext;

namespace detail {

// Error reporters invoked by |SourceText| initialization when the supplied
// source length exceeds |UINT32_MAX| code units. There is one overload per
// context type accepted by |SourceText::init|. Only declared here; the
// definitions are not part of this header.
MOZ_COLD extern JS_PUBLIC_API void ReportSourceTooLong(JSContext* cx);
MOZ_COLD extern JS_PUBLIC_API void ReportSourceTooLong(JS::FrontendContext* fc);

}  // namespace detail

/**
 * How a SourceText relates to the source units handed to |init()|.
 */
enum class SourceOwnership {
  /**
   * The SourceText merely observes the units; it never frees them, so the
   * caller must keep them alive.
   */
  Borrowed,

  /**
   * The SourceText owns the units and releases them with js_free() when it is
   * destroyed (unless they are taken back out first).
   */
  TakeOwnership,
};

86 template <typename Unit>
87 class SourceText final {
88 private:
89 static_assert(std::is_same_v<Unit, mozilla::Utf8Unit> ||
90 std::is_same_v<Unit, char16_t>,
91 "Unit must be either char16_t or Utf8Unit for "
92 "SourceText<Unit>");
94 /** |char16_t| or |Utf8Unit| source units of uncertain validity. */
95 const Unit* units_ = nullptr;
97 /** The length in code units of |units_|. */
98 uint32_t length_ = 0;
101 * Whether this owns |units_| or merely observes source units owned by some
102 * other object.
104 bool ownsUnits_ = false;
106 public:
107 // A C++ character type that can represent the source units -- suitable for
108 // passing to C++ string functions.
109 using CharT =
110 std::conditional_t<std::is_same_v<Unit, char16_t>, char16_t, char>;
112 public:
114 * Construct a SourceText. It must be initialized using |init()| before it
115 * can be used as compilation source text.
117 SourceText() = default;
120 * Construct a SourceText from contents extracted from |other|. This
121 * SourceText will then act exactly as |other| would have acted, had it
122 * not been passed to this function. |other| will return to its default-
123 * constructed state and must have |init()| called on it to use it.
125 SourceText(SourceText&& other)
126 : units_(other.units_),
127 length_(other.length_),
128 ownsUnits_(other.ownsUnits_) {
129 other.units_ = nullptr;
130 other.length_ = 0;
131 other.ownsUnits_ = false;
134 ~SourceText() {
135 if (ownsUnits_) {
136 js_free(const_cast<Unit*>(units_));
140 private:
141 template <typename ContextT>
142 [[nodiscard]] MOZ_IS_CLASS_INIT bool initImpl(ContextT* context,
143 const Unit* units,
144 size_t unitsLength,
145 SourceOwnership ownership) {
146 MOZ_ASSERT_IF(units == nullptr, unitsLength == 0);
148 // Ideally we'd use |Unit| and not cast below, but the risk of a static
149 // initializer is too great.
150 static const CharT emptyString[] = {'\0'};
152 // Initialize all fields *before* checking length. This ensures that
153 // if |ownership == SourceOwnership::TakeOwnership|, |units| will be
154 // freed when |this|'s destructor is called.
155 if (units) {
156 units_ = units;
157 length_ = static_cast<uint32_t>(unitsLength);
158 ownsUnits_ = ownership == SourceOwnership::TakeOwnership;
159 } else {
160 units_ = reinterpret_cast<const Unit*>(emptyString);
161 length_ = 0;
162 ownsUnits_ = false;
165 // IMPLEMENTATION DETAIL, DO NOT RELY ON: This limit is used so we can
166 // store offsets in |JSScript|s as |uint32_t|. It could be lifted
167 // fairly easily if desired, as the compiler uses |size_t| internally.
168 if (MOZ_UNLIKELY(unitsLength > UINT32_MAX)) {
169 detail::ReportSourceTooLong(context);
170 return false;
173 return true;
176 public:
178 * Initialize this with source unit data: |char16_t| for UTF-16 source
179 * units, or |Utf8Unit| for UTF-8 source units.
181 * If |ownership == TakeOwnership|, *this function* takes ownership of
182 * |units|, *even if* this function fails, and you MUST NOT free |units|
183 * yourself. This single-owner-friendly approach reduces risk of leaks on
184 * failure.
186 * |units| may be null if |unitsLength == 0|; if so, this will silently be
187 * initialized using non-null, unowned units.
189 [[nodiscard]] MOZ_IS_CLASS_INIT bool init(JSContext* cx, const Unit* units,
190 size_t unitsLength,
191 SourceOwnership ownership) {
192 return initImpl(cx, units, unitsLength, ownership);
194 [[nodiscard]] MOZ_IS_CLASS_INIT bool init(JS::FrontendContext* fc,
195 const Unit* units,
196 size_t unitsLength,
197 SourceOwnership ownership) {
198 return initImpl(fc, units, unitsLength, ownership);
202 * Exactly identical to the |init()| overload above that accepts
203 * |const Unit*|, but instead takes character data: |const CharT*|.
205 * (We can't just write this to accept |const CharT*|, because then in the
206 * UTF-16 case this overload and the one above would be identical. So we
207 * use SFINAE to expose the |CharT| overload only if it's different.)
209 template <typename Char,
210 typename = std::enable_if_t<std::is_same_v<Char, CharT> &&
211 !std::is_same_v<Char, Unit>>>
212 [[nodiscard]] MOZ_IS_CLASS_INIT bool init(JSContext* cx, const Char* chars,
213 size_t charsLength,
214 SourceOwnership ownership) {
215 return initImpl(cx, reinterpret_cast<const Unit*>(chars), charsLength,
216 ownership);
218 template <typename Char,
219 typename = std::enable_if_t<std::is_same_v<Char, CharT> &&
220 !std::is_same_v<Char, Unit>>>
221 [[nodiscard]] MOZ_IS_CLASS_INIT bool init(JS::FrontendContext* fc,
222 const Char* chars,
223 size_t charsLength,
224 SourceOwnership ownership) {
225 return initImpl(fc, reinterpret_cast<const Unit*>(chars), charsLength,
226 ownership);
230 * Initialize this using source units transferred out of |data|.
232 [[nodiscard]] bool init(JSContext* cx,
233 js::UniquePtr<Unit[], JS::FreePolicy> data,
234 size_t dataLength) {
235 return initImpl(cx, data.release(), dataLength,
236 SourceOwnership::TakeOwnership);
238 [[nodiscard]] bool init(JS::FrontendContext* fc,
239 js::UniquePtr<Unit[], JS::FreePolicy> data,
240 size_t dataLength) {
241 return initImpl(fc, data.release(), dataLength,
242 SourceOwnership::TakeOwnership);
246 * Exactly identical to the |init()| overload above that accepts
247 * |UniquePtr<Unit[], JS::FreePolicy>|, but instead takes character data:
248 * |UniquePtr<CharT[], JS::FreePolicy>|.
250 * (We can't just duplicate the signature above with s/Unit/CharT/, because
251 * then in the UTF-16 case this overload and the one above would be identical.
252 * So we use SFINAE to expose the |CharT| overload only if it's different.)
254 template <typename Char,
255 typename = std::enable_if_t<std::is_same_v<Char, CharT> &&
256 !std::is_same_v<Char, Unit>>>
257 [[nodiscard]] bool init(JSContext* cx,
258 js::UniquePtr<Char[], JS::FreePolicy> data,
259 size_t dataLength) {
260 return init(cx, data.release(), dataLength, SourceOwnership::TakeOwnership);
262 template <typename Char,
263 typename = std::enable_if_t<std::is_same_v<Char, CharT> &&
264 !std::is_same_v<Char, Unit>>>
265 [[nodiscard]] bool init(JS::FrontendContext* fc,
266 js::UniquePtr<Char[], JS::FreePolicy> data,
267 size_t dataLength) {
268 return init(fc, data.release(), dataLength, SourceOwnership::TakeOwnership);
272 * Initialize this using an AutoStableStringChars. Transfers the code units if
273 * they are owned by the AutoStableStringChars, otherwise borrow directly from
274 * the underlying JSString. The AutoStableStringChars must outlive this
275 * SourceText and must be explicitly configured to the same unit type as this
276 * SourceText.
278 [[nodiscard]] bool initMaybeBorrowed(JSContext* cx,
279 AutoStableStringChars& linearChars);
280 [[nodiscard]] bool initMaybeBorrowed(JS::FrontendContext* fc,
281 AutoStableStringChars& linearChars);
284 * Access the encapsulated data using a code unit type.
286 * This function is useful for code that wants to interact with source text
287 * as *code units*, not as string data. This doesn't matter for UTF-16,
288 * but it's a crucial distinction for UTF-8. When UTF-8 source text is
289 * encapsulated, |Unit| being |mozilla::Utf8Unit| unambiguously indicates
290 * that the code units are UTF-8. In contrast |const char*| returned by
291 * |get()| below could hold UTF-8 (or its ASCII subset) or Latin-1 or (in
292 * particularly cursed embeddings) EBCDIC or some other legacy character
293 * set. Prefer this function to |get()| wherever possible.
295 const Unit* units() const { return units_; }
298 * Access the encapsulated data using a character type.
300 * This function is useful for interactions with character-centric actions
301 * like interacting with UniqueChars/UniqueTwoByteChars or printing out
302 * text in a debugger, that only work with |CharT|. But as |CharT| loses
303 * encoding specificity when UTF-8 source text is encapsulated, prefer
304 * |units()| to this function.
306 const CharT* get() const { return reinterpret_cast<const CharT*>(units_); }
309 * Returns true if this owns the source units and will free them on
310 * destruction. If true, it is legal to call |take{Chars,Units}()|.
312 bool ownsUnits() const { return ownsUnits_; }
315 * Count of the underlying source units -- code units, not bytes or code
316 * points -- in this.
318 uint32_t length() const { return length_; }
321 * Retrieve and take ownership of the underlying source units. The caller
322 * is now responsible for calling js_free() on the returned value, *but
323 * only after JS script compilation has completed*.
325 * After underlying source units have been taken, this will continue to
326 * refer to the same data -- it just won't own the data. get() and
327 * length() will return the same values, but ownsUnits() will be false.
328 * The taken source units must be kept alive until after JS script
329 * compilation completes, as noted above, for this to be safe.
331 * The caller must check ownsUnits() before calling takeUnits(). Taking
332 * and then free'ing an unowned buffer will have dire consequences.
334 Unit* takeUnits() {
335 MOZ_ASSERT(ownsUnits_);
336 ownsUnits_ = false;
337 return const_cast<Unit*>(units_);
341 * Akin to |takeUnits()| in all respects, but returns characters rather
342 * than units.
344 CharT* takeChars() { return reinterpret_cast<CharT*>(takeUnits()); }
346 private:
347 SourceText(const SourceText&) = delete;
348 void operator=(const SourceText&) = delete;
351 } // namespace JS
353 #endif /* js_SourceText_h */