Bug 1839526 [wpt PR 40658] - Update wpt metadata, a=testonly
[gecko.git] / js / src / irregexp / RegExpShim.h
blob5f9aff064e32dc66a2591eb5cca0a92feb90197d
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // Copyright 2019 the V8 project authors. All rights reserved.
8 // Use of this source code is governed by a BSD-style license that can be
9 // found in the LICENSE file.
11 #ifndef RegexpShim_h
12 #define RegexpShim_h
14 #include "mozilla/Assertions.h"
15 #include "mozilla/Attributes.h"
16 #include "mozilla/MathAlgorithms.h"
17 #include "mozilla/Maybe.h"
18 #include "mozilla/SegmentedVector.h"
19 #include "mozilla/Sprintf.h"
20 #include "mozilla/Types.h"
22 #include <algorithm>
23 #include <cctype>
24 #include <iterator>
26 #include "irregexp/RegExpTypes.h"
27 #include "irregexp/util/FlagsShim.h"
28 #include "irregexp/util/VectorShim.h"
29 #include "irregexp/util/ZoneShim.h"
30 #include "jit/JitCode.h"
31 #include "jit/Label.h"
32 #include "jit/shared/Assembler-shared.h"
33 #include "js/friend/StackLimits.h" // js::AutoCheckRecursionLimit
34 #include "js/RegExpFlags.h"
35 #include "js/Value.h"
36 #include "threading/ExclusiveData.h"
37 #include "util/DifferentialTesting.h"
38 #include "vm/JSContext.h"
39 #include "vm/MutexIDs.h"
40 #include "vm/NativeObject.h"
41 #include "vm/RegExpShared.h"
// Forward declarations of V8 types that this shim names before (or without)
// defining them.
namespace v8::internal {

class Heap;
class Isolate;
class RegExpMatchInfo;
class RegExpStack;

template <typename T>
class Handle;

}  // namespace v8::internal
// V8 attribute/keyword macros, spelled with the standard C++ equivalents.
// V8_EXPORT_PRIVATE expands to nothing.
58 #define V8_WARN_UNUSED_RESULT [[nodiscard]]
59 #define V8_EXPORT_PRIVATE
60 #define V8_FALLTHROUGH [[fallthrough]]
61 #define V8_NODISCARD [[nodiscard]]
62 #define V8_NOEXCEPT noexcept
// Fatal-error macros: each one forwards to MOZ_CRASH.
64 #define FATAL(x) MOZ_CRASH(x)
65 #define UNREACHABLE() MOZ_CRASH("unreachable code")
66 #define UNIMPLEMENTED() MOZ_CRASH("unimplemented code")
// Compile-time assertion whose message is the stringified expression.
67 #define STATIC_ASSERT(exp) static_assert(exp, #exp)
// V8's DCHECK family is implemented with MOZ_ASSERT / MOZ_ASSERT_IF.
69 #define DCHECK MOZ_ASSERT
70 #define DCHECK_EQ(lhs, rhs) MOZ_ASSERT((lhs) == (rhs))
71 #define DCHECK_NE(lhs, rhs) MOZ_ASSERT((lhs) != (rhs))
72 #define DCHECK_GT(lhs, rhs) MOZ_ASSERT((lhs) > (rhs))
73 #define DCHECK_GE(lhs, rhs) MOZ_ASSERT((lhs) >= (rhs))
74 #define DCHECK_LT(lhs, rhs) MOZ_ASSERT((lhs) < (rhs))
75 #define DCHECK_LE(lhs, rhs) MOZ_ASSERT((lhs) <= (rhs))
76 #define DCHECK_NULL(val) MOZ_ASSERT((val) == nullptr)
77 #define DCHECK_NOT_NULL(val) MOZ_ASSERT((val) != nullptr)
78 #define DCHECK_IMPLIES(lhs, rhs) MOZ_ASSERT_IF(lhs, rhs)
// V8's CHECK family is implemented with MOZ_RELEASE_ASSERT.
79 #define CHECK MOZ_RELEASE_ASSERT
80 #define CHECK_EQ(lhs, rhs) MOZ_RELEASE_ASSERT((lhs) == (rhs))
81 #define CHECK_LE(lhs, rhs) MOZ_RELEASE_ASSERT((lhs) <= (rhs))
82 #define CHECK_GE(lhs, rhs) MOZ_RELEASE_ASSERT((lhs) >= (rhs))
// Note: CONSTEXPR_DCHECK is the same MOZ_ASSERT mapping as DCHECK.
83 #define CONSTEXPR_DCHECK MOZ_ASSERT
// V8's MemCopy is plain memcpy here.
85 #define MemCopy memcpy
87 // Origin:
88 // https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L310-L319
89 // ptrdiff_t is 't' according to the standard, but MSVC uses 'I'.
// Each macro is a string-literal fragment (length modifier plus conversion
// letter) selected by whether _MSC_VER is defined.
90 #ifdef _MSC_VER
91 # define V8PRIxPTRDIFF "Ix"
92 # define V8PRIdPTRDIFF "Id"
93 # define V8PRIuPTRDIFF "Iu"
94 #else
95 # define V8PRIxPTRDIFF "tx"
96 # define V8PRIdPTRDIFF "td"
97 # define V8PRIuPTRDIFF "tu"
98 #endif
// V8's arraysize is spelled std::size in this shim.
100 #define arraysize std::size
// The DISALLOW_* macros expand to member declarations, so they are written
// inside the definition of the class named by TypeName.
102 // Explicitly declare the assignment operator as deleted.
103 #define DISALLOW_ASSIGN(TypeName) TypeName& operator=(const TypeName&) = delete
105 // Explicitly declare the copy constructor and assignment operator as deleted.
106 // This also deletes the implicit move constructor and implicit move assignment
107 // operator, but still allows to manually define them.
108 #define DISALLOW_COPY_AND_ASSIGN(TypeName) \
109 TypeName(const TypeName&) = delete; \
110 DISALLOW_ASSIGN(TypeName)
112 // Explicitly declare all implicit constructors as deleted, namely the
113 // default constructor, copy constructor and operator= functions.
114 // This is especially useful for classes containing only static methods.
115 #define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
116 TypeName() = delete; \
117 DISALLOW_COPY_AND_ASSIGN(TypeName)
119 namespace v8 {
// Origin:
// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L364-L367
// Returns true when none of the bits below |alignment| are set in |value|.
// The mask test equals "value is a multiple of alignment" only when
// |alignment| is a power of two.
template <typename T, typename U>
constexpr inline bool IsAligned(T value, U alignment) {
  const auto low_bits_mask = alignment - 1;
  return (value & low_bits_mask) == 0;
}
// A raw byte and an integer wide enough to hold a machine address.
using byte = uint8_t;
using Address = uintptr_t;

// The address value zero.
static constexpr Address kNullAddress = 0;
// Returns, as an integer, the frame address that the compiler builtin
// reports for the current function.
inline uintptr_t GetCurrentStackPosition() {
  void* const frame = __builtin_frame_address(0);
  return reinterpret_cast<uintptr_t>(frame);
}
136 namespace base {
// Latin1/UTF-16 constants
// Code-point values in Unicode 4.0 are 21 bits wide.
// Code units in UTF-16 are 16 bits wide.
using uc16 = char16_t;
using uc32 = uint32_t;

// Size, in bytes, of one UTF-16 code unit.
constexpr int kUC16Size = sizeof(uc16);
// Origin:
// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L247-L258
// The USE(x, ...) template is used to silence C++ compiler warnings
// issued for (yet) unused variables (typically parameters).
// The arguments are guaranteed to be evaluated from left to right.
//
// Use is constructible from a value of any type and does nothing with it;
// USE(...) builds a throwaway array of Use objects from its arguments.
struct Use {
  template <typename T>
  Use(T&&) {}  // NOLINT(runtime/explicit)
};
#define USE(...)                                                   \
  do {                                                             \
    ::v8::base::Use unused_tmp_array_for_use_macro[]{__VA_ARGS__}; \
    static_cast<void>(unused_tmp_array_for_use_macro);             \
  } while (false)
// Origin:
// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/safe_conversions.h#L35-L39
// saturated_cast<> is analogous to static_cast<> for numeric types, except
// that the specified numeric conversion will saturate rather than overflow or
// underflow.
template <typename Dst, typename Src>
inline Dst saturated_cast(Src value);

// This is the only specialization that is needed for regexp code.
// Instead of pulling in dozens of lines of template goo
// to derive it, I used the implementation from uint8_clamped in
// ArrayBufferObject.h.
//
// Negative inputs become 0, inputs of 255 or more become 255, and everything
// in between is returned unchanged.
template <>
inline uint8_t saturated_cast<uint8_t, int>(int x) {
  if (x < 0) {
    return 0;
  }
  if (x >= 255) {
    return 255;
  }
  return static_cast<uint8_t>(x);
}
// Origin:
// https://github.com/v8/v8/blob/fc088cdaccadede84886eee881e67af9db53669a/src/base/bounds.h#L14-L28
// Checks if value is in range [lower_limit, higher_limit] using a single
// branch.
//
// Both differences are taken in the unsigned counterpart of T, so a value
// below lower_limit wraps around to a large number and fails the comparison.
template <typename T, typename U>
inline constexpr bool IsInRange(T value, U lower_limit, U higher_limit) {
  using unsigned_T = std::make_unsigned_t<T>;
  // Use static_cast to support enum classes.
  const auto offset_from_lower = static_cast<unsigned_T>(
      static_cast<unsigned_T>(value) - static_cast<unsigned_T>(lower_limit));
  const auto range_width =
      static_cast<unsigned_T>(static_cast<unsigned_T>(higher_limit) -
                              static_cast<unsigned_T>(lower_limit));
  return offset_from_lower <= range_width;
}
// Initializer for statics of a LazyInstance type: an empty braced
// initializer. The replacement text sits on the continuation line; without
// it the trailing backslash would splice the next source line into the macro.
#define LAZY_INSTANCE_INITIALIZER \
  {}
195 template <typename T>
196 class LazyInstanceImpl {
197 public:
198 LazyInstanceImpl() : value_(js::mutexid::IrregexpLazyStatic) {}
200 const T* Pointer() {
201 auto val = value_.lock();
202 if (val->isNothing()) {
203 val->emplace();
205 return val->ptr();
208 private:
209 js::ExclusiveData<mozilla::Maybe<T>> value_;
212 template <typename T>
213 class LazyInstance {
214 public:
215 using type = LazyInstanceImpl<T>;
218 // Origin:
219 // https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/utils/utils.h#L40-L48
220 // Returns the value (0 .. 15) of a hexadecimal character c.
221 // If c is not a legal hexadecimal character, returns a value < 0.
222 // Used in regexp-parser.cc
223 inline int HexValue(base::uc32 c) {
224 c -= '0';
225 if (static_cast<unsigned>(c) <= 9) return c;
226 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36.
227 if (static_cast<unsigned>(c) <= 5) return c + 10;
228 return -1;
231 template <typename... Args>
232 [[nodiscard]] uint32_t hash_combine(uint32_t aHash, Args... aArgs) {
233 return mozilla::AddToHash(aHash, aArgs...);
236 template <typename T>
237 class Optional {
238 mozilla::Maybe<T> inner_;
240 public:
241 Optional() = default;
242 Optional(T t) { inner_.emplace(t); }
244 bool has_value() const { return inner_.isSome(); }
245 const T& value() const { return inner_.ref(); }
248 namespace bits {
250 inline uint64_t CountTrailingZeros(uint64_t value) {
251 return mozilla::CountTrailingZeroes64(value);
254 inline size_t RoundUpToPowerOfTwo32(size_t value) {
255 return mozilla::RoundUpPow2(value);
// True when |value| is positive and has exactly one bit set.
template <typename T>
constexpr bool IsPowerOfTwo(T value) {
  if (!(value > 0)) {
    return false;
  }
  // Clearing the lowest set bit leaves zero only for a single-bit value.
  return (value & (value - 1)) == 0;
}
263 } // namespace bits
264 } // namespace base
266 namespace unibrow {
// Unsigned integer type used for character values in the unibrow helpers
// below.
268 using uchar = unsigned int;
270 // Origin:
271 // https://github.com/v8/v8/blob/1f1e4cdb04c75eab77adbecd5f5514ddc3eb56cf/src/strings/unicode.h#L133-L150
272 class Latin1 {
273 public:
274 static const base::uc16 kMaxChar = 0xff;
276 // Convert the character to Latin-1 case equivalent if possible.
277 static inline base::uc16 TryConvertToLatin1(base::uc16 c) {
278 // "GREEK CAPITAL LETTER MU" case maps to "MICRO SIGN".
279 // "GREEK SMALL LETTER MU" case maps to "MICRO SIGN".
280 if (c == 0x039C || c == 0x03BC) {
281 return 0xB5;
283 // "LATIN CAPITAL LETTER Y WITH DIAERESIS" case maps to "LATIN SMALL LETTER
284 // Y WITH DIAERESIS".
285 if (c == 0x0178) {
286 return 0xFF;
288 return c;
292 // Origin:
293 // https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L99-L131
294 class Utf16 {
295 public:
296 static inline bool IsLeadSurrogate(int code) {
297 return js::unicode::IsLeadSurrogate(code);
299 static inline bool IsTrailSurrogate(int code) {
300 return js::unicode::IsTrailSurrogate(code);
302 static inline base::uc16 LeadSurrogate(uint32_t char_code) {
303 return js::unicode::LeadSurrogate(char_code);
305 static inline base::uc16 TrailSurrogate(uint32_t char_code) {
306 return js::unicode::TrailSurrogate(char_code);
308 static inline uint32_t CombineSurrogatePair(char16_t lead, char16_t trail) {
309 return js::unicode::UTF16Decode(lead, trail);
311 static const uchar kMaxNonSurrogateCharCode = 0xffff;
314 #ifndef V8_INTL_SUPPORT
316 // A cache used in case conversion. It caches the value for characters
317 // that either have no mapping or map to a single character independent
318 // of context. Characters that map to more than one character or that
319 // map differently depending on context are always looked up.
320 // Origin:
321 // https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L64-L88
322 template <class T, int size = 256>
323 class Mapping {
324 public:
325 inline Mapping() = default;
326 inline int get(uchar c, uchar n, uchar* result) {
327 CacheEntry entry = entries_[c & kMask];
328 if (entry.code_point_ == c) {
329 if (entry.offset_ == 0) {
330 return 0;
331 } else {
332 result[0] = c + entry.offset_;
333 return 1;
335 } else {
336 return CalculateValue(c, n, result);
340 private:
341 int CalculateValue(uchar c, uchar n, uchar* result) {
342 bool allow_caching = true;
343 int length = T::Convert(c, n, result, &allow_caching);
344 if (allow_caching) {
345 if (length == 1) {
346 entries_[c & kMask] = CacheEntry(c, result[0] - c);
347 return 1;
348 } else {
349 entries_[c & kMask] = CacheEntry(c, 0);
350 return 0;
352 } else {
353 return length;
357 struct CacheEntry {
358 inline CacheEntry() : code_point_(kNoChar), offset_(0) {}
359 inline CacheEntry(uchar code_point, signed offset)
360 : code_point_(code_point), offset_(offset) {}
361 uchar code_point_;
362 signed offset_;
363 static const int kNoChar = (1 << 21) - 1;
365 static const int kSize = size;
366 static const int kMask = kSize - 1;
367 CacheEntry entries_[kSize];
370 // Origin:
371 // https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L241-L252
372 struct Ecma262Canonicalize {
373 static const int kMaxWidth = 1;
374 static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr);
376 struct Ecma262UnCanonicalize {
377 static const int kMaxWidth = 4;
378 static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr);
380 struct CanonicalizationRange {
381 static const int kMaxWidth = 1;
382 static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr);
385 #endif // !V8_INTL_SUPPORT
387 struct Letter {
388 static bool Is(uchar c);
391 } // namespace unibrow
393 namespace internal {
// PRINTF_FORMAT(x, y) forwards to MOZ_FORMAT_PRINTF; the two numbers are the
// positions of the format string and of the first variadic argument.
395 #define PRINTF_FORMAT(x, y) MOZ_FORMAT_PRINTF(x, y)
// printf-style output functions, declared only. The second overload takes
// the destination FILE* explicitly.
396 void PRINTF_FORMAT(1, 2) PrintF(const char* format, ...);
397 void PRINTF_FORMAT(2, 3) PrintF(FILE* out, const char* format, ...);
// Superclass for classes only using static method functions.
// The subclass of AllStatic cannot be instantiated at all.
// (The constructor is only deleted when DEBUG is defined.)
class AllStatic {
#if defined(DEBUG)
 public:
  AllStatic() = delete;
#endif
};
408 // Superclass for classes managed with new and delete.
409 // In irregexp, this is only AlternativeGeneration (in regexp-compiler.cc)
410 // Compare:
411 // https://github.com/v8/v8/blob/7b3332844212d78ee87a9426f3a6f7f781a8fbfa/src/utils/allocation.cc#L88-L96
412 class Malloced {
413 public:
414 static void* operator new(size_t size) {
415 js::AutoEnterOOMUnsafeRegion oomUnsafe;
416 void* result = js_malloc(size);
417 if (!result) {
418 oomUnsafe.crash("Irregexp Malloced shim");
420 return result;
422 static void operator delete(void* p) { js_free(p); }
// Byte-count units.
constexpr int32_t KB = 1 << 10;
constexpr int32_t MB = 1 << 20;

// V8's integer limits are taken from the JSVAL_INT_* macros.
#define kMaxInt JSVAL_INT_MAX
#define kMinInt JSVAL_INT_MIN
constexpr int kSystemPointerSize = sizeof(void*);

// The largest integer n such that n and n + 1 are both exactly
// representable as a Number value. ES6 section 20.1.2.6
constexpr double kMaxSafeInteger = 9007199254740991.0;  // 2^53-1

constexpr int kBitsPerByte = 8;
constexpr int kBitsPerByteLog2 = 3;
constexpr int kUInt16Size = sizeof(uint16_t);
constexpr int kUInt32Size = sizeof(uint32_t);
constexpr int kInt64Size = sizeof(int64_t);

// Largest value representable in 16 unsigned bits.
constexpr int kMaxUInt16 = 0xFFFF;
444 inline constexpr bool IsDecimalDigit(base::uc32 c) {
445 return c >= '0' && c <= '9';
448 inline constexpr int AsciiAlphaToLower(base::uc32 c) { return c | 0x20; }
// True when no bit at position 24 or above is set, i.e. |val| fits in an
// unsigned 24-bit field.
inline bool is_uint24(int64_t val) {
  const int64_t high_bits = val >> 24;
  return high_bits == 0;
}
// True when |val| lies in the signed 24-bit range [-2^23, 2^23).
inline bool is_int24(int64_t val) {
  constexpr int64_t kBound = int64_t{1} << 23;
  return val >= -kBound && val < kBound;
}
456 inline bool IsIdentifierStart(base::uc32 c) {
457 return js::unicode::IsIdentifierStart(char32_t(c));
459 inline bool IsIdentifierPart(base::uc32 c) {
460 return js::unicode::IsIdentifierPart(char32_t(c));
// Wrappers to disambiguate char16_t and uc16.
// AsUC16 tags one UTF-16 code unit so that it can select its own stream
// output overload.
struct AsUC16 {
  explicit AsUC16(char16_t v) : value{v} {}
  char16_t value;
};
// Tags a 32-bit character value so that it can select its own stream output
// overload.
struct AsUC32 {
  explicit AsUC32(int32_t v) : value{v} {}
  int32_t value;
};
// Stream insertion for the AsUC16 / AsUC32 wrappers; declared here only.
474 std::ostream& operator<<(std::ostream& os, const AsUC16& c);
475 std::ostream& operator<<(std::ostream& os, const AsUC32& c);
// This class is used for the output of trace-regexp-parser. V8 has
// an elaborate implementation to ensure that the output gets to the
// right place, even on Android. We just need something that will
// print output (ideally to stderr, to match the rest of our tracing
// code). This is an empty wrapper that will convert itself to
// std::cerr when used.
class StdoutStream {
 public:
  // Conversion to the underlying stream (declared only).
  operator std::ostream&() const;

  // Streams |t| and returns the underlying stream (declared only).
  template <typename T>
  std::ostream& operator<<(T t);
};
490 // Reuse existing Maybe implementation
491 using mozilla::Maybe;
493 template <typename T>
494 Maybe<T> Just(const T& value) {
495 return mozilla::Some(value);
498 template <typename T>
499 mozilla::Nothing Nothing() {
500 return mozilla::Nothing();
// Owning pointer type for objects that are not managed by the GC:
// a mozilla::UniquePtr whose deleter is JS::FreePolicy.
503 template <typename T>
504 using PseudoHandle = mozilla::UniquePtr<T, JS::FreePolicy>;
// Compare 8bit/16bit chars to 8bit/16bit chars.
// Used indirectly by regexp-interpreter.cc
// Taken from: https://github.com/v8/v8/blob/master/src/utils/utils.h
//
// Returns a negative, zero or positive value in the manner of memcmp.
template <typename lchar, typename rchar>
inline int CompareCharsUnsigned(const lchar* lhs, const rchar* rhs,
                                size_t chars) {
  if (sizeof(lchar) == sizeof(char) && sizeof(rchar) == sizeof(char)) {
    // memcmp compares byte-by-byte, yielding wrong results for two-byte
    // strings on little-endian systems, so it is only used when both sides
    // are one byte wide.
    return memcmp(lhs, rhs, chars);
  }
  for (size_t i = 0; i < chars; ++i) {
    const int difference = static_cast<int>(lhs[i]) - static_cast<int>(rhs[i]);
    if (difference != 0) {
      return difference;
    }
  }
  return 0;
}
526 template <typename lchar, typename rchar>
527 inline int CompareChars(const lchar* lhs, const rchar* rhs, size_t chars) {
528 DCHECK_LE(sizeof(lchar), 2);
529 DCHECK_LE(sizeof(rchar), 2);
530 if (sizeof(lchar) == 1) {
531 if (sizeof(rchar) == 1) {
532 return CompareCharsUnsigned(reinterpret_cast<const uint8_t*>(lhs),
533 reinterpret_cast<const uint8_t*>(rhs), chars);
534 } else {
535 return CompareCharsUnsigned(reinterpret_cast<const uint8_t*>(lhs),
536 reinterpret_cast<const char16_t*>(rhs),
537 chars);
539 } else {
540 if (sizeof(rchar) == 1) {
541 return CompareCharsUnsigned(reinterpret_cast<const char16_t*>(lhs),
542 reinterpret_cast<const uint8_t*>(rhs), chars);
543 } else {
544 return CompareCharsUnsigned(reinterpret_cast<const char16_t*>(lhs),
545 reinterpret_cast<const char16_t*>(rhs),
546 chars);
// Compare 8bit/16bit chars to 8bit/16bit chars.
// Returns true when the first |chars| elements of both buffers are equal.
// Both element types must be unsigned.
template <typename lchar, typename rchar>
inline bool CompareCharsEqualUnsigned(const lchar* lhs, const rchar* rhs,
                                      size_t chars) {
  static_assert(std::is_unsigned<lchar>::value,
                "std::is_unsigned<lchar>::value");
  static_assert(std::is_unsigned<rchar>::value,
                "std::is_unsigned<rchar>::value");
  if (sizeof(lchar) == sizeof(rchar)) {
    // memcmp compares byte-by-byte, but for equality it doesn't matter whether
    // two-byte char comparison is little- or big-endian.
    return memcmp(lhs, rhs, chars * sizeof(lchar)) == 0;
  }
  // Different widths: compare element by element.
  return std::equal(lhs, lhs + chars, rhs);
}
// Equality comparison of two character buffers. Each side is viewed through
// the unsigned counterpart of its element type and passed to
// CompareCharsEqualUnsigned.
template <typename lchar, typename rchar>
inline bool CompareCharsEqual(const lchar* lhs, const rchar* rhs,
                              size_t chars) {
  using unsigned_lchar = std::make_unsigned_t<lchar>;
  using unsigned_rchar = std::make_unsigned_t<rchar>;
  const auto* left = reinterpret_cast<const unsigned_lchar*>(lhs);
  const auto* right = reinterpret_cast<const unsigned_rchar*>(rhs);
  return CompareCharsEqualUnsigned(left, right, chars);
}
577 // V8::Object ~= JS::Value
578 class Object {
579 public:
580 // The default object constructor in V8 stores a nullptr,
581 // which has its low bit clear and is interpreted as Smi(0).
582 constexpr Object() : asBits_(JS::Int32Value(0).asRawBits()) {}
584 Object(const JS::Value& value) : asBits_(value.asRawBits()) {}
586 // This constructor is only used in an unused implementation of
587 // IsCharacterInRangeArray in regexp-macro-assembler.cc.
588 Object(uintptr_t raw) : asBits_(raw) { MOZ_CRASH("unused"); }
590 // Used in regexp-interpreter.cc to check the return value of
591 // isolate->stack_guard()->HandleInterrupts(). We want to handle
592 // interrupts in the caller, so we always return false from
593 // HandleInterrupts and true here.
594 inline bool IsException(Isolate*) const {
595 MOZ_ASSERT(!value().toBoolean());
596 return true;
599 JS::Value value() const { return JS::Value::fromRawBits(asBits_); }
601 inline static Object cast(Object object) { return object; }
603 protected:
604 void setValue(const JS::Value& val) { asBits_ = val.asRawBits(); }
605 uint64_t asBits_;
606 } JS_HAZ_GC_POINTER;
608 class Smi : public Object {
609 public:
610 static Smi FromInt(int32_t value) {
611 Smi smi;
612 smi.setValue(JS::Int32Value(value));
613 return smi;
615 static inline int32_t ToInt(const Object object) {
616 return object.value().toInt32();
620 // V8::HeapObject ~= GC thing
621 class HeapObject : public Object {
622 public:
623 inline static HeapObject cast(Object object) {
624 HeapObject h;
625 h.setValue(object.value());
626 return h;
630 // A fixed-size array with Objects (aka Values) as element types.
631 // Implemented using the dense elements of an ArrayObject.
632 // Used for named captures.
633 class FixedArray : public HeapObject {
634 public:
635 inline void set(uint32_t index, Object value) {
636 inner()->setDenseElement(index, value.value());
638 inline static FixedArray cast(Object object) {
639 FixedArray f;
640 f.setValue(object.value());
641 return f;
643 js::NativeObject* inner() {
644 return &value().toObject().as<js::NativeObject>();
649 * Conceptually, ByteArrayData is a variable-size structure. To
650 * implement this in a C++-approved way, we allocate a struct
651 * containing the 32-bit length field, followed by additional memory
652 * for the data. To access the data, we get a pointer to the next byte
653 * after the length field and cast it to the correct type.
655 inline uint8_t* ByteArrayData::data() {
656 static_assert(alignof(uint8_t) <= alignof(ByteArrayData),
657 "The trailing data must be aligned to start immediately "
658 "after the header with no padding.");
659 ByteArrayData* immediatelyAfter = this + 1;
660 return reinterpret_cast<uint8_t*>(immediatelyAfter);
663 template <typename T>
664 T* ByteArrayData::typedData() {
665 static_assert(alignof(T) <= alignof(ByteArrayData));
666 MOZ_ASSERT(uintptr_t(data()) % alignof(T) == 0);
667 return reinterpret_cast<T*>(data());
670 template <typename T>
671 T ByteArrayData::getTyped(uint32_t index) {
672 MOZ_ASSERT(index < length / sizeof(T));
673 return typedData<T>()[index];
676 template <typename T>
677 void ByteArrayData::setTyped(uint32_t index, T value) {
678 MOZ_ASSERT(index < length / sizeof(T));
679 typedData<T>()[index] = value;
682 // A fixed-size array of bytes.
683 class ByteArray : public HeapObject {
684 protected:
685 ByteArrayData* inner() const {
686 return static_cast<ByteArrayData*>(value().toPrivate());
689 public:
690 PseudoHandle<ByteArrayData> takeOwnership(Isolate* isolate);
691 PseudoHandle<ByteArrayData> maybeTakeOwnership(Isolate* isolate);
693 byte get(uint32_t index) { return inner()->get(index); }
694 void set(uint32_t index, byte val) { inner()->set(index, val); }
696 uint32_t length() const { return inner()->length; }
697 byte* GetDataStartAddress() { return inner()->data(); }
699 static ByteArray cast(Object object) {
700 ByteArray b;
701 b.setValue(object.value());
702 return b;
705 bool IsByteArray() const { return true; }
707 friend class SMRegExpMacroAssembler;
710 // This is a convenience class used in V8 for treating a ByteArray as an array
711 // of fixed-size integers. This version supports integral types up to 32 bits.
712 template <typename T>
713 class FixedIntegerArray : public ByteArray {
714 static_assert(alignof(T) <= alignof(ByteArrayData));
715 static_assert(std::is_integral<T>::value);
717 public:
718 static Handle<FixedIntegerArray<T>> New(Isolate* isolate, uint32_t length);
720 T get(uint32_t index) { return inner()->template getTyped<T>(index); };
721 void set(uint32_t index, T value) {
722 inner()->template setTyped<T>(index, value);
725 static FixedIntegerArray<T> cast(Object object) {
726 FixedIntegerArray<T> f;
727 f.setValue(object.value());
728 return f;
// FixedIntegerArray specialised for 16-bit unsigned elements.
732 using FixedUInt16Array = FixedIntegerArray<uint16_t>;
734 // Like Handles in SM, V8 handles are references to marked pointers.
735 // Unlike SM, where Rooted pointers are created individually on the
736 // stack, the target of a V8 handle lives in an arena on the isolate
737 // (~= JSContext). Whenever a Handle is created, a new "root" is
738 // created at the end of the arena.
740 // HandleScopes are used to manage the lifetimes of these handles. A
741 // HandleScope lives on the stack and stores the size of the arena at
742 // the time of its creation. When the function returns and the
743 // HandleScope is destroyed, the arena is truncated to its previous
744 // size, clearing all roots that were created since the creation of
745 // the HandleScope.
747 // In some cases, objects that are GC-allocated in V8 are not in SM.
748 // In particular, irregexp allocates ByteArrays during code generation
749 // to store lookup tables. This does not play nicely with the SM
750 // macroassembler's requirement that no GC allocations take place
751 // while it is on the stack. To work around this, this shim layer also
752 // provides the ability to create pseudo-handles, which are not
753 // managed by the GC but provide the same API to irregexp. The "root"
754 // of a pseudohandle is a unique pointer living in a second arena. If
755 // the allocated object should outlive the HandleScope, it must be
756 // manually moved out of the arena using maybeTakeOwnership.
757 // (If maybeTakeOwnership is called multiple times, it will return
758 // a null pointer on subsequent calls.)
760 class MOZ_STACK_CLASS HandleScope {
761 public:
762 HandleScope(Isolate* isolate);
763 ~HandleScope();
765 private:
766 size_t level_ = 0;
767 size_t non_gc_level_ = 0;
768 Isolate* isolate_;
770 friend class Isolate;
773 // Origin:
774 // https://github.com/v8/v8/blob/5792f3587116503fc047d2f68c951c72dced08a5/src/handles/handles.h#L88-L171
775 template <typename T>
776 class MOZ_NONHEAP_CLASS Handle {
777 public:
778 Handle() : location_(nullptr) {}
779 Handle(T object, Isolate* isolate);
780 Handle(const JS::Value& value, Isolate* isolate);
782 // Constructor for handling automatic up casting.
783 template <typename S,
784 typename = std::enable_if_t<std::is_convertible_v<S*, T*>>>
785 inline Handle(Handle<S> handle) : location_(handle.location_) {}
787 inline bool is_null() const { return location_ == nullptr; }
789 inline T operator*() const { return T::cast(Object(*location_)); };
791 // {ObjectRef} is returned by {Handle::operator->}. It should never be stored
792 // anywhere or used in any other code; no one should ever have to spell out
793 // {ObjectRef} in code. Its only purpose is to be dereferenced immediately by
794 // "operator-> chaining". Returning the address of the field is valid because
795 // this object's lifetime only ends at the end of the full statement.
796 // Origin:
797 // https://github.com/v8/v8/blob/03aaa4b3bf4cb01eee1f223b252e6869b04ab08c/src/handles/handles.h#L91-L105
798 class MOZ_TEMPORARY_CLASS ObjectRef {
799 public:
800 T* operator->() { return &object_; }
802 private:
803 friend class Handle;
804 explicit ObjectRef(T object) : object_(object) {}
806 T object_;
808 inline ObjectRef operator->() const { return ObjectRef{**this}; }
810 static Handle<T> fromHandleValue(JS::HandleValue handle) {
811 return Handle(handle.address());
814 private:
815 Handle(const JS::Value* location) : location_(location) {}
817 template <typename>
818 friend class Handle;
819 template <typename>
820 friend class MaybeHandle;
822 const JS::Value* location_;
825 // A Handle can be converted into a MaybeHandle. Converting a MaybeHandle
826 // into a Handle requires checking that it does not point to nullptr. This
827 // ensures nullptr checks before use.
829 // Also note that Handles do not provide default equality comparison or hashing
830 // operators on purpose. Such operators would be misleading, because intended
831 // semantics is ambiguous between Handle location and object identity.
832 // Origin:
833 // https://github.com/v8/v8/blob/5792f3587116503fc047d2f68c951c72dced08a5/src/handles/maybe-handles.h#L15-L78
834 template <typename T>
835 class MOZ_NONHEAP_CLASS MaybeHandle final {
836 public:
837 MaybeHandle() : location_(nullptr) {}
839 // Constructor for handling automatic up casting from Handle.
840 // Ex. Handle<JSArray> can be passed when MaybeHandle<Object> is expected.
841 template <typename S,
842 typename = std::enable_if_t<std::is_convertible_v<S*, T*>>>
843 MaybeHandle(Handle<S> handle) : location_(handle.location_) {}
845 inline Handle<T> ToHandleChecked() const {
846 MOZ_RELEASE_ASSERT(location_);
847 return Handle<T>(location_);
850 // Convert to a Handle with a type that can be upcasted to.
851 template <typename S>
852 inline bool ToHandle(Handle<S>* out) const {
853 if (location_) {
854 *out = Handle<T>(location_);
855 return true;
856 } else {
857 *out = Handle<T>();
858 return false;
862 private:
863 JS::Value* location_;
866 // From v8/src/handles/handles-inl.h
868 template <typename T>
869 inline Handle<T> handle(T object, Isolate* isolate) {
870 return Handle<T>(object, isolate);
873 // RAII Guard classes
// V8's DisallowGarbageCollection is spelled JS::AutoAssertNoGC here.
875 using DisallowGarbageCollection = JS::AutoAssertNoGC;
// V8 uses this inside DisallowGarbageCollection regions to turn
// allocation back on before throwing a stack overflow exception or
// handling interrupts. AutoSuppressGC is sufficient for the former
// case, but not for the latter: handling interrupts can execute
// arbitrary script code, and V8 jumps through some scary hoops to
// "manually relocate unhandlified references" afterwards. To keep
// things sane, we don't try to handle interrupts while regex code is
// still on the stack. Instead, we return EXCEPTION and handle
// interrupts in the caller. (See RegExpShared::execute.)
//
// Consequently this class has no members and its constructor does nothing.
class AllowGarbageCollection {
 public:
  AllowGarbageCollection() {}
};
892 // Origin:
893 // https://github.com/v8/v8/blob/84f3877c15bc7f8956d21614da4311337525a3c8/src/objects/string.h#L83-L474
894 class String : public HeapObject {
895 private:
896 JSString* str() const { return value().toString(); }
898 public:
899 String() = default;
900 String(JSString* str) { setValue(JS::StringValue(str)); }
902 operator JSString*() const { return str(); }
904 // Max char codes.
905 static const int32_t kMaxOneByteCharCode = unibrow::Latin1::kMaxChar;
906 static const uint32_t kMaxOneByteCharCodeU = unibrow::Latin1::kMaxChar;
907 static const int kMaxUtf16CodeUnit = 0xffff;
908 static const uint32_t kMaxUtf16CodeUnitU = kMaxUtf16CodeUnit;
909 static const base::uc32 kMaxCodePoint = 0x10ffff;
911 MOZ_ALWAYS_INLINE int length() const { return str()->length(); }
912 bool IsFlat() { return str()->isLinear(); };
914 // Origin:
915 // https://github.com/v8/v8/blob/84f3877c15bc7f8956d21614da4311337525a3c8/src/objects/string.h#L95-L152
916 class FlatContent {
917 public:
918 FlatContent(JSLinearString* string, const DisallowGarbageCollection& no_gc)
919 : string_(string), no_gc_(no_gc) {}
920 inline bool IsOneByte() const { return string_->hasLatin1Chars(); }
921 inline bool IsTwoByte() const { return !string_->hasLatin1Chars(); }
923 base::Vector<const uint8_t> ToOneByteVector() const {
924 MOZ_ASSERT(IsOneByte());
925 return base::Vector<const uint8_t>(string_->latin1Chars(no_gc_),
926 string_->length());
928 base::Vector<const base::uc16> ToUC16Vector() const {
929 MOZ_ASSERT(IsTwoByte());
930 return base::Vector<const base::uc16>(string_->twoByteChars(no_gc_),
931 string_->length());
933 void UnsafeDisableChecksumVerification() {
934 // Intentional no-op. See the comment for AllowGarbageCollection above.
937 private:
938 const JSLinearString* string_;
939 const JS::AutoAssertNoGC& no_gc_;
941 FlatContent GetFlatContent(const DisallowGarbageCollection& no_gc) {
942 MOZ_ASSERT(IsFlat());
943 return FlatContent(&str()->asLinear(), no_gc);
946 static Handle<String> Flatten(Isolate* isolate, Handle<String> string);
948 inline static String cast(Object object) {
949 String s;
950 MOZ_ASSERT(object.value().isString());
951 s.setValue(object.value());
952 return s;
955 inline static bool IsOneByteRepresentationUnderneath(String string) {
956 return string.str()->hasLatin1Chars();
958 inline bool IsOneByteRepresentation() const {
959 return str()->hasLatin1Chars();
962 std::unique_ptr<char[]> ToCString();
964 template <typename Char>
965 base::Vector<const Char> GetCharVector(
966 const DisallowGarbageCollection& no_gc);
969 template <>
970 inline base::Vector<const uint8_t> String::GetCharVector(
971 const DisallowGarbageCollection& no_gc) {
972 String::FlatContent flat = GetFlatContent(no_gc);
973 MOZ_ASSERT(flat.IsOneByte());
974 return flat.ToOneByteVector();
977 template <>
978 inline base::Vector<const base::uc16> String::GetCharVector(
979 const DisallowGarbageCollection& no_gc) {
980 String::FlatContent flat = GetFlatContent(no_gc);
981 MOZ_ASSERT(flat.IsTwoByte());
982 return flat.ToUC16Vector();
985 class JSRegExp : public HeapObject {
986 public:
987 JSRegExp() : HeapObject() {}
988 JSRegExp(js::RegExpShared* re) { setValue(JS::PrivateGCThingValue(re)); }
990 // ******************************************************
991 // Methods that are called from inside the implementation
992 // ******************************************************
993 void TierUpTick() { inner()->tierUpTick(); }
995 Object bytecode(bool is_latin1) const {
996 return Object(JS::PrivateValue(inner()->getByteCode(is_latin1)));
999 // TODO: should we expose this?
1000 uint32_t backtrack_limit() const { return 0; }
1002 static JSRegExp cast(Object object) {
1003 JSRegExp regexp;
1004 js::gc::Cell* regexpShared = object.value().toGCThing();
1005 MOZ_ASSERT(regexpShared->is<js::RegExpShared>());
1006 regexp.setValue(JS::PrivateGCThingValue(regexpShared));
1007 return regexp;
1010 // Each capture (including the match itself) needs two registers.
1011 static constexpr int RegistersForCaptureCount(int count) {
1012 return (count + 1) * 2;
1015 inline uint32_t max_register_count() const {
1016 return inner()->getMaxRegisters();
1019 // ******************************
1020 // Static constants
1021 // ******************************
1023 static constexpr int kMaxCaptures = (1 << 15) - 1;
1025 static constexpr int kNoBacktrackLimit = 0;
1027 private:
1028 js::RegExpShared* inner() const {
1029 return value().toGCThing()->as<js::RegExpShared>();
1033 using RegExpFlags = JS::RegExpFlags;
1035 inline bool IsUnicode(RegExpFlags flags) { return flags.unicode(); }
1036 inline bool IsGlobal(RegExpFlags flags) { return flags.global(); }
1037 inline bool IsIgnoreCase(RegExpFlags flags) { return flags.ignoreCase(); }
1038 inline bool IsMultiline(RegExpFlags flags) { return flags.multiline(); }
1039 inline bool IsDotAll(RegExpFlags flags) { return flags.dotAll(); }
1040 inline bool IsSticky(RegExpFlags flags) { return flags.sticky(); }
1041 inline bool IsUnicodeSets(RegExpFlags flags) { return flags.unicodeSets(); }
1042 inline bool IsEitherUnicode(RegExpFlags flags) {
1043 return flags.unicode() || flags.unicodeSets();
// Stand-in for V8's Histogram: samples are accepted and discarded.
class Histogram {
 public:
  // Intentionally does nothing with the sample.
  void AddSample(int /* sample */) {}
};
1051 class Counters {
1052 public:
1053 Histogram* regexp_backtracks() { return &regexp_backtracks_; }
1055 private:
1056 Histogram regexp_backtracks_;
// Where a new heap object should be allocated.
enum class AllocationType : uint8_t {
  kYoung = 0,  // Allocate in the nursery
  kOld = 1,    // Allocate in the tenured heap
};
1064 using StackGuard = Isolate;
1065 using Factory = Isolate;
1067 class Isolate {
1068 public:
1069 Isolate(JSContext* cx) : cx_(cx) {}
1070 ~Isolate();
1071 bool init();
1073 size_t sizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf) const;
1075 //********** Isolate code **********//
1076 RegExpStack* regexp_stack() const { return regexpStack_; }
1078 // This is called from inside no-GC code. Instead of suppressing GC
1079 // to allocate the error, we return false from Execute and call
1080 // ReportOverRecursed in the caller.
1081 void StackOverflow() {}
1083 #ifndef V8_INTL_SUPPORT
1084 unibrow::Mapping<unibrow::Ecma262UnCanonicalize>* jsregexp_uncanonicalize() {
1085 return &jsregexp_uncanonicalize_;
1087 unibrow::Mapping<unibrow::Ecma262Canonicalize>*
1088 regexp_macro_assembler_canonicalize() {
1089 return &regexp_macro_assembler_canonicalize_;
1091 unibrow::Mapping<unibrow::CanonicalizationRange>* jsregexp_canonrange() {
1092 return &jsregexp_canonrange_;
1095 private:
1096 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize_;
1097 unibrow::Mapping<unibrow::Ecma262Canonicalize>
1098 regexp_macro_assembler_canonicalize_;
1099 unibrow::Mapping<unibrow::CanonicalizationRange> jsregexp_canonrange_;
1100 #endif // !V8_INTL_SUPPORT
1102 public:
1103 // An empty stub for telemetry we don't support
1104 void IncreaseTotalRegexpCodeGenerated(Handle<HeapObject> code) {}
1106 Counters* counters() { return &counters_; }
1108 //********** Factory code **********//
1109 inline Factory* factory() { return this; }
1111 Handle<ByteArray> NewByteArray(
1112 int length, AllocationType allocation = AllocationType::kYoung);
1114 // Allocates a fixed array initialized with undefined values.
1115 Handle<FixedArray> NewFixedArray(int length);
1117 template <typename T>
1118 Handle<FixedIntegerArray<T>> NewFixedIntegerArray(uint32_t length);
1120 template <typename Char>
1121 Handle<String> InternalizeString(const base::Vector<const Char>& str);
1123 //********** Stack guard code **********//
1124 inline StackGuard* stack_guard() { return this; }
1126 uintptr_t real_climit() { return cx_->stackLimit(JS::StackForSystemCode); }
1128 // This is called from inside no-GC code. V8 runs the interrupt
1129 // inside the no-GC code and then "manually relocates unhandlified
1130 // references" afterwards. We just return false and let the caller
1131 // handle interrupts.
1132 Object HandleInterrupts() { return Object(JS::BooleanValue(false)); }
1134 JSContext* cx() const { return cx_; }
1136 void trace(JSTracer* trc);
1138 //********** Handle code **********//
1140 JS::Value* getHandleLocation(const JS::Value& value);
1142 private:
1143 mozilla::SegmentedVector<JS::Value, 256> handleArena_;
1144 mozilla::SegmentedVector<PseudoHandle<void>, 256> uniquePtrArena_;
1146 void* allocatePseudoHandle(size_t bytes);
1148 public:
1149 template <typename T>
1150 PseudoHandle<T> takeOwnership(void* ptr);
1151 template <typename T>
1152 PseudoHandle<T> maybeTakeOwnership(void* ptr);
1154 uint32_t liveHandles() const { return handleArena_.Length(); }
1155 uint32_t livePseudoHandles() const { return uniquePtrArena_.Length(); }
1157 private:
1158 void openHandleScope(HandleScope& scope) {
1159 scope.level_ = handleArena_.Length();
1160 scope.non_gc_level_ = uniquePtrArena_.Length();
1162 void closeHandleScope(size_t prevLevel, size_t prevUniqueLevel) {
1163 size_t currLevel = handleArena_.Length();
1164 handleArena_.PopLastN(currLevel - prevLevel);
1166 size_t currUniqueLevel = uniquePtrArena_.Length();
1167 uniquePtrArena_.PopLastN(currUniqueLevel - prevUniqueLevel);
1169 friend class HandleScope;
1171 JSContext* cx_;
1172 RegExpStack* regexpStack_{};
1173 Counters counters_{};
1174 #ifdef DEBUG
1175 public:
1176 uint32_t shouldSimulateInterrupt_ = 0;
1177 #endif
1180 // Origin:
1181 // https://github.com/v8/v8/blob/50dcf2af54ce27801a71c47c1be1d2c5e36b0dd6/src/execution/isolate.h#L1909-L1931
1182 class StackLimitCheck {
1183 public:
1184 StackLimitCheck(Isolate* isolate) : cx_(isolate->cx()) {}
1186 // Use this to check for stack-overflows in C++ code.
1187 bool HasOverflowed() {
1188 js::AutoCheckRecursionLimit recursion(cx_);
1189 bool overflowed = !recursion.checkDontReport(cx_);
1190 if (overflowed && js::SupportDifferentialTesting()) {
1191 // We don't report overrecursion here, but we throw an exception later
1192 // and this still affects differential testing. Mimic ReportOverRecursed
1193 // (the fuzzers check for this particular string).
1194 fprintf(stderr, "ReportOverRecursed called\n");
1196 return overflowed;
1199 // Use this to check for interrupt request in C++ code.
1200 bool InterruptRequested() {
1201 return cx_->hasPendingInterrupt(js::InterruptReason::CallbackUrgent);
1204 // Use this to check for stack-overflow when entering runtime from JS code.
1205 bool JsHasOverflowed() {
1206 js::AutoCheckRecursionLimit recursion(cx_);
1207 return !recursion.checkDontReport(cx_);
1210 private:
1211 JSContext* cx_;
1214 class ExternalReference {
1215 public:
1216 static const void* TopOfRegexpStack(Isolate* isolate);
1217 static size_t SizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf,
1218 RegExpStack* regexpStack);
1221 class Code : public HeapObject {
1222 public:
1223 uint8_t* raw_instruction_start() { return inner()->raw(); }
1225 static Code cast(Object object) {
1226 Code c;
1227 js::gc::Cell* jitCode = object.value().toGCThing();
1228 MOZ_ASSERT(jitCode->is<js::jit::JitCode>());
1229 c.setValue(JS::PrivateGCThingValue(jitCode));
1230 return c;
1232 js::jit::JitCode* inner() {
1233 return value().toGCThing()->as<js::jit::JitCode>();
// Only used in function signature of functions we don't implement
// (NativeRegExpMacroAssembler::CheckStackGuardState)
//
// Deliberately empty: the type only has to exist so those signatures compile.
class InstructionStream {
};
1241 // Origin: https://github.com/v8/v8/blob/master/src/codegen/label.h
1242 class Label {
1243 public:
1244 Label() : inner_(js::jit::Label()) {}
1246 js::jit::Label* inner() { return &inner_; }
1248 void Unuse() { inner_.reset(); }
1250 bool is_linked() { return inner_.used(); }
1251 bool is_bound() { return inner_.bound(); }
1252 bool is_unused() { return !inner_.used() && !inner_.bound(); }
1254 int pos() { return inner_.offset(); }
1255 void link_to(int pos) { inner_.use(pos); }
1256 void bind_to(int pos) { inner_.bind(pos); }
1258 private:
1259 js::jit::Label inner_;
1260 js::jit::CodeOffset patchOffset_;
1262 friend class SMRegExpMacroAssembler;
// Imported V8 code spells its flag lookups `v8_flags`; redirect them to
// js::jit::JitOptions.
#define v8_flags js::jit::JitOptions

// Configuration switches seen by the imported V8 sources.
#define V8_USE_COMPUTED_GOTO 1
#define COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER
1270 } // namespace internal
1271 } // namespace v8
1273 namespace V8 {
1275 inline void FatalProcessOutOfMemory(v8::internal::Isolate* isolate,
1276 const char* msg) {
1277 js::AutoEnterOOMUnsafeRegion oomUnsafe;
1278 oomUnsafe.crash(msg);
1281 } // namespace V8
1283 #endif // RegexpShim_h