1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // Copyright 2019 the V8 project authors. All rights reserved.
8 // Use of this source code is governed by a BSD-style license that can be
9 // found in the LICENSE file.
14 #include "mozilla/Assertions.h"
15 #include "mozilla/Attributes.h"
16 #include "mozilla/MathAlgorithms.h"
17 #include "mozilla/Maybe.h"
18 #include "mozilla/SegmentedVector.h"
19 #include "mozilla/Sprintf.h"
20 #include "mozilla/Types.h"
26 #include "irregexp/RegExpTypes.h"
27 #include "irregexp/util/FlagsShim.h"
28 #include "irregexp/util/VectorShim.h"
29 #include "irregexp/util/ZoneShim.h"
30 #include "jit/JitCode.h"
31 #include "jit/Label.h"
32 #include "jit/shared/Assembler-shared.h"
33 #include "js/friend/StackLimits.h" // js::AutoCheckRecursionLimit
34 #include "js/RegExpFlags.h"
36 #include "threading/ExclusiveData.h"
37 #include "util/DifferentialTesting.h"
38 #include "vm/JSContext.h"
39 #include "vm/MutexIDs.h"
40 #include "vm/NativeObject.h"
41 #include "vm/RegExpShared.h"
43 // Forward declaration of classes
49 class RegExpMatchInfo
;
55 } // namespace internal
58 #define V8_WARN_UNUSED_RESULT [[nodiscard]]
59 #define V8_EXPORT_PRIVATE
60 #define V8_FALLTHROUGH [[fallthrough]]
61 #define V8_NODISCARD [[nodiscard]]
62 #define V8_NOEXCEPT noexcept
64 #define FATAL(x) MOZ_CRASH(x)
65 #define UNREACHABLE() MOZ_CRASH("unreachable code")
66 #define UNIMPLEMENTED() MOZ_CRASH("unimplemented code")
67 #define STATIC_ASSERT(exp) static_assert(exp, #exp)
69 #define DCHECK MOZ_ASSERT
70 #define DCHECK_EQ(lhs, rhs) MOZ_ASSERT((lhs) == (rhs))
71 #define DCHECK_NE(lhs, rhs) MOZ_ASSERT((lhs) != (rhs))
72 #define DCHECK_GT(lhs, rhs) MOZ_ASSERT((lhs) > (rhs))
73 #define DCHECK_GE(lhs, rhs) MOZ_ASSERT((lhs) >= (rhs))
74 #define DCHECK_LT(lhs, rhs) MOZ_ASSERT((lhs) < (rhs))
75 #define DCHECK_LE(lhs, rhs) MOZ_ASSERT((lhs) <= (rhs))
76 #define DCHECK_NULL(val) MOZ_ASSERT((val) == nullptr)
77 #define DCHECK_NOT_NULL(val) MOZ_ASSERT((val) != nullptr)
78 #define DCHECK_IMPLIES(lhs, rhs) MOZ_ASSERT_IF(lhs, rhs)
79 #define CHECK MOZ_RELEASE_ASSERT
80 #define CHECK_EQ(lhs, rhs) MOZ_RELEASE_ASSERT((lhs) == (rhs))
81 #define CHECK_LE(lhs, rhs) MOZ_RELEASE_ASSERT((lhs) <= (rhs))
82 #define CHECK_GE(lhs, rhs) MOZ_RELEASE_ASSERT((lhs) >= (rhs))
83 #define CONSTEXPR_DCHECK MOZ_ASSERT
85 #define MemCopy memcpy
88 // https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L310-L319
89 // ptrdiff_t is 't' according to the standard, but MSVC uses 'I'.
91 # define V8PRIxPTRDIFF "Ix"
92 # define V8PRIdPTRDIFF "Id"
93 # define V8PRIuPTRDIFF "Iu"
95 # define V8PRIxPTRDIFF "tx"
96 # define V8PRIdPTRDIFF "td"
97 # define V8PRIuPTRDIFF "tu"
100 #define arraysize std::size
102 // Explicitly declare the assignment operator as deleted.
103 #define DISALLOW_ASSIGN(TypeName) TypeName& operator=(const TypeName&) = delete
105 // Explicitly declare the copy constructor and assignment operator as deleted.
106 // This also deletes the implicit move constructor and implicit move assignment
107 // operator, but still allows to manually define them.
108 #define DISALLOW_COPY_AND_ASSIGN(TypeName) \
109 TypeName(const TypeName&) = delete; \
110 DISALLOW_ASSIGN(TypeName)
112 // Explicitly declare all implicit constructors as deleted, namely the
113 // default constructor, copy constructor and operator= functions.
114 // This is especially useful for classes containing only static methods.
115 #define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
116 TypeName() = delete; \
117 DISALLOW_COPY_AND_ASSIGN(TypeName)
122 // https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L364-L367
// Tests whether the low bits of |value| selected by (|alignment| - 1) are all
// clear. Because this is a plain bit-mask test, it coincides with "value is a
// multiple of alignment" only when |alignment| is a power of two.
template <typename T, typename U>
constexpr inline bool IsAligned(T value, U alignment) {
  const auto low_bits_mask = alignment - 1;
  return 0 == (value & low_bits_mask);
}
128 using Address
= uintptr_t;
129 static const Address kNullAddress
= 0;
// Returns the address of the current stack frame, as reported by the
// compiler builtin, converted to an integer.
inline uintptr_t GetCurrentStackPosition() {
  void* const frame = __builtin_frame_address(0);
  return reinterpret_cast<uintptr_t>(frame);
}
137 // Latin1/UTF-16 constants
138 // Code-point values in Unicode 4.0 are 21 bits wide.
139 // Code units in UTF-16 are 16 bits wide.
140 using uc16
= char16_t
;
141 using uc32
= uint32_t;
143 constexpr int kUC16Size
= sizeof(base::uc16
);
146 // https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L247-L258
147 // The USE(x, ...) template is used to silence C++ compiler warnings
148 // issued for (yet) unused variables (typically parameters).
149 // The arguments are guaranteed to be evaluated from left to right.
151 template <typename T
>
152 Use(T
&&) {} // NOLINT(runtime/explicit)
156 ::v8::base::Use unused_tmp_array_for_use_macro[]{__VA_ARGS__}; \
157 (void)unused_tmp_array_for_use_macro; \
161 // https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/safe_conversions.h#L35-L39
// saturated_cast<> is analogous to static_cast<> for numeric types, except
// that the specified numeric conversion will saturate rather than overflow or
// underflow.
// Primary template: declared only. Each supported conversion is provided as
// an explicit specialization.
template <typename Dst, typename Src>
inline Dst saturated_cast(Src value);

// This is the only specialization that is needed for regexp code.
// Rather than pulling in dozens of lines of template machinery to derive it,
// the body follows the implementation of uint8_clamped in
// ArrayBufferObject.h.
template <>
inline uint8_t saturated_cast<uint8_t, int>(int x) {
  // Negative inputs clamp to the bottom of the uint8_t range.
  if (x < 0) {
    return 0;
  }
  // Anything at or beyond 255 clamps to the top of the range.
  if (x >= 255) {
    return 255;
  }
  return uint8_t(x);
}
178 // https://github.com/v8/v8/blob/fc088cdaccadede84886eee881e67af9db53669a/src/base/bounds.h#L14-L28
// Checks if value is in range [lower_limit, higher_limit] using a single
// branch.
template <typename T, typename U>
inline constexpr bool IsInRange(T value, U lower_limit, U higher_limit) {
  using unsigned_T = typename std::make_unsigned<T>::type;
  // static_cast is used throughout so that enum classes are supported.
  const unsigned_T base = static_cast<unsigned_T>(lower_limit);
  // Distance of |value| above the lower bound. When |value| lies below the
  // bound, the unsigned subtraction wraps around to a large number, so the
  // single comparison below rejects it as well.
  const unsigned_T distance =
      static_cast<unsigned_T>(static_cast<unsigned_T>(value) - base);
  // Width of the accepted interval.
  const unsigned_T width =
      static_cast<unsigned_T>(static_cast<unsigned_T>(higher_limit) - base);
  return distance <= width;
}
191 #define LAZY_INSTANCE_INITIALIZER \
194 template <typename T
>
195 class LazyInstanceImpl
{
197 LazyInstanceImpl() : value_(js::mutexid::IrregexpLazyStatic
) {}
200 auto val
= value_
.lock();
201 if (val
->isNothing()) {
208 js::ExclusiveData
<mozilla::Maybe
<T
>> value_
;
211 template <typename T
>
214 using type
= LazyInstanceImpl
<T
>;
218 // https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/utils/utils.h#L40-L48
219 // Returns the value (0 .. 15) of a hexadecimal character c.
220 // If c is not a legal hexadecimal character, returns a value < 0.
221 // Used in regexp-parser.cc
222 inline int HexValue(base::uc32 c
) {
224 if (static_cast<unsigned>(c
) <= 9) return c
;
225 c
= (c
| 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36.
226 if (static_cast<unsigned>(c
) <= 5) return c
+ 10;
230 template <typename
... Args
>
231 [[nodiscard
]] uint32_t hash_combine(uint32_t aHash
, Args
... aArgs
) {
232 return mozilla::AddToHash(aHash
, aArgs
...);
235 template <typename T
>
237 mozilla::Maybe
<T
> inner_
;
240 Optional() = default;
241 Optional(T t
) { inner_
.emplace(t
); }
243 bool has_value() const { return inner_
.isSome(); }
244 const T
& value() const { return inner_
.ref(); }
249 inline uint64_t CountTrailingZeros(uint64_t value
) {
250 return mozilla::CountTrailingZeroes64(value
);
253 inline size_t RoundUpToPowerOfTwo32(size_t value
) {
254 return mozilla::RoundUpPow2(value
);
// True when |value| is strictly positive and has exactly one bit set.
template <typename T>
constexpr bool IsPowerOfTwo(T value) {
  // Zero and negative numbers are never powers of two.
  if (!(value > 0)) {
    return false;
  }
  // Subtracting one flips the lowest set bit and every bit below it, so the
  // AND is zero only when that lowest set bit was the sole set bit.
  return 0 == (value & (value - 1));
}
267 using uchar
= unsigned int;
270 // https://github.com/v8/v8/blob/1f1e4cdb04c75eab77adbecd5f5514ddc3eb56cf/src/strings/unicode.h#L133-L150
273 static const base::uc16 kMaxChar
= 0xff;
275 // Convert the character to Latin-1 case equivalent if possible.
276 static inline base::uc16
TryConvertToLatin1(base::uc16 c
) {
277 // "GREEK CAPITAL LETTER MU" case maps to "MICRO SIGN".
278 // "GREEK SMALL LETTER MU" case maps to "MICRO SIGN".
279 if (c
== 0x039C || c
== 0x03BC) {
282 // "LATIN CAPITAL LETTER Y WITH DIAERESIS" case maps to "LATIN SMALL LETTER
283 // Y WITH DIAERESIS".
292 // https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L99-L131
295 static inline bool IsLeadSurrogate(int code
) {
296 return js::unicode::IsLeadSurrogate(code
);
298 static inline bool IsTrailSurrogate(int code
) {
299 return js::unicode::IsTrailSurrogate(code
);
301 static inline base::uc16
LeadSurrogate(uint32_t char_code
) {
302 return js::unicode::LeadSurrogate(char_code
);
304 static inline base::uc16
TrailSurrogate(uint32_t char_code
) {
305 return js::unicode::TrailSurrogate(char_code
);
307 static inline uint32_t CombineSurrogatePair(char16_t lead
, char16_t trail
) {
308 return js::unicode::UTF16Decode(lead
, trail
);
310 static const uchar kMaxNonSurrogateCharCode
= 0xffff;
313 #ifndef V8_INTL_SUPPORT
315 // A cache used in case conversion. It caches the value for characters
316 // that either have no mapping or map to a single character independent
317 // of context. Characters that map to more than one character or that
318 // map differently depending on context are always looked up.
320 // https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L64-L88
321 template <class T
, int size
= 256>
324 inline Mapping() = default;
325 inline int get(uchar c
, uchar n
, uchar
* result
) {
326 CacheEntry entry
= entries_
[c
& kMask
];
327 if (entry
.code_point_
== c
) {
328 if (entry
.offset_
== 0) {
331 result
[0] = c
+ entry
.offset_
;
335 return CalculateValue(c
, n
, result
);
340 int CalculateValue(uchar c
, uchar n
, uchar
* result
) {
341 bool allow_caching
= true;
342 int length
= T::Convert(c
, n
, result
, &allow_caching
);
345 entries_
[c
& kMask
] = CacheEntry(c
, result
[0] - c
);
348 entries_
[c
& kMask
] = CacheEntry(c
, 0);
357 inline CacheEntry() : code_point_(kNoChar
), offset_(0) {}
358 inline CacheEntry(uchar code_point
, signed offset
)
359 : code_point_(code_point
), offset_(offset
) {}
362 static const int kNoChar
= (1 << 21) - 1;
364 static const int kSize
= size
;
365 static const int kMask
= kSize
- 1;
366 CacheEntry entries_
[kSize
];
370 // https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L241-L252
371 struct Ecma262Canonicalize
{
372 static const int kMaxWidth
= 1;
373 static int Convert(uchar c
, uchar n
, uchar
* result
, bool* allow_caching_ptr
);
375 struct Ecma262UnCanonicalize
{
376 static const int kMaxWidth
= 4;
377 static int Convert(uchar c
, uchar n
, uchar
* result
, bool* allow_caching_ptr
);
379 struct CanonicalizationRange
{
380 static const int kMaxWidth
= 1;
381 static int Convert(uchar c
, uchar n
, uchar
* result
, bool* allow_caching_ptr
);
384 #endif // !V8_INTL_SUPPORT
387 static bool Is(uchar c
);
390 } // namespace unibrow
394 #define PRINTF_FORMAT(x, y) MOZ_FORMAT_PRINTF(x, y)
395 void PRINTF_FORMAT(1, 2) PrintF(const char* format
, ...);
396 void PRINTF_FORMAT(2, 3) PrintF(FILE* out
, const char* format
, ...);
398 // Superclass for classes only using static method functions.
399 // The subclass of AllStatic cannot be instantiated at all.
403 AllStatic() = delete;
407 // Superclass for classes managed with new and delete.
408 // In irregexp, this is only AlternativeGeneration (in regexp-compiler.cc)
410 // https://github.com/v8/v8/blob/7b3332844212d78ee87a9426f3a6f7f781a8fbfa/src/utils/allocation.cc#L88-L96
413 static void* operator new(size_t size
) {
414 js::AutoEnterOOMUnsafeRegion oomUnsafe
;
415 void* result
= js_malloc(size
);
417 oomUnsafe
.crash("Irregexp Malloced shim");
421 static void operator delete(void* p
) { js_free(p
); }
424 constexpr int32_t KB
= 1024;
425 constexpr int32_t MB
= 1024 * 1024;
427 #define kMaxInt JSVAL_INT_MAX
428 #define kMinInt JSVAL_INT_MIN
429 constexpr int kSystemPointerSize
= sizeof(void*);
431 // The largest integer n such that n and n + 1 are both exactly
432 // representable as a Number value. ES6 section 20.1.2.6
433 constexpr double kMaxSafeInteger
= 9007199254740991.0; // 2^53-1
435 constexpr int kBitsPerByte
= 8;
436 constexpr int kBitsPerByteLog2
= 3;
437 constexpr int kUInt16Size
= sizeof(uint16_t);
438 constexpr int kUInt32Size
= sizeof(uint32_t);
439 constexpr int kInt64Size
= sizeof(int64_t);
441 constexpr int kMaxUInt16
= (1 << 16) - 1;
443 inline constexpr bool IsDecimalDigit(base::uc32 c
) {
444 return c
>= '0' && c
<= '9';
447 inline constexpr int AsciiAlphaToLower(base::uc32 c
) { return c
| 0x20; }
// True when |val| fits in 24 unsigned bits, i.e. nothing remains after
// shifting the low 24 bits away.
inline bool is_uint24(int64_t val) {
  const int64_t high_bits = val >> 24;
  return high_bits == 0;
}
// True when |val| lies in the signed 24-bit range [-2^23, 2^23).
inline bool is_int24(int64_t val) {
  constexpr int64_t kBound = int64_t(1) << 23;
  if (val < -kBound) {
    return false;
  }
  return val < kBound;
}
455 inline bool IsIdentifierStart(base::uc32 c
) {
456 return js::unicode::IsIdentifierStart(char32_t(c
));
458 inline bool IsIdentifierPart(base::uc32 c
) {
459 return js::unicode::IsIdentifierPart(char32_t(c
));
462 // Wrappers to disambiguate char16_t and uc16.
464 explicit AsUC16(char16_t v
) : value(v
) {}
469 explicit AsUC32(int32_t v
) : value(v
) {}
473 std::ostream
& operator<<(std::ostream
& os
, const AsUC16
& c
);
474 std::ostream
& operator<<(std::ostream
& os
, const AsUC32
& c
);
476 // This class is used for the output of trace-regexp-parser. V8 has
477 // an elaborate implementation to ensure that the output gets to the
478 // right place, even on Android. We just need something that will
479 // print output (ideally to stderr, to match the rest of our tracing
480 // code). This is an empty wrapper that will convert itself to
481 // std::cerr when used.
484 operator std::ostream
&() const;
485 template <typename T
>
486 std::ostream
& operator<<(T t
);
489 // Reuse existing Maybe implementation
490 using mozilla::Maybe
;
492 template <typename T
>
493 Maybe
<T
> Just(const T
& value
) {
494 return mozilla::Some(value
);
497 template <typename T
>
498 mozilla::Nothing
Nothing() {
499 return mozilla::Nothing();
502 template <typename T
>
503 using PseudoHandle
= mozilla::UniquePtr
<T
, JS::FreePolicy
>;
505 // Compare 8bit/16bit chars to 8bit/16bit chars.
506 // Used indirectly by regexp-interpreter.cc
507 // Taken from: https://github.com/v8/v8/blob/master/src/utils/utils.h
// Three-way comparison of the first |chars| code units of |lhs| and |rhs|.
// Returns a negative value, zero, or a positive value when |lhs| orders
// before, equal to, or after |rhs| respectively.
template <typename lchar, typename rchar>
inline int CompareCharsUnsigned(const lchar* lhs, const rchar* rhs,
                                size_t chars) {
  if (sizeof(*lhs) == sizeof(char) && sizeof(*rhs) == sizeof(char)) {
    // memcmp compares byte-by-byte, yielding wrong results for two-byte
    // strings on little-endian systems, so it is only used when both sides
    // are one-byte strings.
    return memcmp(lhs, rhs, chars);
  }
  // Mixed or two-byte widths: walk both sequences in lock step and report the
  // first differing code unit.
  for (const lchar* const limit = lhs + chars; lhs < limit; ++lhs, ++rhs) {
    const int r = static_cast<int>(*lhs) - static_cast<int>(*rhs);
    if (r != 0) return r;
  }
  // Every compared code unit matched (or |chars| was zero).
  return 0;
}
525 template <typename lchar
, typename rchar
>
526 inline int CompareChars(const lchar
* lhs
, const rchar
* rhs
, size_t chars
) {
527 DCHECK_LE(sizeof(lchar
), 2);
528 DCHECK_LE(sizeof(rchar
), 2);
529 if (sizeof(lchar
) == 1) {
530 if (sizeof(rchar
) == 1) {
531 return CompareCharsUnsigned(reinterpret_cast<const uint8_t*>(lhs
),
532 reinterpret_cast<const uint8_t*>(rhs
), chars
);
534 return CompareCharsUnsigned(reinterpret_cast<const uint8_t*>(lhs
),
535 reinterpret_cast<const char16_t
*>(rhs
),
539 if (sizeof(rchar
) == 1) {
540 return CompareCharsUnsigned(reinterpret_cast<const char16_t
*>(lhs
),
541 reinterpret_cast<const uint8_t*>(rhs
), chars
);
543 return CompareCharsUnsigned(reinterpret_cast<const char16_t
*>(lhs
),
544 reinterpret_cast<const char16_t
*>(rhs
),
550 // Compare 8bit/16bit chars to 8bit/16bit chars.
// Returns true when the first |chars| code units of |lhs| and |rhs| are
// pairwise equal. Both character types must be unsigned.
template <typename lchar, typename rchar>
inline bool CompareCharsEqualUnsigned(const lchar* lhs, const rchar* rhs,
                                      size_t chars) {
  static_assert(std::is_unsigned<lchar>::value,
                "std::is_unsigned<lchar>::value");
  static_assert(std::is_unsigned<rchar>::value,
                "std::is_unsigned<rchar>::value");
  if (sizeof(*lhs) == sizeof(*rhs)) {
    // memcmp compares byte-by-byte, but for equality it doesn't matter whether
    // two-byte char comparison is little- or big-endian.
    return memcmp(lhs, rhs, chars * sizeof(*lhs)) == 0;
  }
  // Differing widths: compare element by element.
  for (size_t i = 0; i < chars; ++i) {
    if (lhs[i] != rhs[i]) return false;
  }
  // No mismatch found (or |chars| was zero).
  return true;
}
// Equality comparison of the first |chars| code units of |lhs| and |rhs|.
// Each pointer is reinterpreted as a pointer to the unsigned counterpart of
// its character type before delegating to CompareCharsEqualUnsigned.
template <typename lchar, typename rchar>
inline bool CompareCharsEqual(const lchar* lhs, const rchar* rhs,
                              size_t chars) {
  using ulchar = typename std::make_unsigned<lchar>::type;
  using urchar = typename std::make_unsigned<rchar>::type;
  const ulchar* const unsigned_lhs = reinterpret_cast<const ulchar*>(lhs);
  const urchar* const unsigned_rhs = reinterpret_cast<const urchar*>(rhs);
  return CompareCharsEqualUnsigned(unsigned_lhs, unsigned_rhs, chars);
}
576 // V8::Object ~= JS::Value
579 // The default object constructor in V8 stores a nullptr,
580 // which has its low bit clear and is interpreted as Smi(0).
581 constexpr Object() : asBits_(JS::Int32Value(0).asRawBits()) {}
583 Object(const JS::Value
& value
) : asBits_(value
.asRawBits()) {}
585 // This constructor is only used in an unused implementation of
586 // IsCharacterInRangeArray in regexp-macro-assembler.cc.
587 Object(uintptr_t raw
) : asBits_(raw
) { MOZ_CRASH("unused"); }
589 JS::Value
value() const { return JS::Value::fromRawBits(asBits_
); }
591 inline static Object
cast(Object object
) { return object
; }
594 void setValue(const JS::Value
& val
) { asBits_
= val
.asRawBits(); }
598 // Used in regexp-interpreter.cc to check the return value of
599 // isolate->stack_guard()->HandleInterrupts(). We want to handle
600 // interrupts in the caller, so we return a magic value from
601 // HandleInterrupts and check for it here.
602 inline bool IsException(Object obj
, Isolate
*) {
603 return obj
.value().isMagic(JS_INTERRUPT_REGEXP
);
606 class Smi
: public Object
{
608 static Smi
FromInt(int32_t value
) {
610 smi
.setValue(JS::Int32Value(value
));
613 static inline int32_t ToInt(const Object object
) {
614 return object
.value().toInt32();
618 // V8::HeapObject ~= GC thing
619 class HeapObject
: public Object
{
621 inline static HeapObject
cast(Object object
) {
623 h
.setValue(object
.value());
628 // V8's values use low-bit tagging. If the LSB is 0, it's a small
629 // integer. If the LSB is 1, it's a pointer to some GC thing. In V8,
630 // this wrapper class is used to represent a pointer that has the low
631 // bit set, or a small integer that has been shifted left by one
632 // bit. We don't use the same tagging system, so all we need is a
// transparent wrapper that automatically converts to/from the wrapped
// type.
635 template <typename T
>
639 MOZ_IMPLICIT
Tagged(const T
& value
) : value_(value
) {}
640 MOZ_IMPLICIT
Tagged(T
&& value
) : value_(std::move(value
)) {}
642 T
* operator->() { return &value_
; }
643 constexpr operator T() const { return value_
; }
649 // A fixed-size array with Objects (aka Values) as element types.
650 // Implemented using the dense elements of an ArrayObject.
651 // Used for named captures.
652 class FixedArray
: public HeapObject
{
654 inline void set(uint32_t index
, Object value
) {
655 inner()->setDenseElement(index
, value
.value());
657 inline static FixedArray
cast(Object object
) {
659 f
.setValue(object
.value());
662 js::NativeObject
* inner() {
663 return &value().toObject().as
<js::NativeObject
>();
668 * Conceptually, ByteArrayData is a variable-size structure. To
669 * implement this in a C++-approved way, we allocate a struct
670 * containing the 32-bit length field, followed by additional memory
671 * for the data. To access the data, we get a pointer to the next byte
672 * after the length field and cast it to the correct type.
674 inline uint8_t* ByteArrayData::data() {
675 static_assert(alignof(uint8_t) <= alignof(ByteArrayData
),
676 "The trailing data must be aligned to start immediately "
677 "after the header with no padding.");
678 ByteArrayData
* immediatelyAfter
= this + 1;
679 return reinterpret_cast<uint8_t*>(immediatelyAfter
);
682 template <typename T
>
683 T
* ByteArrayData::typedData() {
684 static_assert(alignof(T
) <= alignof(ByteArrayData
));
685 MOZ_ASSERT(uintptr_t(data()) % alignof(T
) == 0);
686 return reinterpret_cast<T
*>(data());
689 template <typename T
>
690 T
ByteArrayData::getTyped(uint32_t index
) {
691 MOZ_ASSERT(index
< length() / sizeof(T
));
692 return typedData
<T
>()[index
];
695 template <typename T
>
696 void ByteArrayData::setTyped(uint32_t index
, T value
) {
697 MOZ_ASSERT(index
< length() / sizeof(T
));
698 typedData
<T
>()[index
] = value
;
701 // A fixed-size array of bytes.
702 class ByteArray
: public HeapObject
{
704 ByteArrayData
* inner() const {
705 return static_cast<ByteArrayData
*>(value().toPrivate());
707 friend bool IsByteArray(Object obj
);
710 PseudoHandle
<ByteArrayData
> takeOwnership(Isolate
* isolate
);
711 PseudoHandle
<ByteArrayData
> maybeTakeOwnership(Isolate
* isolate
);
713 uint8_t get(uint32_t index
) { return inner()->get(index
); }
714 void set(uint32_t index
, uint8_t val
) { inner()->set(index
, val
); }
716 uint32_t length() const { return inner()->length(); }
717 uint8_t* begin() { return inner()->data(); }
719 static ByteArray
cast(Object object
) {
721 b
.setValue(object
.value());
725 friend class SMRegExpMacroAssembler
;
728 // This is only used in assertions. In debug builds, we put a magic value
729 // in the header of each ByteArrayData, and assert here that it matches.
730 inline bool IsByteArray(Object obj
) {
731 MOZ_ASSERT(ByteArray::cast(obj
).inner()->magic() ==
732 ByteArrayData::ExpectedMagic
);
736 // This is a convenience class used in V8 for treating a ByteArray as an array
737 // of fixed-size integers. This version supports integral types up to 32 bits.
738 template <typename T
>
739 class FixedIntegerArray
: public ByteArray
{
740 static_assert(alignof(T
) <= alignof(ByteArrayData
));
741 static_assert(std::is_integral
<T
>::value
);
744 static Handle
<FixedIntegerArray
<T
>> New(Isolate
* isolate
, uint32_t length
);
746 T
get(uint32_t index
) { return inner()->template getTyped
<T
>(index
); };
747 void set(uint32_t index
, T value
) {
748 inner()->template setTyped
<T
>(index
, value
);
751 static FixedIntegerArray
<T
> cast(Object object
) {
752 FixedIntegerArray
<T
> f
;
753 f
.setValue(object
.value());
758 using FixedUInt16Array
= FixedIntegerArray
<uint16_t>;
760 // Like Handles in SM, V8 handles are references to marked pointers.
761 // Unlike SM, where Rooted pointers are created individually on the
762 // stack, the target of a V8 handle lives in an arena on the isolate
763 // (~= JSContext). Whenever a Handle is created, a new "root" is
764 // created at the end of the arena.
766 // HandleScopes are used to manage the lifetimes of these handles. A
767 // HandleScope lives on the stack and stores the size of the arena at
768 // the time of its creation. When the function returns and the
769 // HandleScope is destroyed, the arena is truncated to its previous
// size, clearing all roots that were created since the creation of
// the HandleScope.
773 // In some cases, objects that are GC-allocated in V8 are not in SM.
774 // In particular, irregexp allocates ByteArrays during code generation
775 // to store lookup tables. This does not play nicely with the SM
776 // macroassembler's requirement that no GC allocations take place
777 // while it is on the stack. To work around this, this shim layer also
778 // provides the ability to create pseudo-handles, which are not
779 // managed by the GC but provide the same API to irregexp. The "root"
780 // of a pseudohandle is a unique pointer living in a second arena. If
781 // the allocated object should outlive the HandleScope, it must be
782 // manually moved out of the arena using maybeTakeOwnership.
783 // (If maybeTakeOwnership is called multiple times, it will return
784 // a null pointer on subsequent calls.)
786 class MOZ_STACK_CLASS HandleScope
{
788 HandleScope(Isolate
* isolate
);
793 size_t non_gc_level_
= 0;
796 friend class Isolate
;
800 // https://github.com/v8/v8/blob/5792f3587116503fc047d2f68c951c72dced08a5/src/handles/handles.h#L88-L171
801 template <typename T
>
802 class MOZ_NONHEAP_CLASS Handle
{
804 Handle() : location_(nullptr) {}
805 Handle(T object
, Isolate
* isolate
);
806 Handle(const JS::Value
& value
, Isolate
* isolate
);
808 // Constructor for handling automatic up casting.
809 template <typename S
,
810 typename
= std::enable_if_t
<std::is_convertible_v
<S
*, T
*>>>
811 inline Handle(Handle
<S
> handle
) : location_(handle
.location_
) {}
813 inline bool is_null() const { return location_
== nullptr; }
815 inline T
operator*() const { return T::cast(Object(*location_
)); };
817 // {ObjectRef} is returned by {Handle::operator->}. It should never be stored
818 // anywhere or used in any other code; no one should ever have to spell out
819 // {ObjectRef} in code. Its only purpose is to be dereferenced immediately by
820 // "operator-> chaining". Returning the address of the field is valid because
821 // this object's lifetime only ends at the end of the full statement.
823 // https://github.com/v8/v8/blob/03aaa4b3bf4cb01eee1f223b252e6869b04ab08c/src/handles/handles.h#L91-L105
824 class MOZ_TEMPORARY_CLASS ObjectRef
{
826 T
* operator->() { return &object_
; }
830 explicit ObjectRef(T object
) : object_(object
) {}
834 inline ObjectRef
operator->() const { return ObjectRef
{**this}; }
836 static Handle
<T
> fromHandleValue(JS::HandleValue handle
) {
837 return Handle(handle
.address());
841 Handle(const JS::Value
* location
) : location_(location
) {}
846 friend class MaybeHandle
;
848 const JS::Value
* location_
;
851 // A Handle can be converted into a MaybeHandle. Converting a MaybeHandle
852 // into a Handle requires checking that it does not point to nullptr. This
853 // ensures nullptr checks before use.
855 // Also note that Handles do not provide default equality comparison or hashing
856 // operators on purpose. Such operators would be misleading, because intended
857 // semantics is ambiguous between Handle location and object identity.
859 // https://github.com/v8/v8/blob/5792f3587116503fc047d2f68c951c72dced08a5/src/handles/maybe-handles.h#L15-L78
860 template <typename T
>
861 class MOZ_NONHEAP_CLASS MaybeHandle final
{
863 MaybeHandle() : location_(nullptr) {}
865 // Constructor for handling automatic up casting from Handle.
866 // Ex. Handle<JSArray> can be passed when MaybeHandle<Object> is expected.
867 template <typename S
,
868 typename
= std::enable_if_t
<std::is_convertible_v
<S
*, T
*>>>
869 MaybeHandle(Handle
<S
> handle
) : location_(handle
.location_
) {}
871 inline Handle
<T
> ToHandleChecked() const {
872 MOZ_RELEASE_ASSERT(location_
);
873 return Handle
<T
>(location_
);
876 // Convert to a Handle with a type that can be upcasted to.
877 template <typename S
>
878 inline bool ToHandle(Handle
<S
>* out
) const {
880 *out
= Handle
<T
>(location_
);
889 JS::Value
* location_
;
892 // From v8/src/handles/handles-inl.h
894 template <typename T
>
895 inline Handle
<T
> handle(T object
, Isolate
* isolate
) {
896 return Handle
<T
>(object
, isolate
);
899 // RAII Guard classes
901 using DisallowGarbageCollection
= JS::AutoAssertNoGC
;
903 // V8 uses this inside DisallowGarbageCollection regions to turn
904 // allocation back on before throwing a stack overflow exception or
905 // handling interrupts. AutoSuppressGC is sufficient for the former
906 // case, but not for the latter: handling interrupts can execute
907 // arbitrary script code, and V8 jumps through some scary hoops to
908 // "manually relocate unhandlified references" afterwards. To keep
909 // things sane, we don't try to handle interrupts while regex code is
910 // still on the stack. Instead, we return EXCEPTION and handle
911 // interrupts in the caller. (See RegExpShared::execute.)
913 class AllowGarbageCollection
{
915 AllowGarbageCollection() {}
919 // https://github.com/v8/v8/blob/84f3877c15bc7f8956d21614da4311337525a3c8/src/objects/string.h#L83-L474
920 class String
: public HeapObject
{
922 JSString
* str() const { return value().toString(); }
926 String(JSString
* str
) { setValue(JS::StringValue(str
)); }
928 operator JSString
*() const { return str(); }
931 static const int32_t kMaxOneByteCharCode
= unibrow::Latin1::kMaxChar
;
932 static const uint32_t kMaxOneByteCharCodeU
= unibrow::Latin1::kMaxChar
;
933 static const int kMaxUtf16CodeUnit
= 0xffff;
934 static const uint32_t kMaxUtf16CodeUnitU
= kMaxUtf16CodeUnit
;
935 static const base::uc32 kMaxCodePoint
= 0x10ffff;
937 MOZ_ALWAYS_INLINE
int length() const { return str()->length(); }
938 bool IsFlat() { return str()->isLinear(); };
941 // https://github.com/v8/v8/blob/84f3877c15bc7f8956d21614da4311337525a3c8/src/objects/string.h#L95-L152
944 FlatContent(JSLinearString
* string
, const DisallowGarbageCollection
& no_gc
)
945 : string_(string
), no_gc_(no_gc
) {}
946 inline bool IsOneByte() const { return string_
->hasLatin1Chars(); }
947 inline bool IsTwoByte() const { return !string_
->hasLatin1Chars(); }
949 base::Vector
<const uint8_t> ToOneByteVector() const {
950 MOZ_ASSERT(IsOneByte());
951 return base::Vector
<const uint8_t>(string_
->latin1Chars(no_gc_
),
954 base::Vector
<const base::uc16
> ToUC16Vector() const {
955 MOZ_ASSERT(IsTwoByte());
956 return base::Vector
<const base::uc16
>(string_
->twoByteChars(no_gc_
),
959 void UnsafeDisableChecksumVerification() {
960 // Intentional no-op. See the comment for AllowGarbageCollection above.
964 const JSLinearString
* string_
;
965 const JS::AutoAssertNoGC
& no_gc_
;
967 FlatContent
GetFlatContent(const DisallowGarbageCollection
& no_gc
) {
968 MOZ_ASSERT(IsFlat());
969 return FlatContent(&str()->asLinear(), no_gc
);
972 static Handle
<String
> Flatten(Isolate
* isolate
, Handle
<String
> string
);
974 inline static String
cast(Object object
) {
976 MOZ_ASSERT(object
.value().isString());
977 s
.setValue(object
.value());
981 inline static bool IsOneByteRepresentationUnderneath(String string
) {
982 return string
.str()->hasLatin1Chars();
984 inline bool IsOneByteRepresentation() const {
985 return str()->hasLatin1Chars();
988 std::unique_ptr
<char[]> ToCString();
990 template <typename Char
>
991 base::Vector
<const Char
> GetCharVector(
992 const DisallowGarbageCollection
& no_gc
);
996 inline base::Vector
<const uint8_t> String::GetCharVector(
997 const DisallowGarbageCollection
& no_gc
) {
998 String::FlatContent flat
= GetFlatContent(no_gc
);
999 MOZ_ASSERT(flat
.IsOneByte());
1000 return flat
.ToOneByteVector();
1004 inline base::Vector
<const base::uc16
> String::GetCharVector(
1005 const DisallowGarbageCollection
& no_gc
) {
1006 String::FlatContent flat
= GetFlatContent(no_gc
);
1007 MOZ_ASSERT(flat
.IsTwoByte());
1008 return flat
.ToUC16Vector();
1011 class JSRegExp
: public HeapObject
{
1013 JSRegExp() : HeapObject() {}
1014 JSRegExp(js::RegExpShared
* re
) { setValue(JS::PrivateGCThingValue(re
)); }
1016 // ******************************************************
1017 // Methods that are called from inside the implementation
1018 // ******************************************************
1019 void TierUpTick() { inner()->tierUpTick(); }
1021 Object
bytecode(bool is_latin1
) const {
1022 return Object(JS::PrivateValue(inner()->getByteCode(is_latin1
)));
1025 // TODO: should we expose this?
1026 uint32_t backtrack_limit() const { return 0; }
1028 static JSRegExp
cast(Object object
) {
1030 js::gc::Cell
* regexpShared
= object
.value().toGCThing();
1031 MOZ_ASSERT(regexpShared
->is
<js::RegExpShared
>());
1032 regexp
.setValue(JS::PrivateGCThingValue(regexpShared
));
1036 // Each capture (including the match itself) needs two registers.
1037 static constexpr int RegistersForCaptureCount(int count
) {
1038 return (count
+ 1) * 2;
1041 inline uint32_t max_register_count() const {
1042 return inner()->getMaxRegisters();
1045 // ******************************
1047 // ******************************
1049 static constexpr int kMaxCaptures
= (1 << 15) - 1;
1051 static constexpr int kNoBacktrackLimit
= 0;
1054 js::RegExpShared
* inner() const {
1055 return value().toGCThing()->as
<js::RegExpShared
>();
1059 using RegExpFlags
= JS::RegExpFlags
;
1060 using RegExpFlag
= JS::RegExpFlags::Flag
;
1062 inline bool IsUnicode(RegExpFlags flags
) { return flags
.unicode(); }
1063 inline bool IsGlobal(RegExpFlags flags
) { return flags
.global(); }
1064 inline bool IsIgnoreCase(RegExpFlags flags
) { return flags
.ignoreCase(); }
1065 inline bool IsMultiline(RegExpFlags flags
) { return flags
.multiline(); }
1066 inline bool IsDotAll(RegExpFlags flags
) { return flags
.dotAll(); }
1067 inline bool IsSticky(RegExpFlags flags
) { return flags
.sticky(); }
1068 inline bool IsUnicodeSets(RegExpFlags flags
) { return flags
.unicodeSets(); }
1069 inline bool IsEitherUnicode(RegExpFlags flags
) {
1070 return flags
.unicode() || flags
.unicodeSets();
1073 inline base::Optional
<RegExpFlag
> TryRegExpFlagFromChar(char c
) {
1076 // The parser only calls this after verifying that it's a supported flag.
1077 MOZ_ALWAYS_TRUE(JS::MaybeParseRegExpFlag(c
, &flag
));
1079 return base::Optional(flag
);
1082 inline bool operator==(const RegExpFlags
& lhs
, const int& rhs
) {
1083 return lhs
.value() == rhs
;
1085 inline bool operator!=(const RegExpFlags
& lhs
, const int& rhs
) {
1086 return !(lhs
== rhs
);
// Empty body: the sample is discarded.
inline void AddSample(int sample) {
}
1096 Histogram
* regexp_backtracks() { return ®exp_backtracks_
; }
1099 Histogram regexp_backtracks_
;
// Where a new object should be allocated.
enum class AllocationType : uint8_t {
  kYoung,  // Allocate in the nursery
  kOld,    // Allocate in the tenured heap
};
1107 using StackGuard
= Isolate
;
1108 using Factory
= Isolate
;
1112 Isolate(JSContext
* cx
) : cx_(cx
) {}
1116 size_t sizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf
) const;
1118 //********** Isolate code **********//
1119 RegExpStack
* regexp_stack() const { return regexpStack_
; }
// Called from inside no-GC code, so the error is not allocated here
// (that would require suppressing GC). Execute returns false instead,
// and the caller calls ReportOverRecursed.
void StackOverflow() {
}
1126 #ifndef V8_INTL_SUPPORT
1127 unibrow::Mapping
<unibrow::Ecma262UnCanonicalize
>* jsregexp_uncanonicalize() {
1128 return &jsregexp_uncanonicalize_
;
1130 unibrow::Mapping
<unibrow::Ecma262Canonicalize
>*
1131 regexp_macro_assembler_canonicalize() {
1132 return ®exp_macro_assembler_canonicalize_
;
1134 unibrow::Mapping
<unibrow::CanonicalizationRange
>* jsregexp_canonrange() {
1135 return &jsregexp_canonrange_
;
1139 unibrow::Mapping
<unibrow::Ecma262UnCanonicalize
> jsregexp_uncanonicalize_
;
1140 unibrow::Mapping
<unibrow::Ecma262Canonicalize
>
1141 regexp_macro_assembler_canonicalize_
;
1142 unibrow::Mapping
<unibrow::CanonicalizationRange
> jsregexp_canonrange_
;
1143 #endif // !V8_INTL_SUPPORT
1146 // An empty stub for telemetry we don't support
1147 void IncreaseTotalRegexpCodeGenerated(Handle
<HeapObject
> code
) {}
1149 Counters
* counters() { return &counters_
; }
1151 //********** Factory code **********//
1152 inline Factory
* factory() { return this; }
1154 Handle
<ByteArray
> NewByteArray(
1155 int length
, AllocationType allocation
= AllocationType::kYoung
);
1157 // Allocates a fixed array initialized with undefined values.
1158 Handle
<FixedArray
> NewFixedArray(int length
);
1160 template <typename T
>
1161 Handle
<FixedIntegerArray
<T
>> NewFixedIntegerArray(uint32_t length
);
1163 template <typename Char
>
1164 Handle
<String
> InternalizeString(const base::Vector
<const Char
>& str
);
1166 //********** Stack guard code **********//
1167 inline StackGuard
* stack_guard() { return this; }
1169 uintptr_t real_climit() { return cx_
->stackLimit(JS::StackForSystemCode
); }
1171 // This is called from inside no-GC code. V8 runs the interrupt
1172 // inside the no-GC code and then "manually relocates unhandlified
1173 // references" afterwards. We just return a magic value and let the
1174 // caller handle interrupts.
1175 Object
HandleInterrupts() {
1176 return Object(JS::MagicValue(JS_INTERRUPT_REGEXP
));
1179 JSContext
* cx() const { return cx_
; }
1181 void trace(JSTracer
* trc
);
1183 //********** Handle code **********//
1185 JS::Value
* getHandleLocation(const JS::Value
& value
);
1188 mozilla::SegmentedVector
<JS::Value
, 256> handleArena_
;
1189 mozilla::SegmentedVector
<PseudoHandle
<void>, 256> uniquePtrArena_
;
1191 void* allocatePseudoHandle(size_t bytes
);
1194 template <typename T
>
1195 PseudoHandle
<T
> takeOwnership(void* ptr
);
1196 template <typename T
>
1197 PseudoHandle
<T
> maybeTakeOwnership(void* ptr
);
1199 uint32_t liveHandles() const { return handleArena_
.Length(); }
1200 uint32_t livePseudoHandles() const { return uniquePtrArena_
.Length(); }
1203 void openHandleScope(HandleScope
& scope
) {
1204 scope
.level_
= handleArena_
.Length();
1205 scope
.non_gc_level_
= uniquePtrArena_
.Length();
1207 void closeHandleScope(size_t prevLevel
, size_t prevUniqueLevel
) {
1208 size_t currLevel
= handleArena_
.Length();
1209 handleArena_
.PopLastN(currLevel
- prevLevel
);
1211 size_t currUniqueLevel
= uniquePtrArena_
.Length();
1212 uniquePtrArena_
.PopLastN(currUniqueLevel
- prevUniqueLevel
);
1214 friend class HandleScope
;
1217 RegExpStack
* regexpStack_
{};
1218 Counters counters_
{};
1221 uint32_t shouldSimulateInterrupt_
= 0;
1226 // https://github.com/v8/v8/blob/50dcf2af54ce27801a71c47c1be1d2c5e36b0dd6/src/execution/isolate.h#L1909-L1931
1227 class StackLimitCheck
{
1229 StackLimitCheck(Isolate
* isolate
) : cx_(isolate
->cx()) {}
1231 // Use this to check for stack-overflows in C++ code.
1232 bool HasOverflowed() {
1233 js::AutoCheckRecursionLimit
recursion(cx_
);
1234 bool overflowed
= !recursion
.checkDontReport(cx_
);
1235 if (overflowed
&& js::SupportDifferentialTesting()) {
1236 // We don't report overrecursion here, but we throw an exception later
1237 // and this still affects differential testing. Mimic ReportOverRecursed
1238 // (the fuzzers check for this particular string).
1239 fprintf(stderr
, "ReportOverRecursed called\n");
1244 // Use this to check for interrupt request in C++ code.
1245 bool InterruptRequested() {
1246 return cx_
->hasPendingInterrupt(js::InterruptReason::CallbackUrgent
);
1249 // Use this to check for stack-overflow when entering runtime from JS code.
1250 bool JsHasOverflowed() {
1251 js::AutoCheckRecursionLimit
recursion(cx_
);
1252 return !recursion
.checkDontReport(cx_
);
1259 class ExternalReference
{
1261 static const void* TopOfRegexpStack(Isolate
* isolate
);
1262 static size_t SizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf
,
1263 RegExpStack
* regexpStack
);
1266 class Code
: public HeapObject
{
1268 uint8_t* raw_instruction_start() { return inner()->raw(); }
1270 static Code
cast(Object object
) {
1272 js::gc::Cell
* jitCode
= object
.value().toGCThing();
1273 MOZ_ASSERT(jitCode
->is
<js::jit::JitCode
>());
1274 c
.setValue(JS::PrivateGCThingValue(jitCode
));
1277 js::jit::JitCode
* inner() {
1278 return value().toGCThing()->as
<js::jit::JitCode
>();
1282 // Only used in function signature of functions we don't implement
1283 // (NativeRegExpMacroAssembler::CheckStackGuardState)
// Placeholder type with no members.
class InstructionStream {
};
1286 // Origin: https://github.com/v8/v8/blob/master/src/codegen/label.h
1289 Label() : inner_(js::jit::Label()) {}
1291 js::jit::Label
* inner() { return &inner_
; }
1293 void Unuse() { inner_
.reset(); }
1295 bool is_linked() { return inner_
.used(); }
1296 bool is_bound() { return inner_
.bound(); }
1297 bool is_unused() { return !inner_
.used() && !inner_
.bound(); }
1299 int pos() { return inner_
.offset(); }
1300 void link_to(int pos
) { inner_
.use(pos
); }
1301 void bind_to(int pos
) { inner_
.bind(pos
); }
1304 js::jit::Label inner_
;
1305 js::jit::CodeOffset patchOffset_
;
1307 friend class SMRegExpMacroAssembler
;
1310 #define v8_flags js::jit::JitOptions
1312 #define V8_USE_COMPUTED_GOTO 1
1313 #define COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER
1315 } // namespace internal
1320 inline void FatalProcessOutOfMemory(v8::internal::Isolate
* isolate
,
1322 js::AutoEnterOOMUnsafeRegion oomUnsafe
;
1323 oomUnsafe
.crash(msg
);
1328 #endif // RegexpShim_h