1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // Copyright 2019 the V8 project authors. All rights reserved.
8 // Use of this source code is governed by a BSD-style license that can be
9 // found in the LICENSE file.
14 #include "mozilla/Assertions.h"
15 #include "mozilla/Attributes.h"
16 #include "mozilla/MathAlgorithms.h"
17 #include "mozilla/Maybe.h"
18 #include "mozilla/SegmentedVector.h"
19 #include "mozilla/Sprintf.h"
20 #include "mozilla/Types.h"
26 #include "irregexp/RegExpTypes.h"
27 #include "irregexp/util/FlagsShim.h"
28 #include "irregexp/util/VectorShim.h"
29 #include "irregexp/util/ZoneShim.h"
30 #include "jit/JitCode.h"
31 #include "jit/Label.h"
32 #include "jit/shared/Assembler-shared.h"
33 #include "js/friend/StackLimits.h" // js::AutoCheckRecursionLimit
34 #include "js/RegExpFlags.h"
36 #include "threading/ExclusiveData.h"
37 #include "util/DifferentialTesting.h"
38 #include "vm/JSContext.h"
39 #include "vm/MutexIDs.h"
40 #include "vm/NativeObject.h"
41 #include "vm/RegExpShared.h"
43 // Forward declaration of classes
49 class RegExpMatchInfo
;
55 } // namespace internal
58 #define V8_WARN_UNUSED_RESULT [[nodiscard]]
59 #define V8_EXPORT_PRIVATE
60 #define V8_FALLTHROUGH [[fallthrough]]
61 #define V8_NODISCARD [[nodiscard]]
62 #define V8_NOEXCEPT noexcept
64 #define FATAL(x) MOZ_CRASH(x)
65 #define UNREACHABLE() MOZ_CRASH("unreachable code")
66 #define UNIMPLEMENTED() MOZ_CRASH("unimplemented code")
67 #define STATIC_ASSERT(exp) static_assert(exp, #exp)
69 #define DCHECK MOZ_ASSERT
70 #define DCHECK_EQ(lhs, rhs) MOZ_ASSERT((lhs) == (rhs))
71 #define DCHECK_NE(lhs, rhs) MOZ_ASSERT((lhs) != (rhs))
72 #define DCHECK_GT(lhs, rhs) MOZ_ASSERT((lhs) > (rhs))
73 #define DCHECK_GE(lhs, rhs) MOZ_ASSERT((lhs) >= (rhs))
74 #define DCHECK_LT(lhs, rhs) MOZ_ASSERT((lhs) < (rhs))
75 #define DCHECK_LE(lhs, rhs) MOZ_ASSERT((lhs) <= (rhs))
76 #define DCHECK_NULL(val) MOZ_ASSERT((val) == nullptr)
77 #define DCHECK_NOT_NULL(val) MOZ_ASSERT((val) != nullptr)
78 #define DCHECK_IMPLIES(lhs, rhs) MOZ_ASSERT_IF(lhs, rhs)
79 #define CHECK MOZ_RELEASE_ASSERT
80 #define CHECK_EQ(lhs, rhs) MOZ_RELEASE_ASSERT((lhs) == (rhs))
81 #define CHECK_LE(lhs, rhs) MOZ_RELEASE_ASSERT((lhs) <= (rhs))
82 #define CHECK_GE(lhs, rhs) MOZ_RELEASE_ASSERT((lhs) >= (rhs))
83 #define CONSTEXPR_DCHECK MOZ_ASSERT
85 #define MemCopy memcpy
88 // https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L310-L319
89 // ptrdiff_t is 't' according to the standard, but MSVC uses 'I'.
91 # define V8PRIxPTRDIFF "Ix"
92 # define V8PRIdPTRDIFF "Id"
93 # define V8PRIuPTRDIFF "Iu"
95 # define V8PRIxPTRDIFF "tx"
96 # define V8PRIdPTRDIFF "td"
97 # define V8PRIuPTRDIFF "tu"
100 #define arraysize std::size
102 // Explicitly declare the assignment operator as deleted.
103 #define DISALLOW_ASSIGN(TypeName) TypeName& operator=(const TypeName&) = delete
105 // Explicitly declare the copy constructor and assignment operator as deleted.
106 // This also deletes the implicit move constructor and implicit move assignment
107 // operator, but still allows to manually define them.
108 #define DISALLOW_COPY_AND_ASSIGN(TypeName) \
109 TypeName(const TypeName&) = delete; \
110 DISALLOW_ASSIGN(TypeName)
112 // Explicitly declare all implicit constructors as deleted, namely the
113 // default constructor, copy constructor and operator= functions.
114 // This is especially useful for classes containing only static methods.
115 #define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
116 TypeName() = delete; \
117 DISALLOW_COPY_AND_ASSIGN(TypeName)
122 // https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L364-L367
// Reports whether |value| has none of the bits of (alignment - 1) set.
// This equals "value is a multiple of alignment" only when |alignment| is a
// power of two; that precondition is not checked here.
template <typename T, typename U>
constexpr inline bool IsAligned(T value, U alignment) {
  return (value & (alignment - 1)) == 0;
}
128 using byte
= uint8_t;
129 using Address
= uintptr_t;
130 static const Address kNullAddress
= 0;
// Returns the frame address of the calling context as an integer, using the
// compiler builtin __builtin_frame_address(0).
// NOTE(review): this builtin is a GCC/Clang extension - confirm that other
// toolchains take a different path elsewhere in the file.
inline uintptr_t GetCurrentStackPosition() {
  return reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
}
138 // Latin1/UTF-16 constants
139 // Code-point values in Unicode 4.0 are 21 bits wide.
140 // Code units in UTF-16 are 16 bits wide.
141 using uc16
= char16_t
;
142 using uc32
= uint32_t;
144 constexpr int kUC16Size
= sizeof(base::uc16
);
147 // https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L247-L258
148 // The USE(x, ...) template is used to silence C++ compiler warnings
149 // issued for (yet) unused variables (typically parameters).
150 // The arguments are guaranteed to be evaluated from left to right.
152 template <typename T
>
153 Use(T
&&) {} // NOLINT(runtime/explicit)
157 ::v8::base::Use unused_tmp_array_for_use_macro[]{__VA_ARGS__}; \
158 (void)unused_tmp_array_for_use_macro; \
162 // https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/safe_conversions.h#L35-L39
// saturated_cast<> is analogous to static_cast<> for numeric types, except
// that the specified numeric conversion will saturate rather than overflow or
// underflow.
// Primary template: declared only. Each supported (Dst, Src) pair must be
// provided as an explicit specialization.
template <typename Dst, typename Src>
inline Dst saturated_cast(Src value);

// This is the only specialization that is needed for regexp code.
// Instead of pulling in dozens of lines of template goo
// to derive it, I used the implementation from uint8_clamped in
// ArrayBufferObject.h.
template <>
inline uint8_t saturated_cast<uint8_t, int>(int x) {
  // Clamp to [0, 255]: negative inputs yield 0, inputs of 255 or more yield
  // 255, everything else converts unchanged.
  return (x >= 0) ? ((x < 255) ? uint8_t(x) : 255) : 0;
}
179 // https://github.com/v8/v8/blob/fc088cdaccadede84886eee881e67af9db53669a/src/base/bounds.h#L14-L28
// Checks if value is in range [lower_limit, higher_limit] using a single
// branch.
// Both sides are rebased by subtracting |lower_limit| in the unsigned
// counterpart of T. A |value| below |lower_limit| wraps around to a large
// unsigned number, so one "<=" comparison covers both bounds.
// NOTE(review): the result is only meaningful when
// lower_limit <= higher_limit; nothing here checks that.
template <typename T, typename U>
inline constexpr bool IsInRange(T value, U lower_limit, U higher_limit) {
  using unsigned_T = typename std::make_unsigned<T>::type;
  // Use static_cast to support enum classes.
  return static_cast<unsigned_T>(static_cast<unsigned_T>(value) -
                                 static_cast<unsigned_T>(lower_limit)) <=
         static_cast<unsigned_T>(static_cast<unsigned_T>(higher_limit) -
                                 static_cast<unsigned_T>(lower_limit));
}
192 #define LAZY_INSTANCE_INITIALIZER \
195 template <typename T
>
196 class LazyInstanceImpl
{
198 LazyInstanceImpl() : value_(js::mutexid::IrregexpLazyStatic
) {}
201 auto val
= value_
.lock();
202 if (val
->isNothing()) {
209 js::ExclusiveData
<mozilla::Maybe
<T
>> value_
;
212 template <typename T
>
215 using type
= LazyInstanceImpl
<T
>;
219 // https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/utils/utils.h#L40-L48
220 // Returns the value (0 .. 15) of a hexadecimal character c.
221 // If c is not a legal hexadecimal character, returns a value < 0.
222 // Used in regexp-parser.cc
223 inline int HexValue(base::uc32 c
) {
225 if (static_cast<unsigned>(c
) <= 9) return c
;
226 c
= (c
| 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36.
227 if (static_cast<unsigned>(c
) <= 5) return c
+ 10;
231 template <typename
... Args
>
232 [[nodiscard
]] uint32_t hash_combine(uint32_t aHash
, Args
... aArgs
) {
233 return mozilla::AddToHash(aHash
, aArgs
...);
236 template <typename T
>
238 mozilla::Maybe
<T
> inner_
;
241 Optional() = default;
242 Optional(T t
) { inner_
.emplace(t
); }
244 bool has_value() const { return inner_
.isSome(); }
245 const T
& value() const { return inner_
.ref(); }
250 inline uint64_t CountTrailingZeros(uint64_t value
) {
251 return mozilla::CountTrailingZeroes64(value
);
254 inline size_t RoundUpToPowerOfTwo32(size_t value
) {
255 return mozilla::RoundUpPow2(value
);
// True when |value| is positive and has exactly one bit set.
// Zero and negative values are rejected by the first test; clearing the
// lowest set bit with value & (value - 1) must then leave nothing behind.
template <typename T>
constexpr bool IsPowerOfTwo(T value) {
  return value > 0 && (value & (value - 1)) == 0;
}
268 using uchar
= unsigned int;
271 // https://github.com/v8/v8/blob/1f1e4cdb04c75eab77adbecd5f5514ddc3eb56cf/src/strings/unicode.h#L133-L150
274 static const base::uc16 kMaxChar
= 0xff;
276 // Convert the character to Latin-1 case equivalent if possible.
277 static inline base::uc16
TryConvertToLatin1(base::uc16 c
) {
278 // "GREEK CAPITAL LETTER MU" case maps to "MICRO SIGN".
279 // "GREEK SMALL LETTER MU" case maps to "MICRO SIGN".
280 if (c
== 0x039C || c
== 0x03BC) {
283 // "LATIN CAPITAL LETTER Y WITH DIAERESIS" case maps to "LATIN SMALL LETTER
284 // Y WITH DIAERESIS".
293 // https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L99-L131
296 static inline bool IsLeadSurrogate(int code
) {
297 return js::unicode::IsLeadSurrogate(code
);
299 static inline bool IsTrailSurrogate(int code
) {
300 return js::unicode::IsTrailSurrogate(code
);
302 static inline base::uc16
LeadSurrogate(uint32_t char_code
) {
303 return js::unicode::LeadSurrogate(char_code
);
305 static inline base::uc16
TrailSurrogate(uint32_t char_code
) {
306 return js::unicode::TrailSurrogate(char_code
);
308 static inline uint32_t CombineSurrogatePair(char16_t lead
, char16_t trail
) {
309 return js::unicode::UTF16Decode(lead
, trail
);
311 static const uchar kMaxNonSurrogateCharCode
= 0xffff;
314 #ifndef V8_INTL_SUPPORT
316 // A cache used in case conversion. It caches the value for characters
317 // that either have no mapping or map to a single character independent
318 // of context. Characters that map to more than one character or that
319 // map differently depending on context are always looked up.
321 // https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L64-L88
322 template <class T
, int size
= 256>
325 inline Mapping() = default;
326 inline int get(uchar c
, uchar n
, uchar
* result
) {
327 CacheEntry entry
= entries_
[c
& kMask
];
328 if (entry
.code_point_
== c
) {
329 if (entry
.offset_
== 0) {
332 result
[0] = c
+ entry
.offset_
;
336 return CalculateValue(c
, n
, result
);
341 int CalculateValue(uchar c
, uchar n
, uchar
* result
) {
342 bool allow_caching
= true;
343 int length
= T::Convert(c
, n
, result
, &allow_caching
);
346 entries_
[c
& kMask
] = CacheEntry(c
, result
[0] - c
);
349 entries_
[c
& kMask
] = CacheEntry(c
, 0);
358 inline CacheEntry() : code_point_(kNoChar
), offset_(0) {}
359 inline CacheEntry(uchar code_point
, signed offset
)
360 : code_point_(code_point
), offset_(offset
) {}
363 static const int kNoChar
= (1 << 21) - 1;
365 static const int kSize
= size
;
366 static const int kMask
= kSize
- 1;
367 CacheEntry entries_
[kSize
];
371 // https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L241-L252
372 struct Ecma262Canonicalize
{
373 static const int kMaxWidth
= 1;
374 static int Convert(uchar c
, uchar n
, uchar
* result
, bool* allow_caching_ptr
);
376 struct Ecma262UnCanonicalize
{
377 static const int kMaxWidth
= 4;
378 static int Convert(uchar c
, uchar n
, uchar
* result
, bool* allow_caching_ptr
);
380 struct CanonicalizationRange
{
381 static const int kMaxWidth
= 1;
382 static int Convert(uchar c
, uchar n
, uchar
* result
, bool* allow_caching_ptr
);
385 #endif // !V8_INTL_SUPPORT
388 static bool Is(uchar c
);
391 } // namespace unibrow
395 #define PRINTF_FORMAT(x, y) MOZ_FORMAT_PRINTF(x, y)
396 void PRINTF_FORMAT(1, 2) PrintF(const char* format
, ...);
397 void PRINTF_FORMAT(2, 3) PrintF(FILE* out
, const char* format
, ...);
399 // Superclass for classes only using static method functions.
400 // The subclass of AllStatic cannot be instantiated at all.
404 AllStatic() = delete;
408 // Superclass for classes managed with new and delete.
409 // In irregexp, this is only AlternativeGeneration (in regexp-compiler.cc)
411 // https://github.com/v8/v8/blob/7b3332844212d78ee87a9426f3a6f7f781a8fbfa/src/utils/allocation.cc#L88-L96
414 static void* operator new(size_t size
) {
415 js::AutoEnterOOMUnsafeRegion oomUnsafe
;
416 void* result
= js_malloc(size
);
418 oomUnsafe
.crash("Irregexp Malloced shim");
422 static void operator delete(void* p
) { js_free(p
); }
425 constexpr int32_t KB
= 1024;
426 constexpr int32_t MB
= 1024 * 1024;
428 #define kMaxInt JSVAL_INT_MAX
429 #define kMinInt JSVAL_INT_MIN
430 constexpr int kSystemPointerSize
= sizeof(void*);
432 // The largest integer n such that n and n + 1 are both exactly
433 // representable as a Number value. ES6 section 20.1.2.6
434 constexpr double kMaxSafeInteger
= 9007199254740991.0; // 2^53-1
436 constexpr int kBitsPerByte
= 8;
437 constexpr int kBitsPerByteLog2
= 3;
438 constexpr int kUInt16Size
= sizeof(uint16_t);
439 constexpr int kUInt32Size
= sizeof(uint32_t);
440 constexpr int kInt64Size
= sizeof(int64_t);
442 constexpr int kMaxUInt16
= (1 << 16) - 1;
444 inline constexpr bool IsDecimalDigit(base::uc32 c
) {
445 return c
>= '0' && c
<= '9';
448 inline constexpr int AsciiAlphaToLower(base::uc32 c
) { return c
| 0x20; }
// True when |val| fits in 24 unsigned bits, i.e. nothing remains after
// shifting the low 24 bits away. Negative inputs keep their sign bits after
// the shift and are therefore rejected.
inline constexpr bool is_uint24(int64_t val) { return (val >> 24) == 0; }
// True when |val| fits in a 24-bit two's-complement integer, i.e. lies in the
// half-open interval [-2^23, 2^23).
inline constexpr bool is_int24(int64_t val) {
  int64_t limit = int64_t(1) << 23;
  return (-limit <= val) && (val < limit);
}
456 inline bool IsIdentifierStart(base::uc32 c
) {
457 return js::unicode::IsIdentifierStart(char32_t(c
));
459 inline bool IsIdentifierPart(base::uc32 c
) {
460 return js::unicode::IsIdentifierPart(char32_t(c
));
463 // Wrappers to disambiguate char16_t and uc16.
465 explicit AsUC16(char16_t v
) : value(v
) {}
470 explicit AsUC32(int32_t v
) : value(v
) {}
474 std::ostream
& operator<<(std::ostream
& os
, const AsUC16
& c
);
475 std::ostream
& operator<<(std::ostream
& os
, const AsUC32
& c
);
477 // This class is used for the output of trace-regexp-parser. V8 has
478 // an elaborate implementation to ensure that the output gets to the
479 // right place, even on Android. We just need something that will
480 // print output (ideally to stderr, to match the rest of our tracing
481 // code). This is an empty wrapper that will convert itself to
482 // std::cerr when used.
485 operator std::ostream
&() const;
486 template <typename T
>
487 std::ostream
& operator<<(T t
);
490 // Reuse existing Maybe implementation
491 using mozilla::Maybe
;
493 template <typename T
>
494 Maybe
<T
> Just(const T
& value
) {
495 return mozilla::Some(value
);
498 template <typename T
>
499 mozilla::Nothing
Nothing() {
500 return mozilla::Nothing();
503 template <typename T
>
504 using PseudoHandle
= mozilla::UniquePtr
<T
, JS::FreePolicy
>;
506 // Compare 8bit/16bit chars to 8bit/16bit chars.
507 // Used indirectly by regexp-interpreter.cc
508 // Taken from: https://github.com/v8/v8/blob/master/src/utils/utils.h
509 template <typename lchar
, typename rchar
>
510 inline int CompareCharsUnsigned(const lchar
* lhs
, const rchar
* rhs
,
512 const lchar
* limit
= lhs
+ chars
;
513 if (sizeof(*lhs
) == sizeof(char) && sizeof(*rhs
) == sizeof(char)) {
514 // memcmp compares byte-by-byte, yielding wrong results for two-byte
515 // strings on little-endian systems.
516 return memcmp(lhs
, rhs
, chars
);
518 while (lhs
< limit
) {
519 int r
= static_cast<int>(*lhs
) - static_cast<int>(*rhs
);
520 if (r
!= 0) return r
;
526 template <typename lchar
, typename rchar
>
527 inline int CompareChars(const lchar
* lhs
, const rchar
* rhs
, size_t chars
) {
528 DCHECK_LE(sizeof(lchar
), 2);
529 DCHECK_LE(sizeof(rchar
), 2);
530 if (sizeof(lchar
) == 1) {
531 if (sizeof(rchar
) == 1) {
532 return CompareCharsUnsigned(reinterpret_cast<const uint8_t*>(lhs
),
533 reinterpret_cast<const uint8_t*>(rhs
), chars
);
535 return CompareCharsUnsigned(reinterpret_cast<const uint8_t*>(lhs
),
536 reinterpret_cast<const char16_t
*>(rhs
),
540 if (sizeof(rchar
) == 1) {
541 return CompareCharsUnsigned(reinterpret_cast<const char16_t
*>(lhs
),
542 reinterpret_cast<const uint8_t*>(rhs
), chars
);
544 return CompareCharsUnsigned(reinterpret_cast<const char16_t
*>(lhs
),
545 reinterpret_cast<const char16_t
*>(rhs
),
551 // Compare 8bit/16bit chars to 8bit/16bit chars.
552 template <typename lchar
, typename rchar
>
553 inline bool CompareCharsEqualUnsigned(const lchar
* lhs
, const rchar
* rhs
,
555 STATIC_ASSERT(std::is_unsigned
<lchar
>::value
);
556 STATIC_ASSERT(std::is_unsigned
<rchar
>::value
);
557 if (sizeof(*lhs
) == sizeof(*rhs
)) {
558 // memcmp compares byte-by-byte, but for equality it doesn't matter whether
559 // two-byte char comparison is little- or big-endian.
560 return memcmp(lhs
, rhs
, chars
* sizeof(*lhs
)) == 0;
562 for (const lchar
* limit
= lhs
+ chars
; lhs
< limit
; ++lhs
, ++rhs
) {
563 if (*lhs
!= *rhs
) return false;
568 template <typename lchar
, typename rchar
>
569 inline bool CompareCharsEqual(const lchar
* lhs
, const rchar
* rhs
,
571 using ulchar
= typename
std::make_unsigned
<lchar
>::type
;
572 using urchar
= typename
std::make_unsigned
<rchar
>::type
;
573 return CompareCharsEqualUnsigned(reinterpret_cast<const ulchar
*>(lhs
),
574 reinterpret_cast<const urchar
*>(rhs
), chars
);
577 // V8::Object ~= JS::Value
580 // The default object constructor in V8 stores a nullptr,
581 // which has its low bit clear and is interpreted as Smi(0).
582 constexpr Object() : asBits_(JS::Int32Value(0).asRawBits()) {}
584 Object(const JS::Value
& value
) : asBits_(value
.asRawBits()) {}
586 // This constructor is only used in an unused implementation of
587 // IsCharacterInRangeArray in regexp-macro-assembler.cc.
588 Object(uintptr_t raw
) : asBits_(raw
) { MOZ_CRASH("unused"); }
590 // Used in regexp-interpreter.cc to check the return value of
591 // isolate->stack_guard()->HandleInterrupts(). We want to handle
592 // interrupts in the caller, so we always return false from
593 // HandleInterrupts and true here.
594 inline bool IsException(Isolate
*) const {
595 MOZ_ASSERT(!value().toBoolean());
599 JS::Value
value() const { return JS::Value::fromRawBits(asBits_
); }
601 inline static Object
cast(Object object
) { return object
; }
604 void setValue(const JS::Value
& val
) { asBits_
= val
.asRawBits(); }
608 class Smi
: public Object
{
610 static Smi
FromInt(int32_t value
) {
612 smi
.setValue(JS::Int32Value(value
));
615 static inline int32_t ToInt(const Object object
) {
616 return object
.value().toInt32();
620 // V8::HeapObject ~= GC thing
621 class HeapObject
: public Object
{
623 inline static HeapObject
cast(Object object
) {
625 h
.setValue(object
.value());
630 // A fixed-size array with Objects (aka Values) as element types.
631 // Implemented using the dense elements of an ArrayObject.
632 // Used for named captures.
633 class FixedArray
: public HeapObject
{
635 inline void set(uint32_t index
, Object value
) {
636 inner()->setDenseElement(index
, value
.value());
638 inline static FixedArray
cast(Object object
) {
640 f
.setValue(object
.value());
643 js::NativeObject
* inner() {
644 return &value().toObject().as
<js::NativeObject
>();
649 * Conceptually, ByteArrayData is a variable-size structure. To
650 * implement this in a C++-approved way, we allocate a struct
651 * containing the 32-bit length field, followed by additional memory
652 * for the data. To access the data, we get a pointer to the next byte
653 * after the length field and cast it to the correct type.
655 inline uint8_t* ByteArrayData::data() {
656 static_assert(alignof(uint8_t) <= alignof(ByteArrayData
),
657 "The trailing data must be aligned to start immediately "
658 "after the header with no padding.");
659 ByteArrayData
* immediatelyAfter
= this + 1;
660 return reinterpret_cast<uint8_t*>(immediatelyAfter
);
663 template <typename T
>
664 T
* ByteArrayData::typedData() {
665 static_assert(alignof(T
) <= alignof(ByteArrayData
));
666 MOZ_ASSERT(uintptr_t(data()) % alignof(T
) == 0);
667 return reinterpret_cast<T
*>(data());
670 template <typename T
>
671 T
ByteArrayData::getTyped(uint32_t index
) {
672 MOZ_ASSERT(index
< length
/ sizeof(T
));
673 return typedData
<T
>()[index
];
676 template <typename T
>
677 void ByteArrayData::setTyped(uint32_t index
, T value
) {
678 MOZ_ASSERT(index
< length
/ sizeof(T
));
679 typedData
<T
>()[index
] = value
;
682 // A fixed-size array of bytes.
683 class ByteArray
: public HeapObject
{
685 ByteArrayData
* inner() const {
686 return static_cast<ByteArrayData
*>(value().toPrivate());
690 PseudoHandle
<ByteArrayData
> takeOwnership(Isolate
* isolate
);
691 PseudoHandle
<ByteArrayData
> maybeTakeOwnership(Isolate
* isolate
);
693 byte
get(uint32_t index
) { return inner()->get(index
); }
694 void set(uint32_t index
, byte val
) { inner()->set(index
, val
); }
696 uint32_t length() const { return inner()->length
; }
697 byte
* GetDataStartAddress() { return inner()->data(); }
699 static ByteArray
cast(Object object
) {
701 b
.setValue(object
.value());
705 bool IsByteArray() const { return true; }
707 friend class SMRegExpMacroAssembler
;
710 // This is a convenience class used in V8 for treating a ByteArray as an array
711 // of fixed-size integers. This version supports integral types up to 32 bits.
712 template <typename T
>
713 class FixedIntegerArray
: public ByteArray
{
714 static_assert(alignof(T
) <= alignof(ByteArrayData
));
715 static_assert(std::is_integral
<T
>::value
);
718 static Handle
<FixedIntegerArray
<T
>> New(Isolate
* isolate
, uint32_t length
);
720 T
get(uint32_t index
) { return inner()->template getTyped
<T
>(index
); };
721 void set(uint32_t index
, T value
) {
722 inner()->template setTyped
<T
>(index
, value
);
725 static FixedIntegerArray
<T
> cast(Object object
) {
726 FixedIntegerArray
<T
> f
;
727 f
.setValue(object
.value());
732 using FixedUInt16Array
= FixedIntegerArray
<uint16_t>;
734 // Like Handles in SM, V8 handles are references to marked pointers.
735 // Unlike SM, where Rooted pointers are created individually on the
736 // stack, the target of a V8 handle lives in an arena on the isolate
737 // (~= JSContext). Whenever a Handle is created, a new "root" is
738 // created at the end of the arena.
// HandleScopes are used to manage the lifetimes of these handles. A
// HandleScope lives on the stack and stores the size of the arena at
// the time of its creation. When the function returns and the
// HandleScope is destroyed, the arena is truncated to its previous
// size, clearing all roots that were created since the creation of
// the HandleScope.
747 // In some cases, objects that are GC-allocated in V8 are not in SM.
748 // In particular, irregexp allocates ByteArrays during code generation
749 // to store lookup tables. This does not play nicely with the SM
750 // macroassembler's requirement that no GC allocations take place
751 // while it is on the stack. To work around this, this shim layer also
752 // provides the ability to create pseudo-handles, which are not
753 // managed by the GC but provide the same API to irregexp. The "root"
754 // of a pseudohandle is a unique pointer living in a second arena. If
755 // the allocated object should outlive the HandleScope, it must be
756 // manually moved out of the arena using maybeTakeOwnership.
757 // (If maybeTakeOwnership is called multiple times, it will return
758 // a null pointer on subsequent calls.)
760 class MOZ_STACK_CLASS HandleScope
{
762 HandleScope(Isolate
* isolate
);
767 size_t non_gc_level_
= 0;
770 friend class Isolate
;
774 // https://github.com/v8/v8/blob/5792f3587116503fc047d2f68c951c72dced08a5/src/handles/handles.h#L88-L171
775 template <typename T
>
776 class MOZ_NONHEAP_CLASS Handle
{
778 Handle() : location_(nullptr) {}
779 Handle(T object
, Isolate
* isolate
);
780 Handle(const JS::Value
& value
, Isolate
* isolate
);
782 // Constructor for handling automatic up casting.
783 template <typename S
,
784 typename
= std::enable_if_t
<std::is_convertible_v
<S
*, T
*>>>
785 inline Handle(Handle
<S
> handle
) : location_(handle
.location_
) {}
787 inline bool is_null() const { return location_
== nullptr; }
789 inline T
operator*() const { return T::cast(Object(*location_
)); };
791 // {ObjectRef} is returned by {Handle::operator->}. It should never be stored
792 // anywhere or used in any other code; no one should ever have to spell out
793 // {ObjectRef} in code. Its only purpose is to be dereferenced immediately by
794 // "operator-> chaining". Returning the address of the field is valid because
795 // this object's lifetime only ends at the end of the full statement.
797 // https://github.com/v8/v8/blob/03aaa4b3bf4cb01eee1f223b252e6869b04ab08c/src/handles/handles.h#L91-L105
798 class MOZ_TEMPORARY_CLASS ObjectRef
{
800 T
* operator->() { return &object_
; }
804 explicit ObjectRef(T object
) : object_(object
) {}
808 inline ObjectRef
operator->() const { return ObjectRef
{**this}; }
810 static Handle
<T
> fromHandleValue(JS::HandleValue handle
) {
811 return Handle(handle
.address());
815 Handle(const JS::Value
* location
) : location_(location
) {}
820 friend class MaybeHandle
;
822 const JS::Value
* location_
;
825 // A Handle can be converted into a MaybeHandle. Converting a MaybeHandle
826 // into a Handle requires checking that it does not point to nullptr. This
827 // ensures nullptr checks before use.
829 // Also note that Handles do not provide default equality comparison or hashing
830 // operators on purpose. Such operators would be misleading, because intended
831 // semantics is ambiguous between Handle location and object identity.
833 // https://github.com/v8/v8/blob/5792f3587116503fc047d2f68c951c72dced08a5/src/handles/maybe-handles.h#L15-L78
834 template <typename T
>
835 class MOZ_NONHEAP_CLASS MaybeHandle final
{
837 MaybeHandle() : location_(nullptr) {}
839 // Constructor for handling automatic up casting from Handle.
840 // Ex. Handle<JSArray> can be passed when MaybeHandle<Object> is expected.
841 template <typename S
,
842 typename
= std::enable_if_t
<std::is_convertible_v
<S
*, T
*>>>
843 MaybeHandle(Handle
<S
> handle
) : location_(handle
.location_
) {}
845 inline Handle
<T
> ToHandleChecked() const {
846 MOZ_RELEASE_ASSERT(location_
);
847 return Handle
<T
>(location_
);
850 // Convert to a Handle with a type that can be upcasted to.
851 template <typename S
>
852 inline bool ToHandle(Handle
<S
>* out
) const {
854 *out
= Handle
<T
>(location_
);
863 JS::Value
* location_
;
866 // From v8/src/handles/handles-inl.h
868 template <typename T
>
869 inline Handle
<T
> handle(T object
, Isolate
* isolate
) {
870 return Handle
<T
>(object
, isolate
);
873 // RAII Guard classes
875 using DisallowGarbageCollection
= JS::AutoAssertNoGC
;
877 // V8 uses this inside DisallowGarbageCollection regions to turn
878 // allocation back on before throwing a stack overflow exception or
879 // handling interrupts. AutoSuppressGC is sufficient for the former
880 // case, but not for the latter: handling interrupts can execute
881 // arbitrary script code, and V8 jumps through some scary hoops to
882 // "manually relocate unhandlified references" afterwards. To keep
883 // things sane, we don't try to handle interrupts while regex code is
884 // still on the stack. Instead, we return EXCEPTION and handle
885 // interrupts in the caller. (See RegExpShared::execute.)
887 class AllowGarbageCollection
{
889 AllowGarbageCollection() {}
893 // https://github.com/v8/v8/blob/84f3877c15bc7f8956d21614da4311337525a3c8/src/objects/string.h#L83-L474
894 class String
: public HeapObject
{
896 JSString
* str() const { return value().toString(); }
900 String(JSString
* str
) { setValue(JS::StringValue(str
)); }
902 operator JSString
*() const { return str(); }
905 static const int32_t kMaxOneByteCharCode
= unibrow::Latin1::kMaxChar
;
906 static const uint32_t kMaxOneByteCharCodeU
= unibrow::Latin1::kMaxChar
;
907 static const int kMaxUtf16CodeUnit
= 0xffff;
908 static const uint32_t kMaxUtf16CodeUnitU
= kMaxUtf16CodeUnit
;
909 static const base::uc32 kMaxCodePoint
= 0x10ffff;
911 MOZ_ALWAYS_INLINE
int length() const { return str()->length(); }
912 bool IsFlat() { return str()->isLinear(); };
915 // https://github.com/v8/v8/blob/84f3877c15bc7f8956d21614da4311337525a3c8/src/objects/string.h#L95-L152
918 FlatContent(JSLinearString
* string
, const DisallowGarbageCollection
& no_gc
)
919 : string_(string
), no_gc_(no_gc
) {}
920 inline bool IsOneByte() const { return string_
->hasLatin1Chars(); }
921 inline bool IsTwoByte() const { return !string_
->hasLatin1Chars(); }
// Builds a base::Vector over the string's Latin-1 characters, obtained with
// the stored no-GC guard. Asserts IsOneByte().
// NOTE(review): the remaining constructor argument(s) and the end of the
// function are not visible in this view.
923 base::Vector
<const uint8_t> ToOneByteVector() const {
924 MOZ_ASSERT(IsOneByte());
925 return base::Vector
<const uint8_t>(string_
->latin1Chars(no_gc_
),
// Builds a base::Vector over the string's two-byte characters, obtained with
// the stored no-GC guard. Asserts IsTwoByte().
// NOTE(review): the remaining constructor argument(s) and the end of the
// function are not visible in this view.
928 base::Vector
<const base::uc16
> ToUC16Vector() const {
929 MOZ_ASSERT(IsTwoByte());
930 return base::Vector
<const base::uc16
>(string_
->twoByteChars(no_gc_
),
// Exists so that callers written against V8's FlatContent still compile; the
// visible body contains no statements.
933 void UnsafeDisableChecksumVerification() {
934 // Intentional no-op. See the comment for AllowGarbageCollection above.
938 const JSLinearString
* string_
;
939 const JS::AutoAssertNoGC
& no_gc_
;
// Wraps the underlying string, viewed as linear, in a FlatContent bound to
// |no_gc|. Asserts IsFlat() before calling asLinear().
941 FlatContent
GetFlatContent(const DisallowGarbageCollection
& no_gc
) {
942 MOZ_ASSERT(IsFlat());
943 return FlatContent(&str()->asLinear(), no_gc
);
946 static Handle
<String
> Flatten(Isolate
* isolate
, Handle
<String
> string
);
// Converts |object| to a String: asserts that its value is a string and
// copies that value into `s`.
// NOTE(review): the declaration of `s` and the return statement are not
// visible in this view.
948 inline static String
cast(Object object
) {
950 MOZ_ASSERT(object
.value().isString());
951 s
.setValue(object
.value());
// True when the JSString wrapped by |string| has Latin-1 characters.
955 inline static bool IsOneByteRepresentationUnderneath(String string
) {
956 return string
.str()->hasLatin1Chars();
// True when the wrapped JSString has Latin-1 characters.
958 inline bool IsOneByteRepresentation() const {
959 return str()->hasLatin1Chars();
// Declaration only; returns an owning char[] buffer.
// NOTE(review): the buffer's encoding and termination are determined by the
// definition, which is not visible here.
std::unique_ptr<char[]> ToCString();
964 template <typename Char
>
965 base::Vector
<const Char
> GetCharVector(
966 const DisallowGarbageCollection
& no_gc
);
// uint8_t flavour of String::GetCharVector: fetches the flat content under
// |no_gc|, asserts that it is one-byte, and returns its one-byte vector.
// NOTE(review): presumably an explicit specialization; the `template <>`
// line and the closing brace are not visible in this view.
970 inline base::Vector
<const uint8_t> String::GetCharVector(
971 const DisallowGarbageCollection
& no_gc
) {
972 String::FlatContent flat
= GetFlatContent(no_gc
);
973 MOZ_ASSERT(flat
.IsOneByte());
974 return flat
.ToOneByteVector();
// base::uc16 flavour of String::GetCharVector: fetches the flat content under
// |no_gc|, asserts that it is two-byte, and returns its UC16 vector.
// NOTE(review): presumably an explicit specialization; the `template <>`
// line and the closing brace are not visible in this view.
978 inline base::Vector
<const base::uc16
> String::GetCharVector(
979 const DisallowGarbageCollection
& no_gc
) {
980 String::FlatContent flat
= GetFlatContent(no_gc
);
981 MOZ_ASSERT(flat
.IsTwoByte());
982 return flat
.ToUC16Vector();
985 class JSRegExp
: public HeapObject
{
987 JSRegExp() : HeapObject() {}
988 JSRegExp(js::RegExpShared
* re
) { setValue(JS::PrivateGCThingValue(re
)); }
990 // ******************************************************
991 // Methods that are called from inside the implementation
992 // ******************************************************
993 void TierUpTick() { inner()->tierUpTick(); }
// Returns the wrapped RegExpShared's bytecode for the requested encoding
// (|is_latin1|), boxed in an Object as a JS::PrivateValue.
995 Object
bytecode(bool is_latin1
) const {
996 return Object(JS::PrivateValue(inner()->getByteCode(is_latin1
)));
999 // TODO: should we expose this?
1000 uint32_t backtrack_limit() const { return 0; }
// Converts |object| to a JSRegExp: takes the GC thing out of its value,
// asserts that it is a js::RegExpShared, and stores it in `regexp` as a
// private-GC-thing value.
// NOTE(review): the declaration of `regexp` and the return statement are not
// visible in this view.
1002 static JSRegExp
cast(Object object
) {
1004 js::gc::Cell
* regexpShared
= object
.value().toGCThing();
1005 MOZ_ASSERT(regexpShared
->is
<js::RegExpShared
>());
1006 regexp
.setValue(JS::PrivateGCThingValue(regexpShared
));
1010 // Each capture (including the match itself) needs two registers.
// Returns (count + 1) * 2: |count| captures plus one for the overall match,
// two registers apiece.
1011 static constexpr int RegistersForCaptureCount(int count
) {
1012 return (count
+ 1) * 2;
// Forwards to RegExpShared::getMaxRegisters() on the wrapped object.
1015 inline uint32_t max_register_count() const {
1016 return inner()->getMaxRegisters();
1019 // ******************************
1021 // ******************************
// Upper bound on the capture count: (1 << 15) - 1.
static constexpr int kMaxCaptures = 0x7FFF;

// Value that stands for "no backtrack limit".
static constexpr int kNoBacktrackLimit = 0;
// Unwraps the stored GC thing as a js::RegExpShared.
1028 js::RegExpShared
* inner() const {
1029 return value().toGCThing()->as
<js::RegExpShared
>();
1033 using RegExpFlags
= JS::RegExpFlags
;
1035 inline bool IsUnicode(RegExpFlags flags
) { return flags
.unicode(); }
1036 inline bool IsGlobal(RegExpFlags flags
) { return flags
.global(); }
1037 inline bool IsIgnoreCase(RegExpFlags flags
) { return flags
.ignoreCase(); }
1038 inline bool IsMultiline(RegExpFlags flags
) { return flags
.multiline(); }
1039 inline bool IsDotAll(RegExpFlags flags
) { return flags
.dotAll(); }
1040 inline bool IsSticky(RegExpFlags flags
) { return flags
.sticky(); }
1041 inline bool IsUnicodeSets(RegExpFlags flags
) { return flags
.unicodeSets(); }
// True when the unicode flag or the unicodeSets flag is set.
1042 inline bool IsEitherUnicode(RegExpFlags flags
) {
1043 return flags
.unicode() || flags
.unicodeSets();
// Accepts a sample and discards it; the body is intentionally empty.
inline void AddSample(int sample) {
}
1053 Histogram
* regexp_backtracks() { return ®exp_backtracks_
; }
1056 Histogram regexp_backtracks_
;
// Heap selector with a uint8_t underlying type; the enumerator comments
// state which heap each value requests.
1059 enum class AllocationType
: uint8_t {
1060 kYoung
, // Allocate in the nursery
1061 kOld
, // Allocate in the tenured heap
1064 using StackGuard
= Isolate
;
1065 using Factory
= Isolate
;
1069 Isolate(JSContext
* cx
) : cx_(cx
) {}
1073 size_t sizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf
) const;
1075 //********** Isolate code **********//
1076 RegExpStack
* regexp_stack() const { return regexpStack_
; }
// Called from inside no-GC code. Rather than suppressing GC in order to
// allocate the error here, Execute returns false and the caller invokes
// ReportOverRecursed. Consequently this function does nothing.
void StackOverflow() {
}
1083 #ifndef V8_INTL_SUPPORT
// Returns the address of the jsregexp_uncanonicalize_ member.
1084 unibrow::Mapping
<unibrow::Ecma262UnCanonicalize
>* jsregexp_uncanonicalize() {
1085 return &jsregexp_uncanonicalize_
;
1087 unibrow::Mapping
<unibrow::Ecma262Canonicalize
>*
1088 regexp_macro_assembler_canonicalize() {
1089 return ®exp_macro_assembler_canonicalize_
;
// Returns the address of the jsregexp_canonrange_ member.
1091 unibrow::Mapping
<unibrow::CanonicalizationRange
>* jsregexp_canonrange() {
1092 return &jsregexp_canonrange_
;
1096 unibrow::Mapping
<unibrow::Ecma262UnCanonicalize
> jsregexp_uncanonicalize_
;
1097 unibrow::Mapping
<unibrow::Ecma262Canonicalize
>
1098 regexp_macro_assembler_canonicalize_
;
1099 unibrow::Mapping
<unibrow::CanonicalizationRange
> jsregexp_canonrange_
;
1100 #endif // !V8_INTL_SUPPORT
1103 // An empty stub for telemetry we don't support
1104 void IncreaseTotalRegexpCodeGenerated(Handle
<HeapObject
> code
) {}
1106 Counters
* counters() { return &counters_
; }
1108 //********** Factory code **********//
1109 inline Factory
* factory() { return this; }
1111 Handle
<ByteArray
> NewByteArray(
1112 int length
, AllocationType allocation
= AllocationType::kYoung
);
1114 // Allocates a fixed array initialized with undefined values.
1115 Handle
<FixedArray
> NewFixedArray(int length
);
1117 template <typename T
>
1118 Handle
<FixedIntegerArray
<T
>> NewFixedIntegerArray(uint32_t length
);
1120 template <typename Char
>
1121 Handle
<String
> InternalizeString(const base::Vector
<const Char
>& str
);
1123 //********** Stack guard code **********//
1124 inline StackGuard
* stack_guard() { return this; }
1126 uintptr_t real_climit() { return cx_
->stackLimit(JS::StackForSystemCode
); }
1128 // This is called from inside no-GC code. V8 runs the interrupt
1129 // inside the no-GC code and then "manually relocates unhandlified
1130 // references" afterwards. We just return false and let the caller
1131 // handle interrupts.
1132 Object
HandleInterrupts() { return Object(JS::BooleanValue(false)); }
1134 JSContext
* cx() const { return cx_
; }
1136 void trace(JSTracer
* trc
);
1138 //********** Handle code **********//
1140 JS::Value
* getHandleLocation(const JS::Value
& value
);
1143 mozilla::SegmentedVector
<JS::Value
, 256> handleArena_
;
1144 mozilla::SegmentedVector
<PseudoHandle
<void>, 256> uniquePtrArena_
;
1146 void* allocatePseudoHandle(size_t bytes
);
1149 template <typename T
>
1150 PseudoHandle
<T
> takeOwnership(void* ptr
);
1151 template <typename T
>
1152 PseudoHandle
<T
> maybeTakeOwnership(void* ptr
);
1154 uint32_t liveHandles() const { return handleArena_
.Length(); }
1155 uint32_t livePseudoHandles() const { return uniquePtrArena_
.Length(); }
// Records the current length of each arena in |scope|: level_ gets the
// handleArena_ length and non_gc_level_ gets the uniquePtrArena_ length.
1158 void openHandleScope(HandleScope
& scope
) {
1159 scope
.level_
= handleArena_
.Length();
1160 scope
.non_gc_level_
= uniquePtrArena_
.Length();
// Shrinks each arena back to the given previous length by popping the
// entries added since then.
// NOTE(review): both subtractions are unsigned and assume the previous level
// does not exceed the current length -- presumably guaranteed by pairing with
// openHandleScope; confirm at the call site.
1162 void closeHandleScope(size_t prevLevel
, size_t prevUniqueLevel
) {
1163 size_t currLevel
= handleArena_
.Length();
1164 handleArena_
.PopLastN(currLevel
- prevLevel
);
1166 size_t currUniqueLevel
= uniquePtrArena_
.Length();
1167 uniquePtrArena_
.PopLastN(currUniqueLevel
- prevUniqueLevel
);
// HandleScope is granted access to this class's non-public members.
1169 friend class HandleScope
;
// Value-initialized state: the RegExpStack pointer returned by
// regexp_stack() and the Counters object returned by counters().
1172 RegExpStack
* regexpStack_
{};
1173 Counters counters_
{};
// Starts at zero.
// NOTE(review): lines between counters_ and this member are not visible in
// this view (possibly a preprocessor guard) -- confirm before relying on this
// member being present in every build.
1176 uint32_t shouldSimulateInterrupt_
= 0;
1181 // https://github.com/v8/v8/blob/50dcf2af54ce27801a71c47c1be1d2c5e36b0dd6/src/execution/isolate.h#L1909-L1931
1182 class StackLimitCheck
{
1184 StackLimitCheck(Isolate
* isolate
) : cx_(isolate
->cx()) {}
1186 // Use this to check for stack-overflows in C++ code.
// Runs js::AutoCheckRecursionLimit::checkDontReport and records a failed
// check in `overflowed`. When it failed and differential testing is enabled,
// a fixed message is written to stderr.
// NOTE(review): the return statement is not visible in this view.
1187 bool HasOverflowed() {
1188 js::AutoCheckRecursionLimit
recursion(cx_
);
1189 bool overflowed
= !recursion
.checkDontReport(cx_
);
1190 if (overflowed
&& js::SupportDifferentialTesting()) {
1191 // We don't report overrecursion here, but we throw an exception later
1192 // and this still affects differential testing. Mimic ReportOverRecursed
1193 // (the fuzzers check for this particular string).
1194 fprintf(stderr
, "ReportOverRecursed called\n");
1199 // Use this to check for interrupt request in C++ code.
// True when the context has a pending interrupt with reason CallbackUrgent.
1200 bool InterruptRequested() {
1201 return cx_
->hasPendingInterrupt(js::InterruptReason::CallbackUrgent
);
1204 // Use this to check for stack-overflow when entering runtime from JS code.
// Runs js::AutoCheckRecursionLimit::checkDontReport and returns true when
// that check fails.
1205 bool JsHasOverflowed() {
1206 js::AutoCheckRecursionLimit
recursion(cx_
);
1207 return !recursion
.checkDontReport(cx_
);
1214 class ExternalReference
{
1216 static const void* TopOfRegexpStack(Isolate
* isolate
);
1217 static size_t SizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf
,
1218 RegExpStack
* regexpStack
);
1221 class Code
: public HeapObject
{
1223 uint8_t* raw_instruction_start() { return inner()->raw(); }
// Converts |object| to a Code: takes the GC thing out of its value, asserts
// that it is a js::jit::JitCode, and stores it in `c` as a private-GC-thing
// value.
// NOTE(review): the declaration of `c` and the return statement are not
// visible in this view.
1225 static Code
cast(Object object
) {
1227 js::gc::Cell
* jitCode
= object
.value().toGCThing();
1228 MOZ_ASSERT(jitCode
->is
<js::jit::JitCode
>());
1229 c
.setValue(JS::PrivateGCThingValue(jitCode
));
// Unwraps the stored GC thing as a js::jit::JitCode.
1232 js::jit::JitCode
* inner() {
1233 return value().toGCThing()->as
<js::jit::JitCode
>();
// Placeholder type. It appears only in the signature of a function that is
// not implemented here
// (NativeRegExpMacroAssembler::CheckStackGuardState), so it has no members.
class InstructionStream {
};
1241 // Origin: https://github.com/v8/v8/blob/master/src/codegen/label.h
1244 Label() : inner_(js::jit::Label()) {}
1246 js::jit::Label
* inner() { return &inner_
; }
1248 void Unuse() { inner_
.reset(); }
1250 bool is_linked() { return inner_
.used(); }
1251 bool is_bound() { return inner_
.bound(); }
1252 bool is_unused() { return !inner_
.used() && !inner_
.bound(); }
1254 int pos() { return inner_
.offset(); }
1255 void link_to(int pos
) { inner_
.use(pos
); }
1256 void bind_to(int pos
) { inner_
.bind(pos
); }
1259 js::jit::Label inner_
;
1260 js::jit::CodeOffset patchOffset_
;
1262 friend class SMRegExpMacroAssembler
;
// Uses of `v8_flags` expand to js::jit::JitOptions.
#define v8_flags js::jit::JitOptions

// Build-configuration macros for the imported irregexp sources.
#define V8_USE_COMPUTED_GOTO 1
#define COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER
1270 } // namespace internal
// Out-of-memory handler: enters an OOM-unsafe region and crashes with |msg|
// via js::AutoEnterOOMUnsafeRegion::crash().
// NOTE(review): the parameter that declares `msg` and the closing brace are
// not visible in this view.
1275 inline void FatalProcessOutOfMemory(v8::internal::Isolate
* isolate
,
1277 js::AutoEnterOOMUnsafeRegion oomUnsafe
;
1278 oomUnsafe
.crash(msg
);
1283 #endif // RegexpShim_h