Bug 1839526 [wpt PR 40658] - Update wpt metadata, a=testonly
[gecko.git] / js / src / irregexp / RegExpNativeMacroAssembler.h
blob3c3acf40d45b43eea6622c4ef3b1275c3f50c7b4
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file implements the NativeRegExpMacroAssembler interface for
// SpiderMonkey. It provides the same interface as each of V8's
// architecture-specific implementations.
15 #ifndef RegexpMacroAssemblerArch_h
16 #define RegexpMacroAssemblerArch_h
18 #include "irregexp/imported/regexp-macro-assembler.h"
19 #include "jit/MacroAssembler.h"
21 namespace v8 {
22 namespace internal {
// Frame-local data used by the generated regexp code.
//
// SMRegExpMacroAssembler addresses these fields at
// offsetof(FrameData, <field>) from the native stack pointer and places
// irregexp's virtual registers starting at sizeof(FrameData), so the member
// order and types below define the frame layout. Do not reorder them
// casually.
struct FrameData {
  // Character position at the start of the input, stored as a
  // negative offset from the end of the string (input_end_pointer_).
  size_t inputStart;

  // The backtrack_stack_pointer_ register points to the top of the stack.
  // This points to the bottom of the backtrack stack.
  void* backtrackStackBase;

  // Copy of the input MatchPairs.
  int32_t* matches;    // pointer to capture array
  int32_t numMatches;  // size of capture array
};
38 class SMRegExpMacroAssembler final : public NativeRegExpMacroAssembler {
39 public:
40 SMRegExpMacroAssembler(JSContext* cx, js::jit::StackMacroAssembler& masm,
41 Zone* zone, Mode mode, uint32_t num_capture_registers);
42 virtual ~SMRegExpMacroAssembler() = default;
44 virtual int stack_limit_slack();
45 virtual IrregexpImplementation Implementation();
47 virtual bool Succeed();
48 virtual void Fail();
50 virtual void AdvanceCurrentPosition(int by);
51 virtual void PopCurrentPosition();
52 virtual void PushCurrentPosition();
53 virtual void SetCurrentPositionFromEnd(int by);
55 virtual void Backtrack();
56 virtual void Bind(Label* label);
57 virtual void GoTo(Label* label);
58 virtual void PushBacktrack(Label* label);
60 virtual void CheckCharacter(uint32_t c, Label* on_equal);
61 virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
62 virtual void CheckCharacterGT(base::uc16 limit, Label* on_greater);
63 virtual void CheckCharacterLT(base::uc16 limit, Label* on_less);
64 virtual void CheckCharacterAfterAnd(uint32_t c, uint32_t mask,
65 Label* on_equal);
66 virtual void CheckNotCharacterAfterAnd(uint32_t c, uint32_t mask,
67 Label* on_not_equal);
68 virtual void CheckNotCharacterAfterMinusAnd(base::uc16 c, base::uc16 minus,
69 base::uc16 mask,
70 Label* on_not_equal);
71 virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
72 virtual void CheckCharacterInRange(base::uc16 from, base::uc16 to,
73 Label* on_in_range);
74 virtual void CheckCharacterNotInRange(base::uc16 from, base::uc16 to,
75 Label* on_not_in_range);
76 virtual bool CheckCharacterInRangeArray(
77 const ZoneList<CharacterRange>* ranges, Label* on_in_range);
78 virtual bool CheckCharacterNotInRangeArray(
79 const ZoneList<CharacterRange>* ranges, Label* on_not_in_range);
80 virtual void CheckAtStart(int cp_offset, Label* on_at_start);
81 virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
82 virtual void CheckPosition(int cp_offset, Label* on_outside_input);
83 virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set);
84 virtual bool CheckSpecialCharacterClass(StandardCharacterSet type,
85 Label* on_no_match);
86 virtual void CheckNotBackReference(int start_reg, bool read_backward,
87 Label* on_no_match);
88 virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
89 bool read_backward, bool unicode,
90 Label* on_no_match);
92 virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
93 bool check_bounds, int characters,
94 int eats_at_least);
96 virtual void AdvanceRegister(int reg, int by);
97 virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
98 virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
99 virtual void IfRegisterEqPos(int reg, Label* if_eq);
100 virtual void PopRegister(int register_index);
101 virtual void PushRegister(int register_index,
102 StackCheckFlag check_stack_limit);
103 virtual void ReadCurrentPositionFromRegister(int reg);
104 virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
105 virtual void ReadStackPointerFromRegister(int reg);
106 virtual void WriteStackPointerToRegister(int reg);
107 virtual void SetRegister(int register_index, int to);
108 virtual void ClearRegisters(int reg_from, int reg_to);
110 virtual Handle<HeapObject> GetCode(Handle<String> source);
112 virtual bool CanReadUnaligned() const;
114 private:
115 size_t frameSize_ = 0;
117 void createStackFrame();
118 void initFrameAndRegs();
119 void successHandler();
120 void exitHandler();
121 void backtrackHandler();
122 void stackOverflowHandler();
124 // Push a register on the backtrack stack.
125 void Push(js::jit::Register value);
127 // Pop a value from the backtrack stack.
128 void Pop(js::jit::Register target);
130 void CheckAtStartImpl(int cp_offset, Label* on_cond,
131 js::jit::Assembler::Condition cond);
132 void CheckCharacterImpl(js::jit::Imm32 c, Label* on_cond,
133 js::jit::Assembler::Condition cond);
134 void CheckCharacterAfterAndImpl(uint32_t c, uint32_t and_with, Label* on_cond,
135 bool negate);
136 void CheckCharacterInRangeImpl(base::uc16 from, base::uc16 to, Label* on_cond,
137 js::jit::Assembler::Condition cond);
138 void CheckNotBackReferenceImpl(int start_reg, bool read_backward,
139 bool unicode, Label* on_no_match,
140 bool ignore_case);
141 void CallIsCharacterInRangeArray(const ZoneList<CharacterRange>* ranges);
143 void LoadCurrentCharacterUnchecked(int cp_offset, int characters);
145 void JumpOrBacktrack(Label* to);
147 // MacroAssembler methods that take a Label can be called with a
148 // null label, which means that we should backtrack if we would jump
149 // to that label. This is a helper to avoid writing out the same
150 // logic a dozen times.
151 inline js::jit::Label* LabelOrBacktrack(Label* to) {
152 return to ? to->inner() : &backtrack_label_;
155 void CheckBacktrackStackLimit();
157 public:
158 static bool GrowBacktrackStack(RegExpStack* regexp_stack);
160 static uint32_t CaseInsensitiveCompareNonUnicode(const char16_t* substring1,
161 const char16_t* substring2,
162 size_t byteLength);
163 static uint32_t CaseInsensitiveCompareUnicode(const char16_t* substring1,
164 const char16_t* substring2,
165 size_t byteLength);
166 static bool IsCharacterInRangeArray(uint32_t c, ByteArrayData* ranges);
168 private:
169 inline int char_size() { return static_cast<int>(mode_); }
170 inline js::jit::Scale factor() {
171 return mode_ == UC16 ? js::jit::TimesTwo : js::jit::TimesOne;
174 js::jit::Address inputStart() {
175 return js::jit::Address(masm_.getStackPointer(),
176 offsetof(FrameData, inputStart));
178 js::jit::Address backtrackStackBase() {
179 return js::jit::Address(masm_.getStackPointer(),
180 offsetof(FrameData, backtrackStackBase));
182 js::jit::Address matches() {
183 return js::jit::Address(masm_.getStackPointer(),
184 offsetof(FrameData, matches));
186 js::jit::Address numMatches() {
187 return js::jit::Address(masm_.getStackPointer(),
188 offsetof(FrameData, numMatches));
191 // The stack-pointer-relative location of a regexp register.
192 js::jit::Address register_location(int register_index) {
193 return js::jit::Address(masm_.getStackPointer(),
194 register_offset(register_index));
197 int32_t register_offset(int register_index) {
198 MOZ_ASSERT(register_index >= 0 && register_index <= kMaxRegister);
199 if (num_registers_ <= register_index) {
200 num_registers_ = register_index + 1;
202 static_assert(alignof(uintptr_t) <= alignof(FrameData));
203 return sizeof(FrameData) + register_index * sizeof(uintptr_t*);
206 JSContext* cx_;
207 js::jit::StackMacroAssembler& masm_;
210 * This assembler uses the following registers:
212 * - current_character_:
213 * Contains the character (or characters) currently being examined.
214 * Must be loaded using LoadCurrentCharacter before using any of the
215 * dispatch methods. After a matching pass for a global regexp,
216 * temporarily stores the index of capture start.
217 * - current_position_:
218 * Current position in input *as negative byte offset from end of string*.
219 * - input_end_pointer_:
220 * Points to byte after last character in the input. current_position_ is
221 * relative to this.
222 * - backtrack_stack_pointer_:
223 * Points to tip of the (heap-allocated) backtrack stack. The stack grows
224 * downward (like the native stack).
225 * - temp0_, temp1_, temp2_:
226 * Scratch registers.
228 * The native stack pointer is used to access arguments (InputOutputData),
229 * local variables (FrameData), and irregexp's internal virtual registers
230 * (see register_location).
233 js::jit::Register current_character_;
234 js::jit::Register current_position_;
235 js::jit::Register input_end_pointer_;
236 js::jit::Register backtrack_stack_pointer_;
237 js::jit::Register temp0_, temp1_, temp2_;
239 // These labels are used in various API calls and bound (if used) in
240 // GetCode. If we abort in the middle of a compilation, as may
241 // happen if a regexp is too big, they may be used but not
242 // bound.
243 js::jit::NonAssertingLabel entry_label_;
244 js::jit::NonAssertingLabel start_label_;
245 js::jit::NonAssertingLabel backtrack_label_;
246 js::jit::NonAssertingLabel success_label_;
247 js::jit::NonAssertingLabel exit_label_;
248 js::jit::NonAssertingLabel stack_overflow_label_;
249 js::jit::NonAssertingLabel exit_with_exception_label_;
251 // When we generate the code to push a backtrack label's address
252 // onto the backtrack stack, we don't know its final address. We
253 // have to patch it after linking. This is slightly delicate, as the
254 // Label itself (which is allocated on the stack) may not exist by
255 // the time we link. The approach is as follows:
257 // 1. When we push a label on the backtrack stack (PushBacktrack),
258 // we bind the label's patchOffset_ field to the offset within
259 // the code that should be overwritten. This works because each
260 // label is only pushed by a single instruction.
262 // 2. When we bind a label (Bind), we check to see if it has a
263 // bound patchOffset_. If it does, we create a LabelPatch mapping
264 // its patch offset to the offset of the label itself.
266 // 3. While linking the code, we walk the list of label patches
267 // and patch the code accordingly.
268 class LabelPatch {
269 public:
270 LabelPatch(js::jit::CodeOffset patchOffset, size_t labelOffset)
271 : patchOffset_(patchOffset), labelOffset_(labelOffset) {}
273 js::jit::CodeOffset patchOffset_;
274 size_t labelOffset_ = 0;
277 js::Vector<LabelPatch, 4, js::SystemAllocPolicy> labelPatches_;
278 void AddLabelPatch(js::jit::CodeOffset patchOffset, size_t labelOffset) {
279 js::AutoEnterOOMUnsafeRegion oomUnsafe;
280 if (!labelPatches_.emplaceBack(patchOffset, labelOffset)) {
281 oomUnsafe.crash("Irregexp label patch");
285 Mode mode_;
286 int num_registers_;
287 int num_capture_registers_;
288 js::jit::LiveGeneralRegisterSet savedRegisters_;
290 public:
291 using TableVector =
292 js::Vector<PseudoHandle<ByteArrayData>, 4, js::SystemAllocPolicy>;
293 TableVector& tables() { return tables_; }
295 private:
296 TableVector tables_;
297 void AddTable(PseudoHandle<ByteArrayData> table) {
298 js::AutoEnterOOMUnsafeRegion oomUnsafe;
299 if (!tables_.append(std::move(table))) {
300 oomUnsafe.crash("Irregexp table append");
305 } // namespace internal
306 } // namespace v8
308 #endif // RegexpMacroAssemblerArch_h