1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // Copyright 2020 the V8 project authors. All rights reserved.
8 // Use of this source code is governed by a BSD-style license that can be
9 // found in the LICENSE file.
11 // This file implements the NativeRegExpMacroAssembler interface for
12 // SpiderMonkey. It provides the same interface as each of V8's
13 // architecture-specific implementations.
15 #ifndef RegexpMacroAssemblerArch_h
16 #define RegexpMacroAssemblerArch_h
18 #include "irregexp/imported/regexp-macro-assembler.h"
19 #include "jit/MacroAssembler.h"
25 // Character position at the start of the input, stored as a
26 // negative offset from the end of the string (input_end_pointer_).
29 // The backtrack_stack_pointer_ register points to the top of the stack.
// The backtrackStackBase field, by contrast, points to the bottom of the
// backtrack stack.
31 void* backtrackStackBase
;
33 // Copy of the input MatchPairs.
34 int32_t* matches
; // pointer to capture array
35 int32_t numMatches
; // size of capture array
38 class SMRegExpMacroAssembler final
: public NativeRegExpMacroAssembler
{
40 SMRegExpMacroAssembler(JSContext
* cx
, js::jit::StackMacroAssembler
& masm
,
41 Zone
* zone
, Mode mode
, uint32_t num_capture_registers
);
42 virtual ~SMRegExpMacroAssembler() = default;
44 virtual int stack_limit_slack();
45 virtual IrregexpImplementation
Implementation();
47 virtual bool Succeed();
50 virtual void AdvanceCurrentPosition(int by
);
51 virtual void PopCurrentPosition();
52 virtual void PushCurrentPosition();
53 virtual void SetCurrentPositionFromEnd(int by
);
55 virtual void Backtrack();
56 virtual void Bind(Label
* label
);
57 virtual void GoTo(Label
* label
);
58 virtual void PushBacktrack(Label
* label
);
60 virtual void CheckCharacter(uint32_t c
, Label
* on_equal
);
61 virtual void CheckNotCharacter(uint32_t c
, Label
* on_not_equal
);
62 virtual void CheckCharacterGT(base::uc16 limit
, Label
* on_greater
);
63 virtual void CheckCharacterLT(base::uc16 limit
, Label
* on_less
);
64 virtual void CheckCharacterAfterAnd(uint32_t c
, uint32_t mask
,
66 virtual void CheckNotCharacterAfterAnd(uint32_t c
, uint32_t mask
,
68 virtual void CheckNotCharacterAfterMinusAnd(base::uc16 c
, base::uc16 minus
,
71 virtual void CheckGreedyLoop(Label
* on_tos_equals_current_position
);
72 virtual void CheckCharacterInRange(base::uc16 from
, base::uc16 to
,
74 virtual void CheckCharacterNotInRange(base::uc16 from
, base::uc16 to
,
75 Label
* on_not_in_range
);
76 virtual bool CheckCharacterInRangeArray(
77 const ZoneList
<CharacterRange
>* ranges
, Label
* on_in_range
);
78 virtual bool CheckCharacterNotInRangeArray(
79 const ZoneList
<CharacterRange
>* ranges
, Label
* on_not_in_range
);
80 virtual void CheckAtStart(int cp_offset
, Label
* on_at_start
);
81 virtual void CheckNotAtStart(int cp_offset
, Label
* on_not_at_start
);
82 virtual void CheckPosition(int cp_offset
, Label
* on_outside_input
);
83 virtual void CheckBitInTable(Handle
<ByteArray
> table
, Label
* on_bit_set
);
84 virtual bool CheckSpecialCharacterClass(StandardCharacterSet type
,
86 virtual void CheckNotBackReference(int start_reg
, bool read_backward
,
88 virtual void CheckNotBackReferenceIgnoreCase(int start_reg
,
89 bool read_backward
, bool unicode
,
92 virtual void LoadCurrentCharacterImpl(int cp_offset
, Label
* on_end_of_input
,
93 bool check_bounds
, int characters
,
96 virtual void AdvanceRegister(int reg
, int by
);
97 virtual void IfRegisterGE(int reg
, int comparand
, Label
* if_ge
);
98 virtual void IfRegisterLT(int reg
, int comparand
, Label
* if_lt
);
99 virtual void IfRegisterEqPos(int reg
, Label
* if_eq
);
100 virtual void PopRegister(int register_index
);
101 virtual void PushRegister(int register_index
,
102 StackCheckFlag check_stack_limit
);
103 virtual void ReadCurrentPositionFromRegister(int reg
);
104 virtual void WriteCurrentPositionToRegister(int reg
, int cp_offset
);
105 virtual void ReadStackPointerFromRegister(int reg
);
106 virtual void WriteStackPointerToRegister(int reg
);
107 virtual void SetRegister(int register_index
, int to
);
108 virtual void ClearRegisters(int reg_from
, int reg_to
);
110 virtual Handle
<HeapObject
> GetCode(Handle
<String
> source
);
112 virtual bool CanReadUnaligned() const;
115 size_t frameSize_
= 0;
117 void createStackFrame();
118 void initFrameAndRegs();
119 void successHandler();
121 void backtrackHandler();
122 void stackOverflowHandler();
124 // Push a register on the backtrack stack.
125 void Push(js::jit::Register value
);
127 // Pop a value from the backtrack stack.
128 void Pop(js::jit::Register target
);
130 void CheckAtStartImpl(int cp_offset
, Label
* on_cond
,
131 js::jit::Assembler::Condition cond
);
132 void CheckCharacterImpl(js::jit::Imm32 c
, Label
* on_cond
,
133 js::jit::Assembler::Condition cond
);
134 void CheckCharacterAfterAndImpl(uint32_t c
, uint32_t and_with
, Label
* on_cond
,
136 void CheckCharacterInRangeImpl(base::uc16 from
, base::uc16 to
, Label
* on_cond
,
137 js::jit::Assembler::Condition cond
);
138 void CheckNotBackReferenceImpl(int start_reg
, bool read_backward
,
139 bool unicode
, Label
* on_no_match
,
141 void CallIsCharacterInRangeArray(const ZoneList
<CharacterRange
>* ranges
);
143 void LoadCurrentCharacterUnchecked(int cp_offset
, int characters
);
145 void JumpOrBacktrack(Label
* to
);
147 // MacroAssembler methods that take a Label can be called with a
148 // null label, which means that we should backtrack if we would jump
149 // to that label. This is a helper to avoid writing out the same
150 // logic a dozen times.
151 inline js::jit::Label
* LabelOrBacktrack(Label
* to
) {
152 return to
? to
->inner() : &backtrack_label_
;
155 void CheckBacktrackStackLimit();
158 static bool GrowBacktrackStack(RegExpStack
* regexp_stack
);
160 static uint32_t CaseInsensitiveCompareNonUnicode(const char16_t
* substring1
,
161 const char16_t
* substring2
,
163 static uint32_t CaseInsensitiveCompareUnicode(const char16_t
* substring1
,
164 const char16_t
* substring2
,
166 static bool IsCharacterInRangeArray(uint32_t c
, ByteArrayData
* ranges
);
169 inline int char_size() { return static_cast<int>(mode_
); }
170 inline js::jit::Scale
factor() {
171 return mode_
== UC16
? js::jit::TimesTwo
: js::jit::TimesOne
;
174 js::jit::Address
inputStart() {
175 return js::jit::Address(masm_
.getStackPointer(),
176 offsetof(FrameData
, inputStart
));
178 js::jit::Address
backtrackStackBase() {
179 return js::jit::Address(masm_
.getStackPointer(),
180 offsetof(FrameData
, backtrackStackBase
));
182 js::jit::Address
matches() {
183 return js::jit::Address(masm_
.getStackPointer(),
184 offsetof(FrameData
, matches
));
186 js::jit::Address
numMatches() {
187 return js::jit::Address(masm_
.getStackPointer(),
188 offsetof(FrameData
, numMatches
));
191 // The stack-pointer-relative location of a regexp register.
192 js::jit::Address
register_location(int register_index
) {
193 return js::jit::Address(masm_
.getStackPointer(),
194 register_offset(register_index
));
197 int32_t register_offset(int register_index
) {
198 MOZ_ASSERT(register_index
>= 0 && register_index
<= kMaxRegister
);
199 if (num_registers_
<= register_index
) {
200 num_registers_
= register_index
+ 1;
202 static_assert(alignof(uintptr_t) <= alignof(FrameData
));
203 return sizeof(FrameData
) + register_index
* sizeof(uintptr_t*);
207 js::jit::StackMacroAssembler
& masm_
;
210 * This assembler uses the following registers:
212 * - current_character_:
213 * Contains the character (or characters) currently being examined.
214 * Must be loaded using LoadCurrentCharacter before using any of the
215 * dispatch methods. After a matching pass for a global regexp,
216 * temporarily stores the index of capture start.
217 * - current_position_:
218 * Current position in input *as negative byte offset from end of string*.
219 * - input_end_pointer_:
 *      Points to the byte after the last character in the input.
 *      current_position_ is a negative byte offset from this pointer.
222 * - backtrack_stack_pointer_:
223 * Points to tip of the (heap-allocated) backtrack stack. The stack grows
224 * downward (like the native stack).
225 * - temp0_, temp1_, temp2_:
228 * The native stack pointer is used to access arguments (InputOutputData),
229 * local variables (FrameData), and irregexp's internal virtual registers
230 * (see register_location).
233 js::jit::Register current_character_
;
234 js::jit::Register current_position_
;
235 js::jit::Register input_end_pointer_
;
236 js::jit::Register backtrack_stack_pointer_
;
237 js::jit::Register temp0_
, temp1_
, temp2_
;
239 // These labels are used in various API calls and bound (if used) in
240 // GetCode. If we abort in the middle of a compilation, as may
// happen if a regexp is too big, they may be used but never bound.
243 js::jit::NonAssertingLabel entry_label_
;
244 js::jit::NonAssertingLabel start_label_
;
245 js::jit::NonAssertingLabel backtrack_label_
;
246 js::jit::NonAssertingLabel success_label_
;
247 js::jit::NonAssertingLabel exit_label_
;
248 js::jit::NonAssertingLabel stack_overflow_label_
;
249 js::jit::NonAssertingLabel exit_with_exception_label_
;
251 // When we generate the code to push a backtrack label's address
252 // onto the backtrack stack, we don't know its final address. We
253 // have to patch it after linking. This is slightly delicate, as the
254 // Label itself (which is allocated on the stack) may not exist by
255 // the time we link. The approach is as follows:
257 // 1. When we push a label on the backtrack stack (PushBacktrack),
258 // we bind the label's patchOffset_ field to the offset within
259 // the code that should be overwritten. This works because each
260 // label is only pushed by a single instruction.
262 // 2. When we bind a label (Bind), we check to see if it has a
263 // bound patchOffset_. If it does, we create a LabelPatch mapping
264 // its patch offset to the offset of the label itself.
266 // 3. While linking the code, we walk the list of label patches
267 // and patch the code accordingly.
270 LabelPatch(js::jit::CodeOffset patchOffset
, size_t labelOffset
)
271 : patchOffset_(patchOffset
), labelOffset_(labelOffset
) {}
273 js::jit::CodeOffset patchOffset_
;
274 size_t labelOffset_
= 0;
277 js::Vector
<LabelPatch
, 4, js::SystemAllocPolicy
> labelPatches_
;
278 void AddLabelPatch(js::jit::CodeOffset patchOffset
, size_t labelOffset
) {
279 js::AutoEnterOOMUnsafeRegion oomUnsafe
;
280 if (!labelPatches_
.emplaceBack(patchOffset
, labelOffset
)) {
281 oomUnsafe
.crash("Irregexp label patch");
287 int num_capture_registers_
;
288 js::jit::LiveGeneralRegisterSet savedRegisters_
;
292 js::Vector
<PseudoHandle
<ByteArrayData
>, 4, js::SystemAllocPolicy
>;
293 TableVector
& tables() { return tables_
; }
297 void AddTable(PseudoHandle
<ByteArrayData
> table
) {
298 js::AutoEnterOOMUnsafeRegion oomUnsafe
;
299 if (!tables_
.append(std::move(table
))) {
300 oomUnsafe
.crash("Irregexp table append");
305 } // namespace internal
308 #endif // RegexpMacroAssemblerArch_h