1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // Copyright 2020 the V8 project authors. All rights reserved.
8 // Use of this source code is governed by a BSD-style license that can be
9 // found in the LICENSE file.
11 #include "irregexp/imported/regexp-macro-assembler-arch.h"
12 #include "irregexp/imported/regexp-stack.h"
13 #include "irregexp/imported/special-case.h"
14 #include "jit/Linker.h"
15 #include "jit/PerfSpewer.h"
16 #include "vm/MatchPairs.h"
19 # include "vtune/VTuneWrapper.h"
22 #include "jit/ABIFunctionList-inl.h"
23 #include "jit/MacroAssembler-inl.h"
29 using js::jit::AbsoluteAddress
;
30 using js::jit::Address
;
31 using js::jit::AllocatableGeneralRegisterSet
;
32 using js::jit::Assembler
;
33 using js::jit::BaseIndex
;
34 using js::jit::CodeLocationLabel
;
35 using js::jit::GeneralRegisterBackwardIterator
;
36 using js::jit::GeneralRegisterForwardIterator
;
37 using js::jit::GeneralRegisterSet
;
39 using js::jit::ImmPtr
;
40 using js::jit::ImmWord
;
41 using js::jit::JitCode
;
42 using js::jit::Linker
;
43 using js::jit::LiveGeneralRegisterSet
;
44 using js::jit::Register
;
45 using js::jit::Registers
;
46 using js::jit::StackMacroAssembler
;
48 SMRegExpMacroAssembler::SMRegExpMacroAssembler(JSContext
* cx
,
49 StackMacroAssembler
& masm
,
50 Zone
* zone
, Mode mode
,
51 uint32_t num_capture_registers
)
52 : NativeRegExpMacroAssembler(cx
->isolate
.ref(), zone
),
56 num_registers_(num_capture_registers
),
57 num_capture_registers_(num_capture_registers
) {
58 // Each capture has a start and an end register
59 MOZ_ASSERT(num_capture_registers_
% 2 == 0);
61 AllocatableGeneralRegisterSet
regs(GeneralRegisterSet::All());
63 input_end_pointer_
= regs
.takeAny();
64 current_character_
= regs
.takeAny();
65 current_position_
= regs
.takeAny();
66 backtrack_stack_pointer_
= regs
.takeAny();
67 temp0_
= regs
.takeAny();
68 temp1_
= regs
.takeAny();
70 // Not enough registers on x86.
71 temp2_
= regs
.takeAny();
73 savedRegisters_
= js::jit::SavedNonVolatileRegisters(regs
);
75 masm_
.jump(&entry_label_
); // We'll generate the entry code later
76 masm_
.bind(&start_label_
); // and continue from here.
79 int SMRegExpMacroAssembler::stack_limit_slack() {
80 return RegExpStack::kStackLimitSlack
;
83 void SMRegExpMacroAssembler::AdvanceCurrentPosition(int by
) {
85 masm_
.addPtr(Imm32(by
* char_size()), current_position_
);
89 void SMRegExpMacroAssembler::AdvanceRegister(int reg
, int by
) {
90 MOZ_ASSERT(reg
>= 0 && reg
< num_registers_
);
92 masm_
.addPtr(Imm32(by
), register_location(reg
));
96 void SMRegExpMacroAssembler::Backtrack() {
98 js::jit::Label bailOut
;
99 // Check for simulating interrupt
100 masm_
.branch32(Assembler::NotEqual
,
101 AbsoluteAddress(&cx_
->isolate
->shouldSimulateInterrupt_
),
104 // Check for an interrupt. We have to restart from the beginning if we
105 // are interrupted, so we only check for urgent interrupts.
106 js::jit::Label noInterrupt
;
108 Assembler::Zero
, AbsoluteAddress(cx_
->addressOfInterruptBits()),
109 Imm32(uint32_t(js::InterruptReason::CallbackUrgent
)), &noInterrupt
);
111 // bailing out if we have simulating interrupt flag set
112 masm_
.bind(&bailOut
);
114 masm_
.movePtr(ImmWord(int32_t(js::RegExpRunStatus::Error
)), temp0_
);
115 masm_
.jump(&exit_label_
);
116 masm_
.bind(&noInterrupt
);
118 // Pop code location from backtrack stack and jump to location.
123 void SMRegExpMacroAssembler::Bind(Label
* label
) {
124 masm_
.bind(label
->inner());
125 if (label
->patchOffset_
.bound()) {
126 AddLabelPatch(label
->patchOffset_
, label
->pos());
130 // Check if current_position + cp_offset is the input start
131 void SMRegExpMacroAssembler::CheckAtStartImpl(int cp_offset
, Label
* on_cond
,
132 Assembler::Condition cond
) {
133 Address
addr(current_position_
, cp_offset
* char_size());
134 masm_
.computeEffectiveAddress(addr
, temp0_
);
136 masm_
.branchPtr(cond
, inputStart(), temp0_
, LabelOrBacktrack(on_cond
));
139 void SMRegExpMacroAssembler::CheckAtStart(int cp_offset
, Label
* on_at_start
) {
140 CheckAtStartImpl(cp_offset
, on_at_start
, Assembler::Equal
);
143 void SMRegExpMacroAssembler::CheckNotAtStart(int cp_offset
,
144 Label
* on_not_at_start
) {
145 CheckAtStartImpl(cp_offset
, on_not_at_start
, Assembler::NotEqual
);
148 void SMRegExpMacroAssembler::CheckCharacterImpl(Imm32 c
, Label
* on_cond
,
149 Assembler::Condition cond
) {
150 masm_
.branch32(cond
, current_character_
, c
, LabelOrBacktrack(on_cond
));
153 void SMRegExpMacroAssembler::CheckCharacter(uint32_t c
, Label
* on_equal
) {
154 CheckCharacterImpl(Imm32(c
), on_equal
, Assembler::Equal
);
157 void SMRegExpMacroAssembler::CheckNotCharacter(uint32_t c
,
158 Label
* on_not_equal
) {
159 CheckCharacterImpl(Imm32(c
), on_not_equal
, Assembler::NotEqual
);
162 void SMRegExpMacroAssembler::CheckCharacterGT(base::uc16 limit
,
164 CheckCharacterImpl(Imm32(limit
), on_greater
, Assembler::GreaterThan
);
167 void SMRegExpMacroAssembler::CheckCharacterLT(base::uc16 limit
,
169 CheckCharacterImpl(Imm32(limit
), on_less
, Assembler::LessThan
);
172 // Bitwise-and the current character with mask and then check for a
174 void SMRegExpMacroAssembler::CheckCharacterAfterAndImpl(uint32_t c
,
179 Assembler::Condition cond
= is_not
? Assembler::NonZero
: Assembler::Zero
;
180 masm_
.branchTest32(cond
, current_character_
, Imm32(mask
),
181 LabelOrBacktrack(on_cond
));
183 Assembler::Condition cond
= is_not
? Assembler::NotEqual
: Assembler::Equal
;
184 masm_
.move32(Imm32(mask
), temp0_
);
185 masm_
.and32(current_character_
, temp0_
);
186 masm_
.branch32(cond
, temp0_
, Imm32(c
), LabelOrBacktrack(on_cond
));
190 void SMRegExpMacroAssembler::CheckCharacterAfterAnd(uint32_t c
, uint32_t mask
,
192 CheckCharacterAfterAndImpl(c
, mask
, on_equal
, /*is_not =*/false);
195 void SMRegExpMacroAssembler::CheckNotCharacterAfterAnd(uint32_t c
,
197 Label
* on_not_equal
) {
198 CheckCharacterAfterAndImpl(c
, mask
, on_not_equal
, /*is_not =*/true);
201 // Subtract minus from the current character, then bitwise-and the
202 // result with mask, then check for a match with c.
203 void SMRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd(
204 base::uc16 c
, base::uc16 minus
, base::uc16 mask
, Label
* on_not_equal
) {
205 masm_
.computeEffectiveAddress(Address(current_character_
, -minus
), temp0_
);
207 masm_
.branchTest32(Assembler::NonZero
, temp0_
, Imm32(mask
),
208 LabelOrBacktrack(on_not_equal
));
210 masm_
.and32(Imm32(mask
), temp0_
);
211 masm_
.branch32(Assembler::NotEqual
, temp0_
, Imm32(c
),
212 LabelOrBacktrack(on_not_equal
));
216 // If the current position matches the position stored on top of the backtrack
217 // stack, pops the backtrack stack and branches to the given label.
218 void SMRegExpMacroAssembler::CheckGreedyLoop(Label
* on_equal
) {
219 js::jit::Label fallthrough
;
220 masm_
.branchPtr(Assembler::NotEqual
, Address(backtrack_stack_pointer_
, 0),
221 current_position_
, &fallthrough
);
222 masm_
.addPtr(Imm32(sizeof(void*)), backtrack_stack_pointer_
); // Pop.
223 JumpOrBacktrack(on_equal
);
224 masm_
.bind(&fallthrough
);
227 void SMRegExpMacroAssembler::CheckCharacterInRangeImpl(
228 base::uc16 from
, base::uc16 to
, Label
* on_cond
, Assembler::Condition cond
) {
229 // x is in [from,to] if unsigned(x - from) <= to - from
230 masm_
.computeEffectiveAddress(Address(current_character_
, -from
), temp0_
);
231 masm_
.branch32(cond
, temp0_
, Imm32(to
- from
), LabelOrBacktrack(on_cond
));
234 void SMRegExpMacroAssembler::CheckCharacterInRange(base::uc16 from
,
236 Label
* on_in_range
) {
237 CheckCharacterInRangeImpl(from
, to
, on_in_range
, Assembler::BelowOrEqual
);
240 void SMRegExpMacroAssembler::CheckCharacterNotInRange(base::uc16 from
,
242 Label
* on_not_in_range
) {
243 CheckCharacterInRangeImpl(from
, to
, on_not_in_range
, Assembler::Above
);
247 bool SMRegExpMacroAssembler::IsCharacterInRangeArray(uint32_t c
,
248 ByteArrayData
* ranges
) {
249 js::AutoUnsafeCallWithABI unsafe
;
250 MOZ_ASSERT(ranges
->length() % sizeof(uint16_t) == 0);
251 uint32_t length
= ranges
->length() / sizeof(uint16_t);
252 MOZ_ASSERT(length
> 0);
255 if (c
< ranges
->getTyped
<uint16_t>(0)) {
256 // |c| is lower than the start of the first range.
257 // It is not in the range array.
260 if (c
>= ranges
->getTyped
<uint16_t>(length
- 1)) {
261 // |c| is higher than the last entry. If the table contains an odd
262 // number of entries, the last range is open-ended, so |c| is in
263 // the range array iff |length| is odd.
264 return (length
% 2) != 0;
267 // |ranges| is stored as an interval list: an ordered list of
268 // starting points, where every even index marks the beginning of a
269 // range of characters that are included, and every odd index marks
270 // the beginning of a range of characters that are excluded. For
271 // example, the set [1,2,3,7,8,9] would be represented as the
272 // range array [1,4,7,10]. If |ranges| has an odd number of entries,
273 // the last included range is open-ended (so the set containing
274 // every character would be represented as [0]).
276 // Because of the symmetry between included and excluded ranges, we
277 // can do a binary search for the index in |ranges| with the value
278 // closest to but not exceeding |c|. If that index is even, |c| is
279 // in an included range. If that index is odd, |c| is in an excluded
282 uint32_t upper
= length
;
285 mid
= lower
+ (upper
- lower
) / 2;
286 const base::uc16 elem
= ranges
->getTyped
<uint16_t>(mid
);
289 } else if (c
> elem
) {
294 } while (lower
< upper
);
295 uint32_t rangeIndex
= c
< ranges
->getTyped
<uint16_t>(mid
) ? mid
- 1 : mid
;
297 // Included ranges start at even indices and end at odd indices.
298 return rangeIndex
% 2 == 0;
301 void SMRegExpMacroAssembler::CallIsCharacterInRangeArray(
302 const ZoneList
<CharacterRange
>* ranges
) {
303 Handle
<ByteArray
> rangeArray
= GetOrAddRangeArray(ranges
);
304 masm_
.movePtr(ImmPtr(rangeArray
->inner()), temp0_
);
306 // Save volatile regs. Temp regs don't need to be saved.
307 LiveGeneralRegisterSet
volatileRegs(GeneralRegisterSet::Volatile());
308 volatileRegs
.takeUnchecked(temp0_
);
309 volatileRegs
.takeUnchecked(temp1_
);
310 if (temp2_
!= js::jit::InvalidReg
) {
311 volatileRegs
.takeUnchecked(temp2_
);
313 masm_
.PushRegsInMask(volatileRegs
);
315 using Fn
= bool (*)(uint32_t, ByteArrayData
*);
316 masm_
.setupUnalignedABICall(temp1_
);
317 masm_
.passABIArg(current_character_
);
318 masm_
.passABIArg(temp0_
);
320 masm_
.callWithABI
<Fn
, ::js::irregexp::IsCharacterInRangeArray
>();
321 masm_
.storeCallBoolResult(temp1_
);
322 masm_
.PopRegsInMask(volatileRegs
);
324 // GetOrAddRangeArray caches previously seen range arrays to reduce
325 // memory usage, so this may not be the first time we've seen this
326 // range array. We only need to transfer ownership from the
327 // HandleScope to the |tables_| vector once.
328 PseudoHandle
<ByteArrayData
> rawRangeArray
=
329 rangeArray
->maybeTakeOwnership(isolate());
331 AddTable(std::move(rawRangeArray
));
335 bool SMRegExpMacroAssembler::CheckCharacterInRangeArray(
336 const ZoneList
<CharacterRange
>* ranges
, Label
* on_in_range
) {
337 CallIsCharacterInRangeArray(ranges
);
338 masm_
.branchTest32(Assembler::NonZero
, temp1_
, temp1_
,
339 LabelOrBacktrack(on_in_range
));
343 bool SMRegExpMacroAssembler::CheckCharacterNotInRangeArray(
344 const ZoneList
<CharacterRange
>* ranges
, Label
* on_not_in_range
) {
345 CallIsCharacterInRangeArray(ranges
);
346 masm_
.branchTest32(Assembler::Zero
, temp1_
, temp1_
,
347 LabelOrBacktrack(on_not_in_range
));
351 void SMRegExpMacroAssembler::CheckBitInTable(Handle
<ByteArray
> table
,
353 // Claim ownership of the ByteArray from the current HandleScope.
354 // ByteArrays are allocated on the C++ heap and are (eventually)
355 // owned by the RegExpShared.
356 PseudoHandle
<ByteArrayData
> rawTable
= table
->takeOwnership(isolate());
358 masm_
.movePtr(ImmPtr(rawTable
->data()), temp0_
);
360 masm_
.move32(Imm32(kTableMask
), temp1_
);
361 masm_
.and32(current_character_
, temp1_
);
363 masm_
.load8ZeroExtend(BaseIndex(temp0_
, temp1_
, js::jit::TimesOne
), temp0_
);
364 masm_
.branchTest32(Assembler::NonZero
, temp0_
, temp0_
,
365 LabelOrBacktrack(on_bit_set
));
367 // Transfer ownership of |rawTable| to the |tables_| vector.
368 AddTable(std::move(rawTable
));
371 void SMRegExpMacroAssembler::CheckNotBackReferenceImpl(int start_reg
,
376 js::jit::Label fallthrough
;
378 // Captures are stored as a sequential pair of registers.
379 // Find the length of the back-referenced capture and load the
380 // capture's start index into current_character_.
381 masm_
.loadPtr(register_location(start_reg
), // index of start
383 masm_
.loadPtr(register_location(start_reg
+ 1), temp0_
); // index of end
384 masm_
.subPtr(current_character_
, temp0_
); // length of capture
386 // Capture registers are either both set or both cleared.
387 // If the capture length is zero, then the capture is either empty or cleared.
388 // Fall through in both cases.
389 masm_
.branchPtr(Assembler::Equal
, temp0_
, ImmWord(0), &fallthrough
);
391 // Check that there are sufficient characters left in the input.
393 // If start + len > current, there isn't enough room for a
394 // lookbehind backreference.
395 masm_
.loadPtr(inputStart(), temp1_
);
396 masm_
.addPtr(temp0_
, temp1_
);
397 masm_
.branchPtr(Assembler::GreaterThan
, temp1_
, current_position_
,
398 LabelOrBacktrack(on_no_match
));
400 // current_position_ is the negative offset from the end.
401 // If current + len > 0, there isn't enough room for a backreference.
402 masm_
.movePtr(current_position_
, temp1_
);
403 masm_
.addPtr(temp0_
, temp1_
);
404 masm_
.branchPtr(Assembler::GreaterThan
, temp1_
, ImmWord(0),
405 LabelOrBacktrack(on_no_match
));
408 if (mode_
== UC16
&& ignore_case
) {
409 // We call a helper function for case-insensitive non-latin1 strings.
411 // Save volatile regs. temp1_, temp2_, and current_character_
412 // don't need to be saved. current_position_ needs to be saved
413 // even if it's non-volatile, because we modify it to use as an argument.
414 LiveGeneralRegisterSet
volatileRegs(GeneralRegisterSet::Volatile());
415 volatileRegs
.addUnchecked(current_position_
);
416 volatileRegs
.takeUnchecked(temp1_
);
417 if (temp2_
!= js::jit::InvalidReg
) {
418 volatileRegs
.takeUnchecked(temp2_
);
420 volatileRegs
.takeUnchecked(current_character_
);
421 masm_
.PushRegsInMask(volatileRegs
);
424 // Address captured - Address of captured substring's start.
425 // Address current - Address of current character position.
426 // size_t byte_length - length of capture (in bytes)
428 // Compute |captured|
429 masm_
.addPtr(input_end_pointer_
, current_character_
);
432 masm_
.addPtr(input_end_pointer_
, current_position_
);
434 // Offset by length when matching backwards.
435 masm_
.subPtr(temp0_
, current_position_
);
438 using Fn
= uint32_t (*)(const char16_t
*, const char16_t
*, size_t);
439 masm_
.setupUnalignedABICall(temp1_
);
440 masm_
.passABIArg(current_character_
);
441 masm_
.passABIArg(current_position_
);
442 masm_
.passABIArg(temp0_
);
445 masm_
.callWithABI
<Fn
, ::js::irregexp::CaseInsensitiveCompareUnicode
>();
447 masm_
.callWithABI
<Fn
, ::js::irregexp::CaseInsensitiveCompareNonUnicode
>();
449 masm_
.storeCallInt32Result(temp1_
);
450 masm_
.PopRegsInMask(volatileRegs
);
451 masm_
.branchTest32(Assembler::Zero
, temp1_
, temp1_
,
452 LabelOrBacktrack(on_no_match
));
454 // On success, advance position by length of capture
456 masm_
.subPtr(temp0_
, current_position_
);
458 masm_
.addPtr(temp0_
, current_position_
);
461 masm_
.bind(&fallthrough
);
465 // We will be modifying current_position_. Save it in case the match fails.
466 masm_
.push(current_position_
);
468 // Compute start of capture string
469 masm_
.addPtr(input_end_pointer_
, current_character_
);
471 // Compute start of match string
472 masm_
.addPtr(input_end_pointer_
, current_position_
);
474 // Offset by length when matching backwards.
475 masm_
.subPtr(temp0_
, current_position_
);
478 // Compute end of match string
479 masm_
.addPtr(current_position_
, temp0_
);
481 Register nextCaptureChar
= temp1_
;
482 Register nextMatchChar
= temp2_
;
484 if (temp2_
== js::jit::InvalidReg
) {
485 masm_
.push(backtrack_stack_pointer_
);
486 nextMatchChar
= backtrack_stack_pointer_
;
489 js::jit::Label success
;
494 // Load next character from each string.
495 if (mode_
== LATIN1
) {
496 masm_
.load8ZeroExtend(Address(current_character_
, 0), nextCaptureChar
);
497 masm_
.load8ZeroExtend(Address(current_position_
, 0), nextMatchChar
);
499 masm_
.load16ZeroExtend(Address(current_character_
, 0), nextCaptureChar
);
500 masm_
.load16ZeroExtend(Address(current_position_
, 0), nextMatchChar
);
504 MOZ_ASSERT(mode_
== LATIN1
);
506 js::jit::Label loop_increment
;
507 masm_
.branch32(Assembler::Equal
, nextCaptureChar
, nextMatchChar
,
510 // Mismatch. Try case-insensitive match.
511 // Force the capture character to lower case (by setting bit 0x20)
512 // then check to see if it is a letter.
513 js::jit::Label convert_match
;
514 masm_
.or32(Imm32(0x20), nextCaptureChar
);
516 // Check if it is in [a,z].
517 masm_
.computeEffectiveAddress(Address(nextCaptureChar
, -'a'),
519 masm_
.branch32(Assembler::BelowOrEqual
, nextMatchChar
, Imm32('z' - 'a'),
521 // Check for values in range [224,254].
522 // Exclude 247 (U+00F7 DIVISION SIGN).
523 masm_
.sub32(Imm32(224 - 'a'), nextMatchChar
);
524 masm_
.branch32(Assembler::Above
, nextMatchChar
, Imm32(254 - 224), &fail
);
525 masm_
.branch32(Assembler::Equal
, nextMatchChar
, Imm32(247 - 224), &fail
);
527 // Capture character is lower case. Convert match character
528 // to lower case and compare.
529 masm_
.bind(&convert_match
);
530 masm_
.load8ZeroExtend(Address(current_position_
, 0), nextMatchChar
);
531 masm_
.or32(Imm32(0x20), nextMatchChar
);
532 masm_
.branch32(Assembler::NotEqual
, nextCaptureChar
, nextMatchChar
, &fail
);
534 masm_
.bind(&loop_increment
);
536 // Fail if characters do not match.
537 masm_
.branch32(Assembler::NotEqual
, nextCaptureChar
, nextMatchChar
, &fail
);
540 // Increment pointers into match and capture strings.
541 masm_
.addPtr(Imm32(char_size()), current_character_
);
542 masm_
.addPtr(Imm32(char_size()), current_position_
);
544 // Loop if we have not reached the end of the match string.
545 masm_
.branchPtr(Assembler::Below
, current_position_
, temp0_
, &loop
);
546 masm_
.jump(&success
);
548 // If we fail, restore current_position_ and branch.
550 if (temp2_
== js::jit::InvalidReg
) {
551 // Restore backtrack_stack_pointer_ when it was used as a temp register.
552 masm_
.pop(backtrack_stack_pointer_
);
554 masm_
.pop(current_position_
);
555 JumpOrBacktrack(on_no_match
);
557 masm_
.bind(&success
);
559 if (temp2_
== js::jit::InvalidReg
) {
560 // Restore backtrack_stack_pointer_ when it was used as a temp register.
561 masm_
.pop(backtrack_stack_pointer_
);
563 // Drop saved value of current_position_
564 masm_
.addToStackPtr(Imm32(sizeof(uintptr_t)));
566 // current_position_ is a pointer. Convert it back to an offset.
567 masm_
.subPtr(input_end_pointer_
, current_position_
);
569 // Subtract match length if we matched backward
570 masm_
.addPtr(register_location(start_reg
), current_position_
);
571 masm_
.subPtr(register_location(start_reg
+ 1), current_position_
);
574 masm_
.bind(&fallthrough
);
577 // Branch if a back-reference does not match a previous capture.
578 void SMRegExpMacroAssembler::CheckNotBackReference(int start_reg
,
580 Label
* on_no_match
) {
581 CheckNotBackReferenceImpl(start_reg
, read_backward
, /*unicode = */ false,
582 on_no_match
, /*ignore_case = */ false);
585 void SMRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(
586 int start_reg
, bool read_backward
, bool unicode
, Label
* on_no_match
) {
587 CheckNotBackReferenceImpl(start_reg
, read_backward
, unicode
, on_no_match
,
588 /*ignore_case = */ true);
591 // Checks whether the given offset from the current position is
592 // inside the input string.
593 void SMRegExpMacroAssembler::CheckPosition(int cp_offset
,
594 Label
* on_outside_input
) {
595 // Note: current_position_ is a (negative) byte offset relative to
596 // the end of the input string.
597 if (cp_offset
>= 0) {
598 // end + current + offset >= end
599 // <=> current + offset >= 0
600 // <=> current >= -offset
601 masm_
.branchPtr(Assembler::GreaterThanOrEqual
, current_position_
,
602 ImmWord(-cp_offset
* char_size()),
603 LabelOrBacktrack(on_outside_input
));
605 // Compute offset position
606 masm_
.computeEffectiveAddress(
607 Address(current_position_
, cp_offset
* char_size()), temp0_
);
609 // Compare to start of input.
610 masm_
.branchPtr(Assembler::GreaterThan
, inputStart(), temp0_
,
611 LabelOrBacktrack(on_outside_input
));
615 // This function attempts to generate special case code for character classes.
616 // Returns true if a special case is generated.
617 // Otherwise returns false and generates no code.
618 bool SMRegExpMacroAssembler::CheckSpecialCharacterClass(
619 StandardCharacterSet type
, Label
* on_no_match
) {
620 js::jit::Label
* no_match
= LabelOrBacktrack(on_no_match
);
622 // Note: throughout this function, range checks (c in [min, max])
623 // are implemented by an unsigned (c - min) <= (max - min) check.
625 case StandardCharacterSet::kWhitespace
: {
626 // Match space-characters
627 if (mode_
!= LATIN1
) {
630 js::jit::Label success
;
631 // One byte space characters are ' ', '\t'..'\r', and '\u00a0' (NBSP).
634 masm_
.branch32(Assembler::Equal
, current_character_
, Imm32(' '),
638 masm_
.computeEffectiveAddress(Address(current_character_
, -'\t'), temp0_
);
639 masm_
.branch32(Assembler::BelowOrEqual
, temp0_
, Imm32('\r' - '\t'),
643 masm_
.branch32(Assembler::NotEqual
, temp0_
, Imm32(0x00a0 - '\t'),
646 masm_
.bind(&success
);
649 case StandardCharacterSet::kNotWhitespace
:
650 // The emitted code for generic character classes is good enough.
652 case StandardCharacterSet::kDigit
:
653 // Match latin1 digits ('0'-'9')
654 masm_
.computeEffectiveAddress(Address(current_character_
, -'0'), temp0_
);
655 masm_
.branch32(Assembler::Above
, temp0_
, Imm32('9' - '0'), no_match
);
657 case StandardCharacterSet::kNotDigit
:
658 // Match anything except latin1 digits ('0'-'9')
659 masm_
.computeEffectiveAddress(Address(current_character_
, -'0'), temp0_
);
660 masm_
.branch32(Assembler::BelowOrEqual
, temp0_
, Imm32('9' - '0'),
663 case StandardCharacterSet::kNotLineTerminator
:
664 // Match non-newlines. This excludes '\n' (0x0a), '\r' (0x0d),
665 // U+2028 LINE SEPARATOR, and U+2029 PARAGRAPH SEPARATOR.
666 // See https://tc39.es/ecma262/#prod-LineTerminator
668 // To test for 0x0a and 0x0d efficiently, we XOR the input with 1.
669 // This converts 0x0a to 0x0b, and 0x0d to 0x0c, allowing us to
670 // test for the contiguous range 0x0b..0x0c.
671 masm_
.move32(current_character_
, temp0_
);
672 masm_
.xor32(Imm32(0x01), temp0_
);
673 masm_
.sub32(Imm32(0x0b), temp0_
);
674 masm_
.branch32(Assembler::BelowOrEqual
, temp0_
, Imm32(0x0c - 0x0b),
678 // Compare original value to 0x2028 and 0x2029, using the already
679 // computed (current_char ^ 0x01 - 0x0b). I.e., check for
680 // 0x201d (0x2028 - 0x0b) or 0x201e.
681 masm_
.sub32(Imm32(0x2028 - 0x0b), temp0_
);
682 masm_
.branch32(Assembler::BelowOrEqual
, temp0_
, Imm32(0x2029 - 0x2028),
686 case StandardCharacterSet::kWord
:
687 // \w matches the set of 63 characters defined in Runtime Semantics:
688 // WordCharacters. We use a static lookup table, which is defined in
689 // regexp-macro-assembler.cc.
690 // Note: if both Unicode and IgnoreCase are true, \w matches a
691 // larger set of characters. That case is handled elsewhere.
692 if (mode_
!= LATIN1
) {
693 masm_
.branch32(Assembler::Above
, current_character_
, Imm32('z'),
696 static_assert(arraysize(word_character_map
) > unibrow::Latin1::kMaxChar
);
697 masm_
.movePtr(ImmPtr(word_character_map
), temp0_
);
698 masm_
.load8ZeroExtend(
699 BaseIndex(temp0_
, current_character_
, js::jit::TimesOne
), temp0_
);
700 masm_
.branchTest32(Assembler::Zero
, temp0_
, temp0_
, no_match
);
702 case StandardCharacterSet::kNotWord
: {
705 if (mode_
!= LATIN1
) {
706 masm_
.branch32(Assembler::Above
, current_character_
, Imm32('z'), &done
);
708 static_assert(arraysize(word_character_map
) > unibrow::Latin1::kMaxChar
);
709 masm_
.movePtr(ImmPtr(word_character_map
), temp0_
);
710 masm_
.load8ZeroExtend(
711 BaseIndex(temp0_
, current_character_
, js::jit::TimesOne
), temp0_
);
712 masm_
.branchTest32(Assembler::NonZero
, temp0_
, temp0_
, no_match
);
713 if (mode_
!= LATIN1
) {
718 ////////////////////////////////////////////////////////////////////////
719 // Non-standard classes (with no syntactic shorthand) used internally //
720 ////////////////////////////////////////////////////////////////////////
721 case StandardCharacterSet::kEverything
:
722 // Match any character
724 case StandardCharacterSet::kLineTerminator
:
725 // Match newlines. The opposite of '.'. See '.' above.
726 masm_
.move32(current_character_
, temp0_
);
727 masm_
.xor32(Imm32(0x01), temp0_
);
728 masm_
.sub32(Imm32(0x0b), temp0_
);
729 if (mode_
== LATIN1
) {
730 masm_
.branch32(Assembler::Above
, temp0_
, Imm32(0x0c - 0x0b), no_match
);
732 MOZ_ASSERT(mode_
== UC16
);
734 masm_
.branch32(Assembler::BelowOrEqual
, temp0_
, Imm32(0x0c - 0x0b),
737 // Compare original value to 0x2028 and 0x2029, using the already
738 // computed (current_char ^ 0x01 - 0x0b). I.e., check for
739 // 0x201d (0x2028 - 0x0b) or 0x201e.
740 masm_
.sub32(Imm32(0x2028 - 0x0b), temp0_
);
741 masm_
.branch32(Assembler::Above
, temp0_
, Imm32(0x2029 - 0x2028),
750 void SMRegExpMacroAssembler::Fail() {
751 masm_
.movePtr(ImmWord(int32_t(js::RegExpRunStatus::Success_NotFound
)),
753 masm_
.jump(&exit_label_
);
756 void SMRegExpMacroAssembler::GoTo(Label
* to
) {
757 masm_
.jump(LabelOrBacktrack(to
));
760 void SMRegExpMacroAssembler::IfRegisterGE(int reg
, int comparand
,
762 masm_
.branchPtr(Assembler::GreaterThanOrEqual
, register_location(reg
),
763 ImmWord(comparand
), LabelOrBacktrack(if_ge
));
766 void SMRegExpMacroAssembler::IfRegisterLT(int reg
, int comparand
,
768 masm_
.branchPtr(Assembler::LessThan
, register_location(reg
),
769 ImmWord(comparand
), LabelOrBacktrack(if_lt
));
772 void SMRegExpMacroAssembler::IfRegisterEqPos(int reg
, Label
* if_eq
) {
773 masm_
.branchPtr(Assembler::Equal
, register_location(reg
), current_position_
,
774 LabelOrBacktrack(if_eq
));
777 // This is a word-for-word identical copy of the V8 code, which is
778 // duplicated in at least nine different places in V8 (one per
779 // supported architecture) with no differences outside of comments and
780 // formatting. It should be hoisted into the superclass. Once that is
781 // done upstream, this version can be deleted.
782 void SMRegExpMacroAssembler::LoadCurrentCharacterImpl(int cp_offset
,
783 Label
* on_end_of_input
,
787 // It's possible to preload a small number of characters when each success
788 // path requires a large number of characters, but not the reverse.
789 MOZ_ASSERT(eats_at_least
>= characters
);
790 MOZ_ASSERT(cp_offset
< (1 << 30)); // Be sane! (And ensure negation works)
793 if (cp_offset
>= 0) {
794 CheckPosition(cp_offset
+ eats_at_least
- 1, on_end_of_input
);
796 CheckPosition(cp_offset
, on_end_of_input
);
799 LoadCurrentCharacterUnchecked(cp_offset
, characters
);
802 // Load the character (or characters) at the specified offset from the
803 // current position. Zero-extend to 32 bits.
804 void SMRegExpMacroAssembler::LoadCurrentCharacterUnchecked(int cp_offset
,
806 BaseIndex
address(input_end_pointer_
, current_position_
, js::jit::TimesOne
,
807 cp_offset
* char_size());
808 if (mode_
== LATIN1
) {
809 if (characters
== 4) {
810 masm_
.load32(address
, current_character_
);
811 } else if (characters
== 2) {
812 masm_
.load16ZeroExtend(address
, current_character_
);
814 MOZ_ASSERT(characters
== 1);
815 masm_
.load8ZeroExtend(address
, current_character_
);
818 MOZ_ASSERT(mode_
== UC16
);
819 if (characters
== 2) {
820 masm_
.load32(address
, current_character_
);
822 MOZ_ASSERT(characters
== 1);
823 masm_
.load16ZeroExtend(address
, current_character_
);
828 void SMRegExpMacroAssembler::PopCurrentPosition() { Pop(current_position_
); }
830 void SMRegExpMacroAssembler::PopRegister(int register_index
) {
832 masm_
.storePtr(temp0_
, register_location(register_index
));
835 void SMRegExpMacroAssembler::PushBacktrack(Label
* label
) {
836 MOZ_ASSERT(!label
->is_bound());
837 MOZ_ASSERT(!label
->patchOffset_
.bound());
838 label
->patchOffset_
= masm_
.movWithPatch(ImmPtr(nullptr), temp0_
);
839 MOZ_ASSERT(label
->patchOffset_
.bound());
843 CheckBacktrackStackLimit();
846 void SMRegExpMacroAssembler::PushCurrentPosition() { Push(current_position_
); }
848 void SMRegExpMacroAssembler::PushRegister(int register_index
,
849 StackCheckFlag check_stack_limit
) {
850 masm_
.loadPtr(register_location(register_index
), temp0_
);
852 if (check_stack_limit
) {
853 CheckBacktrackStackLimit();
857 void SMRegExpMacroAssembler::ReadCurrentPositionFromRegister(int reg
) {
858 masm_
.loadPtr(register_location(reg
), current_position_
);
861 void SMRegExpMacroAssembler::WriteCurrentPositionToRegister(int reg
,
863 if (cp_offset
== 0) {
864 masm_
.storePtr(current_position_
, register_location(reg
));
866 Address
addr(current_position_
, cp_offset
* char_size());
867 masm_
.computeEffectiveAddress(addr
, temp0_
);
868 masm_
.storePtr(temp0_
, register_location(reg
));
872 // Note: The backtrack stack pointer is stored in a register as an
873 // offset from the stack top, not as a bare pointer, so that it is not
874 // corrupted if the backtrack stack grows (and therefore moves).
875 void SMRegExpMacroAssembler::ReadStackPointerFromRegister(int reg
) {
876 masm_
.loadPtr(register_location(reg
), backtrack_stack_pointer_
);
877 masm_
.addPtr(backtrackStackBase(), backtrack_stack_pointer_
);
879 void SMRegExpMacroAssembler::WriteStackPointerToRegister(int reg
) {
880 masm_
.movePtr(backtrack_stack_pointer_
, temp0_
);
881 masm_
.subPtr(backtrackStackBase(), temp0_
);
882 masm_
.storePtr(temp0_
, register_location(reg
));
885 // When matching a regexp that is anchored at the end, this operation
886 // is used to try skipping the beginning of long strings. If the
887 // maximum length of a match is less than the length of the string, we
888 // can skip the initial len - max_len bytes.
889 void SMRegExpMacroAssembler::SetCurrentPositionFromEnd(int by
) {
890 js::jit::Label after_position
;
891 masm_
.branchPtr(Assembler::GreaterThanOrEqual
, current_position_
,
892 ImmWord(-by
* char_size()), &after_position
);
893 masm_
.movePtr(ImmWord(-by
* char_size()), current_position_
);
895 // On RegExp code entry (where this operation is used), the character before
896 // the current position is expected to be already loaded.
897 // We have advanced the position, so it's safe to read backwards.
898 LoadCurrentCharacterUnchecked(-1, 1);
899 masm_
.bind(&after_position
);
902 void SMRegExpMacroAssembler::SetRegister(int register_index
, int to
) {
903 MOZ_ASSERT(register_index
>= num_capture_registers_
);
904 masm_
.storePtr(ImmWord(to
), register_location(register_index
));
907 // Returns true if a regexp match can be restarted (aka the regexp is global).
908 // The return value is not used anywhere, but we implement it to be safe.
909 bool SMRegExpMacroAssembler::Succeed() {
910 masm_
.jump(&success_label_
);
914 // Capture registers are initialized to input[-1]
915 void SMRegExpMacroAssembler::ClearRegisters(int reg_from
, int reg_to
) {
916 MOZ_ASSERT(reg_from
<= reg_to
);
917 masm_
.loadPtr(inputStart(), temp0_
);
918 masm_
.subPtr(Imm32(char_size()), temp0_
);
919 for (int reg
= reg_from
; reg
<= reg_to
; reg
++) {
920 masm_
.storePtr(temp0_
, register_location(reg
));
924 void SMRegExpMacroAssembler::Push(Register source
) {
925 MOZ_ASSERT(source
!= backtrack_stack_pointer_
);
927 masm_
.subPtr(Imm32(sizeof(void*)), backtrack_stack_pointer_
);
928 masm_
.storePtr(source
, Address(backtrack_stack_pointer_
, 0));
931 void SMRegExpMacroAssembler::Pop(Register target
) {
932 MOZ_ASSERT(target
!= backtrack_stack_pointer_
);
934 masm_
.loadPtr(Address(backtrack_stack_pointer_
, 0), target
);
935 masm_
.addPtr(Imm32(sizeof(void*)), backtrack_stack_pointer_
);
938 void SMRegExpMacroAssembler::JumpOrBacktrack(Label
* to
) {
940 masm_
.jump(to
->inner());
946 // Generate a quick inline test for backtrack stack overflow.
947 // If the test fails, call an OOL handler to try growing the stack.
948 void SMRegExpMacroAssembler::CheckBacktrackStackLimit() {
949 js::jit::Label no_stack_overflow
;
951 Assembler::BelowOrEqual
,
952 AbsoluteAddress(isolate()->regexp_stack()->limit_address_address()),
953 backtrack_stack_pointer_
, &no_stack_overflow
);
955 masm_
.call(&stack_overflow_label_
);
957 // Exit with an exception if the call failed
958 masm_
.branchTest32(Assembler::Zero
, temp0_
, temp0_
,
959 &exit_with_exception_label_
);
961 masm_
.bind(&no_stack_overflow
);
964 // This is used to sneak an OOM through the V8 layer.
965 static Handle
<HeapObject
> DummyCode() {
966 return Handle
<HeapObject
>::fromHandleValue(JS::UndefinedHandleValue
);
969 // Finalize code. This is called last, so that we know how many
970 // registers we need.
971 Handle
<HeapObject
> SMRegExpMacroAssembler::GetCode(Handle
<String
> source
) {
972 if (!cx_
->zone()->ensureJitZoneExists(cx_
)) {
976 masm_
.bind(&entry_label_
);
981 masm_
.jump(&start_label_
);
986 stackOverflowHandler();
988 Linker
linker(masm_
);
989 JitCode
* code
= linker
.newCode(cx_
, js::jit::CodeKind::RegExp
);
994 for (LabelPatch
& lp
: labelPatches_
) {
995 Assembler::PatchDataWithValueCheck(CodeLocationLabel(code
, lp
.patchOffset_
),
996 ImmPtr(code
->raw() + lp
.labelOffset_
),
1000 CollectPerfSpewerJitCodeProfile(code
, "RegExp");
1003 js::vtune::MarkStub(code
, "RegExp");
1006 return Handle
<HeapObject
>(JS::PrivateGCThingValue(code
), isolate());
1010 * The stack will have the following structure:
1013 * - backtrack stack base
1017 * - Capture positions
1018 * - Scratch registers
1019 * --- frame alignment ---
1020 * - Saved register area
1021 * fp-> - Frame pointer
1024 void SMRegExpMacroAssembler::createStackFrame() {
1025 #ifdef JS_CODEGEN_ARM64
1026 // ARM64 communicates stack address via SP, but uses a pseudo-sp (PSP) for
1027 // addressing. The register we use for PSP may however also be used by
1028 // calling code, and it is nonvolatile, so save it. Do this as a special
1029 // case first because the generic save/restore code needs the PSP to be
1030 // initialized already.
1031 MOZ_ASSERT(js::jit::PseudoStackPointer64
.Is(masm_
.GetStackPointer64()));
1032 masm_
.Str(js::jit::PseudoStackPointer64
,
1033 vixl::MemOperand(js::jit::sp
, -16, vixl::PreIndex
));
1035 // Initialize the PSP from the SP.
1036 masm_
.initPseudoStackPtr();
1039 masm_
.Push(js::jit::FramePointer
);
1040 masm_
.moveStackPtrTo(js::jit::FramePointer
);
1042 // Push non-volatile registers which might be modified by jitcode.
1043 for (GeneralRegisterForwardIterator
iter(savedRegisters_
); iter
.more();
1048 // The pointer to InputOutputData is passed as the first argument.
1049 // On x86 we have to load it off the stack into temp0_.
1050 // On other platforms it is already in a register.
1051 #ifdef JS_CODEGEN_X86
1052 Address
ioDataAddr(js::jit::FramePointer
, 2 * sizeof(void*));
1053 masm_
.loadPtr(ioDataAddr
, temp0_
);
1055 if (js::jit::IntArgReg0
!= temp0_
) {
1056 masm_
.movePtr(js::jit::IntArgReg0
, temp0_
);
1060 // Start a new stack frame.
1061 size_t frameBytes
= sizeof(FrameData
) + num_registers_
* sizeof(void*);
1062 frameSize_
= js::jit::StackDecrementForCall(js::jit::ABIStackAlignment
,
1063 masm_
.framePushed(), frameBytes
);
1064 masm_
.reserveStack(frameSize_
);
1065 masm_
.checkStackAlignment();
1067 // Check if we have space on the stack. Use the *NoInterrupt stack limit to
1068 // avoid failing repeatedly when the regex code is called from Ion JIT code.
1069 // (See bug 1208819)
1070 js::jit::Label stack_ok
;
1071 AbsoluteAddress
limit_addr(cx_
->addressOfJitStackLimitNoInterrupt());
1072 masm_
.branchStackPtrRhs(Assembler::Below
, limit_addr
, &stack_ok
);
1074 // There is not enough space on the stack. Exit with an exception.
1075 masm_
.movePtr(ImmWord(int32_t(js::RegExpRunStatus::Error
)), temp0_
);
1076 masm_
.jump(&exit_label_
);
1078 masm_
.bind(&stack_ok
);
1081 void SMRegExpMacroAssembler::initFrameAndRegs() {
1082 // At this point, an uninitialized stack frame has been created,
1083 // and the address of the InputOutputData is in temp0_.
1084 Register ioDataReg
= temp0_
;
1086 Register matchesReg
= temp1_
;
1087 masm_
.loadPtr(Address(ioDataReg
, offsetof(InputOutputData
, matches
)),
1090 // Initialize output registers
1091 // Use |backtrack_stack_pointer_| as an additional temp register. This is safe
1092 // because we haven't yet written any data to |backtrack_stack_pointer_|.
1093 Register extraTemp
= backtrack_stack_pointer_
;
1095 masm_
.loadPtr(Address(matchesReg
, MatchPairs::offsetOfPairs()), extraTemp
);
1096 masm_
.storePtr(extraTemp
, matches());
1097 masm_
.load32(Address(matchesReg
, MatchPairs::offsetOfPairCount()), extraTemp
);
1098 masm_
.store32(extraTemp
, numMatches());
1101 // Bounds-check numMatches.
1102 js::jit::Label enoughRegisters
;
1103 masm_
.branchPtr(Assembler::GreaterThanOrEqual
, extraTemp
,
1104 ImmWord(num_capture_registers_
/ 2), &enoughRegisters
);
1105 masm_
.assumeUnreachable("Not enough output pairs for RegExp");
1106 masm_
.bind(&enoughRegisters
);
1109 // Load input start pointer.
1110 masm_
.loadPtr(Address(ioDataReg
, offsetof(InputOutputData
, inputStart
)),
1113 // Load input end pointer
1114 masm_
.loadPtr(Address(ioDataReg
, offsetof(InputOutputData
, inputEnd
)),
1115 input_end_pointer_
);
1117 // Set up input position to be negative offset from string end.
1118 masm_
.subPtr(input_end_pointer_
, current_position_
);
1121 masm_
.storePtr(current_position_
, inputStart());
1124 Register startIndexReg
= temp1_
;
1125 masm_
.loadPtr(Address(ioDataReg
, offsetof(InputOutputData
, startIndex
)),
1127 masm_
.computeEffectiveAddress(
1128 BaseIndex(current_position_
, startIndexReg
, factor()), current_position_
);
1130 // Initialize current_character_.
1131 // Load newline if index is at start, or previous character otherwise.
1132 js::jit::Label start_regexp
;
1133 js::jit::Label load_previous_character
;
1134 masm_
.branchPtr(Assembler::NotEqual
, startIndexReg
, ImmWord(0),
1135 &load_previous_character
);
1136 masm_
.movePtr(ImmWord('\n'), current_character_
);
1137 masm_
.jump(&start_regexp
);
1139 masm_
.bind(&load_previous_character
);
1140 LoadCurrentCharacterUnchecked(-1, 1);
1141 masm_
.bind(&start_regexp
);
1143 // Initialize captured registers with inputStart - 1
1144 MOZ_ASSERT(num_capture_registers_
> 0);
1145 Register inputStartMinusOneReg
= temp0_
;
1146 masm_
.loadPtr(inputStart(), inputStartMinusOneReg
);
1147 masm_
.subPtr(Imm32(char_size()), inputStartMinusOneReg
);
1148 if (num_capture_registers_
> 8) {
1149 masm_
.movePtr(ImmWord(register_offset(0)), temp1_
);
1150 js::jit::Label init_loop
;
1151 masm_
.bind(&init_loop
);
1152 masm_
.storePtr(inputStartMinusOneReg
, BaseIndex(masm_
.getStackPointer(),
1153 temp1_
, js::jit::TimesOne
));
1154 masm_
.addPtr(ImmWord(sizeof(void*)), temp1_
);
1155 masm_
.branchPtr(Assembler::LessThanOrEqual
, temp1_
,
1156 ImmWord(register_offset(num_capture_registers_
- 1)),
1160 for (int i
= 0; i
< num_capture_registers_
; i
++) {
1161 masm_
.storePtr(inputStartMinusOneReg
, register_location(i
));
1165 // Initialize backtrack stack pointer
1166 masm_
.loadPtr(AbsoluteAddress(ExternalReference::TopOfRegexpStack(isolate())),
1167 backtrack_stack_pointer_
);
1168 masm_
.storePtr(backtrack_stack_pointer_
, backtrackStackBase());
1171 // Called when we find a match. May not be generated if we can
1172 // determine ahead of time that a regexp cannot match: for example,
1173 // when compiling /\u1e9e/ for latin-1 inputs.
1174 void SMRegExpMacroAssembler::successHandler() {
1175 if (!success_label_
.used()) {
1178 masm_
.bind(&success_label_
);
1180 // Copy captures to the MatchPairs pointed to by the InputOutputData.
1181 // Captures are stored as positions, which are negative byte offsets
1182 // from the end of the string. We must convert them to actual
1185 // Index: [ 0 ][ 1 ][ 2 ][ 3 ][ 4 ][ 5 ][END]
1186 // Pos (1-byte): [-6 ][-5 ][-4 ][-3 ][-2 ][-1 ][ 0 ] // IS = -6
1187 // Pos (2-byte): [-12][-10][-8 ][-6 ][-4 ][-2 ][ 0 ] // IS = -12
1189 // To convert a position to an index, we subtract InputStart, and
1190 // divide the result by char_size.
1191 Register matchesReg
= temp1_
;
1192 masm_
.loadPtr(matches(), matchesReg
);
1194 // Use |backtrack_stack_pointer_| as an additional temp register. This is safe
1195 // because we don't read from |backtrack_stack_pointer_| after this point.
1196 Register extraTemp
= backtrack_stack_pointer_
;
1198 Register inputStartReg
= extraTemp
;
1199 masm_
.loadPtr(inputStart(), inputStartReg
);
1201 for (int i
= 0; i
< num_capture_registers_
; i
++) {
1202 masm_
.loadPtr(register_location(i
), temp0_
);
1203 masm_
.subPtr(inputStartReg
, temp0_
);
1204 if (mode_
== UC16
) {
1205 masm_
.rshiftPtrArithmetic(Imm32(1), temp0_
);
1207 masm_
.store32(temp0_
, Address(matchesReg
, i
* sizeof(int32_t)));
1210 masm_
.movePtr(ImmWord(int32_t(js::RegExpRunStatus::Success
)), temp0_
);
1211 // This falls through to the exit handler.
1214 void SMRegExpMacroAssembler::exitHandler() {
1215 masm_
.bind(&exit_label_
);
1217 if (temp0_
!= js::jit::ReturnReg
) {
1218 masm_
.movePtr(temp0_
, js::jit::ReturnReg
);
1221 masm_
.freeStack(frameSize_
);
1223 // Restore registers which were saved on entry
1224 for (GeneralRegisterBackwardIterator
iter(savedRegisters_
); iter
.more();
1229 masm_
.Pop(js::jit::FramePointer
);
1231 #ifdef JS_CODEGEN_ARM64
1232 // Now restore the value that was in the PSP register on entry, and return.
1234 // Obtain the correct SP from the PSP.
1235 masm_
.Mov(js::jit::sp
, js::jit::PseudoStackPointer64
);
1237 // Restore the saved value of the PSP register, this value is whatever the
1238 // caller had saved in it, not any actual SP value, and it must not be
1239 // overwritten subsequently.
1240 masm_
.Ldr(js::jit::PseudoStackPointer64
,
1241 vixl::MemOperand(js::jit::sp
, 16, vixl::PostIndex
));
1243 // Perform a plain Ret(), as abiret() will move SP <- PSP and that is wrong.
1244 masm_
.Ret(vixl::lr
);
1249 if (exit_with_exception_label_
.used()) {
1250 masm_
.bind(&exit_with_exception_label_
);
1252 // Exit with an error result to signal thrown exception
1253 masm_
.movePtr(ImmWord(int32_t(js::RegExpRunStatus::Error
)), temp0_
);
1254 masm_
.jump(&exit_label_
);
1258 void SMRegExpMacroAssembler::backtrackHandler() {
1259 if (!backtrack_label_
.used()) {
1262 masm_
.bind(&backtrack_label_
);
1266 void SMRegExpMacroAssembler::stackOverflowHandler() {
1267 if (!stack_overflow_label_
.used()) {
1271 js::jit::AutoCreatedBy
acb(masm_
,
1272 "SMRegExpMacroAssembler::stackOverflowHandler");
1274 // Called if the backtrack-stack limit has been hit.
1275 masm_
.bind(&stack_overflow_label_
);
1278 masm_
.movePtr(ImmPtr(isolate()->regexp_stack()), temp1_
);
1280 // Save registers before calling C function
1281 LiveGeneralRegisterSet
volatileRegs(GeneralRegisterSet::Volatile());
1283 #ifdef JS_USE_LINK_REGISTER
1284 masm_
.pushReturnAddress();
1287 // Adjust for the return address on the stack.
1288 size_t frameOffset
= sizeof(void*);
1290 volatileRegs
.takeUnchecked(temp0_
);
1291 volatileRegs
.takeUnchecked(temp1_
);
1292 masm_
.PushRegsInMask(volatileRegs
);
1294 using Fn
= bool (*)(RegExpStack
* regexp_stack
);
1295 masm_
.setupUnalignedABICall(temp0_
);
1296 masm_
.passABIArg(temp1_
);
1297 masm_
.callWithABI
<Fn
, ::js::irregexp::GrowBacktrackStack
>();
1298 masm_
.storeCallBoolResult(temp0_
);
1300 masm_
.PopRegsInMask(volatileRegs
);
1302 // If GrowBacktrackStack returned false, we have failed to grow the
1303 // stack, and must exit with a stack-overflow exception. Do this in
1304 // the caller so that the stack is adjusted by our return instruction.
1305 js::jit::Label overflow_return
;
1306 masm_
.branchTest32(Assembler::Zero
, temp0_
, temp0_
, &overflow_return
);
1308 // Otherwise, store the new backtrack stack base and recompute the new
1309 // top of the stack.
1310 Address
bsbAddress(masm_
.getStackPointer(),
1311 offsetof(FrameData
, backtrackStackBase
) + frameOffset
);
1312 masm_
.subPtr(bsbAddress
, backtrack_stack_pointer_
);
1314 masm_
.loadPtr(AbsoluteAddress(ExternalReference::TopOfRegexpStack(isolate())),
1316 masm_
.storePtr(temp1_
, bsbAddress
);
1317 masm_
.addPtr(temp1_
, backtrack_stack_pointer_
);
1319 // Resume execution in calling code.
1320 masm_
.bind(&overflow_return
);
1324 // This is only used by tracing code.
1325 // The return value doesn't matter.
1326 RegExpMacroAssembler::IrregexpImplementation
1327 SMRegExpMacroAssembler::Implementation() {
1328 return kBytecodeImplementation
;
1331 // Compare two strings in `/i` mode (ignoreCase, but not unicode).
1333 uint32_t SMRegExpMacroAssembler::CaseInsensitiveCompareNonUnicode(
1334 const char16_t
* substring1
, const char16_t
* substring2
, size_t byteLength
) {
1335 js::AutoUnsafeCallWithABI unsafe
;
1337 MOZ_ASSERT(byteLength
% sizeof(char16_t
) == 0);
1338 size_t length
= byteLength
/ sizeof(char16_t
);
1340 for (size_t i
= 0; i
< length
; i
++) {
1341 char16_t c1
= substring1
[i
];
1342 char16_t c2
= substring2
[i
];
1344 #ifdef JS_HAS_INTL_API
1345 // Non-unicode regexps have weird case-folding rules.
1346 c1
= RegExpCaseFolding::Canonicalize(c1
);
1347 c2
= RegExpCaseFolding::Canonicalize(c2
);
1349 // If we aren't building with ICU, fall back to `/iu` mode. The only
1350 // differences are in corner cases.
1351 c1
= js::unicode::FoldCase(c1
);
1352 c2
= js::unicode::FoldCase(c2
);
1363 // Compare two strings in `/iu` mode (ignoreCase and unicode).
1365 uint32_t SMRegExpMacroAssembler::CaseInsensitiveCompareUnicode(
1366 const char16_t
* substring1
, const char16_t
* substring2
, size_t byteLength
) {
1367 js::AutoUnsafeCallWithABI unsafe
;
1369 MOZ_ASSERT(byteLength
% sizeof(char16_t
) == 0);
1370 size_t length
= byteLength
/ sizeof(char16_t
);
1372 for (size_t i
= 0; i
< length
; i
++) {
1373 char16_t c1
= substring1
[i
];
1374 char16_t c2
= substring2
[i
];
1376 // Unicode regexps use the common and simple case-folding
1377 // mappings of the Unicode Character Database.
1378 c1
= js::unicode::FoldCase(c1
);
1379 c2
= js::unicode::FoldCase(c2
);
1390 bool SMRegExpMacroAssembler::GrowBacktrackStack(RegExpStack
* regexp_stack
) {
1391 js::AutoUnsafeCallWithABI unsafe
;
1392 size_t size
= regexp_stack
->memory_size();
1393 return !!regexp_stack
->EnsureCapacity(size
* 2);
1396 bool SMRegExpMacroAssembler::CanReadUnaligned() const {
1397 #if defined(JS_CODEGEN_ARM)
1398 return !js::jit::ARMFlags::HasAlignmentFault();
1399 #elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
1406 } // namespace internal