Bug 1838729 - test(webgpu): accept observed intermittents in `backlog`
[gecko.git] / js / src / irregexp / RegExpNativeMacroAssembler.cpp
blob1edd0c06807e22c2511209c022b7039f10a103a0
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // Copyright 2020 the V8 project authors. All rights reserved.
8 // Use of this source code is governed by a BSD-style license that can be
9 // found in the LICENSE file.
11 #include "irregexp/imported/regexp-macro-assembler-arch.h"
12 #include "irregexp/imported/regexp-stack.h"
13 #include "irregexp/imported/special-case.h"
14 #include "jit/Linker.h"
15 #include "jit/PerfSpewer.h"
16 #include "vm/MatchPairs.h"
17 #include "vm/Realm.h"
18 #ifdef MOZ_VTUNE
19 # include "vtune/VTuneWrapper.h"
20 #endif
22 #include "jit/ABIFunctionList-inl.h"
23 #include "jit/MacroAssembler-inl.h"
25 namespace v8 {
26 namespace internal {
28 using js::MatchPairs;
29 using js::jit::AbsoluteAddress;
30 using js::jit::Address;
31 using js::jit::AllocatableGeneralRegisterSet;
32 using js::jit::Assembler;
33 using js::jit::BaseIndex;
34 using js::jit::CodeLocationLabel;
35 using js::jit::GeneralRegisterBackwardIterator;
36 using js::jit::GeneralRegisterForwardIterator;
37 using js::jit::GeneralRegisterSet;
38 using js::jit::Imm32;
39 using js::jit::ImmPtr;
40 using js::jit::ImmWord;
41 using js::jit::JitCode;
42 using js::jit::Linker;
43 using js::jit::LiveGeneralRegisterSet;
44 using js::jit::Register;
45 using js::jit::Registers;
46 using js::jit::StackMacroAssembler;
48 SMRegExpMacroAssembler::SMRegExpMacroAssembler(JSContext* cx,
49 StackMacroAssembler& masm,
50 Zone* zone, Mode mode,
51 uint32_t num_capture_registers)
52 : NativeRegExpMacroAssembler(cx->isolate.ref(), zone),
53 cx_(cx),
54 masm_(masm),
55 mode_(mode),
56 num_registers_(num_capture_registers),
57 num_capture_registers_(num_capture_registers) {
58 // Each capture has a start and an end register
59 MOZ_ASSERT(num_capture_registers_ % 2 == 0);
61 AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All());
63 input_end_pointer_ = regs.takeAny();
64 current_character_ = regs.takeAny();
65 current_position_ = regs.takeAny();
66 backtrack_stack_pointer_ = regs.takeAny();
67 temp0_ = regs.takeAny();
68 temp1_ = regs.takeAny();
69 if (!regs.empty()) {
70 // Not enough registers on x86.
71 temp2_ = regs.takeAny();
73 savedRegisters_ = js::jit::SavedNonVolatileRegisters(regs);
75 masm_.jump(&entry_label_); // We'll generate the entry code later
76 masm_.bind(&start_label_); // and continue from here.
79 int SMRegExpMacroAssembler::stack_limit_slack() {
80 return RegExpStack::kStackLimitSlack;
83 void SMRegExpMacroAssembler::AdvanceCurrentPosition(int by) {
84 if (by != 0) {
85 masm_.addPtr(Imm32(by * char_size()), current_position_);
89 void SMRegExpMacroAssembler::AdvanceRegister(int reg, int by) {
90 MOZ_ASSERT(reg >= 0 && reg < num_registers_);
91 if (by != 0) {
92 masm_.addPtr(Imm32(by), register_location(reg));
96 void SMRegExpMacroAssembler::Backtrack() {
97 #ifdef DEBUG
98 js::jit::Label bailOut;
99 // Check for simulating interrupt
100 masm_.branch32(Assembler::NotEqual,
101 AbsoluteAddress(&cx_->isolate->shouldSimulateInterrupt_),
102 Imm32(0), &bailOut);
103 #endif
104 // Check for an interrupt. We have to restart from the beginning if we
105 // are interrupted, so we only check for urgent interrupts.
106 js::jit::Label noInterrupt;
107 masm_.branchTest32(
108 Assembler::Zero, AbsoluteAddress(cx_->addressOfInterruptBits()),
109 Imm32(uint32_t(js::InterruptReason::CallbackUrgent)), &noInterrupt);
110 #ifdef DEBUG
111 // bailing out if we have simulating interrupt flag set
112 masm_.bind(&bailOut);
113 #endif
114 masm_.movePtr(ImmWord(int32_t(js::RegExpRunStatus::Error)), temp0_);
115 masm_.jump(&exit_label_);
116 masm_.bind(&noInterrupt);
118 // Pop code location from backtrack stack and jump to location.
119 Pop(temp0_);
120 masm_.jump(temp0_);
123 void SMRegExpMacroAssembler::Bind(Label* label) {
124 masm_.bind(label->inner());
125 if (label->patchOffset_.bound()) {
126 AddLabelPatch(label->patchOffset_, label->pos());
130 // Check if current_position + cp_offset is the input start
131 void SMRegExpMacroAssembler::CheckAtStartImpl(int cp_offset, Label* on_cond,
132 Assembler::Condition cond) {
133 Address addr(current_position_, cp_offset * char_size());
134 masm_.computeEffectiveAddress(addr, temp0_);
136 masm_.branchPtr(cond, inputStart(), temp0_, LabelOrBacktrack(on_cond));
139 void SMRegExpMacroAssembler::CheckAtStart(int cp_offset, Label* on_at_start) {
140 CheckAtStartImpl(cp_offset, on_at_start, Assembler::Equal);
143 void SMRegExpMacroAssembler::CheckNotAtStart(int cp_offset,
144 Label* on_not_at_start) {
145 CheckAtStartImpl(cp_offset, on_not_at_start, Assembler::NotEqual);
148 void SMRegExpMacroAssembler::CheckCharacterImpl(Imm32 c, Label* on_cond,
149 Assembler::Condition cond) {
150 masm_.branch32(cond, current_character_, c, LabelOrBacktrack(on_cond));
153 void SMRegExpMacroAssembler::CheckCharacter(uint32_t c, Label* on_equal) {
154 CheckCharacterImpl(Imm32(c), on_equal, Assembler::Equal);
157 void SMRegExpMacroAssembler::CheckNotCharacter(uint32_t c,
158 Label* on_not_equal) {
159 CheckCharacterImpl(Imm32(c), on_not_equal, Assembler::NotEqual);
162 void SMRegExpMacroAssembler::CheckCharacterGT(base::uc16 limit,
163 Label* on_greater) {
164 CheckCharacterImpl(Imm32(limit), on_greater, Assembler::GreaterThan);
167 void SMRegExpMacroAssembler::CheckCharacterLT(base::uc16 limit,
168 Label* on_less) {
169 CheckCharacterImpl(Imm32(limit), on_less, Assembler::LessThan);
172 // Bitwise-and the current character with mask and then check for a
173 // match with c.
174 void SMRegExpMacroAssembler::CheckCharacterAfterAndImpl(uint32_t c,
175 uint32_t mask,
176 Label* on_cond,
177 bool is_not) {
178 if (c == 0) {
179 Assembler::Condition cond = is_not ? Assembler::NonZero : Assembler::Zero;
180 masm_.branchTest32(cond, current_character_, Imm32(mask),
181 LabelOrBacktrack(on_cond));
182 } else {
183 Assembler::Condition cond = is_not ? Assembler::NotEqual : Assembler::Equal;
184 masm_.move32(Imm32(mask), temp0_);
185 masm_.and32(current_character_, temp0_);
186 masm_.branch32(cond, temp0_, Imm32(c), LabelOrBacktrack(on_cond));
190 void SMRegExpMacroAssembler::CheckCharacterAfterAnd(uint32_t c, uint32_t mask,
191 Label* on_equal) {
192 CheckCharacterAfterAndImpl(c, mask, on_equal, /*is_not =*/false);
195 void SMRegExpMacroAssembler::CheckNotCharacterAfterAnd(uint32_t c,
196 uint32_t mask,
197 Label* on_not_equal) {
198 CheckCharacterAfterAndImpl(c, mask, on_not_equal, /*is_not =*/true);
201 // Subtract minus from the current character, then bitwise-and the
202 // result with mask, then check for a match with c.
203 void SMRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd(
204 base::uc16 c, base::uc16 minus, base::uc16 mask, Label* on_not_equal) {
205 masm_.computeEffectiveAddress(Address(current_character_, -minus), temp0_);
206 if (c == 0) {
207 masm_.branchTest32(Assembler::NonZero, temp0_, Imm32(mask),
208 LabelOrBacktrack(on_not_equal));
209 } else {
210 masm_.and32(Imm32(mask), temp0_);
211 masm_.branch32(Assembler::NotEqual, temp0_, Imm32(c),
212 LabelOrBacktrack(on_not_equal));
216 // If the current position matches the position stored on top of the backtrack
217 // stack, pops the backtrack stack and branches to the given label.
218 void SMRegExpMacroAssembler::CheckGreedyLoop(Label* on_equal) {
219 js::jit::Label fallthrough;
220 masm_.branchPtr(Assembler::NotEqual, Address(backtrack_stack_pointer_, 0),
221 current_position_, &fallthrough);
222 masm_.addPtr(Imm32(sizeof(void*)), backtrack_stack_pointer_); // Pop.
223 JumpOrBacktrack(on_equal);
224 masm_.bind(&fallthrough);
227 void SMRegExpMacroAssembler::CheckCharacterInRangeImpl(
228 base::uc16 from, base::uc16 to, Label* on_cond, Assembler::Condition cond) {
229 // x is in [from,to] if unsigned(x - from) <= to - from
230 masm_.computeEffectiveAddress(Address(current_character_, -from), temp0_);
231 masm_.branch32(cond, temp0_, Imm32(to - from), LabelOrBacktrack(on_cond));
234 void SMRegExpMacroAssembler::CheckCharacterInRange(base::uc16 from,
235 base::uc16 to,
236 Label* on_in_range) {
237 CheckCharacterInRangeImpl(from, to, on_in_range, Assembler::BelowOrEqual);
240 void SMRegExpMacroAssembler::CheckCharacterNotInRange(base::uc16 from,
241 base::uc16 to,
242 Label* on_not_in_range) {
243 CheckCharacterInRangeImpl(from, to, on_not_in_range, Assembler::Above);
246 /* static */
247 bool SMRegExpMacroAssembler::IsCharacterInRangeArray(uint32_t c,
248 ByteArrayData* ranges) {
249 js::AutoUnsafeCallWithABI unsafe;
250 MOZ_ASSERT(ranges->length() % sizeof(uint16_t) == 0);
251 uint32_t length = ranges->length() / sizeof(uint16_t);
252 MOZ_ASSERT(length > 0);
254 // Fast paths.
255 if (c < ranges->getTyped<uint16_t>(0)) {
256 // |c| is lower than the start of the first range.
257 // It is not in the range array.
258 return false;
260 if (c >= ranges->getTyped<uint16_t>(length - 1)) {
261 // |c| is higher than the last entry. If the table contains an odd
262 // number of entries, the last range is open-ended, so |c| is in
263 // the range array iff |length| is odd.
264 return (length % 2) != 0;
267 // |ranges| is stored as an interval list: an ordered list of
268 // starting points, where every even index marks the beginning of a
269 // range of characters that are included, and every odd index marks
270 // the beginning of a range of characters that are excluded. For
271 // example, the set [1,2,3,7,8,9] would be represented as the
272 // range array [1,4,7,10]. If |ranges| has an odd number of entries,
273 // the last included range is open-ended (so the set containing
274 // every character would be represented as [0]).
276 // Because of the symmetry between included and excluded ranges, we
277 // can do a binary search for the index in |ranges| with the value
278 // closest to but not exceeding |c|. If that index is even, |c| is
279 // in an included range. If that index is odd, |c| is in an excluded
280 // range.
281 uint32_t lower = 0;
282 uint32_t upper = length;
283 uint32_t mid = 0;
284 do {
285 mid = lower + (upper - lower) / 2;
286 const base::uc16 elem = ranges->getTyped<uint16_t>(mid);
287 if (c < elem) {
288 upper = mid;
289 } else if (c > elem) {
290 lower = mid + 1;
291 } else {
292 break;
294 } while (lower < upper);
295 uint32_t rangeIndex = c < ranges->getTyped<uint16_t>(mid) ? mid - 1 : mid;
297 // Included ranges start at even indices and end at odd indices.
298 return rangeIndex % 2 == 0;
301 void SMRegExpMacroAssembler::CallIsCharacterInRangeArray(
302 const ZoneList<CharacterRange>* ranges) {
303 Handle<ByteArray> rangeArray = GetOrAddRangeArray(ranges);
304 masm_.movePtr(ImmPtr(rangeArray->inner()), temp0_);
306 // Save volatile regs. Temp regs don't need to be saved.
307 LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile());
308 volatileRegs.takeUnchecked(temp0_);
309 volatileRegs.takeUnchecked(temp1_);
310 if (temp2_ != js::jit::InvalidReg) {
311 volatileRegs.takeUnchecked(temp2_);
313 masm_.PushRegsInMask(volatileRegs);
315 using Fn = bool (*)(uint32_t, ByteArrayData*);
316 masm_.setupUnalignedABICall(temp1_);
317 masm_.passABIArg(current_character_);
318 masm_.passABIArg(temp0_);
320 masm_.callWithABI<Fn, ::js::irregexp::IsCharacterInRangeArray>();
321 masm_.storeCallBoolResult(temp1_);
322 masm_.PopRegsInMask(volatileRegs);
324 // GetOrAddRangeArray caches previously seen range arrays to reduce
325 // memory usage, so this may not be the first time we've seen this
326 // range array. We only need to transfer ownership from the
327 // HandleScope to the |tables_| vector once.
328 PseudoHandle<ByteArrayData> rawRangeArray =
329 rangeArray->maybeTakeOwnership(isolate());
330 if (rawRangeArray) {
331 AddTable(std::move(rawRangeArray));
335 bool SMRegExpMacroAssembler::CheckCharacterInRangeArray(
336 const ZoneList<CharacterRange>* ranges, Label* on_in_range) {
337 CallIsCharacterInRangeArray(ranges);
338 masm_.branchTest32(Assembler::NonZero, temp1_, temp1_,
339 LabelOrBacktrack(on_in_range));
340 return true;
343 bool SMRegExpMacroAssembler::CheckCharacterNotInRangeArray(
344 const ZoneList<CharacterRange>* ranges, Label* on_not_in_range) {
345 CallIsCharacterInRangeArray(ranges);
346 masm_.branchTest32(Assembler::Zero, temp1_, temp1_,
347 LabelOrBacktrack(on_not_in_range));
348 return true;
351 void SMRegExpMacroAssembler::CheckBitInTable(Handle<ByteArray> table,
352 Label* on_bit_set) {
353 // Claim ownership of the ByteArray from the current HandleScope.
354 // ByteArrays are allocated on the C++ heap and are (eventually)
355 // owned by the RegExpShared.
356 PseudoHandle<ByteArrayData> rawTable = table->takeOwnership(isolate());
358 masm_.movePtr(ImmPtr(rawTable->data()), temp0_);
360 masm_.move32(Imm32(kTableMask), temp1_);
361 masm_.and32(current_character_, temp1_);
363 masm_.load8ZeroExtend(BaseIndex(temp0_, temp1_, js::jit::TimesOne), temp0_);
364 masm_.branchTest32(Assembler::NonZero, temp0_, temp0_,
365 LabelOrBacktrack(on_bit_set));
367 // Transfer ownership of |rawTable| to the |tables_| vector.
368 AddTable(std::move(rawTable));
371 void SMRegExpMacroAssembler::CheckNotBackReferenceImpl(int start_reg,
372 bool read_backward,
373 bool unicode,
374 Label* on_no_match,
375 bool ignore_case) {
376 js::jit::Label fallthrough;
378 // Captures are stored as a sequential pair of registers.
379 // Find the length of the back-referenced capture and load the
380 // capture's start index into current_character_.
381 masm_.loadPtr(register_location(start_reg), // index of start
382 current_character_);
383 masm_.loadPtr(register_location(start_reg + 1), temp0_); // index of end
384 masm_.subPtr(current_character_, temp0_); // length of capture
386 // Capture registers are either both set or both cleared.
387 // If the capture length is zero, then the capture is either empty or cleared.
388 // Fall through in both cases.
389 masm_.branchPtr(Assembler::Equal, temp0_, ImmWord(0), &fallthrough);
391 // Check that there are sufficient characters left in the input.
392 if (read_backward) {
393 // If start + len > current, there isn't enough room for a
394 // lookbehind backreference.
395 masm_.loadPtr(inputStart(), temp1_);
396 masm_.addPtr(temp0_, temp1_);
397 masm_.branchPtr(Assembler::GreaterThan, temp1_, current_position_,
398 LabelOrBacktrack(on_no_match));
399 } else {
400 // current_position_ is the negative offset from the end.
401 // If current + len > 0, there isn't enough room for a backreference.
402 masm_.movePtr(current_position_, temp1_);
403 masm_.addPtr(temp0_, temp1_);
404 masm_.branchPtr(Assembler::GreaterThan, temp1_, ImmWord(0),
405 LabelOrBacktrack(on_no_match));
408 if (mode_ == UC16 && ignore_case) {
409 // We call a helper function for case-insensitive non-latin1 strings.
411 // Save volatile regs. temp1_, temp2_, and current_character_
412 // don't need to be saved. current_position_ needs to be saved
413 // even if it's non-volatile, because we modify it to use as an argument.
414 LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile());
415 volatileRegs.addUnchecked(current_position_);
416 volatileRegs.takeUnchecked(temp1_);
417 if (temp2_ != js::jit::InvalidReg) {
418 volatileRegs.takeUnchecked(temp2_);
420 volatileRegs.takeUnchecked(current_character_);
421 masm_.PushRegsInMask(volatileRegs);
423 // Parameters are
424 // Address captured - Address of captured substring's start.
425 // Address current - Address of current character position.
426 // size_t byte_length - length of capture (in bytes)
428 // Compute |captured|
429 masm_.addPtr(input_end_pointer_, current_character_);
431 // Compute |current|
432 masm_.addPtr(input_end_pointer_, current_position_);
433 if (read_backward) {
434 // Offset by length when matching backwards.
435 masm_.subPtr(temp0_, current_position_);
438 using Fn = uint32_t (*)(const char16_t*, const char16_t*, size_t);
439 masm_.setupUnalignedABICall(temp1_);
440 masm_.passABIArg(current_character_);
441 masm_.passABIArg(current_position_);
442 masm_.passABIArg(temp0_);
444 if (unicode) {
445 masm_.callWithABI<Fn, ::js::irregexp::CaseInsensitiveCompareUnicode>();
446 } else {
447 masm_.callWithABI<Fn, ::js::irregexp::CaseInsensitiveCompareNonUnicode>();
449 masm_.storeCallInt32Result(temp1_);
450 masm_.PopRegsInMask(volatileRegs);
451 masm_.branchTest32(Assembler::Zero, temp1_, temp1_,
452 LabelOrBacktrack(on_no_match));
454 // On success, advance position by length of capture
455 if (read_backward) {
456 masm_.subPtr(temp0_, current_position_);
457 } else {
458 masm_.addPtr(temp0_, current_position_);
461 masm_.bind(&fallthrough);
462 return;
465 // We will be modifying current_position_. Save it in case the match fails.
466 masm_.push(current_position_);
468 // Compute start of capture string
469 masm_.addPtr(input_end_pointer_, current_character_);
471 // Compute start of match string
472 masm_.addPtr(input_end_pointer_, current_position_);
473 if (read_backward) {
474 // Offset by length when matching backwards.
475 masm_.subPtr(temp0_, current_position_);
478 // Compute end of match string
479 masm_.addPtr(current_position_, temp0_);
481 Register nextCaptureChar = temp1_;
482 Register nextMatchChar = temp2_;
484 if (temp2_ == js::jit::InvalidReg) {
485 masm_.push(backtrack_stack_pointer_);
486 nextMatchChar = backtrack_stack_pointer_;
489 js::jit::Label success;
490 js::jit::Label fail;
491 js::jit::Label loop;
492 masm_.bind(&loop);
494 // Load next character from each string.
495 if (mode_ == LATIN1) {
496 masm_.load8ZeroExtend(Address(current_character_, 0), nextCaptureChar);
497 masm_.load8ZeroExtend(Address(current_position_, 0), nextMatchChar);
498 } else {
499 masm_.load16ZeroExtend(Address(current_character_, 0), nextCaptureChar);
500 masm_.load16ZeroExtend(Address(current_position_, 0), nextMatchChar);
503 if (ignore_case) {
504 MOZ_ASSERT(mode_ == LATIN1);
505 // Try exact match.
506 js::jit::Label loop_increment;
507 masm_.branch32(Assembler::Equal, nextCaptureChar, nextMatchChar,
508 &loop_increment);
510 // Mismatch. Try case-insensitive match.
511 // Force the capture character to lower case (by setting bit 0x20)
512 // then check to see if it is a letter.
513 js::jit::Label convert_match;
514 masm_.or32(Imm32(0x20), nextCaptureChar);
516 // Check if it is in [a,z].
517 masm_.computeEffectiveAddress(Address(nextCaptureChar, -'a'),
518 nextMatchChar);
519 masm_.branch32(Assembler::BelowOrEqual, nextMatchChar, Imm32('z' - 'a'),
520 &convert_match);
521 // Check for values in range [224,254].
522 // Exclude 247 (U+00F7 DIVISION SIGN).
523 masm_.sub32(Imm32(224 - 'a'), nextMatchChar);
524 masm_.branch32(Assembler::Above, nextMatchChar, Imm32(254 - 224), &fail);
525 masm_.branch32(Assembler::Equal, nextMatchChar, Imm32(247 - 224), &fail);
527 // Capture character is lower case. Convert match character
528 // to lower case and compare.
529 masm_.bind(&convert_match);
530 masm_.load8ZeroExtend(Address(current_position_, 0), nextMatchChar);
531 masm_.or32(Imm32(0x20), nextMatchChar);
532 masm_.branch32(Assembler::NotEqual, nextCaptureChar, nextMatchChar, &fail);
534 masm_.bind(&loop_increment);
535 } else {
536 // Fail if characters do not match.
537 masm_.branch32(Assembler::NotEqual, nextCaptureChar, nextMatchChar, &fail);
540 // Increment pointers into match and capture strings.
541 masm_.addPtr(Imm32(char_size()), current_character_);
542 masm_.addPtr(Imm32(char_size()), current_position_);
544 // Loop if we have not reached the end of the match string.
545 masm_.branchPtr(Assembler::Below, current_position_, temp0_, &loop);
546 masm_.jump(&success);
548 // If we fail, restore current_position_ and branch.
549 masm_.bind(&fail);
550 if (temp2_ == js::jit::InvalidReg) {
551 // Restore backtrack_stack_pointer_ when it was used as a temp register.
552 masm_.pop(backtrack_stack_pointer_);
554 masm_.pop(current_position_);
555 JumpOrBacktrack(on_no_match);
557 masm_.bind(&success);
559 if (temp2_ == js::jit::InvalidReg) {
560 // Restore backtrack_stack_pointer_ when it was used as a temp register.
561 masm_.pop(backtrack_stack_pointer_);
563 // Drop saved value of current_position_
564 masm_.addToStackPtr(Imm32(sizeof(uintptr_t)));
566 // current_position_ is a pointer. Convert it back to an offset.
567 masm_.subPtr(input_end_pointer_, current_position_);
568 if (read_backward) {
569 // Subtract match length if we matched backward
570 masm_.addPtr(register_location(start_reg), current_position_);
571 masm_.subPtr(register_location(start_reg + 1), current_position_);
574 masm_.bind(&fallthrough);
577 // Branch if a back-reference does not match a previous capture.
578 void SMRegExpMacroAssembler::CheckNotBackReference(int start_reg,
579 bool read_backward,
580 Label* on_no_match) {
581 CheckNotBackReferenceImpl(start_reg, read_backward, /*unicode = */ false,
582 on_no_match, /*ignore_case = */ false);
585 void SMRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(
586 int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
587 CheckNotBackReferenceImpl(start_reg, read_backward, unicode, on_no_match,
588 /*ignore_case = */ true);
591 // Checks whether the given offset from the current position is
592 // inside the input string.
593 void SMRegExpMacroAssembler::CheckPosition(int cp_offset,
594 Label* on_outside_input) {
595 // Note: current_position_ is a (negative) byte offset relative to
596 // the end of the input string.
597 if (cp_offset >= 0) {
598 // end + current + offset >= end
599 // <=> current + offset >= 0
600 // <=> current >= -offset
601 masm_.branchPtr(Assembler::GreaterThanOrEqual, current_position_,
602 ImmWord(-cp_offset * char_size()),
603 LabelOrBacktrack(on_outside_input));
604 } else {
605 // Compute offset position
606 masm_.computeEffectiveAddress(
607 Address(current_position_, cp_offset * char_size()), temp0_);
609 // Compare to start of input.
610 masm_.branchPtr(Assembler::GreaterThan, inputStart(), temp0_,
611 LabelOrBacktrack(on_outside_input));
615 // This function attempts to generate special case code for character classes.
616 // Returns true if a special case is generated.
617 // Otherwise returns false and generates no code.
618 bool SMRegExpMacroAssembler::CheckSpecialCharacterClass(
619 StandardCharacterSet type, Label* on_no_match) {
620 js::jit::Label* no_match = LabelOrBacktrack(on_no_match);
622 // Note: throughout this function, range checks (c in [min, max])
623 // are implemented by an unsigned (c - min) <= (max - min) check.
624 switch (type) {
625 case StandardCharacterSet::kWhitespace: {
626 // Match space-characters
627 if (mode_ != LATIN1) {
628 return false;
630 js::jit::Label success;
631 // One byte space characters are ' ', '\t'..'\r', and '\u00a0' (NBSP).
633 // Check ' '
634 masm_.branch32(Assembler::Equal, current_character_, Imm32(' '),
635 &success);
637 // Check '\t'..'\r'
638 masm_.computeEffectiveAddress(Address(current_character_, -'\t'), temp0_);
639 masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32('\r' - '\t'),
640 &success);
642 // Check \u00a0.
643 masm_.branch32(Assembler::NotEqual, temp0_, Imm32(0x00a0 - '\t'),
644 no_match);
646 masm_.bind(&success);
647 return true;
649 case StandardCharacterSet::kNotWhitespace:
650 // The emitted code for generic character classes is good enough.
651 return false;
652 case StandardCharacterSet::kDigit:
653 // Match latin1 digits ('0'-'9')
654 masm_.computeEffectiveAddress(Address(current_character_, -'0'), temp0_);
655 masm_.branch32(Assembler::Above, temp0_, Imm32('9' - '0'), no_match);
656 return true;
657 case StandardCharacterSet::kNotDigit:
658 // Match anything except latin1 digits ('0'-'9')
659 masm_.computeEffectiveAddress(Address(current_character_, -'0'), temp0_);
660 masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32('9' - '0'),
661 no_match);
662 return true;
663 case StandardCharacterSet::kNotLineTerminator:
664 // Match non-newlines. This excludes '\n' (0x0a), '\r' (0x0d),
665 // U+2028 LINE SEPARATOR, and U+2029 PARAGRAPH SEPARATOR.
666 // See https://tc39.es/ecma262/#prod-LineTerminator
668 // To test for 0x0a and 0x0d efficiently, we XOR the input with 1.
669 // This converts 0x0a to 0x0b, and 0x0d to 0x0c, allowing us to
670 // test for the contiguous range 0x0b..0x0c.
671 masm_.move32(current_character_, temp0_);
672 masm_.xor32(Imm32(0x01), temp0_);
673 masm_.sub32(Imm32(0x0b), temp0_);
674 masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32(0x0c - 0x0b),
675 no_match);
677 if (mode_ == UC16) {
678 // Compare original value to 0x2028 and 0x2029, using the already
679 // computed (current_char ^ 0x01 - 0x0b). I.e., check for
680 // 0x201d (0x2028 - 0x0b) or 0x201e.
681 masm_.sub32(Imm32(0x2028 - 0x0b), temp0_);
682 masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32(0x2029 - 0x2028),
683 no_match);
685 return true;
686 case StandardCharacterSet::kWord:
687 // \w matches the set of 63 characters defined in Runtime Semantics:
688 // WordCharacters. We use a static lookup table, which is defined in
689 // regexp-macro-assembler.cc.
690 // Note: if both Unicode and IgnoreCase are true, \w matches a
691 // larger set of characters. That case is handled elsewhere.
692 if (mode_ != LATIN1) {
693 masm_.branch32(Assembler::Above, current_character_, Imm32('z'),
694 no_match);
696 static_assert(arraysize(word_character_map) > unibrow::Latin1::kMaxChar);
697 masm_.movePtr(ImmPtr(word_character_map), temp0_);
698 masm_.load8ZeroExtend(
699 BaseIndex(temp0_, current_character_, js::jit::TimesOne), temp0_);
700 masm_.branchTest32(Assembler::Zero, temp0_, temp0_, no_match);
701 return true;
702 case StandardCharacterSet::kNotWord: {
703 // See 'w' above.
704 js::jit::Label done;
705 if (mode_ != LATIN1) {
706 masm_.branch32(Assembler::Above, current_character_, Imm32('z'), &done);
708 static_assert(arraysize(word_character_map) > unibrow::Latin1::kMaxChar);
709 masm_.movePtr(ImmPtr(word_character_map), temp0_);
710 masm_.load8ZeroExtend(
711 BaseIndex(temp0_, current_character_, js::jit::TimesOne), temp0_);
712 masm_.branchTest32(Assembler::NonZero, temp0_, temp0_, no_match);
713 if (mode_ != LATIN1) {
714 masm_.bind(&done);
716 return true;
718 ////////////////////////////////////////////////////////////////////////
719 // Non-standard classes (with no syntactic shorthand) used internally //
720 ////////////////////////////////////////////////////////////////////////
721 case StandardCharacterSet::kEverything:
722 // Match any character
723 return true;
724 case StandardCharacterSet::kLineTerminator:
725 // Match newlines. The opposite of '.'. See '.' above.
726 masm_.move32(current_character_, temp0_);
727 masm_.xor32(Imm32(0x01), temp0_);
728 masm_.sub32(Imm32(0x0b), temp0_);
729 if (mode_ == LATIN1) {
730 masm_.branch32(Assembler::Above, temp0_, Imm32(0x0c - 0x0b), no_match);
731 } else {
732 MOZ_ASSERT(mode_ == UC16);
733 js::jit::Label done;
734 masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32(0x0c - 0x0b),
735 &done);
737 // Compare original value to 0x2028 and 0x2029, using the already
738 // computed (current_char ^ 0x01 - 0x0b). I.e., check for
739 // 0x201d (0x2028 - 0x0b) or 0x201e.
740 masm_.sub32(Imm32(0x2028 - 0x0b), temp0_);
741 masm_.branch32(Assembler::Above, temp0_, Imm32(0x2029 - 0x2028),
742 no_match);
743 masm_.bind(&done);
745 return true;
747 return false;
750 void SMRegExpMacroAssembler::Fail() {
751 masm_.movePtr(ImmWord(int32_t(js::RegExpRunStatus::Success_NotFound)),
752 temp0_);
753 masm_.jump(&exit_label_);
756 void SMRegExpMacroAssembler::GoTo(Label* to) {
757 masm_.jump(LabelOrBacktrack(to));
760 void SMRegExpMacroAssembler::IfRegisterGE(int reg, int comparand,
761 Label* if_ge) {
762 masm_.branchPtr(Assembler::GreaterThanOrEqual, register_location(reg),
763 ImmWord(comparand), LabelOrBacktrack(if_ge));
766 void SMRegExpMacroAssembler::IfRegisterLT(int reg, int comparand,
767 Label* if_lt) {
768 masm_.branchPtr(Assembler::LessThan, register_location(reg),
769 ImmWord(comparand), LabelOrBacktrack(if_lt));
772 void SMRegExpMacroAssembler::IfRegisterEqPos(int reg, Label* if_eq) {
773 masm_.branchPtr(Assembler::Equal, register_location(reg), current_position_,
774 LabelOrBacktrack(if_eq));
777 // This is a word-for-word identical copy of the V8 code, which is
778 // duplicated in at least nine different places in V8 (one per
779 // supported architecture) with no differences outside of comments and
780 // formatting. It should be hoisted into the superclass. Once that is
781 // done upstream, this version can be deleted.
782 void SMRegExpMacroAssembler::LoadCurrentCharacterImpl(int cp_offset,
783 Label* on_end_of_input,
784 bool check_bounds,
785 int characters,
786 int eats_at_least) {
787 // It's possible to preload a small number of characters when each success
788 // path requires a large number of characters, but not the reverse.
789 MOZ_ASSERT(eats_at_least >= characters);
790 MOZ_ASSERT(cp_offset < (1 << 30)); // Be sane! (And ensure negation works)
792 if (check_bounds) {
793 if (cp_offset >= 0) {
794 CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input);
795 } else {
796 CheckPosition(cp_offset, on_end_of_input);
799 LoadCurrentCharacterUnchecked(cp_offset, characters);
802 // Load the character (or characters) at the specified offset from the
803 // current position. Zero-extend to 32 bits.
804 void SMRegExpMacroAssembler::LoadCurrentCharacterUnchecked(int cp_offset,
805 int characters) {
806 BaseIndex address(input_end_pointer_, current_position_, js::jit::TimesOne,
807 cp_offset * char_size());
808 if (mode_ == LATIN1) {
809 if (characters == 4) {
810 masm_.load32(address, current_character_);
811 } else if (characters == 2) {
812 masm_.load16ZeroExtend(address, current_character_);
813 } else {
814 MOZ_ASSERT(characters == 1);
815 masm_.load8ZeroExtend(address, current_character_);
817 } else {
818 MOZ_ASSERT(mode_ == UC16);
819 if (characters == 2) {
820 masm_.load32(address, current_character_);
821 } else {
822 MOZ_ASSERT(characters == 1);
823 masm_.load16ZeroExtend(address, current_character_);
828 void SMRegExpMacroAssembler::PopCurrentPosition() { Pop(current_position_); }
830 void SMRegExpMacroAssembler::PopRegister(int register_index) {
831 Pop(temp0_);
832 masm_.storePtr(temp0_, register_location(register_index));
835 void SMRegExpMacroAssembler::PushBacktrack(Label* label) {
836 MOZ_ASSERT(!label->is_bound());
837 MOZ_ASSERT(!label->patchOffset_.bound());
838 label->patchOffset_ = masm_.movWithPatch(ImmPtr(nullptr), temp0_);
839 MOZ_ASSERT(label->patchOffset_.bound());
841 Push(temp0_);
843 CheckBacktrackStackLimit();
846 void SMRegExpMacroAssembler::PushCurrentPosition() { Push(current_position_); }
848 void SMRegExpMacroAssembler::PushRegister(int register_index,
849 StackCheckFlag check_stack_limit) {
850 masm_.loadPtr(register_location(register_index), temp0_);
851 Push(temp0_);
852 if (check_stack_limit) {
853 CheckBacktrackStackLimit();
857 void SMRegExpMacroAssembler::ReadCurrentPositionFromRegister(int reg) {
858 masm_.loadPtr(register_location(reg), current_position_);
861 void SMRegExpMacroAssembler::WriteCurrentPositionToRegister(int reg,
862 int cp_offset) {
863 if (cp_offset == 0) {
864 masm_.storePtr(current_position_, register_location(reg));
865 } else {
866 Address addr(current_position_, cp_offset * char_size());
867 masm_.computeEffectiveAddress(addr, temp0_);
868 masm_.storePtr(temp0_, register_location(reg));
872 // Note: The backtrack stack pointer is stored in a register as an
873 // offset from the stack top, not as a bare pointer, so that it is not
874 // corrupted if the backtrack stack grows (and therefore moves).
875 void SMRegExpMacroAssembler::ReadStackPointerFromRegister(int reg) {
876 masm_.loadPtr(register_location(reg), backtrack_stack_pointer_);
877 masm_.addPtr(backtrackStackBase(), backtrack_stack_pointer_);
879 void SMRegExpMacroAssembler::WriteStackPointerToRegister(int reg) {
880 masm_.movePtr(backtrack_stack_pointer_, temp0_);
881 masm_.subPtr(backtrackStackBase(), temp0_);
882 masm_.storePtr(temp0_, register_location(reg));
885 // When matching a regexp that is anchored at the end, this operation
886 // is used to try skipping the beginning of long strings. If the
887 // maximum length of a match is less than the length of the string, we
888 // can skip the initial len - max_len bytes.
889 void SMRegExpMacroAssembler::SetCurrentPositionFromEnd(int by) {
890 js::jit::Label after_position;
891 masm_.branchPtr(Assembler::GreaterThanOrEqual, current_position_,
892 ImmWord(-by * char_size()), &after_position);
893 masm_.movePtr(ImmWord(-by * char_size()), current_position_);
895 // On RegExp code entry (where this operation is used), the character before
896 // the current position is expected to be already loaded.
897 // We have advanced the position, so it's safe to read backwards.
898 LoadCurrentCharacterUnchecked(-1, 1);
899 masm_.bind(&after_position);
902 void SMRegExpMacroAssembler::SetRegister(int register_index, int to) {
903 MOZ_ASSERT(register_index >= num_capture_registers_);
904 masm_.storePtr(ImmWord(to), register_location(register_index));
907 // Returns true if a regexp match can be restarted (aka the regexp is global).
908 // The return value is not used anywhere, but we implement it to be safe.
909 bool SMRegExpMacroAssembler::Succeed() {
910 masm_.jump(&success_label_);
911 return global();
914 // Capture registers are initialized to input[-1]
915 void SMRegExpMacroAssembler::ClearRegisters(int reg_from, int reg_to) {
916 MOZ_ASSERT(reg_from <= reg_to);
917 masm_.loadPtr(inputStart(), temp0_);
918 masm_.subPtr(Imm32(char_size()), temp0_);
919 for (int reg = reg_from; reg <= reg_to; reg++) {
920 masm_.storePtr(temp0_, register_location(reg));
924 void SMRegExpMacroAssembler::Push(Register source) {
925 MOZ_ASSERT(source != backtrack_stack_pointer_);
927 masm_.subPtr(Imm32(sizeof(void*)), backtrack_stack_pointer_);
928 masm_.storePtr(source, Address(backtrack_stack_pointer_, 0));
931 void SMRegExpMacroAssembler::Pop(Register target) {
932 MOZ_ASSERT(target != backtrack_stack_pointer_);
934 masm_.loadPtr(Address(backtrack_stack_pointer_, 0), target);
935 masm_.addPtr(Imm32(sizeof(void*)), backtrack_stack_pointer_);
938 void SMRegExpMacroAssembler::JumpOrBacktrack(Label* to) {
939 if (to) {
940 masm_.jump(to->inner());
941 } else {
942 Backtrack();
946 // Generate a quick inline test for backtrack stack overflow.
947 // If the test fails, call an OOL handler to try growing the stack.
948 void SMRegExpMacroAssembler::CheckBacktrackStackLimit() {
949 js::jit::Label no_stack_overflow;
950 masm_.branchPtr(
951 Assembler::BelowOrEqual,
952 AbsoluteAddress(isolate()->regexp_stack()->limit_address_address()),
953 backtrack_stack_pointer_, &no_stack_overflow);
955 masm_.call(&stack_overflow_label_);
957 // Exit with an exception if the call failed
958 masm_.branchTest32(Assembler::Zero, temp0_, temp0_,
959 &exit_with_exception_label_);
961 masm_.bind(&no_stack_overflow);
964 // This is used to sneak an OOM through the V8 layer.
965 static Handle<HeapObject> DummyCode() {
966 return Handle<HeapObject>::fromHandleValue(JS::UndefinedHandleValue);
969 // Finalize code. This is called last, so that we know how many
970 // registers we need.
971 Handle<HeapObject> SMRegExpMacroAssembler::GetCode(Handle<String> source) {
972 if (!cx_->zone()->ensureJitZoneExists(cx_)) {
973 return DummyCode();
976 masm_.bind(&entry_label_);
978 createStackFrame();
979 initFrameAndRegs();
981 masm_.jump(&start_label_);
983 successHandler();
984 exitHandler();
985 backtrackHandler();
986 stackOverflowHandler();
988 Linker linker(masm_);
989 JitCode* code = linker.newCode(cx_, js::jit::CodeKind::RegExp);
990 if (!code) {
991 return DummyCode();
994 for (LabelPatch& lp : labelPatches_) {
995 Assembler::PatchDataWithValueCheck(CodeLocationLabel(code, lp.patchOffset_),
996 ImmPtr(code->raw() + lp.labelOffset_),
997 ImmPtr(nullptr));
1000 CollectPerfSpewerJitCodeProfile(code, "RegExp");
1002 #ifdef MOZ_VTUNE
1003 js::vtune::MarkStub(code, "RegExp");
1004 #endif
1006 return Handle<HeapObject>(JS::PrivateGCThingValue(code), isolate());
1010 * The stack will have the following structure:
1011 * sp-> - FrameData
1012 * - inputStart
1013 * - backtrack stack base
1014 * - matches
1015 * - numMatches
1016 * - Registers
1017 * - Capture positions
1018 * - Scratch registers
1019 * --- frame alignment ---
1020 * - Saved register area
1021 * fp-> - Frame pointer
1022 * - Return address
1024 void SMRegExpMacroAssembler::createStackFrame() {
1025 #ifdef JS_CODEGEN_ARM64
1026 // ARM64 communicates stack address via SP, but uses a pseudo-sp (PSP) for
1027 // addressing. The register we use for PSP may however also be used by
1028 // calling code, and it is nonvolatile, so save it. Do this as a special
1029 // case first because the generic save/restore code needs the PSP to be
1030 // initialized already.
1031 MOZ_ASSERT(js::jit::PseudoStackPointer64.Is(masm_.GetStackPointer64()));
1032 masm_.Str(js::jit::PseudoStackPointer64,
1033 vixl::MemOperand(js::jit::sp, -16, vixl::PreIndex));
1035 // Initialize the PSP from the SP.
1036 masm_.initPseudoStackPtr();
1037 #endif
1039 masm_.Push(js::jit::FramePointer);
1040 masm_.moveStackPtrTo(js::jit::FramePointer);
1042 // Push non-volatile registers which might be modified by jitcode.
1043 for (GeneralRegisterForwardIterator iter(savedRegisters_); iter.more();
1044 ++iter) {
1045 masm_.Push(*iter);
1048 // The pointer to InputOutputData is passed as the first argument.
1049 // On x86 we have to load it off the stack into temp0_.
1050 // On other platforms it is already in a register.
1051 #ifdef JS_CODEGEN_X86
1052 Address ioDataAddr(js::jit::FramePointer, 2 * sizeof(void*));
1053 masm_.loadPtr(ioDataAddr, temp0_);
1054 #else
1055 if (js::jit::IntArgReg0 != temp0_) {
1056 masm_.movePtr(js::jit::IntArgReg0, temp0_);
1058 #endif
1060 // Start a new stack frame.
1061 size_t frameBytes = sizeof(FrameData) + num_registers_ * sizeof(void*);
1062 frameSize_ = js::jit::StackDecrementForCall(js::jit::ABIStackAlignment,
1063 masm_.framePushed(), frameBytes);
1064 masm_.reserveStack(frameSize_);
1065 masm_.checkStackAlignment();
1067 // Check if we have space on the stack. Use the *NoInterrupt stack limit to
1068 // avoid failing repeatedly when the regex code is called from Ion JIT code.
1069 // (See bug 1208819)
1070 js::jit::Label stack_ok;
1071 AbsoluteAddress limit_addr(cx_->addressOfJitStackLimitNoInterrupt());
1072 masm_.branchStackPtrRhs(Assembler::Below, limit_addr, &stack_ok);
1074 // There is not enough space on the stack. Exit with an exception.
1075 masm_.movePtr(ImmWord(int32_t(js::RegExpRunStatus::Error)), temp0_);
1076 masm_.jump(&exit_label_);
1078 masm_.bind(&stack_ok);
1081 void SMRegExpMacroAssembler::initFrameAndRegs() {
1082 // At this point, an uninitialized stack frame has been created,
1083 // and the address of the InputOutputData is in temp0_.
1084 Register ioDataReg = temp0_;
1086 Register matchesReg = temp1_;
1087 masm_.loadPtr(Address(ioDataReg, offsetof(InputOutputData, matches)),
1088 matchesReg);
1090 // Initialize output registers
1091 // Use |backtrack_stack_pointer_| as an additional temp register. This is safe
1092 // because we haven't yet written any data to |backtrack_stack_pointer_|.
1093 Register extraTemp = backtrack_stack_pointer_;
1095 masm_.loadPtr(Address(matchesReg, MatchPairs::offsetOfPairs()), extraTemp);
1096 masm_.storePtr(extraTemp, matches());
1097 masm_.load32(Address(matchesReg, MatchPairs::offsetOfPairCount()), extraTemp);
1098 masm_.store32(extraTemp, numMatches());
1100 #ifdef DEBUG
1101 // Bounds-check numMatches.
1102 js::jit::Label enoughRegisters;
1103 masm_.branchPtr(Assembler::GreaterThanOrEqual, extraTemp,
1104 ImmWord(num_capture_registers_ / 2), &enoughRegisters);
1105 masm_.assumeUnreachable("Not enough output pairs for RegExp");
1106 masm_.bind(&enoughRegisters);
1107 #endif
1109 // Load input start pointer.
1110 masm_.loadPtr(Address(ioDataReg, offsetof(InputOutputData, inputStart)),
1111 current_position_);
1113 // Load input end pointer
1114 masm_.loadPtr(Address(ioDataReg, offsetof(InputOutputData, inputEnd)),
1115 input_end_pointer_);
1117 // Set up input position to be negative offset from string end.
1118 masm_.subPtr(input_end_pointer_, current_position_);
1120 // Store inputStart
1121 masm_.storePtr(current_position_, inputStart());
1123 // Load start index
1124 Register startIndexReg = temp1_;
1125 masm_.loadPtr(Address(ioDataReg, offsetof(InputOutputData, startIndex)),
1126 startIndexReg);
1127 masm_.computeEffectiveAddress(
1128 BaseIndex(current_position_, startIndexReg, factor()), current_position_);
1130 // Initialize current_character_.
1131 // Load newline if index is at start, or previous character otherwise.
1132 js::jit::Label start_regexp;
1133 js::jit::Label load_previous_character;
1134 masm_.branchPtr(Assembler::NotEqual, startIndexReg, ImmWord(0),
1135 &load_previous_character);
1136 masm_.movePtr(ImmWord('\n'), current_character_);
1137 masm_.jump(&start_regexp);
1139 masm_.bind(&load_previous_character);
1140 LoadCurrentCharacterUnchecked(-1, 1);
1141 masm_.bind(&start_regexp);
1143 // Initialize captured registers with inputStart - 1
1144 MOZ_ASSERT(num_capture_registers_ > 0);
1145 Register inputStartMinusOneReg = temp0_;
1146 masm_.loadPtr(inputStart(), inputStartMinusOneReg);
1147 masm_.subPtr(Imm32(char_size()), inputStartMinusOneReg);
1148 if (num_capture_registers_ > 8) {
1149 masm_.movePtr(ImmWord(register_offset(0)), temp1_);
1150 js::jit::Label init_loop;
1151 masm_.bind(&init_loop);
1152 masm_.storePtr(inputStartMinusOneReg, BaseIndex(masm_.getStackPointer(),
1153 temp1_, js::jit::TimesOne));
1154 masm_.addPtr(ImmWord(sizeof(void*)), temp1_);
1155 masm_.branchPtr(Assembler::LessThanOrEqual, temp1_,
1156 ImmWord(register_offset(num_capture_registers_ - 1)),
1157 &init_loop);
1158 } else {
1159 // Unroll the loop
1160 for (int i = 0; i < num_capture_registers_; i++) {
1161 masm_.storePtr(inputStartMinusOneReg, register_location(i));
1165 // Initialize backtrack stack pointer
1166 masm_.loadPtr(AbsoluteAddress(ExternalReference::TopOfRegexpStack(isolate())),
1167 backtrack_stack_pointer_);
1168 masm_.storePtr(backtrack_stack_pointer_, backtrackStackBase());
1171 // Called when we find a match. May not be generated if we can
1172 // determine ahead of time that a regexp cannot match: for example,
1173 // when compiling /\u1e9e/ for latin-1 inputs.
1174 void SMRegExpMacroAssembler::successHandler() {
1175 if (!success_label_.used()) {
1176 return;
1178 masm_.bind(&success_label_);
1180 // Copy captures to the MatchPairs pointed to by the InputOutputData.
1181 // Captures are stored as positions, which are negative byte offsets
1182 // from the end of the string. We must convert them to actual
1183 // indices.
1185 // Index: [ 0 ][ 1 ][ 2 ][ 3 ][ 4 ][ 5 ][END]
1186 // Pos (1-byte): [-6 ][-5 ][-4 ][-3 ][-2 ][-1 ][ 0 ] // IS = -6
1187 // Pos (2-byte): [-12][-10][-8 ][-6 ][-4 ][-2 ][ 0 ] // IS = -12
1189 // To convert a position to an index, we subtract InputStart, and
1190 // divide the result by char_size.
1191 Register matchesReg = temp1_;
1192 masm_.loadPtr(matches(), matchesReg);
1194 // Use |backtrack_stack_pointer_| as an additional temp register. This is safe
1195 // because we don't read from |backtrack_stack_pointer_| after this point.
1196 Register extraTemp = backtrack_stack_pointer_;
1198 Register inputStartReg = extraTemp;
1199 masm_.loadPtr(inputStart(), inputStartReg);
1201 for (int i = 0; i < num_capture_registers_; i++) {
1202 masm_.loadPtr(register_location(i), temp0_);
1203 masm_.subPtr(inputStartReg, temp0_);
1204 if (mode_ == UC16) {
1205 masm_.rshiftPtrArithmetic(Imm32(1), temp0_);
1207 masm_.store32(temp0_, Address(matchesReg, i * sizeof(int32_t)));
1210 masm_.movePtr(ImmWord(int32_t(js::RegExpRunStatus::Success)), temp0_);
1211 // This falls through to the exit handler.
1214 void SMRegExpMacroAssembler::exitHandler() {
1215 masm_.bind(&exit_label_);
1217 if (temp0_ != js::jit::ReturnReg) {
1218 masm_.movePtr(temp0_, js::jit::ReturnReg);
1221 masm_.freeStack(frameSize_);
1223 // Restore registers which were saved on entry
1224 for (GeneralRegisterBackwardIterator iter(savedRegisters_); iter.more();
1225 ++iter) {
1226 masm_.Pop(*iter);
1229 masm_.Pop(js::jit::FramePointer);
1231 #ifdef JS_CODEGEN_ARM64
1232 // Now restore the value that was in the PSP register on entry, and return.
1234 // Obtain the correct SP from the PSP.
1235 masm_.Mov(js::jit::sp, js::jit::PseudoStackPointer64);
1237 // Restore the saved value of the PSP register, this value is whatever the
1238 // caller had saved in it, not any actual SP value, and it must not be
1239 // overwritten subsequently.
1240 masm_.Ldr(js::jit::PseudoStackPointer64,
1241 vixl::MemOperand(js::jit::sp, 16, vixl::PostIndex));
1243 // Perform a plain Ret(), as abiret() will move SP <- PSP and that is wrong.
1244 masm_.Ret(vixl::lr);
1245 #else
1246 masm_.abiret();
1247 #endif
1249 if (exit_with_exception_label_.used()) {
1250 masm_.bind(&exit_with_exception_label_);
1252 // Exit with an error result to signal thrown exception
1253 masm_.movePtr(ImmWord(int32_t(js::RegExpRunStatus::Error)), temp0_);
1254 masm_.jump(&exit_label_);
1258 void SMRegExpMacroAssembler::backtrackHandler() {
1259 if (!backtrack_label_.used()) {
1260 return;
1262 masm_.bind(&backtrack_label_);
1263 Backtrack();
1266 void SMRegExpMacroAssembler::stackOverflowHandler() {
1267 if (!stack_overflow_label_.used()) {
1268 return;
1271 js::jit::AutoCreatedBy acb(masm_,
1272 "SMRegExpMacroAssembler::stackOverflowHandler");
1274 // Called if the backtrack-stack limit has been hit.
1275 masm_.bind(&stack_overflow_label_);
1277 // Load argument
1278 masm_.movePtr(ImmPtr(isolate()->regexp_stack()), temp1_);
1280 // Save registers before calling C function
1281 LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile());
1283 #ifdef JS_USE_LINK_REGISTER
1284 masm_.pushReturnAddress();
1285 #endif
1287 // Adjust for the return address on the stack.
1288 size_t frameOffset = sizeof(void*);
1290 volatileRegs.takeUnchecked(temp0_);
1291 volatileRegs.takeUnchecked(temp1_);
1292 masm_.PushRegsInMask(volatileRegs);
1294 using Fn = bool (*)(RegExpStack* regexp_stack);
1295 masm_.setupUnalignedABICall(temp0_);
1296 masm_.passABIArg(temp1_);
1297 masm_.callWithABI<Fn, ::js::irregexp::GrowBacktrackStack>();
1298 masm_.storeCallBoolResult(temp0_);
1300 masm_.PopRegsInMask(volatileRegs);
1302 // If GrowBacktrackStack returned false, we have failed to grow the
1303 // stack, and must exit with a stack-overflow exception. Do this in
1304 // the caller so that the stack is adjusted by our return instruction.
1305 js::jit::Label overflow_return;
1306 masm_.branchTest32(Assembler::Zero, temp0_, temp0_, &overflow_return);
1308 // Otherwise, store the new backtrack stack base and recompute the new
1309 // top of the stack.
1310 Address bsbAddress(masm_.getStackPointer(),
1311 offsetof(FrameData, backtrackStackBase) + frameOffset);
1312 masm_.subPtr(bsbAddress, backtrack_stack_pointer_);
1314 masm_.loadPtr(AbsoluteAddress(ExternalReference::TopOfRegexpStack(isolate())),
1315 temp1_);
1316 masm_.storePtr(temp1_, bsbAddress);
1317 masm_.addPtr(temp1_, backtrack_stack_pointer_);
1319 // Resume execution in calling code.
1320 masm_.bind(&overflow_return);
1321 masm_.ret();
1324 // This is only used by tracing code.
1325 // The return value doesn't matter.
1326 RegExpMacroAssembler::IrregexpImplementation
1327 SMRegExpMacroAssembler::Implementation() {
1328 return kBytecodeImplementation;
1331 // Compare two strings in `/i` mode (ignoreCase, but not unicode).
1332 /*static */
1333 uint32_t SMRegExpMacroAssembler::CaseInsensitiveCompareNonUnicode(
1334 const char16_t* substring1, const char16_t* substring2, size_t byteLength) {
1335 js::AutoUnsafeCallWithABI unsafe;
1337 MOZ_ASSERT(byteLength % sizeof(char16_t) == 0);
1338 size_t length = byteLength / sizeof(char16_t);
1340 for (size_t i = 0; i < length; i++) {
1341 char16_t c1 = substring1[i];
1342 char16_t c2 = substring2[i];
1343 if (c1 != c2) {
1344 #ifdef JS_HAS_INTL_API
1345 // Non-unicode regexps have weird case-folding rules.
1346 c1 = RegExpCaseFolding::Canonicalize(c1);
1347 c2 = RegExpCaseFolding::Canonicalize(c2);
1348 #else
1349 // If we aren't building with ICU, fall back to `/iu` mode. The only
1350 // differences are in corner cases.
1351 c1 = js::unicode::FoldCase(c1);
1352 c2 = js::unicode::FoldCase(c2);
1353 #endif
1354 if (c1 != c2) {
1355 return 0;
1360 return 1;
1363 // Compare two strings in `/iu` mode (ignoreCase and unicode).
1364 /*static */
1365 uint32_t SMRegExpMacroAssembler::CaseInsensitiveCompareUnicode(
1366 const char16_t* substring1, const char16_t* substring2, size_t byteLength) {
1367 js::AutoUnsafeCallWithABI unsafe;
1369 MOZ_ASSERT(byteLength % sizeof(char16_t) == 0);
1370 size_t length = byteLength / sizeof(char16_t);
1372 for (size_t i = 0; i < length; i++) {
1373 char16_t c1 = substring1[i];
1374 char16_t c2 = substring2[i];
1375 if (c1 != c2) {
1376 // Unicode regexps use the common and simple case-folding
1377 // mappings of the Unicode Character Database.
1378 c1 = js::unicode::FoldCase(c1);
1379 c2 = js::unicode::FoldCase(c2);
1380 if (c1 != c2) {
1381 return 0;
1386 return 1;
1389 /* static */
1390 bool SMRegExpMacroAssembler::GrowBacktrackStack(RegExpStack* regexp_stack) {
1391 js::AutoUnsafeCallWithABI unsafe;
1392 size_t size = regexp_stack->memory_size();
1393 return !!regexp_stack->EnsureCapacity(size * 2);
1396 bool SMRegExpMacroAssembler::CanReadUnaligned() const {
1397 #if defined(JS_CODEGEN_ARM)
1398 return !js::jit::ARMFlags::HasAlignmentFault();
1399 #elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
1400 return false;
1401 #else
1402 return true;
1403 #endif
1406 } // namespace internal
1407 } // namespace v8