/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 *
 * Copyright 2016 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// This is an INTERNAL header for Wasm baseline compiler: CPU stack frame,
// stack maps, and associated logic.

#ifndef wasm_wasm_baseline_frame_h
#define wasm_wasm_baseline_frame_h

#include "wasm/WasmBaselineCompile.h"  // For BaseLocalIter
#include "wasm/WasmBCDefs.h"
#include "wasm/WasmBCRegDefs.h"
#include "wasm/WasmBCStk.h"
#include "wasm/WasmConstants.h"  // For MaxFrameSize
// [SMDOC] Wasm baseline compiler's stack frame.
//
// For background, see "Wasm's ABIs" in WasmFrame.h; the following should
// never be in conflict with that.
// The stack frame has four parts ("below" means at lower addresses):
//
//  - the Frame element;
//  - the Local area, including the DebugFrame element and possibly a spilled
//    pointer to stack results, if any; allocated below the header with various
//    forms of alignment;
//  - the Dynamic area, comprising the temporary storage the compiler uses for
//    register spilling, allocated below the Local area;
//  - the Arguments area, comprising memory allocated for outgoing calls,
//    allocated below the Dynamic area.
//
//                 +==============================+
//                 |    Incoming stack arg        |
//                 |    ...                       |
// -------------   +==============================+
//                 |    Frame (fixed size)        |
// -------------   +==============================+ <-------------------- FP
//          ^      |    DebugFrame (optional)     |    ^  ^            ^^
//    localSize    |    Register arg local        |    |  |            ||
//          |      |    ...                       |    |  |  framePushed
//          |      |    Register stack result ptr?|    |  |            ||
//          |      |    Non-arg local             |    |  |            ||
//          |      |    ...                       |    |  |            ||
//          |      |    (padding)                 |    |  |            ||
//          |      |    Instance pointer          |    |  |            ||
//          |      +------------------------------+    |  |            ||
//          v      |    (padding)                 |    |  v            ||
// -------------   +==============================+ currentStackHeight ||
//          ^      |    Dynamic (variable size)   |    |               ||
//  dynamicSize    |    ...                       |    |               ||
//          v      |    ...                       |    v               ||
// -------------   |    (free space, sometimes)   | ---------          v|
//                 +==============================+ <----- SP not-during calls
//                 |    Arguments (sometimes)     |                     |
//                 |    ...                       |                     v
//                 +==============================+ <----- SP during calls
//
// The Frame is addressed off the stack pointer.  masm.framePushed() is always
// correct, and masm.getStackPointer() + masm.framePushed() always addresses
// the Frame, with the DebugFrame optionally below it.
//
// The Local area (including the DebugFrame and, if needed, the spilled value
// of the stack results area pointer) is laid out by BaseLocalIter and is
// allocated and deallocated by standard prologue and epilogue functions that
// manipulate the stack pointer, but it is accessed via BaseStackFrame.
//
// The Dynamic area is maintained by and accessed via BaseStackFrame.  On some
// systems (such as ARM64), the Dynamic memory may be allocated in chunks
// because the SP needs a specific alignment, and in this case there will
// normally be some free space directly above the SP.  The stack height does
// not include the free space, it reflects the logically used space only.
//
// The Dynamic area is where space for stack results is allocated when calling
// functions that return results on the stack.  If a function has stack
// results, a pointer to the low address of the stack result area is passed as
// an additional argument, according to the usual ABI.  See
// ABIResultIter::HasStackResults.
//
// The Arguments area is allocated and deallocated via BaseStackFrame (see
// comments later) but is accessed directly off the stack pointer.
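//
// A hypothetical worked example of the relationships above: if the Local area
// (DebugFrame, locals, padding, and the instance pointer slot) is 48 bytes and
// 24 bytes of values are currently spilled in the Dynamic area, then in the
// non-chunky case masm.framePushed() == 48 + 24 == 72, the Local area occupies
// [FP - 48, FP), and SP + masm.framePushed() addresses the Frame as stated
// above.  The numbers are invented purely to illustrate the layout.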
namespace js {
namespace wasm {

using namespace js::jit;

// Abstraction of the height of the stack frame, to avoid type confusion.

class StackHeight {
  friend class BaseStackFrameAllocator;

  uint32_t height;

 public:
  explicit StackHeight(uint32_t h) : height(h) {}
  static StackHeight Invalid() { return StackHeight(UINT32_MAX); }
  bool isValid() const { return height != UINT32_MAX; }
  bool operator==(StackHeight rhs) const {
    MOZ_ASSERT(isValid() && rhs.isValid());
    return height == rhs.height;
  }
  bool operator!=(StackHeight rhs) const { return !(*this == rhs); }
};

// Abstraction for where multi-value results go on the machine stack.

class StackResultsLoc {
  uint32_t bytes_;
  size_t count_;
  Maybe<uint32_t> height_;

 public:
  StackResultsLoc() : bytes_(0), count_(0) {}
  StackResultsLoc(uint32_t bytes, size_t count, uint32_t height)
      : bytes_(bytes), count_(count), height_(Some(height)) {
    MOZ_ASSERT(bytes != 0);
    MOZ_ASSERT(count != 0);
    MOZ_ASSERT(height != 0);
  }

  uint32_t bytes() const { return bytes_; }
  uint32_t count() const { return count_; }
  uint32_t height() const { return height_.value(); }

  bool hasStackResults() const { return bytes() != 0; }
  StackResults stackResults() const {
    return hasStackResults() ? StackResults::HasStackResults
                             : StackResults::NoStackResults;
  }
};
// Abstraction of the baseline compiler's stack frame (except for the Frame /
// DebugFrame parts).  See comments above for more.  Remember, "below" on the
// stack means at lower addresses.
//
// The abstraction is split into two parts: BaseStackFrameAllocator is
// responsible for allocating and deallocating space on the stack and for
// performing computations that are affected by how the allocation is
// performed; BaseStackFrame then provides a pleasant interface for stack
// frame management.

class BaseStackFrameAllocator {
  MacroAssembler& masm;

#ifdef RABALDR_CHUNKY_STACK
  // On platforms that require the stack pointer to be aligned on a boundary
  // greater than the typical stack item (eg, ARM64 requires 16-byte alignment
  // but items are 8 bytes), allocate stack memory in chunks, and use a
  // separate stack height variable to track the effective stack pointer
  // within the allocated area.  Effectively, there's a variable amount of
  // free space directly above the stack pointer.  See diagram above.

  // The following must be true in order for the stack height to be
  // predictable at control flow joins:
  //
  //  - The Local area is always aligned according to WasmStackAlignment, ie,
  //    masm.framePushed() % WasmStackAlignment is zero after allocating
  //    locals.
  //
  //  - ChunkSize is always a multiple of WasmStackAlignment.
  //
  //  - Pushing and popping are always in units of ChunkSize (hence preserving
  //    alignment).
  //
  //  - The free space on the stack (masm.framePushed() - currentStackHeight_)
  //    is a predictable (nonnegative) amount.

  // As an optimization, we pre-allocate some space on the stack, the size of
  // this allocation is InitialChunk and it must be a multiple of ChunkSize.
  // It is allocated as part of the function prologue and deallocated as part
  // of the epilogue, along with the locals.
  //
  // If ChunkSize is too large then we risk overflowing the stack on simple
  // recursions with few live values where stack overflow should not be a
  // risk; if it is too small we spend too much time adjusting the stack
  // pointer.
  //
  // Good values for ChunkSize are the subject of future empirical analysis;
  // eight words is just an educated guess.

  static constexpr uint32_t ChunkSize = 8 * sizeof(void*);
  static constexpr uint32_t InitialChunk = ChunkSize;

  // The current logical height of the frame is
  //
  //   currentStackHeight_ = localSize_ + dynamicSize
  //
  // where dynamicSize is not accounted for explicitly and localSize_ also
  // includes size for the DebugFrame.
  //
  // The allocated size of the frame, provided by masm.framePushed(), is
  // usually larger than currentStackHeight_, notably at the beginning of
  // execution when we've allocated InitialChunk extra space.

  uint32_t currentStackHeight_;
#endif
  // Size of the Local area in bytes (stable after BaseCompiler::init() has
  // called BaseStackFrame::setupLocals(), which in turn calls
  // BaseStackFrameAllocator::setLocalSize()), always rounded to the proper
  // stack alignment.  The Local area is then allocated in beginFunction(),
  // following the allocation of the Header.  See onFixedStackAllocated()
  // below.

  uint32_t localSize_;

 protected:
  ///////////////////////////////////////////////////////////////////////////
  //
  // Initialization

  explicit BaseStackFrameAllocator(MacroAssembler& masm)
      : masm(masm),
#ifdef RABALDR_CHUNKY_STACK
        currentStackHeight_(0),
#endif
        localSize_(UINT32_MAX) {
  }

 protected:
  //////////////////////////////////////////////////////////////////////
  //
  // The Local area - the static part of the frame.

  // Record the size of the Local area, once it is known.

  void setLocalSize(uint32_t localSize) {
    MOZ_ASSERT(localSize == AlignBytes(localSize, sizeof(void*)),
               "localSize_ should be aligned to at least a pointer");
    MOZ_ASSERT(localSize_ == UINT32_MAX);
    localSize_ = localSize;
  }

  // Record the current stack height, after it has become stable in
  // beginFunction().  See also BaseStackFrame::onFixedStackAllocated().

  void onFixedStackAllocated() {
    MOZ_ASSERT(localSize_ != UINT32_MAX);
#ifdef RABALDR_CHUNKY_STACK
    currentStackHeight_ = localSize_;
#endif
  }

 public:
  // The fixed amount of memory, in bytes, allocated on the stack below the
  // Header for purposes such as locals and other fixed values.  Includes all
  // necessary alignment, and on ARM64 also the initial chunk for the working
  // stack memory.

  uint32_t fixedAllocSize() const {
    MOZ_ASSERT(localSize_ != UINT32_MAX);
#ifdef RABALDR_CHUNKY_STACK
    return localSize_ + InitialChunk;
#else
    return localSize_;
#endif
  }

#ifdef RABALDR_CHUNKY_STACK
  // The allocated frame size is frequently larger than the logical stack
  // height; we round up to a chunk boundary, and special case the initial
  // chunk.
  uint32_t framePushedForHeight(uint32_t logicalHeight) {
    if (logicalHeight <= fixedAllocSize()) {
      return fixedAllocSize();
    }
    return fixedAllocSize() +
           AlignBytes(logicalHeight - fixedAllocSize(), ChunkSize);
  }
#endif
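
  // Illustrative (hypothetical) numbers for the rounding above: on a 64-bit
  // target ChunkSize is 8 * 8 = 64 bytes, so with localSize_ = 48 we get
  // fixedAllocSize() = 48 + 64 = 112.  Then framePushedForHeight(100) = 112
  // (the height fits in the fixed allocation), while framePushedForHeight(130)
  // = 112 + AlignBytes(130 - 112, 64) = 112 + 64 = 176.  These values are only
  // an example of the arithmetic, not constants used by the compiler.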
 protected:
  //////////////////////////////////////////////////////////////////////
  //
  // The Dynamic area - the dynamic part of the frame, for spilling and saving
  // intermediate values.

  // Offset off of sp_ for the slot at stack area location `offset`.

  int32_t stackOffset(int32_t offset) {
    MOZ_ASSERT(offset > 0);
    return masm.framePushed() - offset;
  }

  uint32_t computeHeightWithStackResults(StackHeight stackBase,
                                         uint32_t stackResultBytes) {
    MOZ_ASSERT(stackResultBytes);
    MOZ_ASSERT(currentStackHeight() >= stackBase.height);
    return stackBase.height + stackResultBytes;
  }

#ifdef RABALDR_CHUNKY_STACK
  void pushChunkyBytes(uint32_t bytes) {
    checkChunkyInvariants();
    uint32_t freeSpace = masm.framePushed() - currentStackHeight_;
    if (freeSpace < bytes) {
      uint32_t bytesToReserve = AlignBytes(bytes - freeSpace, ChunkSize);
      MOZ_ASSERT(bytesToReserve + freeSpace >= bytes);
      masm.reserveStack(bytesToReserve);
    }
    currentStackHeight_ += bytes;
    checkChunkyInvariants();
  }

  void popChunkyBytes(uint32_t bytes) {
    checkChunkyInvariants();
    currentStackHeight_ -= bytes;
    // Sometimes, popChunkyBytes() is used to pop a larger area, as when we
    // drop values consumed by a call, and we may need to drop several chunks.
    // But never drop the initial chunk.  Crucially, the amount we drop is
    // always an integral number of chunks.
    uint32_t freeSpace = masm.framePushed() - currentStackHeight_;
    if (freeSpace >= ChunkSize) {
      uint32_t targetAllocSize = framePushedForHeight(currentStackHeight_);
      uint32_t amountToFree = masm.framePushed() - targetAllocSize;
      MOZ_ASSERT(amountToFree % ChunkSize == 0);
      if (amountToFree) {
        masm.freeStack(amountToFree);
      }
    }
    checkChunkyInvariants();
  }
#endif
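
  // A hedged walk-through of the chunky case, using the hypothetical numbers
  // from the example above (ChunkSize = 64, framePushed = 112,
  // currentStackHeight_ = 100): pushChunkyBytes(16) sees freeSpace = 12 < 16,
  // reserves AlignBytes(4, 64) = 64 more bytes (framePushed becomes 176), and
  // sets currentStackHeight_ = 116.  A later popChunkyBytes(16) restores
  // currentStackHeight_ = 100, sees freeSpace = 76 >= ChunkSize, and frees the
  // now-unused 64-byte chunk.  Again, the numbers are illustrative only.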
  uint32_t currentStackHeight() const {
#ifdef RABALDR_CHUNKY_STACK
    return currentStackHeight_;
#else
    return masm.framePushed();
#endif
  }

 private:
#ifdef RABALDR_CHUNKY_STACK
  void checkChunkyInvariants() {
    MOZ_ASSERT(masm.framePushed() >= fixedAllocSize());
    MOZ_ASSERT(masm.framePushed() >= currentStackHeight_);
    MOZ_ASSERT(masm.framePushed() == fixedAllocSize() ||
               masm.framePushed() - currentStackHeight_ < ChunkSize);
    MOZ_ASSERT((masm.framePushed() - localSize_) % ChunkSize == 0);
  }
#endif

  // For a given stack height, return the appropriate size of the allocated
  // frame.

  uint32_t framePushedForHeight(StackHeight stackHeight) {
#ifdef RABALDR_CHUNKY_STACK
    // A more complicated adjustment is needed.
    return framePushedForHeight(stackHeight.height);
#else
    // The allocated frame size equals the stack height.
    return stackHeight.height;
#endif
  }

 public:
  // The current height of the stack area, not necessarily zero-based, in a
  // type-safe way.

  StackHeight stackHeight() const { return StackHeight(currentStackHeight()); }

  // Set the frame height to a previously recorded value.

  void setStackHeight(StackHeight amount) {
#ifdef RABALDR_CHUNKY_STACK
    currentStackHeight_ = amount.height;
    masm.setFramePushed(framePushedForHeight(amount));
    checkChunkyInvariants();
#else
    masm.setFramePushed(amount.height);
#endif
  }

  // The current height of the dynamic part of the stack area (ie, the backing
  // store for the evaluation stack), zero-based.

  uint32_t dynamicHeight() const { return currentStackHeight() - localSize_; }

  // Before branching to an outer control label, pop the execution stack to
  // the level expected by that region, but do not update masm.framePushed()
  // as that will happen as compilation leaves the block.
  //
  // Note these operate directly on the stack pointer register.

  void popStackBeforeBranch(StackHeight destStackHeight,
                            uint32_t stackResultBytes) {
    uint32_t framePushedHere = masm.framePushed();
    StackHeight heightThere =
        StackHeight(destStackHeight.height + stackResultBytes);
    uint32_t framePushedThere = framePushedForHeight(heightThere);
    if (framePushedHere > framePushedThere) {
      masm.addToStackPtr(Imm32(framePushedHere - framePushedThere));
    }
  }

  void popStackBeforeBranch(StackHeight destStackHeight, ResultType type) {
    popStackBeforeBranch(destStackHeight,
                         ABIResultIter::MeasureStackBytes(type));
  }

  // Given that there are |stackParamSize| bytes on the dynamic stack
  // corresponding to the stack results, return the stack height once these
  // parameters are popped.

  StackHeight stackResultsBase(uint32_t stackParamSize) {
    return StackHeight(currentStackHeight() - stackParamSize);
  }

  // For most of WebAssembly, adjacent instructions have fallthrough control
  // flow between them, which allows us to simply thread the current stack
  // height through the compiler.  There are two exceptions to this rule: when
  // leaving a block via dead code, and when entering the "else" arm of an
  // "if".  In these cases, the stack height is the block entry height, plus
  // any stack values (results in the block exit case, parameters in the else
  // entry case).

  void resetStackHeight(StackHeight destStackHeight, ResultType type) {
    uint32_t height = destStackHeight.height;
    height += ABIResultIter::MeasureStackBytes(type);
    setStackHeight(StackHeight(height));
  }

  // Return offset of stack result.

  uint32_t locateStackResult(const ABIResult& result, StackHeight stackBase,
                             uint32_t stackResultBytes) {
    MOZ_ASSERT(result.onStack());
    MOZ_ASSERT(result.stackOffset() + result.size() <= stackResultBytes);
    uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
    return end - result.stackOffset();
  }
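
  // For illustration (hypothetical values): if stackBase.height = 100 and a
  // multi-value call returns 16 bytes of stack results, then
  // computeHeightWithStackResults() yields end = 116.  A result whose
  // ABIResult::stackOffset() is 8 is then located at height 116 - 8 = 108,
  // i.e. 8 bytes above the lowest-addressed byte of the result area.  These
  // numbers are only meant to show how the offsets compose.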
 public:
  //////////////////////////////////////////////////////////////////////
  //
  // The Argument area - for outgoing calls.
  //
  // We abstract these operations as an optimization: we can merge the freeing
  // of the argument area and dropping values off the stack after a call.  But
  // they always amount to manipulating the real stack pointer by some amount.
  //
  // Note that we do not update currentStackHeight_ for this; the frame does
  // not know about outgoing arguments.  But we do update framePushed(), so we
  // can still index into the frame below the outgoing arguments area.

  // This is always equivalent to a masm.reserveStack() call.

  void allocArgArea(size_t argSize) {
    if (argSize) {
      masm.reserveStack(argSize);
    }
  }

  // This frees the argument area allocated by allocArgArea(), and `argSize`
  // must be equal to the `argSize` argument to allocArgArea().  In addition
  // we drop some values from the frame, corresponding to the values that were
  // consumed by the call.

  void freeArgAreaAndPopBytes(size_t argSize, size_t dropSize) {
    // The method is called to re-initialize SP after the call.  Note that
    // this operation must not be optimized away even when
    // argSize + dropSize == 0.
#ifdef RABALDR_CHUNKY_STACK
    // Freeing the outgoing arguments and freeing the consumed values have
    // different semantics here, which is why the operation is split.
    masm.freeStackTo(masm.framePushed() - argSize);
    popChunkyBytes(dropSize);
#else
    masm.freeStackTo(masm.framePushed() - (argSize + dropSize));
#endif
  }
};
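
// A minimal usage sketch (hypothetical sizes): a call with 32 bytes of
// outgoing memory arguments that consumes 16 bytes of operands from the
// evaluation stack would be bracketed roughly as
//
//   fr.allocArgArea(32);
//   // ... emit the call ...
//   fr.freeArgAreaAndPopBytes(32, 16);
//
// where `fr` is the compiler's BaseStackFrame.  The sizes are examples only;
// in the real compiler they come from StackArgAreaSizeAligned() and the
// operand/result accounting.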
class BaseStackFrame final : public BaseStackFrameAllocator {
  MacroAssembler& masm;

  // The largest observed value of masm.framePushed(), ie, the size of the
  // stack frame.  Read this for its true value only when code generation is
  // finished.
  uint32_t maxFramePushed_;

  // Patch point where we check for stack overflow.
  CodeOffset stackAddOffset_;

  // Low byte offset of pointer to stack results, if any.
  Maybe<int32_t> stackResultsPtrOffset_;

  // The offset of instance pointer.
  uint32_t instancePointerOffset_;

  // Low byte offset of local area for true locals (not parameters).
  uint32_t varLow_;

  // High byte offset + 1 of local area for true locals.
  uint32_t varHigh_;

  // The stack pointer, cached for brevity.
  RegisterOrSP sp_;

 public:
  explicit BaseStackFrame(MacroAssembler& masm)
      : BaseStackFrameAllocator(masm),
        masm(masm),
        maxFramePushed_(0),
        stackAddOffset_(0),
        instancePointerOffset_(UINT32_MAX),
        varLow_(UINT32_MAX),
        varHigh_(UINT32_MAX),
        sp_(masm.getStackPointer()) {}

  ///////////////////////////////////////////////////////////////////////////
  //
  // Stack management and overflow checking

  // This must be called once beginFunction has allocated space for the Header
  // (the Frame and DebugFrame) and the Local area, and will record the
  // current frame size for internal use by the stack abstractions.

  void onFixedStackAllocated() {
    maxFramePushed_ = masm.framePushed();
    BaseStackFrameAllocator::onFixedStackAllocated();
  }

  // We won't know until after we've generated code how big the frame will be
  // (we may need arbitrary spill slots and outgoing param slots) so emit a
  // patchable add that is patched in endFunction().
  //
  // Note the platform scratch register may be used by branchPtr(), so
  // generally tmp must be something else.

  void checkStack(Register tmp, BytecodeOffset trapOffset) {
    stackAddOffset_ = masm.sub32FromStackPtrWithPatch(tmp);
    Label ok;
    masm.branchPtr(Assembler::Below,
                   Address(InstanceReg, wasm::Instance::offsetOfStackLimit()),
                   tmp, &ok);
    masm.wasmTrap(Trap::StackOverflow, trapOffset);
    masm.bind(&ok);
  }

  void patchCheckStack() {
    masm.patchSub32FromStackPtr(stackAddOffset_,
                                Imm32(int32_t(maxFramePushed_)));
  }

  // Very large frames are implausible, probably an attack.

  bool checkStackHeight() { return maxFramePushed_ <= MaxFrameSize; }
  ///////////////////////////////////////////////////////////////////////////
  //
  // Local area

  struct Local {
    // Type of the value.
    const MIRType type;

    // Byte offset from Frame "into" the locals, ie positive for true locals
    // and negative for incoming args that read directly from the arg area.
    // It assumes the stack is growing down and that locals are on the stack
    // at lower addresses than Frame, and is the offset from Frame of the
    // lowest-addressed byte of the local.
    const int32_t offs;

    Local(MIRType type, int32_t offs) : type(type), offs(offs) {}

    bool isStackArgument() const { return offs < 0; }
  };

  // Profiling shows that the number of parameters and locals frequently
  // touches or exceeds 8.  So 16 seems like a reasonable starting point.
  using LocalVector = Vector<Local, 16, SystemAllocPolicy>;

  // Initialize `localInfo` based on the types of `locals` and `args`.
  [[nodiscard]] bool setupLocals(const ValTypeVector& locals,
                                 const ArgTypeVector& args, bool debugEnabled,
                                 LocalVector* localInfo) {
    if (!localInfo->reserve(locals.length())) {
      return false;
    }

    DebugOnly<uint32_t> index = 0;
    BaseLocalIter i(locals, args, debugEnabled);
    for (; !i.done() && i.index() < args.lengthWithoutStackResults(); i++) {
      MOZ_ASSERT(i.isArg());
      MOZ_ASSERT(i.index() == index);
      localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
      index++;
    }

    varLow_ = i.frameSize();
    for (; !i.done(); i++) {
      MOZ_ASSERT(!i.isArg());
      MOZ_ASSERT(i.index() == index);
      localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
      index++;
    }
    varHigh_ = i.frameSize();

    // Reserve an additional stack slot for the instance pointer.
    const uint32_t pointerAlignedVarHigh = AlignBytes(varHigh_, sizeof(void*));
    const uint32_t localSize = pointerAlignedVarHigh + sizeof(void*);
    instancePointerOffset_ = localSize;

    setLocalSize(AlignBytes(localSize, WasmStackAlignment));

    if (args.hasSyntheticStackResultPointerArg()) {
      stackResultsPtrOffset_ = Some(i.stackResultPointerOffset());
    }

    return true;
  }
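
  // A hypothetical example of the sizing above: if BaseLocalIter ends with
  // varHigh_ = 44 on a 64-bit target, then pointerAlignedVarHigh = 48, the
  // instance slot brings localSize to 56 with instancePointerOffset_ = 56, and
  // setLocalSize() rounds the Local area up to AlignBytes(56, 16) = 64
  // (assuming WasmStackAlignment is 16 on that target).  The figures are
  // illustrative only.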
  void zeroLocals(BaseRegAlloc* ra);

  Address addressOfLocal(const Local& local, uint32_t additionalOffset = 0) {
    if (local.isStackArgument()) {
      return Address(FramePointer,
                     stackArgumentOffsetFromFp(local) + additionalOffset);
    }
    return Address(sp_, localOffsetFromSp(local) + additionalOffset);
  }

  void loadLocalI32(const Local& src, RegI32 dest) {
    masm.load32(addressOfLocal(src), dest);
  }

#ifndef JS_PUNBOX64
  void loadLocalI64Low(const Local& src, RegI32 dest) {
    masm.load32(addressOfLocal(src, INT64LOW_OFFSET), dest);
  }

  void loadLocalI64High(const Local& src, RegI32 dest) {
    masm.load32(addressOfLocal(src, INT64HIGH_OFFSET), dest);
  }
#endif

  void loadLocalI64(const Local& src, RegI64 dest) {
    masm.load64(addressOfLocal(src), dest);
  }

  void loadLocalRef(const Local& src, RegRef dest) {
    masm.loadPtr(addressOfLocal(src), dest);
  }

  void loadLocalF64(const Local& src, RegF64 dest) {
    masm.loadDouble(addressOfLocal(src), dest);
  }

  void loadLocalF32(const Local& src, RegF32 dest) {
    masm.loadFloat32(addressOfLocal(src), dest);
  }

#ifdef ENABLE_WASM_SIMD
  void loadLocalV128(const Local& src, RegV128 dest) {
    masm.loadUnalignedSimd128(addressOfLocal(src), dest);
  }
#endif

  void storeLocalI32(RegI32 src, const Local& dest) {
    masm.store32(src, addressOfLocal(dest));
  }

  void storeLocalI64(RegI64 src, const Local& dest) {
    masm.store64(src, addressOfLocal(dest));
  }

  void storeLocalRef(RegRef src, const Local& dest) {
    masm.storePtr(src, addressOfLocal(dest));
  }

  void storeLocalF64(RegF64 src, const Local& dest) {
    masm.storeDouble(src, addressOfLocal(dest));
  }

  void storeLocalF32(RegF32 src, const Local& dest) {
    masm.storeFloat32(src, addressOfLocal(dest));
  }

#ifdef ENABLE_WASM_SIMD
  void storeLocalV128(RegV128 src, const Local& dest) {
    masm.storeUnalignedSimd128(src, addressOfLocal(dest));
  }
#endif

  // Offset off of sp_ for `local`.
  int32_t localOffsetFromSp(const Local& local) {
    MOZ_ASSERT(!local.isStackArgument());
    return localOffset(local.offs);
  }

  // Offset off of the frame pointer for a stack argument `local`.
  int32_t stackArgumentOffsetFromFp(const Local& local) {
    MOZ_ASSERT(local.isStackArgument());
    return -local.offs;
  }
  // The incoming stack result area pointer is for stack results of the
  // function being compiled.
  void loadIncomingStackResultAreaPtr(RegPtr reg) {
    const int32_t offset = stackResultsPtrOffset_.value();
    Address src = offset < 0 ? Address(FramePointer, -offset)
                             : Address(sp_, stackOffset(offset));
    masm.loadPtr(src, reg);
  }

  void storeIncomingStackResultAreaPtr(RegPtr reg) {
    // If we get here, that means the pointer to the stack results area was
    // passed in as a register, and therefore it will be spilled below the
    // frame, so the offset is a positive height.
    MOZ_ASSERT(stackResultsPtrOffset_.value() > 0);
    masm.storePtr(reg,
                  Address(sp_, stackOffset(stackResultsPtrOffset_.value())));
  }

  void loadInstancePtr(Register dst) {
    // loadInstancePtr is sometimes used in contexts where SP is not in sync
    // with FP, e.g. just after a tail call returns, so address the slot off
    // FP rather than SP.
    masm.loadPtr(Address(FramePointer, -instancePointerOffset_), dst);
  }

  void storeInstancePtr(Register instance) {
    masm.storePtr(instance, Address(sp_, stackOffset(instancePointerOffset_)));
  }

  int32_t getInstancePtrOffset() { return stackOffset(instancePointerOffset_); }
  // An outgoing stack result area pointer is for stack results of callees of
  // the function being compiled.
  void computeOutgoingStackResultAreaPtr(const StackResultsLoc& results,
                                         RegPtr dest) {
    MOZ_ASSERT(results.height() <= masm.framePushed());
    uint32_t offsetFromSP = masm.framePushed() - results.height();
    masm.moveStackPtrTo(dest);
    if (offsetFromSP) {
      masm.addPtr(Imm32(offsetFromSP), dest);
    }
  }

 private:
  // Offset off of sp_ for a local with offset `offset` from Frame.
  int32_t localOffset(int32_t offset) { return masm.framePushed() - offset; }

 public:
  ///////////////////////////////////////////////////////////////////////////
  //
  // Dynamic area

  static constexpr size_t StackSizeOfPtr = ABIResult::StackSizeOfPtr;
  static constexpr size_t StackSizeOfInt64 = ABIResult::StackSizeOfInt64;
  static constexpr size_t StackSizeOfFloat = ABIResult::StackSizeOfFloat;
  static constexpr size_t StackSizeOfDouble = ABIResult::StackSizeOfDouble;
#ifdef ENABLE_WASM_SIMD
  static constexpr size_t StackSizeOfV128 = ABIResult::StackSizeOfV128;
#endif
  // Pushes the register `r` to the stack.  This pushes the full 64-bit width
  // on 64-bit systems, and 32 bits otherwise.
  uint32_t pushGPR(Register r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfPtr);
    masm.storePtr(r, Address(sp_, stackOffset(currentStackHeight())));
#else
    masm.Push(r);
#endif
    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfPtr == currentStackHeight());
    return currentStackHeight();
  }
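
  // These push*/pop* helpers are the evaluation-stack primitives used by the
  // baseline compiler's value-stack code.  A hypothetical pairing, shown only
  // to illustrate the protocol (heights grow on push and must be matched by
  // the corresponding pop):
  //
  //   uint32_t h = fr.pushGPR(scratch);  // h is the new stack height
  //   // ... h can later be used with the loadStack*() accessors below ...
  //   fr.popGPR(scratch);                // restores the previous height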
  uint32_t pushFloat32(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfFloat);
    masm.storeFloat32(r, Address(sp_, stackOffset(currentStackHeight())));
#else
    masm.Push(r);
#endif
    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfFloat == currentStackHeight());
    return currentStackHeight();
  }

#ifdef ENABLE_WASM_SIMD
  uint32_t pushV128(RegV128 r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#  ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfV128);
#  else
    masm.adjustStack(-(int)StackSizeOfV128);
#  endif
    masm.storeUnalignedSimd128(r,
                               Address(sp_, stackOffset(currentStackHeight())));
    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfV128 == currentStackHeight());
    return currentStackHeight();
  }
#endif

  uint32_t pushDouble(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfDouble);
    masm.storeDouble(r, Address(sp_, stackOffset(currentStackHeight())));
#else
    masm.Push(r);
#endif
    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfDouble == currentStackHeight());
    return currentStackHeight();
  }
  // Pops the stack into the register `r`.  This pops the full 64-bit width on
  // 64-bit systems, and 32 bits otherwise.
  void popGPR(Register r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    masm.loadPtr(Address(sp_, stackOffset(currentStackHeight())), r);
    popChunkyBytes(StackSizeOfPtr);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfPtr == currentStackHeight());
  }

  void popFloat32(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    masm.loadFloat32(Address(sp_, stackOffset(currentStackHeight())), r);
    popChunkyBytes(StackSizeOfFloat);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfFloat == currentStackHeight());
  }

  void popDouble(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    masm.loadDouble(Address(sp_, stackOffset(currentStackHeight())), r);
    popChunkyBytes(StackSizeOfDouble);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfDouble == currentStackHeight());
  }

#ifdef ENABLE_WASM_SIMD
  void popV128(RegV128 r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
    masm.loadUnalignedSimd128(Address(sp_, stackOffset(currentStackHeight())),
                              r);
#  ifdef RABALDR_CHUNKY_STACK
    popChunkyBytes(StackSizeOfV128);
#  else
    masm.adjustStack((int)StackSizeOfV128);
#  endif
    MOZ_ASSERT(stackBefore - StackSizeOfV128 == currentStackHeight());
  }
#endif
  void popBytes(size_t bytes) {
    if (bytes > 0) {
#ifdef RABALDR_CHUNKY_STACK
      popChunkyBytes(bytes);
#else
      masm.freeStack(bytes);
#endif
    }
  }

  void loadStackI32(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackI64(int32_t offset, RegI64 dest) {
    masm.load64(Address(sp_, stackOffset(offset)), dest);
  }

#ifndef JS_PUNBOX64
  void loadStackI64Low(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset - INT64LOW_OFFSET)), dest);
  }

  void loadStackI64High(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset - INT64HIGH_OFFSET)), dest);
  }
#endif

  void loadStackRef(int32_t offset, RegRef dest) {
    masm.loadPtr(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackF64(int32_t offset, RegF64 dest) {
    masm.loadDouble(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackF32(int32_t offset, RegF32 dest) {
    masm.loadFloat32(Address(sp_, stackOffset(offset)), dest);
  }

#ifdef ENABLE_WASM_SIMD
  void loadStackV128(int32_t offset, RegV128 dest) {
    masm.loadUnalignedSimd128(Address(sp_, stackOffset(offset)), dest);
  }
#endif

  uint32_t prepareStackResultArea(StackHeight stackBase,
                                  uint32_t stackResultBytes) {
    uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
    if (currentStackHeight() < end) {
      uint32_t bytes = end - currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
      pushChunkyBytes(bytes);
#else
      masm.reserveStack(bytes);
#endif
      maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    }
    return end;
  }

  void finishStackResultArea(StackHeight stackBase, uint32_t stackResultBytes) {
    uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
    MOZ_ASSERT(currentStackHeight() >= end);
    popBytes(currentStackHeight() - end);
  }
  // |srcHeight| and |destHeight| are stack heights *including* |bytes|.
  void shuffleStackResultsTowardFP(uint32_t srcHeight, uint32_t destHeight,
                                   uint32_t bytes, Register temp) {
    MOZ_ASSERT(destHeight < srcHeight);
    MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
    // This overload is used when SP/framePushed is not tracked by the
    // compiler, e.g. after a possible return call, so the slots are addressed
    // off FramePointer instead of sp_.
    int32_t destOffset = int32_t(-destHeight + bytes);
    int32_t srcOffset = int32_t(-srcHeight + bytes);
    while (bytes >= sizeof(intptr_t)) {
      destOffset -= sizeof(intptr_t);
      srcOffset -= sizeof(intptr_t);
      bytes -= sizeof(intptr_t);
      masm.loadPtr(Address(FramePointer, srcOffset), temp);
      masm.storePtr(temp, Address(FramePointer, destOffset));
    }
    if (bytes) {
      MOZ_ASSERT(bytes == sizeof(uint32_t));
      destOffset -= sizeof(uint32_t);
      srcOffset -= sizeof(uint32_t);
      masm.load32(Address(FramePointer, srcOffset), temp);
      masm.store32(temp, Address(FramePointer, destOffset));
    }
  }

  // Unlike the overload that operates on raw heights, |srcHeight| and
  // |destHeight| are stack heights *not including* |bytes|.
  void shuffleStackResultsTowardFP(StackHeight srcHeight,
                                   StackHeight destHeight, uint32_t bytes,
                                   Register temp) {
    MOZ_ASSERT(srcHeight.isValid());
    MOZ_ASSERT(destHeight.isValid());
    uint32_t src = computeHeightWithStackResults(srcHeight, bytes);
    uint32_t dest = computeHeightWithStackResults(destHeight, bytes);
    MOZ_ASSERT(src <= currentStackHeight());
    MOZ_ASSERT(dest <= currentStackHeight());
    shuffleStackResultsTowardFP(src, dest, bytes, temp);
  }
  // |srcHeight| and |destHeight| are stack heights *including* |bytes|.
  void shuffleStackResultsTowardSP(uint32_t srcHeight, uint32_t destHeight,
                                   uint32_t bytes, Register temp) {
    MOZ_ASSERT(destHeight > srcHeight);
    MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
    uint32_t destOffset = stackOffset(destHeight);
    uint32_t srcOffset = stackOffset(srcHeight);
    while (bytes >= sizeof(intptr_t)) {
      masm.loadPtr(Address(sp_, srcOffset), temp);
      masm.storePtr(temp, Address(sp_, destOffset));
      destOffset += sizeof(intptr_t);
      srcOffset += sizeof(intptr_t);
      bytes -= sizeof(intptr_t);
    }
    if (bytes) {
      MOZ_ASSERT(bytes == sizeof(uint32_t));
      masm.load32(Address(sp_, srcOffset), temp);
      masm.store32(temp, Address(sp_, destOffset));
    }
  }

  // Copy results from the top of the current stack frame to an area of
  // memory, and pop the stack accordingly.  `dest` is the address of the low
  // byte of that memory.
  void popStackResultsToMemory(Register dest, uint32_t bytes, Register temp) {
    MOZ_ASSERT(bytes <= currentStackHeight());
    MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
    uint32_t bytesToPop = bytes;
    uint32_t srcOffset = stackOffset(currentStackHeight());
    uint32_t destOffset = 0;
    while (bytes >= sizeof(intptr_t)) {
      masm.loadPtr(Address(sp_, srcOffset), temp);
      masm.storePtr(temp, Address(dest, destOffset));
      destOffset += sizeof(intptr_t);
      srcOffset += sizeof(intptr_t);
      bytes -= sizeof(intptr_t);
    }
    if (bytes) {
      MOZ_ASSERT(bytes == sizeof(uint32_t));
      masm.load32(Address(sp_, srcOffset), temp);
      masm.store32(temp, Address(dest, destOffset));
    }
    popBytes(bytesToPop);
  }

  void allocArgArea(size_t argSize) {
    if (argSize) {
      BaseStackFrameAllocator::allocArgArea(argSize);
      maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    }
  }
 private:
  void store32BitsToStack(int32_t imm, uint32_t destHeight, Register temp) {
    masm.move32(Imm32(imm), temp);
    masm.store32(temp, Address(sp_, stackOffset(destHeight)));
  }

  void store64BitsToStack(int64_t imm, uint32_t destHeight, Register temp) {
#ifdef JS_PUNBOX64
    masm.move64(Imm64(imm), Register64(temp));
    masm.store64(Register64(temp), Address(sp_, stackOffset(destHeight)));
#else
    union {
      int64_t i64;
      int32_t i32[2];
    } bits = {.i64 = imm};
    static_assert(sizeof(bits) == 8);
    store32BitsToStack(bits.i32[0], destHeight, temp);
    store32BitsToStack(bits.i32[1], destHeight - sizeof(int32_t), temp);
#endif
  }

 public:
  void storeImmediatePtrToStack(intptr_t imm, uint32_t destHeight,
                                Register temp) {
#ifdef JS_PUNBOX64
    static_assert(StackSizeOfPtr == 8);
    store64BitsToStack(imm, destHeight, temp);
#else
    static_assert(StackSizeOfPtr == 4);
    store32BitsToStack(int32_t(imm), destHeight, temp);
#endif
  }

  void storeImmediateI64ToStack(int64_t imm, uint32_t destHeight,
                                Register temp) {
    store64BitsToStack(imm, destHeight, temp);
  }

  void storeImmediateF32ToStack(float imm, uint32_t destHeight, Register temp) {
    union {
      int32_t i32;
      float f32;
    } bits = {.f32 = imm};
    static_assert(sizeof(bits) == 4);
    // Do not store 4 bytes if StackSizeOfFloat == 8.  It's probably OK to do
    // so, but it costs little to store something predictable.
    if (StackSizeOfFloat == 4) {
      store32BitsToStack(bits.i32, destHeight, temp);
    } else {
      store64BitsToStack(uint32_t(bits.i32), destHeight, temp);
    }
  }

  void storeImmediateF64ToStack(double imm, uint32_t destHeight,
                                Register temp) {
    union {
      int64_t i64;
      double f64;
    } bits = {.f64 = imm};
    static_assert(sizeof(bits) == 8);
    store64BitsToStack(bits.i64, destHeight, temp);
  }

#ifdef ENABLE_WASM_SIMD
  void storeImmediateV128ToStack(V128 imm, uint32_t destHeight, Register temp) {
    union {
      int32_t i32[4];
      uint8_t bytes[16];
    } bits{};
    static_assert(sizeof(bits) == 16);
    memcpy(bits.bytes, imm.bytes, 16);
    for (unsigned i = 0; i < 4; i++) {
      store32BitsToStack(bits.i32[i], destHeight - i * sizeof(int32_t), temp);
    }
  }
#endif
};
//////////////////////////////////////////////////////////////////////////////
//
// MachineStackTracker, used for stack-slot pointerness tracking.

// An expensive operation in stack-map creation is copying of the
// MachineStackTracker (MST) into the final StackMap.  This is done in
// StackMapGenerator::createStackMap.  Given that this is basically a
// bit-array copy, it is reasonable to ask whether the two classes could have
// a more similar representation, so that the copy could then be done with
// `memcpy`.
//
// Although in principle feasible, the following complications exist, and so
// for the moment, this has not been done.
//
// * StackMap is optimised for compact size (storage) since there will be
//   many, so it uses a true bitmap.  MST is intended to be fast and simple,
//   and only one exists at once (per compilation thread).  Doing this would
//   require MST to use a true bitmap, and hence ..
//
// * .. the copying can't be a straight memcpy, since StackMap has entries for
//   words not covered by MST.  Hence the copy would need to shift bits in
//   each byte left or right (statistically speaking, in 7 cases out of 8) in
//   order to ensure no "holes" in the resulting bitmap.
//
// * Furthermore the copying would need to logically invert the direction of
//   the stacks.  For MST, index zero in the vector corresponds to the highest
//   address in the stack.  For StackMap, bit index zero corresponds to the
//   lowest address in the stack.
//
// * Finally, StackMap is a variable-length structure whose size must be known
//   at creation time.  The size of an MST by contrast isn't known at creation
//   time -- it grows as the baseline compiler pushes stuff on its value
//   stack.  That's why it has to have vector entry 0 being the highest
//   address.
//
// * Although not directly relevant, StackMaps are also created by the via-Ion
//   compilation routes, by translation from the pre-existing "JS-era"
//   LSafePoints (CreateStackMapFromLSafepoint).  So if we want to mash
//   StackMap around to suit baseline better, we also need to ensure it
//   doesn't break Ion somehow.
class MachineStackTracker {
  // Simulates the machine's stack, with one bool per word.  The booleans are
  // represented as `uint8_t`s so as to guarantee the element size is one
  // byte.  Index zero in this vector corresponds to the highest address in
  // the machine's stack.  The last entry corresponds to what SP currently
  // points at.  This all assumes a grow-down stack.
  //
  // numPtrs_ contains the number of "true" values in vec_, and is therefore
  // redundant.  But it serves as a constant-time way to detect the common
  // case where vec_ holds no "true" values.
  size_t numPtrs_;
  Vector<uint8_t, 64, SystemAllocPolicy> vec_;

 public:
  MachineStackTracker() : numPtrs_(0) {}

  ~MachineStackTracker() {
#ifdef DEBUG
    size_t n = 0;
    for (uint8_t b : vec_) {
      n += (b ? 1 : 0);
    }
    MOZ_ASSERT(n == numPtrs_);
#endif
  }

  // Clone this MachineStackTracker, writing the result at |dst|.
  [[nodiscard]] bool cloneTo(MachineStackTracker* dst);

  // Notionally push |n| non-pointers on the stack.
  [[nodiscard]] bool pushNonGCPointers(size_t n) {
    return vec_.appendN(uint8_t(false), n);
  }

  // Mark the stack slot |offsetFromSP| up from the bottom as holding a
  // pointer.
  void setGCPointer(size_t offsetFromSP) {
    // offsetFromSP == 0 denotes the most recently pushed item, == 1 the
    // second most recently pushed item, etc.
    MOZ_ASSERT(offsetFromSP < vec_.length());

    size_t offsetFromTop = vec_.length() - 1 - offsetFromSP;
    numPtrs_ = numPtrs_ + 1 - (vec_[offsetFromTop] ? 1 : 0);
    vec_[offsetFromTop] = uint8_t(true);
  }

  // Query the pointerness of the slot |offsetFromSP| up from the bottom.
  bool isGCPointer(size_t offsetFromSP) const {
    MOZ_ASSERT(offsetFromSP < vec_.length());

    size_t offsetFromTop = vec_.length() - 1 - offsetFromSP;
    return bool(vec_[offsetFromTop]);
  }
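
  // Index arithmetic, with hypothetical numbers: after pushNonGCPointers(4)
  // on an empty tracker, vec_.length() == 4 and the most recently pushed word
  // (the one SP points at) is offsetFromSP == 0, which maps to vec_[3];
  // offsetFromSP == 3 is the oldest word and maps to vec_[0].  Calling
  // setGCPointer(0) therefore sets vec_[3] and bumps numPtrs_ to 1.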
  // Return the number of words tracked by this MachineStackTracker.
  size_t length() const { return vec_.length(); }

  // Return the number of pointer-typed words tracked by this
  // MachineStackTracker.
  size_t numPtrs() const {
    MOZ_ASSERT(numPtrs_ <= length());
    return numPtrs_;
  }

  // Discard all contents, but (per mozilla::Vector::clear semantics) don't
  // free or reallocate any dynamic storage associated with |vec_|.
  void clear() {
    vec_.clear();
    numPtrs_ = 0;
  }
  // An iterator that produces indices of reftyped slots, starting at the
  // logical bottom of the (grow-down) stack.  Indices have the same meaning
  // as the arguments to `isGCPointer`.  That is, if this iterator produces a
  // value `i`, then it means that `isGCPointer(i) == true`; if the value `i`
  // is never produced then `isGCPointer(i) == false`.  The values are
  // produced in ascending order.
  //
  // Because most slots are non-reftyped, some effort has been put into
  // skipping over large groups of non-reftyped slots quickly.
  class Iter {
    // Both `bufU8_` and `bufU32_` are made to point to `vec_`s array of
    // `uint8_t`s, so we can scan (backwards) through it either in bytes or
    // 32-bit words.  Recall that the last element in `vec_` pertains to the
    // lowest-addressed word in the machine's grow-down stack, and we want to
    // iterate logically "up" this stack, so we need to iterate backwards
    // through `vec_`.
    //
    // This dual-pointer scheme assumes that the `vec_`s content array is at
    // least 32-bit aligned.
    const uint8_t* bufU8_;
    const uint32_t* bufU32_;
    // The number of elements in `bufU8_`.
    const size_t nElems_;
    // The index in `bufU8_` where the next search should start.
    size_t next_;

   public:
    explicit Iter(const MachineStackTracker& mst)
        : bufU8_((uint8_t*)mst.vec_.begin()),
          bufU32_((uint32_t*)mst.vec_.begin()),
          nElems_(mst.vec_.length()),
          next_(mst.vec_.length() - 1) {
      MOZ_ASSERT(uintptr_t(bufU8_) == uintptr_t(bufU32_));
      // Check minimum alignment constraint on the array.
      MOZ_ASSERT(0 == (uintptr_t(bufU8_) & 3));
    }

    ~Iter() { MOZ_ASSERT(uintptr_t(bufU8_) == uintptr_t(bufU32_)); }

    // It is important, for termination of the search loop in `get()`, that
    // this has the value obtained by subtracting 1 from size_t(0).
    static constexpr size_t FINISHED = ~size_t(0);
    static_assert(FINISHED == size_t(0) - 1);

    // Returns the next index `i` for which `isGCPointer(i) == true`.
    size_t get() {
      while (next_ != FINISHED) {
        if (bufU8_[next_]) {
          next_--;
          return nElems_ - 1 - (next_ + 1);
        }

        // Invariant: next_ != FINISHED (so it's still a valid index)
        // and:       bufU8_[next_] == 0
        // (so we need to move backwards by at least 1)
        //
        // BEGIN optimization -- this could be removed without affecting
        // correctness.
        if ((next_ & 7) == 0) {
          // We're at the "bottom" of the current dual-4-element word.  Check
          // if we can jump backwards by 8.  This saves a conditional branch
          // and a few cycles by ORing two adjacent 32-bit words together,
          // whilst not requiring 64-bit alignment of `bufU32_`.
          while (next_ >= 8 &&
                 (bufU32_[(next_ - 4) >> 2] | bufU32_[(next_ - 8) >> 2]) == 0) {
            next_ -= 8;
          }
        }
        // END optimization

        next_--;
      }
      return FINISHED;
    }
  };
};
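
// A small illustration of Iter (hypothetical state): if vec_ holds
// {0, 1, 0, 0} -- i.e. only the slot with offsetFromSP == 2 is a GC pointer
// (vec_[1], per the index mapping above) -- then Iter::get() first returns 2
// and the next call returns FINISHED.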
//////////////////////////////////////////////////////////////////////////////
//
// StackMapGenerator, which carries all state needed to create stackmaps.

enum class HasDebugFrameWithLiveRefs { No, Maybe };

struct StackMapGenerator {
 private:
  // --- These are constant for the life of the function's compilation ---

  // For generating stackmaps, we'll need to know the offsets of registers
  // as saved by the trap exit stub.
  const RegisterOffsets& trapExitLayout_;
  const size_t trapExitLayoutNumWords_;

  // Completed stackmaps are added here.
  StackMaps* stackMaps_;

  // So as to be able to get the current offset when creating stackmaps.
  const MacroAssembler& masm_;

 public:
  // --- These are constant once we've completed beginFunction() ---

  // The number of bytes of arguments passed to this function in memory.
  size_t numStackArgBytes;

  MachineStackTracker machineStackTracker;  // tracks machine stack pointerness

  // This holds masm.framePushed at entry to the function's body.  It is a
  // Maybe because createStackMap needs to know whether or not we're still
  // in the prologue.  It makes a Nothing-to-Some transition just once per
  // function.
  Maybe<uint32_t> framePushedAtEntryToBody;
  // --- These can change at any point ---

  // This holds masm.framePushed as it would be for a function call
  // instruction, but excluding the stack area used to pass arguments in
  // memory.  That is, for an upcoming function call, this will hold
  //
  //   masm.framePushed() at the call instruction -
  //      StackArgAreaSizeAligned(argumentTypes)
  //
  // This value denotes the lowest-addressed stack word covered by the current
  // function's stackmap.  Words below this point form the highest-addressed
  // area of the callee's stackmap.  Note that all alignment padding above the
  // arguments-in-memory themselves belongs to the callee's stackmap, as
  // return calls will replace the function arguments with a new set of
  // arguments which may have different alignment.
  //
  // When not inside a function call setup/teardown sequence, it is Nothing.
  // It can make Nothing-to/from-Some transitions arbitrarily as we progress
  // through the function body.
  Maybe<uint32_t> framePushedExcludingOutboundCallArgs;
  // The number of memory-resident, ref-typed entries on the containing
  // BaseCompiler::stk_.
  size_t memRefsOnStk;

  // This is a copy of machineStackTracker that is used only within individual
  // calls to createStackMap.  It is here only to avoid possible heap
  // allocation costs resulting from making it local to createStackMap().
  MachineStackTracker augmentedMst;

  StackMapGenerator(StackMaps* stackMaps, const RegisterOffsets& trapExitLayout,
                    const size_t trapExitLayoutNumWords,
                    const MacroAssembler& masm)
      : trapExitLayout_(trapExitLayout),
        trapExitLayoutNumWords_(trapExitLayoutNumWords),
        stackMaps_(stackMaps),
        masm_(masm),
        numStackArgBytes(0),
        memRefsOnStk(0) {}

  // At the beginning of a function, we may have live roots in registers (as
  // arguments) at the point where we perform a stack overflow check.  This
  // method generates the "extra" stackmap entries to describe that, in the
  // case that the check fails and we wind up calling into the wasm exit
  // stub, as generated by GenerateTrapExit().
  //
  // The resulting map must correspond precisely with the stack layout
  // created for the integer registers as saved by (code generated by)
  // GenerateTrapExit().  To do that we use trapExitLayout_ and
  // trapExitLayoutNumWords_, which together comprise a description of the
  // layout and are created by GenerateTrapExitRegisterOffsets().
  [[nodiscard]] bool generateStackmapEntriesForTrapExit(
      const ArgTypeVector& args, ExitStubMapVector* extras);

  // Creates a stackmap associated with the instruction denoted by
  // |assemblerOffset|, incorporating pointers from the current operand
  // stack |stk|, incorporating possible extra pointers in |extra| at the
  // lower addressed end, and possibly with the associated frame having a
  // DebugFrame that must be traced, as indicated by |debugFrameWithLiveRefs|.
  [[nodiscard]] bool createStackMap(
      const char* who, const ExitStubMapVector& extras,
      uint32_t assemblerOffset,
      HasDebugFrameWithLiveRefs debugFrameWithLiveRefs, const StkVector& stk);
};

}  // namespace wasm
}  // namespace js

#endif  // wasm_wasm_baseline_frame_h