1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
4 * Copyright 2016 Mozilla Foundation
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
// This is an INTERNAL header for the Wasm baseline compiler: CPU stack frame,
// stack maps, and associated logic.
22 #ifndef wasm_wasm_baseline_frame_h
23 #define wasm_wasm_baseline_frame_h
25 #include "wasm/WasmBaselineCompile.h" // For BaseLocalIter
26 #include "wasm/WasmBCDefs.h"
27 #include "wasm/WasmBCRegDefs.h"
28 #include "wasm/WasmBCStk.h"
29 #include "wasm/WasmConstants.h" // For MaxFrameSize
31 // [SMDOC] Wasm baseline compiler's stack frame.
// For background, see "Wasm's ABIs" in WasmFrame.h; the following should never
// be in conflict with that.
36 // The stack frame has four parts ("below" means at lower addresses):
38 // - the Frame element;
39 // - the Local area, including the DebugFrame element and possibly a spilled
40 // pointer to stack results, if any; allocated below the header with various
41 // forms of alignment;
42 // - the Dynamic area, comprising the temporary storage the compiler uses for
43 // register spilling, allocated below the Local area;
44 // - the Arguments area, comprising memory allocated for outgoing calls,
45 // allocated below the Dynamic area.
47 // +==============================+
48 // | Incoming stack arg |
50 // ------------- +==============================+
51 // | Frame (fixed size) |
52 // ------------- +==============================+ <-------------------- FP
53 // ^ | DebugFrame (optional) | ^ ^ ^^
54 // localSize | Register arg local | | | ||
55 // | | ... | | | framePushed
56 // | | Register stack result ptr?| | | ||
57 // | | Non-arg local | | | ||
59 // | | (padding) | | | ||
60 // | | Instance pointer | | | ||
61 // | +------------------------------+ | | ||
62 // v | (padding) | | v ||
63 // ------------- +==============================+ currentStackHeight ||
64 // ^ | Dynamic (variable size) | | ||
65 // dynamicSize | ... | | ||
67 // ------------- | (free space, sometimes) | --------- v|
68 // +==============================+ <----- SP not-during calls
69 // | Arguments (sometimes) | |
71 // +==============================+ <----- SP during calls
73 // The Frame is addressed off the stack pointer. masm.framePushed() is always
74 // correct, and masm.getStackPointer() + masm.framePushed() always addresses the
75 // Frame, with the DebugFrame optionally below it.
77 // The Local area (including the DebugFrame and, if needed, the spilled value of
78 // the stack results area pointer) is laid out by BaseLocalIter and is allocated
79 // and deallocated by standard prologue and epilogue functions that manipulate
80 // the stack pointer, but it is accessed via BaseStackFrame.
82 // The Dynamic area is maintained by and accessed via BaseStackFrame. On some
83 // systems (such as ARM64), the Dynamic memory may be allocated in chunks
84 // because the SP needs a specific alignment, and in this case there will
85 // normally be some free space directly above the SP. The stack height does not
86 // include the free space, it reflects the logically used space only.
88 // The Dynamic area is where space for stack results is allocated when calling
89 // functions that return results on the stack. If a function has stack results,
90 // a pointer to the low address of the stack result area is passed as an
91 // additional argument, according to the usual ABI. See
92 // ABIResultIter::HasStackResults.
94 // The Arguments area is allocated and deallocated via BaseStackFrame (see
95 // comments later) but is accessed directly off the stack pointer.
100 using namespace js::jit
;
102 // Abstraction of the height of the stack frame, to avoid type confusion.
105 friend class BaseStackFrameAllocator
;
110 explicit StackHeight(uint32_t h
) : height(h
) {}
111 static StackHeight
Invalid() { return StackHeight(UINT32_MAX
); }
112 bool isValid() const { return height
!= UINT32_MAX
; }
113 bool operator==(StackHeight rhs
) const {
114 MOZ_ASSERT(isValid() && rhs
.isValid());
115 return height
== rhs
.height
;
117 bool operator!=(StackHeight rhs
) const { return !(*this == rhs
); }
120 // Abstraction for where multi-value results go on the machine stack.
122 class StackResultsLoc
{
125 Maybe
<uint32_t> height_
;
128 StackResultsLoc() : bytes_(0), count_(0){};
129 StackResultsLoc(uint32_t bytes
, size_t count
, uint32_t height
)
130 : bytes_(bytes
), count_(count
), height_(Some(height
)) {
131 MOZ_ASSERT(bytes
!= 0);
132 MOZ_ASSERT(count
!= 0);
133 MOZ_ASSERT(height
!= 0);
136 uint32_t bytes() const { return bytes_
; }
137 uint32_t count() const { return count_
; }
138 uint32_t height() const { return height_
.value(); }
140 bool hasStackResults() const { return bytes() != 0; }
141 StackResults
stackResults() const {
142 return hasStackResults() ? StackResults::HasStackResults
143 : StackResults::NoStackResults
;
147 // Abstraction of the baseline compiler's stack frame (except for the Frame /
148 // DebugFrame parts). See comments above for more. Remember, "below" on the
149 // stack means at lower addresses.
151 // The abstraction is split into two parts: BaseStackFrameAllocator is
152 // responsible for allocating and deallocating space on the stack and for
153 // performing computations that are affected by how the allocation is performed;
154 // BaseStackFrame then provides a pleasant interface for stack frame management.
156 class BaseStackFrameAllocator
{
157 MacroAssembler
& masm
;
159 #ifdef RABALDR_CHUNKY_STACK
160 // On platforms that require the stack pointer to be aligned on a boundary
161 // greater than the typical stack item (eg, ARM64 requires 16-byte alignment
162 // but items are 8 bytes), allocate stack memory in chunks, and use a
163 // separate stack height variable to track the effective stack pointer
164 // within the allocated area. Effectively, there's a variable amount of
165 // free space directly above the stack pointer. See diagram above.
167 // The following must be true in order for the stack height to be
168 // predictable at control flow joins:
170 // - The Local area is always aligned according to WasmStackAlignment, ie,
171 // masm.framePushed() % WasmStackAlignment is zero after allocating
174 // - ChunkSize is always a multiple of WasmStackAlignment.
176 // - Pushing and popping are always in units of ChunkSize (hence preserving
179 // - The free space on the stack (masm.framePushed() - currentStackHeight_)
180 // is a predictable (nonnegative) amount.
182 // As an optimization, we pre-allocate some space on the stack, the size of
183 // this allocation is InitialChunk and it must be a multiple of ChunkSize.
184 // It is allocated as part of the function prologue and deallocated as part
185 // of the epilogue, along with the locals.
187 // If ChunkSize is too large then we risk overflowing the stack on simple
188 // recursions with few live values where stack overflow should not be a
189 // risk; if it is too small we spend too much time adjusting the stack
192 // Good values for ChunkSize are the subject of future empirical analysis;
193 // eight words is just an educated guess.
195 static constexpr uint32_t ChunkSize
= 8 * sizeof(void*);
196 static constexpr uint32_t InitialChunk
= ChunkSize
;
198 // The current logical height of the frame is
199 // currentStackHeight_ = localSize_ + dynamicSize
200 // where dynamicSize is not accounted for explicitly and localSize_ also
201 // includes size for the DebugFrame.
203 // The allocated size of the frame, provided by masm.framePushed(), is usually
204 // larger than currentStackHeight_, notably at the beginning of execution when
205 // we've allocated InitialChunk extra space.
207 uint32_t currentStackHeight_
;
210 // Size of the Local area in bytes (stable after BaseCompiler::init() has
211 // called BaseStackFrame::setupLocals(), which in turn calls
212 // BaseStackFrameAllocator::setLocalSize()), always rounded to the proper
213 // stack alignment. The Local area is then allocated in beginFunction(),
214 // following the allocation of the Header. See onFixedStackAllocated()
220 ///////////////////////////////////////////////////////////////////////////
224 explicit BaseStackFrameAllocator(MacroAssembler
& masm
)
226 #ifdef RABALDR_CHUNKY_STACK
227 currentStackHeight_(0),
229 localSize_(UINT32_MAX
) {
233 //////////////////////////////////////////////////////////////////////
235 // The Local area - the static part of the frame.
237 // Record the size of the Local area, once it is known.
239 void setLocalSize(uint32_t localSize
) {
240 MOZ_ASSERT(localSize
== AlignBytes(localSize
, sizeof(void*)),
241 "localSize_ should be aligned to at least a pointer");
242 MOZ_ASSERT(localSize_
== UINT32_MAX
);
243 localSize_
= localSize
;
246 // Record the current stack height, after it has become stable in
247 // beginFunction(). See also BaseStackFrame::onFixedStackAllocated().
249 void onFixedStackAllocated() {
250 MOZ_ASSERT(localSize_
!= UINT32_MAX
);
251 #ifdef RABALDR_CHUNKY_STACK
252 currentStackHeight_
= localSize_
;
257 // The fixed amount of memory, in bytes, allocated on the stack below the
258 // Header for purposes such as locals and other fixed values. Includes all
259 // necessary alignment, and on ARM64 also the initial chunk for the working
262 uint32_t fixedAllocSize() const {
263 MOZ_ASSERT(localSize_
!= UINT32_MAX
);
264 #ifdef RABALDR_CHUNKY_STACK
265 return localSize_
+ InitialChunk
;
271 #ifdef RABALDR_CHUNKY_STACK
272 // The allocated frame size is frequently larger than the logical stack
273 // height; we round up to a chunk boundary, and special case the initial
275 uint32_t framePushedForHeight(uint32_t logicalHeight
) {
276 if (logicalHeight
<= fixedAllocSize()) {
277 return fixedAllocSize();
279 return fixedAllocSize() +
280 AlignBytes(logicalHeight
- fixedAllocSize(), ChunkSize
);
285 //////////////////////////////////////////////////////////////////////
287 // The Dynamic area - the dynamic part of the frame, for spilling and saving
288 // intermediate values.
290 // Offset off of sp_ for the slot at stack area location `offset`.
292 int32_t stackOffset(int32_t offset
) {
293 MOZ_ASSERT(offset
> 0);
294 return masm
.framePushed() - offset
;
297 uint32_t computeHeightWithStackResults(StackHeight stackBase
,
298 uint32_t stackResultBytes
) {
299 MOZ_ASSERT(stackResultBytes
);
300 MOZ_ASSERT(currentStackHeight() >= stackBase
.height
);
301 return stackBase
.height
+ stackResultBytes
;
304 #ifdef RABALDR_CHUNKY_STACK
305 void pushChunkyBytes(uint32_t bytes
) {
306 checkChunkyInvariants();
307 uint32_t freeSpace
= masm
.framePushed() - currentStackHeight_
;
308 if (freeSpace
< bytes
) {
309 uint32_t bytesToReserve
= AlignBytes(bytes
- freeSpace
, ChunkSize
);
310 MOZ_ASSERT(bytesToReserve
+ freeSpace
>= bytes
);
311 masm
.reserveStack(bytesToReserve
);
313 currentStackHeight_
+= bytes
;
314 checkChunkyInvariants();
317 void popChunkyBytes(uint32_t bytes
) {
318 checkChunkyInvariants();
319 currentStackHeight_
-= bytes
;
320 // Sometimes, popChunkyBytes() is used to pop a larger area, as when we drop
321 // values consumed by a call, and we may need to drop several chunks. But
322 // never drop the initial chunk. Crucially, the amount we drop is always an
323 // integral number of chunks.
324 uint32_t freeSpace
= masm
.framePushed() - currentStackHeight_
;
325 if (freeSpace
>= ChunkSize
) {
326 uint32_t targetAllocSize
= framePushedForHeight(currentStackHeight_
);
327 uint32_t amountToFree
= masm
.framePushed() - targetAllocSize
;
328 MOZ_ASSERT(amountToFree
% ChunkSize
== 0);
330 masm
.freeStack(amountToFree
);
333 checkChunkyInvariants();
337 uint32_t currentStackHeight() const {
338 #ifdef RABALDR_CHUNKY_STACK
339 return currentStackHeight_
;
341 return masm
.framePushed();
346 #ifdef RABALDR_CHUNKY_STACK
347 void checkChunkyInvariants() {
348 MOZ_ASSERT(masm
.framePushed() >= fixedAllocSize());
349 MOZ_ASSERT(masm
.framePushed() >= currentStackHeight_
);
350 MOZ_ASSERT(masm
.framePushed() == fixedAllocSize() ||
351 masm
.framePushed() - currentStackHeight_
< ChunkSize
);
352 MOZ_ASSERT((masm
.framePushed() - localSize_
) % ChunkSize
== 0);
356 // For a given stack height, return the appropriate size of the allocated
359 uint32_t framePushedForHeight(StackHeight stackHeight
) {
360 #ifdef RABALDR_CHUNKY_STACK
361 // A more complicated adjustment is needed.
362 return framePushedForHeight(stackHeight
.height
);
364 // The allocated frame size equals the stack height.
365 return stackHeight
.height
;
370 // The current height of the stack area, not necessarily zero-based, in a
373 StackHeight
stackHeight() const { return StackHeight(currentStackHeight()); }
375 // Set the frame height to a previously recorded value.
377 void setStackHeight(StackHeight amount
) {
378 #ifdef RABALDR_CHUNKY_STACK
379 currentStackHeight_
= amount
.height
;
380 masm
.setFramePushed(framePushedForHeight(amount
));
381 checkChunkyInvariants();
383 masm
.setFramePushed(amount
.height
);
387 // The current height of the dynamic part of the stack area (ie, the backing
388 // store for the evaluation stack), zero-based.
390 uint32_t dynamicHeight() const { return currentStackHeight() - localSize_
; }
392 // Before branching to an outer control label, pop the execution stack to
393 // the level expected by that region, but do not update masm.framePushed()
394 // as that will happen as compilation leaves the block.
396 // Note these operate directly on the stack pointer register.
398 void popStackBeforeBranch(StackHeight destStackHeight
,
399 uint32_t stackResultBytes
) {
400 uint32_t framePushedHere
= masm
.framePushed();
401 StackHeight heightThere
=
402 StackHeight(destStackHeight
.height
+ stackResultBytes
);
403 uint32_t framePushedThere
= framePushedForHeight(heightThere
);
404 if (framePushedHere
> framePushedThere
) {
405 masm
.addToStackPtr(Imm32(framePushedHere
- framePushedThere
));
409 void popStackBeforeBranch(StackHeight destStackHeight
, ResultType type
) {
410 popStackBeforeBranch(destStackHeight
,
411 ABIResultIter::MeasureStackBytes(type
));
414 // Given that there are |stackParamSize| bytes on the dynamic stack
415 // corresponding to the stack results, return the stack height once these
416 // parameters are popped.
418 StackHeight
stackResultsBase(uint32_t stackParamSize
) {
419 return StackHeight(currentStackHeight() - stackParamSize
);
422 // For most of WebAssembly, adjacent instructions have fallthrough control
423 // flow between them, which allows us to simply thread the current stack
424 // height through the compiler. There are two exceptions to this rule: when
425 // leaving a block via dead code, and when entering the "else" arm of an "if".
426 // In these cases, the stack height is the block entry height, plus any stack
427 // values (results in the block exit case, parameters in the else entry case).
429 void resetStackHeight(StackHeight destStackHeight
, ResultType type
) {
430 uint32_t height
= destStackHeight
.height
;
431 height
+= ABIResultIter::MeasureStackBytes(type
);
432 setStackHeight(StackHeight(height
));
435 // Return offset of stack result.
437 uint32_t locateStackResult(const ABIResult
& result
, StackHeight stackBase
,
438 uint32_t stackResultBytes
) {
439 MOZ_ASSERT(result
.onStack());
440 MOZ_ASSERT(result
.stackOffset() + result
.size() <= stackResultBytes
);
441 uint32_t end
= computeHeightWithStackResults(stackBase
, stackResultBytes
);
442 return end
- result
.stackOffset();
446 //////////////////////////////////////////////////////////////////////
448 // The Argument area - for outgoing calls.
450 // We abstract these operations as an optimization: we can merge the freeing
451 // of the argument area and dropping values off the stack after a call. But
452 // they always amount to manipulating the real stack pointer by some amount.
454 // Note that we do not update currentStackHeight_ for this; the frame does
455 // not know about outgoing arguments. But we do update framePushed(), so we
456 // can still index into the frame below the outgoing arguments area.
458 // This is always equivalent to a masm.reserveStack() call.
460 void allocArgArea(size_t argSize
) {
462 masm
.reserveStack(argSize
);
466 // This frees the argument area allocated by allocArgArea(), and `argSize`
467 // must be equal to the `argSize` argument to allocArgArea(). In addition
468 // we drop some values from the frame, corresponding to the values that were
469 // consumed by the call.
471 void freeArgAreaAndPopBytes(size_t argSize
, size_t dropSize
) {
472 // The method is called to re-initialize SP after the call. Note that
473 // this operation shall not be optimized for argSize + dropSize == 0.
474 #ifdef RABALDR_CHUNKY_STACK
475 // Freeing the outgoing arguments and freeing the consumed values have
476 // different semantics here, which is why the operation is split.
477 masm
.freeStackTo(masm
.framePushed() - argSize
);
478 popChunkyBytes(dropSize
);
480 masm
.freeStackTo(masm
.framePushed() - (argSize
+ dropSize
));
485 class BaseStackFrame final
: public BaseStackFrameAllocator
{
486 MacroAssembler
& masm
;
488 // The largest observed value of masm.framePushed(), ie, the size of the
489 // stack frame. Read this for its true value only when code generation is
491 uint32_t maxFramePushed_
;
493 // Patch point where we check for stack overflow.
494 CodeOffset stackAddOffset_
;
496 // Low byte offset of pointer to stack results, if any.
497 Maybe
<int32_t> stackResultsPtrOffset_
;
499 // The offset of instance pointer.
500 uint32_t instancePointerOffset_
;
502 // Low byte offset of local area for true locals (not parameters).
505 // High byte offset + 1 of local area for true locals.
508 // The stack pointer, cached for brevity.
512 explicit BaseStackFrame(MacroAssembler
& masm
)
513 : BaseStackFrameAllocator(masm
),
517 instancePointerOffset_(UINT32_MAX
),
519 varHigh_(UINT32_MAX
),
520 sp_(masm
.getStackPointer()) {}
522 ///////////////////////////////////////////////////////////////////////////
524 // Stack management and overflow checking
526 // This must be called once beginFunction has allocated space for the Header
527 // (the Frame and DebugFrame) and the Local area, and will record the current
528 // frame size for internal use by the stack abstractions.
530 void onFixedStackAllocated() {
531 maxFramePushed_
= masm
.framePushed();
532 BaseStackFrameAllocator::onFixedStackAllocated();
535 // We won't know until after we've generated code how big the frame will be
536 // (we may need arbitrary spill slots and outgoing param slots) so emit a
537 // patchable add that is patched in endFunction().
539 // Note the platform scratch register may be used by branchPtr(), so
540 // generally tmp must be something else.
542 void checkStack(Register tmp
, BytecodeOffset trapOffset
) {
543 stackAddOffset_
= masm
.sub32FromStackPtrWithPatch(tmp
);
545 masm
.branchPtr(Assembler::Below
,
546 Address(InstanceReg
, wasm::Instance::offsetOfStackLimit()),
548 masm
.wasmTrap(Trap::StackOverflow
, trapOffset
);
552 void patchCheckStack() {
553 masm
.patchSub32FromStackPtr(stackAddOffset_
,
554 Imm32(int32_t(maxFramePushed_
)));
557 // Very large frames are implausible, probably an attack.
559 bool checkStackHeight() { return maxFramePushed_
<= MaxFrameSize
; }
561 ///////////////////////////////////////////////////////////////////////////
566 // Type of the value.
569 // Byte offset from Frame "into" the locals, ie positive for true locals
570 // and negative for incoming args that read directly from the arg area.
571 // It assumes the stack is growing down and that locals are on the stack
572 // at lower addresses than Frame, and is the offset from Frame of the
573 // lowest-addressed byte of the local.
576 Local(MIRType type
, int32_t offs
) : type(type
), offs(offs
) {}
578 bool isStackArgument() const { return offs
< 0; }
581 // Profiling shows that the number of parameters and locals frequently
582 // touches or exceeds 8. So 16 seems like a reasonable starting point.
583 using LocalVector
= Vector
<Local
, 16, SystemAllocPolicy
>;
585 // Initialize `localInfo` based on the types of `locals` and `args`.
586 [[nodiscard
]] bool setupLocals(const ValTypeVector
& locals
,
587 const ArgTypeVector
& args
, bool debugEnabled
,
588 LocalVector
* localInfo
) {
589 if (!localInfo
->reserve(locals
.length())) {
593 DebugOnly
<uint32_t> index
= 0;
594 BaseLocalIter
i(locals
, args
, debugEnabled
);
595 for (; !i
.done() && i
.index() < args
.lengthWithoutStackResults(); i
++) {
596 MOZ_ASSERT(i
.isArg());
597 MOZ_ASSERT(i
.index() == index
);
598 localInfo
->infallibleEmplaceBack(i
.mirType(), i
.frameOffset());
602 varLow_
= i
.frameSize();
603 for (; !i
.done(); i
++) {
604 MOZ_ASSERT(!i
.isArg());
605 MOZ_ASSERT(i
.index() == index
);
606 localInfo
->infallibleEmplaceBack(i
.mirType(), i
.frameOffset());
609 varHigh_
= i
.frameSize();
611 // Reserve an additional stack slot for the instance pointer.
612 const uint32_t pointerAlignedVarHigh
= AlignBytes(varHigh_
, sizeof(void*));
613 const uint32_t localSize
= pointerAlignedVarHigh
+ sizeof(void*);
614 instancePointerOffset_
= localSize
;
616 setLocalSize(AlignBytes(localSize
, WasmStackAlignment
));
618 if (args
.hasSyntheticStackResultPointerArg()) {
619 stackResultsPtrOffset_
= Some(i
.stackResultPointerOffset());
625 void zeroLocals(BaseRegAlloc
* ra
);
627 Address
addressOfLocal(const Local
& local
, uint32_t additionalOffset
= 0) {
628 if (local
.isStackArgument()) {
629 return Address(FramePointer
,
630 stackArgumentOffsetFromFp(local
) + additionalOffset
);
632 return Address(sp_
, localOffsetFromSp(local
) + additionalOffset
);
635 void loadLocalI32(const Local
& src
, RegI32 dest
) {
636 masm
.load32(addressOfLocal(src
), dest
);
640 void loadLocalI64Low(const Local
& src
, RegI32 dest
) {
641 masm
.load32(addressOfLocal(src
, INT64LOW_OFFSET
), dest
);
644 void loadLocalI64High(const Local
& src
, RegI32 dest
) {
645 masm
.load32(addressOfLocal(src
, INT64HIGH_OFFSET
), dest
);
649 void loadLocalI64(const Local
& src
, RegI64 dest
) {
650 masm
.load64(addressOfLocal(src
), dest
);
653 void loadLocalRef(const Local
& src
, RegRef dest
) {
654 masm
.loadPtr(addressOfLocal(src
), dest
);
657 void loadLocalF64(const Local
& src
, RegF64 dest
) {
658 masm
.loadDouble(addressOfLocal(src
), dest
);
661 void loadLocalF32(const Local
& src
, RegF32 dest
) {
662 masm
.loadFloat32(addressOfLocal(src
), dest
);
665 #ifdef ENABLE_WASM_SIMD
666 void loadLocalV128(const Local
& src
, RegV128 dest
) {
667 masm
.loadUnalignedSimd128(addressOfLocal(src
), dest
);
671 void storeLocalI32(RegI32 src
, const Local
& dest
) {
672 masm
.store32(src
, addressOfLocal(dest
));
675 void storeLocalI64(RegI64 src
, const Local
& dest
) {
676 masm
.store64(src
, addressOfLocal(dest
));
679 void storeLocalRef(RegRef src
, const Local
& dest
) {
680 masm
.storePtr(src
, addressOfLocal(dest
));
683 void storeLocalF64(RegF64 src
, const Local
& dest
) {
684 masm
.storeDouble(src
, addressOfLocal(dest
));
687 void storeLocalF32(RegF32 src
, const Local
& dest
) {
688 masm
.storeFloat32(src
, addressOfLocal(dest
));
691 #ifdef ENABLE_WASM_SIMD
692 void storeLocalV128(RegV128 src
, const Local
& dest
) {
693 masm
.storeUnalignedSimd128(src
, addressOfLocal(dest
));
697 // Offset off of sp_ for `local`.
698 int32_t localOffsetFromSp(const Local
& local
) {
699 MOZ_ASSERT(!local
.isStackArgument());
700 return localOffset(local
.offs
);
703 // Offset off of frame pointer for `stack argument`.
704 int32_t stackArgumentOffsetFromFp(const Local
& local
) {
705 MOZ_ASSERT(local
.isStackArgument());
709 // The incoming stack result area pointer is for stack results of the function
711 void loadIncomingStackResultAreaPtr(RegPtr reg
) {
712 const int32_t offset
= stackResultsPtrOffset_
.value();
713 Address src
= offset
< 0 ? Address(FramePointer
, -offset
)
714 : Address(sp_
, stackOffset(offset
));
715 masm
.loadPtr(src
, reg
);
718 void storeIncomingStackResultAreaPtr(RegPtr reg
) {
719 // If we get here, that means the pointer to the stack results area was
720 // passed in as a register, and therefore it will be spilled below the
721 // frame, so the offset is a positive height.
722 MOZ_ASSERT(stackResultsPtrOffset_
.value() > 0);
724 Address(sp_
, stackOffset(stackResultsPtrOffset_
.value())));
727 void loadInstancePtr(Register dst
) {
728 // Sometimes loadInstancePtr is used in context when SP is not sync is FP,
729 // e.g. just after tail calls returns.
730 masm
.loadPtr(Address(FramePointer
, -instancePointerOffset_
), dst
);
733 void storeInstancePtr(Register instance
) {
734 masm
.storePtr(instance
, Address(sp_
, stackOffset(instancePointerOffset_
)));
737 int32_t getInstancePtrOffset() { return stackOffset(instancePointerOffset_
); }
739 // An outgoing stack result area pointer is for stack results of callees of
740 // the function being compiled.
741 void computeOutgoingStackResultAreaPtr(const StackResultsLoc
& results
,
743 MOZ_ASSERT(results
.height() <= masm
.framePushed());
744 uint32_t offsetFromSP
= masm
.framePushed() - results
.height();
745 masm
.moveStackPtrTo(dest
);
747 masm
.addPtr(Imm32(offsetFromSP
), dest
);
752 // Offset off of sp_ for a local with offset `offset` from Frame.
753 int32_t localOffset(int32_t offset
) { return masm
.framePushed() - offset
; }
756 ///////////////////////////////////////////////////////////////////////////
760 static constexpr size_t StackSizeOfPtr
= ABIResult::StackSizeOfPtr
;
761 static constexpr size_t StackSizeOfInt64
= ABIResult::StackSizeOfInt64
;
762 static constexpr size_t StackSizeOfFloat
= ABIResult::StackSizeOfFloat
;
763 static constexpr size_t StackSizeOfDouble
= ABIResult::StackSizeOfDouble
;
764 #ifdef ENABLE_WASM_SIMD
765 static constexpr size_t StackSizeOfV128
= ABIResult::StackSizeOfV128
;
768 // Pushes the register `r` to the stack. This pushes the full 64-bit width on
769 // 64-bit systems, and 32-bits otherwise.
770 uint32_t pushGPR(Register r
) {
771 DebugOnly
<uint32_t> stackBefore
= currentStackHeight();
772 #ifdef RABALDR_CHUNKY_STACK
773 pushChunkyBytes(StackSizeOfPtr
);
774 masm
.storePtr(r
, Address(sp_
, stackOffset(currentStackHeight())));
778 maxFramePushed_
= std::max(maxFramePushed_
, masm
.framePushed());
779 MOZ_ASSERT(stackBefore
+ StackSizeOfPtr
== currentStackHeight());
780 return currentStackHeight();
783 uint32_t pushFloat32(FloatRegister r
) {
784 DebugOnly
<uint32_t> stackBefore
= currentStackHeight();
785 #ifdef RABALDR_CHUNKY_STACK
786 pushChunkyBytes(StackSizeOfFloat
);
787 masm
.storeFloat32(r
, Address(sp_
, stackOffset(currentStackHeight())));
791 maxFramePushed_
= std::max(maxFramePushed_
, masm
.framePushed());
792 MOZ_ASSERT(stackBefore
+ StackSizeOfFloat
== currentStackHeight());
793 return currentStackHeight();
796 #ifdef ENABLE_WASM_SIMD
797 uint32_t pushV128(RegV128 r
) {
798 DebugOnly
<uint32_t> stackBefore
= currentStackHeight();
799 # ifdef RABALDR_CHUNKY_STACK
800 pushChunkyBytes(StackSizeOfV128
);
802 masm
.adjustStack(-(int)StackSizeOfV128
);
804 masm
.storeUnalignedSimd128(r
,
805 Address(sp_
, stackOffset(currentStackHeight())));
806 maxFramePushed_
= std::max(maxFramePushed_
, masm
.framePushed());
807 MOZ_ASSERT(stackBefore
+ StackSizeOfV128
== currentStackHeight());
808 return currentStackHeight();
812 uint32_t pushDouble(FloatRegister r
) {
813 DebugOnly
<uint32_t> stackBefore
= currentStackHeight();
814 #ifdef RABALDR_CHUNKY_STACK
815 pushChunkyBytes(StackSizeOfDouble
);
816 masm
.storeDouble(r
, Address(sp_
, stackOffset(currentStackHeight())));
820 maxFramePushed_
= std::max(maxFramePushed_
, masm
.framePushed());
821 MOZ_ASSERT(stackBefore
+ StackSizeOfDouble
== currentStackHeight());
822 return currentStackHeight();
825 // Pops the stack into the register `r`. This pops the full 64-bit width on
826 // 64-bit systems, and 32-bits otherwise.
827 void popGPR(Register r
) {
828 DebugOnly
<uint32_t> stackBefore
= currentStackHeight();
829 #ifdef RABALDR_CHUNKY_STACK
830 masm
.loadPtr(Address(sp_
, stackOffset(currentStackHeight())), r
);
831 popChunkyBytes(StackSizeOfPtr
);
835 MOZ_ASSERT(stackBefore
- StackSizeOfPtr
== currentStackHeight());
838 void popFloat32(FloatRegister r
) {
839 DebugOnly
<uint32_t> stackBefore
= currentStackHeight();
840 #ifdef RABALDR_CHUNKY_STACK
841 masm
.loadFloat32(Address(sp_
, stackOffset(currentStackHeight())), r
);
842 popChunkyBytes(StackSizeOfFloat
);
846 MOZ_ASSERT(stackBefore
- StackSizeOfFloat
== currentStackHeight());
849 void popDouble(FloatRegister r
) {
850 DebugOnly
<uint32_t> stackBefore
= currentStackHeight();
851 #ifdef RABALDR_CHUNKY_STACK
852 masm
.loadDouble(Address(sp_
, stackOffset(currentStackHeight())), r
);
853 popChunkyBytes(StackSizeOfDouble
);
857 MOZ_ASSERT(stackBefore
- StackSizeOfDouble
== currentStackHeight());
860 #ifdef ENABLE_WASM_SIMD
861 void popV128(RegV128 r
) {
862 DebugOnly
<uint32_t> stackBefore
= currentStackHeight();
863 masm
.loadUnalignedSimd128(Address(sp_
, stackOffset(currentStackHeight())),
865 # ifdef RABALDR_CHUNKY_STACK
866 popChunkyBytes(StackSizeOfV128
);
868 masm
.adjustStack((int)StackSizeOfV128
);
870 MOZ_ASSERT(stackBefore
- StackSizeOfV128
== currentStackHeight());
874 void popBytes(size_t bytes
) {
876 #ifdef RABALDR_CHUNKY_STACK
877 popChunkyBytes(bytes
);
879 masm
.freeStack(bytes
);
884 void loadStackI32(int32_t offset
, RegI32 dest
) {
885 masm
.load32(Address(sp_
, stackOffset(offset
)), dest
);
888 void loadStackI64(int32_t offset
, RegI64 dest
) {
889 masm
.load64(Address(sp_
, stackOffset(offset
)), dest
);
893 void loadStackI64Low(int32_t offset
, RegI32 dest
) {
894 masm
.load32(Address(sp_
, stackOffset(offset
- INT64LOW_OFFSET
)), dest
);
897 void loadStackI64High(int32_t offset
, RegI32 dest
) {
898 masm
.load32(Address(sp_
, stackOffset(offset
- INT64HIGH_OFFSET
)), dest
);
902 void loadStackRef(int32_t offset
, RegRef dest
) {
903 masm
.loadPtr(Address(sp_
, stackOffset(offset
)), dest
);
906 void loadStackF64(int32_t offset
, RegF64 dest
) {
907 masm
.loadDouble(Address(sp_
, stackOffset(offset
)), dest
);
910 void loadStackF32(int32_t offset
, RegF32 dest
) {
911 masm
.loadFloat32(Address(sp_
, stackOffset(offset
)), dest
);
914 #ifdef ENABLE_WASM_SIMD
915 void loadStackV128(int32_t offset
, RegV128 dest
) {
916 masm
.loadUnalignedSimd128(Address(sp_
, stackOffset(offset
)), dest
);
920 uint32_t prepareStackResultArea(StackHeight stackBase
,
921 uint32_t stackResultBytes
) {
922 uint32_t end
= computeHeightWithStackResults(stackBase
, stackResultBytes
);
923 if (currentStackHeight() < end
) {
924 uint32_t bytes
= end
- currentStackHeight();
925 #ifdef RABALDR_CHUNKY_STACK
926 pushChunkyBytes(bytes
);
928 masm
.reserveStack(bytes
);
930 maxFramePushed_
= std::max(maxFramePushed_
, masm
.framePushed());
935 void finishStackResultArea(StackHeight stackBase
, uint32_t stackResultBytes
) {
936 uint32_t end
= computeHeightWithStackResults(stackBase
, stackResultBytes
);
937 MOZ_ASSERT(currentStackHeight() >= end
);
938 popBytes(currentStackHeight() - end
);
941 // |srcHeight| and |destHeight| are stack heights *including* |bytes|.
942 void shuffleStackResultsTowardFP(uint32_t srcHeight
, uint32_t destHeight
,
943 uint32_t bytes
, Register temp
) {
944 MOZ_ASSERT(destHeight
< srcHeight
);
945 MOZ_ASSERT(bytes
% sizeof(uint32_t) == 0);
946 // The shuffleStackResultsTowardFP is used when SP/framePushed is not
947 // tracked by the compiler, e.g. after possible return call -- use
948 // FramePointer instead of sp_.
949 int32_t destOffset
= int32_t(-destHeight
+ bytes
);
950 int32_t srcOffset
= int32_t(-srcHeight
+ bytes
);
951 while (bytes
>= sizeof(intptr_t)) {
952 destOffset
-= sizeof(intptr_t);
953 srcOffset
-= sizeof(intptr_t);
954 bytes
-= sizeof(intptr_t);
955 masm
.loadPtr(Address(FramePointer
, srcOffset
), temp
);
956 masm
.storePtr(temp
, Address(FramePointer
, destOffset
));
959 MOZ_ASSERT(bytes
== sizeof(uint32_t));
960 destOffset
-= sizeof(uint32_t);
961 srcOffset
-= sizeof(uint32_t);
962 masm
.load32(Address(FramePointer
, srcOffset
), temp
);
963 masm
.store32(temp
, Address(FramePointer
, destOffset
));
967 // Unlike the overload that operates on raw heights, |srcHeight| and
968 // |destHeight| are stack heights *not including* |bytes|.
969 void shuffleStackResultsTowardFP(StackHeight srcHeight
,
970 StackHeight destHeight
, uint32_t bytes
,
972 MOZ_ASSERT(srcHeight
.isValid());
973 MOZ_ASSERT(destHeight
.isValid());
974 uint32_t src
= computeHeightWithStackResults(srcHeight
, bytes
);
975 uint32_t dest
= computeHeightWithStackResults(destHeight
, bytes
);
976 MOZ_ASSERT(src
<= currentStackHeight());
977 MOZ_ASSERT(dest
<= currentStackHeight());
978 shuffleStackResultsTowardFP(src
, dest
, bytes
, temp
);
981 // |srcHeight| and |destHeight| are stack heights *including* |bytes|.
982 void shuffleStackResultsTowardSP(uint32_t srcHeight
, uint32_t destHeight
,
983 uint32_t bytes
, Register temp
) {
984 MOZ_ASSERT(destHeight
> srcHeight
);
985 MOZ_ASSERT(bytes
% sizeof(uint32_t) == 0);
986 uint32_t destOffset
= stackOffset(destHeight
);
987 uint32_t srcOffset
= stackOffset(srcHeight
);
988 while (bytes
>= sizeof(intptr_t)) {
989 masm
.loadPtr(Address(sp_
, srcOffset
), temp
);
990 masm
.storePtr(temp
, Address(sp_
, destOffset
));
991 destOffset
+= sizeof(intptr_t);
992 srcOffset
+= sizeof(intptr_t);
993 bytes
-= sizeof(intptr_t);
996 MOZ_ASSERT(bytes
== sizeof(uint32_t));
997 masm
.load32(Address(sp_
, srcOffset
), temp
);
998 masm
.store32(temp
, Address(sp_
, destOffset
));
1002 // Copy results from the top of the current stack frame to an area of memory,
1003 // and pop the stack accordingly. `dest` is the address of the low byte of
1005 void popStackResultsToMemory(Register dest
, uint32_t bytes
, Register temp
) {
1006 MOZ_ASSERT(bytes
<= currentStackHeight());
1007 MOZ_ASSERT(bytes
% sizeof(uint32_t) == 0);
1008 uint32_t bytesToPop
= bytes
;
1009 uint32_t srcOffset
= stackOffset(currentStackHeight());
1010 uint32_t destOffset
= 0;
1011 while (bytes
>= sizeof(intptr_t)) {
1012 masm
.loadPtr(Address(sp_
, srcOffset
), temp
);
1013 masm
.storePtr(temp
, Address(dest
, destOffset
));
1014 destOffset
+= sizeof(intptr_t);
1015 srcOffset
+= sizeof(intptr_t);
1016 bytes
-= sizeof(intptr_t);
1019 MOZ_ASSERT(bytes
== sizeof(uint32_t));
1020 masm
.load32(Address(sp_
, srcOffset
), temp
);
1021 masm
.store32(temp
, Address(dest
, destOffset
));
1023 popBytes(bytesToPop
);
1026 void allocArgArea(size_t argSize
) {
1028 BaseStackFrameAllocator::allocArgArea(argSize
);
1029 maxFramePushed_
= std::max(maxFramePushed_
, masm
.framePushed());
1034 void store32BitsToStack(int32_t imm
, uint32_t destHeight
, Register temp
) {
1035 masm
.move32(Imm32(imm
), temp
);
1036 masm
.store32(temp
, Address(sp_
, stackOffset(destHeight
)));
1039 void store64BitsToStack(int64_t imm
, uint32_t destHeight
, Register temp
) {
1041 masm
.move64(Imm64(imm
), Register64(temp
));
1042 masm
.store64(Register64(temp
), Address(sp_
, stackOffset(destHeight
)));
1047 } bits
= {.i64
= imm
};
1048 static_assert(sizeof(bits
) == 8);
1049 store32BitsToStack(bits
.i32
[0], destHeight
, temp
);
1050 store32BitsToStack(bits
.i32
[1], destHeight
- sizeof(int32_t), temp
);
1055 void storeImmediatePtrToStack(intptr_t imm
, uint32_t destHeight
,
1058 static_assert(StackSizeOfPtr
== 8);
1059 store64BitsToStack(imm
, destHeight
, temp
);
1061 static_assert(StackSizeOfPtr
== 4);
1062 store32BitsToStack(int32_t(imm
), destHeight
, temp
);
1066 void storeImmediateI64ToStack(int64_t imm
, uint32_t destHeight
,
1068 store64BitsToStack(imm
, destHeight
, temp
);
1071 void storeImmediateF32ToStack(float imm
, uint32_t destHeight
, Register temp
) {
1075 } bits
= {.f32
= imm
};
1076 static_assert(sizeof(bits
) == 4);
1077 // Do not store 4 bytes if StackSizeOfFloat == 8. It's probably OK to do
1078 // so, but it costs little to store something predictable.
1079 if (StackSizeOfFloat
== 4) {
1080 store32BitsToStack(bits
.i32
, destHeight
, temp
);
1082 store64BitsToStack(uint32_t(bits
.i32
), destHeight
, temp
);
1086 void storeImmediateF64ToStack(double imm
, uint32_t destHeight
,
1091 } bits
= {.f64
= imm
};
1092 static_assert(sizeof(bits
) == 8);
1093 store64BitsToStack(bits
.i64
, destHeight
, temp
);
#ifdef ENABLE_WASM_SIMD
// Stores the 16 bytes of the V128 immediate |imm| to the stack as four
// 32-bit words.  Word i (in memory order of imm.bytes) goes to height
// |destHeight| - 4 * i.  |temp| is clobbered.
void storeImmediateV128ToStack(V128 imm, uint32_t destHeight, Register temp) {
  // Copy straight into the word array instead of writing one union member
  // and reading another, which is undefined behaviour in ISO C++.
  int32_t words[4];
  static_assert(sizeof(words) == 16);
  memcpy(words, imm.bytes, 16);
  for (unsigned i = 0; i < 4; i++) {
    store32BitsToStack(words[i], destHeight - i * sizeof(int32_t), temp);
  }
}
#endif
1111 //////////////////////////////////////////////////////////////////////////////
1113 // MachineStackTracker, used for stack-slot pointerness tracking.
1115 // An expensive operation in stack-map creation is copying of the
1116 // MachineStackTracker (MST) into the final StackMap. This is done in
1117 // StackMapGenerator::createStackMap. Given that this is basically a
1118 // bit-array copy, it is reasonable to ask whether the two classes could have
// a more similar representation, so that the copy could then be done with
// `memcpy`.
//
// Although in principle feasible, the following complications exist, and so for
1123 // the moment, this has not been done.
1125 // * StackMap is optimised for compact size (storage) since there will be
1126 // many, so it uses a true bitmap. MST is intended to be fast and simple,
1127 // and only one exists at once (per compilation thread). Doing this would
1128 // require MST to use a true bitmap, and hence ..
1130 // * .. the copying can't be a straight memcpy, since StackMap has entries for
1131 // words not covered by MST. Hence the copy would need to shift bits in
1132 // each byte left or right (statistically speaking, in 7 cases out of 8) in
1133 // order to ensure no "holes" in the resulting bitmap.
1135 // * Furthermore the copying would need to logically invert the direction of
1136 // the stacks. For MST, index zero in the vector corresponds to the highest
1137 // address in the stack. For StackMap, bit index zero corresponds to the
1138 // lowest address in the stack.
1140 // * Finally, StackMap is a variable-length structure whose size must be known
1141 // at creation time. The size of an MST by contrast isn't known at creation
1142 // time -- it grows as the baseline compiler pushes stuff on its value
1143 // stack. That's why it has to have vector entry 0 being the highest address.
1145 // * Although not directly relevant, StackMaps are also created by the via-Ion
1146 // compilation routes, by translation from the pre-existing "JS-era"
1147 // LSafePoints (CreateStackMapFromLSafepoint). So if we want to mash
1148 // StackMap around to suit baseline better, we also need to ensure it
1149 // doesn't break Ion somehow.
1151 class MachineStackTracker
{
1152 // Simulates the machine's stack, with one bool per word. The booleans are
1153 // represented as `uint8_t`s so as to guarantee the element size is one
1154 // byte. Index zero in this vector corresponds to the highest address in
1155 // the machine's stack. The last entry corresponds to what SP currently
1156 // points at. This all assumes a grow-down stack.
1158 // numPtrs_ contains the number of "true" values in vec_, and is therefore
1159 // redundant. But it serves as a constant-time way to detect the common
1160 // case where vec_ holds no "true" values.
1162 Vector
<uint8_t, 64, SystemAllocPolicy
> vec_
;
1165 MachineStackTracker() : numPtrs_(0) {}
1167 ~MachineStackTracker() {
1170 for (uint8_t b
: vec_
) {
1173 MOZ_ASSERT(n
== numPtrs_
);
1177 // Clone this MachineStackTracker, writing the result at |dst|.
1178 [[nodiscard
]] bool cloneTo(MachineStackTracker
* dst
);
1180 // Notionally push |n| non-pointers on the stack.
1181 [[nodiscard
]] bool pushNonGCPointers(size_t n
) {
1182 return vec_
.appendN(uint8_t(false), n
);
1185 // Mark the stack slot |offsetFromSP| up from the bottom as holding a
1187 void setGCPointer(size_t offsetFromSP
) {
1188 // offsetFromSP == 0 denotes the most recently pushed item, == 1 the
1189 // second most recently pushed item, etc.
1190 MOZ_ASSERT(offsetFromSP
< vec_
.length());
1192 size_t offsetFromTop
= vec_
.length() - 1 - offsetFromSP
;
1193 numPtrs_
= numPtrs_
+ 1 - (vec_
[offsetFromTop
] ? 1 : 0);
1194 vec_
[offsetFromTop
] = uint8_t(true);
1197 // Query the pointerness of the slot |offsetFromSP| up from the bottom.
1198 bool isGCPointer(size_t offsetFromSP
) const {
1199 MOZ_ASSERT(offsetFromSP
< vec_
.length());
1201 size_t offsetFromTop
= vec_
.length() - 1 - offsetFromSP
;
1202 return bool(vec_
[offsetFromTop
]);
1205 // Return the number of words tracked by this MachineStackTracker.
1206 size_t length() const { return vec_
.length(); }
1208 // Return the number of pointer-typed words tracked by this
1209 // MachineStackTracker.
1210 size_t numPtrs() const {
1211 MOZ_ASSERT(numPtrs_
<= length());
1215 // Discard all contents, but (per mozilla::Vector::clear semantics) don't
1216 // free or reallocate any dynamic storage associated with |vec_|.
1222 // An iterator that produces indices of reftyped slots, starting at the
1223 // logical bottom of the (grow-down) stack. Indices have the same meaning
1224 // as the arguments to `isGCPointer`. That is, if this iterator produces a
1225 // value `i`, then it means that `isGCPointer(i) == true`; if the value `i`
1226 // is never produced then `isGCPointer(i) == false`. The values are
1227 // produced in ascending order.
1229 // Because most slots are non-reftyped, some effort has been put into
1230 // skipping over large groups of non-reftyped slots quickly.
1232 // Both `bufU8_` and `bufU32_` are made to point to `vec_`s array of
1233 // `uint8_t`s, so we can scan (backwards) through it either in bytes or
1234 // 32-bit words. Recall that the last element in `vec_` pertains to the
1235 // lowest-addressed word in the machine's grow-down stack, and we want to
1236 // iterate logically "up" this stack, so we need to iterate backwards
1239 // This dual-pointer scheme assumes that the `vec_`s content array is at
1240 // least 32-bit aligned.
1241 const uint8_t* bufU8_
;
1242 const uint32_t* bufU32_
;
1243 // The number of elements in `bufU8_`.
1244 const size_t nElems_
;
1245 // The index in `bufU8_` where the next search should start.
1249 explicit Iter(const MachineStackTracker
& mst
)
1250 : bufU8_((uint8_t*)mst
.vec_
.begin()),
1251 bufU32_((uint32_t*)mst
.vec_
.begin()),
1252 nElems_(mst
.vec_
.length()),
1253 next_(mst
.vec_
.length() - 1) {
1254 MOZ_ASSERT(uintptr_t(bufU8_
) == uintptr_t(bufU32_
));
1255 // Check minimum alignment constraint on the array.
1256 MOZ_ASSERT(0 == (uintptr_t(bufU8_
) & 3));
1259 ~Iter() { MOZ_ASSERT(uintptr_t(bufU8_
) == uintptr_t(bufU32_
)); }
1261 // It is important, for termination of the search loop in `next()`, that
1262 // this has the value obtained by subtracting 1 from size_t(0).
1263 static constexpr size_t FINISHED
= ~size_t(0);
1264 static_assert(FINISHED
== size_t(0) - 1);
1266 // Returns the next index `i` for which `isGCPointer(i) == true`.
1268 while (next_
!= FINISHED
) {
1269 if (bufU8_
[next_
]) {
1271 return nElems_
- 1 - (next_
+ 1);
1273 // Invariant: next_ != FINISHED (so it's still a valid index)
1274 // and: bufU8_[next_] == 0
1275 // (so we need to move backwards by at least 1)
1277 // BEGIN optimization -- this could be removed without affecting
1279 if ((next_
& 7) == 0) {
1280 // We're at the "bottom" of the current dual-4-element word. Check
1281 // if we can jump backwards by 8. This saves a conditional branch
1282 // and a few cycles by ORing two adjacent 32-bit words together,
1283 // whilst not requiring 64-bit alignment of `bufU32_`.
1284 while (next_
>= 8 &&
1285 (bufU32_
[(next_
- 4) >> 2] | bufU32_
[(next_
- 8) >> 2]) == 0) {
1297 //////////////////////////////////////////////////////////////////////////////
1299 // StackMapGenerator, which carries all state needed to create stackmaps.
// Passed to StackMapGenerator::createStackMap to indicate whether the
// associated frame possibly has a DebugFrame that must be traced.
enum class HasDebugFrameWithLiveRefs {
  No,
  Maybe,
};
1303 struct StackMapGenerator
{
1305 // --- These are constant for the life of the function's compilation ---
1307 // For generating stackmaps, we'll need to know the offsets of registers
1308 // as saved by the trap exit stub.
1309 const RegisterOffsets
& trapExitLayout_
;
1310 const size_t trapExitLayoutNumWords_
;
1312 // Completed stackmaps are added here
1313 StackMaps
* stackMaps_
;
1315 // So as to be able to get current offset when creating stackmaps
1316 const MacroAssembler
& masm_
;
1319 // --- These are constant once we've completed beginFunction() ---
1321 // The number of bytes of arguments passed to this function in memory.
1322 size_t numStackArgBytes
;
1324 MachineStackTracker machineStackTracker
; // tracks machine stack pointerness
1326 // This holds masm.framePushed at entry to the function's body. It is a
1327 // Maybe because createStackMap needs to know whether or not we're still
1328 // in the prologue. It makes a Nothing-to-Some transition just once per
1330 Maybe
<uint32_t> framePushedAtEntryToBody
;
1332 // --- These can change at any point ---
1334 // This holds masm.framePushed at it would be be for a function call
1335 // instruction, but excluding the stack area used to pass arguments in
1336 // memory. That is, for an upcoming function call, this will hold
1338 // masm.framePushed() at the call instruction -
1339 // StackArgAreaSizeAligned(argumentTypes)
1341 // This value denotes the lowest-addressed stack word covered by the current
1342 // function's stackmap. Words below this point form the highest-addressed
1343 // area of the callee's stackmap. Note that all alignment padding above the
1344 // arguments-in-memory themselves belongs to the callee's stackmap, as return
1345 // calls will replace the function arguments with a new set of arguments which
1346 // may have different alignment.
1348 // When not inside a function call setup/teardown sequence, it is Nothing.
1349 // It can make Nothing-to/from-Some transitions arbitrarily as we progress
1350 // through the function body.
1351 Maybe
<uint32_t> framePushedExcludingOutboundCallArgs
;
1353 // The number of memory-resident, ref-typed entries on the containing
1354 // BaseCompiler::stk_.
1355 size_t memRefsOnStk
;
1357 // This is a copy of machineStackTracker that is used only within individual
1358 // calls to createStackMap. It is here only to avoid possible heap allocation
1359 // costs resulting from making it local to createStackMap().
1360 MachineStackTracker augmentedMst
;
1362 StackMapGenerator(StackMaps
* stackMaps
, const RegisterOffsets
& trapExitLayout
,
1363 const size_t trapExitLayoutNumWords
,
1364 const MacroAssembler
& masm
)
1365 : trapExitLayout_(trapExitLayout
),
1366 trapExitLayoutNumWords_(trapExitLayoutNumWords
),
1367 stackMaps_(stackMaps
),
1369 numStackArgBytes(0),
1372 // At the beginning of a function, we may have live roots in registers (as
1373 // arguments) at the point where we perform a stack overflow check. This
1374 // method generates the "extra" stackmap entries to describe that, in the
1375 // case that the check fails and we wind up calling into the wasm exit
1376 // stub, as generated by GenerateTrapExit().
1378 // The resulting map must correspond precisely with the stack layout
1379 // created for the integer registers as saved by (code generated by)
1380 // GenerateTrapExit(). To do that we use trapExitLayout_ and
1381 // trapExitLayoutNumWords_, which together comprise a description of the
1382 // layout and are created by GenerateTrapExitRegisterOffsets().
1383 [[nodiscard
]] bool generateStackmapEntriesForTrapExit(
1384 const ArgTypeVector
& args
, ExitStubMapVector
* extras
);
1386 // Creates a stackmap associated with the instruction denoted by
1387 // |assemblerOffset|, incorporating pointers from the current operand
1388 // stack |stk|, incorporating possible extra pointers in |extra| at the
1389 // lower addressed end, and possibly with the associated frame having a
1390 // DebugFrame that must be traced, as indicated by |debugFrameWithLiveRefs|.
1391 [[nodiscard
]] bool createStackMap(
1392 const char* who
, const ExitStubMapVector
& extras
,
1393 uint32_t assemblerOffset
,
1394 HasDebugFrameWithLiveRefs debugFrameWithLiveRefs
, const StkVector
& stk
);
1400 #endif // wasm_wasm_baseline_frame_h