2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/runtime/vm/jit/vasm.h"
19 #include "hphp/runtime/base/stats.h"
21 #include "hphp/runtime/vm/jit/abi.h"
22 #include "hphp/runtime/vm/jit/print.h"
23 #include "hphp/runtime/vm/jit/punt.h"
24 #include "hphp/runtime/vm/jit/reg-algorithms.h"
25 #include "hphp/runtime/vm/jit/timer.h"
26 #include "hphp/runtime/vm/jit/vasm-instr.h"
27 #include "hphp/runtime/vm/jit/vasm-print.h"
28 #include "hphp/runtime/vm/jit/vasm-reg.h"
29 #include "hphp/runtime/vm/jit/vasm-unit.h"
30 #include "hphp/runtime/vm/jit/vasm-util.h"
31 #include "hphp/runtime/vm/jit/vasm-visit.h"
33 #include "hphp/util/arch.h"
34 #include "hphp/util/assertions.h"
35 #include "hphp/util/dataflow-worklist.h"
36 #include "hphp/util/trace.h"
38 #include <boost/dynamic_bitset.hpp>
39 #include <boost/range/adaptor/reversed.hpp>
40 #include <folly/Format.h>
41 #include <folly/Optional.h>
47 // - #3098509 streamline code, vectors vs linked lists, etc
48 // - #3098685 Optimize lifetime splitting
49 // - #3098739 new features now possible with XLS
53 namespace HPHP
{ namespace jit
{
54 ///////////////////////////////////////////////////////////////////////////////
57 ///////////////////////////////////////////////////////////////////////////////
// Monotonic counter, presumably used to tag each allocator run for
// debug/trace output (cf. the "Debug-only run identifier" in VxlsContext)
// — TODO confirm against the code that increments it.
std::atomic<size_t> s_counter;

// Trace level for hint-related debug output; referenced only in debug
// builds (DEBUG_ONLY).
DEBUG_ONLY constexpr auto kHintLevel = 5;
/*
 * The kind of register (if any) a use or def position requires.
 *
 * CopySrc marks sources of copy-like instructions (copy, copy2, copyargs,
 * phijmp), which lower to parallel copy plans that can read directly from
 * constants or spill slots — so such uses don't force a register.
 */
enum class Constraint { Any, CopySrc, Gpr, Simd, Sf };
/*
 * The register-class constraint implied by each Vreg flavor.
 *
 * Generic Vregs are unconstrained; the sized general-purpose flavors map
 * to Gpr, the SIMD flavors (double and 128-bit) to Simd, and the
 * status-flags flavor to Sf.
 */
Constraint constraint(const Vreg&) { return Constraint::Any; }
Constraint constraint(const Vreg64&) { return Constraint::Gpr; }
Constraint constraint(const Vreg32&) { return Constraint::Gpr; }
Constraint constraint(const Vreg16&) { return Constraint::Gpr; }
Constraint constraint(const Vreg8&) { return Constraint::Gpr; }
Constraint constraint(const VregDbl&) { return Constraint::Simd; }
Constraint constraint(const Vreg128&) { return Constraint::Simd; }
Constraint constraint(const VregSF&) { return Constraint::Sf; }
// Whether a Vreg flavor needs a full 128-bit ("wide") SIMD register.
// Only Vreg128 is wide; every other flavor is not.
bool is_wide(const Vreg128&) { return true; }
template<class T> bool is_wide(const T&) { return false; }
// Sentinel phi-group index: the Use is not part of any phi group.
constexpr int kInvalidPhiGroup = -1;
// Sentinel spill slot: no slot has been assigned to the Variable.
constexpr int kInvalidSpillSlot = -1;
const unsigned kMaxPos = UINT_MAX; // "infinity" use position
88 * A Use refers to the position where an interval is used or defined.
92 Use(Constraint kind
, unsigned pos
, Vreg hint
)
99 * Constraint imposed on the Vreg at this use.
101 Constraint kind
{Constraint::Any
};
103 * Position of this use or def.
105 unsigned pos
{kMaxPos
};
107 * If valid, try to assign the same physical register here as `hint' was
113 * Index of phi group metadata.
115 int phi_group
{kInvalidPhiGroup
};
 * A LiveRange is a closed-open range of positions where an interval is live.
121 * Specifically, for the LiveRange [start, end), start is in the range and
125 bool contains(unsigned pos
) const { return pos
>= start
&& pos
< end
; }
126 bool intersects(LiveRange r
) const { return r
.start
< end
&& start
< r
.end
; }
127 bool contains(LiveRange r
) const { return r
.start
>= start
&& r
.end
<= end
; }
135 * An Interval represents the lifetime of a Vreg as a sorted list of disjoint
136 * ranges and a sorted list of use positions. It is the unit of register
139 * Intervals may be split---e.g., because the Vreg needed to be spilled in some
140 * subrange. All (sub-)Intervals for a given Vreg are connected as a singly
141 * linked list, sorted by start position.
143 * Every use position must be inside one of the ranges, or exactly at the end
144 * of the last range. Allowing a use exactly at the end facilitates lifetime
145 * splitting when the use at the position of an instruction clobbers registers
146 * as a side effect, e.g. a call.
148 * The intuition for allowing uses at the end of an Interval is that, in truth,
149 * the picture at a given position looks like this:
153 * +-----|-------------+ copy{s, d} <-+
155 * + - - - - - - - - - + +--- position n
157 * +-------------|-----+ <-+
161 * We represent an instruction with a single position `n'. All the use(s) and
162 * def(s) of that instruction are live at some point within it, but their
163 * lifetimes nonetheless do not overlap. Since we don't represent instructions
164 * using two position numbers, instead, we allow uses on the open end side of
165 * Intervals, because they don't actually conflict with, e.g., a def of another
166 * Interval that starts at the same position.
  // Every Interval is created for (and keeps a back-pointer to) the
  // Variable whose lifetime it describes.
  explicit Interval(Variable* var) : var(var) {}
171 std::string
toString() const;
174 * Endpoints. Intervals are [closed, open), just like LiveRanges.
  // Endpoints of the whole (sub-)interval: [start of first range, end of
  // last range). Assumes `ranges' is non-empty (empty intervals are
  // asserted against after interval building).
  unsigned start() const { return ranges.front().start; }
  unsigned end() const { return ranges.back().end; }
180 * Head of this Interval chain.
182 const Interval
* leader() const;
186 * Register allocation state.
189 bool constant() const;
190 bool spilled() const;
194 * Split this interval at `pos', returning the new `this->next'.
196 * If `keep_uses' is set, uses exactly at the end of the first interval will
197 * stay with the first split (rather than the second).
199 * @requires: pos > start() && pos < end(); this ensures that both
200 * subintervals are nonempty.
202 Interval
* split(unsigned pos
, bool keep_uses
= false);
204 /////////////////////////////////////////////////////////////////////////////
207 // These operate only on `this' and not its children.
210 * Get the index of the first range or use that is not strictly lower than
211 * `pos' (i.e., which contains/is at `pos' or is strictly higher than `pos').
213 unsigned findRange(unsigned pos
) const;
214 unsigned findUse(unsigned pos
) const;
217 * Whether there is a range that includes `pos', or a use at `pos'.
219 bool covers(unsigned pos
) const;
220 bool usedAt(unsigned pos
) const;
223 * The position of a use [relative to `pos'] that requires a register (i.e.,
224 * CopySrc uses are ignored).
226 * firstUseAfter: The first use >= `pos', kMaxPos if there are no more uses.
227 * lastUseBefore: The first use <= `pos'; 0 if the first use is after `pos'.
228 * firstUse: The first use in `this'.
230 unsigned firstUseAfter(unsigned pos
) const;
231 unsigned lastUseBefore(unsigned pos
) const;
232 unsigned firstUse() const;
235 // The Variable whose (sub-)lifetime this (sub-)interval represents.
238 // Pointer to the next subinterval.
239 Interval
* next
{nullptr};
241 // Live ranges and use positions.
242 jit::vector
<LiveRange
> ranges
;
243 jit::vector
<Use
> uses
;
245 // The physical register assigned to this subinterval.
250 * Metadata about a variable which is the object of register allocation.
252 * Each Variable represents a Vreg in the Vunit we're performing register
253 * allocation on---including lifetime ranges, uses, def information, etc.
  // Bind this metadata record to the Vreg it describes.
  explicit Variable(Vreg r) : vreg(r) {}
258 static Variable
* create(Vreg r
);
259 static void destroy(Variable
* var
);
264 Interval
* ivl() const { return (Interval
*)(this + 1); }
  // Physical registers are "fixed": their assignment is predetermined,
  // and (per the comment on `slot') they are never spilled.
  bool fixed() const { return vreg.isPhys(); }
268 * Return the subinterval which covers or has a use at `pos', else nullptr if
269 * no subinterval does.
271 Interval
* ivlAt(unsigned pos
);
272 Interval
* ivlAtUse(unsigned pos
);
275 // The Vreg this value represents.
278 // Whether this is a SIMD value.
281 // Whether this variable is a constant, and its constant value.
282 bool constant
{false};
285 // The spill slot assigned to this value. Since we don't spill physical
286 // registers, and Vregs are SSA, a value's assigned slot never changes.
287 int slot
{kInvalidSpillSlot
};
289 // Position of, and block containing, the variable's single def instruction;
290 // invalid for physical registers.
291 unsigned def_pos
{kMaxPos
};
294 // List of recordbasenativesps this vreg is live across.
295 jit::vector
<unsigned> recordbasenativesps
{};
297 // Copy of the Vreg's def instruction.
301 template<class Fn
> void forEach(const LiveSet
& bits
, Fn fn
) {
302 for (auto i
= bits
.find_first(); i
!= bits
.npos
; i
= bits
.find_next(i
)) {
308 * Sack of inputs and pre-computed data used by the main XLS algorithm.
311 explicit VxlsContext(const Abi
& abi
)
317 tmp
= reg::xmm15
; // reserve xmm15 to break shuffle cycles
323 this->abi
.simdUnreserved
-= tmp
;
324 this->abi
.simdReserved
|= tmp
;
325 assertx(!abi
.gpUnreserved
.contains(sp
));
326 assertx(!abi
.gpUnreserved
.contains(tmp
));
331 // Arch-dependent stack pointer.
333 // Temp register used only for breaking cycles.
335 // Debug-only run identifier.
339 jit::vector
<Vlabel
> blocks
;
340 // [start,end) position of each block.
341 jit::vector
<LiveRange
> block_ranges
;
342 // Per-block sp[offset] to spill-slots. folly::none in cases where we
343 // have still not reached the recordbasenativesp instruction.
344 jit::vector
<folly::Optional
<int>> spill_offsets
;
345 // Per-block live-in sets.
346 jit::vector
<LiveSet
> livein
;
349 ///////////////////////////////////////////////////////////////////////////////
// Head of the subinterval chain for this Interval's Variable.
const Interval* Interval::leader() const { return var->ivl(); }
Interval* Interval::leader() { return var->ivl(); }

// A subinterval is "live" when it has been assigned a physical register.
bool Interval::live() const { return reg != InvalidReg; }
// Whether the underlying Variable holds a constant value.
bool Interval::constant() const { return var->constant; }
// "Spilled" = no register assigned here, but a spill slot was assigned.
bool Interval::spilled() const { return !live() && var->slot >= 0; }
// Whether the underlying Variable is a physical register.
bool Interval::fixed() const { return var->fixed(); }
360 unsigned Interval::findRange(unsigned pos
) const {
362 for (unsigned hi
= ranges
.size(); lo
< hi
;) {
363 auto mid
= (lo
+ hi
) / 2;
364 auto r
= ranges
[mid
];
367 } else if (r
.end
<= pos
) {
373 assertx(lo
== ranges
.size() || pos
< ranges
[lo
].start
);
377 unsigned Interval::findUse(unsigned pos
) const {
378 unsigned lo
= 0, hi
= uses
.size();
380 auto mid
= (lo
+ hi
) / 2;
381 auto u
= uses
[mid
].pos
;
384 } else if (u
< pos
) {
390 assertx(lo
== uses
.size() || pos
< uses
[lo
].pos
);
394 bool Interval::covers(unsigned pos
) const {
395 if (pos
< start() || pos
>= end()) return false;
396 auto i
= ranges
.begin() + findRange(pos
);
397 return i
!= ranges
.end() && i
->contains(pos
);
400 bool Interval::usedAt(unsigned pos
) const {
401 if (pos
< start() || pos
> end()) return false;
402 auto i
= uses
.begin() + findUse(pos
);
403 return i
!= uses
.end() && pos
== i
->pos
;
406 unsigned Interval::firstUseAfter(unsigned pos
) const {
407 for (auto& u
: uses
) {
408 if (u
.kind
== Constraint::CopySrc
) continue;
409 if (u
.pos
>= pos
) return u
.pos
;
414 unsigned Interval::lastUseBefore(unsigned pos
) const {
416 for (auto& u
: uses
) {
417 if (u
.kind
== Constraint::CopySrc
) continue;
418 if (u
.pos
> pos
) return prev
;
424 unsigned Interval::firstUse() const {
425 for (auto& u
: uses
) {
426 if (u
.kind
!= Constraint::CopySrc
) return u
.pos
;
431 Interval
* Interval::split(unsigned pos
, bool keep_uses
) {
432 assertx(pos
> start() && pos
< end()); // both parts will be non-empty
434 auto child
= jit::make
<Interval
>(var
);
438 // advance r1 to the first range we want in child; maybe split a range.
439 auto r1
= ranges
.begin() + findRange(pos
);
440 if (pos
> r1
->start
) { // split r at pos
441 child
->ranges
.push_back({pos
, r1
->end
});
445 child
->ranges
.insert(child
->ranges
.end(), r1
, ranges
.end());
446 ranges
.erase(r1
, ranges
.end());
448 // advance u1 to the first use position in child, then copy u1..end to child.
449 auto u1
= uses
.begin() + findUse(end());
450 auto u2
= uses
.end();
452 while (u1
!= u2
&& u1
->pos
<= end()) u1
++;
454 while (u1
!= u2
&& u1
->pos
< child
->start()) u1
++;
456 child
->uses
.insert(child
->uses
.end(), u1
, u2
);
462 ///////////////////////////////////////////////////////////////////////////////
465 Variable
* Variable::create(Vreg r
) {
466 auto const mem
= malloc(sizeof(Variable
) + sizeof(Interval
));
467 auto const var
= new (mem
) Variable(r
);
468 new (reinterpret_cast<void*>(var
+ 1)) Interval(var
);
472 void Variable::destroy(Variable
* var
) {
474 auto ivl
= reinterpret_cast<Interval
*>(var
+ 1);
479 Interval
* Variable::ivlAt(unsigned pos
) {
480 for (auto ivl
= this->ivl(); ivl
; ivl
= ivl
->next
) {
481 if (pos
< ivl
->start()) return nullptr;
482 if (ivl
->covers(pos
)) return ivl
;
487 Interval
* Variable::ivlAtUse(unsigned pos
) {
488 for (auto ivl
= this->ivl(); ivl
; ivl
= ivl
->next
) {
489 if (pos
< ivl
->start()) return nullptr;
490 if (ivl
->usedAt(pos
)) return ivl
;
495 ///////////////////////////////////////////////////////////////////////////////
498 * Extended Linear Scan is based on Wimmer & Franz "Linear Scan Register
499 * Allocation on SSA Form". As currently written, it also works on non-ssa
502 * 1. Sort blocks such that all predecessors of B come before B, except
503 * loop-edge predecessors. If the input IR is in SSA form, this also
504 * implies the definition of each SSATmp comes before all uses.
506 * 2. Assign an even numbered position to every instruction. Positions
507 * between instructions are used to insert copies and spills. Each block
508 * starts with an extra position number that corresponds to an imaginary
509 * "label" instruction that is not physically in the vasm IR.
511 * 3. Create one interval I for each Vreg R that requires register allocation,
512 * by iterating blocks and instructions in reverse order, computing live
513 * registers as we go. Each interval consists of a sorted list of disjoint,
514 * live ranges covering the positions where R must be in a physical register
515 * or spill slot. Vregs that are constants or have forced registers
516 * (e.g. VmSp) are skipped. If the input is SSA, the start position of each
517 * interval dominates every live range and use position in the interval.
519 * 4. Process intervals in order of start position, maintaining the set of
520 * active (live) and inactive (not live, but with live ranges that start
521 * after the current interval). When choosing a register, prefer the one
522 * available furthest into the future. If necessary, split the current
523 * interval so the first part gets a register, and enqueue the rest.
524 * When no registers are available, choose either the current interval or
525 * another one to spill, trying to free up the longest-available register.
527 * Split positions must be after an interval's start position, and on or before
 * the chosen split point. We're free to try to choose a good position in between,
529 * for example block boundaries and cold blocks.
531 * 5. Once intervals have been walked and split, every interval has an assigned
532 * operand (register or spill location) for all positions where its alive.
533 * Visit every instruction and modify its Vreg operands to the physical
534 * register that was assigned.
536 * 6. Splitting creates sub-intervals that are assigned to different registers
537 * or spill locations, so insert resolving copies at the split positions
538 * between intervals that were split in a block, and copies on control-flow
539 * edges connecting different sub-intervals. When more than one copy occurs
540 * in a position, they are parallel-copies (all sources read before any dest
543 * If any sub-interval was spilled, a single store is generated after each
546 * When analyzing instructions that use or define a virtual SF register
547 * (VregSF), eagerly rename it to the singleton PhysReg RegSF{0}, under the
548 * assumption that there can only be one live SF at each position. This
549 * reduces the number of intervals we need to process, facilitates inserting
550 * ldimm{0} (as xor), and is checked by checkSF().
553 ///////////////////////////////////////////////////////////////////////////////
556 * Printing utilities.
558 void printVariables(const char* caption
,
559 const Vunit
& unit
, const VxlsContext
& ctx
,
560 const jit::vector
<Variable
*>& variables
);
561 void dumpVariables(const jit::vector
<Variable
*>& variables
,
562 unsigned num_spills
);
565 * The ID of the block enclosing `pos'.
567 Vlabel
blockFor(const VxlsContext
& ctx
, unsigned pos
) {
568 for (unsigned lo
= 0, hi
= ctx
.blocks
.size(); lo
< hi
;) {
569 auto mid
= (lo
+ hi
) / 2;
570 auto r
= ctx
.block_ranges
[ctx
.blocks
[mid
]];
573 } else if (pos
>= r
.end
) {
576 return ctx
.blocks
[mid
];
579 always_assert(false);
580 return Vlabel
{0xffffffff};
584 * Insert `src' into `dst' before dst[j], corresponding to XLS logical
587 * Updates `j' to refer to the same instruction after the code insertions.
589 void insertCodeAt(jit::vector
<Vinstr
>& dst
, unsigned& j
,
590 const jit::vector
<Vinstr
>& src
, unsigned pos
) {
591 auto const irctx
= dst
[j
].irctx();
592 dst
.insert(dst
.begin() + j
, src
.size(), trap
{TRAP_REASON
});
593 for (auto const& inst
: src
) {
595 dst
[j
].set_irctx(irctx
);
600 ///////////////////////////////////////////////////////////////////////////////
601 // Pre-analysis passes.
604 * Compute the linear position range of each block.
606 * This modifies the Vinstrs in `unit' by setting their `pos' members, in
607 * addition to producing the block-to-range map.
609 jit::vector
<LiveRange
> computePositions(Vunit
& unit
,
610 const jit::vector
<Vlabel
>& blocks
) {
611 auto block_ranges
= jit::vector
<LiveRange
>{unit
.blocks
.size()};
614 for (auto const b
: blocks
) {
615 auto& code
= unit
.blocks
[b
].code
;
617 bool front_uses
{false};
618 visitUses(unit
, code
.front(), [&](Vreg
/*r*/) { front_uses
= true; });
620 auto irctx
= code
.front().irctx();
621 code
.emplace(code
.begin(), nop
{}, irctx
);
623 auto const start
= pos
;
625 for (auto& inst
: unit
.blocks
[b
].code
) {
629 block_ranges
[b
] = { start
, pos
};
635 * Return the effect this instruction has on the value of `sp'.
637 * Asserts (if `do_assert' is set) if an instruction mutates `sp' in an
640 int spEffect(const Vunit
& unit
, const Vinstr
& inst
, PhysReg sp
,
641 bool do_assert
= debug
) {
659 if (i
.d
== Vreg64(sp
)) {
660 assertx(i
.s
.base
== i
.d
&& !i
.s
.index
.isValid());
667 visitDefs(unit
, inst
, [&] (Vreg r
) { assertx(r
!= sp
); });
674 * Compute the offset from `sp' to the spill area at each block start.
676 jit::vector
<folly::Optional
<int>> analyzeSP(const Vunit
& unit
,
677 const jit::vector
<Vlabel
>& blocks
,
679 auto visited
= boost::dynamic_bitset
<>(unit
.blocks
.size());
680 auto spill_offsets
= jit::vector
<folly::Optional
<int>>(unit
.blocks
.size());
682 for (auto const b
: blocks
) {
683 auto offset
= visited
.test(b
) ? spill_offsets
[b
] : folly::none
;
685 for (unsigned j
= 0; j
< unit
.blocks
[b
].code
.size(); j
++) {
686 auto const& inst
= unit
.blocks
[b
].code
[j
];
687 if (inst
.op
== Vinstr::recordbasenativesp
) {
688 assert_flog(!offset
, "Block B{} Instr {} initiailizes native SP, but "
689 "already initialized.", size_t(b
), j
);
691 } else if (inst
.op
== Vinstr::unrecordbasenativesp
) {
692 assert_flog(offset
, "Block B{} Instr {} uninitiailizes native SP, but "
693 "already uninitialized.", size_t(b
), j
);
694 assert_flog(*offset
== 0, "Block B{} Instr {} uninitiailizes native SP, "
695 "but SPOffset is nonzero.", size_t(b
), j
);
696 offset
= folly::none
;
698 *offset
-= spEffect(unit
, inst
, sp
);
701 for (auto const s
: succs(unit
.blocks
[b
])) {
702 if (visited
.test(s
)) {
703 assert_flog(offset
== spill_offsets
[s
],
704 "sp mismatch on edge B{}->B{}, expected {} got {}",
705 size_t(b
), size_t(s
),
706 spill_offsets
[s
] ? std::to_string(*spill_offsets
[s
])
708 offset
? std::to_string(*offset
)
711 spill_offsets
[s
] = offset
;
716 return spill_offsets
;
719 ///////////////////////////////////////////////////////////////////////////////
720 // Lifetime intervals.
725 * This assumes that the ranges of `ivl' are in reverse order, and that `r'
726 * precedes or overlaps with ivl->ranges.first().
728 void addRange(Interval
* ivl
, LiveRange r
) {
729 while (!ivl
->ranges
.empty() && r
.contains(ivl
->ranges
.back())) {
730 ivl
->ranges
.pop_back();
732 if (ivl
->ranges
.empty()) {
733 return ivl
->ranges
.push_back(r
);
735 auto& first
= ivl
->ranges
.back();
736 if (first
.contains(r
)) return;
737 if (r
.end
>= first
.start
) {
738 first
.start
= r
.start
;
740 ivl
->ranges
.push_back(r
);
745 * Visits defs of an instruction, updates their liveness, adds live ranges, and
746 * adds Uses with appropriate hints.
749 DefVisitor(const Vunit
& unit
, jit::vector
<Variable
*>& variables
,
750 LiveSet
& live
, Vlabel b
, const Vinstr
& inst
, unsigned pos
)
752 , m_variables(variables
)
  // DefVisitor only records defs: immediates and every flavor of use are
  // no-ops here (UseVisitor handles uses).
  template<class F> void imm(const F&) {}
  template<class R> void use(R) {}
  template<class S, class H> void useHint(S, H) {}
  template<class R> void across(R) {}
765 void def(Vtuple defs
) {
766 for (auto r
: m_unit
.tuples
[defs
]) def(r
);
768 void defHint(Vtuple def_tuple
, Vtuple hint_tuple
) {
769 auto& defs
= m_unit
.tuples
[def_tuple
];
770 auto& hints
= m_unit
.tuples
[hint_tuple
];
771 for (int i
= 0; i
< defs
.size(); i
++) {
772 def(defs
[i
], Constraint::Any
, hints
[i
]);
775 template<class R
> void def(R r
) {
776 def(r
, constraint(r
), Vreg
{}, is_wide(r
));
778 template<class D
, class H
> void defHint(D dst
, H hint
) {
779 def(dst
, constraint(dst
), hint
, is_wide(dst
));
781 void def(Vreg r
) { def(r
, Constraint::Any
); }
782 void defHint(Vreg d
, Vreg hint
) { def(d
, Constraint::Any
, hint
); }
783 void def(RegSet rs
) { rs
.forEach([&](Vreg r
) { def(r
); }); }
785 r
= RegSF
{0}; // eagerly rename all SFs
786 def(r
, constraint(r
));
790 void def(Vreg r
, Constraint kind
, Vreg hint
= Vreg
{}, bool wide
= false) {
791 auto var
= m_variables
[r
];
792 if (m_live
.test(r
)) {
794 var
->ivl()->ranges
.back().start
= m_pos
;
797 var
= m_variables
[r
] = Variable::create(r
);
799 addRange(var
->ivl(), {m_pos
, m_pos
+ 1});
802 var
->ivl()->uses
.push_back(Use
{kind
, m_pos
, hint
});
804 var
->def_pos
= m_pos
;
805 var
->def_block
= m_block
;
806 var
->def_inst
= m_inst
;
812 jit::vector
<Variable
*>& m_variables
;
815 const Vinstr
& m_inst
;
820 UseVisitor(const Vunit
& unit
, jit::vector
<Variable
*>& variables
,
821 LiveSet
& live
, const Vinstr
& inst
, LiveRange range
)
823 , m_variables(variables
)
  // UseVisitor only records uses: immediates and defs are no-ops here
  // (DefVisitor handles defs).
  template<class F> void imm(const F&) {}
  template<class R> void def(R) {}
  template<class D, class H> void defHint(D, H) {}

  // A plain use keeps the Vreg live up to the end of this instruction's
  // position range.
  template<class R> void use(R r) { use(r, constraint(r), m_range.end); }
835 template<class S
, class H
> void useHint(S src
, H hint
) {
836 use(src
, constraint(src
), m_range
.end
, hint
);
839 r
= RegSF
{0}; // eagerly rename all SFs
840 use(r
, constraint(r
), m_range
.end
);
842 void use(RegSet regs
) { regs
.forEach([&](Vreg r
) { use(r
); }); }
843 void use(Vtuple uses
) { for (auto r
: m_unit
.tuples
[uses
]) use(r
); }
844 void useHint(Vtuple src_tuple
, Vtuple hint_tuple
) {
845 auto& uses
= m_unit
.tuples
[src_tuple
];
846 auto& hints
= m_unit
.tuples
[hint_tuple
];
847 for (int i
= 0, n
= uses
.size(); i
< n
; i
++) {
848 useHint(uses
[i
], hints
[i
]);
852 if (m
.base
.isValid()) use(m
.base
);
853 if (m
.index
.isValid()) use(m
.index
);
  // Width-tagged pointer operand: treat it as a plain Vptr use.
  template<Width w> void use(Vp<w> m) { use(static_cast<Vptr>(m)); }
857 void use(VcallArgsId
/*id*/) {
858 always_assert(false && "vcall unsupported in vxls");
862 * An operand marked as UA means use-across. Mark it live across the
863 * instruction so its lifetime conflicts with the destination, which ensures
864 * it will be assigned a different register than the destination. This isn't
865 * necessary if *both* operands of a binary instruction are the same virtual
866 * register, but is still correct.
868 template<class R
> void across(R r
) { use(r
, constraint(r
), m_range
.end
+ 1); }
869 void across(Vtuple uses
) { for (auto r
: m_unit
.tuples
[uses
]) across(r
); }
870 void across(RegSet regs
) { regs
.forEach([&](Vreg r
) { across(r
); }); }
871 void across(Vptr m
) {
872 if (m
.base
.isValid()) across(m
.base
);
873 if (m
.index
.isValid()) across(m
.index
);
  // Width-tagged pointer operand: treat it as a plain Vptr use-across.
  template<Width w> void across(Vp<w> m) { across(static_cast<Vptr>(m)); }
878 void use(Vreg r
, Constraint kind
, unsigned end
, Vreg hint
= Vreg
{}) {
881 auto var
= m_variables
[r
];
882 if (!var
) var
= m_variables
[r
] = Variable::create(r
);
883 addRange(var
->ivl(), {m_range
.start
, end
});
886 if (m_inst
.op
== Vinstr::copyargs
||
887 m_inst
.op
== Vinstr::copy2
||
888 m_inst
.op
== Vinstr::copy
||
889 m_inst
.op
== Vinstr::phijmp
) {
890 // all these instructions lower to parallel copyplans, which know
891 // how to load directly from constants or spilled locations
892 kind
= Constraint::CopySrc
;
894 var
->ivl()->uses
.push_back({kind
, m_range
.end
, hint
});
900 jit::vector
<Variable
*>& m_variables
;
902 const Vinstr
& m_inst
;
903 const LiveRange m_range
;
907 * Compute lifetime intervals and use positions of all Vregs by walking the
908 * code bottom-up once.
910 jit::vector
<Variable
*> buildIntervals(const Vunit
& unit
,
911 const VxlsContext
& ctx
) {
912 ONTRACE(kVasmRegAllocDetailLevel
, printCfg(unit
, ctx
.blocks
));
914 auto variables
= jit::vector
<Variable
*>{unit
.next_vr
};
916 for (auto b
: boost::adaptors::reverse(ctx
.blocks
)) {
917 auto& block
= unit
.blocks
[b
];
919 // initial live set is the union of successor live sets.
920 LiveSet
live(unit
.next_vr
);
921 for (auto s
: succs(block
)) {
922 always_assert(!ctx
.livein
[s
].empty());
923 live
|= ctx
.livein
[s
];
926 // add a range covering the whole block to every live interval
927 auto& block_range
= ctx
.block_ranges
[b
];
928 forEach(live
, [&](Vreg r
) {
929 if (!variables
[r
]) variables
[r
] = Variable::create(r
);
930 addRange(variables
[r
]->ivl(), block_range
);
933 // visit instructions bottom-up, adding uses & ranges
934 auto pos
= block_range
.end
;
935 for (auto const& inst
: boost::adaptors::reverse(block
.code
)) {
937 RegSet implicit_uses
, implicit_across
, implicit_defs
;
938 getEffects(ctx
.abi
, inst
, implicit_uses
, implicit_across
, implicit_defs
);
940 DefVisitor
dv(unit
, variables
, live
, b
, inst
, pos
);
941 visitOperands(inst
, dv
);
942 dv
.def(implicit_defs
);
944 UseVisitor
uv(unit
, variables
, live
, inst
, {block_range
.start
, pos
});
945 visitOperands(inst
, uv
);
946 uv
.use(implicit_uses
);
947 uv
.across(implicit_across
);
948 if (inst
.op
== Vinstr::recordbasenativesp
) {
949 forEach(live
, [&](Vreg r
) {
950 if (!unit
.regToConst
.count(r
)) {
951 // We mark the instruction as a use so no spills span the
952 // instruction unless they have to.
954 if (!variables
[r
]) variables
[r
] = Variable::create(r
);
955 variables
[r
]->recordbasenativesps
.push_back(pos
);
958 } else if (inst
.op
== Vinstr::unrecordbasenativesp
) {
959 forEach(live
, [&](Vreg r
) {
960 if (!unit
.regToConst
.count(r
)) {
961 // We mark the instruction as a use so no spills span the
962 // instruction unless they have to.
969 // sanity check liveness computation
970 always_assert(live
== ctx
.livein
[b
]);
973 // finish processing live ranges for constants
974 for (auto& c
: unit
.constToReg
) {
975 if (auto var
= variables
[c
.second
]) {
976 var
->ivl()->ranges
.back().start
= 0;
978 var
->constant
= true;
983 // Ranges and uses were generated in reverse order. Unreverse them now.
984 for (auto var
: variables
) {
986 auto ivl
= var
->ivl();
987 assertx(!ivl
->ranges
.empty()); // no empty intervals
988 std::reverse(ivl
->uses
.begin(), ivl
->uses
.end());
989 std::reverse(ivl
->ranges
.begin(), ivl
->ranges
.end());
991 ONTRACE(kVasmRegAllocDetailLevel
,
992 printVariables("after building intervals", unit
, ctx
, variables
);
996 // only constants and physical registers can be live-into the entry block.
997 forEach(ctx
.livein
[unit
.entry
], [&](Vreg r
) {
998 UNUSED
auto var
= variables
[r
];
999 assertx(var
->constant
|| var
->fixed());
1001 for (auto var
: variables
) {
1003 auto const ivl
= var
->ivl();
1004 for (unsigned i
= 1; i
< ivl
->uses
.size(); i
++) {
1005 assertx(ivl
->uses
[i
].pos
>= ivl
->uses
[i
-1].pos
); // monotonic
1007 for (unsigned i
= 1; i
< ivl
->ranges
.size(); i
++) {
1008 assertx(ivl
->ranges
[i
].end
> ivl
->ranges
[i
].start
); // no empty ranges
1009 assertx(ivl
->ranges
[i
].start
> ivl
->ranges
[i
-1].end
); // no empty gaps
1016 ///////////////////////////////////////////////////////////////////////////////
/*
 * A map from PhysReg number to position.
 */
using PosVec = PhysReg::Map<unsigned>;
1024 * Between `r1' and `r2', choose the one whose position in `pos_vec' is closest
1025 * to but still after (or at) `pos'.
1027 * If neither has a position after `pos', choose the higher one. If both have
1028 * the same position, choose `r1'.
1030 PhysReg
choose_closest_after(unsigned pos
, const PosVec
& pos_vec
,
1031 PhysReg r1
, PhysReg r2
) {
1032 if (r1
== InvalidReg
) return r2
;
1033 if (r2
== InvalidReg
) return r1
;
1035 if (pos_vec
[r1
] >= pos
) {
1036 if (pos
<= pos_vec
[r2
] && pos_vec
[r2
] < pos_vec
[r1
]) {
1040 if (pos_vec
[r2
] > pos_vec
[r1
]) {
1048 * Like choose_closest_after(), but iterates through `pos_vec'.
1050 PhysReg
find_closest_after(unsigned pos
, const PosVec
& pos_vec
) {
1051 auto ret
= InvalidReg
;
1052 for (auto const r
: pos_vec
) {
1053 ret
= choose_closest_after(pos
, pos_vec
, ret
, r
);
1058 ///////////////////////////////////////////////////////////////////////////////
/*
 * A member of a "phi group"---i.e., the set of all variables which flow into
 * one another via a set of corresponding phijmp's and phidef's.
 */
// NOTE(review): the enclosing `struct PhiVar {' header appears to have been
// lost from this paste; these are its members.
Vreg r;        // the Vreg that is used or def'd in the phi
unsigned pos;  // position of the phijmp or phidef

/*
 * Hint metadata that doesn't fit into the `variables' vector.
 */
// NOTE(review): likewise, this appears to be a member of a `struct HintInfo'
// whose header/footer were lost from this paste.
jit::vector<jit::vector<PhiVar>> phi_groups;  // one PhiVar list per group id
/*
 * Add the phi use of `r' at `pos' to the phi group given by `pgid'.
 *
 * `pos' is the position of the phijmp or phidef; `hint', if valid, is
 * recorded on the use so hint back-propagation can see it.
 */
void addPhiGroupMember(const jit::vector<Variable*>& variables,
                       jit::vector<jit::vector<PhiVar>>& phi_groups,
                       Vreg r, unsigned pos, int pgid,
                       Vreg hint = Vreg{}) {
  assertx(phi_groups.size() > pgid);
  assertx(!phi_groups[pgid].empty());

  auto const var = variables[r];
  assertx(var != nullptr);

  // If `pos' is the variable's def, the relevant use record is the first one;
  // otherwise look the use up by position.
  auto const ivl = var->ivl();
  auto& u = pos == var->def_pos
    // NOTE(review): the `?' arm of this conditional (presumably
    // `? ivl->uses.front()') appears to have been lost from this paste.
    : ivl->uses[ivl->findUse(pos)];
  assertx(u.pos == pos);

  if (hint.isValid()) u.hint = hint;

  phi_groups[pgid].push_back({r, u.pos});
// NOTE(review): one or two lines (presumably recording `pgid' on the use)
// and the function's closing brace appear to have been lost from this paste.
1103 * Collect hint metadata for phis.
1105 * The return value of this pass represents an equivalence relation over phi
1106 * uses and defs. The relation is defined as follows:
1108 * Consider a phidef F with arity n (i.e., it defines n variables). F gives
1109 * rise to n equivalence classes, each of which consists in the i-th use or def
1110 * variable from each phi instruction in the transitive closure of the predicate
1112 * P(f) := { every phijmp which targets f, and every other phidef
1113 * targeted by those phijmp's }
1117 * Or, taking a pictoral example:
1118 * _________________________________________________________
1123 * | phijmp(x1, y1) phijmp(x2, y2) phijmp(x3, y3) . |
1125 * | +-----------------+ +-------------+ . |
1128 * | phidef(x4, y4) phidef(x5, y5) -------------------+ |
1130 * |______ . ________________________________________________|
1133 * ______ | __________________________
1135 * | phijmp(x6, y6)--->phidef(x7, y7) |
1136 * |___________________________________|
1138 * The equivalence classes in this example are {x1, x2, x4}, {y1, y2, y4},
1139 * {x3, x5}, {y3, y5}, {x6, x7}, and {y6, y7}.
1141 * We use these equivalence classes during register allocation to try to assign
1142 * the same register to all the variables in a phi group (at least at those
1143 * positions where the phi instructions occur).
1145 * Note that this equivalence relation does /not/ capture the idea that we
1146 * probably want the same register for x4 as we do for x6 and x7. To track that
1147 * information, we set the `hint' on phi uses to the corresponding phidef
1148 * variable, then let the hint back propagation pass handle it. (Note that we
1149 * do /not/ set the `hint' field at phi defs, nor do we try to use it at any
/*
 * Collect hint metadata for phis; returns one PhiVar list per phi group id.
 * (See the long comment above for the equivalence relation this builds.)
 */
jit::vector<jit::vector<PhiVar>>
analyzePhiHints(const Vunit& unit, const VxlsContext& ctx,
                const jit::vector<Variable*>& variables) {
  auto phi_groups = jit::vector<jit::vector<PhiVar>>{};

  /*
   * Get the phi group for r's def, optionally allocating a new phi group if
   * it doesn't already have one.
   */
  auto const def_phi_group = [&] (Vreg r, bool alloc) {
    auto const var = variables[r];
    assertx(var != nullptr);
    assertx(var->def_inst.op == Vinstr::phidef);

    // The def of `r' is always the first use record on its interval.
    auto& u = var->ivl()->uses.front();

    if (u.phi_group == kInvalidPhiGroup && alloc) {
      u.phi_group = phi_groups.size();
      phi_groups.emplace_back(jit::vector<PhiVar>{{r, u.pos}});
    // NOTE(review): the lambda's tail (presumably returning `u.phi_group')
    // appears to have been lost from this paste.

  auto const is_fixed = [&] (Vreg r) { return variables[r]->fixed(); };

  for (auto const b : ctx.blocks) {
    auto const& code = unit.blocks[b].code;
    if (code.empty()) continue;

    auto const& first = code.front();
    auto const& last = code.back();

    if (first.op == Vinstr::phidef) {
      // A phidef'd variable just need to be assigned a phi group if it doesn't
      // already have one (i.e., from handling a phijmp).
      auto const& defs = unit.tuples[first.phidef_.defs];
      for (auto const r : defs) {
        if (is_fixed(r)) continue;
        def_phi_group(r, true);
      // NOTE(review): closing braces appear to have been lost here.

    if (last.op == Vinstr::phijmp) {
      auto const target = last.phijmp_.target;
      auto const& def_inst = unit.blocks[target].code.front();
      assertx(def_inst.op == Vinstr::phidef);

      auto const& uses = unit.tuples[last.phijmp_.uses];
      auto const& defs = unit.tuples[def_inst.phidef_.defs];
      assertx(uses.size() == defs.size());

      // Set the phi group for each phi use variable to that of the
      // corresponding phi def variable (allocating a new group if the def
      // variable has not already been assigned one).
      for (size_t i = 0, n = uses.size(); i < n; ++i) {
        if (is_fixed(defs[i])) continue;

        auto const pgid = def_phi_group(defs[i], true);
        addPhiGroupMember(variables, phi_groups,
                          uses[i], last.id, pgid, defs[i]);
// NOTE(review): the loop closes and the `return phi_groups;' tail appear to
// have been lost from this paste.
1218 HintInfo
analyzeHints(const Vunit
& unit
, const VxlsContext
& ctx
,
1219 const jit::vector
<Variable
*>& variables
) {
1221 hint_info
.phi_groups
= analyzePhiHints(unit
, ctx
, variables
);
1227 * Try to return the physical register that would coalesce `ivl' with its
1230 PhysReg
tryCoalesce(const jit::vector
<Variable
*>& variables
,
1231 const Interval
* ivl
) {
1232 assertx(ivl
== ivl
->leader());
1233 assertx(!ivl
->uses
.empty());
1235 auto const& def
= ivl
->uses
.front();
1236 assertx(def
.pos
== ivl
->var
->def_pos
);
1238 if (!def
.hint
.isValid()) return InvalidReg
;
1239 auto const hint_var
= variables
[def
.hint
];
1241 for (auto hvl
= hint_var
->ivl(); hvl
; hvl
= hvl
->next
) {
1242 if (def
.pos
== hvl
->end()) return hvl
->reg
;
/*
 * Scan the uses of `ivl' for phi nodes, and return a pair of hints for the
 * first one we find.
 *
 * For a given phi node F, the first of the two hints we return is derived from
 * only those phijmps targeting F (if F is a phidef), or from the phidefs F
 * targets (if F is a phijmp). The second hint accounts for F's entire
 * equivalence class---see analyzePhiHints() for more details.
 */
folly::Optional<std::pair<PhysReg,PhysReg>>
tryPhiHint(const jit::vector<Variable*>& variables,
           const Interval* ivl, const HintInfo& hint_info,
           const PosVec& free_until) {
  // Prefer whichever candidate stays free longest past ivl's end.
  auto const choose = [&] (PhysReg h1, PhysReg h2) {
    return choose_closest_after(ivl->end(), free_until, h1, h2);
  // NOTE(review): the lambda's closing `};' appears to have been lost from
  // this paste.

  for (auto const& u : ivl->uses) {
    // Look for the first phi hint.
    if (u.phi_group == kInvalidPhiGroup) continue;

    auto preferred = InvalidReg;
    auto group_hint = InvalidReg;

    // Choose a register to be the hint.
    for (auto const& phiv : hint_info.phi_groups[u.phi_group]) {
      auto const var = variables[phiv.r];
      // NOTE(review): a line or two (presumably a guard on `var') appear to
      // have been lost here.

      auto const phivl = var->ivlAtUse(phiv.pos);
      if (phivl->reg == InvalidReg) continue;

      auto const& phu = phivl->uses[phivl->findUse(phiv.pos)];
      assertx(phu.pos == phiv.pos);

      // Prefer to use a hint from a corresponding phi node.
      if (u.hint == phiv.r || phu.hint == ivl->var->vreg) {
        preferred = choose(preferred, phivl->reg);
      // NOTE(review): a closing brace appears to have been lost here.

      // In the end, though, we're okay with any hint from the group.
      group_hint = choose(group_hint, phivl->reg);
    // NOTE(review): the inner-loop close appears to have been lost here.

    return std::make_pair(preferred, group_hint);
// NOTE(review): the function tail (outer-loop close and, presumably, a
// `return folly::none;' fallthrough) appears to have been lost from this
// paste.
1295 * Choose an appropriate hint for the (sub-)interval `ivl'.
1297 * The register allocator passes us constraints via `free_until' and `allow'.
1298 * We use the `free_until' vector to choose a hint that most tightly covers the
1299 * lifetime of `ivl'; we use `allow' to ensure that our hint satisfies
1302 PhysReg
chooseHint(const jit::vector
<Variable
*>& variables
,
1303 const Interval
* ivl
, const HintInfo
& hint_info
,
1304 const PosVec
& free_until
, RegSet allow
) {
1305 if (!RuntimeOption::EvalHHIREnablePreColoring
&&
1306 !RuntimeOption::EvalHHIREnableCoalescing
) return InvalidReg
;
1308 auto const choose
= [&] (PhysReg h1
, PhysReg h2
) {
1309 return choose_closest_after(ivl
->end(), free_until
, h1
, h2
);
1311 auto hint
= InvalidReg
;
1313 // If `ivl' contains its def, try a coalescing hint.
1314 if (ivl
== ivl
->leader()) {
1315 hint
= tryCoalesce(variables
, ivl
);
1318 // Return true if we should return `h'.
1319 auto const check_and_trace
= [&] (PhysReg h
, const char* prefix
) {
1320 if (h
== InvalidReg
) return false;
1321 if (allow
.contains(h
)) {
1322 FTRACE(kHintLevel
, "{}hinting {} for %{} @ {}\n",
1323 prefix
, show(h
), size_t(ivl
->var
->vreg
), ivl
->start());
1326 FTRACE(kHintLevel
, " found mismatched hint for %{} @ {}\n",
1327 size_t(ivl
->var
->vreg
), ivl
->uses
.front().pos
);
1331 auto const is_phi_use
= !ivl
->uses
.empty() &&
1332 ivl
->uses
.front().phi_group
!= kInvalidPhiGroup
;
1334 // If we have either a backwards or a forwards hint, return it---if we have
1335 // both, return whichever is free the longest. For phi uses, however, we
1336 // want to consider the whole phi group's allocations first.
1337 if (!is_phi_use
&& check_and_trace(hint
, "")) return hint
;
1339 // Try to determine a hint via the first phi use in the interval.
1340 auto const phi_hints
= tryPhiHint(variables
, ivl
, hint_info
, free_until
);
1341 bool is_phi_hint
= false;
1344 hint
= choose(phi_hints
->first
, hint
);
1345 if (hint
!= phi_hints
->first
) {
1346 hint
= choose(hint
, phi_hints
->second
);
1348 is_phi_hint
= hint
== phi_hints
->first
||
1349 hint
== phi_hints
->second
;
1351 if (phi_hints
|| is_phi_use
) {
1352 if (check_and_trace(hint
, debug
&& is_phi_hint
? "phi-" : "")) {
1357 // Crawl through our uses and look for a physical hint.
1358 for (auto const& u
: ivl
->uses
) {
1359 if (!u
.hint
.isValid() ||
1361 !allow
.contains(u
.hint
)) continue;
1362 hint
= choose(hint
, u
.hint
);
1365 if (hint
!= InvalidReg
) {
1366 FTRACE(kHintLevel
, "physical hint {} for %{} @ {}\n",
1367 show(hint
), size_t(ivl
->var
->vreg
), ivl
->start());
1372 ///////////////////////////////////////////////////////////////////////////////
1373 // Register allocation.
/*
 * Information about spills generated by register allocation.
 *
 * Used for the allocateSpillSpace() pass which inserts the instructions that
 * create spill space on the stack.
 */
// NOTE(review): the enclosing `struct SpillInfo {' header and footer appear
// to have been lost from this paste; these are its members.
// Number of intervals spilled.
unsigned num_spills{0};
// Number of spill slots used.
size_t used_spill_slots{0};
/*
 * Extended Linear Scan register allocator over vasm virtual registers (Vregs).
 *
 * This encapsulates the intermediate data structures used during the
 * allocation phase of the algorithm so we don't have to pass them around
 * everywhere.
 */
// NOTE(review): the enclosing `struct Vxls {' header appears to have been
// lost from this paste, along with the constructor's leading `: ctx(ctx)'
// initializer, its body, and (presumably) the declaration of go().
Vxls(const VxlsContext& ctx,
     const jit::vector<Variable*>& variables,
     const HintInfo& hint_info)
  , variables(variables)
  , hint_info(hint_info)

// Spill-slot assignment and spilling.
void assignSpill(Interval* ivl);
void spill(Interval*);
void assignReg(Interval*, PhysReg);

// Split-position and use-constraint helpers.
unsigned nearestSplitBefore(unsigned pos);
unsigned constrain(Interval*, RegSet&);

// Steps of the main allocation loop.
void update(Interval*);
void allocate(Interval*);
void allocBlocked(Interval*);
void spillOthers(Interval* current, PhysReg r);

/*
 * Comparison function for pending priority queue.
 *
 * std::priority_queue requires a less operation, but sorts the heap
 * highest-first; we need the opposite (lowest-first), so use greater-than.
 */
// NOTE(review): presumably the call operator of the nested `struct Compare'
// referenced by `pending' below; the struct header appears to have been lost
// from this paste.
bool operator()(const Interval* i1, const Interval* i2) {
  return i1->start() > i2->start();

const VxlsContext& ctx;

// Variables, null if unused.
const jit::vector<Variable*>& variables;

// Phi/coalescing hint metadata computed by analyzeHints().
const HintInfo& hint_info;
// Subintervals sorted by Interval start.
jit::priority_queue<Interval*,Compare> pending;
// Intervals that overlap.
jit::vector<Interval*> active, inactive;
// Last position each spill slot was owned; kMaxPos means currently used.
jit::array<unsigned, kMaxSpillSlots> spill_slots{{0}};

// Accumulated spill statistics.
SpillInfo spill_info;
/*
 * Main driver: seed the interval worklist, then allocate pending intervals
 * in order of increasing start position (per Compare above).
 */
SpillInfo Vxls::go() {
  for (auto var : variables) {
    // NOTE(review): several lines of this dispatch (the guards and branch
    // headers around the fixed/constant/ordinary cases) appear to have been
    // lost from this paste.
    assignReg(var->ivl(), var->vreg);
    } else if (var->constant) {
    pending.push(var->ivl());

  while (!pending.empty()) {
    auto current = pending.top();
// NOTE(review): the loop body tail (pop/update/allocate) and the function's
// return appear to have been lost from this paste.
/*
 * Assign the next available spill slot to `ivl'.
 */
void Vxls::assignSpill(Interval* ivl) {
  assertx(!ivl->fixed() && ivl != ivl->leader());

  // All subintervals of a Variable share one slot; first assignment wins.
  if (ivl->var->slot != kInvalidSpillSlot) return;

  auto& used_spill_slots = spill_info.used_spill_slots;

  auto const assign_slot = [&] (size_t slot) {
    ivl->var->slot = slot;
    ++spill_info.num_spills;

    spill_slots[slot] = kMaxPos;
    if (!ivl->var->wide) {
      used_spill_slots = std::max(used_spill_slots, slot + 1);
      // NOTE(review): the `} else {' separating the narrow and wide cases
      // appears to have been lost from this paste; a wide value claims two
      // consecutive slots.
      used_spill_slots = std::max(used_spill_slots, slot + 2);
      spill_slots[slot + 1] = kMaxPos;
  // NOTE(review): the lambda's closing braces appear to have been lost here.

  // Assign spill slots. We track the highest position at which a spill slot
  // was owned, and only reassign it to a Vreg if its lifetime interval
  // (including all splits) is strictly above that high water mark.
  if (!ivl->var->wide) {
    for (size_t slot = 0, n = spill_slots.size(); slot < n; ++slot) {
      if (ivl->leader()->start() >= spill_slots[slot]) {
        return assign_slot(slot);
    // NOTE(review): loop closes and the `} else {' for the wide case appear
    // to have been lost here; wide values scan even-aligned slot pairs.
    for (size_t slot = 0, n = spill_slots.size() - 1; slot < n; slot += 2) {
      if (ivl->leader()->start() >= spill_slots[slot] &&
          ivl->leader()->start() >= spill_slots[slot + 1]) {
        return assign_slot(slot);

  // Ran out of spill slots.
  ONTRACE(kVasmRegAllocDetailLevel,
          dumpVariables(variables, spill_info.num_spills));
  TRACE(1, "vxls-punt TooManySpills\n");
  TRACE_PUNT("LinearScan_TooManySpills");
1518 * Assign `r' to `ivl'.
1520 void Vxls::assignReg(Interval
* ivl
, PhysReg r
) {
1521 if (!ivl
->fixed() && ivl
->uses
.empty()) {
1522 ivl
->reg
= InvalidReg
;
1523 if (!ivl
->constant()) assignSpill(ivl
);
1526 active
.push_back(ivl
);
1531 * Spill `ivl' from its start until its first register use.
1533 * Spill `ivl' if there is no use; otherwise split the interval just before the
1534 * use, and enqueue the second part.
1536 void Vxls::spill(Interval
* ivl
) {
1537 unsigned first_use
= ivl
->firstUse();
1538 if (first_use
<= ivl
->end()) {
1539 auto split_pos
= nearestSplitBefore(first_use
);
1540 if (split_pos
<= ivl
->start()) {
1541 // This only can happen if we need more than the available registers
1542 // at a single position. It can happen in phijmp or callargs.
1543 TRACE(1, "vxls-punt RegSpill\n");
1544 TRACE_PUNT("RegSpill"); // cannot split before first_use
1546 pending
.push(ivl
->split(split_pos
));
1548 ivl
->reg
= InvalidReg
;
1549 if (!ivl
->constant()) assignSpill(ivl
);
/*
 * Update the active and inactive lists for the start of `current'.
 */
void Vxls::update(Interval* current) {
  auto const pos = current->start();

  // Release the spill slot of a fully-expired interval chain so intervals
  // starting after `ivl->end()' may reuse it.
  auto const free_spill_slot = [this] (Interval* ivl) {
    assertx(!ivl->next);
    auto slot = ivl->var->slot;

    if (slot != kInvalidSpillSlot) {
      if (ivl->var->wide) {
        assertx(spill_slots[slot + 1]);
        spill_slots[slot + 1] = ivl->end();
        // NOTE(review): the `} else {' between the wide and narrow cases
        // appears to have been lost from this paste.
        assertx(spill_slots[slot]);
        spill_slots[slot] = ivl->end();
  // NOTE(review): the lambda's closing braces appear to have been lost here.

  // Check for active/inactive intervals that have expired or which need their
  // polarity flipped.
  auto const update_list = [&] (jit::vector<Interval*>& target,
                                jit::vector<Interval*>& other,
    // NOTE(review): the trailing `bool is_active) {' parameter line appears
    // to have been lost from this paste.
    auto end = target.end();
    for (auto i = target.begin(); i != end;) {
      // NOTE(review): the element load (presumably `auto ivl = *i;') and the
      // swap-with-back removal logic appear to have been lost here.
      if (pos >= ivl->end()) {
        if (!ivl->next) free_spill_slot(ivl);
      } else if (is_active ? !ivl->covers(pos) : ivl->covers(pos)) {
        other.push_back(ivl);

    // Removed elements were swapped past `end'; drop them in one shot.
    target.erase(end, target.end());

  update_list(active, inactive, true);
  update_list(inactive, active, false);
1597 * Return the closest split position on or before `pos'.
1599 * The result might be exactly on an edge, or in-between instruction positions.
1601 unsigned Vxls::nearestSplitBefore(unsigned pos
) {
1602 auto b
= blockFor(ctx
, pos
);
1603 auto range
= ctx
.block_ranges
[b
];
1604 if (pos
== range
.start
) return pos
;
1605 return (pos
- 1) | 1;
/*
 * Constrain the allowable registers for `ivl' by inspecting uses.
 *
 * Returns the latest position for which `allow' (which we populate) is valid.
 * We use this return value to fill the `free_until' PosVec in allocate()
 * below. That data structure tracks the first position at which a register is
 * /unavailable/, so it would appear that constrain()'s return value is
 * off by one.
 *
 * In fact, it is not; we actually /need/ this position offsetting because of
 * our leniency towards having uses at an Interval's end() position. If we
 * fail to constrain on an end-position use, we must still split and spill.
 * (In contrast, if we intersect with another Interval on an end position use,
 * it's okay because SSA tells us that the conflict must be the other
 * Interval's def position, and a use and a def at the same position don't
 * actually conflict; see the fun ASCII diagram that adorns the definition of
 * Interval.)
 */
unsigned Vxls::constrain(Interval* ivl, RegSet& allow) {
  auto const any = ctx.abi.unreserved() - ctx.abi.sf; // Any but not flags.
  allow = ctx.abi.unreserved();
  for (auto& u : ivl->uses) {
    // Map each use's constraint kind to the register class it requires.
    auto need = u.kind == Constraint::Simd ? ctx.abi.simdUnreserved :
                u.kind == Constraint::Gpr ? ctx.abi.gpUnreserved :
                u.kind == Constraint::Sf ? ctx.abi.sf :
                any; // Any or CopySrc
    if ((allow & need).empty()) {
      // cannot satisfy constraints; must split before u.pos
// NOTE(review): the early return, the per-use narrowing of `allow', and the
// function's final return appear to have been lost from this paste.
/*
 * Return the next intersection point between `current' and `other', or kMaxPos
 * if they never intersect.
 *
 * We assume that `other', if it represents an SSA variable, is not live at the
 * start of `current'.
 *
 * Note that if two intervals intersect, the first point of intersection will
 * always be the start of one of the intervals, because SSA ensures that a def
 * dominates all uses, and hence all live ranges as well.
 */
unsigned nextIntersect(const Interval* current, const Interval* other) {
  assertx(!current->fixed());
  if (current == current->leader() &&
      other == other->leader() &&
    // NOTE(review): the tail of this condition and the early `return' appear
    // to have been lost from this paste.
    // Since `other' is an inactive Vreg interval, it cannot cover current's
    // start, and `current' cannot cover other's start, since `other' started
    // earlier. Therefore, SSA guarantees no intersection.
  if (current->end() <= other->start() ||
      other->end() <= current->start()) {
    // One ends before the other starts.
    // NOTE(review): the `return kMaxPos;' for this case appears to have been
    // lost from this paste.
  // r1,e1 span all of current
  auto r1 = current->ranges.begin();
  auto e1 = current->ranges.end();
  // r2,e2 span the tail of other that might intersect current
  auto r2 = other->ranges.begin() + other->findRange(current->start());
  auto e2 = other->ranges.end();
  // search for the lowest position covered by current and other
  // NOTE(review): the enclosing loop header and the if/else pairing of the
  // two advance cases appear to have been lost from this paste.
    if (r1->start < r2->start) {
      if (r2->start < r1->end) return r2->start;
      if (++r1 == e1) return kMaxPos;
      if (r1->start < r2->end) return r1->start;
      if (++r2 == e2) return kMaxPos;
1689 void Vxls::allocate(Interval
* current
) {
1690 // Map from PhysReg until the first position at which it is /not/ available.
1691 PosVec free_until
; // 0 by default
1694 auto const conflict
= constrain(current
, allow
);
1696 // Mark regs that fit our constraints as free up until the point of conflict,
1697 // unless they're owned by active intervals---then mark them used.
1698 allow
.forEach([&](PhysReg r
) {
1699 free_until
[r
] = conflict
;
1701 for (auto ivl
: active
) {
1702 free_until
[ivl
->reg
] = 0;
1705 // Mark each reg assigned to an inactive interval as only free until the
1706 // first position at which `current' intersects that interval.
1707 for (auto ivl
: inactive
) {
1709 if (free_until
[r
] == 0) continue;
1710 auto until
= nextIntersect(current
, ivl
);
1711 free_until
[r
] = std::min(until
, free_until
[r
]);
1714 if (current
->ranges
.size() > 1) {
1715 auto const b
= blockFor(ctx
, current
->start());
1716 auto const blk_range
= ctx
.block_ranges
[b
];
1717 if (blk_range
.end
> current
->ranges
[0].end
) {
1718 // We're assigning a register to an interval with
1719 // multiple ranges, but the vreg isn't live out
1720 // of the first range. This means there's no
1721 // connection between this range and any subsequent
1722 // one, so we can safely break the interval
1723 // after the first range without making things worse.
1724 // On the other hand, it can make things better, by
1725 // eg not assigning a constant to a register in an
1726 // unlikely exit block, and then holding it in a callee save
1727 // reg across lots of unrelated code until its used
1728 // again in another unlikely exit block.
1729 auto second
= current
->split(blk_range
.end
, false);
1730 pending
.push(second
);
1731 } else if (current
->constant() &&
1732 current
->uses
.size() &&
1733 current
->uses
[0].pos
>= blk_range
.end
) {
1734 // we probably don't want to load a constant into a register
1735 // at the start of a block where its not used.
1736 return spill(current
);
1740 // Choose a register to allocate to `current', either via a hint or by
1741 // various heuristics.
1742 auto const choose_reg
= [&] {
1743 auto const hint
= chooseHint(variables
, current
,
1744 hint_info
, free_until
, allow
);
1745 if (hint
!= InvalidReg
&&
1746 free_until
[hint
] >= current
->end()) {
1750 auto ret
= InvalidReg
;
1751 for (auto const r
: free_until
) {
1752 ret
= choose_closest_after(current
->end(), free_until
, ret
, r
);
1757 auto const r
= choose_reg();
1758 auto const pos
= free_until
[r
];
1759 if (pos
>= current
->end()) {
1760 return assignReg(current
, r
);
1763 if (pos
> current
->start()) {
1764 // `r' is free for the first part of current.
1765 auto const prev_use
= current
->lastUseBefore(pos
);
1767 DEBUG_ONLY
auto min_split
= std::max(prev_use
, current
->start() + 1);
1768 assertx(min_split
<= pos
);
1770 auto split_pos
= nearestSplitBefore(pos
);
1771 if (split_pos
> current
->start()) {
1772 if (prev_use
&& prev_use
< split_pos
) {
1773 // If there are uses in previous blocks, but no uses between the start
1774 // of the block containing `split_pos' and `split_pos' itself, we
1775 // should split earlier; otherwise we'll need to insert moves/loads on
1776 // the edge(s) into this block, which clearly can't be used since we're
1777 // spilling before the first use. Might as well spill on a block
1778 // boundary, as early as possible.
1779 auto prev_range_idx
= current
->findRange(prev_use
);
1780 auto prev_range
= ¤t
->ranges
[prev_range_idx
];
1781 if (prev_range
->start
<= prev_use
&& prev_range
->end
< split_pos
) {
1784 if (prev_range
->start
> prev_use
&& prev_range
->start
< split_pos
) {
1785 split_pos
= prev_range
->start
;
1789 // Split `current'. We keep uses at the end of the first split because
1790 // we know that `r' is free up to /and including/ that position.
1791 auto second
= current
->split(split_pos
, true /* keep_uses */);
1792 pending
.push(second
);
1794 // Try to find a register again. Since `current' is shorter, we might
1795 // have a different hint, or a different heuristically-determined
1797 auto const r
= choose_reg();
1798 assertx(free_until
[r
] >= current
->end());
1799 return assignReg(current
, r
);
1803 // Must spill `current' or another victim.
1804 allocBlocked(current
);
1808 * When all registers are in use, find a good interval (possibly `current') to
1811 * When an interval is split and the second part is spilled, possibly split the
1812 * second part again before the next use-pos that requires a register, and
1813 * enqueue the third part.
1815 void Vxls::allocBlocked(Interval
* current
) {
1816 auto const cur_start
= current
->start();
1819 auto const conflict
= constrain(current
, allow
); // repeated from allocate
1821 // Track the positions (a) at which each PhysReg is next used by any lifetime
1822 // interval to which it's assigned (`used'), and (b) at which each PhysReg is
1823 // next assigned to a value whose lifetime intersects `current' (`blocked').
1824 PosVec used
, blocked
;
1825 allow
.forEach([&](PhysReg r
) { used
[r
] = blocked
[r
] = conflict
; });
1827 // compute next use of active registers, so we can pick the furthest one
1828 for (auto ivl
: active
) {
1830 blocked
[ivl
->reg
] = used
[ivl
->reg
] = 0;
1832 auto use_pos
= ivl
->firstUseAfter(cur_start
);
1833 used
[ivl
->reg
] = std::min(use_pos
, used
[ivl
->reg
]);
1837 // compute next intersection/use of inactive regs to find what's free longest
1838 for (auto ivl
: inactive
) {
1839 auto const r
= ivl
->reg
;
1840 if (blocked
[r
] == 0) continue;
1842 auto intersect_pos
= nextIntersect(current
, ivl
);
1843 if (intersect_pos
== kMaxPos
) continue;
1846 blocked
[r
] = std::min(intersect_pos
, blocked
[r
]);
1847 used
[r
] = std::min(blocked
[r
], used
[r
]);
1849 auto use_pos
= ivl
->firstUseAfter(cur_start
);
1850 used
[r
] = std::min(use_pos
, used
[r
]);
1854 // Choose the best victim register(s) to spill---the one with first-use
1855 // after, but closest to, the lifetime of `current'; or the one with farthest
1856 // first-use if no such register exists.
1857 auto r
= find_closest_after(current
->end(), used
);
1859 // If all other registers are used by their owning intervals before the first
1860 // register-use of `current', then we have to spill `current'.
1861 if (used
[r
] < current
->firstUse()) {
1862 return spill(current
);
1865 auto const block_pos
= blocked
[r
];
1866 if (block_pos
< current
->end()) {
1867 // If /every/ usable register is assigned to a lifetime interval which
1868 // intersects with `current', we have to split current before that point.
1869 auto prev_use
= current
->lastUseBefore(block_pos
);
1871 DEBUG_ONLY
auto min_split
= std::max(prev_use
, cur_start
+ 1);
1872 auto max_split
= block_pos
;
1873 assertx(cur_start
< min_split
&& min_split
<= max_split
);
1875 auto split_pos
= nearestSplitBefore(max_split
);
1876 if (split_pos
> current
->start()) {
1877 auto second
= current
->split(split_pos
, true /* keep_uses */);
1878 pending
.push(second
);
1881 spillOthers(current
, r
);
1882 assignReg(current
, r
);
/*
 * Split and spill other intervals that conflict with `current' for register r,
 * at current->start().
 *
 * If necessary, split the victims again before their first use position that
 * requires a register.
 */
void Vxls::spillOthers(Interval* current, PhysReg r) {
  auto const cur_start = current->start();

  // Split `ivl' at `cur_start' and spill the second part. If `cur_start' is
  // too close to ivl->start(), spill all of `ivl' instead.
  auto const spill_after = [&] (Interval* ivl) {
    auto const split_pos = nearestSplitBefore(cur_start);
    auto const tail = split_pos <= ivl->start() ? ivl : ivl->split(split_pos);
  // NOTE(review): the spill of `tail' and the lambda close appear to have
  // been lost from this paste.

  // Split and spill other active intervals after `cur_start'.
  auto end = active.end();
  for (auto i = active.begin(); i != end;) {
    // NOTE(review): the element load (presumably `auto other = *i;') and the
    // loop's advance/removal logic appear to have been lost here and in the
    // loop below.
    if (other->fixed() || r != other->reg) {

  // Removed elements were swapped past `end'; drop them in one shot.
  active.erase(end, active.end());

  // Split and spill any inactive intervals after `cur_start' if they intersect
  // with `current'.
  end = inactive.end();
  for (auto i = inactive.begin(); i != end;) {
    if (other->fixed() || r != other->reg) {

    // An inactive interval is only a victim if it actually intersects
    // `current' before `current' ends.
    auto intersect = nextIntersect(current, other);
    if (intersect >= current->end()) {

  inactive.erase(end, inactive.end());
1933 SpillInfo
assignRegisters(const VxlsContext
& ctx
,
1934 const jit::vector
<Variable
*>& variables
,
1935 const HintInfo
& hint_info
) {
1936 return Vxls(ctx
, variables
, hint_info
).go();
1939 ///////////////////////////////////////////////////////////////////////////////
1940 // Lifetime continuity resolution.
/*
 * A pair of source block number and successor index, used to identify an
 * edge between blocks.
 */
using EdgeKey = std::pair<Vlabel,unsigned>;

// NOTE(review): the enclosing `struct EdgeHasher {' header appears to have
// been lost from this paste; this is its call operator.
size_t operator()(EdgeKey k) const {
  // XOR the block id with the successor index; successor indices are tiny,
  // so this is cheap and adequate for a hash-map key.
  return size_t(k.first) ^ k.second;

/*
 * Copies that are required at a given position or edge.
 *
 * The keys into the PhysReg::Map are the dests; the Interval*'s are the
 * sources (nullptr if no copy is needed).
 */
using CopyPlan = PhysReg::Map<Interval*>;
1963 * Copy and spill points for resolving split lifetime intervals.
1965 * After register allocation, some lifetime intervals may have been split, and
1966 * their Vregs assigned to different physical registers or spill locations. We
1967 * use this struct to track where we need to add moves to maintain continuity.
1968 * (We also use it to resolve phis.)
1970 struct ResolutionPlan
{
1971 // Where to insert spills.
1972 jit::hash_map
<unsigned,CopyPlan
> spills
;
1973 // Where to insert reg-reg moves, or spill and constant loads, between
1974 // instructions (or in place of copy{} and friends).
1975 jit::hash_map
<unsigned,CopyPlan
> copies
;
1977 // Copies and loads on edges (between blocks).
1978 jit::hash_map
<EdgeKey
,CopyPlan
,EdgeHasher
> edge_copies
;
1981 template<class CopyPlanT
>
1982 using void_req
= typename
std::enable_if
<
1983 std::is_same
<CopyPlanT
, CopyPlan
>::value
||
1984 std::is_same
<CopyPlanT
, const CopyPlan
>::value
1988 * Iterators for reg-reg copies and for {const,spill}-reg loads.
1990 template<class CopyPlanT
, class F
>
1991 void_req
<CopyPlanT
> for_each_copy(CopyPlanT
& plan
, F f
) {
1992 for (auto dst
: plan
) {
1993 auto& ivl
= plan
[dst
];
1994 if (!ivl
|| !ivl
->live()) continue;
1998 template<class CopyPlanT
, class F
>
1999 void_req
<CopyPlanT
> for_each_load(CopyPlanT
& plan
, F f
) {
2000 for (auto dst
: plan
) {
2001 auto& ivl
= plan
[dst
];
2002 if (!ivl
|| ivl
->live()) continue;
2003 assertx(ivl
->constant() || ivl
->spilled());
/*
 * Insert a spill after the def-position in `ivl'.
 *
 * There's only one such position, because of SSA.
 */
void insertSpill(const VxlsContext& ctx,
                 ResolutionPlan& resolution, Interval* ivl) {
  // Debug-only validation that a spill position is an odd (between
  // instruction) position strictly inside its block.
  auto DEBUG_ONLY checkPos = [&](unsigned pos) {
    assertx(pos % 2 == 1);
    DEBUG_ONLY auto const b = blockFor(ctx, pos);
    DEBUG_ONLY auto const& range = ctx.block_ranges[b];
    assertx(pos - 1 >= range.start && pos + 1 < range.end);
  // NOTE(review): the lambda tail (presumably `return true;' and `};')
  // appears to have been lost from this paste.

  auto const spill = [&] (unsigned pos) {
    assertx(checkPos(pos));
    resolution.spills[pos][ivl->reg] = ivl; // store ivl->reg => ivl->slot
  // NOTE(review): the lambda's closing `};' appears to have been lost here.

  if (ivl->var->recordbasenativesps.empty()) {
    spill(ivl->var->def_pos + 1);
    // NOTE(review): the `} else {' and this loop's spill call appear to have
    // been lost from this paste; when recordbasenativesp positions exist,
    // spills are emitted relative to them instead of the def.
    for (auto const pos : ivl->var->recordbasenativesps) {
      always_assert_flog(ivl->covers(pos),
                         "Spilling required before native sp set");
2038 * Insert spills and copies that connect subintervals that were split
2039 * between instructions.
2041 void resolveSplits(const VxlsContext
& ctx
,
2042 const jit::vector
<Variable
*>& variables
,
2043 ResolutionPlan
& resolution
) {
2044 for (auto var
: variables
) {
2046 auto i1
= var
->ivl();
2047 if (var
->slot
>= 0) insertSpill(ctx
, resolution
, i1
);
2049 for (auto i2
= i1
->next
; i2
; i1
= i2
, i2
= i2
->next
) {
2050 auto const pos
= i2
->start();
2051 if (i1
->end() != pos
) continue; // spans lifetime hole
2052 if (i2
->reg
== InvalidReg
) continue; // no load necessary
2053 if (i2
->reg
== i1
->reg
) continue; // no copy necessary
2055 auto const b
= blockFor(ctx
, pos
);
2056 auto const range
= ctx
.block_ranges
[b
];
2059 // even position requiring a copy must be on edge
2060 assertx(range
.start
== pos
);
2063 assertx(pos
> range
.start
); // implicit label position per block
2064 if (pos
+ 1 == range
.end
) continue; // copy belongs on successor edge
2066 assertx(!resolution
.copies
[pos
][i2
->reg
]);
2067 resolution
.copies
[pos
][i2
->reg
] = i1
;
2074 * Lower copyargs{} and copy{} into moveplans at the same position.
2076 void lowerCopies(Vunit
& unit
, const VxlsContext
& ctx
,
2077 const jit::vector
<Variable
*>& variables
,
2078 ResolutionPlan
& resolution
) {
2079 // Add a lifetime-resolving copy from `s' to `d'---without touching the
2080 // instruction stream.
2081 auto const lower
= [&] (unsigned pos
, Vreg s
, Vreg d
) {
2082 auto v1
= variables
[s
];
2083 auto v2
= variables
[d
];
2085 assertx(v2
->fixed() || v2
->def_pos
== pos
); // ssa
2087 auto i1
= v1
->fixed() ? v1
->ivl() : v1
->ivlAtUse(pos
);
2088 auto i2
= v2
->ivl();
2090 if (i2
->reg
!= i1
->reg
) {
2091 assertx(!resolution
.copies
[pos
][i2
->reg
]);
2092 resolution
.copies
[pos
][i2
->reg
] = i1
;
2096 for (auto b
: ctx
.blocks
) {
2097 auto pos
= ctx
.block_ranges
[b
].start
;
2099 for (auto& inst
: unit
.blocks
[b
].code
) {
2100 if (inst
.op
== Vinstr::copyargs
) {
2101 auto const& uses
= unit
.tuples
[inst
.copyargs_
.s
];
2102 auto const& defs
= unit
.tuples
[inst
.copyargs_
.d
];
2103 for (unsigned i
= 0, n
= uses
.size(); i
< n
; ++i
) {
2104 lower(pos
, uses
[i
], defs
[i
]);
2107 } else if (inst
.op
== Vinstr::copy2
) {
2108 lower(pos
, inst
.copy2_
.s0
, inst
.copy2_
.d0
);
2109 lower(pos
, inst
.copy2_
.s1
, inst
.copy2_
.d1
);
2111 } else if (inst
.op
== Vinstr::copy
) {
2112 lower(pos
, inst
.copy_
.s
, inst
.copy_
.d
);
2121 * Search for the phidef in block `b', then return its dest tuple.
2123 Vtuple
findPhiDefs(const Vunit
& unit
, Vlabel b
) {
2124 assertx(!unit
.blocks
[b
].code
.empty() &&
2125 unit
.blocks
[b
].code
.front().op
== Vinstr::phidef
);
2126 return unit
.blocks
[b
].code
.front().phidef_
.defs
;
2130 * Register copy resolutions for livein sets and phis.
2132 void resolveEdges(Vunit
& unit
, const VxlsContext
& ctx
,
2133 const jit::vector
<Variable
*>& variables
,
2134 ResolutionPlan
& resolution
) {
2135 auto const addPhiEdgeCopies
= [&] (Vlabel block
, Vlabel target
,
2136 uint32_t targetIndex
,
2137 const VregList
& uses
) {
2138 auto const p1
= ctx
.block_ranges
[block
].end
- 2;
2139 auto const& defs
= unit
.tuples
[findPhiDefs(unit
, target
)];
2141 for (unsigned i
= 0, n
= uses
.size(); i
< n
; ++i
) {
2142 auto v1
= variables
[uses
[i
]];
2143 auto v2
= variables
[defs
[i
]];
2146 auto i1
= v1
->fixed() ? v1
->ivl() : v1
->ivlAtUse(p1
);
2147 auto i2
= v2
->ivl();
2149 if (i2
->reg
!= i1
->reg
) {
2150 EdgeKey edge
{ block
, targetIndex
};
2151 assertx(!resolution
.edge_copies
[edge
][i2
->reg
]);
2152 resolution
.edge_copies
[edge
][i2
->reg
] = i1
;
2157 for (auto b1
: ctx
.blocks
) {
2158 auto const p1
= ctx
.block_ranges
[b1
].end
- 2;
2159 auto& block1
= unit
.blocks
[b1
];
2160 auto& inst1
= block1
.code
.back();
2162 // Add resolutions for phis.
2163 if (inst1
.op
== Vinstr::phijmp
) {
2164 auto const& phijmp
= inst1
.phijmp_
;
2165 auto const target
= phijmp
.target
;
2166 auto const& uses
= unit
.tuples
[phijmp
.uses
];
2167 addPhiEdgeCopies(b1
, target
, 0, uses
);
2168 inst1
= jmp
{target
};
2171 auto const succlist
= succs(block1
);
2173 // Add resolutions for livein sets.
2174 for (unsigned i
= 0, n
= succlist
.size(); i
< n
; i
++) {
2175 auto const b2
= succlist
[i
];
2176 auto const p2
= ctx
.block_ranges
[b2
].start
;
2178 forEach(ctx
.livein
[b2
], [&] (Vreg vr
) {
2179 auto var
= variables
[vr
];
2180 if (var
->fixed()) return;
2181 Interval
* i1
= nullptr;
2182 Interval
* i2
= nullptr;
2184 for (auto ivl
= var
->ivl(); ivl
&& !(i1
&& i2
); ivl
= ivl
->next
) {
2185 if (ivl
->covers(p1
)) i1
= ivl
;
2186 if (ivl
->covers(p2
)) i2
= ivl
;
2189 // i2 can be unallocated if the tmp is a constant or is spilled.
2190 if (i2
->reg
!= InvalidReg
&& i2
->reg
!= i1
->reg
) {
2191 assertx((resolution
.edge_copies
[{b1
,i
}][i2
->reg
] == nullptr));
2192 resolution
.edge_copies
[{b1
,i
}][i2
->reg
] = i1
;
2200 * Walk through the variables list and account for all points where copies or
2201 * spills need to be made.
2203 ResolutionPlan
resolveLifetimes(Vunit
& unit
, const VxlsContext
& ctx
,
2204 const jit::vector
<Variable
*>& variables
) {
2205 ResolutionPlan resolution
;
2207 resolveSplits(ctx
, variables
, resolution
);
2208 lowerCopies(unit
, ctx
, variables
, resolution
);
2209 resolveEdges(unit
, ctx
, variables
, resolution
);
2214 ///////////////////////////////////////////////////////////////////////////////
2215 // Operand renaming.
2218 * Visitor class for renaming registers.
2221 Renamer(const jit::vector
<Variable
*>& variables
, unsigned pos
)
2222 : variables(variables
)
2227 void imm(const T
& /*r*/) {}
2228 template<class R
> void def(R
& r
) { rename(r
); }
2229 template<class D
, class H
> void defHint(D
& dst
, H
) { rename(dst
); }
2230 template<class R
> void use(R
& r
) { rename(r
); }
2231 template<class S
, class H
> void useHint(S
& src
, H
) { rename(src
); }
2232 template<class R
> void across(R
& r
) { rename(r
); }
2233 void across(Vptr
& m
) {
2234 if (m
.base
.isValid()) rename(m
.base
);
2235 if (m
.index
.isValid()) rename(m
.index
);
2237 template<Width w
> void across(Vp
<w
>& m
) { across(static_cast<Vptr
&>(m
)); }
2240 void use(RegSet
/*r*/) {}
2242 if (m
.base
.isValid()) rename(m
.base
);
2243 if (m
.index
.isValid()) rename(m
.index
);
2245 template<Width w
> void use(Vp
<w
>& m
) { use(static_cast<Vptr
&>(m
)); }
2247 void use(VcallArgsId
) { always_assert(false && "vcall unsupported in vxls"); }
2250 void rename(Vreg8
& r
) { r
= lookup(r
, Constraint::Gpr
); }
2251 void rename(Vreg16
& r
) { r
= lookup(r
, Constraint::Gpr
); }
2252 void rename(Vreg32
& r
) { r
= lookup(r
, Constraint::Gpr
); }
2253 void rename(Vreg64
& r
) { r
= lookup(r
, Constraint::Gpr
); }
2254 void rename(VregDbl
& r
) { r
= lookup(r
, Constraint::Simd
); }
2255 void rename(Vreg128
& r
) { r
= lookup(r
, Constraint::Simd
); }
2256 void rename(VregSF
& r
) { r
= RegSF
{0}; }
2257 void rename(Vreg
& r
) { r
= lookup(r
, Constraint::Any
); }
2258 void rename(Vtuple
/*t*/) { /* phijmp+phidef handled by resolveEdges */
2261 PhysReg
lookup(Vreg vreg
, Constraint kind
) {
2262 auto var
= variables
[vreg
];
2263 if (!var
|| vreg
.isPhys()) return vreg
;
2264 PhysReg reg
= var
->ivlAtUse(pos
)->reg
;
2265 assertx((kind
== Constraint::Gpr
&& reg
.isGP()) ||
2266 (kind
== Constraint::Simd
&& reg
.isSIMD()) ||
2267 (kind
== Constraint::Sf
&& reg
.isSF()) ||
2268 (kind
== Constraint::Any
&& reg
!= InvalidReg
));
2272 const jit::vector
<Variable
*>& variables
;
2277 * Visit every virtual-register typed operand in `unit', and rename it to its
2278 * assigned physical register.
2280 void renameOperands(Vunit
& unit
, const VxlsContext
& ctx
,
2281 const jit::vector
<Variable
*>& variables
) {
2282 for (auto b
: ctx
.blocks
) {
2283 auto pos
= ctx
.block_ranges
[b
].start
;
2284 for (auto& inst
: unit
.blocks
[b
].code
) {
2285 Renamer
renamer(variables
, pos
);
2286 visitOperands(inst
, renamer
);
2291 kVasmRegAllocDetailLevel
,
2292 printVariables("after renaming operands", unit
, ctx
, variables
);
2296 ///////////////////////////////////////////////////////////////////////////////
2300 * Insert stores for `spills' (with spill space starting at `slots') into
2301 * `code' before code[j], corresponding to XLS logical position `pos'.
2303 * Updates `j' to refer to the same instruction after the code insertions.
2305 void insertSpillsAt(jit::vector
<Vinstr
>& code
, unsigned& j
,
2306 const CopyPlan
& spills
, folly::Optional
<MemoryRef
> slots
,
2308 jit::vector
<Vinstr
> stores
;
2310 for (auto src
: spills
) {
2311 auto ivl
= spills
[src
];
2314 always_assert_flog(slots
, "Spilling before native sp is set (pos {})",
2316 auto slot
= ivl
->var
->slot
;
2317 assertx(slot
>= 0 && src
== ivl
->reg
);
2318 MemoryRef ptr
{slots
->r
+ slotOffset(slot
)};
2320 if (!ivl
->var
->wide
) {
2321 always_assert_flog(!src
.isSF(), "Tried to spill %flags");
2322 stores
.emplace_back(store
{src
, ptr
});
2324 assertx(src
.isSIMD());
2325 stores
.emplace_back(storeups
{src
, ptr
});
2328 insertCodeAt(code
, j
, stores
, pos
);
2332 * Insert reg-reg moves for `copies' into `code' before code[j], corresponding
2333 * to XLS logical position `pos'.
2335 * Updates `j' to refer to the same instruction after the code insertions.
2337 void insertCopiesAt(const VxlsContext
& ctx
,
2338 jit::vector
<Vinstr
>& code
, unsigned& j
,
2339 const CopyPlan
& plan
, unsigned pos
) {
2341 jit::vector
<Vinstr
> copies
;
2343 for_each_copy(plan
, [&] (PhysReg dst
, const Interval
* ivl
) {
2344 moves
[dst
] = ivl
->reg
;
2346 auto const hows
= doRegMoves(moves
, ctx
.tmp
);
2348 for (auto const& how
: hows
) {
2349 if (how
.m_kind
== MoveInfo::Kind::Xchg
) {
2350 copies
.emplace_back(copy2
{how
.m_src
, how
.m_dst
, how
.m_dst
, how
.m_src
});
2352 copies
.emplace_back(copy
{how
.m_src
, how
.m_dst
});
2355 insertCodeAt(code
, j
, copies
, pos
);
2359 * Insert constant loads or loads from spill space---with spill space starting
2360 * at `slots'---for `loads' into `code' before code[j], corresponding to XLS
2361 * logical position `pos'.
2363 * Updates `j' to refer to the same instruction after the code insertions.
2365 void insertLoadsAt(jit::vector
<Vinstr
>& code
, unsigned& j
,
2366 const CopyPlan
& plan
, folly::Optional
<MemoryRef
> slots
,
2368 jit::vector
<Vinstr
> loads
;
2370 for_each_load(plan
, [&] (PhysReg dst
, const Interval
* ivl
) {
2371 if (ivl
->constant()) {
2372 if (ivl
->var
->val
.isUndef
) return;
2373 loads
.push_back([&]() -> Vinstr
{
2374 switch (ivl
->var
->val
.kind
) {
2376 case Vconst::Double
:
2377 return ldimmq
{uint64_t(ivl
->var
->val
.val
), dst
};
2379 return ldimml
{int32_t(ivl
->var
->val
.val
), dst
};
2381 return ldimmb
{uint8_t(ivl
->var
->val
.val
), dst
};
2385 } else if (ivl
->spilled()) {
2386 always_assert_flog(slots
, "Reloading before native sp is set (pos {})",
2388 MemoryRef ptr
{slots
->r
+ slotOffset(ivl
->var
->slot
)};
2389 if (!ivl
->var
->wide
) {
2390 loads
.emplace_back(load
{ptr
, dst
});
2392 assertx(dst
.isSIMD());
2393 loads
.emplace_back(loadups
{ptr
, dst
});
2397 insertCodeAt(code
, j
, loads
, pos
);
2401 * Mutate the Vinstr stream by inserting copies.
2403 * This destroys the position numbering, so we can't use interval positions
2406 void insertCopies(Vunit
& unit
, const VxlsContext
& ctx
,
2407 const jit::vector
<Variable
*>& /*variables*/,
2408 const ResolutionPlan
& resolution
) {
2409 auto const getSlots
= [&] (folly::Optional
<int> offset
) {
2410 folly::Optional
<MemoryRef
> slots
;
2411 if (offset
) slots
= ctx
.sp
[*offset
];
2415 // Insert copies inside blocks.
2416 for (auto const b
: ctx
.blocks
) {
2417 auto& block
= unit
.blocks
[b
];
2418 auto& code
= block
.code
;
2419 auto pos
= ctx
.block_ranges
[b
].start
;
2420 auto offset
= ctx
.spill_offsets
[b
];
2422 for (unsigned j
= 0; j
< code
.size(); j
++, pos
+= 2) {
2423 auto const slots
= getSlots(offset
);
2425 // Spills, reg-reg moves, and loads of constant values or spill space all
2426 // occur between instruction. Insert them in order.
2427 auto s
= resolution
.spills
.find(pos
- 1);
2428 if (s
!= resolution
.spills
.end()) {
2429 insertSpillsAt(code
, j
, s
->second
, slots
, pos
- 1);
2431 auto c
= resolution
.copies
.find(pos
- 1);
2432 if (c
!= resolution
.copies
.end()) {
2433 insertCopiesAt(ctx
, code
, j
, c
->second
, pos
- 1);
2434 insertLoadsAt(code
, j
, c
->second
, slots
, pos
- 1);
2437 // Insert copies and loads at instructions.
2438 c
= resolution
.copies
.find(pos
);
2439 if (c
!= resolution
.copies
.end()) {
2440 insertCopiesAt(ctx
, code
, j
, c
->second
, pos
);
2441 insertLoadsAt(code
, j
, c
->second
, slots
, pos
);
2443 assertx(resolution
.spills
.count(pos
) == 0);
2444 if (code
[j
].op
== Vinstr::recordbasenativesp
) {
2445 assert_flog(!offset
, "Block B{} Instr {} initiailizes native SP, but "
2446 "already initialized.", size_t(b
), j
);
2448 } else if (code
[j
].op
== Vinstr::unrecordbasenativesp
) {
2449 assert_flog(offset
, "Block B{} Instr {} uninitiailizes native SP, but "
2450 "already uninitialized.", size_t(b
), j
);
2451 assert_flog(*offset
== 0, "Block B{} Instr {} uninitiailizes native "
2452 "SP, but SP offset is non zero.", size_t(b
), j
);
2453 offset
= folly::none
;
2454 } else if (offset
) {
2455 *offset
-= spEffect(unit
, code
[j
], ctx
.sp
);
2460 // insert copies on edges
2461 for (auto const b
: ctx
.blocks
) {
2462 auto& block
= unit
.blocks
[b
];
2463 auto succlist
= succs(block
);
2465 if (succlist
.size() == 1) {
2466 // copies will go at end of b
2467 auto const c
= resolution
.edge_copies
.find({b
, 0});
2468 if (c
!= resolution
.edge_copies
.end()) {
2469 auto& code
= block
.code
;
2470 unsigned j
= code
.size() - 1;
2471 auto const pos
= ctx
.block_ranges
[b
].end
- 1;
2472 auto const offset
= ctx
.spill_offsets
[succlist
[0]];
2473 auto const slots
= getSlots(offset
);
2475 // We interleave copies and loads in `edge_copies', so here and below
2476 // we process them separately (and pass `true' to avoid asserting).
2477 insertCopiesAt(ctx
, code
, j
, c
->second
, pos
);
2478 insertLoadsAt(code
, j
, c
->second
, slots
, pos
);
2481 // copies will go at start of successor
2482 for (int i
= 0, n
= succlist
.size(); i
< n
; i
++) {
2483 auto s
= succlist
[i
];
2484 auto const c
= resolution
.edge_copies
.find({b
, i
});
2485 if (c
!= resolution
.edge_copies
.end()) {
2486 auto& code
= unit
.blocks
[s
].code
;
2488 auto const pos
= ctx
.block_ranges
[s
].start
;
2489 auto const offset
= ctx
.spill_offsets
[s
];
2490 auto const slots
= getSlots(offset
);
2492 insertCopiesAt(ctx
, code
, j
, c
->second
, pos
);
2493 insertLoadsAt(code
, j
, c
->second
, slots
, pos
);
2500 ///////////////////////////////////////////////////////////////////////////////
2503 * Peephole cleanup pass.
2505 * Remove no-op copy sequences before allocating spill space, since doing so
2506 * might modify the CFG.
2508 void peephole(Vunit
& unit
, const VxlsContext
& ctx
) {
2509 // Whether a Vinstr is a register swap.
2510 auto const match_xchg
= [] (Vinstr
& i
, Vreg
& r0
, Vreg
& r1
) {
2511 if (i
.op
!= Vinstr::copy2
) return false;
2514 return r0
== i
.copy2_
.d1
&& r1
== i
.copy2_
.d0
;
2517 for (auto b
: ctx
.blocks
) {
2518 auto& code
= unit
.blocks
[b
].code
;
2519 for (int i
= 0, n
= code
.size(); i
+ 1 < n
; i
++) {
2520 Vreg r0
, r1
, r2
, r3
;
2521 if (match_xchg(code
[i
], r0
, r1
) &&
2522 match_xchg(code
[i
+ 1], r2
, r3
) &&
2523 ((r0
== r2
&& r1
== r3
) || (r0
== r3
&& r1
== r2
))) {
2524 // matched xchg+xchg that cancel each other
2526 code
[i
+ 1] = nop
{};
2530 auto end
= std::remove_if(code
.begin(), code
.end(), [&](Vinstr
& inst
) {
2531 return is_trivial_nop(inst
) ||
2532 inst
.op
== Vinstr::phidef
; // we lowered it
2534 code
.erase(end
, code
.end());
2538 ///////////////////////////////////////////////////////////////////////////////
2539 // Spill space allocation.
2542 * SpillState is used by allocateSpillSpace() to decide where to allocate/free
2543 * spill space. It represents the state of the spill space as a whole and is
2544 * computed before each individual instruction.
2546 * Order is important in this enum: it's only legal to transition to states
2547 * with higher values, and states are merged using std::max().
2549 enum SpillState
: uint8_t {
2550 // State is uninitialized. All block in-states start here.
2553 // Spill space is not currently possible; we must allocate spill space after
2557 // Spill space is not currently needed; it's safe to allocate spill space
2558 // after this point.
2561 // Spill space is needed and must be allocated at or before this point.
2566 * SpillStates is used to hold in/out state for each block after the analysis
2567 * pass of allocateSpillSpace().
2569 struct SpillStates
{
2576 * Returns true if spill space must be allocated before execution of this
2577 * instruction. In order to keep things simple, we return true for any
2578 * instruction that reads or writes sp.
2580 bool instrNeedsSpill(const Vunit
& unit
, const Vinstr
& inst
, PhysReg sp
) {
2581 // Implicit sp input/output.
2582 if (spEffect(unit
, inst
, sp
, false) != 0) return true;
2584 auto foundSp
= false;
2585 visitDefs(unit
, inst
, [&] (Vreg r
) { if (r
== sp
) foundSp
= true; });
2586 if (foundSp
) return true;
2588 visitUses(unit
, inst
, [&] (Vreg r
) { if (r
== sp
) foundSp
= true; });
2593 * Return the required SpillState coming into inst. prevState must not be
2596 SpillState
instrInState(const Vunit
& unit
, const Vinstr
& inst
,
2597 SpillState prevState
, PhysReg sp
) {
2598 switch (prevState
) {
2601 case NoSpillPossible
:
2602 if (inst
.op
== Vinstr::recordbasenativesp
) return NoSpill
;
2603 return NoSpillPossible
;
2606 if (inst
.op
== Vinstr::unrecordbasenativesp
) return NoSpillPossible
;
2607 if (instrNeedsSpill(unit
, inst
, sp
)) return NeedSpill
;
2611 if (inst
.op
== Vinstr::unrecordbasenativesp
) return NoSpillPossible
;
2615 always_assert(false);
2619 * processSpillExits() can insert jcc{} instructions in the middle of a
2620 * block. fixupBlockJumps() breaks the given block after any jccs, making the
2621 * unit valid again. This is done as a separate pass from the work in
2622 * processSpillExits() to reduce complexity.
2624 void fixupBlockJumps(Vunit
& unit
, Vlabel label
) {
2625 jit::vector
<Vinstr
> origCode
;
2626 origCode
.swap(unit
.blocks
[label
].code
);
2627 auto insertBlock
= [&]() -> Vblock
& { return unit
.blocks
[label
]; };
2629 for (auto const& inst
: origCode
) {
2630 insertBlock().code
.emplace_back(inst
);
2632 if (inst
.op
== Vinstr::jcc
&& !inst
.jcc_
.targets
[0].isValid()) {
2633 auto newLabel
= unit
.makeBlock(insertBlock().area_idx
,
2634 insertBlock().weight
);
2635 insertBlock().code
.back().jcc_
.targets
[0] = newLabel
;
2642 * Walk through the given block, undoing any fallbackcc/bindjcc optimizations
2643 * that happen in an area where spill space is live. The latter transformation
2644 * is necessary to make the hidden edge out of the fallbackcc{} explicit, so we
2645 * can insert an lea on it to free spill space. It takes something like this:
2648 * cmpbim 0, %rbp[0x10] => %flags
2649 * fallbackcc CC_E, %flags, <SrcKey>
2652 * and turns it into something like this:
2655 * cmpbim 0, %rbp[0x10] => %flags
2656 * jcc CC_E, %flags -> B3, else B2
2660 * lea %rsp[0x20] => %rsp
2663 void processSpillExits(Vunit
& unit
, Vlabel label
, SpillState state
,
2664 Vinstr free
, PhysReg sp
) {
2665 auto code
= &unit
.blocks
[label
].code
;
2666 auto needFixup
= false;
2668 for (unsigned i
= 0, n
= code
->size(); i
< n
; ++i
) {
2669 auto& inst
= (*code
)[i
];
2670 state
= instrInState(unit
, inst
, state
, sp
);
2672 if (state
< NeedSpill
||
2673 (inst
.op
!= Vinstr::fallbackcc
&&
2674 inst
.op
!= Vinstr::bindjcc
&&
2675 inst
.op
!= Vinstr::jcci
)) {
2679 FTRACE(3, "Breaking out {}: {}\n", label
, show(unit
, inst
));
2680 auto target
= unit
.makeBlock(AreaIndex::Cold
, 0);
2681 // makeBlock might reallocate unit.blocks
2682 code
= &unit
.blocks
[label
].code
;
2684 auto& targetCode
= unit
.blocks
[target
].code
;
2685 free
.set_irctx(inst
.irctx());
2688 if (inst
.op
== Vinstr::fallbackcc
) {
2689 auto const& fb_i
= inst
.fallbackcc_
;
2690 targetCode
.emplace_back(free
);
2691 targetCode
.emplace_back(fallback
{fb_i
.target
, fb_i
.spOff
, fb_i
.args
});
2694 } else if (inst
.op
== Vinstr::bindjcc
) {
2695 auto const& bj_i
= inst
.bindjcc_
;
2696 targetCode
.emplace_back(free
);
2697 targetCode
.emplace_back(bindjmp
{bj_i
.target
, bj_i
.spOff
, bj_i
.args
});
2700 } else /* inst.op == Vinstr::jcci */ {
2701 auto const& jcc_i
= inst
.jcci_
;
2702 targetCode
.emplace_back(free
);
2703 targetCode
.emplace_back(jmpi
{jcc_i
.taken
});
2707 targetCode
.back().set_irctx(inst
.irctx());
2709 // Next is set to an invalid block that will be fixed up once we're done
2710 // iterating through the original block.
2711 inst
= jcc
{cc
, sf
, {Vlabel
{}, target
}};
2715 if (needFixup
) fixupBlockJumps(unit
, label
);
2719 * Merge src into dst, returning true iff dst was changed.
2721 bool mergeSpillStates(SpillState
& dst
, SpillState src
) {
2722 assertx(src
!= Uninit
);
2723 if (dst
== src
) return false;
2725 // The only permitted merges result in a state with higher values. This
2726 // holds because we can't merge a NoSpillPossible with a non NoSpillPossible
2727 // since that would mean a program point both can and can't have spills.
2728 assertx(IMPLIES(src
!= NoSpillPossible
, dst
!= NoSpillPossible
));
2729 auto const oldDst
= dst
;
2730 dst
= std::max(dst
, src
);
2731 return dst
!= oldDst
;
2734 std::default_random_engine
s_stressRand(0xfaceb00c);
2735 std::uniform_int_distribution
<int> s_stressDist(1,7);
2738 * If the current unit used any spill slots, allocate and free spill space
2739 * where appropriate. Spill space is allocated right before it's needed and
2740 * freed before any instruction that exits the unit, which is any block-ending
2741 * instruction with no successors in the unit. fallbackcc{} and bindjcc{}
2742 * instructions have hidden edges that exit the unit, and if we encounter one
2743 * while spill space is live, we have to make that exit edge explicit to insert
2744 * code on it (see processSpillExits()). This makes the exit path more
2745 * expensive, so we try to allocate spill space as late as possible to avoid
2746 * pessimising fallbackcc/bindjcc instructions unless it's really
2747 * necessary. The algorithm uses two passes:
2750 * - For each block in RPO:
2751 * - Load in-state, which has been populated by at least one predecessor
2752 * (or manually set to NoSpillPossible for the entry block).
2753 * - Analyze each instruction in the block, determining what state the
2754 * spill space must be in before executing it.
2755 * - Record out-state for the block and propagate to successors. If this
2756 * changes the in-state for any of them, enqueue them for (re)processing.
2759 * - For each block (we use RPO to only visit reachable blocks but order
2761 * - Inspect the block's in-state and out-state:
2762 * - NoSpill in: Walk the block to see if we need to allocate spill space
2763 * before any instructions.
2764 * - NoSpill out: Allocate spill space on any edges to successors with
2765 * NeedSpill in-states. Also walk block and check for deallocation of
2767 * - NeedSpill out: If the block has no in-unit successors, free spill
2768 * space before the block-end instruction.
2769 * - != NoSpill out: Look for any fallbackcc/bindjcc instructions,
2770 * deoptimizing as appropriate (see processSpillExits()).
2772 void allocateSpillSpace(Vunit
& unit
, const VxlsContext
& ctx
,
2774 if (spi
.used_spill_slots
== 0) return;
2775 Timer
t(Timer::vasm_reg_alloc_spill
, unit
.log_entry
);
2777 // Make sure we always allocate spill space in multiples of 16 bytes, to keep
2778 // alignment straightforward.
2779 if (spi
.used_spill_slots
% 2) spi
.used_spill_slots
++;
2780 FTRACE(1, "Allocating {} spill slots\n", spi
.used_spill_slots
);
2782 auto const spillSize
= safe_cast
<int32_t>(slotOffset(spi
.used_spill_slots
));
2783 // Pointer manipulation is traditionally done with lea, and it's safe to
2784 // insert even where flags might be live.
2785 Vinstr alloc
= lea
{ctx
.sp
[-spillSize
], ctx
.sp
};
2786 Vinstr free
= lea
{ctx
.sp
[spillSize
], ctx
.sp
};
2788 jit::vector
<uint32_t> rpoIds(unit
.blocks
.size());
2789 for (uint32_t i
= 0; i
< ctx
.blocks
.size(); ++i
) rpoIds
[ctx
.blocks
[i
]] = i
;
2791 jit::vector
<SpillStates
> states(unit
.blocks
.size(), {Uninit
, Uninit
, false});
2792 states
[unit
.entry
].in
= NoSpillPossible
;
2793 dataflow_worklist
<uint32_t> worklist(unit
.blocks
.size());
2796 // Walk the blocks in rpo. At the end of each block, propagate its out-state
2797 // to successors, adding them to the worklist if their in-state
2798 // changes. Blocks may be visited multiple times if loops are present.
2799 while (!worklist
.empty()) {
2800 auto const label
= ctx
.blocks
[worklist
.pop()];
2801 auto const& block
= unit
.blocks
[label
];
2802 auto const stateIn
= states
[label
].in
;
2803 auto state
= stateIn
;
2805 for (auto& inst
: block
.code
) {
2806 state
= instrInState(unit
, inst
, state
, ctx
.sp
);
2807 if (state
!= stateIn
) states
[label
].changes
= true;
2809 states
[label
].out
= state
;
2811 for (auto s
: succs(block
)) {
2812 if (mergeSpillStates(states
[s
].in
, state
)) {
2813 worklist
.push(rpoIds
[s
]);
2818 // Do a single mutation pass over the blocks.
2819 for (auto const label
: ctx
.blocks
) {
2820 auto state
= states
[label
];
2821 auto& block
= unit
.blocks
[label
];
2823 // Any block with a non NeedSpill in-state might have an instruction in it
2824 // that needs spill space, which we allocate right before the instruction
2826 if (state
.in
!= NeedSpill
&& (state
.out
== NeedSpill
|| state
.changes
)) {
2827 auto blockState
= state
.in
;
2828 for (auto it
= block
.code
.begin(); it
!= block
.code
.end(); ++it
) {
2829 blockState
= instrInState(unit
, *it
, blockState
, ctx
.sp
);
2830 if (blockState
== NeedSpill
) {
2831 FTRACE(3, "alloc spill before {}: {}\n", label
, show(unit
, *it
));
2832 alloc
.set_irctx(it
->irctx());
2833 block
.code
.insert(it
, alloc
);
2839 // Allocate spill space on edges from a NoSpill out-state to a NeedSpill
2841 auto const successors
= succs(block
);
2842 if (state
.out
== NoSpill
|| state
.out
== NoSpillPossible
) {
2843 for (auto s
: successors
) {
2844 if (states
[s
].in
== NeedSpill
) {
2845 assertx(state
.out
!= NoSpillPossible
);
2846 FTRACE(3, "alloc spill on edge from {} -> {}\n", label
, s
);
2847 auto it
= std::prev(block
.code
.end());
2848 alloc
.set_irctx(it
->irctx());
2849 block
.code
.insert(it
, alloc
);
2854 // Any block that unrecords the base native sp must have the spill space
2855 // removed at the appropriate point.
2856 if ((state
.in
== NeedSpill
|| state
.changes
) && state
.out
!= NeedSpill
) {
2857 auto blockState
= state
.in
;
2858 for (auto it
= block
.code
.begin(); it
!= block
.code
.end(); ++it
) {
2859 blockState
= instrInState(unit
, *it
, blockState
, ctx
.sp
);
2860 if (blockState
== NoSpillPossible
) {
2861 FTRACE(3, "free spill before {}: {}\n", label
, show(unit
, *it
));
2862 free
.set_irctx(it
->irctx());
2863 block
.code
.insert(it
, free
);
2869 // Any block with a NeedSpill out-state and no successors must free spill
2870 // space right before the block-end instruction. We ignore trap so spill
2871 // space is still allocated in core files.
2872 if (state
.out
== NeedSpill
&& successors
.empty() &&
2873 block
.code
.back().op
!= Vinstr::trap
) {
2874 auto it
= std::prev(block
.code
.end());
2875 FTRACE(3, "free spill before {}: {}\n", label
, show(unit
, (*it
)));
2876 free
.set_irctx(it
->irctx());
2877 block
.code
.insert(it
, free
);
2880 // Any block that ends with NeedSpill needs to be walked to look for places
2881 // to free spill space.
2882 if (state
.out
== NeedSpill
) {
2883 processSpillExits(unit
, label
, state
.in
, free
, ctx
.sp
);
2888 ///////////////////////////////////////////////////////////////////////////////
2891 std::string
Interval::toString() const {
2892 std::ostringstream out
;
2894 if (reg
!= InvalidReg
) {
2899 out
<< delim
<< folly::format("#{:08x}", var
->val
.val
);
2901 if (var
->slot
>= 0) {
2902 out
<< delim
<< folly::format("[%sp+{}]", slotOffset(var
->slot
));
2906 for (auto const& r
: ranges
) {
2907 out
<< delim
<< folly::format("{}-{}", r
.start
, r
.end
);
2912 for (auto const& u
: uses
) {
2913 if (u
.pos
== var
->def_pos
) {
2914 if (u
.hint
.isValid()) {
2915 out
<< delim
<< "@" << u
.pos
<< "=" << show(u
.hint
);
2917 out
<< delim
<< "@" << u
.pos
<< "=";
2920 auto hint_delim
= u
.kind
== Constraint::CopySrc
? "=?" : "=@";
2921 if (u
.hint
.isValid()) {
2922 out
<< delim
<< show(u
.hint
) << hint_delim
<< u
.pos
;
2924 out
<< delim
<< hint_delim
<< u
.pos
;
2933 DEBUG_ONLY
void dumpVariables(const jit::vector
<Variable
*>& variables
,
2934 unsigned num_spills
) {
2935 Trace::traceRelease("Spills %u\n", num_spills
);
2936 for (auto var
: variables
) {
2937 if (!var
|| var
->fixed()) continue;
2938 Trace::traceRelease("%%%-2lu %s\n", size_t(var
->vreg
),
2939 var
->ivl()->toString().c_str());
2940 for (auto ivl
= var
->ivl()->next
; ivl
; ivl
= ivl
->next
) {
2941 Trace::traceRelease(" %s\n", ivl
->toString().c_str());
2946 auto const ignore_reserved
= !getenv("XLS_SHOW_RESERVED");
2947 auto const collapse_fixed
= !getenv("XLS_SHOW_FIXED");
2949 enum Mode
{ Light
, Heavy
};
2951 template<class Pred
>
2952 const char* draw(Variable
* var
, unsigned pos
, Mode m
, Pred covers
) {
2954 static const char* top
[] = { u8
"\u2575", u8
"\u2579" };
2955 static const char* bottom
[] = { u8
"\u2577", u8
"\u257B" };
2956 static const char* both
[] = { u8
"\u2502", u8
"\u2503" };
2957 static const char* empty
[] = { " ", " " };
2958 auto f
= [&](unsigned position
) {
2959 if (!var
) return false;
2960 for (auto ivl
= var
->ivl(); ivl
; ivl
= ivl
->next
) {
2961 if (covers(ivl
, position
)) return true;
2967 auto d
= pos
% 2 == 1 ? s
: f(pos
+ 1);
2968 return ( s
&& !d
) ? top
[m
] :
2969 ( s
&& d
) ? both
[m
] :
2970 (!s
&& d
) ? bottom
[m
] :
2974 DEBUG_ONLY
void printInstr(std::ostringstream
& str
,
2975 const Vunit
& unit
, const VxlsContext
& ctx
,
2976 const jit::vector
<Variable
*>& variables
,
2977 const Vinstr
& inst
, Vlabel b
) {
2978 bool fixed_covers
[2] = { false, false };
2979 Variable
* fixed
= nullptr;
2980 for (auto var
: variables
) {
2983 if (ignore_reserved
&& !ctx
.abi
.unreserved().contains(var
->vreg
)) {
2986 if (collapse_fixed
) {
2987 fixed
= var
; // can be any.
2988 fixed_covers
[0] |= var
->ivl()->covers(inst
.id
);
2989 fixed_covers
[1] |= var
->ivl()->covers(inst
.id
+ 1);
2994 str
<< draw(var
, inst
.id
, Light
, [&](Interval
* child
, unsigned p
) {
2995 return child
->covers(p
);
2997 str
<< draw(var
, inst
.id
, Heavy
, [&](Interval
* child
, unsigned p
) {
2998 return child
->usedAt(p
);
3001 str
<< " " << draw(fixed
, inst
.id
, Heavy
, [&](Interval
*, unsigned p
) {
3002 assertx(p
- inst
.id
< 2);
3003 return fixed_covers
[p
- inst
.id
];
3005 if (inst
.id
== ctx
.block_ranges
[b
].start
) {
3006 str
<< folly::format(" B{: <3}", size_t(b
));
3010 str
<< folly::format(" {: <3} ", inst
.id
) << show(unit
, inst
) << "\n";
3013 DEBUG_ONLY
void printVariables(const char* caption
,
3014 const Vunit
& unit
, const VxlsContext
& ctx
,
3015 const jit::vector
<Variable
*>& variables
) {
3016 std::ostringstream str
;
3017 str
<< "Intervals " << caption
<< " " << ctx
.counter
<< "\n";
3018 for (auto var
: variables
) {
3021 if (ignore_reserved
&& !ctx
.abi
.unreserved().contains(var
->vreg
)) {
3024 if (collapse_fixed
) {
3028 str
<< folly::format(" {: <2}", size_t(var
->vreg
));
3031 for (auto b
: ctx
.blocks
) {
3032 for (auto& inst
: unit
.blocks
[b
].code
) {
3033 printInstr(str
, unit
, ctx
, variables
, inst
, b
);
3036 HPHP::Trace::traceRelease("%s\n", str
.str().c_str());
3039 ///////////////////////////////////////////////////////////////////////////////
// Aggregate counters reported by dumpStats() after register allocation.
// NOTE(review): the struct header line was lost in extraction; the name
// was reconstructed — verify against upstream.
struct XLSStats {
  size_t instrs{0};       // total instruction count after xls
  size_t moves{0};        // number of movs inserted
  size_t loads{0};        // number of loads inserted
  size_t spills{0};       // number of spill-stores inserted
  size_t consts{0};       // number of const-loads inserted
  size_t total_copies{0}; // moves+loads+spills
};
3050 void dumpStats(const Vunit
& unit
, const ResolutionPlan
& resolution
) {
3053 for (auto const& blk
: unit
.blocks
) {
3054 stats
.instrs
+= blk
.code
.size();
3056 for (auto const& kv
: resolution
.spills
) {
3057 auto const& spills
= kv
.second
;
3058 for (auto const r
: spills
) {
3059 if (spills
[r
]) ++stats
.spills
;
3063 auto const count_copies
= [&] (const CopyPlan
& plan
) {
3064 for_each_copy(plan
, [&] (PhysReg
, const Interval
*) {
3067 for_each_load(plan
, [&] (PhysReg
, const Interval
* ivl
) {
3068 ++(ivl
->spilled() ? stats
.loads
: stats
.consts
);
3071 for (auto const& kv
: resolution
.copies
) count_copies(kv
.second
);
3072 for (auto const& kv
: resolution
.edge_copies
) count_copies(kv
.second
);
3074 stats
.total_copies
= stats
.moves
+ stats
.loads
+ stats
.spills
;
3077 Trace::xls_stats
, 1,
3084 " total copies: {}\n",
3085 stats
.instrs
, stats
.moves
, stats
.loads
, stats
.spills
,
3086 stats
.consts
, stats
.total_copies
3089 if (auto entry
= unit
.log_entry
) {
3090 entry
->setInt("xls_instrs", stats
.instrs
);
3091 entry
->setInt("xls_moves", stats
.moves
);
3092 entry
->setInt("xls_loads", stats
.loads
);
3093 entry
->setInt("xls_spills", stats
.spills
);
3094 entry
->setInt("xls_consts", stats
.consts
);
3098 ///////////////////////////////////////////////////////////////////////////////
3101 void allocateRegistersWithXLS(Vunit
& unit
, const Abi
& abi
) {
3102 Timer
timer(Timer::vasm_reg_alloc
, unit
.log_entry
);
3103 auto const counter
= s_counter
.fetch_add(1, std::memory_order_relaxed
);
3105 splitCriticalEdges(unit
);
3106 assertx(check(unit
));
3109 VxlsContext ctx
{abi
};
3110 ctx
.blocks
= sortBlocks(unit
);
3111 ctx
.block_ranges
= computePositions(unit
, ctx
.blocks
);
3112 ctx
.spill_offsets
= analyzeSP(unit
, ctx
.blocks
, ctx
.sp
);
3113 ctx
.livein
= computeLiveness(unit
, ctx
.abi
, ctx
.blocks
);
3114 ctx
.counter
= counter
;
3116 // Build lifetime intervals and analyze hints.
3117 auto variables
= buildIntervals(unit
, ctx
);
3118 auto const hint_info
= analyzeHints(unit
, ctx
, variables
);
3120 // Perform register allocation.
3121 auto spill_info
= assignRegisters(ctx
, variables
, hint_info
);
3123 ONTRACE(kVasmRegAllocDetailLevel
,
3124 dumpVariables(variables
, spill_info
.num_spills
));
3126 // Insert lifetime-resolving copies, spills, and rematerializations, and
3127 // replace the Vreg operands in the Vinstr stream with the assigned PhysRegs.
3128 auto const resolution
= resolveLifetimes(unit
, ctx
, variables
);
3129 renameOperands(unit
, ctx
, variables
);
3130 insertCopies(unit
, ctx
, variables
, resolution
);
3132 ONTRACE(kVasmRegAllocDetailLevel
,
3133 dumpVariables(variables
, spill_info
.num_spills
);
3134 printVariables("after inserting copies", unit
, ctx
, variables
);
3137 peephole(unit
, ctx
);
3139 // Insert instructions for creating spill space.
3140 allocateSpillSpace(unit
, ctx
, spill_info
);
3141 if (auto entry
= unit
.log_entry
) {
3142 entry
->setInt("num_spills", spill_info
.num_spills
);
3143 entry
->setInt("used_spill_slots", spill_info
.used_spill_slots
);
3146 printUnit(kVasmRegAllocLevel
, "after vasm-xls", unit
);
3147 if (HPHP::Trace::moduleEnabled(Trace::xls_stats
, 1) || unit
.log_entry
) {
3148 dumpStats(unit
, resolution
);
3151 // Free the variable metadata.
3152 for (auto var
: variables
) {
3154 for (Interval
* ivl
= var
->ivl()->next
, *next
= nullptr; ivl
; ivl
= next
) {
3158 Variable::destroy(var
);
3162 ///////////////////////////////////////////////////////////////////////////////