2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/runtime/vm/jit/vasm.h"
19 #include "hphp/runtime/base/stats.h"
21 #include "hphp/runtime/vm/jit/abi.h"
22 #include "hphp/runtime/vm/jit/print.h"
23 #include "hphp/runtime/vm/jit/punt.h"
24 #include "hphp/runtime/vm/jit/reg-algorithms.h"
25 #include "hphp/runtime/vm/jit/timer.h"
26 #include "hphp/runtime/vm/jit/vasm-instr.h"
27 #include "hphp/runtime/vm/jit/vasm-print.h"
28 #include "hphp/runtime/vm/jit/vasm-reg.h"
29 #include "hphp/runtime/vm/jit/vasm-unit.h"
30 #include "hphp/runtime/vm/jit/vasm-util.h"
31 #include "hphp/runtime/vm/jit/vasm-visit.h"
33 #include "hphp/util/arch.h"
34 #include "hphp/util/assertions.h"
35 #include "hphp/util/dataflow-worklist.h"
36 #include "hphp/util/trace.h"
38 #include "hphp/ppc64-asm/asm-ppc64.h"
40 #include <boost/dynamic_bitset.hpp>
41 #include <boost/range/adaptor/reversed.hpp>
42 #include <folly/Format.h>
43 #include <folly/Optional.h>
49 // - #3098509 streamline code, vectors vs linked lists, etc
50 // - #3098685 Optimize lifetime splitting
51 // - #3098739 new features now possible with XLS
55 namespace HPHP
{ namespace jit
{
56 ///////////////////////////////////////////////////////////////////////////////
59 ///////////////////////////////////////////////////////////////////////////////
// NOTE(review): appears to be a monotonically increasing run counter for the
// allocator (likely a debug/trace identifier) — the increment site is not
// visible in this chunk; confirm against the rest of the file.
61 std::atomic
<size_t> s_counter
;
// Trace level threshold for register-hint diagnostics (debug builds only).
63 DEBUG_ONLY
constexpr auto kHintLevel
= 5;
// Register-class constraint imposed on a Vreg at a use or def position.
// CopySrc marks sources of parallel-copy instructions, which may be
// satisfied directly from a constant or spill slot.
enum class Constraint { Any, CopySrc, Gpr, Simd, Sf };
70 Constraint
constraint(const Vreg
&) { return Constraint::Any
; }
71 Constraint
constraint(const Vreg64
&) { return Constraint::Gpr
; }
72 Constraint
constraint(const Vreg32
&) { return Constraint::Gpr
; }
73 Constraint
constraint(const Vreg16
&) { return Constraint::Gpr
; }
74 Constraint
constraint(const Vreg8
&) { return Constraint::Gpr
; }
75 Constraint
constraint(const VregDbl
&) { return Constraint::Simd
; }
76 Constraint
constraint(const Vreg128
&) { return Constraint::Simd
; }
77 Constraint
constraint(const VregSF
&) { return Constraint::Sf
; }
79 bool is_wide(const Vreg128
&) { return true; }
80 template<class T
> bool is_wide(const T
&) { return false; }
// Sentinel phi-group index: this use is not part of any phi group.
constexpr int kInvalidPhiGroup = -1;
// Sentinel spill-slot index: no slot has been assigned.
constexpr int kInvalidSpillSlot = -1;
// "Infinity" use position — larger than any real instruction position.
// (Was `const unsigned`; made constexpr for consistency with the two
// sibling constants above.)
constexpr unsigned kMaxPos = UINT_MAX;
90 * A Use refers to the position where an interval is used or defined.
94 Use(Constraint kind
, unsigned pos
, Vreg hint
)
101 * Constraint imposed on the Vreg at this use.
103 Constraint kind
{Constraint::Any
};
105 * Position of this use or def.
107 unsigned pos
{kMaxPos
};
109 * If valid, try to assign the same physical register here as `hint' was
110 * assigned at `pos'. The `alt_hint' is for phijcc.
115 * Index of phi group metadata.
117 int phi_group
{kInvalidPhiGroup
};
121 * A LiveRange is a closed-open range of positions where an interval is live.
123 * Specifically, for the LiveRange [start, end), start is in the range and
127 bool contains(unsigned pos
) const { return pos
>= start
&& pos
< end
; }
128 bool intersects(LiveRange r
) const { return r
.start
< end
&& start
< r
.end
; }
129 bool contains(LiveRange r
) const { return r
.start
>= start
&& r
.end
<= end
; }
137 * An Interval represents the lifetime of a Vreg as a sorted list of disjoint
138 * ranges and a sorted list of use positions. It is the unit of register
141 * Intervals may be split---e.g., because the Vreg needed to be spilled in some
142 * subrange. All (sub-)Intervals for a given Vreg are connected as a singly
143 * linked list, sorted by start position.
145 * Every use position must be inside one of the ranges, or exactly at the end
146 * of the last range. Allowing a use exactly at the end facilitates lifetime
147 * splitting when the use at the position of an instruction clobbers registers
148 * as a side effect, e.g. a call.
150 * The intuition for allowing uses at the end of an Interval is that, in truth,
151 * the picture at a given position looks like this:
155 * +-----|-------------+ copy{s, d} <-+
157 * + - - - - - - - - - + +--- position n
159 * +-------------|-----+ <-+
163 * We represent an instruction with a single position `n'. All the use(s) and
164 * def(s) of that instruction are live at some point within it, but their
165 * lifetimes nonetheless do not overlap. Since we don't represent instructions
166 * using two position numbers, instead, we allow uses on the open end side of
167 * Intervals, because they don't actually conflict with, e.g., a def of another
168 * Interval that starts at the same position.
171 explicit Interval(Variable
* var
) : var(var
) {}
173 std::string
toString() const;
176 * Endpoints. Intervals are [closed, open), just like LiveRanges.
178 unsigned start() const { return ranges
.front().start
; }
179 unsigned end() const { return ranges
.back().end
; }
182 * Head of this Interval chain.
184 const Interval
* leader() const;
188 * Register allocation state.
191 bool constant() const;
192 bool spilled() const;
196 * Split this interval at `pos', returning the new `this->next'.
198 * If `keep_uses' is set, uses exactly at the end of the first interval will
199 * stay with the first split (rather than the second).
201 * @requires: pos > start() && pos < end(); this ensures that both
202 * subintervals are nonempty.
204 Interval
* split(unsigned pos
, bool keep_uses
= false);
206 /////////////////////////////////////////////////////////////////////////////
209 // These operate only on `this' and not its children.
212 * Get the index of the first range or use that is not strictly lower than
213 * `pos' (i.e., which contains/is at `pos' or is strictly higher than `pos').
215 unsigned findRange(unsigned pos
) const;
216 unsigned findUse(unsigned pos
) const;
219 * Whether there is a range that includes `pos', or a use at `pos'.
221 bool covers(unsigned pos
) const;
222 bool usedAt(unsigned pos
) const;
225 * The position of a use [relative to `pos'] that requires a register (i.e.,
226 * CopySrc uses are ignored).
228 * firstUseAfter: The first use >= `pos', kMaxPos if there are no more uses.
229 * lastUseBefore: The first use <= `pos'; 0 if the first use is after `pos'.
230 * firstUse: The first use in `this'.
232 unsigned firstUseAfter(unsigned pos
) const;
233 unsigned lastUseBefore(unsigned pos
) const;
234 unsigned firstUse() const;
237 // The Variable whose (sub-)lifetime this (sub-)interval represents.
240 // Pointer to the next subinterval.
241 Interval
* next
{nullptr};
243 // Live ranges and use positions.
244 jit::vector
<LiveRange
> ranges
;
245 jit::vector
<Use
> uses
;
247 // The physical register assigned to this subinterval.
252 * Metadata about a variable which is the object of register allocation.
254 * Each Variable represents a Vreg in the Vunit we're performing register
255 * allocation on---including lifetime ranges, uses, def information, etc.
258 explicit Variable(Vreg r
) : vreg(r
) {}
260 static Variable
* create(Vreg r
);
261 static void destroy(Variable
* var
);
266 Interval
* ivl() const { return (Interval
*)(this + 1); }
267 bool fixed() const { return vreg
.isPhys(); }
270 * Return the subinterval which covers or has a use at `pos', else nullptr if
271 * no subinterval does.
273 Interval
* ivlAt(unsigned pos
);
274 Interval
* ivlAtUse(unsigned pos
);
277 // The Vreg this value represents.
280 // Whether this is a SIMD value.
283 // Whether this variable is a constant, and its constant value.
284 bool constant
{false};
287 // The spill slot assigned to this value. Since we don't spill physical
288 // registers, and Vregs are SSA, a value's assigned slot never changes.
289 int slot
{kInvalidSpillSlot
};
291 // Position of, and block containing, the variable's single def instruction;
292 // invalid for physical registers.
293 unsigned def_pos
{kMaxPos
};
296 // Copy of the Vreg's def instruction.
301 * Bitset of Vreg numbers.
303 using LiveSet
= boost::dynamic_bitset
<>;
305 template<class Fn
> void forEach(const LiveSet
& bits
, Fn fn
) {
306 for (auto i
= bits
.find_first(); i
!= bits
.npos
; i
= bits
.find_next(i
)) {
312 * Sack of inputs and pre-computed data used by the main XLS algorithm.
315 explicit VxlsContext(const Abi
& abi
)
321 tmp
= reg::xmm15
; // reserve xmm15 to break shuffle cycles
324 tmp
= vixl::x17
; // also used as tmp1 by MacroAssembler
327 tmp
= ppc64_asm::reg::v29
;
330 this->abi
.simdUnreserved
-= tmp
;
331 this->abi
.simdReserved
|= tmp
;
332 assertx(!abi
.gpUnreserved
.contains(sp
));
333 assertx(!abi
.gpUnreserved
.contains(tmp
));
338 // Arch-dependent stack pointer.
340 // Temp register used only for breaking cycles.
342 // Debug-only run identifier.
346 jit::vector
<Vlabel
> blocks
;
347 // [start,end) position of each block.
348 jit::vector
<LiveRange
> block_ranges
;
349 // Per-block sp[offset] to spill-slots.
350 jit::vector
<int> spill_offsets
;
351 // Per-block live-in sets.
352 jit::vector
<LiveSet
> livein
;
355 ///////////////////////////////////////////////////////////////////////////////
358 const Interval
* Interval::leader() const { return var
->ivl(); }
359 Interval
* Interval::leader() { return var
->ivl(); }
361 bool Interval::live() const { return reg
!= InvalidReg
; }
362 bool Interval::constant() const { return var
->constant
; }
363 bool Interval::spilled() const { return !live() && var
->slot
>= 0; }
364 bool Interval::fixed() const { return var
->fixed(); }
366 unsigned Interval::findRange(unsigned pos
) const {
368 for (unsigned hi
= ranges
.size(); lo
< hi
;) {
369 auto mid
= (lo
+ hi
) / 2;
370 auto r
= ranges
[mid
];
373 } else if (r
.end
<= pos
) {
379 assertx(lo
== ranges
.size() || pos
< ranges
[lo
].start
);
383 unsigned Interval::findUse(unsigned pos
) const {
384 unsigned lo
= 0, hi
= uses
.size();
386 auto mid
= (lo
+ hi
) / 2;
387 auto u
= uses
[mid
].pos
;
390 } else if (u
< pos
) {
396 assertx(lo
== uses
.size() || pos
< uses
[lo
].pos
);
400 bool Interval::covers(unsigned pos
) const {
401 if (pos
< start() || pos
>= end()) return false;
402 auto i
= ranges
.begin() + findRange(pos
);
403 return i
!= ranges
.end() && i
->contains(pos
);
406 bool Interval::usedAt(unsigned pos
) const {
407 if (pos
< start() || pos
> end()) return false;
408 auto i
= uses
.begin() + findUse(pos
);
409 return i
!= uses
.end() && pos
== i
->pos
;
412 unsigned Interval::firstUseAfter(unsigned pos
) const {
413 for (auto& u
: uses
) {
414 if (u
.kind
== Constraint::CopySrc
) continue;
415 if (u
.pos
>= pos
) return u
.pos
;
420 unsigned Interval::lastUseBefore(unsigned pos
) const {
422 for (auto& u
: uses
) {
423 if (u
.kind
== Constraint::CopySrc
) continue;
424 if (u
.pos
> pos
) return prev
;
430 unsigned Interval::firstUse() const {
431 for (auto& u
: uses
) {
432 if (u
.kind
!= Constraint::CopySrc
) return u
.pos
;
437 Interval
* Interval::split(unsigned pos
, bool keep_uses
) {
438 assertx(pos
> start() && pos
< end()); // both parts will be non-empty
440 auto child
= jit::make
<Interval
>(var
);
444 // advance r1 to the first range we want in child; maybe split a range.
445 auto r1
= ranges
.begin() + findRange(pos
);
446 if (pos
> r1
->start
) { // split r at pos
447 child
->ranges
.push_back({pos
, r1
->end
});
451 child
->ranges
.insert(child
->ranges
.end(), r1
, ranges
.end());
452 ranges
.erase(r1
, ranges
.end());
454 // advance u1 to the first use position in child, then copy u1..end to child.
455 auto u1
= uses
.begin() + findUse(end());
456 auto u2
= uses
.end();
458 while (u1
!= u2
&& u1
->pos
<= end()) u1
++;
460 while (u1
!= u2
&& u1
->pos
< child
->start()) u1
++;
462 child
->uses
.insert(child
->uses
.end(), u1
, u2
);
468 ///////////////////////////////////////////////////////////////////////////////
471 Variable
* Variable::create(Vreg r
) {
472 auto const mem
= malloc(sizeof(Variable
) + sizeof(Interval
));
473 auto const var
= new (mem
) Variable(r
);
474 new (reinterpret_cast<void*>(var
+ 1)) Interval(var
);
478 void Variable::destroy(Variable
* var
) {
480 auto ivl
= reinterpret_cast<Interval
*>(var
+ 1);
485 Interval
* Variable::ivlAt(unsigned pos
) {
486 for (auto ivl
= this->ivl(); ivl
; ivl
= ivl
->next
) {
487 if (pos
< ivl
->start()) return nullptr;
488 if (ivl
->covers(pos
)) return ivl
;
493 Interval
* Variable::ivlAtUse(unsigned pos
) {
494 for (auto ivl
= this->ivl(); ivl
; ivl
= ivl
->next
) {
495 if (pos
< ivl
->start()) return nullptr;
496 if (ivl
->usedAt(pos
)) return ivl
;
501 ///////////////////////////////////////////////////////////////////////////////
504 * Extended Linear Scan is based on Wimmer & Franz "Linear Scan Register
505 * Allocation on SSA Form". As currently written, it also works on non-ssa
508 * 1. Sort blocks such that all predecessors of B come before B, except
509 * loop-edge predecessors. If the input IR is in SSA form, this also
510 * implies the definition of each SSATmp comes before all uses.
512 * 2. Assign an even numbered position to every instruction. Positions
513 * between instructions are used to insert copies and spills. Each block
514 * starts with an extra position number that corresponds to an imaginary
515 * "label" instruction that is not physically in the vasm IR.
517 * 3. Create one interval I for each Vreg R that requires register allocation,
518 * by iterating blocks and instructions in reverse order, computing live
519 * registers as we go. Each interval consists of a sorted list of disjoint,
520 * live ranges covering the positions where R must be in a physical register
521 * or spill slot. Vregs that are constants or have forced registers
522 * (e.g. VmSp) are skipped. If the input is SSA, the start position of each
523 * interval dominates every live range and use position in the interval.
525 * 4. Process intervals in order of start position, maintaining the set of
526 * active (live) and inactive (not live, but with live ranges that start
527 * after the current interval). When choosing a register, prefer the one
528 * available furthest into the future. If necessary, split the current
529 * interval so the first part gets a register, and enqueue the rest.
530 * When no registers are available, choose either the current interval or
531 * another one to spill, trying to free up the longest-available register.
533 * Split positions must be after an interval's start position, and on or before
534 * the chosen split point. We're free to try to choose a good position in between,
535 * for example block boundaries and cold blocks.
537 * 5. Once intervals have been walked and split, every interval has an assigned
538 * operand (register or spill location) for all positions where it's alive.
539 * Visit every instruction and modify its Vreg operands to the physical
540 * register that was assigned.
542 * 6. Splitting creates sub-intervals that are assigned to different registers
543 * or spill locations, so insert resolving copies at the split positions
544 * between intervals that were split in a block, and copies on control-flow
545 * edges connecting different sub-intervals. When more than one copy occurs
546 * in a position, they are parallel-copies (all sources read before any dest
549 * If any sub-interval was spilled, a single store is generated after each
552 * When analyzing instructions that use or define a virtual SF register
553 * (VregSF), eagerly rename it to the singleton PhysReg RegSF{0}, under the
554 * assumption that there can only be one live SF at each position. This
555 * reduces the number of intervals we need to process, facilitates inserting
556 * ldimm{0} (as xor), and is checked by checkSF().
559 ///////////////////////////////////////////////////////////////////////////////
562 * Printing utilities.
564 void printVariables(const char* caption
,
565 const Vunit
& unit
, const VxlsContext
& ctx
,
566 const jit::vector
<Variable
*>& variables
);
567 void dumpVariables(const jit::vector
<Variable
*>& variables
,
568 unsigned num_spills
);
571 * The ID of the block enclosing `pos'.
573 Vlabel
blockFor(const VxlsContext
& ctx
, unsigned pos
) {
574 for (unsigned lo
= 0, hi
= ctx
.blocks
.size(); lo
< hi
;) {
575 auto mid
= (lo
+ hi
) / 2;
576 auto r
= ctx
.block_ranges
[ctx
.blocks
[mid
]];
579 } else if (pos
>= r
.end
) {
582 return ctx
.blocks
[mid
];
585 always_assert(false);
586 return Vlabel
{0xffffffff};
590 * Insert `src' into `dst' before dst[j], corresponding to XLS logical
593 * Updates `j' to refer to the same instruction after the code insertions.
595 void insertCodeAt(jit::vector
<Vinstr
>& dst
, unsigned& j
,
596 const jit::vector
<Vinstr
>& src
, unsigned pos
) {
597 auto const irctx
= dst
[j
].irctx();
598 dst
.insert(dst
.begin() + j
, src
.size(), trap
{TRAP_REASON
});
599 for (auto const& inst
: src
) {
601 dst
[j
].set_irctx(irctx
);
606 ///////////////////////////////////////////////////////////////////////////////
607 // Pre-analysis passes.
610 * Compute the linear position range of each block.
612 * This modifies the Vinstrs in `unit' by setting their `pos' members, in
613 * addition to producing the block-to-range map.
615 jit::vector
<LiveRange
> computePositions(Vunit
& unit
,
616 const jit::vector
<Vlabel
>& blocks
) {
617 auto block_ranges
= jit::vector
<LiveRange
>{unit
.blocks
.size()};
620 for (auto const b
: blocks
) {
621 auto& code
= unit
.blocks
[b
].code
;
623 bool front_uses
{false};
624 visitUses(unit
, code
.front(), [&](Vreg
/*r*/) { front_uses
= true; });
626 auto irctx
= code
.front().irctx();
627 code
.emplace(code
.begin(), nop
{}, irctx
);
629 auto const start
= pos
;
631 for (auto& inst
: unit
.blocks
[b
].code
) {
635 block_ranges
[b
] = { start
, pos
};
641 * Return the effect this instruction has on the value of `sp'.
643 * Asserts (if `do_assert' is set) if an instruction mutates `sp' in an
646 int spEffect(const Vunit
& unit
, const Vinstr
& inst
, PhysReg sp
,
647 bool do_assert
= debug
) {
665 if (i
.d
== Vreg64(sp
)) {
666 assertx(i
.s
.base
== i
.d
&& !i
.s
.index
.isValid());
673 visitDefs(unit
, inst
, [&] (Vreg r
) { assertx(r
!= sp
); });
680 * Compute the offset from `sp' to the spill area at each block start.
682 jit::vector
<int> analyzeSP(const Vunit
& unit
,
683 const jit::vector
<Vlabel
>& blocks
,
685 auto visited
= boost::dynamic_bitset
<>(unit
.blocks
.size());
686 auto spill_offsets
= jit::vector
<int>(unit
.blocks
.size());
688 for (auto const b
: blocks
) {
689 auto offset
= visited
.test(b
) ? spill_offsets
[b
] : 0;
691 for (auto const& inst
: unit
.blocks
[b
].code
) {
692 offset
-= spEffect(unit
, inst
, sp
);
694 for (auto const s
: succs(unit
.blocks
[b
])) {
695 if (visited
.test(s
)) {
696 assert_flog(offset
== spill_offsets
[s
],
697 "sp mismatch on edge B{}->B{}, expected {} got {}",
698 size_t(b
), size_t(s
), spill_offsets
[s
], offset
);
700 spill_offsets
[s
] = offset
;
705 return spill_offsets
;
709 * Compute livein set for each block.
711 * An iterative data-flow analysis to compute the livein sets for each block is
712 * necessary for two reasons:
714 * 1. buildIntervals() uses the sets in a single backwards pass to build
715 * precise Intervals with live range holes, and
717 * 2. resolveEdges() uses the sets to discover which intervals require copies
718 * on control flow edges due to having been split.
720 jit::vector
<LiveSet
> computeLiveness(const Vunit
& unit
,
722 const jit::vector
<Vlabel
>& blocks
) {
723 auto livein
= jit::vector
<LiveSet
>{unit
.blocks
.size()};
724 auto const preds
= computePreds(unit
);
726 auto blockPO
= jit::vector
<uint32_t>(unit
.blocks
.size());
727 auto revBlocks
= blocks
;
728 std::reverse(begin(revBlocks
), end(revBlocks
));
730 FTRACE(6, "computeLiveness: starting with {} blocks (unit blocks: {})\n",
731 revBlocks
.size(), unit
.blocks
.size());
733 auto wl
= dataflow_worklist
<uint32_t>(revBlocks
.size());
735 for (unsigned po
= 0; po
< revBlocks
.size(); po
++) {
737 blockPO
[revBlocks
[po
]] = po
;
738 FTRACE(6, " - inserting block {} (po = {})\n", revBlocks
[po
], po
);
741 while (!wl
.empty()) {
742 auto b
= revBlocks
[wl
.pop()];
743 auto& block
= unit
.blocks
[b
];
745 FTRACE(6, " - popped block {} (po = {})\n", b
, blockPO
[b
]);
747 // start with the union of the successor blocks
748 LiveSet
live(unit
.next_vr
);
749 for (auto s
: succs(block
)) {
750 if (!livein
[s
].empty()) live
|= livein
[s
];
753 // and now go through the instructions in the block in reverse order
754 for (auto const& inst
: boost::adaptors::reverse(block
.code
)) {
755 RegSet implicit_uses
, implicit_across
, implicit_defs
;
756 getEffects(abi
, inst
, implicit_uses
, implicit_across
, implicit_defs
);
758 auto const vsf
= Vreg
{RegSF
{0}};
760 auto const dvisit
= [&] (Vreg r
, Width w
) {
761 live
.reset(w
== Width::Flags
? vsf
: r
);
763 auto const uvisit
= [&] (Vreg r
, Width w
) {
764 live
.set(w
== Width::Flags
? vsf
: r
);
767 visitDefs(unit
, inst
, dvisit
);
768 visit(unit
, implicit_defs
, dvisit
);
770 visitUses(unit
, inst
, uvisit
);
771 visit(unit
, implicit_uses
, uvisit
);
774 if (live
!= livein
[b
]) {
776 for (auto p
: preds
[b
]) {
778 FTRACE(6, " - reinserting block {} (po = {})\n", p
, blockPO
[p
]);
786 ///////////////////////////////////////////////////////////////////////////////
787 // Lifetime intervals.
792 * This assumes that the ranges of `ivl' are in reverse order, and that `r'
793 * precedes or overlaps with ivl->ranges.first().
795 void addRange(Interval
* ivl
, LiveRange r
) {
796 while (!ivl
->ranges
.empty() && r
.contains(ivl
->ranges
.back())) {
797 ivl
->ranges
.pop_back();
799 if (ivl
->ranges
.empty()) {
800 return ivl
->ranges
.push_back(r
);
802 auto& first
= ivl
->ranges
.back();
803 if (first
.contains(r
)) return;
804 if (r
.end
>= first
.start
) {
805 first
.start
= r
.start
;
807 ivl
->ranges
.push_back(r
);
812 * Visits defs of an instruction, updates their liveness, adds live ranges, and
813 * adds Uses with appropriate hints.
816 DefVisitor(const Vunit
& unit
, jit::vector
<Variable
*>& variables
,
817 LiveSet
& live
, Vlabel b
, const Vinstr
& inst
, unsigned pos
)
819 , m_variables(variables
)
826 // Skip immediates and uses.
827 template<class F
> void imm(const F
&) {}
828 template<class R
> void use(R
) {}
829 template<class S
, class H
> void useHint(S
, H
) {}
830 template<class R
> void across(R
) {}
832 void def(Vtuple defs
) {
833 for (auto r
: m_unit
.tuples
[defs
]) def(r
);
835 void defHint(Vtuple def_tuple
, Vtuple hint_tuple
) {
836 auto& defs
= m_unit
.tuples
[def_tuple
];
837 auto& hints
= m_unit
.tuples
[hint_tuple
];
838 for (int i
= 0; i
< defs
.size(); i
++) {
839 def(defs
[i
], Constraint::Any
, hints
[i
]);
842 template<class R
> void def(R r
) {
843 def(r
, constraint(r
), Vreg
{}, is_wide(r
));
845 template<class D
, class H
> void defHint(D dst
, H hint
) {
846 def(dst
, constraint(dst
), hint
, is_wide(dst
));
848 void def(Vreg r
) { def(r
, Constraint::Any
); }
849 void defHint(Vreg d
, Vreg hint
) { def(d
, Constraint::Any
, hint
); }
850 void def(RegSet rs
) { rs
.forEach([&](Vreg r
) { def(r
); }); }
852 r
= RegSF
{0}; // eagerly rename all SFs
853 def(r
, constraint(r
));
857 void def(Vreg r
, Constraint kind
, Vreg hint
= Vreg
{}, bool wide
= false) {
858 auto var
= m_variables
[r
];
859 if (m_live
.test(r
)) {
861 var
->ivl()->ranges
.back().start
= m_pos
;
864 var
= m_variables
[r
] = Variable::create(r
);
866 addRange(var
->ivl(), {m_pos
, m_pos
+ 1});
869 var
->ivl()->uses
.push_back(Use
{kind
, m_pos
, hint
});
871 var
->def_pos
= m_pos
;
872 var
->def_block
= m_block
;
873 var
->def_inst
= m_inst
;
879 jit::vector
<Variable
*>& m_variables
;
882 const Vinstr
& m_inst
;
887 UseVisitor(const Vunit
& unit
, jit::vector
<Variable
*>& variables
,
888 LiveSet
& live
, const Vinstr
& inst
, LiveRange range
)
890 , m_variables(variables
)
896 // Skip immediates and defs.
897 template<class F
> void imm(const F
&) {}
898 template<class R
> void def(R
) {}
899 template<class D
, class H
> void defHint(D
, H
) {}
901 template<class R
> void use(R r
) { use(r
, constraint(r
), m_range
.end
); }
902 template<class S
, class H
> void useHint(S src
, H hint
) {
903 use(src
, constraint(src
), m_range
.end
, hint
);
906 r
= RegSF
{0}; // eagerly rename all SFs
907 use(r
, constraint(r
), m_range
.end
);
909 void use(RegSet regs
) { regs
.forEach([&](Vreg r
) { use(r
); }); }
910 void use(Vtuple uses
) { for (auto r
: m_unit
.tuples
[uses
]) use(r
); }
911 void useHint(Vtuple src_tuple
, Vtuple hint_tuple
) {
912 auto& uses
= m_unit
.tuples
[src_tuple
];
913 auto& hints
= m_unit
.tuples
[hint_tuple
];
914 for (int i
= 0, n
= uses
.size(); i
< n
; i
++) {
915 useHint(uses
[i
], hints
[i
]);
919 if (m
.base
.isValid()) use(m
.base
);
920 if (m
.index
.isValid()) use(m
.index
);
922 void use(VcallArgsId
/*id*/) {
923 always_assert(false && "vcall unsupported in vxls");
927 * An operand marked as UA means use-across. Mark it live across the
928 * instruction so its lifetime conflicts with the destination, which ensures
929 * it will be assigned a different register than the destination. This isn't
930 * necessary if *both* operands of a binary instruction are the same virtual
931 * register, but is still correct.
933 template<class R
> void across(R r
) { use(r
, constraint(r
), m_range
.end
+ 1); }
934 void across(RegSet regs
) { regs
.forEach([&](Vreg r
) { across(r
); }); }
935 void across(Vptr m
) {
936 if (m
.base
.isValid()) across(m
.base
);
937 if (m
.index
.isValid()) across(m
.index
);
941 void use(Vreg r
, Constraint kind
, unsigned end
, Vreg hint
= Vreg
{}) {
944 auto var
= m_variables
[r
];
945 if (!var
) var
= m_variables
[r
] = Variable::create(r
);
946 addRange(var
->ivl(), {m_range
.start
, end
});
949 if (m_inst
.op
== Vinstr::copyargs
||
950 m_inst
.op
== Vinstr::copy2
||
951 m_inst
.op
== Vinstr::copy
||
952 (m_inst
.op
== Vinstr::phijcc
&& kind
!= Constraint::Sf
) ||
953 m_inst
.op
== Vinstr::phijmp
) {
954 // all these instructions lower to parallel copyplans, which know
955 // how to load directly from constants or spilled locations
956 kind
= Constraint::CopySrc
;
958 var
->ivl()->uses
.push_back({kind
, m_range
.end
, hint
});
964 jit::vector
<Variable
*>& m_variables
;
966 const Vinstr
& m_inst
;
967 const LiveRange m_range
;
971 * Compute lifetime intervals and use positions of all Vregs by walking the
972 * code bottom-up once.
974 jit::vector
<Variable
*> buildIntervals(const Vunit
& unit
,
975 const VxlsContext
& ctx
) {
976 ONTRACE(kRegAllocLevel
, printCfg(unit
, ctx
.blocks
));
978 auto variables
= jit::vector
<Variable
*>{unit
.next_vr
};
980 for (auto b
: boost::adaptors::reverse(ctx
.blocks
)) {
981 auto& block
= unit
.blocks
[b
];
983 // initial live set is the union of successor live sets.
984 LiveSet
live(unit
.next_vr
);
985 for (auto s
: succs(block
)) {
986 always_assert(!ctx
.livein
[s
].empty());
987 live
|= ctx
.livein
[s
];
990 // add a range covering the whole block to every live interval
991 auto& block_range
= ctx
.block_ranges
[b
];
992 forEach(live
, [&](Vreg r
) {
993 if (!variables
[r
]) variables
[r
] = Variable::create(r
);
994 addRange(variables
[r
]->ivl(), block_range
);
997 // visit instructions bottom-up, adding uses & ranges
998 auto pos
= block_range
.end
;
999 for (auto const& inst
: boost::adaptors::reverse(block
.code
)) {
1001 RegSet implicit_uses
, implicit_across
, implicit_defs
;
1002 getEffects(ctx
.abi
, inst
, implicit_uses
, implicit_across
, implicit_defs
);
1004 DefVisitor
dv(unit
, variables
, live
, b
, inst
, pos
);
1005 visitOperands(inst
, dv
);
1006 dv
.def(implicit_defs
);
1008 UseVisitor
uv(unit
, variables
, live
, inst
, {block_range
.start
, pos
});
1009 visitOperands(inst
, uv
);
1010 uv
.use(implicit_uses
);
1011 uv
.across(implicit_across
);
1014 // sanity check liveness computation
1015 always_assert(live
== ctx
.livein
[b
]);
1018 // finish processing live ranges for constants
1019 for (auto& c
: unit
.constToReg
) {
1020 if (auto var
= variables
[c
.second
]) {
1021 var
->ivl()->ranges
.back().start
= 0;
1023 var
->constant
= true;
1028 // Ranges and uses were generated in reverse order. Unreverse them now.
1029 for (auto var
: variables
) {
1031 auto ivl
= var
->ivl();
1032 assertx(!ivl
->ranges
.empty()); // no empty intervals
1033 std::reverse(ivl
->uses
.begin(), ivl
->uses
.end());
1034 std::reverse(ivl
->ranges
.begin(), ivl
->ranges
.end());
1036 ONTRACE(kRegAllocLevel
,
1037 printVariables("after building intervals", unit
, ctx
, variables
);
1041 // only constants and physical registers can be live-into the entry block.
1042 forEach(ctx
.livein
[unit
.entry
], [&](Vreg r
) {
1043 UNUSED
auto var
= variables
[r
];
1044 assertx(var
->constant
|| var
->fixed());
1046 for (auto var
: variables
) {
1048 auto const ivl
= var
->ivl();
1049 for (unsigned i
= 1; i
< ivl
->uses
.size(); i
++) {
1050 assertx(ivl
->uses
[i
].pos
>= ivl
->uses
[i
-1].pos
); // monotonic
1052 for (unsigned i
= 1; i
< ivl
->ranges
.size(); i
++) {
1053 assertx(ivl
->ranges
[i
].end
> ivl
->ranges
[i
].start
); // no empty ranges
1054 assertx(ivl
->ranges
[i
].start
> ivl
->ranges
[i
-1].end
); // no empty gaps
1061 ///////////////////////////////////////////////////////////////////////////////
/*
 * A map from PhysReg number to position.
 */
using PosVec = PhysReg::Map<unsigned>;
1069 * Between `r1' and `r2', choose the one whose position in `pos_vec' is closest
1070 * to but still after (or at) `pos'.
1072 * If neither has a position after `pos', choose the higher one. If both have
1073 * the same position, choose `r1'.
1075 PhysReg
choose_closest_after(unsigned pos
, const PosVec
& pos_vec
,
1076 PhysReg r1
, PhysReg r2
) {
1077 if (r1
== InvalidReg
) return r2
;
1078 if (r2
== InvalidReg
) return r1
;
1080 if (pos_vec
[r1
] >= pos
) {
1081 if (pos
<= pos_vec
[r2
] && pos_vec
[r2
] < pos_vec
[r1
]) {
1085 if (pos_vec
[r2
] > pos_vec
[r1
]) {
1093 * Like choose_closest_after(), but iterates through `pos_vec'.
1095 PhysReg
find_closest_after(unsigned pos
, const PosVec
& pos_vec
) {
1096 auto ret
= InvalidReg
;
1097 for (auto const r
: pos_vec
) {
1098 ret
= choose_closest_after(pos
, pos_vec
, ret
, r
);
1103 ///////////////////////////////////////////////////////////////////////////////
/*
 * A member of a "phi group"---i.e., the set of all variables which flow into
 * one another via a set of corresponding phijmp's and phidef's.
 */
struct PhiVar {
  Vreg r;        // the Vreg that is used or def'd in the phi
  unsigned pos;  // position of the phijmp or phidef
};
/*
 * Hint metadata that doesn't fit into the `variables' vector.
 */
struct HintInfo {
  // Equivalence classes of phi uses and defs; see analyzePhiHints().
  jit::vector<jit::vector<PhiVar>> phi_groups;
};
/*
 * Add the phi use of `r' at `pos' to the phi group given by `pgid'.
 *
 * Optionally, also record `hint' (and `other' as the alternate hint) on the
 * use, taken from the corresponding phi instruction's variables.
 */
void addPhiGroupMember(const jit::vector<Variable*>& variables,
                       jit::vector<jit::vector<PhiVar>>& phi_groups,
                       Vreg r, unsigned pos, int pgid,
                       Vreg hint = Vreg{}, Vreg other = Vreg{}) {
  assertx(phi_groups.size() > pgid);
  assertx(!phi_groups[pgid].empty());

  auto const var = variables[r];
  assertx(var != nullptr);

  // `pos' names either the def of `var' (it's a phidef output) or one of its
  // uses (it's a phijmp/phijcc input); find the matching Use record.
  auto const ivl = var->ivl();
  auto& u = pos == var->def_pos
    ? ivl->uses.front()
    : ivl->uses[ivl->findUse(pos)];
  assertx(u.pos == pos);

  if (hint.isValid()) u.hint = hint;
  if (other.isValid()) u.alt_hint = other;

  // Link the use into the group and record it as a member.
  u.phi_group = pgid;
  phi_groups[pgid].push_back({r, u.pos});
}
/*
 * Collect hint metadata for phis.
 *
 * The return value of this pass represents an equivalence relation over phi
 * uses and defs. The relation is defined as follows:
 *
 * Consider a phidef F with arity n (i.e., it defines n variables). F gives
 * rise to n equivalence classes, each of which consists in the i-th use or def
 * variable from each phi instruction in the transitive closure of the predicate
 *
 *    P(f) := { every phijmp or phijcc which targets f, and every other phidef
 *              targeted by those phijmp's and phijcc's }
 *
 * applied to F.
 *
 * Or, taking a pictoral example:
 *    _________________________________________________________
 *   |                                                      .  |
 *   | phijmp(x1, y1)    phijcc(x2, y2)    phijmp(x3, y3)   .  |
 *   |        |    +-------------+    +-------------+       .  |
 *   |        |    |             |    |             .       .  |
 *   |        v    v             v    v             .       .  |
 *   | phidef(x4, y4)    phidef(x5, y5) -------------------+   |
 *   |______ . ________________________________________________|
 *           .
 *    ______ | __________________________
 *   |       v                           |
 *   | phijmp(x6, y6)--->phidef(x7, y7)  |
 *   |___________________________________|
 *
 * The equivalence classes in this example are {x1, ..., x5}, {y1, ..., y5},
 * {x6, x7}, and {y6, y7}.
 *
 * We use these equivalence classes during register allocation to try to assign
 * the same register to all the variables in a phi group (at least at those
 * positions where the phi instructions occur).
 *
 * Note that this equivalence relation does /not/ capture the idea that we
 * probably want the same register for x4 as we do for x6 and x7. To track
 * that information, we set the `hint' on phi uses to the corresponding phidef
 * variable (or one of the two, in the case of phijcc), then let the hint back
 * propagation pass handle it. (Note that we do /not/ set the `hint' field at
 * phi defs, nor do we try to use it at any point.)
 */
jit::vector<jit::vector<PhiVar>>
analyzePhiHints(const Vunit& unit, const VxlsContext& ctx,
                const jit::vector<Variable*>& variables) {
  auto phi_groups = jit::vector<jit::vector<PhiVar>>{};

  /*
   * Get the phi group for r's def, optionally allocating a new phi group if
   * it doesn't already have one.
   */
  auto const def_phi_group = [&] (Vreg r, bool alloc) {
    auto const var = variables[r];
    assertx(var != nullptr);
    assertx(var->def_inst.op == Vinstr::phidef);

    // The def of a phidef'd variable is its first use record.
    auto& u = var->ivl()->uses.front();

    if (u.phi_group == kInvalidPhiGroup && alloc) {
      u.phi_group = phi_groups.size();
      phi_groups.emplace_back(jit::vector<PhiVar>{{r, u.pos}});
    }
    return u.phi_group;
  };

  auto const is_fixed = [&] (Vreg r) { return variables[r]->fixed(); };

  for (auto const b : ctx.blocks) {
    auto const& code = unit.blocks[b].code;
    if (code.empty()) continue;

    // Phis can only appear as the first (phidef) or last (phijmp/phijcc)
    // instruction of a block.
    auto const& first = code.front();
    auto const& last = code.back();

    if (first.op == Vinstr::phidef) {
      // A phidef'd variable just needs to be assigned a phi group if it
      // doesn't already have one (i.e., from handling a phijmp).
      auto const& defs = unit.tuples[first.phidef_.defs];
      for (auto const r : defs) {
        if (is_fixed(r)) continue;
        def_phi_group(r, true);
      }
    }

    if (last.op == Vinstr::phijmp) {
      auto const target = last.phijmp_.target;
      auto const& def_inst = unit.blocks[target].code.front();
      assertx(def_inst.op == Vinstr::phidef);

      auto const& uses = unit.tuples[last.phijmp_.uses];
      auto const& defs = unit.tuples[def_inst.phidef_.defs];
      assertx(uses.size() == defs.size());

      // Set the phi group for each phi use variable to that of the
      // corresponding phi def variable (allocating a new group if the def
      // variable has not already been assigned one).
      for (size_t i = 0, n = uses.size(); i < n; ++i) {
        if (is_fixed(defs[i])) continue;

        auto const pgid = def_phi_group(defs[i], true);
        addPhiGroupMember(variables, phi_groups,
                          uses[i], last.pos, pgid, defs[i]);
      }
    } else if (last.op == Vinstr::phijcc) {
      // A phijcc has two phidef targets; its uses join both defs' groups.
      auto const next = last.phijcc_.targets[0];
      auto const taken = last.phijcc_.targets[1];
      auto const& next_inst = unit.blocks[next].code.front();
      auto const& taken_inst = unit.blocks[taken].code.front();
      assertx(next_inst.op == Vinstr::phidef);
      assertx(taken_inst.op == Vinstr::phidef);

      auto const& uses = unit.tuples[last.phijcc_.uses];
      auto const& next_defs = unit.tuples[next_inst.phidef_.defs];
      auto const& taken_defs = unit.tuples[taken_inst.phidef_.defs];
      assertx(uses.size() == next_defs.size());
      assertx(uses.size() == taken_defs.size());

      for (size_t i = 0, n = uses.size(); i < n; ++i) {
        if (is_fixed(next_defs[i]) || is_fixed(taken_defs[i])) continue;

        auto const next_pgid = def_phi_group(next_defs[i], false);
        auto const taken_pgid = def_phi_group(taken_defs[i], false);
        auto pgid = next_pgid;

        if (next_pgid == kInvalidPhiGroup) {
          // `next' has no group yet; adopt taken's group (allocating it if
          // needed) and add next's def to it.
          pgid = taken_pgid == kInvalidPhiGroup
            ? def_phi_group(taken_defs[i], true)
            : taken_pgid;
          addPhiGroupMember(variables, phi_groups,
                            next_defs[i], next_inst.pos, pgid);
        } else if (taken_pgid == kInvalidPhiGroup) {
          pgid = next_pgid; // we know this is valid
          addPhiGroupMember(variables, phi_groups,
                            taken_defs[i], taken_inst.pos, pgid);
        } else if (next_pgid != taken_pgid) {
          // We managed to hit phijmps to both of our target phidefs before
          // hitting this phijcc. This should be pretty rare, but let's go
          // ahead and merge the two equivalence classes into one.
          phi_groups[pgid].reserve(phi_groups[next_pgid].size() +
                                   phi_groups[taken_pgid].size());
          for (auto const& phiv : phi_groups[taken_pgid]) {
            addPhiGroupMember(variables, phi_groups,
                              phiv.r, phiv.pos, pgid);
          }
          phi_groups[taken_pgid].clear();
        }

        // Pick the hotter phidef as the hint, else prefer `next'.
        auto const next_hotter =
          static_cast<unsigned>(unit.blocks[next].area_idx) <=
          static_cast<unsigned>(unit.blocks[taken].area_idx);
        auto const hint = next_hotter ? next_defs[i] : taken_defs[i];
        auto const other = next_hotter ? taken_defs[i] : next_defs[i];
        addPhiGroupMember(variables, phi_groups,
                          uses[i], last.pos, pgid, hint, other);
      }
    }
  }
  return phi_groups;
}
1317 HintInfo
analyzeHints(const Vunit
& unit
, const VxlsContext
& ctx
,
1318 const jit::vector
<Variable
*>& variables
) {
1320 hint_info
.phi_groups
= analyzePhiHints(unit
, ctx
, variables
);
1326 * Try to return the physical register that would coalesce `ivl' with its
1329 PhysReg
tryCoalesce(const jit::vector
<Variable
*>& variables
,
1330 const Interval
* ivl
) {
1331 assertx(ivl
== ivl
->leader());
1332 assertx(!ivl
->uses
.empty());
1334 auto const& def
= ivl
->uses
.front();
1335 assertx(def
.pos
== ivl
->var
->def_pos
);
1337 if (!def
.hint
.isValid()) return InvalidReg
;
1338 auto const hint_var
= variables
[def
.hint
];
1340 for (auto hvl
= hint_var
->ivl(); hvl
; hvl
= hvl
->next
) {
1341 if (def
.pos
== hvl
->end()) return hvl
->reg
;
/*
 * Scan the uses of `ivl' for phi nodes, and return a pair of hints for the
 * first one we find.
 *
 * For a given phi node F, the first of the two hints we return is derived from
 * only those phijmps and phijccs targeting F (if F is a phidef), or from the
 * phidefs F targets (if F is a phijmp or phijcc). The second hint accounts
 * for F's entire equivalence class---see analyzePhiHints() for more details.
 */
folly::Optional<std::pair<PhysReg,PhysReg>>
tryPhiHint(const jit::vector<Variable*>& variables,
           const Interval* ivl, const HintInfo& hint_info,
           const PosVec& free_until) {
  // Between two candidate registers, keep whichever is free closest past the
  // end of `ivl'.
  auto const choose = [&] (PhysReg h1, PhysReg h2) {
    return choose_closest_after(ivl->end(), free_until, h1, h2);
  };

  for (auto const& u : ivl->uses) {
    // Look for the first phi hint.
    if (u.phi_group == kInvalidPhiGroup) continue;

    auto preferred = InvalidReg;
    auto group_hint = InvalidReg;

    // Choose a register to be the hint.
    for (auto const& phiv : hint_info.phi_groups[u.phi_group]) {
      auto const var = variables[phiv.r];
      if (!var) continue;

      // Only group members that already have a register assigned at the phi
      // position are useful as hints.
      auto const phivl = var->ivlAtUse(phiv.pos);
      if (phivl->reg == InvalidReg) continue;

      auto const& phu = phivl->uses[phivl->findUse(phiv.pos)];
      assertx(phu.pos == phiv.pos);

      // Prefer to use a hint from a corresponding phi node.
      if (u.hint == phiv.r || u.alt_hint == phiv.r ||
          phu.hint == ivl->var->vreg || phu.alt_hint == ivl->var->vreg) {
        preferred = choose(preferred, phivl->reg);
      }
      // In the end, though, we're okay with any hint from the group.
      group_hint = choose(group_hint, phivl->reg);
    }
    return std::make_pair(preferred, group_hint);
  }
  // No phi use found in this interval.
  return folly::none;
}
/*
 * Choose an appropriate hint for the (sub-)interval `ivl'.
 *
 * The register allocator passes us constraints via `free_until' and `allow'.
 * We use the `free_until' vector to choose a hint that most tightly covers the
 * lifetime of `ivl'; we use `allow' to ensure that our hint satisfies
 * the interval's register constraints.
 */
PhysReg chooseHint(const jit::vector<Variable*>& variables,
                   const Interval* ivl, const HintInfo& hint_info,
                   const PosVec& free_until, RegSet allow) {
  if (!RuntimeOption::EvalHHIREnablePreColoring &&
      !RuntimeOption::EvalHHIREnableCoalescing) return InvalidReg;

  auto const choose = [&] (PhysReg h1, PhysReg h2) {
    return choose_closest_after(ivl->end(), free_until, h1, h2);
  };
  auto hint = InvalidReg;

  // If `ivl' contains its def, try a coalescing hint.
  if (ivl == ivl->leader()) {
    hint = tryCoalesce(variables, ivl);
  }

  // Return true if we should return `h'.
  auto const check_and_trace = [&] (PhysReg h, const char* prefix) {
    if (h == InvalidReg) return false;
    if (allow.contains(h)) {
      FTRACE(kHintLevel, "{}hinting {} for %{} @ {}\n",
             prefix, show(h), size_t(ivl->var->vreg), ivl->start());
      return true;
    }
    FTRACE(kHintLevel, "  found mismatched hint for %{} @ {}\n",
           size_t(ivl->var->vreg), ivl->uses.front().pos);
    return false;
  };

  auto const is_phi_use = !ivl->uses.empty() &&
                          ivl->uses.front().phi_group != kInvalidPhiGroup;

  // If we have either a backwards or a forwards hint, return it---if we have
  // both, return whichever is free the longest. For phi uses, however, we
  // want to consider the whole phi group's allocations first.
  if (!is_phi_use && check_and_trace(hint, "")) return hint;

  // Try to determine a hint via the first phi use in the interval.
  auto const phi_hints = tryPhiHint(variables, ivl, hint_info, free_until);
  bool is_phi_hint = false;

  if (phi_hints) {
    // Prefer the phi node's own hint; only fall back to the group-wide hint
    // if the preferred one lost.
    hint = choose(phi_hints->first, hint);
    if (hint != phi_hints->first) {
      hint = choose(hint, phi_hints->second);
    }
    is_phi_hint = hint == phi_hints->first ||
                  hint == phi_hints->second;
  }
  if (phi_hints || is_phi_use) {
    if (check_and_trace(hint, debug && is_phi_hint ? "phi-" : "")) {
      return hint;
    }
  }

  // Crawl through our uses and look for a physical hint.
  for (auto const& u : ivl->uses) {
    if (!u.hint.isValid() ||
        !u.hint.isPhys() ||
        !allow.contains(u.hint)) continue;
    hint = choose(hint, u.hint);
  }

  if (hint != InvalidReg) {
    FTRACE(kHintLevel, "physical hint {} for %{} @ {}\n",
           show(hint), size_t(ivl->var->vreg), ivl->start());
  }
  return hint;
}
1472 ///////////////////////////////////////////////////////////////////////////////
1473 // Register allocation.
/*
 * Information about spills generated by register allocation.
 *
 * Used for the allocateSpillSpace() pass which inserts the instructions that
 * create spill space on the stack.
 */
struct SpillInfo {
  // Number of intervals spilled.
  unsigned num_spills{0};
  // Number of spill slots used.
  size_t used_spill_slots{0};
};
/*
 * Extended Linear Scan register allocator over vasm virtual registers (Vregs).
 *
 * This encapsulates the intermediate data structures used during the
 * allocation phase of the algorithm so we don't have to pass them around
 * everywhere.
 */
struct Vxls {
  Vxls(const VxlsContext& ctx,
       const jit::vector<Variable*>& variables,
       const HintInfo& hint_info)
    : ctx(ctx)
    , variables(variables)
    , hint_info(hint_info)
  {}

  // Run the allocator over all variables and return spill statistics.
  SpillInfo go();

private:
  void assignSpill(Interval* ivl);
  void spill(Interval*);
  void assignReg(Interval*, PhysReg);

  unsigned nearestSplitBefore(unsigned pos);
  unsigned constrain(Interval*, RegSet&);

  void update(Interval*);
  void allocate(Interval*);
  void allocBlocked(Interval*);
  void spillOthers(Interval* current, PhysReg r);

private:
  /*
   * Comparison function for pending priority queue.
   *
   * std::priority_queue requires a less operation, but sorts the heap
   * highest-first; we need the opposite (lowest-first), so use greater-than.
   */
  struct Compare {
    bool operator()(const Interval* i1, const Interval* i2) {
      return i1->start() > i2->start();
    }
  };

private:
  const VxlsContext& ctx;
  // Variables, null if unused.
  const jit::vector<Variable*>& variables;
  // Phi-group hint metadata from analyzeHints().
  const HintInfo& hint_info;
  // Subintervals sorted by Interval start.
  jit::priority_queue<Interval*,Compare> pending;
  // Intervals that overlap.
  jit::vector<Interval*> active, inactive;
  // Last position each spill slot was owned; kMaxPos means currently used.
  jit::array<unsigned, kMaxSpillSlots> spill_slots{{0}};
  // Stats on spills generated during allocation.
  SpillInfo spill_info;
};
SpillInfo Vxls::go() {
  // Seed the allocator: fixed (physical) variables are pre-assigned their
  // register, constants are spilled up front, and everything else goes onto
  // the pending queue.
  for (auto var : variables) {
    if (!var) continue;
    if (var->fixed()) {
      assignReg(var->ivl(), var->vreg);
    } else if (var->constant) {
      spill(var->ivl());
    } else {
      pending.push(var->ivl());
    }
  }
  // Process intervals in order of increasing start position (see Compare).
  while (!pending.empty()) {
    auto current = pending.top();
    pending.pop();
    update(current);
    allocate(current);
  }
  return spill_info;
}
/*
 * Assign the next available spill slot to `ivl'.
 *
 * Punts the whole translation if we run out of spill slots.
 */
void Vxls::assignSpill(Interval* ivl) {
  assertx(!ivl->fixed() && ivl != ivl->leader());

  // All subintervals of a variable share one slot; bail if it's already set.
  if (ivl->var->slot != kInvalidSpillSlot) return;

  auto& used_spill_slots = spill_info.used_spill_slots;

  auto const assign_slot = [&] (size_t slot) {
    ivl->var->slot = slot;
    ++spill_info.num_spills;

    // Mark the slot (and its pair, for wide values) as currently in use.
    spill_slots[slot] = kMaxPos;
    if (!ivl->var->wide) {
      used_spill_slots = std::max(used_spill_slots, slot + 1);
    } else {
      used_spill_slots = std::max(used_spill_slots, slot + 2);
      spill_slots[slot + 1] = kMaxPos;
    }
  };

  // Assign spill slots. We track the highest position at which a spill slot
  // was owned, and only reassign it to a Vreg if its lifetime interval
  // (including all splits) is strictly above that high water mark.
  if (!ivl->var->wide) {
    for (size_t slot = 0, n = spill_slots.size(); slot < n; ++slot) {
      if (ivl->leader()->start() >= spill_slots[slot]) {
        return assign_slot(slot);
      }
    }
  } else {
    // Wide values need two adjacent slots, searched at even alignment.
    for (size_t slot = 0, n = spill_slots.size() - 1; slot < n; slot += 2) {
      if (ivl->leader()->start() >= spill_slots[slot] &&
          ivl->leader()->start() >= spill_slots[slot + 1]) {
        return assign_slot(slot);
      }
    }
  }

  // Ran out of spill slots.
  ONTRACE(kRegAllocLevel, dumpVariables(variables, spill_info.num_spills));
  TRACE(1, "vxls-punt TooManySpills\n");
  TRACE_PUNT("LinearScan_TooManySpills");
}
/*
 * Assign `r' to `ivl'.
 */
void Vxls::assignReg(Interval* ivl, PhysReg r) {
  if (!ivl->fixed() && ivl->uses.empty()) {
    // No uses in this subinterval, so it doesn't actually need the register;
    // give it a spill slot instead (constants never take a slot).
    ivl->reg = InvalidReg;
    if (!ivl->constant()) assignSpill(ivl);
  } else {
    ivl->reg = r;
    active.push_back(ivl);
  }
}
/*
 * Spill `ivl' from its start until its first register use.
 *
 * Spill `ivl' if there is no use; otherwise split the interval just before the
 * use, and enqueue the second part.
 */
void Vxls::spill(Interval* ivl) {
  unsigned first_use = ivl->firstUse();
  if (first_use <= ivl->end()) {
    auto split_pos = nearestSplitBefore(first_use);
    if (split_pos <= ivl->start()) {
      // This only can happen if we need more than the available registers
      // at a single position. It can happen in phijmp or callargs.
      TRACE(1, "vxls-punt RegSpill\n");
      TRACE_PUNT("RegSpill"); // cannot split before first_use
    }
    pending.push(ivl->split(split_pos));
  }
  // The first (use-free) part stays spilled; constants never take a slot.
  ivl->reg = InvalidReg;
  if (!ivl->constant()) assignSpill(ivl);
}
/*
 * Update the active and inactive lists for the start of `current'.
 */
void Vxls::update(Interval* current) {
  auto const pos = current->start();

  // Release the spill slot owned by an interval that has fully expired, by
  // recording the interval's end as the slot's last-owned position.
  auto const free_spill_slot = [this] (Interval* ivl) {
    assertx(!ivl->next);
    auto slot = ivl->var->slot;

    if (slot != kInvalidSpillSlot) {
      if (ivl->var->wide) {
        assertx(spill_slots[slot + 1]);
        spill_slots[slot + 1] = ivl->end();
      }
      assertx(spill_slots[slot]);
      spill_slots[slot] = ivl->end();
    }
  };

  // Check for active/inactive intervals that have expired or which need their
  // polarity flipped.
  auto const update_list = [&] (jit::vector<Interval*>& target,
                                jit::vector<Interval*>& other,
                                bool is_active) {
    auto end = target.end();
    for (auto i = target.begin(); i != end;) {
      auto ivl = *i;
      if (pos >= ivl->end()) {
        // Expired: swap-remove it, and free its slot if it's the last split.
        *i = *--end;
        if (!ivl->next) free_spill_slot(ivl);
      } else if (is_active ? !ivl->covers(pos) : ivl->covers(pos)) {
        // Wrong polarity for this list: move it to the other one.
        *i = *--end;
        other.push_back(ivl);
      } else {
        i++;
      }
    }
    target.erase(end, target.end());
  };
  update_list(active, inactive, true);
  update_list(inactive, active, false);
}
1696 * Return the closest split position on or before `pos'.
1698 * The result might be exactly on an edge, or in-between instruction positions.
1700 unsigned Vxls::nearestSplitBefore(unsigned pos
) {
1701 auto b
= blockFor(ctx
, pos
);
1702 auto range
= ctx
.block_ranges
[b
];
1703 if (pos
== range
.start
) return pos
;
1704 return (pos
- 1) | 1;
/*
 * Constrain the allowable registers for `ivl' by inspecting uses.
 *
 * Returns the latest position for which `allow' (which we populate) is valid.
 * We use this return value to fill the `free_until' PosVec in allocate()
 * below. That data structure tracks the first position at which a register is
 * /unavailable/, so it would appear that constrain()'s return value is
 * off by one.
 *
 * In fact, it is not; we actually /need/ this position offsetting because of
 * our leniency towards having uses at an Interval's end() position. If we
 * fail to constrain on an end-position use, we must still split and spill.
 * (In contrast, if we intersect with another Interval on an end position use,
 * it's okay because SSA tells us that the conflict must be the other
 * Interval's def position, and a use and a def at the same position don't
 * actually conflict; see the fun ASCII diagram that adorns the definition of
 * Interval.)
 */
unsigned Vxls::constrain(Interval* ivl, RegSet& allow) {
  auto const any = ctx.abi.unreserved() - ctx.abi.sf; // Any but not flags.
  allow = ctx.abi.unreserved();
  for (auto& u : ivl->uses) {
    // Map each use's constraint kind to the set of registers that satisfy it.
    auto need = u.kind == Constraint::Simd ? ctx.abi.simdUnreserved :
                u.kind == Constraint::Gpr ? ctx.abi.gpUnreserved :
                u.kind == Constraint::Sf ? ctx.abi.sf :
                any; // Any or CopySrc
    if ((allow & need).empty()) {
      // cannot satisfy constraints; must split before u.pos
      return u.pos - 1;
    }
    allow &= need;
  }
  return kMaxPos;
}
/*
 * Return the next intersection point between `current' and `other', or kMaxPos
 * if they never intersect.
 *
 * We assume that `other', if it represents an SSA variable, is not live at the
 * start of `current'.
 *
 * Note that if two intervals intersect, the first point of intersection will
 * always be the start of one of the intervals, because SSA ensures that a def
 * dominates all uses, and hence all live ranges as well.
 */
unsigned nextIntersect(const Interval* current, const Interval* other) {
  assertx(!current->fixed());

  if (current == current->leader() &&
      other == other->leader() &&
      !other->fixed()) {
    // Since `other' is an inactive Vreg interval, it cannot cover current's
    // start, and `current' cannot cover other's start, since `other' started
    // earlier. Therefore, SSA guarantees no intersection.
    return kMaxPos;
  }
  if (current->end() <= other->start() ||
      other->end() <= current->start()) {
    // One ends before the other starts.
    return kMaxPos;
  }
  // r1,e1 span all of current
  auto r1 = current->ranges.begin();
  auto e1 = current->ranges.end();
  // r2,e2 span the tail of other that might intersect current
  auto r2 = other->ranges.begin() + other->findRange(current->start());
  auto e2 = other->ranges.end();
  // search for the lowest position covered by current and other
  for (;;) {
    if (r1->start < r2->start) {
      if (r2->start < r1->end) return r2->start;
      if (++r1 == e1) return kMaxPos;
    } else {
      if (r1->start < r2->end) return r1->start;
      if (++r2 == e2) return kMaxPos;
    }
  }
  return kMaxPos;
}
/*
 * Try to allocate a register for `current'; failing that, fall back to
 * allocBlocked() to spill something.
 */
void Vxls::allocate(Interval* current) {
  // Map from PhysReg until the first position at which it is /not/ available.
  PosVec free_until; // 0 by default

  RegSet allow;
  auto const conflict = constrain(current, allow);

  // Mark regs that fit our constraints as free up until the point of conflict,
  // unless they're owned by active intervals---then mark them used.
  allow.forEach([&](PhysReg r) {
    free_until[r] = conflict;
  });
  for (auto ivl : active) {
    free_until[ivl->reg] = 0;
  }

  // Mark each reg assigned to an inactive interval as only free until the
  // first position at which `current' intersects that interval.
  for (auto ivl : inactive) {
    auto r = ivl->reg;
    if (free_until[r] == 0) continue;
    auto until = nextIntersect(current, ivl);
    free_until[r] = std::min(until, free_until[r]);
  }

  if (current->ranges.size() > 1) {
    auto const b = blockFor(ctx, current->start());
    auto const blk_range = ctx.block_ranges[b];
    if (blk_range.end > current->ranges[0].end) {
      // We're assigning a register to an interval with
      // multiple ranges, but the vreg isn't live out
      // of the first range. This means there's no
      // connection between this range and any subsequent
      // one, so we can safely break the interval
      // after the first range without making things worse.
      // On the other hand, it can make things better, by
      // eg not assigning a constant to a register in an
      // unlikely exit block, and then holding it in a callee save
      // reg across lots of unrelated code until its used
      // again in another unlikely exit block.
      auto second = current->split(blk_range.end, false);
      pending.push(second);
    } else if (current->constant() &&
               current->uses.size() &&
               current->uses[0].pos >= blk_range.end) {
      // we probably don't want to load a constant into a register
      // at the start of a block where its not used.
      return spill(current);
    }
  }

  // Choose a register to allocate to `current', either via a hint or by
  // various heuristics.
  auto const choose_reg = [&] {
    auto const hint = chooseHint(variables, current,
                                 hint_info, free_until, allow);
    if (hint != InvalidReg &&
        free_until[hint] >= current->end()) {
      return hint;
    }

    // No usable hint; pick the register free closest past current's end.
    auto ret = InvalidReg;
    for (auto const r : free_until) {
      ret = choose_closest_after(current->end(), free_until, ret, r);
    }
    return ret;
  };

  auto const r = choose_reg();
  auto const pos = free_until[r];
  if (pos >= current->end()) {
    // `r' is free for all of `current'.
    return assignReg(current, r);
  }

  if (pos > current->start()) {
    // `r' is free for the first part of current.
    auto const prev_use = current->lastUseBefore(pos);

    DEBUG_ONLY auto min_split = std::max(prev_use, current->start() + 1);
    assertx(min_split <= pos);

    auto split_pos = nearestSplitBefore(pos);
    if (split_pos > current->start()) {
      if (prev_use && prev_use < split_pos) {
        // If there are uses in previous blocks, but no uses between the start
        // of the block containing `split_pos' and `split_pos' itself, we
        // should split earlier; otherwise we'll need to insert moves/loads on
        // the edge(s) into this block, which clearly can't be used since we're
        // spilling before the first use. Might as well spill on a block
        // boundary, as early as possible.
        auto prev_range_idx = current->findRange(prev_use);
        auto prev_range = &current->ranges[prev_range_idx];
        if (prev_range->start <= prev_use && prev_range->end < split_pos) {
          prev_range++;
        }
        if (prev_range->start > prev_use && prev_range->start < split_pos) {
          split_pos = prev_range->start;
        }
      }

      // Split `current'. We keep uses at the end of the first split because
      // we know that `r' is free up to /and including/ that position.
      auto second = current->split(split_pos, true /* keep_uses */);
      pending.push(second);

      // Try to find a register again. Since `current' is shorter, we might
      // have a different hint, or a different heuristically-determined
      // best register.
      auto const r = choose_reg();
      assertx(free_until[r] >= current->end());
      return assignReg(current, r);
    }
  }

  // Must spill `current' or another victim.
  allocBlocked(current);
}
/*
 * When all registers are in use, find a good interval (possibly `current') to
 * spill.
 *
 * When an interval is split and the second part is spilled, possibly split the
 * second part again before the next use-pos that requires a register, and
 * enqueue the third part.
 */
void Vxls::allocBlocked(Interval* current) {
  auto const cur_start = current->start();

  RegSet allow;
  auto const conflict = constrain(current, allow); // repeated from allocate

  // Track the positions (a) at which each PhysReg is next used by any lifetime
  // interval to which it's assigned (`used'), and (b) at which each PhysReg is
  // next assigned to a value whose lifetime intersects `current' (`blocked').
  PosVec used, blocked;
  allow.forEach([&](PhysReg r) { used[r] = blocked[r] = conflict; });

  // compute next use of active registers, so we can pick the furthest one
  for (auto ivl : active) {
    if (ivl->fixed()) {
      // Fixed intervals can never be spilled or moved.
      blocked[ivl->reg] = used[ivl->reg] = 0;
    } else {
      auto use_pos = ivl->firstUseAfter(cur_start);
      used[ivl->reg] = std::min(use_pos, used[ivl->reg]);
    }
  }

  // compute next intersection/use of inactive regs to find what's free longest
  for (auto ivl : inactive) {
    auto const r = ivl->reg;
    if (blocked[r] == 0) continue;

    auto intersect_pos = nextIntersect(current, ivl);
    if (intersect_pos == kMaxPos) continue;

    if (ivl->fixed()) {
      blocked[r] = std::min(intersect_pos, blocked[r]);
      used[r] = std::min(blocked[r], used[r]);
    } else {
      auto use_pos = ivl->firstUseAfter(cur_start);
      used[r] = std::min(use_pos, used[r]);
    }
  }

  // Choose the best victim register(s) to spill---the one with first-use
  // after, but closest to, the lifetime of `current'; or the one with farthest
  // first-use if no such register exists.
  auto r = find_closest_after(current->end(), used);

  // If all other registers are used by their owning intervals before the first
  // register-use of `current', then we have to spill `current'.
  if (used[r] < current->firstUse()) {
    return spill(current);
  }

  auto const block_pos = blocked[r];
  if (block_pos < current->end()) {
    // If /every/ usable register is assigned to a lifetime interval which
    // intersects with `current', we have to split current before that point.
    auto prev_use = current->lastUseBefore(block_pos);

    DEBUG_ONLY auto min_split = std::max(prev_use, cur_start + 1);
    auto max_split = block_pos;
    assertx(cur_start < min_split && min_split <= max_split);

    auto split_pos = nearestSplitBefore(max_split);
    if (split_pos > current->start()) {
      auto second = current->split(split_pos, true /* keep_uses */);
      pending.push(second);
    }
  }

  spillOthers(current, r);
  assignReg(current, r);
}
1985 * Split and spill other intervals that conflict with `current' for register r,
1986 * at current->start().
1988 * If necessary, split the victims again before their first use position that
1989 * requires a register.
// Evict register `r' from every other interval that conflicts with `current':
// each victim is split at (or spilled entirely from) current->start(), so that
// `r' becomes free for `current'.
1991 void Vxls::spillOthers(Interval
* current
, PhysReg r
) {
1992 auto const cur_start
= current
->start();
1994 // Split `ivl' at `cur_start' and spill the second part. If `cur_start' is
1995 // too close to ivl->start(), spill all of `ivl' instead.
1996 auto const spill_after
= [&] (Interval
* ivl
) {
1997 auto const split_pos
= nearestSplitBefore(cur_start
);
// If the best split point is at or before ivl's own start, nothing can stay
// in a register: the whole interval becomes the spilled tail.
1998 auto const tail
= split_pos
<= ivl
->start() ? ivl
: ivl
->split(split_pos
);
2002 // Split and spill other active intervals after `cur_start'.
2003 auto end
= active
.end();
2004 for (auto i
= active
.begin(); i
!= end
;) {
// Only intervals currently holding `r' are victims; fixed intervals and
// intervals on other registers are left alone.
2006 if (other
->fixed() || r
!= other
->reg
) {
// Erase the victims the loop moved past `end' — TODO confirm against the
// full source that the loop body swaps spilled entries to the tail.
2012 active
.erase(end
, active
.end());
2014 // Split and spill any inactive intervals after `cur_start' if they intersect
2016 end
= inactive
.end();
2017 for (auto i
= inactive
.begin(); i
!= end
;) {
2019 if (other
->fixed() || r
!= other
->reg
) {
2022 auto intersect
= nextIntersect(current
, other
);
// An inactive interval that never intersects `current' may keep `r'.
2023 if (intersect
>= current
->end()) {
2029 inactive
.erase(end
, inactive
.end());
2032 SpillInfo
assignRegisters(const VxlsContext
& ctx
,
2033 const jit::vector
<Variable
*>& variables
,
2034 const HintInfo
& hint_info
) {
2035 return Vxls(ctx
, variables
, hint_info
).go();
2038 ///////////////////////////////////////////////////////////////////////////////
2039 // Lifetime continuity resolution.
2042 * A pair of source block number and successor index, used to identify an
2045 using EdgeKey
= std::pair
<Vlabel
,unsigned>;
2048 size_t operator()(EdgeKey k
) const {
2049 return size_t(k
.first
) ^ k
.second
;
2054 * Copies that are required at a given position or edge.
2056 * The keys into the PhysReg::Map are the dests; the Interval*'s are the
2057 * sources (nullptr if no copy is needed).
2059 using CopyPlan
= PhysReg::Map
<Interval
*>;
2062 * Copy and spill points for resolving split lifetime intervals.
2064 * After register allocation, some lifetime intervals may have been split, and
2065 * their Vregs assigned to different physical registers or spill locations. We
2066 * use this struct to track where we need to add moves to maintain continuity.
2067 * (We also use it to resolve phis.)
// Bookkeeping produced by lifetime resolution (resolveSplits/lowerCopies/
// resolveEdges) and consumed by insertCopies(), which mutates the code.
2069 struct ResolutionPlan
{
2070 // Where to insert spills.  Keyed by XLS position.
2071 jit::hash_map
<unsigned,CopyPlan
> spills
;
2072 // Where to insert reg-reg moves, or spill and constant loads, between
2073 // instructions (or in place of copy{} and friends).  Keyed by XLS position.
2074 jit::hash_map
<unsigned,CopyPlan
> copies
;
2076 // Copies and loads on edges (between blocks).  Keyed by (block, succ-index).
2077 jit::hash_map
<EdgeKey
,CopyPlan
,EdgeHasher
> edge_copies
;
2080 template<class CopyPlanT
>
2081 using void_req
= typename
std::enable_if
<
2082 std::is_same
<CopyPlanT
, CopyPlan
>::value
||
2083 std::is_same
<CopyPlanT
, const CopyPlan
>::value
2087 * Iterators for reg-reg copies and for {const,spill}-reg loads.
// Invoke `f' on each (dst, interval) pair in `plan' that needs a reg-reg
// copy: entries with no source (nullptr) or whose source is not live() in a
// register are skipped (those are handled by for_each_load below).
2089 template<class CopyPlanT
, class F
>
2090 void_req
<CopyPlanT
> for_each_copy(CopyPlanT
& plan
, F f
) {
2091 for (auto dst
: plan
) {
2092 auto& ivl
= plan
[dst
];
2093 if (!ivl
|| !ivl
->live()) continue;
// Invoke `f' on each (dst, interval) pair in `plan' whose source must be
// materialized with a load rather than a reg-reg move: the interval is not
// live() and is either a constant or spilled (asserted below).
2097 template<class CopyPlanT
, class F
>
2098 void_req
<CopyPlanT
> for_each_load(CopyPlanT
& plan
, F f
) {
2099 for (auto dst
: plan
) {
2100 auto& ivl
= plan
[dst
];
2101 if (!ivl
|| ivl
->live()) continue;
2102 assertx(ivl
->constant() || ivl
->spilled());
2108 * Insert a spill after the def-position in `ivl'.
2110 * There's only one such position, because of SSA.
/*
 * Record in `resolution' a spill of `ivl' at the position just after its
 * def (def_pos + 1).  SSA guarantees a single def, so there is exactly one
 * such position per variable.
 */
2112 void insertSpill(const VxlsContext
& ctx
,
2113 ResolutionPlan
& resolution
, Interval
* ivl
) {
// Debug-only sanity check: the spill position is odd (between instructions)
// and falls strictly inside its block's [start, end) range.
2114 auto DEBUG_ONLY checkPos
= [&](unsigned pos
) {
2115 assertx(pos
% 2 == 1);
2116 DEBUG_ONLY
auto const b
= blockFor(ctx
, pos
);
2117 DEBUG_ONLY
auto const& range
= ctx
.block_ranges
[b
];
2118 assertx(pos
- 1 >= range
.start
&& pos
+ 1 < range
.end
);
2121 auto pos
= ivl
->var
->def_pos
+ 1;
2122 assertx(checkPos(pos
));
2123 resolution
.spills
[pos
][ivl
->reg
] = ivl
; // store ivl->reg => ivl->slot
2127 * Insert spills and copies that connect subintervals that were split
2128 * between instructions.
2130 void resolveSplits(const VxlsContext
& ctx
,
2131 const jit::vector
<Variable
*>& variables
,
2132 ResolutionPlan
& resolution
) {
2133 for (auto var
: variables
) {
2135 auto i1
= var
->ivl();
2136 if (var
->slot
>= 0) insertSpill(ctx
, resolution
, i1
);
2138 for (auto i2
= i1
->next
; i2
; i1
= i2
, i2
= i2
->next
) {
2139 auto const pos
= i2
->start();
2140 if (i1
->end() != pos
) continue; // spans lifetime hole
2141 if (i2
->reg
== InvalidReg
) continue; // no load necessary
2142 if (i2
->reg
== i1
->reg
) continue; // no copy necessary
2144 auto const b
= blockFor(ctx
, pos
);
2145 auto const range
= ctx
.block_ranges
[b
];
2148 // even position requiring a copy must be on edge
2149 assertx(range
.start
== pos
);
2152 assertx(pos
> range
.start
); // implicit label position per block
2153 if (pos
+ 1 == range
.end
) continue; // copy belongs on successor edge
2155 assertx(!resolution
.copies
[pos
][i2
->reg
]);
2156 resolution
.copies
[pos
][i2
->reg
] = i1
;
2163 * Lower copyargs{} and copy{} into moveplans at the same position.
2165 void lowerCopies(Vunit
& unit
, const VxlsContext
& ctx
,
2166 const jit::vector
<Variable
*>& variables
,
2167 ResolutionPlan
& resolution
) {
2168 // Add a lifetime-resolving copy from `s' to `d'---without touching the
2169 // instruction stream.
2170 auto const lower
= [&] (unsigned pos
, Vreg s
, Vreg d
) {
2171 auto v1
= variables
[s
];
2172 auto v2
= variables
[d
];
2174 assertx(v2
->fixed() || v2
->def_pos
== pos
); // ssa
2176 auto i1
= v1
->fixed() ? v1
->ivl() : v1
->ivlAtUse(pos
);
2177 auto i2
= v2
->ivl();
2179 if (i2
->reg
!= i1
->reg
) {
2180 assertx(!resolution
.copies
[pos
][i2
->reg
]);
2181 resolution
.copies
[pos
][i2
->reg
] = i1
;
2185 for (auto b
: ctx
.blocks
) {
2186 auto pos
= ctx
.block_ranges
[b
].start
;
2188 for (auto& inst
: unit
.blocks
[b
].code
) {
2189 if (inst
.op
== Vinstr::copyargs
) {
2190 auto const& uses
= unit
.tuples
[inst
.copyargs_
.s
];
2191 auto const& defs
= unit
.tuples
[inst
.copyargs_
.d
];
2192 for (unsigned i
= 0, n
= uses
.size(); i
< n
; ++i
) {
2193 lower(pos
, uses
[i
], defs
[i
]);
2196 } else if (inst
.op
== Vinstr::copy2
) {
2197 lower(pos
, inst
.copy2_
.s0
, inst
.copy2_
.d0
);
2198 lower(pos
, inst
.copy2_
.s1
, inst
.copy2_
.d1
);
2200 } else if (inst
.op
== Vinstr::copy
) {
2201 lower(pos
, inst
.copy_
.s
, inst
.copy_
.d
);
2210 * Search for the phidef in block `b', then return its dest tuple.
2212 Vtuple
findPhiDefs(const Vunit
& unit
, Vlabel b
) {
2213 assertx(!unit
.blocks
[b
].code
.empty() &&
2214 unit
.blocks
[b
].code
.front().op
== Vinstr::phidef
);
2215 return unit
.blocks
[b
].code
.front().phidef_
.defs
;
2219 * Register copy resolutions for livein sets and phis.
2221 void resolveEdges(Vunit
& unit
, const VxlsContext
& ctx
,
2222 const jit::vector
<Variable
*>& variables
,
2223 ResolutionPlan
& resolution
) {
2224 auto const addPhiEdgeCopies
= [&] (Vlabel block
, Vlabel target
,
2225 uint32_t targetIndex
,
2226 const VregList
& uses
) {
2227 auto const p1
= ctx
.block_ranges
[block
].end
- 2;
2228 auto const& defs
= unit
.tuples
[findPhiDefs(unit
, target
)];
2230 for (unsigned i
= 0, n
= uses
.size(); i
< n
; ++i
) {
2231 auto v1
= variables
[uses
[i
]];
2232 auto v2
= variables
[defs
[i
]];
2235 auto i1
= v1
->fixed() ? v1
->ivl() : v1
->ivlAtUse(p1
);
2236 auto i2
= v2
->ivl();
2238 if (i2
->reg
!= i1
->reg
) {
2239 EdgeKey edge
{ block
, targetIndex
};
2240 assertx(!resolution
.edge_copies
[edge
][i2
->reg
]);
2241 resolution
.edge_copies
[edge
][i2
->reg
] = i1
;
2246 for (auto b1
: ctx
.blocks
) {
2247 auto const p1
= ctx
.block_ranges
[b1
].end
- 2;
2248 auto& block1
= unit
.blocks
[b1
];
2249 auto& inst1
= block1
.code
.back();
2251 // Add resolutions for phis.
2252 if (inst1
.op
== Vinstr::phijmp
) {
2253 auto const& phijmp
= inst1
.phijmp_
;
2254 auto const target
= phijmp
.target
;
2255 auto const& uses
= unit
.tuples
[phijmp
.uses
];
2256 addPhiEdgeCopies(b1
, target
, 0, uses
);
2257 inst1
= jmp
{target
};
2258 } else if (inst1
.op
== Vinstr::phijcc
) {
2259 auto const& phijcc
= inst1
.phijcc_
;
2260 auto const& targets
= phijcc
.targets
;
2261 auto const& uses
= unit
.tuples
[phijcc
.uses
];
2262 addPhiEdgeCopies(b1
, targets
[0], 0, uses
);
2263 addPhiEdgeCopies(b1
, targets
[1], 1, uses
);
2264 inst1
= jcc
{phijcc
.cc
, phijcc
.sf
, {targets
[0], targets
[1]}};
2267 auto const succlist
= succs(block1
);
2269 // Add resolutions for livein sets.
2270 for (unsigned i
= 0, n
= succlist
.size(); i
< n
; i
++) {
2271 auto const b2
= succlist
[i
];
2272 auto const p2
= ctx
.block_ranges
[b2
].start
;
2274 forEach(ctx
.livein
[b2
], [&] (Vreg vr
) {
2275 auto var
= variables
[vr
];
2276 if (var
->fixed()) return;
2277 Interval
* i1
= nullptr;
2278 Interval
* i2
= nullptr;
2280 for (auto ivl
= var
->ivl(); ivl
&& !(i1
&& i2
); ivl
= ivl
->next
) {
2281 if (ivl
->covers(p1
)) i1
= ivl
;
2282 if (ivl
->covers(p2
)) i2
= ivl
;
2285 // i2 can be unallocated if the tmp is a constant or is spilled.
2286 if (i2
->reg
!= InvalidReg
&& i2
->reg
!= i1
->reg
) {
2287 assertx((resolution
.edge_copies
[{b1
,i
}][i2
->reg
] == nullptr));
2288 resolution
.edge_copies
[{b1
,i
}][i2
->reg
] = i1
;
2296 * Walk through the variables list and account for all points where copies or
2297 * spills need to be made.
2299 ResolutionPlan
resolveLifetimes(Vunit
& unit
, const VxlsContext
& ctx
,
2300 const jit::vector
<Variable
*>& variables
) {
2301 ResolutionPlan resolution
;
2303 resolveSplits(ctx
, variables
, resolution
);
2304 lowerCopies(unit
, ctx
, variables
, resolution
);
2305 resolveEdges(unit
, ctx
, variables
, resolution
);
2310 ///////////////////////////////////////////////////////////////////////////////
2311 // Operand renaming.
2314 * Visitor class for renaming registers.
2317 Renamer(const jit::vector
<Variable
*>& variables
, unsigned pos
)
2318 : variables(variables
)
2323 void imm(const T
& /*r*/) {}
2324 template<class R
> void def(R
& r
) { rename(r
); }
2325 template<class D
, class H
> void defHint(D
& dst
, H
) { rename(dst
); }
2326 template<class R
> void use(R
& r
) { rename(r
); }
2327 template<class S
, class H
> void useHint(S
& src
, H
) { rename(src
); }
2328 template<class R
> void across(R
& r
) { rename(r
); }
2329 void across(Vptr
& m
) {
2330 if (m
.base
.isValid()) rename(m
.base
);
2331 if (m
.index
.isValid()) rename(m
.index
);
2335 void use(RegSet
/*r*/) {}
2337 if (m
.base
.isValid()) rename(m
.base
);
2338 if (m
.index
.isValid()) rename(m
.index
);
2340 void use(VcallArgsId
) { always_assert(false && "vcall unsupported in vxls"); }
2343 void rename(Vreg8
& r
) { r
= lookup(r
, Constraint::Gpr
); }
2344 void rename(Vreg16
& r
) { r
= lookup(r
, Constraint::Gpr
); }
2345 void rename(Vreg32
& r
) { r
= lookup(r
, Constraint::Gpr
); }
2346 void rename(Vreg64
& r
) { r
= lookup(r
, Constraint::Gpr
); }
2347 void rename(VregDbl
& r
) { r
= lookup(r
, Constraint::Simd
); }
2348 void rename(Vreg128
& r
) { r
= lookup(r
, Constraint::Simd
); }
2349 void rename(VregSF
& r
) { r
= RegSF
{0}; }
2350 void rename(Vreg
& r
) { r
= lookup(r
, Constraint::Any
); }
2351 void rename(Vtuple
/*t*/) { /* phijmp/phijcc+phidef handled by resolveEdges */
2354 PhysReg
lookup(Vreg vreg
, Constraint kind
) {
2355 auto var
= variables
[vreg
];
2356 if (!var
|| vreg
.isPhys()) return vreg
;
2357 PhysReg reg
= var
->ivlAtUse(pos
)->reg
;
2358 assertx((kind
== Constraint::Gpr
&& reg
.isGP()) ||
2359 (kind
== Constraint::Simd
&& reg
.isSIMD()) ||
2360 (kind
== Constraint::Sf
&& reg
.isSF()) ||
2361 (kind
== Constraint::Any
&& reg
!= InvalidReg
));
2365 const jit::vector
<Variable
*>& variables
;
2370 * Visit every virtual-register typed operand in `unit', and rename it to its
2371 * assigned physical register.
2373 void renameOperands(Vunit
& unit
, const VxlsContext
& ctx
,
2374 const jit::vector
<Variable
*>& variables
) {
2375 for (auto b
: ctx
.blocks
) {
2376 auto pos
= ctx
.block_ranges
[b
].start
;
2377 for (auto& inst
: unit
.blocks
[b
].code
) {
2378 Renamer
renamer(variables
, pos
);
2379 visitOperands(inst
, renamer
);
2385 printVariables("after renaming operands", unit
, ctx
, variables
);
2389 ///////////////////////////////////////////////////////////////////////////////
2390 // Flags liveness optimization.
2392 template <typename Inst
, typename F
>
2393 void optimize(Vunit
& /*unit*/, Inst
& /*inst*/, Vlabel
/*b*/, size_t /*i*/,
2396 template<typename xor_op
, typename ldimm_op
, typename F
>
2397 void optimize_ldimm(Vunit
& unit
, ldimm_op
& ldimm
,
2398 Vlabel b
, size_t i
, F sf_live
) {
2399 if (!sf_live() && ldimm
.s
.q() == 0 && ldimm
.d
.isGP()) {
2400 decltype(xor_op::d
) d
= ldimm
.d
;
2401 unit
.blocks
[b
].code
[i
] = xor_op
{d
, d
, d
, RegSF
{0}};
2405 template<typename F
>
2406 void optimize(Vunit
& unit
, ldimmb
& inst
, Vlabel b
, size_t i
, F sf_live
) {
2407 optimize_ldimm
<xorb
>(unit
, inst
, b
, i
, sf_live
);
2409 template<typename F
>
2410 void optimize(Vunit
& unit
, ldimml
& inst
, Vlabel b
, size_t i
, F sf_live
) {
2411 optimize_ldimm
<xorl
>(unit
, inst
, b
, i
, sf_live
);
2413 template<typename F
>
2414 void optimize(Vunit
& unit
, ldimmq
& inst
, Vlabel b
, size_t i
, F sf_live
) {
2415 optimize_ldimm
<xorq
>(unit
, inst
, b
, i
, sf_live
);
2418 template<typename F
>
2419 void optimize(Vunit
& unit
, lea
& inst
, Vlabel b
, size_t i
, F sf_live
) {
2420 if (!sf_live() && inst
.d
== rsp()) {
2421 assertx(inst
.s
.base
== inst
.d
&& !inst
.s
.index
.isValid());
2422 unit
.blocks
[b
].code
[i
] = addqi
{inst
.s
.disp
, inst
.d
, inst
.d
, RegSF
{0}};
2427 * Perform optimizations on instructions in `unit' at which no flags registers
2430 void optimizeSFLiveness(Vunit
& unit
, const VxlsContext
& ctx
,
2431 const jit::vector
<Variable
*>& variables
) {
2432 // Currently, all our optimizations are only relevant on x64.
2433 if (arch() != Arch::X64
) return;
2435 // sf_var is the physical SF register, computed from the union of VregSF
2436 // registers by computeLiveness() and buildIntervals().
2437 auto const sf_var
= variables
[VregSF(RegSF
{0})];
2438 auto const sf_ivl
= sf_var
? sf_var
->ivl() : nullptr;
2440 for (auto const b
: ctx
.blocks
) {
2441 auto& code
= unit
.blocks
[b
].code
;
2443 for (size_t i
= 0; i
< code
.size(); ++i
) {
2444 auto& inst
= code
[i
];
2447 * An instruction that defines sf, where sf is not subsequently
2448 * read, will have a live range going from inst.pos to inst.pos
2449 * + 1. Its ok to replace such an instruction with one that
2450 * doesn't modify the flags (or that modifies them in different
2451 * ways), so check for a range that covers pos-1 or pos+1 to
2452 * determine true liveness.
2454 auto const sf_live
= [&] {
2456 !sf_ivl
->ranges
.empty() &&
2457 ((inst
.pos
&& sf_ivl
->covers(inst
.pos
- 1)) ||
2458 sf_ivl
->covers(inst
.pos
+ 1));
2462 #define O(name, ...) \
2463 case Vinstr::name: \
2464 optimize(unit, inst.name##_, b, i, sf_live); \
2474 ///////////////////////////////////////////////////////////////////////////////
2478 * Insert stores for `spills' (with spill space starting at `slots') into
2479 * `code' before code[j], corresponding to XLS logical position `pos'.
2481 * Updates `j' to refer to the same instruction after the code insertions.
2483 void insertSpillsAt(jit::vector
<Vinstr
>& code
, unsigned& j
,
2484 const CopyPlan
& spills
, MemoryRef slots
,
2486 jit::vector
<Vinstr
> stores
;
2488 for (auto src
: spills
) {
2489 auto ivl
= spills
[src
];
2492 auto slot
= ivl
->var
->slot
;
2493 assertx(slot
>= 0 && src
== ivl
->reg
);
2494 MemoryRef ptr
{slots
.r
+ slotOffset(slot
)};
2496 if (!ivl
->var
->wide
) {
2497 always_assert_flog(!src
.isSF(), "Tried to spill %flags");
2498 stores
.emplace_back(store
{src
, ptr
});
2500 assertx(src
.isSIMD());
2501 stores
.emplace_back(storeups
{src
, ptr
});
2504 insertCodeAt(code
, j
, stores
, pos
);
2508 * Insert reg-reg moves for `copies' into `code' before code[j], corresponding
2509 * to XLS logical position `pos'.
2511 * Updates `j' to refer to the same instruction after the code insertions.
2513 void insertCopiesAt(const VxlsContext
& ctx
,
2514 jit::vector
<Vinstr
>& code
, unsigned& j
,
2515 const CopyPlan
& plan
, unsigned pos
) {
2517 jit::vector
<Vinstr
> copies
;
2519 for_each_copy(plan
, [&] (PhysReg dst
, const Interval
* ivl
) {
2520 moves
[dst
] = ivl
->reg
;
2522 auto const hows
= doRegMoves(moves
, ctx
.tmp
);
2524 for (auto const& how
: hows
) {
2525 if (how
.m_kind
== MoveInfo::Kind::Xchg
) {
2526 copies
.emplace_back(copy2
{how
.m_src
, how
.m_dst
, how
.m_dst
, how
.m_src
});
2528 copies
.emplace_back(copy
{how
.m_src
, how
.m_dst
});
2531 insertCodeAt(code
, j
, copies
, pos
);
2535 * Insert constant loads or loads from spill space---with spill space starting
2536 * at `slots'---for `loads' into `code' before code[j], corresponding to XLS
2537 * logical position `pos'.
2539 * Updates `j' to refer to the same instruction after the code insertions.
2541 void insertLoadsAt(jit::vector
<Vinstr
>& code
, unsigned& j
,
2542 const CopyPlan
& plan
, MemoryRef slots
, unsigned pos
) {
2543 jit::vector
<Vinstr
> loads
;
2545 for_each_load(plan
, [&] (PhysReg dst
, const Interval
* ivl
) {
2546 if (ivl
->constant()) {
2547 if (ivl
->var
->val
.isUndef
) return;
2548 loads
.push_back([&]() -> Vinstr
{
2549 switch (ivl
->var
->val
.kind
) {
2551 case Vconst::Double
:
2552 return ldimmq
{uint64_t(ivl
->var
->val
.val
), dst
};
2554 return ldimml
{int32_t(ivl
->var
->val
.val
), dst
};
2556 return ldimmb
{uint8_t(ivl
->var
->val
.val
), dst
};
2560 } else if (ivl
->spilled()) {
2561 MemoryRef ptr
{slots
.r
+ slotOffset(ivl
->var
->slot
)};
2562 if (!ivl
->var
->wide
) {
2563 loads
.emplace_back(load
{ptr
, dst
});
2565 assertx(dst
.isSIMD());
2566 loads
.emplace_back(loadups
{ptr
, dst
});
2570 insertCodeAt(code
, j
, loads
, pos
);
2574 * Mutate the Vinstr stream by inserting copies.
2576 * This destroys the position numbering, so we can't use interval positions
2579 void insertCopies(Vunit
& unit
, const VxlsContext
& ctx
,
2580 const jit::vector
<Variable
*>& /*variables*/,
2581 const ResolutionPlan
& resolution
) {
2582 // Insert copies inside blocks.
2583 for (auto const b
: ctx
.blocks
) {
2584 auto& block
= unit
.blocks
[b
];
2585 auto& code
= block
.code
;
2586 auto pos
= ctx
.block_ranges
[b
].start
;
2587 auto offset
= ctx
.spill_offsets
[b
];
2589 for (unsigned j
= 0; j
< code
.size(); j
++, pos
+= 2) {
2590 MemoryRef slots
= ctx
.sp
[offset
];
2592 // Spills, reg-reg moves, and loads of constant values or spill space all
2593 // occur between instruction. Insert them in order.
2594 auto s
= resolution
.spills
.find(pos
- 1);
2595 if (s
!= resolution
.spills
.end()) {
2596 insertSpillsAt(code
, j
, s
->second
, slots
, pos
- 1);
2598 auto c
= resolution
.copies
.find(pos
- 1);
2599 if (c
!= resolution
.copies
.end()) {
2600 insertCopiesAt(ctx
, code
, j
, c
->second
, pos
- 1);
2601 insertLoadsAt(code
, j
, c
->second
, slots
, pos
- 1);
2604 // Insert copies and loads at instructions.
2605 c
= resolution
.copies
.find(pos
);
2606 if (c
!= resolution
.copies
.end()) {
2607 insertCopiesAt(ctx
, code
, j
, c
->second
, pos
);
2608 insertLoadsAt(code
, j
, c
->second
, slots
, pos
);
2610 assertx(resolution
.spills
.count(pos
) == 0);
2612 offset
-= spEffect(unit
, code
[j
], ctx
.sp
);
2616 // insert copies on edges
2617 for (auto const b
: ctx
.blocks
) {
2618 auto& block
= unit
.blocks
[b
];
2619 auto succlist
= succs(block
);
2621 if (succlist
.size() == 1) {
2622 // copies will go at end of b
2623 auto const c
= resolution
.edge_copies
.find({b
, 0});
2624 if (c
!= resolution
.edge_copies
.end()) {
2625 auto& code
= block
.code
;
2626 unsigned j
= code
.size() - 1;
2627 auto const pos
= ctx
.block_ranges
[b
].end
- 1;
2628 auto const slots
= ctx
.sp
[ctx
.spill_offsets
[succlist
[0]]];
2630 // We interleave copies and loads in `edge_copies', so here and below
2631 // we process them separately (and pass `true' to avoid asserting).
2632 insertCopiesAt(ctx
, code
, j
, c
->second
, pos
);
2633 insertLoadsAt(code
, j
, c
->second
, slots
, pos
);
2636 // copies will go at start of successor
2637 for (int i
= 0, n
= succlist
.size(); i
< n
; i
++) {
2638 auto s
= succlist
[i
];
2639 auto const c
= resolution
.edge_copies
.find({b
, i
});
2640 if (c
!= resolution
.edge_copies
.end()) {
2641 auto& code
= unit
.blocks
[s
].code
;
2643 auto const pos
= ctx
.block_ranges
[s
].start
;
2644 auto const slots
= ctx
.sp
[ctx
.spill_offsets
[s
]];
2646 insertCopiesAt(ctx
, code
, j
, c
->second
, pos
);
2647 insertLoadsAt(code
, j
, c
->second
, slots
, pos
);
2654 ///////////////////////////////////////////////////////////////////////////////
2657 * Peephole cleanup pass.
2659 * Remove no-op copy sequences before allocating spill space, since doing so
2660 * might modify the CFG.
2662 void peephole(Vunit
& unit
, const VxlsContext
& ctx
) {
2663 // Whether a Vinstr is a register swap.
2664 auto const match_xchg
= [] (Vinstr
& i
, Vreg
& r0
, Vreg
& r1
) {
2665 if (i
.op
!= Vinstr::copy2
) return false;
2668 return r0
== i
.copy2_
.d1
&& r1
== i
.copy2_
.d0
;
2671 for (auto b
: ctx
.blocks
) {
2672 auto& code
= unit
.blocks
[b
].code
;
2673 for (int i
= 0, n
= code
.size(); i
+ 1 < n
; i
++) {
2674 Vreg r0
, r1
, r2
, r3
;
2675 if (match_xchg(code
[i
], r0
, r1
) &&
2676 match_xchg(code
[i
+ 1], r2
, r3
) &&
2677 ((r0
== r2
&& r1
== r3
) || (r0
== r3
&& r1
== r2
))) {
2678 // matched xchg+xchg that cancel each other
2680 code
[i
+ 1] = nop
{};
2684 auto end
= std::remove_if(code
.begin(), code
.end(), [&](Vinstr
& inst
) {
2685 return is_trivial_nop(inst
) ||
2686 inst
.op
== Vinstr::phidef
; // we lowered it
2688 code
.erase(end
, code
.end());
2692 ///////////////////////////////////////////////////////////////////////////////
2693 // Spill space allocation.
2696 * SpillState is used by allocateSpillSpace() to decide where to allocate/free
2697 * spill space. It represents the state of the spill space as a whole and is
2698 * computed before each individual instruction.
2700 * Order is important in this enum: it's only legal to transition to states
2701 * with higher values, and states are merged using std::max().
2703 enum SpillState
: uint8_t {
2704 // State is uninitialized. All block in-states start here.
2707 // Spill space is not currently needed; it's safe to allocate spill space
2708 // after this point.
2711 // Spill space is needed and must be allocated at or before this point.
2716 * SpillStates is used to hold in/out state for each block after the analysis
2717 * pass of allocateSpillSpace().
2719 struct SpillStates
{
2725 * Returns true if spill space must be allocated before execution of this
2726 * instruction. In order to keep things simple, we return true for any
2727 * instruction that reads or writes sp.
2729 bool instrNeedsSpill(const Vunit
& unit
, const Vinstr
& inst
, PhysReg sp
) {
2730 // Implicit sp input/output.
2731 if (spEffect(unit
, inst
, sp
, false) != 0) return true;
2733 auto foundSp
= false;
2734 visitDefs(unit
, inst
, [&] (Vreg r
) { if (r
== sp
) foundSp
= true; });
2735 if (foundSp
) return true;
2737 visitUses(unit
, inst
, [&] (Vreg r
) { if (r
== sp
) foundSp
= true; });
2742 * Return the required SpillState coming into inst. prevState must not be
2745 SpillState
instrInState(const Vunit
& unit
, const Vinstr
& inst
,
2746 SpillState prevState
, PhysReg sp
) {
2747 switch (prevState
) {
2751 if (instrNeedsSpill(unit
, inst
, sp
)) return NeedSpill
;
2758 always_assert(false);
2762 * processSpillExits() can insert jcc{} instructions in the middle of a
2763 * block. fixupBlockJumps() breaks the given block after any jccs, making the
2764 * unit valid again. This is done as a separate pass from the work in
2765 * processSpillExits() to reduce complexity.
2767 void fixupBlockJumps(Vunit
& unit
, Vlabel label
) {
2768 jit::vector
<Vinstr
> origCode
;
2769 origCode
.swap(unit
.blocks
[label
].code
);
2770 auto insertBlock
= [&]() -> Vblock
& { return unit
.blocks
[label
]; };
2772 for (auto const& inst
: origCode
) {
2773 insertBlock().code
.emplace_back(inst
);
2775 if (inst
.op
== Vinstr::jcc
&& !inst
.jcc_
.targets
[0].isValid()) {
2776 auto newLabel
= unit
.makeBlock(insertBlock().area_idx
,
2777 insertBlock().weight
);
2778 insertBlock().code
.back().jcc_
.targets
[0] = newLabel
;
2785 * Walk through the given block, undoing any fallbackcc/bindjcc optimizations
2786 * that happen in an area where spill space is live. The latter transformation
2787 * is necessary to make the hidden edge out of the fallbackcc{} explicit, so we
2788 * can insert an lea on it to free spill space. It takes something like this:
2791 * cmpbim 0, %rbp[0x10] => %flags
2792 * fallbackcc CC_E, %flags, <SrcKey>
2795 * and turns it into something like this:
2798 * cmpbim 0, %rbp[0x10] => %flags
2799 * jcc CC_E, %flags -> B3, else B2
2803 * lea %rsp[0x20] => %rsp
2806 void processSpillExits(Vunit
& unit
, Vlabel label
, SpillState state
,
2807 Vinstr free
, PhysReg sp
) {
2808 auto code
= &unit
.blocks
[label
].code
;
2809 auto needFixup
= false;
2811 for (unsigned i
= 0, n
= code
->size(); i
< n
; ++i
) {
2812 auto& inst
= (*code
)[i
];
2813 state
= instrInState(unit
, inst
, state
, sp
);
2815 if (state
< NeedSpill
||
2816 (inst
.op
!= Vinstr::fallbackcc
&&
2817 inst
.op
!= Vinstr::bindjcc
&&
2818 inst
.op
!= Vinstr::jcci
)) {
2822 FTRACE(3, "Breaking out {}: {}\n", label
, show(unit
, inst
));
2823 auto target
= unit
.makeBlock(AreaIndex::Cold
, 0);
2824 // makeBlock might reallocate unit.blocks
2825 code
= &unit
.blocks
[label
].code
;
2827 auto& targetCode
= unit
.blocks
[target
].code
;
2828 free
.set_irctx(inst
.irctx());
2831 if (inst
.op
== Vinstr::fallbackcc
) {
2832 auto const& fb_i
= inst
.fallbackcc_
;
2833 targetCode
.emplace_back(free
);
2834 targetCode
.emplace_back(fallback
{fb_i
.target
, fb_i
.spOff
,
2835 fb_i
.trflags
, fb_i
.args
});
2838 } else if (inst
.op
== Vinstr::bindjcc
) {
2839 auto const& bj_i
= inst
.bindjcc_
;
2840 targetCode
.emplace_back(free
);
2841 targetCode
.emplace_back(bindjmp
{bj_i
.target
, bj_i
.spOff
, bj_i
.trflags
,
2845 } else /* inst.op == Vinstr::jcci */ {
2846 auto const& jcc_i
= inst
.jcci_
;
2847 targetCode
.emplace_back(free
);
2848 targetCode
.emplace_back(jmpi
{jcc_i
.taken
});
2852 targetCode
.back().set_irctx(inst
.irctx());
2854 // Next is set to an invalid block that will be fixed up once we're done
2855 // iterating through the original block.
2856 inst
= jcc
{cc
, sf
, {Vlabel
{}, target
}};
2860 if (needFixup
) fixupBlockJumps(unit
, label
);
2864 * Merge src into dst, returning true iff dst was changed.
2866 bool mergeSpillStates(SpillState
& dst
, SpillState src
) {
2867 assertx(src
!= Uninit
);
2868 if (dst
== src
) return false;
2870 // The only allowed state transitions are to states with higher values, so we
2871 // merge with std::max().
2872 auto const oldDst
= dst
;
2873 dst
= std::max(dst
, src
);
2874 return dst
!= oldDst
;
// Fixed-seed RNG and distribution used by allocateSpillSpace() when
// EvalHHIRStressSpill is set: they add a deterministic 1-7 extra spill
// slots, so stress runs are reproducible.
2877 std::default_random_engine
s_stressRand(0xfaceb00c);
2878 std::uniform_int_distribution
<int> s_stressDist(1,7);
2881 * If the current unit used any spill slots, allocate and free spill space
2882 * where appropriate. Spill space is allocated right before it's needed and
2883 * freed before any instruction that exits the unit, which is any block-ending
2884 * instruction with no successors in the unit. fallbackcc{} and bindjcc{}
2885 * instructions have hidden edges that exit the unit, and if we encounter one
2886 * while spill space is live, we have to make that exit edge explicit to insert
2887 * code on it (see processSpillExits()). This makes the exit path more
2888 * expensive, so we try to allocate spill space as late as possible to avoid
2889 * pessimising fallbackcc/bindjcc instructions unless it's really
2890 * necessary. The algorithm uses two passes:
2893 * - For each block in RPO:
2894 * - Load in-state, which has been populated by at least one predecessor
2895 * (or manually set to NoSpill for the entry block).
2896 * - Analyze each instruction in the block, determining what state the
2897 * spill space must be in before executing it.
2898 * - Record out-state for the block and propagate to successors. If this
2899 * changes the in-state for any of them, enqueue them for (re)processing.
2902 * - For each block (we use RPO to only visit reachable blocks but order
2904 * - Inspect the block's in-state and out-state:
2905 * - NoSpill in, == NeedSpill out: Walk the block to see if we need to
2906 * allocate spill space before any instructions.
2907 * - NoSpill out: Allocate spill space on any edges to successors with
2908 * NeedSpill in-states.
2909 * - NeedSpill out: If the block has no in-unit successors, free spill
2910 * space before the block-end instruction.
2911 * - != NoSpill out: Look for any fallbackcc/bindjcc instructions,
2912 * deoptimizing as appropriate (see processSpillExits()).
2914 void allocateSpillSpace(Vunit
& unit
, const VxlsContext
& ctx
,
2916 if (RuntimeOption::EvalHHIRStressSpill
&& ctx
.abi
.canSpill
) {
2917 auto extra
= s_stressDist(s_stressRand
);
2918 FTRACE(1, "StressSpill on; adding {} extra slots\n", extra
);
2919 spi
.used_spill_slots
+= extra
;
2921 if (spi
.used_spill_slots
== 0) return;
2922 Timer
t(Timer::vasm_xls_spill
, unit
.log_entry
);
2923 always_assert(ctx
.abi
.canSpill
);
2925 // Make sure we always allocate spill space in multiples of 16 bytes, to keep
2926 // alignment straightforward.
2927 if (spi
.used_spill_slots
% 2) spi
.used_spill_slots
++;
2928 FTRACE(1, "Allocating {} spill slots\n", spi
.used_spill_slots
);
2930 auto const spillSize
= safe_cast
<int32_t>(slotOffset(spi
.used_spill_slots
));
2931 // Pointer manipulation is traditionally done with lea, and it's safe to
2932 // insert even where flags might be live.
2933 Vinstr alloc
= lea
{ctx
.sp
[-spillSize
], ctx
.sp
};
2934 Vinstr free
= lea
{ctx
.sp
[spillSize
], ctx
.sp
};
2936 jit::vector
<uint32_t> rpoIds(unit
.blocks
.size());
2937 for (uint32_t i
= 0; i
< ctx
.blocks
.size(); ++i
) rpoIds
[ctx
.blocks
[i
]] = i
;
2939 jit::vector
<SpillStates
> states(unit
.blocks
.size(), {Uninit
, Uninit
});
2940 states
[unit
.entry
].in
= NoSpill
;
2941 dataflow_worklist
<uint32_t> worklist(unit
.blocks
.size());
2944 // Walk the blocks in rpo. At the end of each block, propagate its out-state
2945 // to successors, adding them to the worklist if their in-state
2946 // changes. Blocks may be visited multiple times if loops are present.
2947 while (!worklist
.empty()) {
2948 auto const label
= ctx
.blocks
[worklist
.pop()];
2949 auto const& block
= unit
.blocks
[label
];
2950 auto state
= states
[label
].in
;
2952 if (state
< NeedSpill
) {
2953 for (auto& inst
: block
.code
) {
2954 state
= instrInState(unit
, inst
, state
, ctx
.sp
);
2955 if (state
== NeedSpill
) break;
2958 states
[label
].out
= state
;
2960 for (auto s
: succs(block
)) {
2961 if (mergeSpillStates(states
[s
].in
, state
)) {
2962 worklist
.push(rpoIds
[s
]);
2967 // Do a single mutation pass over the blocks.
2968 for (auto const label
: ctx
.blocks
) {
2969 auto state
= states
[label
];
2970 auto& block
= unit
.blocks
[label
];
2972 // Any block with a NoSpill in-state and == NeedSpill out-state might have
2973 // an instruction in it that needs spill space, which we allocate right
2974 // before the instruction in question.
2975 if (state
.in
== NoSpill
&& state
.out
== NeedSpill
) {
2976 auto state
= NoSpill
;
2977 for (auto it
= block
.code
.begin(); it
!= block
.code
.end(); ++it
) {
2978 state
= instrInState(unit
, *it
, state
, ctx
.sp
);
2979 if (state
== NeedSpill
) {
2980 FTRACE(3, "alloc spill before {}: {}\n", label
, show(unit
, *it
));
2981 alloc
.set_irctx(it
->irctx());
2982 block
.code
.insert(it
, alloc
);
2988 // Allocate spill space on edges from a NoSpill out-state to a NeedSpill
2990 auto const successors
= succs(block
);
2991 if (state
.out
== NoSpill
) {
2992 for (auto s
: successors
) {
2993 if (states
[s
].in
== NeedSpill
) {
2994 FTRACE(3, "alloc spill on edge from {} -> {}\n", label
, s
);
2995 auto it
= std::prev(block
.code
.end());
2996 alloc
.set_irctx(it
->irctx());
2997 block
.code
.insert(it
, alloc
);
3002 // Any block with a NeedSpill out-state and no successors must free spill
3003 // space right before the block-end instruction. We ignore trap so spill
3004 // space is still allocated in core files.
3005 if (state
.out
== NeedSpill
&& successors
.empty() &&
3006 block
.code
.back().op
!= Vinstr::trap
) {
3007 auto it
= std::prev(block
.code
.end());
3008 FTRACE(3, "free spill before {}: {}\n", label
, show(unit
, (*it
)));
3009 free
.set_irctx(it
->irctx());
3010 block
.code
.insert(it
, free
);
3013 // Any block that ends with anything other than NoSpill needs to be walked
3014 // to look for places to free spill space.
3015 if (state
.out
!= NoSpill
) {
3016 processSpillExits(unit
, label
, state
.in
, free
, ctx
.sp
);
3021 ///////////////////////////////////////////////////////////////////////////////
3024 std::string
Interval::toString() const {
3025 std::ostringstream out
;
3027 if (reg
!= InvalidReg
) {
3032 out
<< delim
<< folly::format("#{:08x}", var
->val
.val
);
3034 if (var
->slot
>= 0) {
3035 out
<< delim
<< folly::format("[%sp+{}]", slotOffset(var
->slot
));
3039 for (auto const& r
: ranges
) {
3040 out
<< delim
<< folly::format("{}-{}", r
.start
, r
.end
);
3045 for (auto const& u
: uses
) {
3046 if (u
.pos
== var
->def_pos
) {
3047 if (u
.hint
.isValid()) {
3048 out
<< delim
<< "@" << u
.pos
<< "=" << show(u
.hint
);
3050 out
<< delim
<< "@" << u
.pos
<< "=";
3053 auto hint_delim
= u
.kind
== Constraint::CopySrc
? "=?" : "=@";
3054 if (u
.hint
.isValid()) {
3055 out
<< delim
<< show(u
.hint
) << hint_delim
<< u
.pos
;
3057 out
<< delim
<< hint_delim
<< u
.pos
;
// Debug helper: dump every variable's interval chain via
// Trace::traceRelease. First reports the total spill count, then, for each
// non-null, non-fixed variable, one line with its vreg number and the
// parent interval, followed by one line per child interval in the ->next
// chain.
3066 DEBUG_ONLY
void dumpVariables(const jit::vector
<Variable
*>& variables
,
3067 unsigned num_spills
) {
3068 Trace::traceRelease("Spills %u\n", num_spills
);
3069 for (auto var
: variables
) {
// Skip holes in the variables vector and fixed (precolored) registers.
3070 if (!var
|| var
->fixed()) continue;
3071 Trace::traceRelease("%%%-2lu %s\n", size_t(var
->vreg
),
3072 var
->ivl()->toString().c_str());
// Child intervals (splits of the parent) hang off ivl()->next.
3073 for (auto ivl
= var
->ivl()->next
; ivl
; ivl
= ivl
->next
) {
3074 Trace::traceRelease(" %s\n", ivl
->toString().c_str());
// Debug-rendering switches, read once from the environment at static-init
// time: set XLS_SHOW_RESERVED to also draw variables whose vreg is outside
// abi.unreserved() (see the filters in printInstr/printVariables below);
// set XLS_SHOW_FIXED to give each fixed register its own column instead of
// collapsing them all into one.
3079 auto const ignore_reserved
= !getenv("XLS_SHOW_RESERVED");
3080 auto const collapse_fixed
= !getenv("XLS_SHOW_FIXED");
// Glyph weight for draw(): used as index 0/1 into the two-element
// box-drawing glyph arrays (thin vs. thick strokes).
3082 enum Mode
{ Light
, Heavy
};
// Render one cell of the interval diagram for `var` at instruction
// position `pos` as a Unicode box-drawing glyph. `covers` is a predicate
// deciding whether a child interval occupies a given position; `m` selects
// the Light (thin) or Heavy (thick) glyph variant. The cell shows a
// top-half tick when live only above, a bottom-half tick when live only
// below, a full vertical bar when live through, or a blank.
// NOTE(review): the lines defining `s` (presumably `auto s = f(pos);`) and
// closing the lambda fall outside this excerpt — confirm against full
// source.
3084 template<class Pred
>
3085 const char* draw(Variable
* var
, unsigned pos
, Mode m
, Pred covers
) {
// Glyph tables indexed by Mode: { Light, Heavy }.
3087 static const char* top
[] = { u8
"\u2575", u8
"\u2579" };
3088 static const char* bottom
[] = { u8
"\u2577", u8
"\u257B" };
3089 static const char* both
[] = { u8
"\u2502", u8
"\u2503" };
3090 static const char* empty
[] = { " ", " " };
// f(position): true iff any interval in var's chain satisfies `covers`
// at that position; false for a null var (used for the collapsed
// fixed-register column).
3091 auto f
= [&](unsigned position
) {
3092 if (!var
) return false;
3093 for (auto ivl
= var
->ivl(); ivl
; ivl
= ivl
->next
) {
3094 if (covers(ivl
, position
)) return true;
// `d`: liveness at the "lower half" of the cell — at odd positions reuse
// `s`, otherwise probe pos + 1.
3100 auto d
= pos
% 2 == 1 ? s
: f(pos
+ 1);
// Pick the glyph from the (s, d) liveness pair; the final empty[m] arm is
// on an original line outside this excerpt.
3101 return ( s
&& !d
) ? top
[m
] :
3102 ( s
&& d
) ? both
[m
] :
3103 (!s
&& d
) ? bottom
[m
] :
// Print one instruction row of the interval diagram into `str`: one column
// per variable (Light glyph for interval coverage, Heavy for an actual use
// at that position), a single collapsed Heavy column for fixed registers,
// the block label when this is block `b`'s first position, and finally the
// position number plus the disassembled instruction.
3107 DEBUG_ONLY
void printInstr(std::ostringstream
& str
,
3108 const Vunit
& unit
, const VxlsContext
& ctx
,
3109 const jit::vector
<Variable
*>& variables
,
3110 const Vinstr
& inst
, Vlabel b
) {
// Coverage of the collapsed fixed-register column at inst.pos and
// inst.pos + 1 (the two half-cells drawn per instruction).
3111 bool fixed_covers
[2] = { false, false };
3112 Variable
* fixed
= nullptr;
3113 for (auto var
: variables
) {
// Reserved (non-allocatable) registers are hidden unless
// XLS_SHOW_RESERVED is set; the continue for this branch is on an
// original line outside this excerpt.
3116 if (ignore_reserved
&& !ctx
.abi
.unreserved().contains(var
->vreg
)) {
// Fold all fixed registers into one shared column: remember any one of
// them and OR together their coverage at both half-positions.
3119 if (collapse_fixed
) {
3120 fixed
= var
; // can be any.
3121 fixed_covers
[0] |= var
->ivl()->covers(inst
.pos
);
3122 fixed_covers
[1] |= var
->ivl()->covers(inst
.pos
+ 1);
// Thin glyph: interval merely covers the position.
3127 str
<< draw(var
, inst
.pos
, Light
, [&](Interval
* child
, unsigned p
) {
3128 return child
->covers(p
);
// Thick glyph: the position is an actual use/def of the interval.
3130 str
<< draw(var
, inst
.pos
, Heavy
, [&](Interval
* child
, unsigned p
) {
3131 return child
->usedAt(p
);
// Collapsed fixed-register column: coverage comes from fixed_covers
// rather than from the (arbitrary) `fixed` variable's own intervals.
3134 str
<< " " << draw(fixed
, inst
.pos
, Heavy
, [&](Interval
*, unsigned p
) {
3135 assertx(p
- inst
.pos
< 2);
3136 return fixed_covers
[p
- inst
.pos
];
// Label the row with "B<n>" at the first position of block b.
3138 if (inst
.pos
== ctx
.block_ranges
[b
].start
) {
3139 str
<< folly::format(" B{: <3}", size_t(b
));
// Position number plus the printed instruction.
3143 str
<< folly::format(" {: <3} ", inst
.pos
) << show(unit
, inst
) << "\n";
// Print the whole interval diagram: a caption line (tagged with the
// per-unit counter), a header row of vreg numbers, then one printInstr row
// for every instruction of every block in ctx.blocks order; the finished
// text is flushed in one traceRelease call.
3146 DEBUG_ONLY
void printVariables(const char* caption
,
3147 const Vunit
& unit
, const VxlsContext
& ctx
,
3148 const jit::vector
<Variable
*>& variables
) {
3149 std::ostringstream str
;
3150 str
<< "Intervals " << caption
<< " " << ctx
.counter
<< "\n";
// Header row: one column label per variable, subject to the same
// reserved/fixed filtering as printInstr (the skip/continue lines for
// these branches are on original lines outside this excerpt).
3151 for (auto var
: variables
) {
3154 if (ignore_reserved
&& !ctx
.abi
.unreserved().contains(var
->vreg
)) {
3157 if (collapse_fixed
) {
3161 str
<< folly::format(" {: <2}", size_t(var
->vreg
));
// Body: one diagram row per instruction, in linearized block order.
3164 for (auto b
: ctx
.blocks
) {
3165 for (auto& inst
: unit
.blocks
[b
].code
) {
3166 printInstr(str
, unit
, ctx
, variables
, inst
, b
);
3169 HPHP::Trace::traceRelease("%s\n", str
.str().c_str());
3172 ///////////////////////////////////////////////////////////////////////////////
// Post-allocation statistics counters, filled in by dumpStats below.
// NOTE(review): these are members of a struct whose declaration line falls
// outside this excerpt — confirm the enclosing type against full source.
3175 size_t instrs
{0}; // total instruction count after xls
3176 size_t moves
{0}; // number of movs inserted
3177 size_t loads
{0}; // number of loads inserted
3178 size_t spills
{0}; // number of spill-stores inserted
3179 size_t consts
{0}; // number of const-loads inserted
3180 size_t total_copies
{0}; // moves+loads+spills
// Accumulate and report post-allocation statistics: total instruction
// count, spill-stores from the resolution plan's spill map, and
// copies/loads/const-loads from its block- and edge-copy plans. Results go
// to the xls_stats trace module and, when the unit carries a structured
// log entry, onto that entry as well.
3183 void dumpStats(const Vunit
& unit
, const ResolutionPlan
& resolution
) {
// Count every instruction remaining in the unit after allocation.
3186 for (auto const& blk
: unit
.blocks
) {
3187 stats
.instrs
+= blk
.code
.size();
// Each non-null slot in a per-position spill map is one spill-store.
3189 for (auto const& kv
: resolution
.spills
) {
3190 auto const& spills
= kv
.second
;
3191 for (auto const r
: spills
) {
3192 if (spills
[r
]) ++stats
.spills
;
// Classify every planned transfer: plain copies (counting line outside
// this excerpt), and loads split into reloads of spilled intervals
// vs. rematerialized constants.
3196 auto const count_copies
= [&] (const CopyPlan
& plan
) {
3197 for_each_copy(plan
, [&] (PhysReg
, const Interval
*) {
3200 for_each_load(plan
, [&] (PhysReg
, const Interval
* ivl
) {
3201 ++(ivl
->spilled() ? stats
.loads
: stats
.consts
);
// Tally both within-block and block-edge copy plans.
3204 for (auto const& kv
: resolution
.copies
) count_copies(kv
.second
);
3205 for (auto const& kv
: resolution
.edge_copies
) count_copies(kv
.second
);
3207 stats
.total_copies
= stats
.moves
+ stats
.loads
+ stats
.spills
;
// Emit via the xls_stats trace module; the surrounding trace-call lines
// (format string header) fall outside this excerpt.
3210 Trace::xls_stats
, 1,
3217 " total copies: {}\n",
3218 stats
.instrs
, stats
.moves
, stats
.loads
, stats
.spills
,
3219 stats
.consts
, stats
.total_copies
// Mirror the counters onto the unit's structured log entry, if any.
3222 if (auto entry
= unit
.log_entry
) {
3223 entry
->setInt("xls_instrs", stats
.instrs
);
3224 entry
->setInt("xls_moves", stats
.moves
);
3225 entry
->setInt("xls_loads", stats
.loads
);
3226 entry
->setInt("xls_spills", stats
.spills
);
3227 entry
->setInt("xls_consts", stats
.consts
);
3231 ///////////////////////////////////////////////////////////////////////////////
// Entry point: run extended linear scan (XLS) register allocation over
// `unit` under the given ABI. Pipeline, in order: split critical edges and
// sanity-check the unit; build the allocation context (block order,
// position numbering, SP/spill-offset analysis, liveness); build lifetime
// intervals and hints; assign registers; resolve lifetimes into
// copies/spills/rematerializations; rename Vregs to PhysRegs and insert
// the copies; optimize on status-flags liveness and run peephole cleanup;
// insert spill-space setup; finally log, trace, and free the per-variable
// metadata.
3234 void allocateRegisters(Vunit
& unit
, const Abi
& abi
) {
// Scoped timer attributing this pass's time to vasm_xls in the log entry.
3235 Timer
timer(Timer::vasm_xls
, unit
.log_entry
);
// Monotonic per-invocation id; stored in ctx.counter and used to tag
// debug printouts (see printVariables).
3236 auto const counter
= s_counter
.fetch_add(1, std::memory_order_relaxed
);
3238 splitCriticalEdges(unit
);
3239 assertx(check(unit
));
// Assemble the allocation context: linearized block order, per-block
// position ranges, spill offsets relative to sp, and live-in sets.
3242 VxlsContext ctx
{abi
};
3243 ctx
.blocks
= sortBlocks(unit
);
3244 ctx
.block_ranges
= computePositions(unit
, ctx
.blocks
);
3245 ctx
.spill_offsets
= analyzeSP(unit
, ctx
.blocks
, ctx
.sp
);
3246 ctx
.livein
= computeLiveness(unit
, ctx
.abi
, ctx
.blocks
);
3247 ctx
.counter
= counter
;
3249 // Build lifetime intervals and analyze hints.
3250 auto variables
= buildIntervals(unit
, ctx
);
3251 auto const hint_info
= analyzeHints(unit
, ctx
, variables
);
3253 // Perform register allocation.
3254 auto spill_info
= assignRegisters(ctx
, variables
, hint_info
);
3256 ONTRACE(kRegAllocLevel
, dumpVariables(variables
, spill_info
.num_spills
));
3258 // Insert lifetime-resolving copies, spills, and rematerializations, and
3259 // replace the Vreg operands in the Vinstr stream with the assigned PhysRegs.
3260 auto const resolution
= resolveLifetimes(unit
, ctx
, variables
);
3261 renameOperands(unit
, ctx
, variables
);
3262 insertCopies(unit
, ctx
, variables
, resolution
);
3264 ONTRACE(kRegAllocLevel
,
3265 dumpVariables(variables
, spill_info
.num_spills
);
3266 printVariables("after inserting copies", unit
, ctx
, variables
);
3269 // Perform optimizations based on flags liveness, then do some cleanup.
3270 optimizeSFLiveness(unit
, ctx
, variables
);
3271 peephole(unit
, ctx
);
3273 // Insert instructions for creating spill space.
3274 allocateSpillSpace(unit
, ctx
, spill_info
);
// Record spill statistics on the structured log entry, if present.
3275 if (auto entry
= unit
.log_entry
) {
3276 entry
->setInt("num_spills", spill_info
.num_spills
);
3277 entry
->setInt("used_spill_slots", spill_info
.used_spill_slots
);
3280 printUnit(kVasmRegAllocLevel
, "after vasm-xls", unit
);
// Stats are collected either for live tracing or for the log entry.
3281 if (HPHP::Trace::moduleEnabled(Trace::xls_stats
, 1) || unit
.log_entry
) {
3282 dumpStats(unit
, resolution
);
3285 // Free the variable metadata.
// Walk each variable's child-interval chain (destruction of each child,
// on original lines outside this excerpt, presumably advances via
// `next`), then destroy the variable itself.
3286 for (auto var
: variables
) {
3288 for (Interval
* ivl
= var
->ivl()->next
, *next
= nullptr; ivl
; ivl
= next
) {
3292 Variable::destroy(var
);
3296 ///////////////////////////////////////////////////////////////////////////////