/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/

#include "hphp/runtime/vm/jit/vasm.h"

#include "hphp/runtime/base/stats.h"

#include "hphp/runtime/vm/jit/abi.h"
#include "hphp/runtime/vm/jit/print.h"
#include "hphp/runtime/vm/jit/punt.h"
#include "hphp/runtime/vm/jit/reg-algorithms.h"
#include "hphp/runtime/vm/jit/timer.h"
#include "hphp/runtime/vm/jit/vasm-instr.h"
#include "hphp/runtime/vm/jit/vasm-print.h"
#include "hphp/runtime/vm/jit/vasm-reg.h"
#include "hphp/runtime/vm/jit/vasm-unit.h"
#include "hphp/runtime/vm/jit/vasm-util.h"
#include "hphp/runtime/vm/jit/vasm-visit.h"

#include "hphp/util/arch.h"
#include "hphp/util/assertions.h"
#include "hphp/util/dataflow-worklist.h"
#include "hphp/util/trace.h"

#include <boost/dynamic_bitset.hpp>
#include <boost/range/adaptor/reversed.hpp>
#include <folly/Format.h>
#include <folly/Optional.h>

#include <algorithm>
#include <random>

// future work
//  - #3098509 streamline code, vectors vs linked lists, etc
//  - #3098685 Optimize lifetime splitting
//  - #3098739 new features now possible with XLS

TRACE_SET_MOD(xls);

namespace HPHP { namespace jit {
///////////////////////////////////////////////////////////////////////////////

namespace {
///////////////////////////////////////////////////////////////////////////////

std::atomic<size_t> s_counter;

DEBUG_ONLY constexpr auto kHintLevel = 5;

/*
 * Vreg discriminator.
 */
enum class Constraint { Any, CopySrc, Gpr, Simd, Sf };

Constraint constraint(const Vreg&) { return Constraint::Any; }
Constraint constraint(const Vreg64&) { return Constraint::Gpr; }
Constraint constraint(const Vreg32&) { return Constraint::Gpr; }
Constraint constraint(const Vreg16&) { return Constraint::Gpr; }
Constraint constraint(const Vreg8&) { return Constraint::Gpr; }
Constraint constraint(const VregDbl&) { return Constraint::Simd; }
Constraint constraint(const Vreg128&) { return Constraint::Simd; }
Constraint constraint(const VregSF&) { return Constraint::Sf; }

bool is_wide(const Vreg128&) { return true; }
template<class T> bool is_wide(const T&) { return false; }

/*
 * Sentinel constants.
 */
constexpr int kInvalidPhiGroup = -1;
constexpr int kInvalidSpillSlot = -1;
const unsigned kMaxPos = UINT_MAX; // "infinity" use position

/*
 * A Use refers to the position where an interval is used or defined.
 */
struct Use {
  Use() {}
  Use(Constraint kind, unsigned pos, Vreg hint)
    : kind(kind)
    , pos(pos)
    , hint(hint)
  {}

  /*
   * Constraint imposed on the Vreg at this use.
   */
  Constraint kind{Constraint::Any};
  /*
   * Position of this use or def.
   */
  unsigned pos{kMaxPos};
  /*
   * If valid, try to assign the same physical register here as `hint' was
   * assigned at `pos'.
   */
  Vreg hint;

  /*
   * Index of phi group metadata.
   */
  int phi_group{kInvalidPhiGroup};
};

/*
 * A LiveRange is a closed-open range of positions where an interval is live.
 *
 * Specifically, for the LiveRange [start, end), start is in the range and
 * end is not.
 */
struct LiveRange {
  bool contains(unsigned pos) const { return pos >= start && pos < end; }
  bool intersects(LiveRange r) const { return r.start < end && start < r.end; }
  bool contains(LiveRange r) const { return r.start >= start && r.end <= end; }
public:
  unsigned start, end;
};
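
// Worked example of the closed-open convention (illustrative values): the
// ranges [10, 14) and [14, 18) share position 14 but do not intersect, since
// 14 is excluded from the first range; that's why `r.start < end &&
// start < r.end' suffices for intersects(). For [10, 14), contains(10) is
// true but contains(14) is not.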

struct Variable;

/*
 * An Interval represents the lifetime of a Vreg as a sorted list of disjoint
 * ranges and a sorted list of use positions. It is the unit of register
 * allocation.
 *
 * Intervals may be split---e.g., because the Vreg needed to be spilled in some
 * subrange. All (sub-)Intervals for a given Vreg are connected as a singly
 * linked list, sorted by start position.
 *
 * Every use position must be inside one of the ranges, or exactly at the end
 * of the last range. Allowing a use exactly at the end facilitates lifetime
 * splitting when the use at the position of an instruction clobbers registers
 * as a side effect, e.g. a call.
 *
 * The intuition for allowing uses at the end of an Interval is that, in truth,
 * the picture at a given position looks like this:
 *
 *            |                      [s]
 *      +-----|-------------+        copy{s, d}   <-+
 *      |     v             |                       |
 *      + - - - - - - - - - +                       +--- position n
 *      |             |     |                       |
 *      +-------------|-----+                     <-+
 *                    |
 *              [d]   v
 *
 * We represent an instruction with a single position `n'. All the use(s) and
 * def(s) of that instruction are live at some point within it, but their
 * lifetimes nonetheless do not overlap. Since we don't represent instructions
 * using two position numbers, we instead allow uses on the open end side of
 * Intervals, because they don't actually conflict with, e.g., a def of another
 * Interval that starts at the same position.
 */
struct Interval {
  explicit Interval(Variable* var) : var(var) {}

  std::string toString() const;

  /*
   * Endpoints. Intervals are [closed, open), just like LiveRanges.
   */
  unsigned start() const { return ranges.front().start; }
  unsigned end() const { return ranges.back().end; }

  /*
   * Head of this Interval chain.
   */
  const Interval* leader() const;
  Interval* leader();

  /*
   * Register allocation state.
   */
  bool live() const;
  bool constant() const;
  bool spilled() const;
  bool fixed() const;

  /*
   * Split this interval at `pos', returning the new `this->next'.
   *
   * If `keep_uses' is set, uses exactly at the end of the first interval will
   * stay with the first split (rather than the second).
   *
   * @requires: pos > start() && pos < end(); this ensures that both
   *            subintervals are nonempty.
   */
  Interval* split(unsigned pos, bool keep_uses = false);

  /////////////////////////////////////////////////////////////////////////////
  // Queries.
  //
  // These operate only on `this' and not its children.

  /*
   * Get the index of the first range or use that is not strictly lower than
   * `pos' (i.e., which contains/is at `pos' or is strictly higher than `pos').
   */
  unsigned findRange(unsigned pos) const;
  unsigned findUse(unsigned pos) const;

  /*
   * Whether there is a range that includes `pos', or a use at `pos'.
   */
  bool covers(unsigned pos) const;
  bool usedAt(unsigned pos) const;

  /*
   * The position of a use [relative to `pos'] that requires a register (i.e.,
   * CopySrc uses are ignored).
   *
   * firstUseAfter: The first use >= `pos'; kMaxPos if there are no more uses.
   * lastUseBefore: The last use <= `pos'; 0 if the first use is after `pos'.
   * firstUse:      The first use in `this'.
   */
  unsigned firstUseAfter(unsigned pos) const;
  unsigned lastUseBefore(unsigned pos) const;
  unsigned firstUse() const;

public:
  // The Variable whose (sub-)lifetime this (sub-)interval represents.
  Variable* const var;

  // Pointer to the next subinterval.
  Interval* next{nullptr};

  // Live ranges and use positions.
  jit::vector<LiveRange> ranges;
  jit::vector<Use> uses;

  // The physical register assigned to this subinterval.
  PhysReg reg;
};

/*
 * Metadata about a variable which is the object of register allocation.
 *
 * Each Variable represents a Vreg in the Vunit we're performing register
 * allocation on---including lifetime ranges, uses, def information, etc.
 */
struct Variable {
  explicit Variable(Vreg r) : vreg(r) {}

  static Variable* create(Vreg r);
  static void destroy(Variable* var);

  /*
   * Accessors.
   */
  Interval* ivl() const { return (Interval*)(this + 1); }
  bool fixed() const { return vreg.isPhys(); }

  /*
   * Return the subinterval which covers or has a use at `pos', else nullptr if
   * no subinterval does.
   */
  Interval* ivlAt(unsigned pos);
  Interval* ivlAtUse(unsigned pos);

public:
  // The Vreg this value represents.
  const Vreg vreg;

  // Whether this is a SIMD value.
  bool wide{false};

  // Whether this variable is a constant, and its constant value.
  bool constant{false};
  Vconst val;

  // The spill slot assigned to this value. Since we don't spill physical
  // registers, and Vregs are SSA, a value's assigned slot never changes.
  int slot{kInvalidSpillSlot};

  // Position of, and block containing, the variable's single def instruction;
  // invalid for physical registers.
  unsigned def_pos{kMaxPos};
  Vlabel def_block;

  // List of recordbasenativesps this vreg is live across.
  jit::vector<unsigned> recordbasenativesps{};

  // Copy of the Vreg's def instruction.
  Vinstr def_inst;
};

template<class Fn> void forEach(const LiveSet& bits, Fn fn) {
  for (auto i = bits.find_first(); i != bits.npos; i = bits.find_next(i)) {
    fn(Vreg(i));
  }
}

/*
 * Sack of inputs and pre-computed data used by the main XLS algorithm.
 */
struct VxlsContext {
  explicit VxlsContext(const Abi& abi)
    : abi(abi)
    , sp(rsp())
  {
    switch (arch()) {
      case Arch::X64:
        tmp = reg::xmm15; // reserve xmm15 to break shuffle cycles
        break;
      case Arch::ARM:
        tmp = vixl::d31;
        break;
    }
    this->abi.simdUnreserved -= tmp;
    this->abi.simdReserved |= tmp;
    assertx(!abi.gpUnreserved.contains(sp));
    assertx(!abi.gpUnreserved.contains(tmp));
  }

public:
  Abi abi;
  // Arch-dependent stack pointer.
  PhysReg sp;
  // Temp register used only for breaking cycles.
  PhysReg tmp;
  // Debug-only run identifier.
  size_t counter;

  // Sorted blocks.
  jit::vector<Vlabel> blocks;
  // [start,end) position of each block.
  jit::vector<LiveRange> block_ranges;
  // Per-block offset from `sp' to the spill area. folly::none for blocks
  // where we have not yet reached the recordbasenativesp instruction.
  jit::vector<folly::Optional<int>> spill_offsets;
  // Per-block live-in sets.
  jit::vector<LiveSet> livein;
};

///////////////////////////////////////////////////////////////////////////////
// Interval.

const Interval* Interval::leader() const { return var->ivl(); }
Interval* Interval::leader() { return var->ivl(); }

bool Interval::live() const { return reg != InvalidReg; }
bool Interval::constant() const { return var->constant; }
bool Interval::spilled() const { return !live() && var->slot >= 0; }
bool Interval::fixed() const { return var->fixed(); }

unsigned Interval::findRange(unsigned pos) const {
  unsigned lo = 0;
  for (unsigned hi = ranges.size(); lo < hi;) {
    auto mid = (lo + hi) / 2;
    auto r = ranges[mid];
    if (pos < r.start) {
      hi = mid;
    } else if (r.end <= pos) {
      lo = mid + 1;
    } else {
      return mid;
    }
  }
  assertx(lo == ranges.size() || pos < ranges[lo].start);
  return lo;
}

unsigned Interval::findUse(unsigned pos) const {
  unsigned lo = 0, hi = uses.size();
  while (lo < hi) {
    auto mid = (lo + hi) / 2;
    auto u = uses[mid].pos;
    if (pos < u) {
      hi = mid;
    } else if (u < pos) {
      lo = mid + 1;
    } else {
      return mid;
    }
  }
  assertx(lo == uses.size() || pos < uses[lo].pos);
  return lo;
}
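
// Example of the search boundaries (hypothetical data): for uses at positions
// {10, 14, 22}, findUse(14) returns 1 (an exact hit), findUse(15) returns 2
// (the first use strictly above 15), and findUse(30) returns 3 == uses.size().
// findRange() behaves analogously over ranges.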

bool Interval::covers(unsigned pos) const {
  if (pos < start() || pos >= end()) return false;
  auto i = ranges.begin() + findRange(pos);
  return i != ranges.end() && i->contains(pos);
}

bool Interval::usedAt(unsigned pos) const {
  if (pos < start() || pos > end()) return false;
  auto i = uses.begin() + findUse(pos);
  return i != uses.end() && pos == i->pos;
}

unsigned Interval::firstUseAfter(unsigned pos) const {
  for (auto& u : uses) {
    if (u.kind == Constraint::CopySrc) continue;
    if (u.pos >= pos) return u.pos;
  }
  return kMaxPos;
}

unsigned Interval::lastUseBefore(unsigned pos) const {
  auto prev = 0;
  for (auto& u : uses) {
    if (u.kind == Constraint::CopySrc) continue;
    if (u.pos > pos) return prev;
    prev = u.pos;
  }
  return prev;
}

unsigned Interval::firstUse() const {
  for (auto& u : uses) {
    if (u.kind != Constraint::CopySrc) return u.pos;
  }
  return kMaxPos;
}

Interval* Interval::split(unsigned pos, bool keep_uses) {
  assertx(pos > start() && pos < end()); // both parts will be non-empty

  auto child = jit::make<Interval>(var);
  child->next = next;
  next = child;

  // advance r1 to the first range we want in child; maybe split a range.
  auto r1 = ranges.begin() + findRange(pos);
  if (pos > r1->start) { // split r at pos
    child->ranges.push_back({pos, r1->end});
    r1->end = pos;
    r1++;
  }
  child->ranges.insert(child->ranges.end(), r1, ranges.end());
  ranges.erase(r1, ranges.end());

  // advance u1 to the first use position in child, then copy u1..end to child.
  auto u1 = uses.begin() + findUse(end());
  auto u2 = uses.end();
  if (keep_uses) {
    while (u1 != u2 && u1->pos <= end()) u1++;
  } else {
    while (u1 != u2 && u1->pos < child->start()) u1++;
  }
  child->uses.insert(child->uses.end(), u1, u2);
  uses.erase(u1, u2);

  return child;
}
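
// Illustrative trace of split() (values are made up): an interval with ranges
// {[10, 20), [30, 40)} and uses at {10, 34}, split at pos = 32, keeps
// {[10, 20), [30, 32)} and use {10}, while the child receives {[32, 40)} and
// use {34}. With keep_uses set, a use exactly at position 32 would stay with
// the first part rather than move to the child.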

///////////////////////////////////////////////////////////////////////////////
// Variable.

Variable* Variable::create(Vreg r) {
  auto const mem = malloc(sizeof(Variable) + sizeof(Interval));
  auto const var = new (mem) Variable(r);
  new (reinterpret_cast<void*>(var + 1)) Interval(var);
  return var;
}

void Variable::destroy(Variable* var) {
  var->~Variable();
  auto ivl = reinterpret_cast<Interval*>(var + 1);
  ivl->~Interval();
  free(var);
}

Interval* Variable::ivlAt(unsigned pos) {
  for (auto ivl = this->ivl(); ivl; ivl = ivl->next) {
    if (pos < ivl->start()) return nullptr;
    if (ivl->covers(pos)) return ivl;
  }
  return nullptr;
}

Interval* Variable::ivlAtUse(unsigned pos) {
  for (auto ivl = this->ivl(); ivl; ivl = ivl->next) {
    if (pos < ivl->start()) return nullptr;
    if (ivl->usedAt(pos)) return ivl;
  }
  return nullptr;
}

///////////////////////////////////////////////////////////////////////////////

/*
 * Extended Linear Scan is based on Wimmer & Franz "Linear Scan Register
 * Allocation on SSA Form". As currently written, it also works on non-SSA
 * input.
 *
 * 1. Sort blocks such that all predecessors of B come before B, except
 *    loop-edge predecessors. If the input IR is in SSA form, this also
 *    implies the definition of each SSATmp comes before all uses.
 *
 * 2. Assign an even-numbered position to every instruction. Positions
 *    between instructions are used to insert copies and spills. Each block
 *    starts with an extra position number that corresponds to an imaginary
 *    "label" instruction that is not physically in the vasm IR.
 *
 * 3. Create one interval I for each Vreg R that requires register allocation,
 *    by iterating blocks and instructions in reverse order, computing live
 *    registers as we go. Each interval consists of a sorted list of disjoint,
 *    live ranges covering the positions where R must be in a physical register
 *    or spill slot. Vregs that are constants or have forced registers
 *    (e.g. VmSp) are skipped. If the input is SSA, the start position of each
 *    interval dominates every live range and use position in the interval.
 *
 * 4. Process intervals in order of start position, maintaining the set of
 *    active (live) and inactive (not live, but with live ranges that start
 *    after the current interval). When choosing a register, prefer the one
 *    available furthest into the future. If necessary, split the current
 *    interval so the first part gets a register, and enqueue the rest.
 *    When no registers are available, choose either the current interval or
 *    another one to spill, trying to free up the longest-available register.
 *
 *    Split positions must be after an interval's start position, and on or
 *    before the chosen split point. We're free to try to choose a good
 *    position in between, for example block boundaries and cold blocks.
 *
 * 5. Once intervals have been walked and split, every interval has an assigned
 *    operand (register or spill location) for all positions where it's alive.
 *    Visit every instruction and modify its Vreg operands to the physical
 *    register that was assigned.
 *
 * 6. Splitting creates sub-intervals that are assigned to different registers
 *    or spill locations, so insert resolving copies at the split positions
 *    between intervals that were split in a block, and copies on control-flow
 *    edges connecting different sub-intervals. When more than one copy occurs
 *    in a position, they are parallel-copies (all sources read before any dest
 *    is written).
 *
 * If any sub-interval was spilled, a single store is generated after each
 * definition point.
 *
 * When analyzing instructions that use or define a virtual SF register
 * (VregSF), eagerly rename it to the singleton PhysReg RegSF{0}, under the
 * assumption that there can only be one live SF at each position. This
 * reduces the number of intervals we need to process, facilitates inserting
 * ldimm{0} (as xor), and is checked by checkSF().
 */

///////////////////////////////////////////////////////////////////////////////

/*
 * Printing utilities.
 */
void printVariables(const char* caption,
                    const Vunit& unit, const VxlsContext& ctx,
                    const jit::vector<Variable*>& variables);
void dumpVariables(const jit::vector<Variable*>& variables,
                   unsigned num_spills);

/*
 * The ID of the block enclosing `pos'.
 */
Vlabel blockFor(const VxlsContext& ctx, unsigned pos) {
  for (unsigned lo = 0, hi = ctx.blocks.size(); lo < hi;) {
    auto mid = (lo + hi) / 2;
    auto r = ctx.block_ranges[ctx.blocks[mid]];
    if (pos < r.start) {
      hi = mid;
    } else if (pos >= r.end) {
      lo = mid + 1;
    } else {
      return ctx.blocks[mid];
    }
  }
  always_assert(false);
  return Vlabel{0xffffffff};
}

/*
 * Insert `src' into `dst' before dst[j], corresponding to XLS logical
 * position `pos'.
 *
 * Updates `j' to refer to the same instruction after the code insertions.
 */
void insertCodeAt(jit::vector<Vinstr>& dst, unsigned& j,
                  const jit::vector<Vinstr>& src, unsigned pos) {
  auto const irctx = dst[j].irctx();
  dst.insert(dst.begin() + j, src.size(), trap{TRAP_REASON});
  for (auto const& inst : src) {
    dst[j] = inst;
    dst[j].set_irctx(irctx);
    dst[j++].id = pos;
  }
}
601 // Pre-analysis passes.
604 * Compute the linear position range of each block.
606 * This modifies the Vinstrs in `unit' by setting their `pos' members, in
607 * addition to producing the block-to-range map.
609 jit::vector<LiveRange> computePositions(Vunit& unit,
610 const jit::vector<Vlabel>& blocks) {
611 auto block_ranges = jit::vector<LiveRange>{unit.blocks.size()};
612 unsigned pos = 0;
614 for (auto const b : blocks) {
615 auto& code = unit.blocks[b].code;
617 bool front_uses{false};
618 visitUses(unit, code.front(), [&](Vreg /*r*/) { front_uses = true; });
619 if (front_uses) {
620 auto irctx = code.front().irctx();
621 code.emplace(code.begin(), nop{}, irctx);
623 auto const start = pos;
625 for (auto& inst : unit.blocks[b].code) {
626 inst.id = pos;
627 pos += 2;
629 block_ranges[b] = { start, pos };
631 return block_ranges;
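
// E.g. a block whose numbering starts at pos 6 and which contains three
// instructions gets ids 6, 8, and 10, and block_range [6, 12); the odd
// positions in between are left for the copies and spills inserted later.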

/*
 * Return the effect this instruction has on the value of `sp'.
 *
 * Asserts (if `do_assert' is set) if an instruction mutates `sp' in an
 * untrackable way.
 */
int spEffect(const Vunit& unit, const Vinstr& inst, PhysReg sp,
             bool do_assert = debug) {
  switch (inst.op) {
    case Vinstr::push:
    case Vinstr::pushf:
    case Vinstr::pushm:
      return -8;
    case Vinstr::pushp:
    case Vinstr::pushpm:
      return -16;
    case Vinstr::pop:
    case Vinstr::popf:
    case Vinstr::popm:
      return 8;
    case Vinstr::popp:
    case Vinstr::poppm:
      return 16;
    case Vinstr::lea: {
      auto& i = inst.lea_;
      if (i.d == Vreg64(sp)) {
        assertx(i.s.base == i.d && !i.s.index.isValid());
        return i.s.disp;
      }
      return 0;
    }
    default:
      if (do_assert) {
        visitDefs(unit, inst, [&] (Vreg r) { assertx(r != sp); });
      }
      return 0;
  }
}
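
// E.g. push moves sp down by 8 bytes, so its effect is -8 (and pushp's is
// -16); pop's is +8; and a lea whose destination is sp (base sp, no index)
// has the effect of its displacement. Any other instruction that defs sp
// trips the debug-build assertion.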

/*
 * Compute the offset from `sp' to the spill area at each block start.
 */
jit::vector<folly::Optional<int>> analyzeSP(const Vunit& unit,
                                            const jit::vector<Vlabel>& blocks,
                                            PhysReg sp) {
  auto visited = boost::dynamic_bitset<>(unit.blocks.size());
  auto spill_offsets = jit::vector<folly::Optional<int>>(unit.blocks.size());

  for (auto const b : blocks) {
    auto offset = visited.test(b) ? spill_offsets[b] : folly::none;

    for (unsigned j = 0; j < unit.blocks[b].code.size(); j++) {
      auto const& inst = unit.blocks[b].code[j];
      if (inst.op == Vinstr::recordbasenativesp) {
        assert_flog(!offset, "Block B{} Instr {} initializes native SP, but "
                    "it is already initialized.", size_t(b), j);
        offset = 0;
      } else if (inst.op == Vinstr::unrecordbasenativesp) {
        assert_flog(offset, "Block B{} Instr {} uninitializes native SP, but "
                    "it is already uninitialized.", size_t(b), j);
        assert_flog(*offset == 0, "Block B{} Instr {} uninitializes native SP, "
                    "but SPOffset is nonzero.", size_t(b), j);
        offset = folly::none;
      } else if (offset) {
        *offset -= spEffect(unit, inst, sp);
      }
    }
    for (auto const s : succs(unit.blocks[b])) {
      if (visited.test(s)) {
        assert_flog(offset == spill_offsets[s],
                    "sp mismatch on edge B{}->B{}, expected {} got {}",
                    size_t(b), size_t(s),
                    spill_offsets[s] ? std::to_string(*spill_offsets[s])
                                     : "none",
                    offset ? std::to_string(*offset)
                           : "none");
      } else {
        spill_offsets[s] = offset;
        visited.set(s);
      }
    }
  }
  return spill_offsets;
}

///////////////////////////////////////////////////////////////////////////////
// Lifetime intervals.

/*
 * Add `r' to `ivl'.
 *
 * This assumes that the ranges of `ivl' are in reverse order, and that `r'
 * precedes or overlaps with ivl->ranges.first().
 */
void addRange(Interval* ivl, LiveRange r) {
  while (!ivl->ranges.empty() && r.contains(ivl->ranges.back())) {
    ivl->ranges.pop_back();
  }
  if (ivl->ranges.empty()) {
    return ivl->ranges.push_back(r);
  }
  auto& first = ivl->ranges.back();
  if (first.contains(r)) return;
  if (r.end >= first.start) {
    first.start = r.start;
  } else {
    ivl->ranges.push_back(r);
  }
}
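
// Worked example (illustrative values): buildIntervals() walks the code
// backwards, so ranges arrive here in decreasing order and ranges.back() is
// the lowest one. Starting from ranges = {[30, 40)}, adding {20, 30} merges
// the abutting ranges into {[20, 40)}; then adding {10, 14} cannot merge, so
// it is appended, leaving {[20, 40), [10, 14)}, which buildIntervals()
// un-reverses at the end.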

/*
 * Visits defs of an instruction, updates their liveness, adds live ranges, and
 * adds Uses with appropriate hints.
 */
struct DefVisitor {
  DefVisitor(const Vunit& unit, jit::vector<Variable*>& variables,
             LiveSet& live, Vlabel b, const Vinstr& inst, unsigned pos)
    : m_unit(unit)
    , m_variables(variables)
    , m_live(live)
    , m_block(b)
    , m_inst(inst)
    , m_pos(pos)
  {}

  // Skip immediates and uses.
  template<class F> void imm(const F&) {}
  template<class R> void use(R) {}
  template<class S, class H> void useHint(S, H) {}
  template<class R> void across(R) {}

  void def(Vtuple defs) {
    for (auto r : m_unit.tuples[defs]) def(r);
  }
  void defHint(Vtuple def_tuple, Vtuple hint_tuple) {
    auto& defs = m_unit.tuples[def_tuple];
    auto& hints = m_unit.tuples[hint_tuple];
    for (int i = 0; i < defs.size(); i++) {
      def(defs[i], Constraint::Any, hints[i]);
    }
  }
  template<class R> void def(R r) {
    def(r, constraint(r), Vreg{}, is_wide(r));
  }
  template<class D, class H> void defHint(D dst, H hint) {
    def(dst, constraint(dst), hint, is_wide(dst));
  }
  void def(Vreg r) { def(r, Constraint::Any); }
  void defHint(Vreg d, Vreg hint) { def(d, Constraint::Any, hint); }
  void def(RegSet rs) { rs.forEach([&](Vreg r) { def(r); }); }
  void def(VregSF r) {
    r = RegSF{0}; // eagerly rename all SFs
    def(r, constraint(r));
  }

private:
  void def(Vreg r, Constraint kind, Vreg hint = Vreg{}, bool wide = false) {
    auto var = m_variables[r];
    if (m_live.test(r)) {
      m_live.reset(r);
      var->ivl()->ranges.back().start = m_pos;
    } else {
      if (!var) {
        var = m_variables[r] = Variable::create(r);
      }
      addRange(var->ivl(), {m_pos, m_pos + 1});
    }
    if (!var->fixed()) {
      var->ivl()->uses.push_back(Use{kind, m_pos, hint});
      var->wide |= wide;
      var->def_pos = m_pos;
      var->def_block = m_block;
      var->def_inst = m_inst;
    }
  }

private:
  const Vunit& m_unit;
  jit::vector<Variable*>& m_variables;
  LiveSet& m_live;
  Vlabel m_block;
  const Vinstr& m_inst;
  unsigned m_pos;
};

struct UseVisitor {
  UseVisitor(const Vunit& unit, jit::vector<Variable*>& variables,
             LiveSet& live, const Vinstr& inst, LiveRange range)
    : m_unit(unit)
    , m_variables(variables)
    , m_live(live)
    , m_inst(inst)
    , m_range(range)
  {}

  // Skip immediates and defs.
  template<class F> void imm(const F&) {}
  template<class R> void def(R) {}
  template<class D, class H> void defHint(D, H) {}

  template<class R> void use(R r) { use(r, constraint(r), m_range.end); }
  template<class S, class H> void useHint(S src, H hint) {
    use(src, constraint(src), m_range.end, hint);
  }
  void use(VregSF r) {
    r = RegSF{0}; // eagerly rename all SFs
    use(r, constraint(r), m_range.end);
  }
  void use(RegSet regs) { regs.forEach([&](Vreg r) { use(r); }); }
  void use(Vtuple uses) { for (auto r : m_unit.tuples[uses]) use(r); }
  void useHint(Vtuple src_tuple, Vtuple hint_tuple) {
    auto& uses = m_unit.tuples[src_tuple];
    auto& hints = m_unit.tuples[hint_tuple];
    for (int i = 0, n = uses.size(); i < n; i++) {
      useHint(uses[i], hints[i]);
    }
  }
  void use(Vptr m) {
    if (m.base.isValid()) use(m.base);
    if (m.index.isValid()) use(m.index);
  }
  template<Width w> void use(Vp<w> m) { use(static_cast<Vptr>(m)); }

  void use(VcallArgsId /*id*/) {
    always_assert(false && "vcall unsupported in vxls");
  }

  /*
   * An operand marked as UA means use-across. Mark it live across the
   * instruction so its lifetime conflicts with the destination, which ensures
   * it will be assigned a different register than the destination. This isn't
   * necessary if *both* operands of a binary instruction are the same virtual
   * register, but is still correct.
   */
  template<class R> void across(R r) { use(r, constraint(r), m_range.end + 1); }
  void across(Vtuple uses) { for (auto r : m_unit.tuples[uses]) across(r); }
  void across(RegSet regs) { regs.forEach([&](Vreg r) { across(r); }); }
  void across(Vptr m) {
    if (m.base.isValid()) across(m.base);
    if (m.index.isValid()) across(m.index);
  }
  template<Width w> void across(Vp<w> m) { across(static_cast<Vptr>(m)); }

private:
  void use(Vreg r, Constraint kind, unsigned end, Vreg hint = Vreg{}) {
    m_live.set(r);

    auto var = m_variables[r];
    if (!var) var = m_variables[r] = Variable::create(r);
    addRange(var->ivl(), {m_range.start, end});

    if (!var->fixed()) {
      if (m_inst.op == Vinstr::copyargs ||
          m_inst.op == Vinstr::copy2 ||
          m_inst.op == Vinstr::copy ||
          m_inst.op == Vinstr::phijmp) {
        // all these instructions lower to parallel copyplans, which know
        // how to load directly from constants or spilled locations
        kind = Constraint::CopySrc;
      }
      var->ivl()->uses.push_back({kind, m_range.end, hint});
    }
  }

private:
  const Vunit& m_unit;
  jit::vector<Variable*>& m_variables;
  LiveSet& m_live;
  const Vinstr& m_inst;
  const LiveRange m_range;
};

/*
 * Compute lifetime intervals and use positions of all Vregs by walking the
 * code bottom-up once.
 */
jit::vector<Variable*> buildIntervals(const Vunit& unit,
                                      const VxlsContext& ctx) {
  ONTRACE(kVasmRegAllocDetailLevel, printCfg(unit, ctx.blocks));

  auto variables = jit::vector<Variable*>{unit.next_vr};

  for (auto b : boost::adaptors::reverse(ctx.blocks)) {
    auto& block = unit.blocks[b];

    // initial live set is the union of successor live sets.
    LiveSet live(unit.next_vr);
    for (auto s : succs(block)) {
      always_assert(!ctx.livein[s].empty());
      live |= ctx.livein[s];
    }

    // add a range covering the whole block to every live interval
    auto& block_range = ctx.block_ranges[b];
    forEach(live, [&](Vreg r) {
      if (!variables[r]) variables[r] = Variable::create(r);
      addRange(variables[r]->ivl(), block_range);
    });

    // visit instructions bottom-up, adding uses & ranges
    auto pos = block_range.end;
    for (auto const& inst : boost::adaptors::reverse(block.code)) {
      pos -= 2;
      RegSet implicit_uses, implicit_across, implicit_defs;
      getEffects(ctx.abi, inst, implicit_uses, implicit_across, implicit_defs);

      DefVisitor dv(unit, variables, live, b, inst, pos);
      visitOperands(inst, dv);
      dv.def(implicit_defs);

      UseVisitor uv(unit, variables, live, inst, {block_range.start, pos});
      visitOperands(inst, uv);
      uv.use(implicit_uses);
      uv.across(implicit_across);
      if (inst.op == Vinstr::recordbasenativesp) {
        forEach(live, [&](Vreg r) {
          if (!unit.regToConst.count(r)) {
            // We mark the instruction as a use so no spills span the
            // instruction unless they have to.
            uv.use(r);
            if (!variables[r]) variables[r] = Variable::create(r);
            variables[r]->recordbasenativesps.push_back(pos);
          }
        });
      } else if (inst.op == Vinstr::unrecordbasenativesp) {
        forEach(live, [&](Vreg r) {
          if (!unit.regToConst.count(r)) {
            // We mark the instruction as a use so no spills span the
            // instruction unless they have to.
            uv.use(r);
          }
        });
      }
    }

    // sanity check liveness computation
    always_assert(live == ctx.livein[b]);
  }

  // finish processing live ranges for constants
  for (auto& c : unit.constToReg) {
    if (auto var = variables[c.second]) {
      var->ivl()->ranges.back().start = 0;
      var->def_pos = 0;
      var->constant = true;
      var->val = c.first;
    }
  }

  // Ranges and uses were generated in reverse order. Unreverse them now.
  for (auto var : variables) {
    if (!var) continue;
    auto ivl = var->ivl();
    assertx(!ivl->ranges.empty()); // no empty intervals
    std::reverse(ivl->uses.begin(), ivl->uses.end());
    std::reverse(ivl->ranges.begin(), ivl->ranges.end());
  }
  ONTRACE(kVasmRegAllocDetailLevel,
          printVariables("after building intervals", unit, ctx, variables);
  );

  if (debug) {
    // only constants and physical registers can be live-into the entry block.
    forEach(ctx.livein[unit.entry], [&](Vreg r) {
      UNUSED auto var = variables[r];
      assertx(var->constant || var->fixed());
    });
    for (auto var : variables) {
      if (!var) continue;
      auto const ivl = var->ivl();
      for (unsigned i = 1; i < ivl->uses.size(); i++) {
        assertx(ivl->uses[i].pos >= ivl->uses[i-1].pos); // monotonic
      }
      for (unsigned i = 1; i < ivl->ranges.size(); i++) {
        assertx(ivl->ranges[i].end > ivl->ranges[i].start); // no empty ranges
        assertx(ivl->ranges[i].start > ivl->ranges[i-1].end); // no empty gaps
      }
    }
  }
  return variables;
}

///////////////////////////////////////////////////////////////////////////////

/*
 * A map from PhysReg number to position.
 */
using PosVec = PhysReg::Map<unsigned>;

/*
 * Between `r1' and `r2', choose the one whose position in `pos_vec' is closest
 * to but still after (or at) `pos'.
 *
 * If neither has a position after `pos', choose the higher one. If both have
 * the same position, choose `r1'.
 */
PhysReg choose_closest_after(unsigned pos, const PosVec& pos_vec,
                             PhysReg r1, PhysReg r2) {
  if (r1 == InvalidReg) return r2;
  if (r2 == InvalidReg) return r1;

  if (pos_vec[r1] >= pos) {
    if (pos <= pos_vec[r2] && pos_vec[r2] < pos_vec[r1]) {
      return r2;
    }
  } else {
    if (pos_vec[r2] > pos_vec[r1]) {
      return r2;
    }
  }
  return r1;
}

/*
 * Like choose_closest_after(), but iterates through `pos_vec'.
 */
PhysReg find_closest_after(unsigned pos, const PosVec& pos_vec) {
  auto ret = InvalidReg;
  for (auto const r : pos_vec) {
    ret = choose_closest_after(pos, pos_vec, ret, r);
  }
  return ret;
}
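
// Example (hypothetical positions): with pos = 20 and pos_vec entries
// {r1: 18, r2: 25, r3: 40}, find_closest_after() picks r2---the smallest
// entry that is still >= 20. If every entry were below 20, it would fall
// back to the register with the largest entry.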

///////////////////////////////////////////////////////////////////////////////
// Hints.

/*
 * A member of a "phi group"---i.e., the set of all variables which flow into
 * one another via a set of corresponding phijmp's and phidef's.
 */
struct PhiVar {
  Vreg r;        // the Vreg that is used or def'd in the phi
  unsigned pos;  // position of the phijmp or phidef
};

/*
 * Hint metadata that doesn't fit into the `variables' vector.
 */
struct HintInfo {
  jit::vector<jit::vector<PhiVar>> phi_groups;
};

/*
 * Add the phi use of `r' at `pos' to the phi group given by `pgid'.
 */
void addPhiGroupMember(const jit::vector<Variable*>& variables,
                       jit::vector<jit::vector<PhiVar>>& phi_groups,
                       Vreg r, unsigned pos, int pgid,
                       Vreg hint = Vreg{}) {
  assertx(phi_groups.size() > pgid);
  assertx(!phi_groups[pgid].empty());

  auto const var = variables[r];
  assertx(var != nullptr);

  auto const ivl = var->ivl();
  auto& u = pos == var->def_pos
    ? ivl->uses.front()
    : ivl->uses[ivl->findUse(pos)];
  assertx(u.pos == pos);

  if (hint.isValid()) u.hint = hint;

  u.phi_group = pgid;
  phi_groups[pgid].push_back({r, u.pos});
}

/*
 * Collect hint metadata for phis.
 *
 * The return value of this pass represents an equivalence relation over phi
 * uses and defs. The relation is defined as follows:
 *
 * Consider a phidef F with arity n (i.e., it defines n variables). F gives
 * rise to n equivalence classes, each of which consists of the i-th use or
 * def variable from each phi instruction in the transitive closure of the
 * predicate
 *
 *    P(f) := { every phijmp which targets f, and every other phidef
 *              targeted by those phijmp's }
 *
 * applied to F.
 *
 * Or, taking a pictorial example:
 *    _________________________________________________________
 *   |                                                         |
 *   |    +---------------------------------------------+      |
 *   |    |                                             |      |
 *   |    v                                             |      |
 *   |  phijmp(x1, y1)   phijmp(x2, y2)   phijmp(x3, y3)|      |
 *   |      |                 |                |        |      |
 *   |      +-----------------+    +-----------+        |      |
 *   |              |              |                    |      |
 *   |              v              v                    |      |
 *   |      phidef(x4, y4)   phidef(x5, y5) ------------+      |
 *   |              |                                          |
 *   |______________._________________________________________|
 *                  .
 *    ______________.____________________
 *   |              v                    |
 *   |  phijmp(x6, y6)--->phidef(x7, y7) |
 *   |___________________________________|
 *
 * The equivalence classes in this example are {x1, x2, x4}, {y1, y2, y4},
 * {x3, x5}, {y3, y5}, {x6, x7}, and {y6, y7}.
 *
 * We use these equivalence classes during register allocation to try to assign
 * the same register to all the variables in a phi group (at least at those
 * positions where the phi instructions occur).
 *
 * Note that this equivalence relation does /not/ capture the idea that we
 * probably want the same register for x4 as we do for x6 and x7. To track that
 * information, we set the `hint' on phi uses to the corresponding phidef
 * variable, then let the hint back propagation pass handle it. (Note that we
 * do /not/ set the `hint' field at phi defs, nor do we try to use it at any
 * point.)
 */
jit::vector<jit::vector<PhiVar>>
analyzePhiHints(const Vunit& unit, const VxlsContext& ctx,
                const jit::vector<Variable*>& variables) {
  auto phi_groups = jit::vector<jit::vector<PhiVar>>{};

  /*
   * Get the phi group for r's def, optionally allocating a new phi group if
   * necessary.
   */
  auto const def_phi_group = [&] (Vreg r, bool alloc) {
    auto const var = variables[r];
    assertx(var != nullptr);
    assertx(var->def_inst.op == Vinstr::phidef);

    auto& u = var->ivl()->uses.front();

    if (u.phi_group == kInvalidPhiGroup && alloc) {
      u.phi_group = phi_groups.size();
      phi_groups.emplace_back(jit::vector<PhiVar>{{r, u.pos}});
    }
    return u.phi_group;
  };

  auto const is_fixed = [&] (Vreg r) { return variables[r]->fixed(); };

  for (auto const b : ctx.blocks) {
    auto const& code = unit.blocks[b].code;
    if (code.empty()) continue;

    auto const& first = code.front();
    auto const& last = code.back();

    if (first.op == Vinstr::phidef) {
      // A phidef'd variable just needs to be assigned a phi group if it
      // doesn't already have one (i.e., from handling a phijmp).
      auto const& defs = unit.tuples[first.phidef_.defs];
      for (auto const r : defs) {
        if (is_fixed(r)) continue;
        def_phi_group(r, true);
      }
    }

    if (last.op == Vinstr::phijmp) {
      auto const target = last.phijmp_.target;
      auto const& def_inst = unit.blocks[target].code.front();
      assertx(def_inst.op == Vinstr::phidef);

      auto const& uses = unit.tuples[last.phijmp_.uses];
      auto const& defs = unit.tuples[def_inst.phidef_.defs];
      assertx(uses.size() == defs.size());

      // Set the phi group for each phi use variable to that of the
      // corresponding phi def variable (allocating a new group if the def
      // variable has not already been assigned one).
      for (size_t i = 0, n = uses.size(); i < n; ++i) {
        if (is_fixed(defs[i])) continue;

        auto const pgid = def_phi_group(defs[i], true);
        addPhiGroupMember(variables, phi_groups,
                          uses[i], last.id, pgid, defs[i]);
      }
    }
  }
  return phi_groups;
}

HintInfo analyzeHints(const Vunit& unit, const VxlsContext& ctx,
                      const jit::vector<Variable*>& variables) {
  HintInfo hint_info;
  hint_info.phi_groups = analyzePhiHints(unit, ctx, variables);

  return hint_info;
}

/*
 * Try to return the physical register that would coalesce `ivl' with its
 * hinted source.
 */
PhysReg tryCoalesce(const jit::vector<Variable*>& variables,
                    const Interval* ivl) {
  assertx(ivl == ivl->leader());
  assertx(!ivl->uses.empty());

  auto const& def = ivl->uses.front();
  assertx(def.pos == ivl->var->def_pos);

  if (!def.hint.isValid()) return InvalidReg;
  auto const hint_var = variables[def.hint];

  for (auto hvl = hint_var->ivl(); hvl; hvl = hvl->next) {
    if (def.pos == hvl->end()) return hvl->reg;
  }
  return InvalidReg;
}

/*
 * Scan the uses of `ivl' for phi nodes, and return a pair of hints for the
 * first one we find.
 *
 * For a given phi node F, the first of the two hints we return is derived from
 * only those phijmps targeting F (if F is a phidef), or from the phidefs F
 * targets (if F is a phijmp). The second hint accounts for F's entire
 * equivalence class---see analyzePhiHints() for more details.
 */
folly::Optional<std::pair<PhysReg,PhysReg>>
tryPhiHint(const jit::vector<Variable*>& variables,
           const Interval* ivl, const HintInfo& hint_info,
           const PosVec& free_until) {
  auto const choose = [&] (PhysReg h1, PhysReg h2) {
    return choose_closest_after(ivl->end(), free_until, h1, h2);
  };

  for (auto const& u : ivl->uses) {
    // Look for the first phi hint.
    if (u.phi_group == kInvalidPhiGroup) continue;

    auto preferred = InvalidReg;
    auto group_hint = InvalidReg;

    // Choose a register to be the hint.
    for (auto const& phiv : hint_info.phi_groups[u.phi_group]) {
      auto const var = variables[phiv.r];
      assertx(var);

      auto const phivl = var->ivlAtUse(phiv.pos);
      if (phivl->reg == InvalidReg) continue;

      auto const& phu = phivl->uses[phivl->findUse(phiv.pos)];
      assertx(phu.pos == phiv.pos);

      // Prefer to use a hint from a corresponding phi node.
      if (u.hint == phiv.r || phu.hint == ivl->var->vreg) {
        preferred = choose(preferred, phivl->reg);
      }
      // In the end, though, we're okay with any hint from the group.
      group_hint = choose(group_hint, phivl->reg);
    }
    return std::make_pair(preferred, group_hint);
  }
  return folly::none;
}

/*
 * Choose an appropriate hint for the (sub-)interval `ivl'.
 *
 * The register allocator passes us constraints via `free_until' and `allow'.
 * We use the `free_until' vector to choose a hint that most tightly covers the
 * lifetime of `ivl'; we use `allow' to ensure that our hint satisfies
 * constraints.
 */
PhysReg chooseHint(const jit::vector<Variable*>& variables,
                   const Interval* ivl, const HintInfo& hint_info,
                   const PosVec& free_until, RegSet allow) {
  if (!RuntimeOption::EvalHHIREnablePreColoring &&
      !RuntimeOption::EvalHHIREnableCoalescing) return InvalidReg;

  auto const choose = [&] (PhysReg h1, PhysReg h2) {
    return choose_closest_after(ivl->end(), free_until, h1, h2);
  };
  auto hint = InvalidReg;

  // If `ivl' contains its def, try a coalescing hint.
  if (ivl == ivl->leader()) {
    hint = tryCoalesce(variables, ivl);
  }

  // Return true if we should return `h'.
  auto const check_and_trace = [&] (PhysReg h, const char* prefix) {
    if (h == InvalidReg) return false;
    if (allow.contains(h)) {
      FTRACE(kHintLevel, "{}hinting {} for %{} @ {}\n",
             prefix, show(h), size_t(ivl->var->vreg), ivl->start());
      return true;
    }
    FTRACE(kHintLevel, "  found mismatched hint for %{} @ {}\n",
           size_t(ivl->var->vreg), ivl->uses.front().pos);
    return false;
  };

  auto const is_phi_use = !ivl->uses.empty() &&
                          ivl->uses.front().phi_group != kInvalidPhiGroup;

  // If we have either a backwards or a forwards hint, return it---if we have
  // both, return whichever is free the longest. For phi uses, however, we
  // want to consider the whole phi group's allocations first.
  if (!is_phi_use && check_and_trace(hint, "")) return hint;

  // Try to determine a hint via the first phi use in the interval.
  auto const phi_hints = tryPhiHint(variables, ivl, hint_info, free_until);
  bool is_phi_hint = false;

  if (phi_hints) {
    hint = choose(phi_hints->first, hint);
    if (hint != phi_hints->first) {
      hint = choose(hint, phi_hints->second);
    }
    is_phi_hint = hint == phi_hints->first ||
                  hint == phi_hints->second;
  }
  if (phi_hints || is_phi_use) {
    if (check_and_trace(hint, debug && is_phi_hint ? "phi-" : "")) {
      return hint;
    }
  }

  // Crawl through our uses and look for a physical hint.
  for (auto const& u : ivl->uses) {
    if (!u.hint.isValid() ||
        !u.hint.isPhys() ||
        !allow.contains(u.hint)) continue;
    hint = choose(hint, u.hint);
  }

  if (hint != InvalidReg) {
    FTRACE(kHintLevel, "physical hint {} for %{} @ {}\n",
           show(hint), size_t(ivl->var->vreg), ivl->start());
  }
  return hint;
}

///////////////////////////////////////////////////////////////////////////////
// Register allocation.

/*
 * Information about spills generated by register allocation.
 *
 * Used for the allocateSpillSpace() pass which inserts the instructions that
 * create spill space on the stack.
 */
struct SpillInfo {
  // Number of intervals spilled.
  unsigned num_spills{0};
  // Number of spill slots used.
  size_t used_spill_slots{0};
};

/*
 * Extended Linear Scan register allocator over vasm virtual registers (Vregs).
 *
 * This encapsulates the intermediate data structures used during the
 * allocation phase of the algorithm so we don't have to pass them around
 * everywhere.
 */
struct Vxls {
  Vxls(const VxlsContext& ctx,
       const jit::vector<Variable*>& variables,
       const HintInfo& hint_info)
    : ctx(ctx)
    , variables(variables)
    , hint_info(hint_info)
  {}

  SpillInfo go();

private:
  void assignSpill(Interval* ivl);
  void spill(Interval*);
  void assignReg(Interval*, PhysReg);

  unsigned nearestSplitBefore(unsigned pos);
  unsigned constrain(Interval*, RegSet&);

  void update(Interval*);
  void allocate(Interval*);
  void allocBlocked(Interval*);
  void spillOthers(Interval* current, PhysReg r);

private:
  /*
   * Comparison function for pending priority queue.
   *
   * std::priority_queue requires a less operation, but sorts the heap
   * highest-first; we need the opposite (lowest-first), so use greater-than.
   */
  struct Compare {
    bool operator()(const Interval* i1, const Interval* i2) {
      return i1->start() > i2->start();
    }
  };

private:
  const VxlsContext& ctx;

  // Variables, null if unused.
  const jit::vector<Variable*>& variables;
  // Hint metadata.
  const HintInfo& hint_info;
  // Subintervals sorted by Interval start.
  jit::priority_queue<Interval*,Compare> pending;
  // Intervals that overlap.
  jit::vector<Interval*> active, inactive;
  // Last position each spill slot was owned; kMaxPos means currently used.
  jit::array<unsigned, kMaxSpillSlots> spill_slots{{0}};
  // Stats on spills.
  SpillInfo spill_info;
};

SpillInfo Vxls::go() {
  for (auto var : variables) {
    if (!var) continue;
    if (var->fixed()) {
      assignReg(var->ivl(), var->vreg);
    } else if (var->constant) {
      spill(var->ivl());
    } else {
      pending.push(var->ivl());
    }
  }
  while (!pending.empty()) {
    auto current = pending.top();
    pending.pop();
    update(current);
    allocate(current);
  }
  return spill_info;
}

/*
 * Assign the next available spill slot to `ivl'.
 */
void Vxls::assignSpill(Interval* ivl) {
  assertx(!ivl->fixed() && ivl != ivl->leader());

  if (ivl->var->slot != kInvalidSpillSlot) return;

  auto& used_spill_slots = spill_info.used_spill_slots;

  auto const assign_slot = [&] (size_t slot) {
    ivl->var->slot = slot;
    ++spill_info.num_spills;

    spill_slots[slot] = kMaxPos;
    if (!ivl->var->wide) {
      used_spill_slots = std::max(used_spill_slots, slot + 1);
    } else {
      used_spill_slots = std::max(used_spill_slots, slot + 2);
      spill_slots[slot + 1] = kMaxPos;
    }
  };

  // Assign spill slots. We track the highest position at which a spill slot
  // was owned, and only reassign it to a Vreg if its lifetime interval
  // (including all splits) is strictly above that high water mark.
  if (!ivl->var->wide) {
    for (size_t slot = 0, n = spill_slots.size(); slot < n; ++slot) {
      if (ivl->leader()->start() >= spill_slots[slot]) {
        return assign_slot(slot);
      }
    }
  } else {
    for (size_t slot = 0, n = spill_slots.size() - 1; slot < n; slot += 2) {
      if (ivl->leader()->start() >= spill_slots[slot] &&
          ivl->leader()->start() >= spill_slots[slot + 1]) {
        return assign_slot(slot);
      }
    }
  }

  // Ran out of spill slots.
  ONTRACE(kVasmRegAllocDetailLevel,
          dumpVariables(variables, spill_info.num_spills));
  TRACE(1, "vxls-punt TooManySpills\n");
  TRACE_PUNT("LinearScan_TooManySpills");
}
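
// For instance (hypothetical values): if slot 3 was last owned by an interval
// ending at position 50, a later variable whose leader starts at or after 50
// may reuse slot 3. Wide (128-bit) values claim two consecutive slots at an
// even-numbered base, which is why the wide loop steps by 2.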

/*
 * Assign `r' to `ivl'.
 */
void Vxls::assignReg(Interval* ivl, PhysReg r) {
  if (!ivl->fixed() && ivl->uses.empty()) {
    ivl->reg = InvalidReg;
    if (!ivl->constant()) assignSpill(ivl);
  } else {
    ivl->reg = r;
    active.push_back(ivl);
  }
}

/*
 * Spill `ivl' from its start until its first register use.
 *
 * Spill `ivl' if there is no use; otherwise split the interval just before the
 * use, and enqueue the second part.
 */
void Vxls::spill(Interval* ivl) {
  unsigned first_use = ivl->firstUse();
  if (first_use <= ivl->end()) {
    auto split_pos = nearestSplitBefore(first_use);
    if (split_pos <= ivl->start()) {
      // This can only happen if we need more than the available registers
      // at a single position. It can happen in phijmp or callargs.
      TRACE(1, "vxls-punt RegSpill\n");
      TRACE_PUNT("RegSpill"); // cannot split before first_use
    }
    pending.push(ivl->split(split_pos));
  }
  ivl->reg = InvalidReg;
  if (!ivl->constant()) assignSpill(ivl);
}

/*
 * Update the active and inactive lists for the start of `current'.
 */
void Vxls::update(Interval* current) {
  auto const pos = current->start();

  auto const free_spill_slot = [this] (Interval* ivl) {
    assertx(!ivl->next);
    auto slot = ivl->var->slot;

    if (slot != kInvalidSpillSlot) {
      if (ivl->var->wide) {
        assertx(spill_slots[slot + 1]);
        spill_slots[slot + 1] = ivl->end();
      }
      assertx(spill_slots[slot]);
      spill_slots[slot] = ivl->end();
    }
  };

  // Check for active/inactive intervals that have expired or which need their
  // polarity flipped.
  auto const update_list = [&] (jit::vector<Interval*>& target,
                                jit::vector<Interval*>& other,
                                bool is_active) {
    auto end = target.end();
    for (auto i = target.begin(); i != end;) {
      auto ivl = *i;
      if (pos >= ivl->end()) {
        *i = *--end;
        if (!ivl->next) free_spill_slot(ivl);
      } else if (is_active ? !ivl->covers(pos) : ivl->covers(pos)) {
        *i = *--end;
        other.push_back(ivl);
      } else {
        i++;
      }
    }
    target.erase(end, target.end());
  };
  update_list(active, inactive, true);
  update_list(inactive, active, false);
}

/*
 * Return the closest split position on or before `pos'.
 *
 * The result might be exactly on an edge, or in-between instruction positions.
 */
unsigned Vxls::nearestSplitBefore(unsigned pos) {
  auto b = blockFor(ctx, pos);
  auto range = ctx.block_ranges[b];
  if (pos == range.start) return pos;
  return (pos - 1) | 1;
}
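
// E.g. within a block spanning [10, 20), nearestSplitBefore(14) returns 13,
// the odd (between-instruction) position just below 14; nearestSplitBefore(10)
// returns 10 itself, since a block boundary is already a legal split point.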

/*
 * Constrain the allowable registers for `ivl' by inspecting uses.
 *
 * Returns the latest position for which `allow' (which we populate) is valid.
 * We use this return value to fill the `free_until' PosVec in allocate()
 * below. That data structure tracks the first position at which a register is
 * /unavailable/, so it would appear that constrain()'s return value is
 * off-by-one.
 *
 * In fact, it is not; we actually /need/ this position offsetting because of
 * our leniency towards having uses at an Interval's end() position. If we
 * fail to constrain on an end-position use, we must still split and spill.
 * (In contrast, if we intersect with another Interval on an end-position use,
 * it's okay, because SSA tells us that the conflict must be at the other
 * Interval's def position, and a use and a def at the same position don't
 * actually conflict; see the fun ASCII diagram that adorns the definition of
 * Interval).
 */
unsigned Vxls::constrain(Interval* ivl, RegSet& allow) {
  auto const any = ctx.abi.unreserved() - ctx.abi.sf; // Any but not flags.
  allow = ctx.abi.unreserved();
  for (auto& u : ivl->uses) {
    auto need = u.kind == Constraint::Simd ? ctx.abi.simdUnreserved :
                u.kind == Constraint::Gpr  ? ctx.abi.gpUnreserved :
                u.kind == Constraint::Sf   ? ctx.abi.sf :
                any; // Any or CopySrc
    if ((allow & need).empty()) {
      // cannot satisfy constraints; must split before u.pos
      return u.pos - 1;
    }
    allow &= need;
  }
  return kMaxPos;
}
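
// Illustrative example: if an interval's uses are {Gpr @ 10, Simd @ 20}, then
// after the first use `allow' is gpUnreserved; the second use needs
// simdUnreserved, the intersection is empty, so constrain() returns 19 and
// the caller must split before position 20.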

/*
 * Return the next intersection point between `current' and `other', or kMaxPos
 * if they never intersect.
 *
 * We assume that `other', if it represents an SSA variable, is not live at the
 * start of `current'.
 *
 * Note that if two intervals intersect, the first point of intersection will
 * always be the start of one of the intervals, because SSA ensures that a def
 * dominates all uses, and hence all live ranges as well.
 */
unsigned nextIntersect(const Interval* current, const Interval* other) {
  assertx(!current->fixed());

  if (current == current->leader() &&
      other == other->leader() &&
      !other->fixed()) {
    // Since `other' is an inactive Vreg interval, it cannot cover current's
    // start, and `current' cannot cover other's start, since `other' started
    // earlier. Therefore, SSA guarantees no intersection.
    return kMaxPos;
  }
  if (current->end() <= other->start() ||
      other->end() <= current->start()) {
    // One ends before the other starts.
    return kMaxPos;
  }
  // r1,e1 span all of current
  auto r1 = current->ranges.begin();
  auto e1 = current->ranges.end();
  // r2,e2 span the tail of other that might intersect current
  auto r2 = other->ranges.begin() + other->findRange(current->start());
  auto e2 = other->ranges.end();
  // search for the lowest position covered by current and other
  for (;;) {
    if (r1->start < r2->start) {
      if (r2->start < r1->end) return r2->start;
      if (++r1 == e1) return kMaxPos;
    } else {
      if (r1->start < r2->end) return r1->start;
      if (++r2 == e2) return kMaxPos;
    }
  }
  return kMaxPos;
}
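
// Worked example (made-up ranges): current = {[10, 30)} and other =
// {[2, 6), [24, 40)}, where the SSA fast path doesn't apply (say `other' is a
// split child). findRange(10) skips other's [2, 6), and the walk returns 24,
// the start of other's second range---the first position covered by both.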
1689 void Vxls::allocate(Interval* current) {
1690 // Map from PhysReg until the first position at which it is /not/ available.
1691 PosVec free_until; // 0 by default
1693 RegSet allow;
1694 auto const conflict = constrain(current, allow);
1696 // Mark regs that fit our constraints as free up until the point of conflict,
1697 // unless they're owned by active intervals---then mark them used.
1698 allow.forEach([&](PhysReg r) {
1699 free_until[r] = conflict;
1701 for (auto ivl : active) {
1702 free_until[ivl->reg] = 0;
1705 // Mark each reg assigned to an inactive interval as only free until the
1706 // first position at which `current' intersects that interval.
1707 for (auto ivl : inactive) {
1708 auto r = ivl->reg;
1709 if (free_until[r] == 0) continue;
1710 auto until = nextIntersect(current, ivl);
1711 free_until[r] = std::min(until, free_until[r]);
1714 if (current->ranges.size() > 1) {
1715 auto const b = blockFor(ctx, current->start());
1716 auto const blk_range = ctx.block_ranges[b];
1717 if (blk_range.end > current->ranges[0].end) {
1718 // We're assigning a register to an interval with
1719 // multiple ranges, but the vreg isn't live out
1720 // of the first range. This means there's no
1721 // connection between this range and any subsequent
1722 // one, so we can safely break the interval
1723 // after the first range without making things worse.
1724 // On the other hand, it can make things better, by
1725 // eg not assigning a constant to a register in an
1726 // unlikely exit block, and then holding it in a callee save
1727 // reg across lots of unrelated code until its used
1728 // again in another unlikely exit block.
1729 auto second = current->split(blk_range.end, false);
1730 pending.push(second);
1731 } else if (current->constant() &&
1732 current->uses.size() &&
1733 current->uses[0].pos >= blk_range.end) {
1734 // we probably don't want to load a constant into a register
1735 // at the start of a block where its not used.
1736 return spill(current);
1740 // Choose a register to allocate to `current', either via a hint or by
1741 // various heuristics.
1742 auto const choose_reg = [&] {
1743 auto const hint = chooseHint(variables, current,
1744 hint_info, free_until, allow);
1745 if (hint != InvalidReg &&
1746 free_until[hint] >= current->end()) {
1747 return hint;
1750 auto ret = InvalidReg;
1751 for (auto const r : free_until) {
1752 ret = choose_closest_after(current->end(), free_until, ret, r);
1754 return ret;
1757 auto const r = choose_reg();
1758 auto const pos = free_until[r];
1759 if (pos >= current->end()) {
1760 return assignReg(current, r);
1763 if (pos > current->start()) {
1764 // `r' is free for the first part of current.
1765 auto const prev_use = current->lastUseBefore(pos);
1767 DEBUG_ONLY auto min_split = std::max(prev_use, current->start() + 1);
1768 assertx(min_split <= pos);
1770 auto split_pos = nearestSplitBefore(pos);
1771 if (split_pos > current->start()) {
1772 if (prev_use && prev_use < split_pos) {
1773 // If there are uses in previous blocks, but no uses between the start
1774 // of the block containing `split_pos' and `split_pos' itself, we
1775 // should split earlier; otherwise we'll need to insert moves/loads on
1776 // the edge(s) into this block, which clearly can't be used since we're
1777 // spilling before the first use. Might as well spill on a block
1778 // boundary, as early as possible.
1779 auto prev_range_idx = current->findRange(prev_use);
1780 auto prev_range = &current->ranges[prev_range_idx];
1781 if (prev_range->start <= prev_use && prev_range->end < split_pos) {
1782 prev_range++;
1784 if (prev_range->start > prev_use && prev_range->start < split_pos) {
1785 split_pos = prev_range->start;
1789 // Split `current'. We keep uses at the end of the first split because
1790 // we know that `r' is free up to /and including/ that position.
1791 auto second = current->split(split_pos, true /* keep_uses */);
1792 pending.push(second);
1794 // Try to find a register again. Since `current' is shorter, we might
1795 // have a different hint, or a different heuristically-determined
1796 // best-reg.
1797 auto const r = choose_reg();
1798 assertx(free_until[r] >= current->end());
1799 return assignReg(current, r);
1800 }
1801 }
1802
1803 // Must spill `current' or another victim.
1804 allocBlocked(current);
1805 }
1806
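/*
 * To recap the decision ladder above with made-up numbers: for
 * current = [10, 50), a register with free_until of 50 or more is assigned
 * outright; one with free_until = 30 forces a split near 30, with the
 * first part getting the register and [30, 50) re-queued; and if every
 * register has free_until = 10 (nothing is free past the start), we fall
 * through to allocBlocked() to pick a victim or spill `current' itself.
 */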
1807 /*
1808 * When all registers are in use, find a good interval (possibly `current') to
1809 * split and spill.
1810 *
1811 * When an interval is split and the second part is spilled, possibly split the
1812 * second part again before the next use-pos that requires a register, and
1813 * enqueue the third part.
1814 */
1815 void Vxls::allocBlocked(Interval* current) {
1816 auto const cur_start = current->start();
1818 RegSet allow;
1819 auto const conflict = constrain(current, allow); // repeated from allocate
1821 // Track the positions (a) at which each PhysReg is next used by any lifetime
1822 // interval to which it's assigned (`used'), and (b) at which each PhysReg is
1823 // next assigned to a value whose lifetime intersects `current' (`blocked').
1824 PosVec used, blocked;
1825 allow.forEach([&](PhysReg r) { used[r] = blocked[r] = conflict; });
1827 // compute next use of active registers, so we can pick the furthest one
1828 for (auto ivl : active) {
1829 if (ivl->fixed()) {
1830 blocked[ivl->reg] = used[ivl->reg] = 0;
1831 } else {
1832 auto use_pos = ivl->firstUseAfter(cur_start);
1833 used[ivl->reg] = std::min(use_pos, used[ivl->reg]);
1834 }
1835 }
1836
1837 // compute next intersection/use of inactive regs to find what's free longest
1838 for (auto ivl : inactive) {
1839 auto const r = ivl->reg;
1840 if (blocked[r] == 0) continue;
1842 auto intersect_pos = nextIntersect(current, ivl);
1843 if (intersect_pos == kMaxPos) continue;
1845 if (ivl->fixed()) {
1846 blocked[r] = std::min(intersect_pos, blocked[r]);
1847 used[r] = std::min(blocked[r], used[r]);
1848 } else {
1849 auto use_pos = ivl->firstUseAfter(cur_start);
1850 used[r] = std::min(use_pos, used[r]);
1851 }
1852 }
1853
1854 // Choose the best victim register(s) to spill---the one with first-use
1855 // after, but closest to, the lifetime of `current'; or the one with farthest
1856 // first-use if no such register exists.
1857 auto r = find_closest_after(current->end(), used);
1859 // If all other registers are used by their owning intervals before the first
1860 // register-use of `current', then we have to spill `current'.
1861 if (used[r] < current->firstUse()) {
1862 return spill(current);
1863 }
1864
1865 auto const block_pos = blocked[r];
1866 if (block_pos < current->end()) {
1867 // If /every/ usable register is assigned to a lifetime interval which
1868 // intersects with `current', we have to split current before that point.
1869 auto prev_use = current->lastUseBefore(block_pos);
1871 DEBUG_ONLY auto min_split = std::max(prev_use, cur_start + 1);
1872 auto max_split = block_pos;
1873 assertx(cur_start < min_split && min_split <= max_split);
1875 auto split_pos = nearestSplitBefore(max_split);
1876 if (split_pos > current->start()) {
1877 auto second = current->split(split_pos, true /* keep_uses */);
1878 pending.push(second);
1879 }
1880
1881 spillOthers(current, r);
1882 assignReg(current, r);
1883 }
1884
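/*
 * Example of the victim choice above (hypothetical positions): for
 * current = [20, 60) with its first register-use at 22, and
 * used[] = {r0: 30, r1: 70}, r1 wins---its owning interval has no use
 * until after `current' ends---so `current' takes r1 and spillOthers()
 * evicts the intervals holding it. If instead every used[r] were below
 * 22, spilling `current' itself would be the cheaper option.
 */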
1885 /*
1886 * Split and spill other intervals that conflict with `current' for register r,
1887 * at current->start().
1888 *
1889 * If necessary, split the victims again before their first use position that
1890 * requires a register.
1891 */
1892 void Vxls::spillOthers(Interval* current, PhysReg r) {
1893 auto const cur_start = current->start();
1895 // Split `ivl' at `cur_start' and spill the second part. If `cur_start' is
1896 // too close to ivl->start(), spill all of `ivl' instead.
1897 auto const spill_after = [&] (Interval* ivl) {
1898 auto const split_pos = nearestSplitBefore(cur_start);
1899 auto const tail = split_pos <= ivl->start() ? ivl : ivl->split(split_pos);
1900 spill(tail);
1901 };
1902
1903 // Split and spill other active intervals after `cur_start'.
1904 auto end = active.end();
1905 for (auto i = active.begin(); i != end;) {
1906 auto other = *i;
1907 if (other->fixed() || r != other->reg) {
1908 i++; continue;
1909 }
1910 *i = *--end;
1911 spill_after(other);
1912 }
1913 active.erase(end, active.end());
1914
1915 // Split and spill any inactive intervals after `cur_start' if they intersect
1916 // with `current'.
1917 end = inactive.end();
1918 for (auto i = inactive.begin(); i != end;) {
1919 auto other = *i;
1920 if (other->fixed() || r != other->reg) {
1921 i++; continue;
1922 }
1923 auto intersect = nextIntersect(current, other);
1924 if (intersect >= current->end()) {
1925 i++; continue;
1926 }
1927 *i = *--end;
1928 spill_after(other);
1929 }
1930 inactive.erase(end, inactive.end());
1931 }
1932
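/*
 * Both loops above use the unordered "swap with the back" erase idiom to
 * drop elements in O(1) without preserving order. A minimal sketch, where
 * `v' and shouldEvict() stand in for active/inactive and the fixed/reg
 * tests:
 *
 *   auto end = v.end();
 *   for (auto i = v.begin(); i != end;) {
 *     if (!shouldEvict(*i)) { i++; continue; }
 *     *i = *--end;           // overwrite with the last live element
 *   }
 *   v.erase(end, v.end());   // drop the dead tail in one shot
 */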
1933 SpillInfo assignRegisters(const VxlsContext& ctx,
1934 const jit::vector<Variable*>& variables,
1935 const HintInfo& hint_info) {
1936 return Vxls(ctx, variables, hint_info).go();
1937 }
1938
1939 ///////////////////////////////////////////////////////////////////////////////
1940 // Lifetime continuity resolution.
1941
1942 /*
1943 * A pair of source block number and successor index, used to identify an
1944 * out-edge.
1945 */
1946 using EdgeKey = std::pair<Vlabel,unsigned>;
1948 struct EdgeHasher {
1949 size_t operator()(EdgeKey k) const {
1950 return size_t(k.first) ^ k.second;
1951 }
1952 };
1953
1954 /*
1955 * Copies that are required at a given position or edge.
1956 *
1957 * The keys into the PhysReg::Map are the dests; the Interval*'s are the
1958 * sources (nullptr if no copy is needed).
1959 */
1960 using CopyPlan = PhysReg::Map<Interval*>;
1961
1962 /*
1963 * Copy and spill points for resolving split lifetime intervals.
1964 *
1965 * After register allocation, some lifetime intervals may have been split, and
1966 * their Vregs assigned to different physical registers or spill locations. We
1967 * use this struct to track where we need to add moves to maintain continuity.
1968 * (We also use it to resolve phis.)
1969 */
1970 struct ResolutionPlan {
1971 // Where to insert spills.
1972 jit::hash_map<unsigned,CopyPlan> spills;
1973 // Where to insert reg-reg moves, or spill and constant loads, between
1974 // instructions (or in place of copy{} and friends).
1975 jit::hash_map<unsigned,CopyPlan> copies;
1977 // Copies and loads on edges (between blocks).
1978 jit::hash_map<EdgeKey,CopyPlan,EdgeHasher> edge_copies;
1979 };
1980
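/*
 * As an illustration (made-up positions and registers): after allocation,
 * copies[25][rax] = <interval of %5, assigned rcx> requests a
 * "rcx -> rax" move just before the instruction at position 26, while
 * edge_copies[{B2, 0}][xmm0] = <spilled interval of %7> requests a reload
 * of %7's spill slot into xmm0 on B2's first out-edge.
 */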
1981 template<class CopyPlanT>
1982 using void_req = typename std::enable_if<
1983 std::is_same<CopyPlanT, CopyPlan>::value ||
1984 std::is_same<CopyPlanT, const CopyPlan>::value
1985 >::type;
1986
1987 /*
1988 * Iterators for reg-reg copies and for {const,spill}-reg loads.
1989 */
1990 template<class CopyPlanT, class F>
1991 void_req<CopyPlanT> for_each_copy(CopyPlanT& plan, F f) {
1992 for (auto dst : plan) {
1993 auto& ivl = plan[dst];
1994 if (!ivl || !ivl->live()) continue;
1995 f(dst, ivl);
1996 }
1997 }
1998 template<class CopyPlanT, class F>
1999 void_req<CopyPlanT> for_each_load(CopyPlanT& plan, F f) {
2000 for (auto dst : plan) {
2001 auto& ivl = plan[dst];
2002 if (!ivl || ivl->live()) continue;
2003 assertx(ivl->constant() || ivl->spilled());
2004 f(dst, ivl);
2005 }
2006 }
2007
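/*
 * A minimal usage sketch, assuming a populated CopyPlan `plan':
 *
 *   for_each_copy(plan, [&] (PhysReg dst, const Interval* src) {
 *     // src->reg holds the value here: emit a src->reg -> dst move.
 *   });
 *   for_each_load(plan, [&] (PhysReg dst, const Interval* src) {
 *     // src is a constant or spilled: emit an ldimm or a slot load.
 *   });
 */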
2008 /*
2009 * Insert a spill after the def-position in `ivl'.
2010 *
2011 * There's only one such position, because of SSA.
2012 */
2013 void insertSpill(const VxlsContext& ctx,
2014 ResolutionPlan& resolution, Interval* ivl) {
2015 auto DEBUG_ONLY checkPos = [&](unsigned pos) {
2016 assertx(pos % 2 == 1);
2017 DEBUG_ONLY auto const b = blockFor(ctx, pos);
2018 DEBUG_ONLY auto const& range = ctx.block_ranges[b];
2019 assertx(pos - 1 >= range.start && pos + 1 < range.end);
2020 return true;
2021 };
2022 auto const spill = [&] (unsigned pos) {
2023 assertx(checkPos(pos));
2024 resolution.spills[pos][ivl->reg] = ivl; // store ivl->reg => ivl->slot
2025 };
2026 if (ivl->var->recordbasenativesps.empty()) {
2027 spill(ivl->var->def_pos + 1);
2028 } else {
2029 for (auto const pos : ivl->var->recordbasenativesps) {
2030 always_assert_flog(ivl->covers(pos),
2031 "Spilling required before native sp set");
2032 spill(pos + 1);
2033 }
2034 }
2035 }
2036
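/*
 * For scale (hypothetical positions): instructions occupy even positions
 * and the gaps between them odd ones, so if %5 is defined by the
 * instruction at position 24, its spill store is planned at 25---directly
 * after the def and before the next instruction at 26. The
 * recordbasenativesps case works the same way, once per recorded position.
 */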
2037 /*
2038 * Insert spills and copies that connect subintervals that were split
2039 * between instructions.
2040 */
2041 void resolveSplits(const VxlsContext& ctx,
2042 const jit::vector<Variable*>& variables,
2043 ResolutionPlan& resolution) {
2044 for (auto var : variables) {
2045 if (!var) continue;
2046 auto i1 = var->ivl();
2047 if (var->slot >= 0) insertSpill(ctx, resolution, i1);
2049 for (auto i2 = i1->next; i2; i1 = i2, i2 = i2->next) {
2050 auto const pos = i2->start();
2051 if (i1->end() != pos) continue; // spans lifetime hole
2052 if (i2->reg == InvalidReg) continue; // no load necessary
2053 if (i2->reg == i1->reg) continue; // no copy necessary
2055 auto const b = blockFor(ctx, pos);
2056 auto const range = ctx.block_ranges[b];
2058 if (pos % 2 == 0) {
2059 // even position requiring a copy must be on edge
2060 assertx(range.start == pos);
2061 } else {
2062 // odd position
2063 assertx(pos > range.start); // implicit label position per block
2064 if (pos + 1 == range.end) continue; // copy belongs on successor edge
2065 }
2066 assertx(!resolution.copies[pos][i2->reg]);
2067 resolution.copies[pos][i2->reg] = i1;
2068 }
2069 }
2070 }
2071
2072
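/*
 * For example (hypothetical): if %5 was split into i1 = [10, 31) in rax
 * and i2 = [31, 50) in rcx, the loop above records copies[31][rcx] = i1,
 * i.e. "move rax -> rcx between the instructions at 30 and 32". Splits
 * that land exactly on a block boundary are skipped here and left to the
 * edge-resolution logic below.
 */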
2073 /*
2074 * Lower copyargs{} and copy{} into moveplans at the same position.
2075 */
2076 void lowerCopies(Vunit& unit, const VxlsContext& ctx,
2077 const jit::vector<Variable*>& variables,
2078 ResolutionPlan& resolution) {
2079 // Add a lifetime-resolving copy from `s' to `d'---without touching the
2080 // instruction stream.
2081 auto const lower = [&] (unsigned pos, Vreg s, Vreg d) {
2082 auto v1 = variables[s];
2083 auto v2 = variables[d];
2084 assertx(v1 && v2);
2085 assertx(v2->fixed() || v2->def_pos == pos); // ssa
2087 auto i1 = v1->fixed() ? v1->ivl() : v1->ivlAtUse(pos);
2088 auto i2 = v2->ivl();
2090 if (i2->reg != i1->reg) {
2091 assertx(!resolution.copies[pos][i2->reg]);
2092 resolution.copies[pos][i2->reg] = i1;
2093 }
2094 };
2095
2096 for (auto b : ctx.blocks) {
2097 auto pos = ctx.block_ranges[b].start;
2099 for (auto& inst : unit.blocks[b].code) {
2100 if (inst.op == Vinstr::copyargs) {
2101 auto const& uses = unit.tuples[inst.copyargs_.s];
2102 auto const& defs = unit.tuples[inst.copyargs_.d];
2103 for (unsigned i = 0, n = uses.size(); i < n; ++i) {
2104 lower(pos, uses[i], defs[i]);
2105 }
2106 inst = nop{};
2107 } else if (inst.op == Vinstr::copy2) {
2108 lower(pos, inst.copy2_.s0, inst.copy2_.d0);
2109 lower(pos, inst.copy2_.s1, inst.copy2_.d1);
2110 inst = nop{};
2111 } else if (inst.op == Vinstr::copy) {
2112 lower(pos, inst.copy_.s, inst.copy_.d);
2113 inst = nop{};
2114 }
2115 pos += 2;
2116 }
2117 }
2118 }
2119
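/*
 * E.g. (made-up operands) a parallel copy copyargs{s={%5,%6}, d={%8,%9}}
 * at position 40 becomes copies[40][reg(%8)] = ivl(%5) and
 * copies[40][reg(%9)] = ivl(%6), and the instruction itself is replaced
 * with nop{}; the actual moves are materialized later by insertCopiesAt(),
 * which knows how to sequence or swap them safely.
 */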
2120 /*
2121 * Search for the phidef in block `b', then return its dest tuple.
2122 */
2123 Vtuple findPhiDefs(const Vunit& unit, Vlabel b) {
2124 assertx(!unit.blocks[b].code.empty() &&
2125 unit.blocks[b].code.front().op == Vinstr::phidef);
2126 return unit.blocks[b].code.front().phidef_.defs;
2127 }
2128
2129 /*
2130 * Register copy resolutions for livein sets and phis.
2131 */
2132 void resolveEdges(Vunit& unit, const VxlsContext& ctx,
2133 const jit::vector<Variable*>& variables,
2134 ResolutionPlan& resolution) {
2135 auto const addPhiEdgeCopies = [&] (Vlabel block, Vlabel target,
2136 uint32_t targetIndex,
2137 const VregList& uses) {
2138 auto const p1 = ctx.block_ranges[block].end - 2;
2139 auto const& defs = unit.tuples[findPhiDefs(unit, target)];
2141 for (unsigned i = 0, n = uses.size(); i < n; ++i) {
2142 auto v1 = variables[uses[i]];
2143 auto v2 = variables[defs[i]];
2144 assertx(v1 && v2);
2146 auto i1 = v1->fixed() ? v1->ivl() : v1->ivlAtUse(p1);
2147 auto i2 = v2->ivl();
2149 if (i2->reg != i1->reg) {
2150 EdgeKey edge { block, targetIndex };
2151 assertx(!resolution.edge_copies[edge][i2->reg]);
2152 resolution.edge_copies[edge][i2->reg] = i1;
2153 }
2154 }
2155 };
2156
2157 for (auto b1 : ctx.blocks) {
2158 auto const p1 = ctx.block_ranges[b1].end - 2;
2159 auto& block1 = unit.blocks[b1];
2160 auto& inst1 = block1.code.back();
2162 // Add resolutions for phis.
2163 if (inst1.op == Vinstr::phijmp) {
2164 auto const& phijmp = inst1.phijmp_;
2165 auto const target = phijmp.target;
2166 auto const& uses = unit.tuples[phijmp.uses];
2167 addPhiEdgeCopies(b1, target, 0, uses);
2168 inst1 = jmp{target};
2169 }
2170
2171 auto const succlist = succs(block1);
2173 // Add resolutions for livein sets.
2174 for (unsigned i = 0, n = succlist.size(); i < n; i++) {
2175 auto const b2 = succlist[i];
2176 auto const p2 = ctx.block_ranges[b2].start;
2178 forEach(ctx.livein[b2], [&] (Vreg vr) {
2179 auto var = variables[vr];
2180 if (var->fixed()) return;
2181 Interval* i1 = nullptr;
2182 Interval* i2 = nullptr;
2184 for (auto ivl = var->ivl(); ivl && !(i1 && i2); ivl = ivl->next) {
2185 if (ivl->covers(p1)) i1 = ivl;
2186 if (ivl->covers(p2)) i2 = ivl;
2187 }
2188
2189 // i2 can be unallocated if the tmp is a constant or is spilled.
2190 if (i2->reg != InvalidReg && i2->reg != i1->reg) {
2191 assertx((resolution.edge_copies[{b1,i}][i2->reg] == nullptr));
2192 resolution.edge_copies[{b1,i}][i2->reg] = i1;
2193 }
2194 });
2195 }
2196 }
2197 }
2198
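/*
 * Concretely (hypothetical): if %5 is live from B1 into B2, covered by
 * i1 (in rax) at the end of B1 and by i2 (in rdx) at the start of B2, we
 * record edge_copies[{B1, idx(B2)}][rdx] = i1, so a "rax -> rdx" move (or
 * a reload, if i1 was spilled) is inserted on that edge.
 */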
2199 /*
2200 * Walk through the variables list and account for all points where copies or
2201 * spills need to be made.
2202 */
2203 ResolutionPlan resolveLifetimes(Vunit& unit, const VxlsContext& ctx,
2204 const jit::vector<Variable*>& variables) {
2205 ResolutionPlan resolution;
2207 resolveSplits(ctx, variables, resolution);
2208 lowerCopies(unit, ctx, variables, resolution);
2209 resolveEdges(unit, ctx, variables, resolution);
2211 return resolution;
2212 }
2213
2214 ///////////////////////////////////////////////////////////////////////////////
2215 // Operand renaming.
2216
2217 /*
2218 * Visitor class for renaming registers.
2219 */
2220 struct Renamer {
2221 Renamer(const jit::vector<Variable*>& variables, unsigned pos)
2222 : variables(variables)
2223 , pos(pos)
2224 {}
2225
2226 template <class T>
2227 void imm(const T& /*r*/) {}
2228 template<class R> void def(R& r) { rename(r); }
2229 template<class D, class H> void defHint(D& dst, H) { rename(dst); }
2230 template<class R> void use(R& r) { rename(r); }
2231 template<class S, class H> void useHint(S& src, H) { rename(src); }
2232 template<class R> void across(R& r) { rename(r); }
2233 void across(Vptr& m) {
2234 if (m.base.isValid()) rename(m.base);
2235 if (m.index.isValid()) rename(m.index);
2236 }
2237 template<Width w> void across(Vp<w>& m) { across(static_cast<Vptr&>(m)); }
2239 void def(RegSet) {}
2240 void use(RegSet /*r*/) {}
2241 void use(Vptr& m) {
2242 if (m.base.isValid()) rename(m.base);
2243 if (m.index.isValid()) rename(m.index);
2244 }
2245 template<Width w> void use(Vp<w>& m) { use(static_cast<Vptr&>(m)); }
2247 void use(VcallArgsId) { always_assert(false && "vcall unsupported in vxls"); }
2249 private:
2250 void rename(Vreg8& r) { r = lookup(r, Constraint::Gpr); }
2251 void rename(Vreg16& r) { r = lookup(r, Constraint::Gpr); }
2252 void rename(Vreg32& r) { r = lookup(r, Constraint::Gpr); }
2253 void rename(Vreg64& r) { r = lookup(r, Constraint::Gpr); }
2254 void rename(VregDbl& r) { r = lookup(r, Constraint::Simd); }
2255 void rename(Vreg128& r) { r = lookup(r, Constraint::Simd); }
2256 void rename(VregSF& r) { r = RegSF{0}; }
2257 void rename(Vreg& r) { r = lookup(r, Constraint::Any); }
2258 void rename(Vtuple /*t*/) { /* phijmp+phidef handled by resolveEdges */
2259 }
2260
2261 PhysReg lookup(Vreg vreg, Constraint kind) {
2262 auto var = variables[vreg];
2263 if (!var || vreg.isPhys()) return vreg;
2264 PhysReg reg = var->ivlAtUse(pos)->reg;
2265 assertx((kind == Constraint::Gpr && reg.isGP()) ||
2266 (kind == Constraint::Simd && reg.isSIMD()) ||
2267 (kind == Constraint::Sf && reg.isSF()) ||
2268 (kind == Constraint::Any && reg != InvalidReg));
2269 return reg;
2270 }
2271 private:
2272 const jit::vector<Variable*>& variables;
2273 unsigned pos;
2274 };
2275
2276 /*
2277 * Visit every virtual-register typed operand in `unit', and rename it to its
2278 * assigned physical register.
2279 */
2280 void renameOperands(Vunit& unit, const VxlsContext& ctx,
2281 const jit::vector<Variable*>& variables) {
2282 for (auto b : ctx.blocks) {
2283 auto pos = ctx.block_ranges[b].start;
2284 for (auto& inst : unit.blocks[b].code) {
2285 Renamer renamer(variables, pos);
2286 visitOperands(inst, renamer);
2287 pos += 2;
2288 }
2289 }
2290 ONTRACE(
2291 kVasmRegAllocDetailLevel,
2292 printVariables("after renaming operands", unit, ctx, variables);
2293 );
2294 }
2295
2296 ///////////////////////////////////////////////////////////////////////////////
2297 // Copy insertion.
2298
2299 /*
2300 * Insert stores for `spills' (with spill space starting at `slots') into
2301 * `code' before code[j], corresponding to XLS logical position `pos'.
2302 *
2303 * Updates `j' to refer to the same instruction after the code insertions.
2304 */
2305 void insertSpillsAt(jit::vector<Vinstr>& code, unsigned& j,
2306 const CopyPlan& spills, folly::Optional<MemoryRef> slots,
2307 unsigned pos) {
2308 jit::vector<Vinstr> stores;
2310 for (auto src : spills) {
2311 auto ivl = spills[src];
2312 if (!ivl) continue;
2314 always_assert_flog(slots, "Spilling before native sp is set (pos {})",
2315 pos);
2316 auto slot = ivl->var->slot;
2317 assertx(slot >= 0 && src == ivl->reg);
2318 MemoryRef ptr{slots->r + slotOffset(slot)};
2320 if (!ivl->var->wide) {
2321 always_assert_flog(!src.isSF(), "Tried to spill %flags");
2322 stores.emplace_back(store{src, ptr});
2323 } else {
2324 assertx(src.isSIMD());
2325 stores.emplace_back(storeups{src, ptr});
2326 }
2327 }
2328 insertCodeAt(code, j, stores, pos);
2329 }
2330
2331 /*
2332 * Insert reg-reg moves for `copies' into `code' before code[j], corresponding
2333 * to XLS logical position `pos'.
2334 *
2335 * Updates `j' to refer to the same instruction after the code insertions.
2336 */
2337 void insertCopiesAt(const VxlsContext& ctx,
2338 jit::vector<Vinstr>& code, unsigned& j,
2339 const CopyPlan& plan, unsigned pos) {
2340 MovePlan moves;
2341 jit::vector<Vinstr> copies;
2343 for_each_copy(plan, [&] (PhysReg dst, const Interval* ivl) {
2344 moves[dst] = ivl->reg;
2345 });
2346 auto const hows = doRegMoves(moves, ctx.tmp);
2348 for (auto const& how : hows) {
2349 if (how.m_kind == MoveInfo::Kind::Xchg) {
2350 copies.emplace_back(copy2{how.m_src, how.m_dst, how.m_dst, how.m_src});
2351 } else {
2352 copies.emplace_back(copy{how.m_src, how.m_dst});
2353 }
2354 }
2355 insertCodeAt(code, j, copies, pos);
2356 }
2357
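/*
 * doRegMoves() orders the parallel move set so that no destination is
 * clobbered before it's read, using ctx.tmp or an exchange to break
 * cycles. E.g. the plan {rax <- rcx, rcx <- rax} can't be done with two
 * plain moves; it comes back as a single Xchg, which we encode above as
 * the self-inverse copy2{rcx, rax, rax, rcx}.
 */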
2358 /*
2359 * Insert constant loads or loads from spill space---with spill space starting
2360 * at `slots'---for `loads' into `code' before code[j], corresponding to XLS
2361 * logical position `pos'.
2362 *
2363 * Updates `j' to refer to the same instruction after the code insertions.
2364 */
2365 void insertLoadsAt(jit::vector<Vinstr>& code, unsigned& j,
2366 const CopyPlan& plan, folly::Optional<MemoryRef> slots,
2367 unsigned pos) {
2368 jit::vector<Vinstr> loads;
2370 for_each_load(plan, [&] (PhysReg dst, const Interval* ivl) {
2371 if (ivl->constant()) {
2372 if (ivl->var->val.isUndef) return;
2373 loads.push_back([&]() -> Vinstr {
2374 switch (ivl->var->val.kind) {
2375 case Vconst::Quad:
2376 case Vconst::Double:
2377 return ldimmq{uint64_t(ivl->var->val.val), dst};
2378 case Vconst::Long:
2379 return ldimml{int32_t(ivl->var->val.val), dst};
2380 case Vconst::Byte:
2381 return ldimmb{uint8_t(ivl->var->val.val), dst};
2383 not_reached();
2384 }());
2385 } else if (ivl->spilled()) {
2386 always_assert_flog(slots, "Reloading before native sp is set (pos {})",
2387 pos);
2388 MemoryRef ptr{slots->r + slotOffset(ivl->var->slot)};
2389 if (!ivl->var->wide) {
2390 loads.emplace_back(load{ptr, dst});
2391 } else {
2392 assertx(dst.isSIMD());
2393 loads.emplace_back(loadups{ptr, dst});
2394 }
2395 }
2396 });
2397 insertCodeAt(code, j, loads, pos);
2398 }
2399
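/*
 * Constants are rematerialized rather than reloaded: a constant interval
 * that didn't get a register is re-created at its use from the immediate
 * (e.g. ldimmq{42, rdx}, register hypothetical), while a genuinely
 * spilled value comes back from its slot via load/loadups. Undef
 * constants need no instruction at all.
 */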
2400 /*
2401 * Mutate the Vinstr stream by inserting copies.
2402 *
2403 * This destroys the position numbering, so we can't use interval positions
2404 * after this.
2405 */
2406 void insertCopies(Vunit& unit, const VxlsContext& ctx,
2407 const jit::vector<Variable*>& /*variables*/,
2408 const ResolutionPlan& resolution) {
2409 auto const getSlots = [&] (folly::Optional<int> offset) {
2410 folly::Optional<MemoryRef> slots;
2411 if (offset) slots = ctx.sp[*offset];
2412 return slots;
2413 };
2414
2415 // Insert copies inside blocks.
2416 for (auto const b : ctx.blocks) {
2417 auto& block = unit.blocks[b];
2418 auto& code = block.code;
2419 auto pos = ctx.block_ranges[b].start;
2420 auto offset = ctx.spill_offsets[b];
2422 for (unsigned j = 0; j < code.size(); j++, pos += 2) {
2423 auto const slots = getSlots(offset);
2425 // Spills, reg-reg moves, and loads of constant values or spill space all
2426 // occur between instructions. Insert them in order.
2427 auto s = resolution.spills.find(pos - 1);
2428 if (s != resolution.spills.end()) {
2429 insertSpillsAt(code, j, s->second, slots, pos - 1);
2430 }
2431 auto c = resolution.copies.find(pos - 1);
2432 if (c != resolution.copies.end()) {
2433 insertCopiesAt(ctx, code, j, c->second, pos - 1);
2434 insertLoadsAt(code, j, c->second, slots, pos - 1);
2435 }
2436
2437 // Insert copies and loads at instructions.
2438 c = resolution.copies.find(pos);
2439 if (c != resolution.copies.end()) {
2440 insertCopiesAt(ctx, code, j, c->second, pos);
2441 insertLoadsAt(code, j, c->second, slots, pos);
2442 }
2443 assertx(resolution.spills.count(pos) == 0);
2444 if (code[j].op == Vinstr::recordbasenativesp) {
2445 assert_flog(!offset, "Block B{} Instr {} initializes native SP, but "
2446 "it is already initialized.", size_t(b), j);
2447 offset = 0;
2448 } else if (code[j].op == Vinstr::unrecordbasenativesp) {
2449 assert_flog(offset, "Block B{} Instr {} uninitializes native SP, but "
2450 "it is already uninitialized.", size_t(b), j);
2451 assert_flog(*offset == 0, "Block B{} Instr {} uninitializes native "
2452 "SP, but the SP offset is non-zero.", size_t(b), j);
2453 offset = folly::none;
2454 } else if (offset) {
2455 *offset -= spEffect(unit, code[j], ctx.sp);
2456 }
2457 }
2458 }
2459
2460 // insert copies on edges
2461 for (auto const b : ctx.blocks) {
2462 auto& block = unit.blocks[b];
2463 auto succlist = succs(block);
2465 if (succlist.size() == 1) {
2466 // copies will go at end of b
2467 auto const c = resolution.edge_copies.find({b, 0});
2468 if (c != resolution.edge_copies.end()) {
2469 auto& code = block.code;
2470 unsigned j = code.size() - 1;
2471 auto const pos = ctx.block_ranges[b].end - 1;
2472 auto const offset = ctx.spill_offsets[succlist[0]];
2473 auto const slots = getSlots(offset);
2475 // We interleave copies and loads in `edge_copies', so here and below
2476 // we process them separately.
2477 insertCopiesAt(ctx, code, j, c->second, pos);
2478 insertLoadsAt(code, j, c->second, slots, pos);
2479 }
2480 } else {
2481 // copies will go at start of successor
2482 for (int i = 0, n = succlist.size(); i < n; i++) {
2483 auto s = succlist[i];
2484 auto const c = resolution.edge_copies.find({b, i});
2485 if (c != resolution.edge_copies.end()) {
2486 auto& code = unit.blocks[s].code;
2487 unsigned j = 0;
2488 auto const pos = ctx.block_ranges[s].start;
2489 auto const offset = ctx.spill_offsets[s];
2490 auto const slots = getSlots(offset);
2492 insertCopiesAt(ctx, code, j, c->second, pos);
2493 insertLoadsAt(code, j, c->second, slots, pos);
2494 }
2495 }
2496 }
2497 }
2498 }
2499
2500 ///////////////////////////////////////////////////////////////////////////////
2501
2502 /*
2503 * Peephole cleanup pass.
2504 *
2505 * Remove no-op copy sequences before allocating spill space, since doing so
2506 * might modify the CFG.
2507 */
2508 void peephole(Vunit& unit, const VxlsContext& ctx) {
2509 // Whether a Vinstr is a register swap.
2510 auto const match_xchg = [] (Vinstr& i, Vreg& r0, Vreg& r1) {
2511 if (i.op != Vinstr::copy2) return false;
2512 r0 = i.copy2_.s0;
2513 r1 = i.copy2_.s1;
2514 return r0 == i.copy2_.d1 && r1 == i.copy2_.d0;
2515 };
2516
2517 for (auto b : ctx.blocks) {
2518 auto& code = unit.blocks[b].code;
2519 for (int i = 0, n = code.size(); i + 1 < n; i++) {
2520 Vreg r0, r1, r2, r3;
2521 if (match_xchg(code[i], r0, r1) &&
2522 match_xchg(code[i + 1], r2, r3) &&
2523 ((r0 == r2 && r1 == r3) || (r0 == r3 && r1 == r2))) {
2524 // matched xchg+xchg that cancel each other
2525 code[i] = nop{};
2526 code[i + 1] = nop{};
2527 i++;
2528 }
2529 }
2530 auto end = std::remove_if(code.begin(), code.end(), [&](Vinstr& inst) {
2531 return is_trivial_nop(inst) ||
2532 inst.op == Vinstr::phidef; // we lowered it
2533 });
2534 code.erase(end, code.end());
2535 }
2536 }
2537
2538 ///////////////////////////////////////////////////////////////////////////////
2539 // Spill space allocation.
2542 * SpillState is used by allocateSpillSpace() to decide where to allocate/free
2543 * spill space. It represents the state of the spill space as a whole and is
2544 * computed before each individual instruction.
2546 * Order is important in this enum: it's only legal to transition to states
2547 * with higher values, and states are merged using std::max().
2549 enum SpillState : uint8_t {
2550 // State is uninitialized. All blocks' in-states start here.
2551 Uninit,
2552
2553 // Allocating spill space is not currently possible; any spill space must
2554 // be allocated after this point.
2555 NoSpillPossible,
2556
2557 // Spill space is not currently needed; it's safe to allocate spill space
2558 // after this point.
2559 NoSpill,
2560
2561 // Spill space is needed and must be allocated at or before this point.
2562 NeedSpill,
2563 };
2564
2565 /*
2566 * SpillStates is used to hold in/out state for each block after the analysis
2567 * pass of allocateSpillSpace().
2568 */
2569 struct SpillStates {
2570 SpillState in;
2571 SpillState out;
2572 bool changes;
2573 };
2574
2575 /*
2576 * Returns true if spill space must be allocated before execution of this
2577 * instruction. In order to keep things simple, we return true for any
2578 * instruction that reads or writes sp.
2579 */
2580 bool instrNeedsSpill(const Vunit& unit, const Vinstr& inst, PhysReg sp) {
2581 // Implicit sp input/output.
2582 if (spEffect(unit, inst, sp, false) != 0) return true;
2584 auto foundSp = false;
2585 visitDefs(unit, inst, [&] (Vreg r) { if (r == sp) foundSp = true; });
2586 if (foundSp) return true;
2588 visitUses(unit, inst, [&] (Vreg r) { if (r == sp) foundSp = true; });
2589 return foundSp;
2590 }
2591
2592 /*
2593 * Return the required SpillState coming into inst. prevState must not be
2594 * Uninit.
2595 */
2596 SpillState instrInState(const Vunit& unit, const Vinstr& inst,
2597 SpillState prevState, PhysReg sp) {
2598 switch (prevState) {
2599 case Uninit: break;
2601 case NoSpillPossible:
2602 if (inst.op == Vinstr::recordbasenativesp) return NoSpill;
2603 return NoSpillPossible;
2605 case NoSpill:
2606 if (inst.op == Vinstr::unrecordbasenativesp) return NoSpillPossible;
2607 if (instrNeedsSpill(unit, inst, sp)) return NeedSpill;
2608 return NoSpill;
2610 case NeedSpill:
2611 if (inst.op == Vinstr::unrecordbasenativesp) return NoSpillPossible;
2612 return NeedSpill;
2613 }
2614
2615 always_assert(false);
2616 }
2617
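/*
 * The transitions above form a small state machine:
 *
 *   NoSpillPossible   --recordbasenativesp-->   NoSpill
 *   NoSpill           --instrNeedsSpill()-->    NeedSpill
 *   NoSpill/NeedSpill --unrecordbasenativesp--> NoSpillPossible
 *
 * The unrecord arrow is the only one that moves to a lower value, and it
 * is what forces the mutation pass below to free spill space before the
 * unrecord point.
 */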
2618 /*
2619 * processSpillExits() can insert jcc{} instructions in the middle of a
2620 * block. fixupBlockJumps() breaks the given block after any jccs, making the
2621 * unit valid again. This is done as a separate pass from the work in
2622 * processSpillExits() to reduce complexity.
2623 */
2624 void fixupBlockJumps(Vunit& unit, Vlabel label) {
2625 jit::vector<Vinstr> origCode;
2626 origCode.swap(unit.blocks[label].code);
2627 auto insertBlock = [&]() -> Vblock& { return unit.blocks[label]; };
2629 for (auto const& inst : origCode) {
2630 insertBlock().code.emplace_back(inst);
2632 if (inst.op == Vinstr::jcc && !inst.jcc_.targets[0].isValid()) {
2633 auto newLabel = unit.makeBlock(insertBlock().area_idx,
2634 insertBlock().weight);
2635 insertBlock().code.back().jcc_.targets[0] = newLabel;
2636 label = newLabel;
2637 }
2638 }
2639 }
2640
2641 /*
2642 * Walk through the given block, undoing any fallbackcc/bindjcc optimizations
2643 * that happen in an area where spill space is live. The latter transformation
2644 * is necessary to make the hidden edge out of the fallbackcc{} explicit, so we
2645 * can insert an lea on it to free spill space. It takes something like this:
2647 * B0:
2648 * cmpbim 0, %rbp[0x10] => %flags
2649 * fallbackcc CC_E, %flags, <SrcKey>
2650 * ...
2652 * and turns it into something like this:
2654 * B0:
2655 * cmpbim 0, %rbp[0x10] => %flags
2656 * jcc CC_E, %flags -> B3, else B2
2657 * B2:
2658 * ...
2659 * B3:
2660 * lea %rsp[0x20] => %rsp
2661 * fallback <SrcKey>
2662 */
2663 void processSpillExits(Vunit& unit, Vlabel label, SpillState state,
2664 Vinstr free, PhysReg sp) {
2665 auto code = &unit.blocks[label].code;
2666 auto needFixup = false;
2668 for (unsigned i = 0, n = code->size(); i < n; ++i) {
2669 auto& inst = (*code)[i];
2670 state = instrInState(unit, inst, state, sp);
2672 if (state < NeedSpill ||
2673 (inst.op != Vinstr::fallbackcc &&
2674 inst.op != Vinstr::bindjcc &&
2675 inst.op != Vinstr::jcci)) {
2676 continue;
2677 }
2678
2679 FTRACE(3, "Breaking out {}: {}\n", label, show(unit, inst));
2680 auto target = unit.makeBlock(AreaIndex::Cold, 0);
2681 // makeBlock might reallocate unit.blocks
2682 code = &unit.blocks[label].code;
2684 auto& targetCode = unit.blocks[target].code;
2685 free.set_irctx(inst.irctx());
2686 ConditionCode cc;
2687 Vreg sf;
2688 if (inst.op == Vinstr::fallbackcc) {
2689 auto const& fb_i = inst.fallbackcc_;
2690 targetCode.emplace_back(free);
2691 targetCode.emplace_back(fallback{fb_i.target, fb_i.spOff, fb_i.args});
2692 cc = fb_i.cc;
2693 sf = fb_i.sf;
2694 } else if (inst.op == Vinstr::bindjcc) {
2695 auto const& bj_i = inst.bindjcc_;
2696 targetCode.emplace_back(free);
2697 targetCode.emplace_back(bindjmp{bj_i.target, bj_i.spOff, bj_i.args});
2698 cc = bj_i.cc;
2699 sf = bj_i.sf;
2700 } else /* inst.op == Vinstr::jcci */ {
2701 auto const& jcc_i = inst.jcci_;
2702 targetCode.emplace_back(free);
2703 targetCode.emplace_back(jmpi{jcc_i.taken});
2704 cc = jcc_i.cc;
2705 sf = jcc_i.sf;
2706 }
2707 targetCode.back().set_irctx(inst.irctx());
2708
2709 // Next is set to an invalid block that will be fixed up once we're done
2710 // iterating through the original block.
2711 inst = jcc{cc, sf, {Vlabel{}, target}};
2712 needFixup = true;
2713 }
2714
2715 if (needFixup) fixupBlockJumps(unit, label);
2716 }
2717
2718 /*
2719 * Merge src into dst, returning true iff dst was changed.
2720 */
2721 bool mergeSpillStates(SpillState& dst, SpillState src) {
2722 assertx(src != Uninit);
2723 if (dst == src) return false;
2725 // The only permitted merges result in a state with a higher value. This
2726 // holds because we can't merge a NoSpillPossible with a non-NoSpillPossible
2727 // state: that would mean a program point both can and can't have spills.
2728 assertx(IMPLIES(src != NoSpillPossible, dst != NoSpillPossible));
2729 auto const oldDst = dst;
2730 dst = std::max(dst, src);
2731 return dst != oldDst;
2732 }
2733
2734 std::default_random_engine s_stressRand(0xfaceb00c);
2735 std::uniform_int_distribution<int> s_stressDist(1,7);
2736
2737 /*
2738 * If the current unit used any spill slots, allocate and free spill space
2739 * where appropriate. Spill space is allocated right before it's needed and
2740 * freed before any instruction that exits the unit, which is any block-ending
2741 * instruction with no successors in the unit. fallbackcc{} and bindjcc{}
2742 * instructions have hidden edges that exit the unit, and if we encounter one
2743 * while spill space is live, we have to make that exit edge explicit to insert
2744 * code on it (see processSpillExits()). This makes the exit path more
2745 * expensive, so we try to allocate spill space as late as possible to avoid
2746 * pessimising fallbackcc/bindjcc instructions unless it's really
2747 * necessary. The algorithm uses two passes:
2748 *
2749 * Analysis:
2750 * - For each block in RPO:
2751 * - Load in-state, which has been populated by at least one predecessor
2752 * (or manually set to NoSpillPossible for the entry block).
2753 * - Analyze each instruction in the block, determining what state the
2754 * spill space must be in before executing it.
2755 * - Record out-state for the block and propagate to successors. If this
2756 * changes the in-state for any of them, enqueue them for (re)processing.
2757 *
2758 * Mutation:
2759 * - For each block (we use RPO to only visit reachable blocks but order
2760 * doesn't matter):
2761 * - Inspect the block's in-state and out-state:
2762 * - NoSpill in: Walk the block to see if we need to allocate spill space
2763 * before any instructions.
2764 * - NoSpill out: Allocate spill space on any edges to successors with
2765 * NeedSpill in-states. Also walk block and check for deallocation of
2766 * spill space.
2767 * - NeedSpill out: If the block has no in-unit successors, free spill
2768 * space before the block-end instruction.
2769 * - != NoSpill out: Look for any fallbackcc/bindjcc instructions,
2770 * deoptimizing as appropriate (see processSpillExits()).
2771 */
2772 void allocateSpillSpace(Vunit& unit, const VxlsContext& ctx,
2773 SpillInfo& spi) {
2774 if (spi.used_spill_slots == 0) return;
2775 Timer t(Timer::vasm_reg_alloc_spill, unit.log_entry);
2777 // Make sure we always allocate spill space in multiples of 16 bytes, to keep
2778 // alignment straightforward.
2779 if (spi.used_spill_slots % 2) spi.used_spill_slots++;
2780 FTRACE(1, "Allocating {} spill slots\n", spi.used_spill_slots);
2782 auto const spillSize = safe_cast<int32_t>(slotOffset(spi.used_spill_slots));
2783 // Pointer manipulation is traditionally done with lea, and it's safe to
2784 // insert even where flags might be live.
2785 Vinstr alloc = lea{ctx.sp[-spillSize], ctx.sp};
2786 Vinstr free = lea{ctx.sp[spillSize], ctx.sp};
2788 jit::vector<uint32_t> rpoIds(unit.blocks.size());
2789 for (uint32_t i = 0; i < ctx.blocks.size(); ++i) rpoIds[ctx.blocks[i]] = i;
2791 jit::vector<SpillStates> states(unit.blocks.size(), {Uninit, Uninit, false});
2792 states[unit.entry].in = NoSpillPossible;
2793 dataflow_worklist<uint32_t> worklist(unit.blocks.size());
2794 worklist.push(0);
2796 // Walk the blocks in rpo. At the end of each block, propagate its out-state
2797 // to successors, adding them to the worklist if their in-state
2798 // changes. Blocks may be visited multiple times if loops are present.
2799 while (!worklist.empty()) {
2800 auto const label = ctx.blocks[worklist.pop()];
2801 auto const& block = unit.blocks[label];
2802 auto const stateIn = states[label].in;
2803 auto state = stateIn;
2805 for (auto& inst : block.code) {
2806 state = instrInState(unit, inst, state, ctx.sp);
2807 if (state != stateIn) states[label].changes = true;
2808 }
2809 states[label].out = state;
2810
2811 for (auto s : succs(block)) {
2812 if (mergeSpillStates(states[s].in, state)) {
2813 worklist.push(rpoIds[s]);
2814 }
2815 }
2816 }
2817
2818 // Do a single mutation pass over the blocks.
2819 for (auto const label : ctx.blocks) {
2820 auto state = states[label];
2821 auto& block = unit.blocks[label];
2823 // Any block with a non NeedSpill in-state might have an instruction in it
2824 // that needs spill space, which we allocate right before the instruction
2825 // in question.
2826 if (state.in != NeedSpill && (state.out == NeedSpill || state.changes)) {
2827 auto blockState = state.in;
2828 for (auto it = block.code.begin(); it != block.code.end(); ++it) {
2829 blockState = instrInState(unit, *it, blockState, ctx.sp);
2830 if (blockState == NeedSpill) {
2831 FTRACE(3, "alloc spill before {}: {}\n", label, show(unit, *it));
2832 alloc.set_irctx(it->irctx());
2833 block.code.insert(it, alloc);
2834 break;
2835 }
2836 }
2837 }
2838
2839 // Allocate spill space on edges from a NoSpill out-state to a NeedSpill
2840 // in-state.
2841 auto const successors = succs(block);
2842 if (state.out == NoSpill || state.out == NoSpillPossible) {
2843 for (auto s : successors) {
2844 if (states[s].in == NeedSpill) {
2845 assertx(state.out != NoSpillPossible);
2846 FTRACE(3, "alloc spill on edge from {} -> {}\n", label, s);
2847 auto it = std::prev(block.code.end());
2848 alloc.set_irctx(it->irctx());
2849 block.code.insert(it, alloc);
2850 }
2851 }
2852 }
2853
2854 // Any block that unrecords the base native sp must have the spill space
2855 // removed at the appropriate point.
2856 if ((state.in == NeedSpill || state.changes) && state.out != NeedSpill) {
2857 auto blockState = state.in;
2858 for (auto it = block.code.begin(); it != block.code.end(); ++it) {
2859 blockState = instrInState(unit, *it, blockState, ctx.sp);
2860 if (blockState == NoSpillPossible) {
2861 FTRACE(3, "free spill before {}: {}\n", label, show(unit, *it));
2862 free.set_irctx(it->irctx());
2863 block.code.insert(it, free);
2864 break;
2865 }
2866 }
2867 }
2868
2869 // Any block with a NeedSpill out-state and no successors must free spill
2870 // space right before the block-end instruction. We ignore trap so spill
2871 // space is still allocated in core files.
2872 if (state.out == NeedSpill && successors.empty() &&
2873 block.code.back().op != Vinstr::trap) {
2874 auto it = std::prev(block.code.end());
2875 FTRACE(3, "free spill before {}: {}\n", label, show(unit, (*it)));
2876 free.set_irctx(it->irctx());
2877 block.code.insert(it, free);
2878 }
2879
2880 // Any block that ends with NeedSpill needs to be walked to look for places
2881 // to free spill space.
2882 if (state.out == NeedSpill) {
2883 processSpillExits(unit, label, state.in, free, ctx.sp);
2884 }
2885 }
2886 }
2887
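/*
 * The inserted instructions themselves are plain stack-pointer
 * adjustments. E.g. with four slots (32 bytes, already a multiple of 16)
 * the pass would materialize, at the chosen points:
 *
 *   lea %rsp[-0x20] => %rsp   // allocate spill space
 *   lea %rsp[0x20] => %rsp    // free spill space
 *
 * where the offset is slotOffset(used_spill_slots); registers shown in
 * x64 syntax for illustration.
 */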
2888 ///////////////////////////////////////////////////////////////////////////////
2889 // Printing.
2891 std::string Interval::toString() const {
2892 std::ostringstream out;
2893 auto delim = "";
2894 if (reg != InvalidReg) {
2895 out << show(reg);
2896 delim = " ";
2897 }
2898 if (constant()) {
2899 out << delim << folly::format("#{:08x}", var->val.val);
2900 }
2901 if (var->slot >= 0) {
2902 out << delim << folly::format("[%sp+{}]", slotOffset(var->slot));
2903 }
2904 delim = "";
2905 out << " [";
2906 for (auto const& r : ranges) {
2907 out << delim << folly::format("{}-{}", r.start, r.end);
2908 delim = ",";
2909 }
2910 out << ") {";
2911 delim = "";
2912 for (auto const& u : uses) {
2913 if (u.pos == var->def_pos) {
2914 if (u.hint.isValid()) {
2915 out << delim << "@" << u.pos << "=" << show(u.hint);
2916 } else {
2917 out << delim << "@" << u.pos << "=";
2918 }
2919 } else {
2920 auto hint_delim = u.kind == Constraint::CopySrc ? "=?" : "=@";
2921 if (u.hint.isValid()) {
2922 out << delim << show(u.hint) << hint_delim << u.pos;
2923 } else {
2924 out << delim << hint_delim << u.pos;
2925 }
2926 }
2927 delim = ",";
2928 }
2929 out << "}";
2930 return out.str();
2931 }
2932
2933 DEBUG_ONLY void dumpVariables(const jit::vector<Variable*>& variables,
2934 unsigned num_spills) {
2935 Trace::traceRelease("Spills %u\n", num_spills);
2936 for (auto var : variables) {
2937 if (!var || var->fixed()) continue;
2938 Trace::traceRelease("%%%-2lu %s\n", size_t(var->vreg),
2939 var->ivl()->toString().c_str());
2940 for (auto ivl = var->ivl()->next; ivl; ivl = ivl->next) {
2941 Trace::traceRelease(" %s\n", ivl->toString().c_str());
2942 }
2943 }
2944 }
2945
2946 auto const ignore_reserved = !getenv("XLS_SHOW_RESERVED");
2947 auto const collapse_fixed = !getenv("XLS_SHOW_FIXED");
2949 enum Mode { Light, Heavy };
2951 template<class Pred>
2952 const char* draw(Variable* var, unsigned pos, Mode m, Pred covers) {
2953 // Light Heavy
2954 static const char* top[] = { u8"\u2575", u8"\u2579" };
2955 static const char* bottom[] = { u8"\u2577", u8"\u257B" };
2956 static const char* both[] = { u8"\u2502", u8"\u2503" };
2957 static const char* empty[] = { " ", " " };
2958 auto f = [&](unsigned position) {
2959 if (!var) return false;
2960 for (auto ivl = var->ivl(); ivl; ivl = ivl->next) {
2961 if (covers(ivl, position)) return true;
2962 }
2963 return false;
2964 };
2965
2966 auto s = f(pos);
2967 auto d = pos % 2 == 1 ? s : f(pos + 1);
2968 return ( s && !d) ? top[m] :
2969 ( s && d) ? both[m] :
2970 (!s && d) ? bottom[m] :
2971 empty[m];
2972 }
2973
2974 DEBUG_ONLY void printInstr(std::ostringstream& str,
2975 const Vunit& unit, const VxlsContext& ctx,
2976 const jit::vector<Variable*>& variables,
2977 const Vinstr& inst, Vlabel b) {
2978 bool fixed_covers[2] = { false, false };
2979 Variable* fixed = nullptr;
2980 for (auto var : variables) {
2981 if (!var) continue;
2982 if (var->fixed()) {
2983 if (ignore_reserved && !ctx.abi.unreserved().contains(var->vreg)) {
2984 continue;
2985 }
2986 if (collapse_fixed) {
2987 fixed = var; // can be any.
2988 fixed_covers[0] |= var->ivl()->covers(inst.id);
2989 fixed_covers[1] |= var->ivl()->covers(inst.id + 1);
2990 continue;
2991 }
2992 }
2993 str << " ";
2994 str << draw(var, inst.id, Light, [&](Interval* child, unsigned p) {
2995 return child->covers(p);
2996 });
2997 str << draw(var, inst.id, Heavy, [&](Interval* child, unsigned p) {
2998 return child->usedAt(p);
2999 });
3000
3001 str << " " << draw(fixed, inst.id, Heavy, [&](Interval*, unsigned p) {
3002 assertx(p - inst.id < 2);
3003 return fixed_covers[p - inst.id];
3004 });
3005 if (inst.id == ctx.block_ranges[b].start) {
3006 str << folly::format(" B{: <3}", size_t(b));
3007 } else {
3008 str << " ";
3009 }
3010 str << folly::format(" {: <3} ", inst.id) << show(unit, inst) << "\n";
3011 }
3012
3013 DEBUG_ONLY void printVariables(const char* caption,
3014 const Vunit& unit, const VxlsContext& ctx,
3015 const jit::vector<Variable*>& variables) {
3016 std::ostringstream str;
3017 str << "Intervals " << caption << " " << ctx.counter << "\n";
3018 for (auto var : variables) {
3019 if (!var) continue;
3020 if (var->fixed()) {
3021 if (ignore_reserved && !ctx.abi.unreserved().contains(var->vreg)) {
3022 continue;
3023 }
3024 if (collapse_fixed) {
3025 continue;
3026 }
3027 }
3028 str << folly::format(" {: <2}", size_t(var->vreg));
3029 }
3030 str << " FX\n";
3031 for (auto b : ctx.blocks) {
3032 for (auto& inst : unit.blocks[b].code) {
3033 printInstr(str, unit, ctx, variables, inst, b);
3034 }
3035 }
3036 HPHP::Trace::traceRelease("%s\n", str.str().c_str());
3037 }
3038
3039 ///////////////////////////////////////////////////////////////////////////////
3041 struct XLSStats {
3042 size_t instrs{0}; // total instruction count after xls
3043 size_t moves{0}; // number of movs inserted
3044 size_t loads{0}; // number of loads inserted
3045 size_t spills{0}; // number of spill-stores inserted
3046 size_t consts{0}; // number of const-loads inserted
3047 size_t total_copies{0}; // moves+loads+spills
3048 };
3049
3050 void dumpStats(const Vunit& unit, const ResolutionPlan& resolution) {
3051 XLSStats stats;
3053 for (auto const& blk : unit.blocks) {
3054 stats.instrs += blk.code.size();
3055 }
3056 for (auto const& kv : resolution.spills) {
3057 auto const& spills = kv.second;
3058 for (auto const r : spills) {
3059 if (spills[r]) ++stats.spills;
3060 }
3061 }
3062
3063 auto const count_copies = [&] (const CopyPlan& plan) {
3064 for_each_copy(plan, [&] (PhysReg, const Interval*) {
3065 ++stats.moves;
3066 });
3067 for_each_load(plan, [&] (PhysReg, const Interval* ivl) {
3068 ++(ivl->spilled() ? stats.loads : stats.consts);
3069 });
3070 };
3071 for (auto const& kv : resolution.copies) count_copies(kv.second);
3072 for (auto const& kv : resolution.edge_copies) count_copies(kv.second);
3074 stats.total_copies = stats.moves + stats.loads + stats.spills;
3076 FTRACE_MOD(
3077 Trace::xls_stats, 1,
3078 "XLS stats:\n"
3079 " instrs: {}\n"
3080 " moves: {}\n"
3081 " loads: {}\n"
3082 " spills: {}\n"
3083 " consts: {}\n"
3084 " total copies: {}\n",
3085 stats.instrs, stats.moves, stats.loads, stats.spills,
3086 stats.consts, stats.total_copies
3087 );
3088
3089 if (auto entry = unit.log_entry) {
3090 entry->setInt("xls_instrs", stats.instrs);
3091 entry->setInt("xls_moves", stats.moves);
3092 entry->setInt("xls_loads", stats.loads);
3093 entry->setInt("xls_spills", stats.spills);
3094 entry->setInt("xls_consts", stats.consts);
3095 }
3096 }
3097
3098 ///////////////////////////////////////////////////////////////////////////////
3099 }
3100
3101 void allocateRegistersWithXLS(Vunit& unit, const Abi& abi) {
3102 Timer timer(Timer::vasm_reg_alloc, unit.log_entry);
3103 auto const counter = s_counter.fetch_add(1, std::memory_order_relaxed);
3105 splitCriticalEdges(unit);
3106 assertx(check(unit));
3108 // Analysis passes.
3109 VxlsContext ctx{abi};
3110 ctx.blocks = sortBlocks(unit);
3111 ctx.block_ranges = computePositions(unit, ctx.blocks);
3112 ctx.spill_offsets = analyzeSP(unit, ctx.blocks, ctx.sp);
3113 ctx.livein = computeLiveness(unit, ctx.abi, ctx.blocks);
3114 ctx.counter = counter;
3116 // Build lifetime intervals and analyze hints.
3117 auto variables = buildIntervals(unit, ctx);
3118 auto const hint_info = analyzeHints(unit, ctx, variables);
3120 // Perform register allocation.
3121 auto spill_info = assignRegisters(ctx, variables, hint_info);
3123 ONTRACE(kVasmRegAllocDetailLevel,
3124 dumpVariables(variables, spill_info.num_spills));
3126 // Insert lifetime-resolving copies, spills, and rematerializations, and
3127 // replace the Vreg operands in the Vinstr stream with the assigned PhysRegs.
3128 auto const resolution = resolveLifetimes(unit, ctx, variables);
3129 renameOperands(unit, ctx, variables);
3130 insertCopies(unit, ctx, variables, resolution);
3132 ONTRACE(kVasmRegAllocDetailLevel,
3133 dumpVariables(variables, spill_info.num_spills);
3134 printVariables("after inserting copies", unit, ctx, variables);
3135 );
3136
3137 peephole(unit, ctx);
3139 // Insert instructions for creating spill space.
3140 allocateSpillSpace(unit, ctx, spill_info);
3141 if (auto entry = unit.log_entry) {
3142 entry->setInt("num_spills", spill_info.num_spills);
3143 entry->setInt("used_spill_slots", spill_info.used_spill_slots);
3144 }
3145
3146 printUnit(kVasmRegAllocLevel, "after vasm-xls", unit);
3147 if (HPHP::Trace::moduleEnabled(Trace::xls_stats, 1) || unit.log_entry) {
3148 dumpStats(unit, resolution);
3149 }
3150
3151 // Free the variable metadata.
3152 for (auto var : variables) {
3153 if (!var) continue;
3154 for (Interval* ivl = var->ivl()->next, *next = nullptr; ivl; ivl = next) {
3155 next = ivl->next;
3156 jit::destroy(ivl);
3157 }
3158 Variable::destroy(var);
3159 }
3160 }
3161
3162 ///////////////////////////////////////////////////////////////////////////////