hphp/runtime/vm/jit/vasm-xls.cpp
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/runtime/vm/jit/vasm.h"
19 #include "hphp/runtime/base/stats.h"
21 #include "hphp/runtime/vm/jit/abi.h"
22 #include "hphp/runtime/vm/jit/print.h"
23 #include "hphp/runtime/vm/jit/punt.h"
24 #include "hphp/runtime/vm/jit/reg-algorithms.h"
25 #include "hphp/runtime/vm/jit/timer.h"
26 #include "hphp/runtime/vm/jit/vasm-instr.h"
27 #include "hphp/runtime/vm/jit/vasm-print.h"
28 #include "hphp/runtime/vm/jit/vasm-reg.h"
29 #include "hphp/runtime/vm/jit/vasm-unit.h"
30 #include "hphp/runtime/vm/jit/vasm-util.h"
31 #include "hphp/runtime/vm/jit/vasm-visit.h"
33 #include "hphp/util/arch.h"
34 #include "hphp/util/assertions.h"
35 #include "hphp/util/dataflow-worklist.h"
36 #include "hphp/util/trace.h"
38 #include "hphp/ppc64-asm/asm-ppc64.h"
40 #include <boost/dynamic_bitset.hpp>
41 #include <boost/range/adaptor/reversed.hpp>
42 #include <folly/Format.h>
43 #include <folly/Optional.h>
45 #include <algorithm>
46 #include <random>
48 // future work
49 // - #3098509 streamline code, vectors vs linked lists, etc
50 // - #3098685 Optimize lifetime splitting
51 // - #3098739 new features now possible with XLS
53 TRACE_SET_MOD(xls);
55 namespace HPHP { namespace jit {
56 ///////////////////////////////////////////////////////////////////////////////
58 namespace {
59 ///////////////////////////////////////////////////////////////////////////////
61 std::atomic<size_t> s_counter;
63 DEBUG_ONLY constexpr auto kHintLevel = 5;
66 * Vreg discriminator.
68 enum class Constraint { Any, CopySrc, Gpr, Simd, Sf };
70 Constraint constraint(const Vreg&) { return Constraint::Any; }
71 Constraint constraint(const Vreg64&) { return Constraint::Gpr; }
72 Constraint constraint(const Vreg32&) { return Constraint::Gpr; }
73 Constraint constraint(const Vreg16&) { return Constraint::Gpr; }
74 Constraint constraint(const Vreg8&) { return Constraint::Gpr; }
75 Constraint constraint(const VregDbl&) { return Constraint::Simd; }
76 Constraint constraint(const Vreg128&) { return Constraint::Simd; }
77 Constraint constraint(const VregSF&) { return Constraint::Sf; }
79 bool is_wide(const Vreg128&) { return true; }
80 template<class T> bool is_wide(const T&) { return false; }
83 * Sentinel constants.
85 constexpr int kInvalidPhiGroup = -1;
86 constexpr int kInvalidSpillSlot = -1;
87 const unsigned kMaxPos = UINT_MAX; // "infinity" use position
90 * A Use refers to the position where an interval is used or defined.
92 struct Use {
93 Use() {}
94 Use(Constraint kind, unsigned pos, Vreg hint)
95 : kind(kind)
96 , pos(pos)
97 , hint(hint)
101 * Constraint imposed on the Vreg at this use.
103 Constraint kind{Constraint::Any};
105 * Position of this use or def.
107 unsigned pos{kMaxPos};
109 * If valid, try to assign the same physical register here as `hint' was
110 * assigned at `pos'. The `alt_hint' is for phijcc.
112 Vreg hint;
113 Vreg alt_hint;
115 * Index of phi group metadata.
117 int phi_group{kInvalidPhiGroup};
121 * A LiveRange is a closed-open range of positions where an interval is live.
123 * Specifically, for the LiveRange [start, end), start is in the range and
124 * end is not.
126 struct LiveRange {
127 bool contains(unsigned pos) const { return pos >= start && pos < end; }
128 bool intersects(LiveRange r) const { return r.start < end && start < r.end; }
129 bool contains(LiveRange r) const { return r.start >= start && r.end <= end; }
130 public:
131 unsigned start, end;
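// A minimal sketch of the closed-open convention (hypothetical helper and
// values; LiveRange is an aggregate, so brace-init works):
UNUSED inline void liveRange_example() {
  LiveRange a{4, 8};
  LiveRange b{8, 12};
  assertx(a.contains(4));                 // start position is included
  assertx(!a.contains(8));                // end position is excluded
  assertx(!a.intersects(b));              // abutting ranges do not overlap
  assertx(LiveRange{4, 12}.contains(b));  // containment is inclusive
}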
134 struct Variable;
137 * An Interval represents the lifetime of a Vreg as a sorted list of disjoint
138 * ranges and a sorted list of use positions. It is the unit of register
139 * allocation.
141 * Intervals may be split---e.g., because the Vreg needed to be spilled in some
142 * subrange. All (sub-)Intervals for a given Vreg are connected as a singly
143 * linked list, sorted by start position.
145 * Every use position must be inside one of the ranges, or exactly at the end
146 * of the last range. Allowing a use exactly at the end facilitates lifetime
147 * splitting when the use at the position of an instruction clobbers registers
148 * as a side effect, e.g. a call.
150 * The intuition for allowing uses at the end of an Interval is that, in truth,
151 * the picture at a given position looks like this:
153 * | [s]
155 * +-----|-------------+ copy{s, d} <-+
156 * | v | |
157 * + - - - - - - - - - + +--- position n
158 * | | | |
159 * +-------------|-----+ <-+
161 * [d] v
163 * We represent an instruction with a single position `n'. All the use(s) and
164 * def(s) of that instruction are live at some point within it, but their
165 * lifetimes nonetheless do not overlap. Since we don't represent instructions
166 * using two position numbers, instead, we allow uses on the open end side of
167 * Intervals, because they don't actually conflict with, e.g., a def of another
168 * Interval that starts at the same position.
170 struct Interval {
171 explicit Interval(Variable* var) : var(var) {}
173 std::string toString() const;
176 * Endpoints. Intervals are [closed, open), just like LiveRanges.
178 unsigned start() const { return ranges.front().start; }
179 unsigned end() const { return ranges.back().end; }
182 * Head of this Interval chain.
184 const Interval* leader() const;
185 Interval* leader();
188 * Register allocation state.
190 bool live() const;
191 bool constant() const;
192 bool spilled() const;
193 bool fixed() const;
196 * Split this interval at `pos', returning the new `this->next'.
198 * If `keep_uses' is set, uses exactly at the end of the first interval will
199 * stay with the first split (rather than the second).
201 * @requires: pos > start() && pos < end(); this ensures that both
202 * subintervals are nonempty.
204 Interval* split(unsigned pos, bool keep_uses = false);
206 /////////////////////////////////////////////////////////////////////////////
207 // Queries.
209 // These operate only on `this' and not its children.
212 * Get the index of the first range or use that is not strictly lower than
213 * `pos' (i.e., which contains/is at `pos' or is strictly higher than `pos').
215 unsigned findRange(unsigned pos) const;
216 unsigned findUse(unsigned pos) const;
219 * Whether there is a range that includes `pos', or a use at `pos'.
221 bool covers(unsigned pos) const;
222 bool usedAt(unsigned pos) const;
225 * The position of a use [relative to `pos'] that requires a register (i.e.,
226 * CopySrc uses are ignored).
228 * firstUseAfter: The first use >= `pos', kMaxPos if there are no more uses.
229 * lastUseBefore: The last use <= `pos'; 0 if the first use is after `pos'.
230 * firstUse: The first use in `this'.
232 unsigned firstUseAfter(unsigned pos) const;
233 unsigned lastUseBefore(unsigned pos) const;
234 unsigned firstUse() const;
236 public:
237 // The Variable whose (sub-)lifetime this (sub-)interval represents.
238 Variable* const var;
240 // Pointer to the next subinterval.
241 Interval* next{nullptr};
243 // Live ranges and use positions.
244 jit::vector<LiveRange> ranges;
245 jit::vector<Use> uses;
247 // The physical register assigned to this subinterval.
248 PhysReg reg;
252 * Metadata about a variable which is the object of register allocation.
254 * Each Variable represents a Vreg in the Vunit we're performing register
255 * allocation on---including lifetime ranges, uses, def information, etc.
257 struct Variable {
258 explicit Variable(Vreg r) : vreg(r) {}
260 static Variable* create(Vreg r);
261 static void destroy(Variable* var);
264 * Accessors.
266 Interval* ivl() const { return (Interval*)(this + 1); }
267 bool fixed() const { return vreg.isPhys(); }
270 * Return the subinterval which covers or has a use at `pos', else nullptr if
271 * no subinterval does.
273 Interval* ivlAt(unsigned pos);
274 Interval* ivlAtUse(unsigned pos);
276 public:
277 // The Vreg this value represents.
278 const Vreg vreg;
280 // Whether this is a SIMD value.
281 bool wide{false};
283 // Whether this variable is a constant, and its constant value.
284 bool constant{false};
285 Vconst val;
287 // The spill slot assigned to this value. Since we don't spill physical
288 // registers, and Vregs are SSA, a value's assigned slot never changes.
289 int slot{kInvalidSpillSlot};
291 // Position of, and block containing, the variable's single def instruction;
292 // invalid for physical registers.
293 unsigned def_pos{kMaxPos};
294 Vlabel def_block;
296 // Copy of the Vreg's def instruction.
297 Vinstr def_inst;
301 * Bitset of Vreg numbers.
303 using LiveSet = boost::dynamic_bitset<>;
305 template<class Fn> void forEach(const LiveSet& bits, Fn fn) {
306 for (auto i = bits.find_first(); i != bits.npos; i = bits.find_next(i)) {
307 fn(Vreg(i));
312 * Sack of inputs and pre-computed data used by the main XLS algorithm.
314 struct VxlsContext {
315 explicit VxlsContext(const Abi& abi)
316 : abi(abi)
317 , sp(rsp())
319 switch (arch()) {
320 case Arch::X64:
321 tmp = reg::xmm15; // reserve xmm15 to break shuffle cycles
322 break;
323 case Arch::ARM:
324 tmp = vixl::x17; // also used as tmp1 by MacroAssembler
325 break;
326 case Arch::PPC64:
327 tmp = ppc64_asm::reg::v29;
328 break;
330 this->abi.simdUnreserved -= tmp;
331 this->abi.simdReserved |= tmp;
332 assertx(!abi.gpUnreserved.contains(sp));
333 assertx(!abi.gpUnreserved.contains(tmp));
336 public:
337 Abi abi;
338 // Arch-dependent stack pointer.
339 PhysReg sp;
340 // Temp register used only for breaking cycles.
341 PhysReg tmp;
342 // Debug-only run identifier.
343 size_t counter;
345 // Sorted blocks.
346 jit::vector<Vlabel> blocks;
347 // [start,end) position of each block.
348 jit::vector<LiveRange> block_ranges;
349 // Per-block sp[offset] to spill-slots.
350 jit::vector<int> spill_offsets;
351 // Per-block live-in sets.
352 jit::vector<LiveSet> livein;
355 ///////////////////////////////////////////////////////////////////////////////
356 // Interval.
358 const Interval* Interval::leader() const { return var->ivl(); }
359 Interval* Interval::leader() { return var->ivl(); }
361 bool Interval::live() const { return reg != InvalidReg; }
362 bool Interval::constant() const { return var->constant; }
363 bool Interval::spilled() const { return !live() && var->slot >= 0; }
364 bool Interval::fixed() const { return var->fixed(); }
366 unsigned Interval::findRange(unsigned pos) const {
367 unsigned lo = 0;
368 for (unsigned hi = ranges.size(); lo < hi;) {
369 auto mid = (lo + hi) / 2;
370 auto r = ranges[mid];
371 if (pos < r.start) {
372 hi = mid;
373 } else if (r.end <= pos) {
374 lo = mid + 1;
375 } else {
376 return mid;
379 assertx(lo == ranges.size() || pos < ranges[lo].start);
380 return lo;
383 unsigned Interval::findUse(unsigned pos) const {
384 unsigned lo = 0, hi = uses.size();
385 while (lo < hi) {
386 auto mid = (lo + hi) / 2;
387 auto u = uses[mid].pos;
388 if (pos < u) {
389 hi = mid;
390 } else if (u < pos) {
391 lo = mid + 1;
392 } else {
393 return mid;
396 assertx(lo == uses.size() || pos < uses[lo].pos);
397 return lo;
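// A small sketch of the search boundaries (hypothetical Vreg and positions):
UNUSED inline void findUse_example() {
  auto var = Variable::create(Vreg{64});
  auto ivl = var->ivl();
  ivl->ranges.push_back({10, 30});
  ivl->uses.push_back(Use{Constraint::Any, 10, Vreg{}});
  ivl->uses.push_back(Use{Constraint::Any, 14, Vreg{}});
  ivl->uses.push_back(Use{Constraint::Any, 22, Vreg{}});
  assertx(ivl->findUse(14) == 1);                // exact hit
  assertx(ivl->findUse(15) == 2);                // first use not strictly lower
  assertx(ivl->findUse(30) == ivl->uses.size()); // no use at or after 30
  Variable::destroy(var);
}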
400 bool Interval::covers(unsigned pos) const {
401 if (pos < start() || pos >= end()) return false;
402 auto i = ranges.begin() + findRange(pos);
403 return i != ranges.end() && i->contains(pos);
406 bool Interval::usedAt(unsigned pos) const {
407 if (pos < start() || pos > end()) return false;
408 auto i = uses.begin() + findUse(pos);
409 return i != uses.end() && pos == i->pos;
412 unsigned Interval::firstUseAfter(unsigned pos) const {
413 for (auto& u : uses) {
414 if (u.kind == Constraint::CopySrc) continue;
415 if (u.pos >= pos) return u.pos;
417 return kMaxPos;
420 unsigned Interval::lastUseBefore(unsigned pos) const {
421 auto prev = 0;
422 for (auto& u : uses) {
423 if (u.kind == Constraint::CopySrc) continue;
424 if (u.pos > pos) return prev;
425 prev = u.pos;
427 return prev;
430 unsigned Interval::firstUse() const {
431 for (auto& u : uses) {
432 if (u.kind != Constraint::CopySrc) return u.pos;
434 return kMaxPos;
437 Interval* Interval::split(unsigned pos, bool keep_uses) {
438 assertx(pos > start() && pos < end()); // both parts will be non-empty
440 auto child = jit::make<Interval>(var);
441 child->next = next;
442 next = child;
444 // advance r1 to the first range we want in child; maybe split a range.
445 auto r1 = ranges.begin() + findRange(pos);
446 if (pos > r1->start) { // split r at pos
447 child->ranges.push_back({pos, r1->end});
448 r1->end = pos;
449 r1++;
451 child->ranges.insert(child->ranges.end(), r1, ranges.end());
452 ranges.erase(r1, ranges.end());
454 // advance u1 to the first use position in child, then copy u1..end to child.
455 auto u1 = uses.begin() + findUse(end());
456 auto u2 = uses.end();
457 if (keep_uses) {
458 while (u1 != u2 && u1->pos <= end()) u1++;
459 } else {
460 while (u1 != u2 && u1->pos < child->start()) u1++;
462 child->uses.insert(child->uses.end(), u1, u2);
463 uses.erase(u1, u2);
465 return child;
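// A sketch of split() in action (hypothetical Vreg and positions; cleanup of
// the child subinterval is elided, since the allocator owns these in
// practice):
UNUSED inline void split_example() {
  auto var = Variable::create(Vreg{65});
  auto ivl = var->ivl();
  ivl->ranges.push_back({10, 30});
  ivl->uses.push_back(Use{Constraint::Any, 12, Vreg{}});
  ivl->uses.push_back(Use{Constraint::Any, 24, Vreg{}});
  auto const child = ivl->split(20);
  assertx(ivl->end() == 20 && child->start() == 20);         // [10,20), [20,30)
  assertx(ivl->uses.size() == 1 && child->uses.size() == 1); // use 12 vs. 24
  assertx(ivl->next == child && child->var == var);
}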
468 ///////////////////////////////////////////////////////////////////////////////
469 // Variable.
471 Variable* Variable::create(Vreg r) {
472 auto const mem = malloc(sizeof(Variable) + sizeof(Interval));
473 auto const var = new (mem) Variable(r);
474 new (reinterpret_cast<void*>(var + 1)) Interval(var);
475 return var;
478 void Variable::destroy(Variable* var) {
479 var->~Variable();
480 auto ivl = reinterpret_cast<Interval*>(var + 1);
481 ivl->~Interval();
482 free(var);
485 Interval* Variable::ivlAt(unsigned pos) {
486 for (auto ivl = this->ivl(); ivl; ivl = ivl->next) {
487 if (pos < ivl->start()) return nullptr;
488 if (ivl->covers(pos)) return ivl;
490 return nullptr;
493 Interval* Variable::ivlAtUse(unsigned pos) {
494 for (auto ivl = this->ivl(); ivl; ivl = ivl->next) {
495 if (pos < ivl->start()) return nullptr;
496 if (ivl->usedAt(pos)) return ivl;
498 return nullptr;
501 ///////////////////////////////////////////////////////////////////////////////
504 * Extended Linear Scan is based on Wimmer & Franz "Linear Scan Register
505 * Allocation on SSA Form". As currently written, it also works on non-ssa
506 * input.
508 * 1. Sort blocks such that all predecessors of B come before B, except
509 * loop-edge predecessors. If the input IR is in SSA form, this also
510 * implies the definition of each SSATmp comes before all uses.
512 * 2. Assign an even numbered position to every instruction. Positions
513 * between instructions are used to insert copies and spills. Each block
514 * starts with an extra position number that corresponds to an imaginary
515 * "label" instruction that is not physically in the vasm IR.
517 * 3. Create one interval I for each Vreg R that requires register allocation,
518 * by iterating blocks and instructions in reverse order, computing live
519 * registers as we go. Each interval consists of a sorted list of disjoint,
520 * live ranges covering the positions where R must be in a physical register
521 * or spill slot. Vregs that are constants or have forced registers
522 * (e.g. VmSp) are skipped. If the input is SSA, the start position of each
523 * interval dominates every live range and use position in the interval.
525 * 4. Process intervals in order of start position, maintaining the set of
526 * active (live) and inactive (not live, but with live ranges that start
527 * after the current interval). When choosing a register, prefer the one
528 * available furthest into the future. If necessary, split the current
529 * interval so the first part gets a register, and enqueue the rest.
530 * When no registers are available, choose either the current interval or
531 * another one to spill, trying to free up the longest-available register.
533 * Split positions must be after an interval's start position, and on or before
534 * the chosen split point. We're free to choose a good position in between,
535 * for example block boundaries and cold blocks.
537 * 5. Once intervals have been walked and split, every interval has an assigned
538 * operand (register or spill location) for all positions where it's alive.
539 * Visit every instruction and modify its Vreg operands to the physical
540 * register that was assigned.
542 * 6. Splitting creates sub-intervals that are assigned to different registers
543 * or spill locations, so insert resolving copies at the split positions
544 * between intervals that were split in a block, and copies on control-flow
545 * edges connecting different sub-intervals. When more than one copy occurs
546 * in a position, they are parallel-copies (all sources read before any dest
547 * is written).
549 * If any sub-interval was spilled, a single store is generated after each
550 * definition point.
552 * When analyzing instructions that use or define a virtual SF register
553 * (VregSF), eagerly rename it to the singleton PhysReg RegSF{0}, under the
554 * assumption that there can only be one live SF at each position. This
555 * reduces the number of intervals we need to process, facilitates inserting
556 * ldimm{0} (as xor), and is checked by checkSF().
559 ///////////////////////////////////////////////////////////////////////////////
562 * Printing utilities.
564 void printVariables(const char* caption,
565 const Vunit& unit, const VxlsContext& ctx,
566 const jit::vector<Variable*>& variables);
567 void dumpVariables(const jit::vector<Variable*>& variables,
568 unsigned num_spills);
571 * The ID of the block enclosing `pos'.
573 Vlabel blockFor(const VxlsContext& ctx, unsigned pos) {
574 for (unsigned lo = 0, hi = ctx.blocks.size(); lo < hi;) {
575 auto mid = (lo + hi) / 2;
576 auto r = ctx.block_ranges[ctx.blocks[mid]];
577 if (pos < r.start) {
578 hi = mid;
579 } else if (pos >= r.end) {
580 lo = mid + 1;
581 } else {
582 return ctx.blocks[mid];
585 always_assert(false);
586 return Vlabel{0xffffffff};
590 * Insert `src' into `dst' before dst[j], corresponding to XLS logical
591 * position `pos'.
593 * Updates `j' to refer to the same instruction after the code insertions.
595 void insertCodeAt(jit::vector<Vinstr>& dst, unsigned& j,
596 const jit::vector<Vinstr>& src, unsigned pos) {
597 auto const irctx = dst[j].irctx();
598 dst.insert(dst.begin() + j, src.size(), trap{TRAP_REASON});
599 for (auto const& inst : src) {
600 dst[j] = inst;
601 dst[j].set_irctx(irctx);
602 dst[j++].pos = pos;
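// For example (hypothetical sizes): inserting a two-instruction copy sequence
// before dst[5] first reserves two slots filled with trap{}, then overwrites
// them in place; each inserted Vinstr inherits the irctx of the original
// dst[5] and gets logical position `pos', and `j' ends up at 7, which is
// where the original dst[5] now lives.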
606 ///////////////////////////////////////////////////////////////////////////////
607 // Pre-analysis passes.
610 * Compute the linear position range of each block.
612 * This modifies the Vinstrs in `unit' by setting their `pos' members, in
613 * addition to producing the block-to-range map.
615 jit::vector<LiveRange> computePositions(Vunit& unit,
616 const jit::vector<Vlabel>& blocks) {
617 auto block_ranges = jit::vector<LiveRange>{unit.blocks.size()};
618 unsigned pos = 0;
620 for (auto const b : blocks) {
621 auto& code = unit.blocks[b].code;
623 bool front_uses{false};
624 visitUses(unit, code.front(), [&](Vreg /*r*/) { front_uses = true; });
625 if (front_uses) {
626 auto irctx = code.front().irctx();
627 code.emplace(code.begin(), nop{}, irctx);
629 auto const start = pos;
631 for (auto& inst : unit.blocks[b].code) {
632 inst.pos = pos;
633 pos += 2;
635 block_ranges[b] = { start, pos };
637 return block_ranges;
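// For example (hypothetical block): a three-instruction block starting at
// position 0 numbers its instructions 0, 2, 4 and gets block_range [0, 6);
// the odd positions in between are where resolving copies and spills can be
// inserted later.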
641 * Return the effect this instruction has on the value of `sp'.
643 * Asserts (if `do_assert' is set) if an instruction mutates `sp' in an
644 * untrackable way.
646 int spEffect(const Vunit& unit, const Vinstr& inst, PhysReg sp,
647 bool do_assert = debug) {
648 switch (inst.op) {
649 case Vinstr::push:
650 case Vinstr::pushf:
651 case Vinstr::pushm:
652 return -8;
653 case Vinstr::pushp:
654 case Vinstr::pushpm:
655 return -16;
656 case Vinstr::pop:
657 case Vinstr::popf:
658 case Vinstr::popm:
659 return 8;
660 case Vinstr::popp:
661 case Vinstr::poppm:
662 return 16;
663 case Vinstr::lea: {
664 auto& i = inst.lea_;
665 if (i.d == Vreg64(sp)) {
666 assertx(i.s.base == i.d && !i.s.index.isValid());
667 return i.s.disp;
669 return 0;
671 default:
672 if (do_assert) {
673 visitDefs(unit, inst, [&] (Vreg r) { assertx(r != sp); });
675 return 0;
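// A quick sketch of the accounting (hypothetical helper; only the opcode
// matters for the sp delta, so default-constructed operands suffice):
UNUSED inline void spEffect_example(const Vunit& unit) {
  assertx(spEffect(unit, push{Vreg64{}}, rsp()) == -8);
  assertx(spEffect(unit, pop{Vreg64{}}, rsp()) == 8);
}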
680 * Compute the offset from `sp' to the spill area at each block start.
682 jit::vector<int> analyzeSP(const Vunit& unit,
683 const jit::vector<Vlabel>& blocks,
684 PhysReg sp) {
685 auto visited = boost::dynamic_bitset<>(unit.blocks.size());
686 auto spill_offsets = jit::vector<int>(unit.blocks.size());
688 for (auto const b : blocks) {
689 auto offset = visited.test(b) ? spill_offsets[b] : 0;
691 for (auto const& inst : unit.blocks[b].code) {
692 offset -= spEffect(unit, inst, sp);
694 for (auto const s : succs(unit.blocks[b])) {
695 if (visited.test(s)) {
696 assert_flog(offset == spill_offsets[s],
697 "sp mismatch on edge B{}->B{}, expected {} got {}",
698 size_t(b), size_t(s), spill_offsets[s], offset);
699 } else {
700 spill_offsets[s] = offset;
701 visited.set(s);
705 return spill_offsets;
709 * Compute livein set for each block.
711 * An iterative data-flow analysis to compute the livein sets for each block is
712 * necessary for two reasons:
714 * 1. buildIntervals() uses the sets in a single backwards pass to build
715 * precise Intervals with live range holes, and
717 * 2. resolveEdges() uses the sets to discover which intervals require copies
718 * on control flow edges due to having been split.
720 jit::vector<LiveSet> computeLiveness(const Vunit& unit,
721 const Abi& abi,
722 const jit::vector<Vlabel>& blocks) {
723 auto livein = jit::vector<LiveSet>{unit.blocks.size()};
724 auto const preds = computePreds(unit);
726 auto blockPO = jit::vector<uint32_t>(unit.blocks.size());
727 auto revBlocks = blocks;
728 std::reverse(begin(revBlocks), end(revBlocks));
730 FTRACE(6, "computeLiveness: starting with {} blocks (unit blocks: {})\n",
731 revBlocks.size(), unit.blocks.size());
733 auto wl = dataflow_worklist<uint32_t>(revBlocks.size());
735 for (unsigned po = 0; po < revBlocks.size(); po++) {
736 wl.push(po);
737 blockPO[revBlocks[po]] = po;
738 FTRACE(6, " - inserting block {} (po = {})\n", revBlocks[po], po);
741 while (!wl.empty()) {
742 auto b = revBlocks[wl.pop()];
743 auto& block = unit.blocks[b];
745 FTRACE(6, " - popped block {} (po = {})\n", b, blockPO[b]);
747 // start with the union of the successor blocks
748 LiveSet live(unit.next_vr);
749 for (auto s : succs(block)) {
750 if (!livein[s].empty()) live |= livein[s];
753 // and now go through the instructions in the block in reverse order
754 for (auto const& inst : boost::adaptors::reverse(block.code)) {
755 RegSet implicit_uses, implicit_across, implicit_defs;
756 getEffects(abi, inst, implicit_uses, implicit_across, implicit_defs);
758 auto const vsf = Vreg{RegSF{0}};
760 auto const dvisit = [&] (Vreg r, Width w) {
761 live.reset(w == Width::Flags ? vsf : r);
763 auto const uvisit = [&] (Vreg r, Width w) {
764 live.set(w == Width::Flags ? vsf : r);
767 visitDefs(unit, inst, dvisit);
768 visit(unit, implicit_defs, dvisit);
770 visitUses(unit, inst, uvisit);
771 visit(unit, implicit_uses, uvisit);
774 if (live != livein[b]) {
775 livein[b] = live;
776 for (auto p : preds[b]) {
777 wl.push(blockPO[p]);
778 FTRACE(6, " - reinserting block {} (po = {})\n", p, blockPO[p]);
783 return livein;
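// The fixed point computed above satisfies the standard backward dataflow
// equations (modulo the eager VregSF -> RegSF{0} renaming):
//
//   liveout(B) = union of livein(S) for each successor S of B
//   livein(B)  = uses(B) | (liveout(B) - defs(B))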
786 ///////////////////////////////////////////////////////////////////////////////
787 // Lifetime intervals.
790 * Add `r' to `ivl'.
792 * This assumes that the ranges of `ivl' are in reverse order, and that `r'
793 * precedes or overlaps with ivl->ranges.first().
795 void addRange(Interval* ivl, LiveRange r) {
796 while (!ivl->ranges.empty() && r.contains(ivl->ranges.back())) {
797 ivl->ranges.pop_back();
799 if (ivl->ranges.empty()) {
800 return ivl->ranges.push_back(r);
802 auto& first = ivl->ranges.back();
803 if (first.contains(r)) return;
804 if (r.end >= first.start) {
805 first.start = r.start;
806 } else {
807 ivl->ranges.push_back(r);
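// A sketch of the back-to-front construction (hypothetical Vreg and
// positions):
UNUSED inline void addRange_example() {
  auto var = Variable::create(Vreg{66});
  auto ivl = var->ivl();
  addRange(ivl, {20, 30}); // ranges: [20,30)
  addRange(ivl, {10, 15}); // ranges: [20,30) [10,15) -- reverse order
  addRange(ivl, {8, 12});  // overlaps [10,15), which widens to [8,15)
  assertx(ivl->ranges.size() == 2);
  assertx(ivl->ranges.back().start == 8 && ivl->ranges.back().end == 15);
  Variable::destroy(var);
}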
812 * Visits defs of an instruction, updates their liveness, adds live ranges, and
813 * adds Uses with appropriate hints.
815 struct DefVisitor {
816 DefVisitor(const Vunit& unit, jit::vector<Variable*>& variables,
817 LiveSet& live, Vlabel b, const Vinstr& inst, unsigned pos)
818 : m_unit(unit)
819 , m_variables(variables)
820 , m_live(live)
821 , m_block(b)
822 , m_inst(inst)
823 , m_pos(pos)
826 // Skip immediates and uses.
827 template<class F> void imm(const F&) {}
828 template<class R> void use(R) {}
829 template<class S, class H> void useHint(S, H) {}
830 template<class R> void across(R) {}
832 void def(Vtuple defs) {
833 for (auto r : m_unit.tuples[defs]) def(r);
835 void defHint(Vtuple def_tuple, Vtuple hint_tuple) {
836 auto& defs = m_unit.tuples[def_tuple];
837 auto& hints = m_unit.tuples[hint_tuple];
838 for (int i = 0; i < defs.size(); i++) {
839 def(defs[i], Constraint::Any, hints[i]);
842 template<class R> void def(R r) {
843 def(r, constraint(r), Vreg{}, is_wide(r));
845 template<class D, class H> void defHint(D dst, H hint) {
846 def(dst, constraint(dst), hint, is_wide(dst));
848 void def(Vreg r) { def(r, Constraint::Any); }
849 void defHint(Vreg d, Vreg hint) { def(d, Constraint::Any, hint); }
850 void def(RegSet rs) { rs.forEach([&](Vreg r) { def(r); }); }
851 void def(VregSF r) {
852 r = RegSF{0}; // eagerly rename all SFs
853 def(r, constraint(r));
856 private:
857 void def(Vreg r, Constraint kind, Vreg hint = Vreg{}, bool wide = false) {
858 auto var = m_variables[r];
859 if (m_live.test(r)) {
860 m_live.reset(r);
861 var->ivl()->ranges.back().start = m_pos;
862 } else {
863 if (!var) {
864 var = m_variables[r] = Variable::create(r);
866 addRange(var->ivl(), {m_pos, m_pos + 1});
868 if (!var->fixed()) {
869 var->ivl()->uses.push_back(Use{kind, m_pos, hint});
870 var->wide |= wide;
871 var->def_pos = m_pos;
872 var->def_block = m_block;
873 var->def_inst = m_inst;
877 private:
878 const Vunit& m_unit;
879 jit::vector<Variable*>& m_variables;
880 LiveSet& m_live;
881 Vlabel m_block;
882 const Vinstr& m_inst;
883 unsigned m_pos;
886 struct UseVisitor {
887 UseVisitor(const Vunit& unit, jit::vector<Variable*>& variables,
888 LiveSet& live, const Vinstr& inst, LiveRange range)
889 : m_unit(unit)
890 , m_variables(variables)
891 , m_live(live)
892 , m_inst(inst)
893 , m_range(range)
896 // Skip immediates and defs.
897 template<class F> void imm(const F&) {}
898 template<class R> void def(R) {}
899 template<class D, class H> void defHint(D, H) {}
901 template<class R> void use(R r) { use(r, constraint(r), m_range.end); }
902 template<class S, class H> void useHint(S src, H hint) {
903 use(src, constraint(src), m_range.end, hint);
905 void use(VregSF r) {
906 r = RegSF{0}; // eagerly rename all SFs
907 use(r, constraint(r), m_range.end);
909 void use(RegSet regs) { regs.forEach([&](Vreg r) { use(r); }); }
910 void use(Vtuple uses) { for (auto r : m_unit.tuples[uses]) use(r); }
911 void useHint(Vtuple src_tuple, Vtuple hint_tuple) {
912 auto& uses = m_unit.tuples[src_tuple];
913 auto& hints = m_unit.tuples[hint_tuple];
914 for (int i = 0, n = uses.size(); i < n; i++) {
915 useHint(uses[i], hints[i]);
918 void use(Vptr m) {
919 if (m.base.isValid()) use(m.base);
920 if (m.index.isValid()) use(m.index);
922 void use(VcallArgsId /*id*/) {
923 always_assert(false && "vcall unsupported in vxls");
927 * An operand marked as UA means use-across. Mark it live across the
928 * instruction so its lifetime conflicts with the destination, which ensures
929 * it will be assigned a different register than the destination. This isn't
930 * necessary if *both* operands of a binary instruction are the same virtual
931 * register, but is still correct.
933 template<class R> void across(R r) { use(r, constraint(r), m_range.end + 1); }
934 void across(RegSet regs) { regs.forEach([&](Vreg r) { across(r); }); }
935 void across(Vptr m) {
936 if (m.base.isValid()) across(m.base);
937 if (m.index.isValid()) across(m.index);
940 private:
941 void use(Vreg r, Constraint kind, unsigned end, Vreg hint = Vreg{}) {
942 m_live.set(r);
944 auto var = m_variables[r];
945 if (!var) var = m_variables[r] = Variable::create(r);
946 addRange(var->ivl(), {m_range.start, end});
948 if (!var->fixed()) {
949 if (m_inst.op == Vinstr::copyargs ||
950 m_inst.op == Vinstr::copy2 ||
951 m_inst.op == Vinstr::copy ||
952 (m_inst.op == Vinstr::phijcc && kind != Constraint::Sf) ||
953 m_inst.op == Vinstr::phijmp) {
954 // all these instructions lower to parallel copyplans, which know
955 // how to load directly from constants or spilled locations
956 kind = Constraint::CopySrc;
958 var->ivl()->uses.push_back({kind, m_range.end, hint});
962 private:
963 const Vunit& m_unit;
964 jit::vector<Variable*>& m_variables;
965 LiveSet& m_live;
966 const Vinstr& m_inst;
967 const LiveRange m_range;
971 * Compute lifetime intervals and use positions of all Vregs by walking the
972 * code bottom-up once.
974 jit::vector<Variable*> buildIntervals(const Vunit& unit,
975 const VxlsContext& ctx) {
976 ONTRACE(kRegAllocLevel, printCfg(unit, ctx.blocks));
978 auto variables = jit::vector<Variable*>{unit.next_vr};
980 for (auto b : boost::adaptors::reverse(ctx.blocks)) {
981 auto& block = unit.blocks[b];
983 // initial live set is the union of successor live sets.
984 LiveSet live(unit.next_vr);
985 for (auto s : succs(block)) {
986 always_assert(!ctx.livein[s].empty());
987 live |= ctx.livein[s];
990 // add a range covering the whole block to every live interval
991 auto& block_range = ctx.block_ranges[b];
992 forEach(live, [&](Vreg r) {
993 if (!variables[r]) variables[r] = Variable::create(r);
994 addRange(variables[r]->ivl(), block_range);
997 // visit instructions bottom-up, adding uses & ranges
998 auto pos = block_range.end;
999 for (auto const& inst : boost::adaptors::reverse(block.code)) {
1000 pos -= 2;
1001 RegSet implicit_uses, implicit_across, implicit_defs;
1002 getEffects(ctx.abi, inst, implicit_uses, implicit_across, implicit_defs);
1004 DefVisitor dv(unit, variables, live, b, inst, pos);
1005 visitOperands(inst, dv);
1006 dv.def(implicit_defs);
1008 UseVisitor uv(unit, variables, live, inst, {block_range.start, pos});
1009 visitOperands(inst, uv);
1010 uv.use(implicit_uses);
1011 uv.across(implicit_across);
1014 // sanity check liveness computation
1015 always_assert(live == ctx.livein[b]);
1018 // finish processing live ranges for constants
1019 for (auto& c : unit.constToReg) {
1020 if (auto var = variables[c.second]) {
1021 var->ivl()->ranges.back().start = 0;
1022 var->def_pos = 0;
1023 var->constant = true;
1024 var->val = c.first;
1028 // Ranges and uses were generated in reverse order. Unreverse them now.
1029 for (auto var : variables) {
1030 if (!var) continue;
1031 auto ivl = var->ivl();
1032 assertx(!ivl->ranges.empty()); // no empty intervals
1033 std::reverse(ivl->uses.begin(), ivl->uses.end());
1034 std::reverse(ivl->ranges.begin(), ivl->ranges.end());
1036 ONTRACE(kRegAllocLevel,
1037 printVariables("after building intervals", unit, ctx, variables);
1040 if (debug) {
1041 // only constants and physical registers can be live into the entry block.
1042 forEach(ctx.livein[unit.entry], [&](Vreg r) {
1043 UNUSED auto var = variables[r];
1044 assertx(var->constant || var->fixed());
1046 for (auto var : variables) {
1047 if (!var) continue;
1048 auto const ivl = var->ivl();
1049 for (unsigned i = 1; i < ivl->uses.size(); i++) {
1050 assertx(ivl->uses[i].pos >= ivl->uses[i-1].pos); // monotonic
1052 for (unsigned i = 1; i < ivl->ranges.size(); i++) {
1053 assertx(ivl->ranges[i].end > ivl->ranges[i].start); // no empty ranges
1054 assertx(ivl->ranges[i].start > ivl->ranges[i-1].end); // no empty gaps
1058 return variables;
1061 ///////////////////////////////////////////////////////////////////////////////
1064 * A map from PhysReg number to position.
1066 using PosVec = PhysReg::Map<unsigned>;
1069 * Between `r1' and `r2', choose the one whose position in `pos_vec' is closest
1070 * to but still after (or at) `pos'.
1072 * If neither has a position after `pos', choose the higher one. If both have
1073 * the same position, choose `r1'.
1075 PhysReg choose_closest_after(unsigned pos, const PosVec& pos_vec,
1076 PhysReg r1, PhysReg r2) {
1077 if (r1 == InvalidReg) return r2;
1078 if (r2 == InvalidReg) return r1;
1080 if (pos_vec[r1] >= pos) {
1081 if (pos <= pos_vec[r2] && pos_vec[r2] < pos_vec[r1]) {
1082 return r2;
1084 } else {
1085 if (pos_vec[r2] > pos_vec[r1]) {
1086 return r2;
1089 return r1;
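// For example (hypothetical positions), with pos = 10:
//   pos_vec[r1] = 12, pos_vec[r2] = 50 -> r1 (tightest fit at or after 10)
//   pos_vec[r1] = 4,  pos_vec[r2] = 8  -> r2 (neither reaches 10; take higher)
//   pos_vec[r1] = 12, pos_vec[r2] = 12 -> r1 (ties prefer r1)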
1093 * Like choose_closest_after(), but iterates through `pos_vec'.
1095 PhysReg find_closest_after(unsigned pos, const PosVec& pos_vec) {
1096 auto ret = InvalidReg;
1097 for (auto const r : pos_vec) {
1098 ret = choose_closest_after(pos, pos_vec, ret, r);
1100 return ret;
1103 ///////////////////////////////////////////////////////////////////////////////
1104 // Hints.
1107 * A member of a "phi group"---i.e., the set of all variables which flow into
1108 * one another via a set of corresponding phijmp's and phidef's.
1110 struct PhiVar {
1111 Vreg r; // the Vreg that is used or def'd in the phi
1112 unsigned pos; // position of the phijmp or phidef
1116 * Hint metadata that doesn't fit into the `variables' vector.
1118 struct HintInfo {
1119 jit::vector<jit::vector<PhiVar>> phi_groups;
1123 * Add the phi use of `r' at `pos' to the phi group given by `pgid'.
1125 void addPhiGroupMember(const jit::vector<Variable*>& variables,
1126 jit::vector<jit::vector<PhiVar>>& phi_groups,
1127 Vreg r, unsigned pos, int pgid,
1128 Vreg hint = Vreg{}, Vreg other = Vreg{}) {
1129 assertx(phi_groups.size() > pgid);
1130 assertx(!phi_groups[pgid].empty());
1132 auto const var = variables[r];
1133 assertx(var != nullptr);
1135 auto const ivl = var->ivl();
1136 auto& u = pos == var->def_pos
1137 ? ivl->uses.front()
1138 : ivl->uses[ivl->findUse(pos)];
1139 assertx(u.pos == pos);
1141 if (hint.isValid()) u.hint = hint;
1142 if (other.isValid()) u.alt_hint = other;
1144 u.phi_group = pgid;
1145 phi_groups[pgid].push_back({r, u.pos});
1149 * Collect hint metadata for phis.
1151 * The return value of this pass represents an equivalence relation over phi
1152 * uses and defs. The relation is defined as follows:
1154 * Consider a phidef F with arity n (i.e., it defines n variables). F gives
1155 * rise to n equivalence classes, each of which consists of the i-th use or def
1156 * variable from each phi instruction in the transitive closure of the predicate
1158 * P(f) := { every phijmp or phijcc which targets f, and every other phidef
1159 * targeted by those phijmp's and phijcc's }
1161 * applied to F.
1163 * Or, taking a pictorial example:
1164 * _________________________________________________________
1165 * | |
1166 * | +---------+ |
1167 * | | | |
1168 * | v | |
1169 * | phijmp(x1, y1) phijcc(x2, y2) phijmp(x3, y3) . |
1170 * | | | | . |
1171 * | | +-------------+ +-------------+ . |
1172 * | | | | | | |
1173 * | v v v v | |
1174 * | phidef(x4, y4) phidef(x5, y5) -------------------+ |
1175 * | | |
1176 * |______ . ________________________________________________|
1179 * ______ | __________________________
1180 * | v |
1181 * | phijmp(x6, y6)--->phidef(x7, y7) |
1182 * |___________________________________|
1184 * The equivalence classes in this example are {x1, ..., x5}, {y1, ..., y5},
1185 * {x6, x7}, and {y6, y7}.
1187 * We use these equivalence classes during register allocation to try to assign
1188 * the same register to all the variables in a phi group (at least at those
1189 * positions where the phi instructions occur).
1191 * Note that this equivalence relation does /not/ capture the idea that we
1192 * probably want the same register for x4 as we do for x6 and x7. To track
1193 * that information, we set the `hint' on phi uses to the corresponding phidef
1194 * variable (or one of the two, in the case of phijcc), then let the hint back
1195 * propagation pass handle it. (Note that we do /not/ set the `hint' field at
1196 * phi defs, nor do we try to use it at any point.)
1198 jit::vector<jit::vector<PhiVar>>
1199 analyzePhiHints(const Vunit& unit, const VxlsContext& ctx,
1200 const jit::vector<Variable*>& variables) {
1201 auto phi_groups = jit::vector<jit::vector<PhiVar>>{};
1204 * Get the phi group for r's def, optionally allocating a new phi group if
1205 * necessary.
1207 auto const def_phi_group = [&] (Vreg r, bool alloc) {
1208 auto const var = variables[r];
1209 assertx(var != nullptr);
1210 assertx(var->def_inst.op == Vinstr::phidef);
1212 auto& u = var->ivl()->uses.front();
1214 if (u.phi_group == kInvalidPhiGroup && alloc) {
1215 u.phi_group = phi_groups.size();
1216 phi_groups.emplace_back(jit::vector<PhiVar>{{r, u.pos}});
1218 return u.phi_group;
1221 auto const is_fixed = [&] (Vreg r) { return variables[r]->fixed(); };
1223 for (auto const b : ctx.blocks) {
1224 auto const& code = unit.blocks[b].code;
1225 if (code.empty()) continue;
1227 auto const& first = code.front();
1228 auto const& last = code.back();
1230 if (first.op == Vinstr::phidef) {
1231 // A phidef'd variable just needs to be assigned a phi group if it doesn't
1232 // already have one (i.e., from handling a phijmp).
1233 auto const& defs = unit.tuples[first.phidef_.defs];
1234 for (auto const r : defs) {
1235 if (is_fixed(r)) continue;
1236 def_phi_group(r, true);
1240 if (last.op == Vinstr::phijmp) {
1241 auto const target = last.phijmp_.target;
1242 auto const& def_inst = unit.blocks[target].code.front();
1243 assertx(def_inst.op == Vinstr::phidef);
1245 auto const& uses = unit.tuples[last.phijmp_.uses];
1246 auto const& defs = unit.tuples[def_inst.phidef_.defs];
1247 assertx(uses.size() == defs.size());
1249 // Set the phi group for each phi use variable to that of the
1250 // corresponding phi def variable (allocating a new group if the def
1251 // variable has not already been assigned one).
1252 for (size_t i = 0, n = uses.size(); i < n; ++i) {
1253 if (is_fixed(defs[i])) continue;
1255 auto const pgid = def_phi_group(defs[i], true);
1256 addPhiGroupMember(variables, phi_groups,
1257 uses[i], last.pos, pgid, defs[i]);
1259 } else if (last.op == Vinstr::phijcc) {
1260 auto const next = last.phijcc_.targets[0];
1261 auto const taken = last.phijcc_.targets[1];
1262 auto const& next_inst = unit.blocks[next].code.front();
1263 auto const& taken_inst = unit.blocks[taken].code.front();
1264 assertx(next_inst.op == Vinstr::phidef);
1265 assertx(taken_inst.op == Vinstr::phidef);
1267 auto const& uses = unit.tuples[last.phijcc_.uses];
1268 auto const& next_defs = unit.tuples[next_inst.phidef_.defs];
1269 auto const& taken_defs = unit.tuples[taken_inst.phidef_.defs];
1270 assertx(uses.size() == next_defs.size());
1271 assertx(uses.size() == taken_defs.size());
1273 for (size_t i = 0, n = uses.size(); i < n; ++i) {
1274 if (is_fixed(next_defs[i]) || is_fixed(taken_defs[i])) continue;
1276 auto const next_pgid = def_phi_group(next_defs[i], false);
1277 auto const taken_pgid = def_phi_group(taken_defs[i], false);
1278 auto pgid = next_pgid;
1280 if (next_pgid == kInvalidPhiGroup) {
1281 pgid = taken_pgid == kInvalidPhiGroup
1282 ? def_phi_group(taken_defs[i], true)
1283 : taken_pgid;
1284 addPhiGroupMember(variables, phi_groups,
1285 next_defs[i], next_inst.pos, pgid);
1286 } else if (taken_pgid == kInvalidPhiGroup) {
1287 pgid = next_pgid; // we know this is valid
1288 addPhiGroupMember(variables, phi_groups,
1289 taken_defs[i], taken_inst.pos, pgid);
1290 } else if (next_pgid != taken_pgid) {
1291 // We managed to hit phijmps to both of our target phidefs before
1292 // hitting this phijcc. This should be pretty rare, but let's go
1293 // ahead and merge the two equivalence classes into one.
1294 phi_groups[pgid].reserve(phi_groups[next_pgid].size() +
1295 phi_groups[taken_pgid].size());
1296 for (auto const& phiv : phi_groups[taken_pgid]) {
1297 addPhiGroupMember(variables, phi_groups,
1298 phiv.r, phiv.pos, pgid);
1300 phi_groups[taken_pgid].clear();
1303 // Pick the hotter phidef as the hint, else prefer `next'.
1304 auto const next_hotter =
1305 static_cast<unsigned>(unit.blocks[next].area_idx) <=
1306 static_cast<unsigned>(unit.blocks[taken].area_idx);
1307 auto const hint = next_hotter ? next_defs[i] : taken_defs[i];
1308 auto const other = next_hotter ? taken_defs[i] : next_defs[i];
1309 addPhiGroupMember(variables, phi_groups,
1310 uses[i], last.pos, pgid, hint, other);
1314 return phi_groups;
1317 HintInfo analyzeHints(const Vunit& unit, const VxlsContext& ctx,
1318 const jit::vector<Variable*>& variables) {
1319 HintInfo hint_info;
1320 hint_info.phi_groups = analyzePhiHints(unit, ctx, variables);
1322 return hint_info;
1326 * Try to return the physical register that would coalesce `ivl' with its
1327 * hinted source.
1329 PhysReg tryCoalesce(const jit::vector<Variable*>& variables,
1330 const Interval* ivl) {
1331 assertx(ivl == ivl->leader());
1332 assertx(!ivl->uses.empty());
1334 auto const& def = ivl->uses.front();
1335 assertx(def.pos == ivl->var->def_pos);
1337 if (!def.hint.isValid()) return InvalidReg;
1338 auto const hint_var = variables[def.hint];
1340 for (auto hvl = hint_var->ivl(); hvl; hvl = hvl->next) {
1341 if (def.pos == hvl->end()) return hvl->reg;
1343 return InvalidReg;
1347 * Scan the uses of `ivl' for phi nodes, and return a pair of hints for the
1348 * first one we find.
1350 * For a given phi node F, the first of the two hints we return is derived from
1351 * only those phijmps and phijccs targeting F (if F is a phidef), or from the
1352 * phidefs F targets (if F is a phijmp or phijcc). The second hint accounts
1353 * for F's entire equivalence class---see analyzePhiHints() for more details.
1355 folly::Optional<std::pair<PhysReg,PhysReg>>
1356 tryPhiHint(const jit::vector<Variable*>& variables,
1357 const Interval* ivl, const HintInfo& hint_info,
1358 const PosVec& free_until) {
1359 auto const choose = [&] (PhysReg h1, PhysReg h2) {
1360 return choose_closest_after(ivl->end(), free_until, h1, h2);
1363 for (auto const& u : ivl->uses) {
1364 // Look for the first phi hint.
1365 if (u.phi_group == kInvalidPhiGroup) continue;
1367 auto preferred = InvalidReg;
1368 auto group_hint = InvalidReg;
1370 // Choose a register to be the hint.
1371 for (auto const& phiv : hint_info.phi_groups[u.phi_group]) {
1372 auto const var = variables[phiv.r];
1373 assertx(var);
1375 auto const phivl = var->ivlAtUse(phiv.pos);
1376 if (phivl->reg == InvalidReg) continue;
1378 auto const& phu = phivl->uses[phivl->findUse(phiv.pos)];
1379 assertx(phu.pos == phiv.pos);
1381 // Prefer to use a hint from a corresponding phi node.
1382 if (u.hint == phiv.r || u.alt_hint == phiv.r ||
1383 phu.hint == ivl->var->vreg || phu.alt_hint == ivl->var->vreg) {
1384 preferred = choose(preferred, phivl->reg);
1386 // In the end, though, we're okay with any hint from the group.
1387 group_hint = choose(group_hint, phivl->reg);
1389 return std::make_pair(preferred, group_hint);
1391 return folly::none;
1395 * Choose an appropriate hint for the (sub-)interval `ivl'.
1397 * The register allocator passes us constraints via `free_until' and `allow'.
1398 * We use the `free_until' vector to choose a hint that most tightly covers the
1399 * lifetime of `ivl'; we use `allow' to ensure that our hint satisfies
1400 * constraints.
1402 PhysReg chooseHint(const jit::vector<Variable*>& variables,
1403 const Interval* ivl, const HintInfo& hint_info,
1404 const PosVec& free_until, RegSet allow) {
1405 if (!RuntimeOption::EvalHHIREnablePreColoring &&
1406 !RuntimeOption::EvalHHIREnableCoalescing) return InvalidReg;
1408 auto const choose = [&] (PhysReg h1, PhysReg h2) {
1409 return choose_closest_after(ivl->end(), free_until, h1, h2);
1411 auto hint = InvalidReg;
1413 // If `ivl' contains its def, try a coalescing hint.
1414 if (ivl == ivl->leader()) {
1415 hint = tryCoalesce(variables, ivl);
1418 // Return true if we should return `h'.
1419 auto const check_and_trace = [&] (PhysReg h, const char* prefix) {
1420 if (h == InvalidReg) return false;
1421 if (allow.contains(h)) {
1422 FTRACE(kHintLevel, "{}hinting {} for %{} @ {}\n",
1423 prefix, show(h), size_t(ivl->var->vreg), ivl->start());
1424 return true;
1426 FTRACE(kHintLevel, " found mismatched hint for %{} @ {}\n",
1427 size_t(ivl->var->vreg), ivl->uses.front().pos);
1428 return false;
1431 auto const is_phi_use = !ivl->uses.empty() &&
1432 ivl->uses.front().phi_group != kInvalidPhiGroup;
1434 // If we have either a backwards or a forwards hint, return it---if we have
1435 // both, return whichever is free the longest. For phi uses, however, we
1436 // want to consider the whole phi group's allocations first.
1437 if (!is_phi_use && check_and_trace(hint, "")) return hint;
1439 // Try to determine a hint via the first phi use in the interval.
1440 auto const phi_hints = tryPhiHint(variables, ivl, hint_info, free_until);
1441 bool is_phi_hint = false;
1443 if (phi_hints) {
1444 hint = choose(phi_hints->first, hint);
1445 if (hint != phi_hints->first) {
1446 hint = choose(hint, phi_hints->second);
1448 is_phi_hint = hint == phi_hints->first ||
1449 hint == phi_hints->second;
1451 if (phi_hints || is_phi_use) {
1452 if (check_and_trace(hint, debug && is_phi_hint ? "phi-" : "")) {
1453 return hint;
1457 // Crawl through our uses and look for a physical hint.
1458 for (auto const& u : ivl->uses) {
1459 if (!u.hint.isValid() ||
1460 !u.hint.isPhys() ||
1461 !allow.contains(u.hint)) continue;
1462 hint = choose(hint, u.hint);
1465 if (hint != InvalidReg) {
1466 FTRACE(kHintLevel, "physical hint {} for %{} @ {}\n",
1467 show(hint), size_t(ivl->var->vreg), ivl->start());
1469 return hint;
1472 ///////////////////////////////////////////////////////////////////////////////
1473 // Register allocation.
1476 * Information about spills generated by register allocation.
1478 * Used for the allocateSpillSpace() pass which inserts the instructions that
1479 * create spill space on the stack.
1481 struct SpillInfo {
1482 // Number of intervals spilled.
1483 unsigned num_spills{0};
1484 // Number of spill slots used.
1485 size_t used_spill_slots{0};
1489 * Extended Linear Scan register allocator over vasm virtual registers (Vregs).
1491 * This encapsulates the intermediate data structures used during the
1492 * allocation phase of the algorithm so we don't have to pass them around
1493 * everywhere.
1495 struct Vxls {
1496 Vxls(const VxlsContext& ctx,
1497 const jit::vector<Variable*>& variables,
1498 const HintInfo& hint_info)
1499 : ctx(ctx)
1500 , variables(variables)
1501 , hint_info(hint_info)
1504 SpillInfo go();
1506 private:
1507 void assignSpill(Interval* ivl);
1508 void spill(Interval*);
1509 void assignReg(Interval*, PhysReg);
1511 unsigned nearestSplitBefore(unsigned pos);
1512 unsigned constrain(Interval*, RegSet&);
1514 void update(Interval*);
1515 void allocate(Interval*);
1516 void allocBlocked(Interval*);
1517 void spillOthers(Interval* current, PhysReg r);
1519 private:
1521 * Comparison function for pending priority queue.
1523 * std::priority_queue requires a less operation, but sorts the heap
1524 * highest-first; we need the opposite (lowest-first), so use greater-than.
1526 struct Compare {
1527 bool operator()(const Interval* i1, const Interval* i2) {
1528 return i1->start() > i2->start();
1532 private:
1533 const VxlsContext& ctx;
1535 // Variables, null if unused.
1536 const jit::vector<Variable*>& variables;
1537 // Hint metadata.
1538 const HintInfo& hint_info;
1539 // Subintervals sorted by Interval start.
1540 jit::priority_queue<Interval*,Compare> pending;
1541 // Intervals that overlap.
1542 jit::vector<Interval*> active, inactive;
1543 // Last position each spill slot was owned; kMaxPos means currently used.
1544 jit::array<unsigned, kMaxSpillSlots> spill_slots{{0}};
1545 // Stats on spills.
1546 SpillInfo spill_info;
1549 SpillInfo Vxls::go() {
1550 for (auto var : variables) {
1551 if (!var) continue;
1552 if (var->fixed()) {
1553 assignReg(var->ivl(), var->vreg);
1554 } else if (var->constant) {
1555 spill(var->ivl());
1556 } else {
1557 pending.push(var->ivl());
1560 while (!pending.empty()) {
1561 auto current = pending.top();
1562 pending.pop();
1563 update(current);
1564 allocate(current);
1566 return spill_info;
1570 * Assign the next available spill slot to `ivl'.
1572 void Vxls::assignSpill(Interval* ivl) {
1573 assertx(!ivl->fixed() && ivl != ivl->leader());
1575 if (ivl->var->slot != kInvalidSpillSlot) return;
1577 auto& used_spill_slots = spill_info.used_spill_slots;
1579 auto const assign_slot = [&] (size_t slot) {
1580 ivl->var->slot = slot;
1581 ++spill_info.num_spills;
1583 spill_slots[slot] = kMaxPos;
1584 if (!ivl->var->wide) {
1585 used_spill_slots = std::max(used_spill_slots, slot + 1);
1586 } else {
1587 used_spill_slots = std::max(used_spill_slots, slot + 2);
1588 spill_slots[slot + 1] = kMaxPos;
1592 // Assign spill slots. We track the highest position at which a spill slot
1593 // was owned, and only reassign it to a Vreg if its lifetime interval
1594 // (including all splits) is strictly above that high water mark.
1595 if (!ivl->var->wide) {
1596 for (size_t slot = 0, n = spill_slots.size(); slot < n; ++slot) {
1597 if (ivl->leader()->start() >= spill_slots[slot]) {
1598 return assign_slot(slot);
1601 } else {
1602 for (size_t slot = 0, n = spill_slots.size() - 1; slot < n; slot += 2) {
1603 if (ivl->leader()->start() >= spill_slots[slot] &&
1604 ivl->leader()->start() >= spill_slots[slot + 1]) {
1605 return assign_slot(slot);
1610 // Ran out of spill slots.
1611 ONTRACE(kRegAllocLevel, dumpVariables(variables, spill_info.num_spills));
1612 TRACE(1, "vxls-punt TooManySpills\n");
1613 TRACE_PUNT("LinearScan_TooManySpills");
1617 * Assign `r' to `ivl'.
1619 void Vxls::assignReg(Interval* ivl, PhysReg r) {
1620 if (!ivl->fixed() && ivl->uses.empty()) {
1621 ivl->reg = InvalidReg;
1622 if (!ivl->constant()) assignSpill(ivl);
1623 } else {
1624 ivl->reg = r;
1625 active.push_back(ivl);
1630 * Spill `ivl' from its start until its first register use.
1632 * Spill `ivl' if there is no use; otherwise split the interval just before the
1633 * use, and enqueue the second part.
1635 void Vxls::spill(Interval* ivl) {
1636 unsigned first_use = ivl->firstUse();
1637 if (first_use <= ivl->end()) {
1638 auto split_pos = nearestSplitBefore(first_use);
1639 if (split_pos <= ivl->start()) {
1640 // This can only happen if we need more than the available registers
1641 // at a single position. It can happen in phijmp or callargs.
1642 TRACE(1, "vxls-punt RegSpill\n");
1643 TRACE_PUNT("RegSpill"); // cannot split before first_use
1645 pending.push(ivl->split(split_pos));
1647 ivl->reg = InvalidReg;
1648 if (!ivl->constant()) assignSpill(ivl);
1652 * Update the active and inactive lists for the start of `current'.
1654 void Vxls::update(Interval* current) {
1655 auto const pos = current->start();
1657 auto const free_spill_slot = [this] (Interval* ivl) {
1658 assertx(!ivl->next);
1659 auto slot = ivl->var->slot;
1661 if (slot != kInvalidSpillSlot) {
1662 if (ivl->var->wide) {
1663 assertx(spill_slots[slot + 1]);
1664 spill_slots[slot + 1] = ivl->end();
1666 assertx(spill_slots[slot]);
1667 spill_slots[slot] = ivl->end();
1671 // Check for active/inactive intervals that have expired or which need their
1672 // polarity flipped.
1673 auto const update_list = [&] (jit::vector<Interval*>& target,
1674 jit::vector<Interval*>& other,
1675 bool is_active) {
1676 auto end = target.end();
1677 for (auto i = target.begin(); i != end;) {
1678 auto ivl = *i;
1679 if (pos >= ivl->end()) {
1680 *i = *--end;
1681 if (!ivl->next) free_spill_slot(ivl);
1682 } else if (is_active ? !ivl->covers(pos) : ivl->covers(pos)) {
1683 *i = *--end;
1684 other.push_back(ivl);
1685 } else {
1686 i++;
1689 target.erase(end, target.end());
1691 update_list(active, inactive, true);
1692 update_list(inactive, active, false);
1696 * Return the closest split position on or before `pos'.
1698 * The result might be exactly on an edge, or in-between instruction positions.
1700 unsigned Vxls::nearestSplitBefore(unsigned pos) {
1701 auto b = blockFor(ctx, pos);
1702 auto range = ctx.block_ranges[b];
1703 if (pos == range.start) return pos;
1704 return (pos - 1) | 1;
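// Instructions sit at even positions and the odd positions between them are
// reserved for inserted copies, so this rounds down to an odd position unless
// `pos' is a block start. For example (hypothetical positions): 17 -> 17,
// 16 -> 15, 15 -> 15.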
1708 * Constrain the allowable registers for `ivl' by inspecting uses.
1710 * Returns the latest position for which `allow' (which we populate) is valid.
1711 * We use this return value to fill the `free_until' PosVec in allocate()
1712 * below. That data structure tracks the first position at which a register is
1713 * /unavailable/, so it would appear that constrain()'s return value is
1714 * off-by-one.
1716 * In fact, it is not; we actually /need/ this position offsetting because of
1717 * our leniency towards having uses at an Interval's end() position. If we
1718 * fail to constrain on an end-position use, we must still split and spill.
1719 * (In contrast, if we intersect with another Interval on an end position use,
1720 * it's okay because SSA tells us that the conflict must be the other
1721 * Interval's def position, and a use and a def at the same position don't
1722 * actually conflict; see the fun ASCII diagram that adorns the definition of
1723 * Interval).
1725 unsigned Vxls::constrain(Interval* ivl, RegSet& allow) {
1726 auto const any = ctx.abi.unreserved() - ctx.abi.sf; // Any but not flags.
1727 allow = ctx.abi.unreserved();
1728 for (auto& u : ivl->uses) {
1729 auto need = u.kind == Constraint::Simd ? ctx.abi.simdUnreserved :
1730 u.kind == Constraint::Gpr ? ctx.abi.gpUnreserved :
1731 u.kind == Constraint::Sf ? ctx.abi.sf :
1732 any; // Any or CopySrc
1733 if ((allow & need).empty()) {
1734 // cannot satisfy constraints; must split before u.pos
1735 return u.pos - 1;
1737 allow &= need;
1739 return kMaxPos;
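// For example (hypothetical interval): given a Gpr use at position 20 and an
// Sf use at position 30, after the first use `allow' == gpUnreserved; since
// gpUnreserved and sf are disjoint, constrain() returns 29, and the caller
// must split before the flags use.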
1743 * Return the next intersection point between `current' and `other', or kMaxPos
1744 * if they never intersect.
1746 * We assume that `other', if it represents an SSA variable, is not live at the
1747 * start of `current'.
1749 * Note that if two intervals intersect, the first point of intersection will
1750 * always be the start of one of the intervals, because SSA ensures that a def
1751 * dominates all uses, and hence all live ranges as well.
1753 unsigned nextIntersect(const Interval* current, const Interval* other) {
1754 assertx(!current->fixed());
1756 if (current == current->leader() &&
1757 other == other->leader() &&
1758 !other->fixed()) {
1759 // Since `other' is an inactive Vreg interval, it cannot cover current's
1760 // start, and `current' cannot cover other's start, since `other' started
1761 // earlier. Therefore, SSA guarantees no intersection.
1762 return kMaxPos;
1764 if (current->end() <= other->start() ||
1765 other->end() <= current->start()) {
1766 // One ends before the other starts.
1767 return kMaxPos;
1769 // r1,e1 span all of current
1770 auto r1 = current->ranges.begin();
1771 auto e1 = current->ranges.end();
1772 // r2,e2 span the tail of other that might intersect current
1773 auto r2 = other->ranges.begin() + other->findRange(current->start());
1774 auto e2 = other->ranges.end();
1775 // search for the lowest position covered by current and other
1776 for (;;) {
1777 if (r1->start < r2->start) {
1778 if (r2->start < r1->end) return r2->start;
1779 if (++r1 == e1) return kMaxPos;
1780 } else {
1781 if (r1->start < r2->end) return r1->start;
1782 if (++r2 == e2) return kMaxPos;
1785 return kMaxPos;
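// For example (hypothetical ranges): if `current' covers [10,12) and [20,30)
// while `other' covers [12,16) and [24,26), the walk skips the disjoint
// prefixes and returns 24, the first position covered by both intervals.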

void Vxls::allocate(Interval* current) {
  // Map from PhysReg until the first position at which it is /not/ available.
  PosVec free_until; // 0 by default

  RegSet allow;
  auto const conflict = constrain(current, allow);

  // Mark regs that fit our constraints as free up until the point of
  // conflict, unless they're owned by active intervals---then mark them used.
  allow.forEach([&](PhysReg r) {
    free_until[r] = conflict;
  });
  for (auto ivl : active) {
    free_until[ivl->reg] = 0;
  }

  // Mark each reg assigned to an inactive interval as only free until the
  // first position at which `current' intersects that interval.
  for (auto ivl : inactive) {
    auto r = ivl->reg;
    if (free_until[r] == 0) continue;
    auto until = nextIntersect(current, ivl);
    free_until[r] = std::min(until, free_until[r]);
  }
  if (current->ranges.size() > 1) {
    auto const b = blockFor(ctx, current->start());
    auto const blk_range = ctx.block_ranges[b];
    if (blk_range.end > current->ranges[0].end) {
      // We're assigning a register to an interval with multiple ranges, but
      // the vreg isn't live out of the first range.  This means there's no
      // connection between this range and any subsequent one, so we can
      // safely break the interval after the first range without making
      // things worse.  On the other hand, it can make things better, by
      // e.g. not assigning a constant to a register in an unlikely exit
      // block, and then holding it in a callee-save reg across lots of
      // unrelated code until it's used again in another unlikely exit block.
      auto second = current->split(blk_range.end, false);
      pending.push(second);
    } else if (current->constant() &&
               current->uses.size() &&
               current->uses[0].pos >= blk_range.end) {
      // We probably don't want to load a constant into a register at the
      // start of a block where it's not used.
      return spill(current);
    }
  }
  // Choose a register to allocate to `current', either via a hint or by
  // various heuristics.
  auto const choose_reg = [&] {
    auto const hint = chooseHint(variables, current,
                                 hint_info, free_until, allow);
    if (hint != InvalidReg &&
        free_until[hint] >= current->end()) {
      return hint;
    }

    auto ret = InvalidReg;
    for (auto const r : free_until) {
      ret = choose_closest_after(current->end(), free_until, ret, r);
    }
    return ret;
  };

  auto const r = choose_reg();
  auto const pos = free_until[r];
  if (pos >= current->end()) {
    return assignReg(current, r);
  }

  if (pos > current->start()) {
    // `r' is free for the first part of current.
    auto const prev_use = current->lastUseBefore(pos);

    DEBUG_ONLY auto min_split = std::max(prev_use, current->start() + 1);
    assertx(min_split <= pos);

    auto split_pos = nearestSplitBefore(pos);
    if (split_pos > current->start()) {
      if (prev_use && prev_use < split_pos) {
        // If there are uses in previous blocks, but no uses between the start
        // of the block containing `split_pos' and `split_pos' itself, we
        // should split earlier; otherwise we'll need to insert moves/loads on
        // the edge(s) into this block, which clearly can't be used since
        // we're spilling before the first use.  Might as well spill on a
        // block boundary, as early as possible.
        auto prev_range_idx = current->findRange(prev_use);
        auto prev_range = &current->ranges[prev_range_idx];
        if (prev_range->start <= prev_use && prev_range->end < split_pos) {
          prev_range++;
        }
        if (prev_range->start > prev_use && prev_range->start < split_pos) {
          split_pos = prev_range->start;
        }
      }

      // Split `current'.  We keep uses at the end of the first split because
      // we know that `r' is free up to /and including/ that position.
      auto second = current->split(split_pos, true /* keep_uses */);
      pending.push(second);

      // Try to find a register again.  Since `current' is shorter, we might
      // have a different hint, or a different heuristically-determined
      // best-reg.
      auto const r = choose_reg();
      assertx(free_until[r] >= current->end());
      return assignReg(current, r);
    }
  }

  // Must spill `current' or another victim.
  allocBlocked(current);
}
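
/*
 * A sketch of the selection heuristic inside choose_reg above (toy types,
 * not the real PosVec/PhysReg): prefer a register that stays free past
 * current->end(), taking the tightest such fit so long-lived free registers
 * remain available; otherwise take the register that stays free the longest,
 * so the forced split lands as late as possible.
 *
 *   #include <array>
 *
 *   // free_until[r] = first position at which register r is unavailable.
 *   int toyChoose(const std::array<unsigned, 16>& free_until, unsigned end) {
 *     int best = 0;
 *     for (int r = 1; r < 16; ++r) {
 *       auto const b = free_until[best];
 *       auto const f = free_until[r];
 *       if (b >= end) {
 *         if (f >= end && f < b) best = r; // tighter full cover
 *       } else if (f > b) {
 *         best = r;                        // no full cover yet: free longest
 *       }
 *     }
 *     return best;
 *   }
 */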

/*
 * When all registers are in use, find a good interval (possibly `current')
 * to split and spill.
 *
 * When an interval is split and the second part is spilled, possibly split
 * the second part again before the next use-pos that requires a register,
 * and enqueue the third part.
 */
void Vxls::allocBlocked(Interval* current) {
  auto const cur_start = current->start();

  RegSet allow;
  auto const conflict = constrain(current, allow); // repeated from allocate

  // Track the positions (a) at which each PhysReg is next used by any
  // lifetime interval to which it's assigned (`used'), and (b) at which each
  // PhysReg is next assigned to a value whose lifetime intersects `current'
  // (`blocked').
  PosVec used, blocked;
  allow.forEach([&](PhysReg r) { used[r] = blocked[r] = conflict; });

  // compute next use of active registers, so we can pick the furthest one
  for (auto ivl : active) {
    if (ivl->fixed()) {
      blocked[ivl->reg] = used[ivl->reg] = 0;
    } else {
      auto use_pos = ivl->firstUseAfter(cur_start);
      used[ivl->reg] = std::min(use_pos, used[ivl->reg]);
    }
  }

  // compute next intersection/use of inactive regs to find what's free longest
  for (auto ivl : inactive) {
    auto const r = ivl->reg;
    if (blocked[r] == 0) continue;

    auto intersect_pos = nextIntersect(current, ivl);
    if (intersect_pos == kMaxPos) continue;

    if (ivl->fixed()) {
      blocked[r] = std::min(intersect_pos, blocked[r]);
      used[r] = std::min(blocked[r], used[r]);
    } else {
      auto use_pos = ivl->firstUseAfter(cur_start);
      used[r] = std::min(use_pos, used[r]);
    }
  }

  // Choose the best victim register(s) to spill---the one with first-use
  // after, but closest to, the lifetime of `current'; or the one with the
  // farthest first-use if no such register exists.
  auto r = find_closest_after(current->end(), used);

  // If all other registers are used by their owning intervals before the
  // first register-use of `current', then we have to spill `current'.
  if (used[r] < current->firstUse()) {
    return spill(current);
  }

  auto const block_pos = blocked[r];
  if (block_pos < current->end()) {
    // If /every/ usable register is assigned to a lifetime interval which
    // intersects with `current', we have to split current before that point.
    auto prev_use = current->lastUseBefore(block_pos);

    DEBUG_ONLY auto min_split = std::max(prev_use, cur_start + 1);
    auto max_split = block_pos;
    assertx(cur_start < min_split && min_split <= max_split);

    auto split_pos = nearestSplitBefore(max_split);
    if (split_pos > current->start()) {
      auto second = current->split(split_pos, true /* keep_uses */);
      pending.push(second);
    }
  }

  spillOthers(current, r);
  assignReg(current, r);
}

/*
 * Split and spill other intervals that conflict with `current' for register
 * r, at current->start().
 *
 * If necessary, split the victims again before their first use position that
 * requires a register.
 */
void Vxls::spillOthers(Interval* current, PhysReg r) {
  auto const cur_start = current->start();

  // Split `ivl' at `cur_start' and spill the second part.  If `cur_start' is
  // too close to ivl->start(), spill all of `ivl' instead.
  auto const spill_after = [&] (Interval* ivl) {
    auto const split_pos = nearestSplitBefore(cur_start);
    auto const tail = split_pos <= ivl->start() ? ivl : ivl->split(split_pos);
    spill(tail);
  };

  // Split and spill other active intervals after `cur_start'.
  auto end = active.end();
  for (auto i = active.begin(); i != end;) {
    auto other = *i;
    if (other->fixed() || r != other->reg) {
      i++; continue;
    }
    *i = *--end;
    spill_after(other);
  }
  active.erase(end, active.end());

  // Split and spill any inactive intervals after `cur_start' if they
  // intersect with `current'.
  end = inactive.end();
  for (auto i = inactive.begin(); i != end;) {
    auto other = *i;
    if (other->fixed() || r != other->reg) {
      i++; continue;
    }
    auto intersect = nextIntersect(current, other);
    if (intersect >= current->end()) {
      i++; continue;
    }
    *i = *--end;
    spill_after(other);
  }
  inactive.erase(end, inactive.end());
}
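
/*
 * Both loops above use the unordered "swap with the last live element, then
 * erase the tail" idiom, which removes each victim in O(1) without
 * preserving order.  Minimal generic sketch of the same pattern:
 *
 *   #include <vector>
 *
 *   template<class T, class Pred>
 *   void unordered_erase_if(std::vector<T>& v, Pred pred) {
 *     auto end = v.end();
 *     for (auto i = v.begin(); i != end;) {
 *       if (!pred(*i)) { ++i; continue; }
 *       *i = *--end; // overwrite victim; don't advance, so the moved-in
 *     }              // element is examined on the next iteration
 *     v.erase(end, v.end());
 *   }
 */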

SpillInfo assignRegisters(const VxlsContext& ctx,
                          const jit::vector<Variable*>& variables,
                          const HintInfo& hint_info) {
  return Vxls(ctx, variables, hint_info).go();
}

///////////////////////////////////////////////////////////////////////////////
// Lifetime continuity resolution.

/*
 * A pair of source block number and successor index, used to identify an
 * out-edge.
 */
using EdgeKey = std::pair<Vlabel,unsigned>;

struct EdgeHasher {
  size_t operator()(EdgeKey k) const {
    return size_t(k.first) ^ k.second;
  }
};

/*
 * Copies that are required at a given position or edge.
 *
 * The keys into the PhysReg::Map are the dests; the Interval*'s are the
 * sources (nullptr if no copy is needed).
 */
using CopyPlan = PhysReg::Map<Interval*>;

/*
 * Copy and spill points for resolving split lifetime intervals.
 *
 * After register allocation, some lifetime intervals may have been split,
 * and their Vregs assigned to different physical registers or spill
 * locations.  We use this struct to track where we need to add moves to
 * maintain continuity.  (We also use it to resolve phis.)
 */
struct ResolutionPlan {
  // Where to insert spills.
  jit::hash_map<unsigned,CopyPlan> spills;
  // Where to insert reg-reg moves, or spill and constant loads, between
  // instructions (or in place of copy{} and friends).
  jit::hash_map<unsigned,CopyPlan> copies;

  // Copies and loads on edges (between blocks).
  jit::hash_map<EdgeKey,CopyPlan,EdgeHasher> edge_copies;
};

template<class CopyPlanT>
using void_req = typename std::enable_if<
  std::is_same<CopyPlanT, CopyPlan>::value ||
  std::is_same<CopyPlanT, const CopyPlan>::value
>::type;

/*
 * Iterators for reg-reg copies and for {const,spill}-reg loads.
 */
template<class CopyPlanT, class F>
void_req<CopyPlanT> for_each_copy(CopyPlanT& plan, F f) {
  for (auto dst : plan) {
    auto& ivl = plan[dst];
    if (!ivl || !ivl->live()) continue;
    f(dst, ivl);
  }
}

template<class CopyPlanT, class F>
void_req<CopyPlanT> for_each_load(CopyPlanT& plan, F f) {
  for (auto dst : plan) {
    auto& ivl = plan[dst];
    if (!ivl || ivl->live()) continue;
    assertx(ivl->constant() || ivl->spilled());
    f(dst, ivl);
  }
}
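
/*
 * Usage sketch: a single CopyPlan is consumed in two passes, one for sources
 * still live in a register and one for sources that must be rematerialized
 * or reloaded (see insertCopiesAt()/insertLoadsAt() below):
 *
 *   for_each_copy(plan, [&] (PhysReg dst, const Interval* src) {
 *     // emit dst <- src->reg
 *   });
 *   for_each_load(plan, [&] (PhysReg dst, const Interval* src) {
 *     // emit dst <- constant, or dst <- spill slot of src
 *   });
 */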

/*
 * Insert a spill after the def-position in `ivl'.
 *
 * There's only one such position, because of SSA.
 */
void insertSpill(const VxlsContext& ctx,
                 ResolutionPlan& resolution, Interval* ivl) {
  auto DEBUG_ONLY checkPos = [&](unsigned pos) {
    assertx(pos % 2 == 1);
    DEBUG_ONLY auto const b = blockFor(ctx, pos);
    DEBUG_ONLY auto const& range = ctx.block_ranges[b];
    assertx(pos - 1 >= range.start && pos + 1 < range.end);
    return true;
  };
  auto pos = ivl->var->def_pos + 1;
  assertx(checkPos(pos));
  resolution.spills[pos][ivl->reg] = ivl; // store ivl->reg => ivl->slot
}

/*
 * Insert spills and copies that connect subintervals that were split
 * between instructions.
 */
void resolveSplits(const VxlsContext& ctx,
                   const jit::vector<Variable*>& variables,
                   ResolutionPlan& resolution) {
  for (auto var : variables) {
    if (!var) continue;
    auto i1 = var->ivl();
    if (var->slot >= 0) insertSpill(ctx, resolution, i1);

    for (auto i2 = i1->next; i2; i1 = i2, i2 = i2->next) {
      auto const pos = i2->start();
      if (i1->end() != pos) continue; // spans lifetime hole
      if (i2->reg == InvalidReg) continue; // no load necessary
      if (i2->reg == i1->reg) continue; // no copy necessary

      auto const b = blockFor(ctx, pos);
      auto const range = ctx.block_ranges[b];

      if (pos % 2 == 0) {
        // an even position requiring a copy must be on an edge
        assertx(range.start == pos);
      } else {
        // odd position
        assertx(pos > range.start); // implicit label position per block
        if (pos + 1 == range.end) continue; // copy belongs on successor edge
      }
      assertx(!resolution.copies[pos][i2->reg]);
      resolution.copies[pos][i2->reg] = i1;
    }
  }
}
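
/*
 * Position parity, as relied on above and in insertSpill()/insertCopies():
 * each instruction of a block occupies an even position, and odd positions
 * name the gaps between instructions, roughly
 *
 *   pos 2k   : instruction k of the block (block starts are even)
 *   pos 2k+1 : gap after instruction k, where spills and split-resolving
 *              copies are inserted
 *
 * which is why an even-position copy can only occur at a block boundary,
 * i.e. on an edge.
 */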

/*
 * Lower copyargs{} and copy{} into moveplans at the same position.
 */
void lowerCopies(Vunit& unit, const VxlsContext& ctx,
                 const jit::vector<Variable*>& variables,
                 ResolutionPlan& resolution) {
  // Add a lifetime-resolving copy from `s' to `d'---without touching the
  // instruction stream.
  auto const lower = [&] (unsigned pos, Vreg s, Vreg d) {
    auto v1 = variables[s];
    auto v2 = variables[d];
    assertx(v1 && v2);
    assertx(v2->fixed() || v2->def_pos == pos); // ssa

    auto i1 = v1->fixed() ? v1->ivl() : v1->ivlAtUse(pos);
    auto i2 = v2->ivl();

    if (i2->reg != i1->reg) {
      assertx(!resolution.copies[pos][i2->reg]);
      resolution.copies[pos][i2->reg] = i1;
    }
  };

  for (auto b : ctx.blocks) {
    auto pos = ctx.block_ranges[b].start;

    for (auto& inst : unit.blocks[b].code) {
      if (inst.op == Vinstr::copyargs) {
        auto const& uses = unit.tuples[inst.copyargs_.s];
        auto const& defs = unit.tuples[inst.copyargs_.d];
        for (unsigned i = 0, n = uses.size(); i < n; ++i) {
          lower(pos, uses[i], defs[i]);
        }
        inst = nop{};
      } else if (inst.op == Vinstr::copy2) {
        lower(pos, inst.copy2_.s0, inst.copy2_.d0);
        lower(pos, inst.copy2_.s1, inst.copy2_.d1);
        inst = nop{};
      } else if (inst.op == Vinstr::copy) {
        lower(pos, inst.copy_.s, inst.copy_.d);
        inst = nop{};
      }
      pos += 2;
    }
  }
}

/*
 * Search for the phidef in block `b', then return its dest tuple.
 */
Vtuple findPhiDefs(const Vunit& unit, Vlabel b) {
  assertx(!unit.blocks[b].code.empty() &&
          unit.blocks[b].code.front().op == Vinstr::phidef);
  return unit.blocks[b].code.front().phidef_.defs;
}

/*
 * Register copy resolutions for livein sets and phis.
 */
void resolveEdges(Vunit& unit, const VxlsContext& ctx,
                  const jit::vector<Variable*>& variables,
                  ResolutionPlan& resolution) {
  auto const addPhiEdgeCopies = [&] (Vlabel block, Vlabel target,
                                     uint32_t targetIndex,
                                     const VregList& uses) {
    auto const p1 = ctx.block_ranges[block].end - 2;
    auto const& defs = unit.tuples[findPhiDefs(unit, target)];

    for (unsigned i = 0, n = uses.size(); i < n; ++i) {
      auto v1 = variables[uses[i]];
      auto v2 = variables[defs[i]];
      assertx(v1 && v2);

      auto i1 = v1->fixed() ? v1->ivl() : v1->ivlAtUse(p1);
      auto i2 = v2->ivl();

      if (i2->reg != i1->reg) {
        EdgeKey edge { block, targetIndex };
        assertx(!resolution.edge_copies[edge][i2->reg]);
        resolution.edge_copies[edge][i2->reg] = i1;
      }
    }
  };

  for (auto b1 : ctx.blocks) {
    auto const p1 = ctx.block_ranges[b1].end - 2;
    auto& block1 = unit.blocks[b1];
    auto& inst1 = block1.code.back();

    // Add resolutions for phis.
    if (inst1.op == Vinstr::phijmp) {
      auto const& phijmp = inst1.phijmp_;
      auto const target = phijmp.target;
      auto const& uses = unit.tuples[phijmp.uses];
      addPhiEdgeCopies(b1, target, 0, uses);
      inst1 = jmp{target};
    } else if (inst1.op == Vinstr::phijcc) {
      auto const& phijcc = inst1.phijcc_;
      auto const& targets = phijcc.targets;
      auto const& uses = unit.tuples[phijcc.uses];
      addPhiEdgeCopies(b1, targets[0], 0, uses);
      addPhiEdgeCopies(b1, targets[1], 1, uses);
      inst1 = jcc{phijcc.cc, phijcc.sf, {targets[0], targets[1]}};
    }

    auto const succlist = succs(block1);

    // Add resolutions for livein sets.
    for (unsigned i = 0, n = succlist.size(); i < n; i++) {
      auto const b2 = succlist[i];
      auto const p2 = ctx.block_ranges[b2].start;

      forEach(ctx.livein[b2], [&] (Vreg vr) {
        auto var = variables[vr];
        if (var->fixed()) return;
        Interval* i1 = nullptr;
        Interval* i2 = nullptr;

        for (auto ivl = var->ivl(); ivl && !(i1 && i2); ivl = ivl->next) {
          if (ivl->covers(p1)) i1 = ivl;
          if (ivl->covers(p2)) i2 = ivl;
        }

        // i2 can be unallocated if the tmp is a constant or is spilled.
        if (i2->reg != InvalidReg && i2->reg != i1->reg) {
          assertx((resolution.edge_copies[{b1,i}][i2->reg] == nullptr));
          resolution.edge_copies[{b1,i}][i2->reg] = i1;
        }
      });
    }
  }
}

/*
 * Walk through the variables list and account for all points where copies or
 * spills need to be made.
 */
ResolutionPlan resolveLifetimes(Vunit& unit, const VxlsContext& ctx,
                                const jit::vector<Variable*>& variables) {
  ResolutionPlan resolution;

  resolveSplits(ctx, variables, resolution);
  lowerCopies(unit, ctx, variables, resolution);
  resolveEdges(unit, ctx, variables, resolution);

  return resolution;
}

///////////////////////////////////////////////////////////////////////////////
// Operand renaming.

/*
 * Visitor class for renaming registers.
 */
struct Renamer {
  Renamer(const jit::vector<Variable*>& variables, unsigned pos)
    : variables(variables)
    , pos(pos)
  {}

  template <class T>
  void imm(const T& /*r*/) {}
  template<class R> void def(R& r) { rename(r); }
  template<class D, class H> void defHint(D& dst, H) { rename(dst); }
  template<class R> void use(R& r) { rename(r); }
  template<class S, class H> void useHint(S& src, H) { rename(src); }
  template<class R> void across(R& r) { rename(r); }
  void across(Vptr& m) {
    if (m.base.isValid()) rename(m.base);
    if (m.index.isValid()) rename(m.index);
  }

  void def(RegSet) {}
  void use(RegSet /*r*/) {}
  void use(Vptr& m) {
    if (m.base.isValid()) rename(m.base);
    if (m.index.isValid()) rename(m.index);
  }
  void use(VcallArgsId) { always_assert(false && "vcall unsupported in vxls"); }

private:
  void rename(Vreg8& r) { r = lookup(r, Constraint::Gpr); }
  void rename(Vreg16& r) { r = lookup(r, Constraint::Gpr); }
  void rename(Vreg32& r) { r = lookup(r, Constraint::Gpr); }
  void rename(Vreg64& r) { r = lookup(r, Constraint::Gpr); }
  void rename(VregDbl& r) { r = lookup(r, Constraint::Simd); }
  void rename(Vreg128& r) { r = lookup(r, Constraint::Simd); }
  void rename(VregSF& r) { r = RegSF{0}; }
  void rename(Vreg& r) { r = lookup(r, Constraint::Any); }
  void rename(Vtuple /*t*/) { /* phijmp/phijcc+phidef handled by resolveEdges */ }

  PhysReg lookup(Vreg vreg, Constraint kind) {
    auto var = variables[vreg];
    if (!var || vreg.isPhys()) return vreg;
    PhysReg reg = var->ivlAtUse(pos)->reg;
    assertx((kind == Constraint::Gpr && reg.isGP()) ||
            (kind == Constraint::Simd && reg.isSIMD()) ||
            (kind == Constraint::Sf && reg.isSF()) ||
            (kind == Constraint::Any && reg != InvalidReg));
    return reg;
  }

private:
  const jit::vector<Variable*>& variables;
  unsigned pos;
};

/*
 * Visit every virtual-register typed operand in `unit', and rename it to its
 * assigned physical register.
 */
void renameOperands(Vunit& unit, const VxlsContext& ctx,
                    const jit::vector<Variable*>& variables) {
  for (auto b : ctx.blocks) {
    auto pos = ctx.block_ranges[b].start;
    for (auto& inst : unit.blocks[b].code) {
      Renamer renamer(variables, pos);
      visitOperands(inst, renamer);
      pos += 2;
    }
  }
  ONTRACE(
    kRegAllocLevel,
    printVariables("after renaming operands", unit, ctx, variables);
  );
}

///////////////////////////////////////////////////////////////////////////////
// Flags liveness optimization.

template <typename Inst, typename F>
void optimize(Vunit& /*unit*/, Inst& /*inst*/, Vlabel /*b*/, size_t /*i*/,
              F /*sf_live*/) {}

template<typename xor_op, typename ldimm_op, typename F>
void optimize_ldimm(Vunit& unit, ldimm_op& ldimm,
                    Vlabel b, size_t i, F sf_live) {
  if (!sf_live() && ldimm.s.q() == 0 && ldimm.d.isGP()) {
    decltype(xor_op::d) d = ldimm.d;
    unit.blocks[b].code[i] = xor_op{d, d, d, RegSF{0}};
  }
}

template<typename F>
void optimize(Vunit& unit, ldimmb& inst, Vlabel b, size_t i, F sf_live) {
  optimize_ldimm<xorb>(unit, inst, b, i, sf_live);
}
template<typename F>
void optimize(Vunit& unit, ldimml& inst, Vlabel b, size_t i, F sf_live) {
  optimize_ldimm<xorl>(unit, inst, b, i, sf_live);
}
template<typename F>
void optimize(Vunit& unit, ldimmq& inst, Vlabel b, size_t i, F sf_live) {
  optimize_ldimm<xorq>(unit, inst, b, i, sf_live);
}
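
/*
 * Illustrative before/after for the rewrite above, assuming the status flags
 * are dead at the instruction (rendering of operands is approximate):
 *
 *   ldimmq 0 => %rax   -->   xorq %rax, %rax => %rax, %sf
 *
 * On x64 the xor encoding is shorter than loading an immediate zero, but it
 * clobbers the flags, which is why the transformation is gated on sf_live()
 * being false (see optimizeSFLiveness() below).
 */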

template<typename F>
void optimize(Vunit& unit, lea& inst, Vlabel b, size_t i, F sf_live) {
  if (!sf_live() && inst.d == rsp()) {
    assertx(inst.s.base == inst.d && !inst.s.index.isValid());
    unit.blocks[b].code[i] = addqi{inst.s.disp, inst.d, inst.d, RegSF{0}};
  }
}

/*
 * Perform optimizations on instructions in `unit' at which no flags registers
 * are live.
 */
void optimizeSFLiveness(Vunit& unit, const VxlsContext& ctx,
                        const jit::vector<Variable*>& variables) {
  // Currently, all our optimizations are only relevant on x64.
  if (arch() != Arch::X64) return;

  // sf_var is the physical SF register, computed from the union of VregSF
  // registers by computeLiveness() and buildIntervals().
  auto const sf_var = variables[VregSF(RegSF{0})];
  auto const sf_ivl = sf_var ? sf_var->ivl() : nullptr;

  for (auto const b : ctx.blocks) {
    auto& code = unit.blocks[b].code;

    for (size_t i = 0; i < code.size(); ++i) {
      auto& inst = code[i];

      /*
       * An instruction that defines sf, where sf is not subsequently read,
       * will have a live range going from inst.pos to inst.pos + 1.  It's
       * okay to replace such an instruction with one that doesn't modify the
       * flags (or that modifies them in different ways), so check for a
       * range that covers pos - 1 or pos + 1 to determine true liveness.
       */
      auto const sf_live = [&] {
        return sf_ivl &&
               !sf_ivl->ranges.empty() &&
               ((inst.pos && sf_ivl->covers(inst.pos - 1)) ||
                sf_ivl->covers(inst.pos + 1));
      };

      switch (inst.op) {
#define O(name, ...)                                    \
        case Vinstr::name:                              \
          optimize(unit, inst.name##_, b, i, sf_live);  \
          break;

        VASM_OPCODES
#undef O
      }
    }
  }
}

///////////////////////////////////////////////////////////////////////////////
// Copy insertion.

/*
 * Insert stores for `spills' (with spill space starting at `slots') into
 * `code' before code[j], corresponding to XLS logical position `pos'.
 *
 * Updates `j' to refer to the same instruction after the code insertions.
 */
void insertSpillsAt(jit::vector<Vinstr>& code, unsigned& j,
                    const CopyPlan& spills, MemoryRef slots,
                    unsigned pos) {
  jit::vector<Vinstr> stores;

  for (auto src : spills) {
    auto ivl = spills[src];
    if (!ivl) continue;

    auto slot = ivl->var->slot;
    assertx(slot >= 0 && src == ivl->reg);
    MemoryRef ptr{slots.r + slotOffset(slot)};

    if (!ivl->var->wide) {
      always_assert_flog(!src.isSF(), "Tried to spill %flags");
      stores.emplace_back(store{src, ptr});
    } else {
      assertx(src.isSIMD());
      stores.emplace_back(storeups{src, ptr});
    }
  }
  insertCodeAt(code, j, stores, pos);
}

/*
 * Insert reg-reg moves for `copies' into `code' before code[j],
 * corresponding to XLS logical position `pos'.
 *
 * Updates `j' to refer to the same instruction after the code insertions.
 */
void insertCopiesAt(const VxlsContext& ctx,
                    jit::vector<Vinstr>& code, unsigned& j,
                    const CopyPlan& plan, unsigned pos) {
  MovePlan moves;
  jit::vector<Vinstr> copies;

  for_each_copy(plan, [&] (PhysReg dst, const Interval* ivl) {
    moves[dst] = ivl->reg;
  });
  auto const hows = doRegMoves(moves, ctx.tmp);

  for (auto const& how : hows) {
    if (how.m_kind == MoveInfo::Kind::Xchg) {
      copies.emplace_back(copy2{how.m_src, how.m_dst, how.m_dst, how.m_src});
    } else {
      copies.emplace_back(copy{how.m_src, how.m_dst});
    }
  }
  insertCodeAt(code, j, copies, pos);
}
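
/*
 * doRegMoves() serializes the parallel move plan, falling back to a swap (or
 * the scratch register ctx.tmp) when the plan contains a cycle.  E.g. the
 * two-cycle
 *
 *   { rax <- rcx, rcx <- rax }
 *
 * cannot be done with two sequential movs; it comes back as a single
 * MoveInfo::Kind::Xchg, which the loop above emits as
 * copy2{src, dst, dst, src}, i.e. a register swap.
 */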

/*
 * Insert constant loads or loads from spill space---with spill space
 * starting at `slots'---for `loads' into `code' before code[j],
 * corresponding to XLS logical position `pos'.
 *
 * Updates `j' to refer to the same instruction after the code insertions.
 */
void insertLoadsAt(jit::vector<Vinstr>& code, unsigned& j,
                   const CopyPlan& plan, MemoryRef slots, unsigned pos) {
  jit::vector<Vinstr> loads;

  for_each_load(plan, [&] (PhysReg dst, const Interval* ivl) {
    if (ivl->constant()) {
      if (ivl->var->val.isUndef) return;
      loads.push_back([&]() -> Vinstr {
        switch (ivl->var->val.kind) {
          case Vconst::Quad:
          case Vconst::Double:
            return ldimmq{uint64_t(ivl->var->val.val), dst};
          case Vconst::Long:
            return ldimml{int32_t(ivl->var->val.val), dst};
          case Vconst::Byte:
            return ldimmb{uint8_t(ivl->var->val.val), dst};
        }
        not_reached();
      }());
    } else if (ivl->spilled()) {
      MemoryRef ptr{slots.r + slotOffset(ivl->var->slot)};
      if (!ivl->var->wide) {
        loads.emplace_back(load{ptr, dst});
      } else {
        assertx(dst.isSIMD());
        loads.emplace_back(loadups{ptr, dst});
      }
    }
  });
  insertCodeAt(code, j, loads, pos);
}

/*
 * Mutate the Vinstr stream by inserting copies.
 *
 * This destroys the position numbering, so we can't use interval positions
 * after this.
 */
void insertCopies(Vunit& unit, const VxlsContext& ctx,
                  const jit::vector<Variable*>& /*variables*/,
                  const ResolutionPlan& resolution) {
  // Insert copies inside blocks.
  for (auto const b : ctx.blocks) {
    auto& block = unit.blocks[b];
    auto& code = block.code;
    auto pos = ctx.block_ranges[b].start;
    auto offset = ctx.spill_offsets[b];

    for (unsigned j = 0; j < code.size(); j++, pos += 2) {
      MemoryRef slots = ctx.sp[offset];

      // Spills, reg-reg moves, and loads of constant values or spill space
      // all occur between instructions.  Insert them in order.
      auto s = resolution.spills.find(pos - 1);
      if (s != resolution.spills.end()) {
        insertSpillsAt(code, j, s->second, slots, pos - 1);
      }
      auto c = resolution.copies.find(pos - 1);
      if (c != resolution.copies.end()) {
        insertCopiesAt(ctx, code, j, c->second, pos - 1);
        insertLoadsAt(code, j, c->second, slots, pos - 1);
      }

      // Insert copies and loads at instructions.
      c = resolution.copies.find(pos);
      if (c != resolution.copies.end()) {
        insertCopiesAt(ctx, code, j, c->second, pos);
        insertLoadsAt(code, j, c->second, slots, pos);
      }
      assertx(resolution.spills.count(pos) == 0);

      offset -= spEffect(unit, code[j], ctx.sp);
    }
  }

  // insert copies on edges
  for (auto const b : ctx.blocks) {
    auto& block = unit.blocks[b];
    auto succlist = succs(block);

    if (succlist.size() == 1) {
      // copies will go at end of b
      auto const c = resolution.edge_copies.find({b, 0});
      if (c != resolution.edge_copies.end()) {
        auto& code = block.code;
        unsigned j = code.size() - 1;
        auto const pos = ctx.block_ranges[b].end - 1;
        auto const slots = ctx.sp[ctx.spill_offsets[succlist[0]]];

        // We interleave copies and loads in `edge_copies', so here and below
        // we process them separately.
        insertCopiesAt(ctx, code, j, c->second, pos);
        insertLoadsAt(code, j, c->second, slots, pos);
      }
    } else {
      // copies will go at start of successor
      for (int i = 0, n = succlist.size(); i < n; i++) {
        auto s = succlist[i];
        auto const c = resolution.edge_copies.find({b, i});
        if (c != resolution.edge_copies.end()) {
          auto& code = unit.blocks[s].code;
          unsigned j = 0;
          auto const pos = ctx.block_ranges[s].start;
          auto const slots = ctx.sp[ctx.spill_offsets[s]];

          insertCopiesAt(ctx, code, j, c->second, pos);
          insertLoadsAt(code, j, c->second, slots, pos);
        }
      }
    }
  }
}

///////////////////////////////////////////////////////////////////////////////

/*
 * Peephole cleanup pass.
 *
 * Remove no-op copy sequences before allocating spill space, since doing so
 * might modify the CFG.
 */
void peephole(Vunit& unit, const VxlsContext& ctx) {
  // Whether a Vinstr is a register swap.
  auto const match_xchg = [] (Vinstr& i, Vreg& r0, Vreg& r1) {
    if (i.op != Vinstr::copy2) return false;
    r0 = i.copy2_.s0;
    r1 = i.copy2_.s1;
    return r0 == i.copy2_.d1 && r1 == i.copy2_.d0;
  };

  for (auto b : ctx.blocks) {
    auto& code = unit.blocks[b].code;
    for (int i = 0, n = code.size(); i + 1 < n; i++) {
      Vreg r0, r1, r2, r3;
      if (match_xchg(code[i], r0, r1) &&
          match_xchg(code[i + 1], r2, r3) &&
          ((r0 == r2 && r1 == r3) || (r0 == r3 && r1 == r2))) {
        // matched xchg+xchg that cancel each other
        code[i] = nop{};
        code[i + 1] = nop{};
        i++;
      }
    }
    auto end = std::remove_if(code.begin(), code.end(), [&](Vinstr& inst) {
      return is_trivial_nop(inst) ||
             inst.op == Vinstr::phidef; // we lowered it
    });
    code.erase(end, code.end());
  }
}
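
/*
 * Example of the cancellation above: two adjacent swaps of the same register
 * pair, e.g.
 *
 *   copy2 %rax, %rcx => %rcx, %rax
 *   copy2 %rcx, %rax => %rax, %rcx
 *
 * restore the original state, so both become nop{} and are swept out by the
 * remove_if above, along with the already-lowered phidefs.
 */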

///////////////////////////////////////////////////////////////////////////////
// Spill space allocation.

/*
 * SpillState is used by allocateSpillSpace() to decide where to
 * allocate/free spill space.  It represents the state of the spill space as
 * a whole and is computed before each individual instruction.
 *
 * Order is important in this enum: it's only legal to transition to states
 * with higher values, and states are merged using std::max().
 */
enum SpillState : uint8_t {
  // State is uninitialized.  All block in-states start here.
  Uninit,

  // Spill space is not currently needed; it's safe to allocate spill space
  // after this point.
  NoSpill,

  // Spill space is needed and must be allocated at or before this point.
  NeedSpill,
};

/*
 * SpillStates is used to hold in/out state for each block after the analysis
 * pass of allocateSpillSpace().
 */
struct SpillStates {
  SpillState in;
  SpillState out;
};

/*
 * Returns true if spill space must be allocated before execution of this
 * instruction.  In order to keep things simple, we return true for any
 * instruction that reads or writes sp.
 */
bool instrNeedsSpill(const Vunit& unit, const Vinstr& inst, PhysReg sp) {
  // Implicit sp input/output.
  if (spEffect(unit, inst, sp, false) != 0) return true;

  auto foundSp = false;
  visitDefs(unit, inst, [&] (Vreg r) { if (r == sp) foundSp = true; });
  if (foundSp) return true;

  visitUses(unit, inst, [&] (Vreg r) { if (r == sp) foundSp = true; });
  return foundSp;
}

/*
 * Return the required SpillState coming into `inst'.  `prevState' must not
 * be Uninit.
 */
SpillState instrInState(const Vunit& unit, const Vinstr& inst,
                        SpillState prevState, PhysReg sp) {
  switch (prevState) {
    case Uninit: break;

    case NoSpill:
      if (instrNeedsSpill(unit, inst, sp)) return NeedSpill;
      return NoSpill;

    case NeedSpill:
      return NeedSpill;
  }

  always_assert(false);
}

/*
 * processSpillExits() can insert jcc{} instructions in the middle of a
 * block.  fixupBlockJumps() breaks the given block after any jccs, making
 * the unit valid again.  This is done as a separate pass from the work in
 * processSpillExits() to reduce complexity.
 */
void fixupBlockJumps(Vunit& unit, Vlabel label) {
  jit::vector<Vinstr> origCode;
  origCode.swap(unit.blocks[label].code);
  auto insertBlock = [&]() -> Vblock& { return unit.blocks[label]; };

  for (auto const& inst : origCode) {
    insertBlock().code.emplace_back(inst);

    if (inst.op == Vinstr::jcc && !inst.jcc_.targets[0].isValid()) {
      auto newLabel = unit.makeBlock(insertBlock().area_idx,
                                     insertBlock().weight);
      insertBlock().code.back().jcc_.targets[0] = newLabel;
      label = newLabel;
    }
  }
}

/*
 * Walk through the given block, undoing any fallbackcc/bindjcc optimizations
 * that happen in an area where spill space is live.  The latter
 * transformation is necessary to make the hidden edge out of the
 * fallbackcc{} explicit, so we can insert an lea on it to free spill space.
 * It takes something like this:
 *
 *   B0:
 *     cmpbim 0, %rbp[0x10] => %flags
 *     fallbackcc CC_E, %flags, <SrcKey>
 *     ...
 *
 * and turns it into something like this:
 *
 *   B0:
 *     cmpbim 0, %rbp[0x10] => %flags
 *     jcc CC_E, %flags -> B3, else B2
 *   B2:
 *     ...
 *   B3:
 *     lea %rsp[0x20] => %rsp
 *     fallback <SrcKey>
 */
void processSpillExits(Vunit& unit, Vlabel label, SpillState state,
                       Vinstr free, PhysReg sp) {
  auto code = &unit.blocks[label].code;
  auto needFixup = false;

  for (unsigned i = 0, n = code->size(); i < n; ++i) {
    auto& inst = (*code)[i];
    state = instrInState(unit, inst, state, sp);

    if (state < NeedSpill ||
        (inst.op != Vinstr::fallbackcc &&
         inst.op != Vinstr::bindjcc &&
         inst.op != Vinstr::jcci)) {
      continue;
    }

    FTRACE(3, "Breaking out {}: {}\n", label, show(unit, inst));
    auto target = unit.makeBlock(AreaIndex::Cold, 0);
    // makeBlock might reallocate unit.blocks
    code = &unit.blocks[label].code;

    auto& targetCode = unit.blocks[target].code;
    free.set_irctx(inst.irctx());
    ConditionCode cc;
    Vreg sf;
    if (inst.op == Vinstr::fallbackcc) {
      auto const& fb_i = inst.fallbackcc_;
      targetCode.emplace_back(free);
      targetCode.emplace_back(fallback{fb_i.target, fb_i.spOff,
                                       fb_i.trflags, fb_i.args});
      cc = fb_i.cc;
      sf = fb_i.sf;
    } else if (inst.op == Vinstr::bindjcc) {
      auto const& bj_i = inst.bindjcc_;
      targetCode.emplace_back(free);
      targetCode.emplace_back(bindjmp{bj_i.target, bj_i.spOff, bj_i.trflags,
                                      bj_i.args});
      cc = bj_i.cc;
      sf = bj_i.sf;
    } else /* inst.op == Vinstr::jcci */ {
      auto const& jcc_i = inst.jcci_;
      targetCode.emplace_back(free);
      targetCode.emplace_back(jmpi{jcc_i.taken});
      cc = jcc_i.cc;
      sf = jcc_i.sf;
    }
    targetCode.back().set_irctx(inst.irctx());

    // Next is set to an invalid block that will be fixed up once we're done
    // iterating through the original block.
    inst = jcc{cc, sf, {Vlabel{}, target}};
    needFixup = true;
  }

  if (needFixup) fixupBlockJumps(unit, label);
}

/*
 * Merge src into dst, returning true iff dst was changed.
 */
bool mergeSpillStates(SpillState& dst, SpillState src) {
  assertx(src != Uninit);
  if (dst == src) return false;

  // The only allowed state transitions are to states with higher values, so
  // we merge with std::max().
  auto const oldDst = dst;
  dst = std::max(dst, src);
  return dst != oldDst;
}
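
/*
 * Usage sketch: merging is a join in the lattice Uninit < NoSpill <
 * NeedSpill, so repeated merges can only raise a block's in-state:
 *
 *   SpillState in = Uninit;
 *   mergeSpillStates(in, NoSpill);   // in: Uninit  -> NoSpill;  returns true
 *   mergeSpillStates(in, NeedSpill); // in: NoSpill -> NeedSpill; returns true
 *   mergeSpillStates(in, NoSpill);   // in: NeedSpill, unchanged; returns false
 *
 * The fixpoint loop in allocateSpillSpace() below re-enqueues a successor
 * only when its in-state actually rises, which guarantees termination.
 */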

std::default_random_engine s_stressRand(0xfaceb00c);
std::uniform_int_distribution<int> s_stressDist(1,7);

/*
 * If the current unit used any spill slots, allocate and free spill space
 * where appropriate.  Spill space is allocated right before it's needed and
 * freed before any instruction that exits the unit, which is any
 * block-ending instruction with no successors in the unit.  fallbackcc{}
 * and bindjcc{} instructions have hidden edges that exit the unit, and if we
 * encounter one while spill space is live, we have to make that exit edge
 * explicit to insert code on it (see processSpillExits()).  This makes the
 * exit path more expensive, so we try to allocate spill space as late as
 * possible to avoid pessimising fallbackcc/bindjcc instructions unless it's
 * really necessary.  The algorithm uses two passes:
 *
 * Analysis:
 *   - For each block in RPO:
 *     - Load in-state, which has been populated by at least one predecessor
 *       (or manually set to NoSpill for the entry block).
 *     - Analyze each instruction in the block, determining what state the
 *       spill space must be in before executing it.
 *     - Record out-state for the block and propagate to successors.  If this
 *       changes the in-state for any of them, enqueue them for
 *       (re)processing.
 *
 * Mutation:
 *   - For each block (we use RPO to only visit reachable blocks but order
 *     doesn't matter):
 *     - Inspect the block's in-state and out-state:
 *       - NoSpill in, NeedSpill out: Walk the block to see if we need to
 *         allocate spill space before any instructions.
 *       - NoSpill out: Allocate spill space on any edges to successors with
 *         NeedSpill in-states.
 *       - NeedSpill out: If the block has no in-unit successors, free spill
 *         space before the block-end instruction.
 *       - != NoSpill out: Look for any fallbackcc/bindjcc instructions,
 *         deoptimizing as appropriate (see processSpillExits()).
 */
void allocateSpillSpace(Vunit& unit, const VxlsContext& ctx,
                        SpillInfo& spi) {
  if (RuntimeOption::EvalHHIRStressSpill && ctx.abi.canSpill) {
    auto extra = s_stressDist(s_stressRand);
    FTRACE(1, "StressSpill on; adding {} extra slots\n", extra);
    spi.used_spill_slots += extra;
  }
  if (spi.used_spill_slots == 0) return;
  Timer t(Timer::vasm_xls_spill, unit.log_entry);
  always_assert(ctx.abi.canSpill);

  // Make sure we always allocate spill space in multiples of 16 bytes, to
  // keep alignment straightforward.
  if (spi.used_spill_slots % 2) spi.used_spill_slots++;
  FTRACE(1, "Allocating {} spill slots\n", spi.used_spill_slots);

  auto const spillSize = safe_cast<int32_t>(slotOffset(spi.used_spill_slots));
  // Pointer manipulation is traditionally done with lea, and it's safe to
  // insert even where flags might be live.
  Vinstr alloc = lea{ctx.sp[-spillSize], ctx.sp};
  Vinstr free = lea{ctx.sp[spillSize], ctx.sp};

  jit::vector<uint32_t> rpoIds(unit.blocks.size());
  for (uint32_t i = 0; i < ctx.blocks.size(); ++i) rpoIds[ctx.blocks[i]] = i;

  jit::vector<SpillStates> states(unit.blocks.size(), {Uninit, Uninit});
  states[unit.entry].in = NoSpill;
  dataflow_worklist<uint32_t> worklist(unit.blocks.size());
  worklist.push(0);

  // Walk the blocks in RPO.  At the end of each block, propagate its
  // out-state to successors, adding them to the worklist if their in-state
  // changes.  Blocks may be visited multiple times if loops are present.
  while (!worklist.empty()) {
    auto const label = ctx.blocks[worklist.pop()];
    auto const& block = unit.blocks[label];
    auto state = states[label].in;

    if (state < NeedSpill) {
      for (auto& inst : block.code) {
        state = instrInState(unit, inst, state, ctx.sp);
        if (state == NeedSpill) break;
      }
    }
    states[label].out = state;

    for (auto s : succs(block)) {
      if (mergeSpillStates(states[s].in, state)) {
        worklist.push(rpoIds[s]);
      }
    }
  }

  // Do a single mutation pass over the blocks.
  for (auto const label : ctx.blocks) {
    auto state = states[label];
    auto& block = unit.blocks[label];

    // Any block with a NoSpill in-state and NeedSpill out-state might have
    // an instruction in it that needs spill space, which we allocate right
    // before the instruction in question.
    if (state.in == NoSpill && state.out == NeedSpill) {
      auto state = NoSpill;
      for (auto it = block.code.begin(); it != block.code.end(); ++it) {
        state = instrInState(unit, *it, state, ctx.sp);
        if (state == NeedSpill) {
          FTRACE(3, "alloc spill before {}: {}\n", label, show(unit, *it));
          alloc.set_irctx(it->irctx());
          block.code.insert(it, alloc);
          break;
        }
      }
    }

    // Allocate spill space on edges from a NoSpill out-state to a NeedSpill
    // in-state.
    auto const successors = succs(block);
    if (state.out == NoSpill) {
      for (auto s : successors) {
        if (states[s].in == NeedSpill) {
          FTRACE(3, "alloc spill on edge from {} -> {}\n", label, s);
          auto it = std::prev(block.code.end());
          alloc.set_irctx(it->irctx());
          block.code.insert(it, alloc);
        }
      }
    }

    // Any block with a NeedSpill out-state and no successors must free spill
    // space right before the block-end instruction.  We ignore trap so spill
    // space is still allocated in core files.
    if (state.out == NeedSpill && successors.empty() &&
        block.code.back().op != Vinstr::trap) {
      auto it = std::prev(block.code.end());
      FTRACE(3, "free spill before {}: {}\n", label, show(unit, (*it)));
      free.set_irctx(it->irctx());
      block.code.insert(it, free);
    }

    // Any block that ends with anything other than NoSpill needs to be
    // walked to look for places to free spill space.
    if (state.out != NoSpill) {
      processSpillExits(unit, label, state.in, free, ctx.sp);
    }
  }
}
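
/*
 * Worked example of the mutation rules above on a hypothetical three-block
 * unit, writing `alloc' for lea %rsp[-size] => %rsp and `free' for
 * lea %rsp[size] => %rsp:
 *
 *   B0 (in NoSpill, out NoSpill) -> B1 (in/out NeedSpill) -> B2 (no succs)
 *
 *   - The B0 -> B1 edge goes from a NoSpill out-state to a NeedSpill
 *     in-state, so `alloc' is inserted in B0 just before its block-end
 *     instruction.
 *   - B2 inherits NeedSpill and has no in-unit successors, so `free' is
 *     inserted before its final instruction (unless that instruction is
 *     trap{}).
 */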

///////////////////////////////////////////////////////////////////////////////
// Printing.

std::string Interval::toString() const {
  std::ostringstream out;
  auto delim = "";
  if (reg != InvalidReg) {
    out << show(reg);
    delim = " ";
  }
  if (constant()) {
    out << delim << folly::format("#{:08x}", var->val.val);
  }
  if (var->slot >= 0) {
    out << delim << folly::format("[%sp+{}]", slotOffset(var->slot));
  }
  delim = "";
  out << " [";
  for (auto const& r : ranges) {
    out << delim << folly::format("{}-{}", r.start, r.end);
    delim = ",";
  }
  out << ") {";
  delim = "";
  for (auto const& u : uses) {
    if (u.pos == var->def_pos) {
      if (u.hint.isValid()) {
        out << delim << "@" << u.pos << "=" << show(u.hint);
      } else {
        out << delim << "@" << u.pos << "=";
      }
    } else {
      auto hint_delim = u.kind == Constraint::CopySrc ? "=?" : "=@";
      if (u.hint.isValid()) {
        out << delim << show(u.hint) << hint_delim << u.pos;
      } else {
        out << delim << hint_delim << u.pos;
      }
    }
    delim = ",";
  }
  out << "}";
  return out.str();
}

DEBUG_ONLY void dumpVariables(const jit::vector<Variable*>& variables,
                              unsigned num_spills) {
  Trace::traceRelease("Spills %u\n", num_spills);
  for (auto var : variables) {
    if (!var || var->fixed()) continue;
    Trace::traceRelease("%%%-2lu %s\n", size_t(var->vreg),
                        var->ivl()->toString().c_str());
    for (auto ivl = var->ivl()->next; ivl; ivl = ivl->next) {
      Trace::traceRelease("    %s\n", ivl->toString().c_str());
    }
  }
}

auto const ignore_reserved = !getenv("XLS_SHOW_RESERVED");
auto const collapse_fixed = !getenv("XLS_SHOW_FIXED");

enum Mode { Light, Heavy };

template<class Pred>
const char* draw(Variable* var, unsigned pos, Mode m, Pred covers) {
                                //  Light       Heavy
  static const char* top[]    = { u8"\u2575", u8"\u2579" };
  static const char* bottom[] = { u8"\u2577", u8"\u257B" };
  static const char* both[]   = { u8"\u2502", u8"\u2503" };
  static const char* empty[]  = { " ", " " };
  auto f = [&](unsigned position) {
    if (!var) return false;
    for (auto ivl = var->ivl(); ivl; ivl = ivl->next) {
      if (covers(ivl, position)) return true;
    }
    return false;
  };

  auto s = f(pos);
  auto d = pos % 2 == 1 ? s : f(pos + 1);
  return ( s && !d) ? top[m] :
         ( s &&  d) ? both[m] :
         (!s &&  d) ? bottom[m] :
                      empty[m];
}

DEBUG_ONLY void printInstr(std::ostringstream& str,
                           const Vunit& unit, const VxlsContext& ctx,
                           const jit::vector<Variable*>& variables,
                           const Vinstr& inst, Vlabel b) {
  bool fixed_covers[2] = { false, false };
  Variable* fixed = nullptr;
  for (auto var : variables) {
    if (!var) continue;
    if (var->fixed()) {
      if (ignore_reserved && !ctx.abi.unreserved().contains(var->vreg)) {
        continue;
      }
      if (collapse_fixed) {
        fixed = var; // can be any
        fixed_covers[0] |= var->ivl()->covers(inst.pos);
        fixed_covers[1] |= var->ivl()->covers(inst.pos + 1);
        continue;
      }
    }
    str << " ";
    str << draw(var, inst.pos, Light, [&](Interval* child, unsigned p) {
      return child->covers(p);
    });
    str << draw(var, inst.pos, Heavy, [&](Interval* child, unsigned p) {
      return child->usedAt(p);
    });
  }
  str << " " << draw(fixed, inst.pos, Heavy, [&](Interval*, unsigned p) {
    assertx(p - inst.pos < 2);
    return fixed_covers[p - inst.pos];
  });
  if (inst.pos == ctx.block_ranges[b].start) {
    str << folly::format(" B{: <3}", size_t(b));
  } else {
    str << "      ";
  }
  str << folly::format(" {: <3} ", inst.pos) << show(unit, inst) << "\n";
}

DEBUG_ONLY void printVariables(const char* caption,
                               const Vunit& unit, const VxlsContext& ctx,
                               const jit::vector<Variable*>& variables) {
  std::ostringstream str;
  str << "Intervals " << caption << " " << ctx.counter << "\n";
  for (auto var : variables) {
    if (!var) continue;
    if (var->fixed()) {
      if (ignore_reserved && !ctx.abi.unreserved().contains(var->vreg)) {
        continue;
      }
      if (collapse_fixed) {
        continue;
      }
    }
    str << folly::format(" {: <2}", size_t(var->vreg));
  }
  str << " FX\n";
  for (auto b : ctx.blocks) {
    for (auto& inst : unit.blocks[b].code) {
      printInstr(str, unit, ctx, variables, inst, b);
    }
  }
  HPHP::Trace::traceRelease("%s\n", str.str().c_str());
}

///////////////////////////////////////////////////////////////////////////////

struct XLSStats {
  size_t instrs{0};       // total instruction count after xls
  size_t moves{0};        // number of movs inserted
  size_t loads{0};        // number of loads inserted
  size_t spills{0};       // number of spill-stores inserted
  size_t consts{0};       // number of const-loads inserted
  size_t total_copies{0}; // moves+loads+spills
};

void dumpStats(const Vunit& unit, const ResolutionPlan& resolution) {
  XLSStats stats;

  for (auto const& blk : unit.blocks) {
    stats.instrs += blk.code.size();
  }
  for (auto const& kv : resolution.spills) {
    auto const& spills = kv.second;
    for (auto const r : spills) {
      if (spills[r]) ++stats.spills;
    }
  }

  auto const count_copies = [&] (const CopyPlan& plan) {
    for_each_copy(plan, [&] (PhysReg, const Interval*) {
      ++stats.moves;
    });
    for_each_load(plan, [&] (PhysReg, const Interval* ivl) {
      ++(ivl->spilled() ? stats.loads : stats.consts);
    });
  };
  for (auto const& kv : resolution.copies) count_copies(kv.second);
  for (auto const& kv : resolution.edge_copies) count_copies(kv.second);

  stats.total_copies = stats.moves + stats.loads + stats.spills;

  FTRACE_MOD(
    Trace::xls_stats, 1,
    "XLS stats:\n"
    "  instrs: {}\n"
    "  moves:  {}\n"
    "  loads:  {}\n"
    "  spills: {}\n"
    "  consts: {}\n"
    "  total copies: {}\n",
    stats.instrs, stats.moves, stats.loads, stats.spills,
    stats.consts, stats.total_copies
  );

  if (auto entry = unit.log_entry) {
    entry->setInt("xls_instrs", stats.instrs);
    entry->setInt("xls_moves", stats.moves);
    entry->setInt("xls_loads", stats.loads);
    entry->setInt("xls_spills", stats.spills);
    entry->setInt("xls_consts", stats.consts);
  }
}

///////////////////////////////////////////////////////////////////////////////
}

void allocateRegisters(Vunit& unit, const Abi& abi) {
  Timer timer(Timer::vasm_xls, unit.log_entry);
  auto const counter = s_counter.fetch_add(1, std::memory_order_relaxed);

  splitCriticalEdges(unit);
  assertx(check(unit));

  // Analysis passes.
  VxlsContext ctx{abi};
  ctx.blocks = sortBlocks(unit);
  ctx.block_ranges = computePositions(unit, ctx.blocks);
  ctx.spill_offsets = analyzeSP(unit, ctx.blocks, ctx.sp);
  ctx.livein = computeLiveness(unit, ctx.abi, ctx.blocks);
  ctx.counter = counter;

  // Build lifetime intervals and analyze hints.
  auto variables = buildIntervals(unit, ctx);
  auto const hint_info = analyzeHints(unit, ctx, variables);

  // Perform register allocation.
  auto spill_info = assignRegisters(ctx, variables, hint_info);

  ONTRACE(kRegAllocLevel, dumpVariables(variables, spill_info.num_spills));

  // Insert lifetime-resolving copies, spills, and rematerializations, and
  // replace the Vreg operands in the Vinstr stream with the assigned
  // PhysRegs.
  auto const resolution = resolveLifetimes(unit, ctx, variables);
  renameOperands(unit, ctx, variables);
  insertCopies(unit, ctx, variables, resolution);

  ONTRACE(kRegAllocLevel,
          dumpVariables(variables, spill_info.num_spills);
          printVariables("after inserting copies", unit, ctx, variables);
  );

  // Perform optimizations based on flags liveness, then do some cleanup.
  optimizeSFLiveness(unit, ctx, variables);
  peephole(unit, ctx);

  // Insert instructions for creating spill space.
  allocateSpillSpace(unit, ctx, spill_info);
  if (auto entry = unit.log_entry) {
    entry->setInt("num_spills", spill_info.num_spills);
    entry->setInt("used_spill_slots", spill_info.used_spill_slots);
  }

  printUnit(kVasmRegAllocLevel, "after vasm-xls", unit);
  if (HPHP::Trace::moduleEnabled(Trace::xls_stats, 1) || unit.log_entry) {
    dumpStats(unit, resolution);
  }

  // Free the variable metadata.
  for (auto var : variables) {
    if (!var) continue;
    for (Interval* ivl = var->ivl()->next, *next = nullptr; ivl; ivl = next) {
      next = ivl->next;
      jit::destroy(ivl);
    }
    Variable::destroy(var);
  }
}

///////////////////////////////////////////////////////////////////////////////