Revert "Remove legacy linear scan register allocator"
[hiphop-php.git] / hphp / runtime / vm / jit / xls.cpp
blob221d527e857dc0a3ffd13df302e4a223755ab10d
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/runtime/vm/jit/linear-scan.h"
18 #include "hphp/runtime/vm/jit/state-vector.h"
19 #include "hphp/runtime/vm/jit/id-set.h"
20 #include "hphp/runtime/vm/jit/block.h"
21 #include "hphp/runtime/vm/jit/cfg.h"
22 #include "hphp/runtime/vm/jit/print.h"
23 #include "hphp/runtime/vm/jit/abi-x64.h"
24 #include "hphp/runtime/vm/jit/abi-arm.h"
25 #include "hphp/runtime/vm/jit/arch.h"
26 #include "hphp/runtime/vm/jit/check.h"
28 #include <unordered_set>
29 #include <algorithm>
31 // TODO
32 // - #3098109 dests of branch instructions start in next block
33 // - #3098509 streamline code, vectors vs linked lists, etc
34 // - #3098661 generate spill stats so we can compare side/by/side
35 // - #3098678 EvalHHIREnablePreColoring
36 // - #3098685 EvalHHIREnableCoalescing, by using hints
37 // - #3409409 Enable use of SIMD at least for doubles, for packed_tv
38 // - #3098685 Optimize lifetime splitting
39 // - #3098712 reuse spill slots
40 // - #3098739 new features now possible with XLS
41 // - #3099647 support loops
43 namespace HPHP { namespace JIT {
44 namespace {
45 using namespace reg;
46 struct Interval;
47 struct RegPositions;
49 // machine-specific register conventions
// Machine-specific register conventions for the target architecture.
struct Abi {
  RegSet gp;    // general purpose 64-bit registers
  RegSet simd;  // floating point / simd 128-bit registers
  RegSet saved; // callee-saved (gp and simd)

  // convenience methods
  RegSet all() const { return gp | simd; }
};

// Parent intervals, indexed by SSATmp.
typedef StateVector<SSATmp, Interval*> Intervals;
// Set of live SSATmps, by id.
typedef IdSet<SSATmp> LiveSet;
// One or two physical registers assigned to an interval.
typedef std::pair<PhysReg,PhysReg> RegPair;
// A Use refers to the position where an interval is read or written.
// Positions are assigned in computePositions(): even positions are where
// srcs are read, odd positions are where dsts are written.
struct Use {
  unsigned pos;
};
// A LiveRange is an open-ended (half-open, [start, end)) range of positions
// where an interval is live.
struct LiveRange {
  LiveRange(unsigned s, unsigned e) : start(s), end(e) { assert(s <= e); }
  // true if pos lies within [start, end)
  bool contains(unsigned pos) const { return pos >= start && pos < end; }
  // true if the two half-open ranges overlap
  bool intersects(LiveRange r) const { return r.start < end && start < r.end; }
  // true if r lies entirely within this range (defined out of line below)
  bool contains(LiveRange r) const;
public:
  unsigned start, end;
};

// return [first, last+1)
LiveRange closedRange(unsigned first, unsigned last);
// An Interval stores the lifetime of an SSATmp as a sorted list of disjoint
// ranges, and a sorted list of use positions. If this interval was split,
// then the first interval is deemed "parent" and the rest are "children",
// and they're all connected as a singly linked list sorted by start.
struct Interval {
  Interval() {}
  explicit Interval(Interval* parent);
  // accessors
  bool empty() const { return ranges.empty(); }
  unsigned start() const { return ranges.front().start; }
  unsigned end() const { return ranges.back().end; }
  Interval* leader() { return parent ? parent : this; }
  const Interval* leader() const { return parent ? parent : this; }
  bool isChild() const { return parent != nullptr; }
  // queries
  bool covers(unsigned pos) const;
  bool usedAt(unsigned pos) const;
  Interval* childAt(unsigned pos);
  unsigned nextIntersect(Interval*) const;
  unsigned nextUseAfter(unsigned pos) const;
  unsigned firstRegUse() const;
  unsigned firstUse() const { return uses.front().pos; }
  // mutators
  void add(LiveRange r);
  void setStart(unsigned start);
  void addUse(unsigned pos) { uses.push_back(Use{pos}); }
  Interval* split(unsigned pos);
  // register/spill assignment
  RegPair regs() const;
  // an interval is "handled" once it has either a register or a spill slot
  bool handled() const { return loc.hasReg(0) || loc.spilled(); }
  // debugging
  std::string toString();
  bool checkInvariants() const;
  uint32_t id() const { return leader()->tmp->id(); }
public:
  bool blocked { false };   // cannot be spilled
  bool scratch { false };   // used as scratch or arg, cannot be spilled
  uint8_t need { 0 };       // number of required registers (exactly 1 or 2)
  RegSet allow;             // registers this interval may use
  RegSet prefer;            // subset of allow we'd rather use
  SSATmp* tmp { nullptr };  // tmp whose lifetime this interval tracks
  Interval* parent { nullptr }; // if this is a split-off child
  Interval* next { nullptr };   // next split child or nullptr
  smart::vector<LiveRange> ranges; // sorted, disjoint live ranges
  smart::vector<Use> uses;         // sorted use positions
  PhysLoc loc;   // current location assigned to this interval
  PhysLoc spill; // spill location (parent only)
};
// Extended Linear Scan. This just encapsulates the intermediate
// data structures we use during the algorithm so we don't have
// to pass them around everywhere.
struct XLS {
  XLS(IRUnit& unit, RegAllocInfo& regs, const Abi&);
  ~XLS();
  void allocate();
  // phases, in execution order
  void prepareBlocks();     // split critical edges, order blocks
  void computePositions();  // number instructions
  void buildIntervals();    // liveness analysis -> intervals
  void walkIntervals();     // the main allocation loop
  void assignLocations();   // write results into m_regs
  void resolveSplits();     // connect split siblings with copies
  void resolveEdges();      // insert copies on CFG edges
  // utilities
  void enqueue(Interval* interval);
  bool allocPrefer(Interval* current, const RegPositions& until);
  void allocOne(Interval* current);
  void allocBlocked(Interval* current);
  Interval* goodSplit(Interval*, unsigned pos);
  void spill(Interval*);
  void assign(Interval*, RegPair r);
  void update(unsigned pos);
  void spillActive(Interval* current);
  void spillInactive(Interval* current);
  bool force(IRInstruction&, SSATmp& t);
  void insertCopy(Block* b, Block::iterator, IRInstruction*& shuffle,
                  SSATmp* src, const PhysLoc& rs, const PhysLoc& rd);
  void insertSpill(Interval* ivl);
  // debugging
  void print(const char* caption);
  void dumpIntervals();
  bool checkEdgeShuffles();
private:
  // ordering predicate for the pending priority queue
  struct Compare { bool operator()(const Interval*, const Interval*); };
private:
  Intervals m_intervals;        // parent intervals indexed by ssatmp
  int m_nextSpill { 0 };        // next unused spill-slot number
  int m_numSplits { 0 };        // count of splits, for diagnostics
  IRUnit& m_unit;
  RegAllocInfo& m_regs;         // output: locations for each tmp at each inst
  const Abi& m_abi;
  StateVector<IRInstruction, unsigned> m_posns;  // inst -> position number
  smart::vector<IRInstruction*> m_insts;         // position/2 -> inst
  BlockList m_blocks;           // linearized block order
  PhysReg::Map<Interval> m_scratch;  // per-register scratch intervals
  PhysReg::Map<Interval> m_blocked;  // per-register unavailable intervals
  StateVector<Block,LiveSet> m_liveIn;
  smart::priority_queue<Interval*,Compare> m_pending;  // not yet allocated
  smart::vector<Interval*> m_active;    // live and covering current pos
  smart::vector<Interval*> m_inactive;  // live but in a lifetime hole
  StateVector<IRInstruction,IRInstruction*> m_between; // shuffles mid-block
  StateVector<Block,IRInstruction*> m_before;          // shuffles at block start
  StateVector<Block,IRInstruction*> m_after;           // shuffles at block end
};

const uint32_t kMaximalPos = UINT32_MAX; // "infinity" use position
// Keep track of a future use or conflict position for each register.
// Initially all usable registers have kMaximalPos
struct RegPositions {
  explicit RegPositions();
  // two registers in regs with the furthest-away positions
  RegPair max2Reg(RegSet regs) const;
  unsigned operator[](PhysReg r) const { return posns[r]; }
  void setPos(Interval*, unsigned pos);
private:
  PhysReg::Map<unsigned> posns;
};

// forward declaration; validates the block order assumed by buildIntervals()
bool checkBlockOrder(IRUnit& unit, BlockList& blocks);
202 //////////////////////////////////////////////////////////////////////////////
204 // returns true if this range contains r
205 bool LiveRange::contains(LiveRange r) const {
206 return r.start >= start && r.end <= end;
209 // Return a range for the closed interval [first,last], i.e. [first, last+1)
210 LiveRange closedRange(unsigned first, unsigned last) {
211 return LiveRange(first, last + 1);
//////////////////////////////////////////////////////////////////////////////

// Construct a child interval split off from parent. The child inherits
// the parent's register requirements and constraints, but starts with no
// ranges or uses of its own; split() populates those.
Interval::Interval(Interval* parent)
  : need(parent->need)
  , allow(parent->allow)
  , prefer(parent->prefer)
  , tmp(parent->tmp)
  , parent(parent)
{}
// Add r to this interval, merging r with any existing overlapping ranges.
// Note: during buildIntervals() ranges are appended in reverse order, so
// ranges.back() is the earliest range seen so far.
void Interval::add(LiveRange r) {
  // drop earlier ranges entirely swallowed by r
  while (!ranges.empty() && r.contains(ranges.back())) {
    ranges.pop_back();
  }
  if (ranges.empty()) {
    return ranges.push_back(r);
  }
  auto& first = ranges.back();
  if (first.contains(r)) return;   // already covered
  if (r.end >= first.start) {
    // r overlaps/abuts the earliest range; extend it backwards
    first.start = r.start;
  } else {
    // disjoint; keep as a separate range
    ranges.push_back(r);
  }
}
// Update the start-position of the earliest range in this interval,
// which is ranges.back() during buildIntervals() (ranges are appended
// in reverse order and reversed at the end of that pass).
void Interval::setStart(unsigned start) {
  assert(!ranges.empty() && ranges.back().contains(start));
  ranges.back().start = start;
}
248 // Return true if one of the ranges in this interval includes pos
249 bool Interval::covers(unsigned pos) const {
250 if (pos < start() || pos >= end()) return false;
251 for (auto r : ranges) {
252 if (pos < r.start) return false;
253 if (pos < r.end) return true;
255 return false;
258 // Return true if there is a use position at pos
259 bool Interval::usedAt(unsigned pos) const {
260 if (pos < start() || pos >= end()) return false;
261 for (auto& u : uses) if (u.pos == pos) return true;
262 return false;
265 // Return the interval which includes pos (this one or a child)
266 Interval* Interval::childAt(unsigned pos) {
267 assert(!isChild());
268 for (auto ivl = this; ivl; ivl = ivl->next) {
269 if (pos < ivl->start()) return nullptr;
270 if (ivl->covers(pos)) return ivl;
272 return nullptr;
// return the next intersection point between this and ivl, or kMaximalPos
// if they never intersect. Classic two-pointer sweep over both sorted
// range lists.
unsigned Interval::nextIntersect(Interval* ivl) const {
  assert(!ranges.empty() && !ivl->ranges.empty());
  auto r1 = ranges.begin(), e1 = ranges.end();
  auto r2 = ivl->ranges.begin(), e2 = ivl->ranges.end();
  for (;;) {
    if (r1->start < r2->start) {
      // r1 starts first; they intersect iff r2 starts before r1 ends
      if (r2->start < r1->end) return r2->start;
      if (++r1 == e1) return kMaximalPos;
    } else {
      // symmetric case (also covers equal starts)
      if (r1->start < r2->end) return r1->start;
      if (++r2 == e2) return kMaximalPos;
    }
  }
  return kMaximalPos; // not reached; silences missing-return warnings
}
// Split this interval at pos and return the rest. Pos must be
// a location that ensures both shorter intervals are nonempty.
// Pos must also be even, indicating a position between instructions.
Interval* Interval::split(unsigned pos) {
  assert(pos > start() && pos < end()); // both halves nonempty
  auto leader = this->leader();
  // the child links into the split chain right after this interval
  Interval* child = smart_new<Interval>(leader);
  child->next = next;
  next = child;
  // advance r1 to the first range we want in child; maybe split a range.
  auto r1 = ranges.begin(), r2 = ranges.end();
  while (r1->end <= pos) r1++;
  if (pos > r1->start) { // pos lands inside *r1: split that range in two
    child->ranges.push_back(LiveRange(pos, r1->end));
    r1->end = pos;
    r1++;
  }
  // move [r1, end) over to the child
  child->ranges.insert(child->ranges.end(), r1, r2);
  ranges.erase(r1, r2);
  // advance u1 to the first use position in child, then copy u1..end to child.
  auto u1 = uses.begin(), u2 = uses.end();
  while (u1 != u2 && u1->pos < pos) u1++;
  child->uses.insert(child->uses.end(), u1, u2);
  uses.erase(u1, u2);
  assert(checkInvariants() && child->checkInvariants());
  return child;
}
321 // Return the position of the next use of this interval after (or equal to)
322 // pos. If there are no more uses after pos, return MAX.
323 unsigned Interval::nextUseAfter(unsigned pos) const {
324 for (auto& u : uses) {
325 if (pos <= u.pos) return u.pos;
327 return kMaximalPos;
330 // return the position of the first use that requires a register,
331 // or kMaximalPos if no remaining uses need registers.
332 unsigned Interval::firstRegUse() const {
333 return uses.empty() ? kMaximalPos : uses.front().pos;
336 // Return the register(s) assigned to this interval as a pair.
337 RegPair Interval::regs() const {
338 return RegPair(loc.reg(0), loc.reg(1));
//////////////////////////////////////////////////////////////////////////////

// Set up per-unit state, and initialize the fixed per-register intervals:
// m_blocked[r] models when r is unavailable (permanently, for registers
// outside the ABI set), and m_scratch[r] models r's use as a scratch/arg
// register around native calls.
XLS::XLS(IRUnit& unit, RegAllocInfo& regs, const Abi& abi)
  : m_intervals(unit, nullptr)
  , m_unit(unit)
  , m_regs(regs)
  , m_abi(abi)
  , m_posns(unit, 0)
  , m_liveIn(unit, LiveSet())
  , m_between(unit, nullptr)
  , m_before(unit, nullptr)
  , m_after(unit, nullptr) {
  auto all = abi.all();
  for (auto r : m_blocked) {
    m_blocked[r].blocked = true;
    m_blocked[r].need = 1;
    m_blocked[r].loc.setReg(r, 0);
    if (!all.contains(r)) {
      // r is never available
      m_blocked[r].add(LiveRange(0, kMaximalPos));
    }
    m_scratch[r].scratch = true;
    m_scratch[r].need = 1;
    m_scratch[r].loc.setReg(r, 0);
  }
}
368 XLS::~XLS() {
369 for (auto ivl : m_intervals) {
370 for (Interval* next; ivl; ivl = next) {
371 next = ivl->next;
372 smart_delete(ivl);
// Split critical edges, remove dead predecessor edges, and put blocks
// in a suitable order (reverse postorder).
void XLS::prepareBlocks() {
  splitCriticalEdges(m_unit);
  m_blocks = rpoSortCfg(m_unit);
}
384 // compute the position number for each instruction. Each instruction
385 // gets two positions: the position where srcs are read, and the position
386 // where dests are written.
387 void XLS::computePositions() {
388 m_insts.reserve(m_unit.numInsts());
389 unsigned pos = 0;
390 for (auto b : m_blocks) {
391 for (auto& inst : *b) {
392 m_insts[pos/2] = &inst;
393 m_posns[inst] = pos;
394 pos += 2;
399 // Return true if t was forced into a register.
400 bool XLS::force(IRInstruction& inst, SSATmp& t) {
401 auto reg = forceAlloc(t);
402 if (reg != InvalidReg) {
403 m_regs[inst][t].setReg(reg, 0);
404 return true;
406 return false;
// Hook deciding whether an otherwise-unused dest should still get a
// register. Currently always declines, so dead dests get no interval.
bool allocUnusedDest(IRInstruction&) {
  return false;
}
// Reduce the allow and prefer sets according to this particular use
void srcConstraints(IRInstruction& inst, int i, SSATmp* src, Interval* ivl,
                    const Abi& abi) {
  if (!packed_tv && RuntimeOption::EvalHHIRAllocSIMDRegs) {
    if (src->type() <= Type::Dbl) {
      // doubles prefer (but do not require) a simd register
      ivl->prefer &= abi.simd;
      return;
    }
    if (inst.storesCell(i) && src->numWords() == 2) {
      // a 2-word cell being stored prefers a single simd register
      ivl->prefer &= abi.simd;
      return;
    }
  }
  // everything else is restricted to general-purpose registers
  ivl->allow &= abi.gp;
  ivl->prefer &= abi.gp;
}
// Reduce the allow and prefer constraints based on this definition
void dstConstraints(IRInstruction& inst, int i, SSATmp* dst, Interval* ivl,
                    const Abi& abi) {
  if (!packed_tv && RuntimeOption::EvalHHIRAllocSIMDRegs) {
    if (dst->type() <= Type::Dbl) {
      // doubles prefer (but do not require) a simd register
      ivl->prefer &= abi.simd;
      return;
    }
    if (inst.isLoad() && !inst.isControlFlow() && dst->numWords() == 2) {
      ivl->prefer &= abi.simd;
      if (!(ivl->allow & ivl->prefer).empty()) {
        // we prefer simd, but allow gp and simd, so restrict allow to just
        // simd to avoid (simd)<->(gpr,gpr) shuffles.
        ivl->allow = ivl->prefer;
      }
      return;
    }
  }
  // everything else is restricted to general-purpose registers
  ivl->allow &= abi.gp;
  ivl->prefer &= abi.gp;
}
// build intervals in one pass by walking the block list backwards.
// no special handling is needed for goto/label (phi) instructions because
// they use/define tmps in the expected locations.
void XLS::buildIntervals() {
  assert(checkBlockOrder(m_unit, m_blocks));
  unsigned stress = 0; // max simultaneous register demand observed
  for (auto blockIt = m_blocks.end(); blockIt != m_blocks.begin();) {
    auto block = *--blockIt;
    // compute initial live set from liveIn[successors]
    LiveSet live;
    if (auto taken = block->taken()) live |= m_liveIn[taken];
    if (auto next = block->next()) live |= m_liveIn[next];
    // initialize live range for each live tmp to whole block
    auto blockRange = closedRange(m_posns[block->front()],
                                  m_posns[block->back()] + 1);
    live.forEach([&](uint32_t id) {
      m_intervals[id]->add(blockRange);
    });
    for (auto instIt = block->end(); instIt != block->begin();) {
      auto& inst = *--instIt;
      auto spos = m_posns[inst]; // position where srcs are read
      auto dpos = spos + 1;      // position where dsts are written
      unsigned dst_need = 0;
      for (unsigned i = 0, n = inst.numDsts(); i < n; ++i) {
        auto d = inst.dst(i);
        auto dest = m_intervals[d];
        if (!live[d]) {
          // dest is not live; give it a register anyway.
          assert(!dest);
          if (d->numWords() == 0) continue;     // no storage needed
          if (force(inst, *d)) continue;        // pinned to a fixed register
          if (!allocUnusedDest(inst)) continue; // currently always skips
          // give the dead dest a minimal one-position interval
          m_intervals[d] = dest = smart_new<Interval>();
          dest->add(closedRange(dpos, dpos));
          dest->tmp = d;
          dest->need = d->numWords();
          dest->allow = dest->prefer = m_abi.all();
        } else {
          // adjust start pos for live intervals defined by this instruction
          assert(dest->tmp == d);
          dest->setStart(dpos);
          live.erase(d);
        }
        dst_need += dest->need;
        dest->addUse(dpos);
        dstConstraints(inst, i, d, dest, m_abi);
      }
      stress = std::max(dst_need, stress);
      if (inst.isNative()) {
        if (RuntimeOption::EvalHHIREnableCalleeSavedOpt) {
          // caller-saved gp registers are clobbered across native calls;
          // model that with the per-register scratch intervals
          auto scratch = m_abi.gp - m_abi.saved;
          scratch.forEach([&](PhysReg r) {
            m_scratch[r].add(LiveRange(spos, dpos));
          });
        }
      }
      // add live ranges for tmps used by this inst
      unsigned src_need = 0;
      for (unsigned i = 0, n = inst.numSrcs(); i < n; ++i) {
        auto s = inst.src(i);
        if (s->inst()->op() == DefConst) continue; // constants need no reg
        auto need = s->numWords();
        if (need == 0) continue;
        if (force(inst, *s)) continue;
        auto src = m_intervals[s];
        if (!src) m_intervals[s] = src = smart_new<Interval>();
        // conservatively live from block entry to this use; add() merges
        src->add(closedRange(blockRange.start, spos));
        src->addUse(spos);
        if (!src->tmp) {
          src->tmp = s;
          src->need = need;
          src->allow = src->prefer = m_abi.all();
        } else {
          assert(src->tmp == s);
        }
        srcConstraints(inst, i, s, src, m_abi);
        src_need += src->need;
        live.add(s);
      }
      stress = std::max(src_need, stress);
    }
    m_liveIn[block] = live;
  }
  // Implement stress mode by blocking more registers.
  stress += RuntimeOption::EvalHHIRNumFreeRegs;
  assert(stress >= RuntimeOption::EvalHHIRNumFreeRegs); // no wraparound.
  for (auto r : m_blocked) {
    auto& blocked = m_blocked[r];
    if (blocked.empty() || blocked.start() == 0) continue;
    // r is not already blocked
    if (stress > 0) {
      stress--;
      // keep r usable; just put its ranges in forward order
      std::reverse(blocked.ranges.begin(), blocked.ranges.end());
    } else {
      // block r entirely to simulate register pressure
      blocked.add(LiveRange(0, kMaximalPos));
      assert(blocked.ranges.size() == 1);
    }
  }
  for (auto r : m_scratch) {
    auto& scratch = m_scratch[r];
    if (scratch.empty()) continue;
    std::reverse(scratch.ranges.begin(), scratch.ranges.end());
  }
  // We built the use list of each interval by appending. Now reverse those
  // lists so they are in forwards order.
  for (auto ivl : m_intervals) {
    if (!ivl) continue;
    std::reverse(ivl->uses.begin(), ivl->uses.end());
    std::reverse(ivl->ranges.begin(), ivl->ranges.end());
  }
  if (dumpIREnabled(kRegAllocLevel)) {
    print(" after building intervals ");
  }
}
567 // comparison function for pending priority queue. std::priority_queue
568 // requies a less operation, but sorts the heap highest-first; we
569 // need the opposite (lowest-first), so use greater-than.
570 bool XLS::Compare::operator()(const Interval* i1, const Interval* i2) {
571 return i1->start() > i2->start();
// insert interval into pending queue, ordered by start position
void XLS::enqueue(Interval* ivl) {
  assert(ivl->checkInvariants() && !ivl->handled());
  m_pending.push(ivl);
}
// Assign the next available spill slot to interval. All split siblings
// share the leader's slot, so it is only allocated once.
void XLS::spill(Interval* ivl) {
  assert(!ivl->blocked && !ivl->scratch && ivl->isChild());
  assert(ivl->need == 1 || ivl->need == 2);
  auto leader = ivl->leader();
  if (!leader->spill.spilled()) {
    if (ivl->need == 1) {
      leader->spill.setSlot(0, m_nextSpill++);
    } else {
      // 2-word values need an aligned pair of consecutive slots
      if (!PhysLoc::isAligned(m_nextSpill)) m_nextSpill++;
      leader->spill.setSlot(0, m_nextSpill++);
      leader->spill.setSlot(1, m_nextSpill++);
    }
    if (m_nextSpill > NumPreAllocatedSpillLocs) {
      // out of spill space; bail out of this compilation
      PUNT(LinearScan_TooManySpills);
    }
  }
  ivl->loc = leader->spill;
}
// Assign one or both of the registers in r to this interval.
void XLS::assign(Interval* ivl, RegPair r) {
  assert(!ivl->blocked && !ivl->scratch);
  auto r0 = PhysReg(r.first);
  auto r1 = PhysReg(r.second);
  if (ivl->need == 1) {
    ivl->loc.setReg(r0, 0);
  } else {
    // a single simd register can hold both words of a 2-word value
    if (r0.isSIMD()) {
      ivl->loc.setRegFullSIMD(r0);
    } else if (r1.isSIMD()) {
      ivl->loc.setRegFullSIMD(r1);
    } else {
      // otherwise use two distinct gp registers
      assert(r0 != r1);
      ivl->loc.setReg(r0, 0);
      ivl->loc.setReg(r1, 1);
    }
  }
}
// initialize the positions array with maximal use positions.
RegPositions::RegPositions() {
  for (auto r : posns) posns[r] = kMaximalPos;
}
// Find the two registers used furthest in the future, but only
// consider registers in the given set. Single pass tracking the top two
// positions (max1 >= max2).
RegPair RegPositions::max2Reg(const RegSet regs) const {
  unsigned max1 = 0, max2 = 0;
  PhysReg r1 = *posns.begin(), r2 = *posns.begin();
  regs.forEach([&](PhysReg r) {
    if (posns[r] > max2) {
      if (posns[r] > max1) {
        // new best; old best becomes runner-up
        r2 = r1; max2 = max1;
        r1 = r; max1 = posns[r];
      } else {
        // new runner-up
        r2 = r; max2 = posns[r];
      }
    }
  });
  assert(posns[r1] >= posns[r2]);
  return { r1, r2 };
}
644 // Update the position associated with the registers assigned to ivl,
645 // to the minimum of pos and the existing position.
646 void RegPositions::setPos(Interval* ivl, unsigned pos) {
647 assert(ivl->loc.numAllocated() >= 1);
648 auto r0 = ivl->loc.reg(0);
649 posns[r0] = std::min(pos, posns[r0]);
650 if (ivl->loc.numAllocated() == 2) {
651 auto r1 = ivl->loc.reg(1);
652 posns[r1] = std::min(pos, posns[r1]);
// Try to give current one of its preferred registers, but only if that
// register stays free past current's entire lifetime; otherwise return
// false and let the caller fall back to the full allow set.
bool XLS::allocPrefer(Interval* current, const RegPositions& until) {
  auto r = until.max2Reg(current->prefer);
  auto untilPos = current->need == 1 ? until[r.first] : // 1 needed, 1 found
                  r.second != r.first ? until[r.second] : // 2 needed, 2 found
                  0; // 2 needed, 1 found
  if (untilPos > current->end()) {
    assign(current, r);
    m_active.push_back(current);
    return true;
  }
  return false;
}
// Allocate one register for the current interval.
// First try to find a register to assign to current, or its first part.
// If none can be found, tail-call to allocBlocked which will spill
// something, maybe current or another interval.
//
// SSA form guarantees that two SSA intervals that intersect,
// must intersect at one or the other's start position. Current
// does not intersect with inactive intervals at current->start(),
// and inactive intervals must have started earlier. Thus they
// cannot intersect. But we can only skip the intersection test
// for the original interval -- split intervals no longer have
// the SSA property.
void XLS::allocOne(Interval* current) {
  assert(!current->handled());
  RegPositions until;
  // active intervals make their registers unavailable immediately
  for (auto ivl : m_active) {
    until.setPos(ivl, 0);
  }
  // inactive intervals block their registers from the intersection point on
  for (auto ivl : m_inactive) {
    until.setPos(ivl, current->nextIntersect(ivl));
  }
  if (allocPrefer(current, until)) {
    return;
  }
  // find the register(s) that are free for the longest time.
  auto r = until.max2Reg(current->allow);
  auto untilPos = current->need == 1 ? until[r.first] : // 1 needed, 1 found
                  r.second != r.first ? until[r.second] : // 2 needed, 2 found
                  0; // 2 needed, 1 found
  if (untilPos <= current->start()) {
    // nothing free even at the start; must spill something
    return allocBlocked(current);
  }
  if (untilPos < current->end()) {
    // register is free only for a prefix; split and re-queue the rest
    auto second = goodSplit(current, untilPos); // got register for first part
    enqueue(second);
  }
  assign(current, r);
  m_active.push_back(current);
}
// When all registers are in use, find a good interval to split and spill,
// which could be the current interval. When an interval is split and the
// second part is spilled, possibly split the second part again before the
// next use-pos that requires a register, and enqueue the third part.
void XLS::allocBlocked(Interval* current) {
  RegPositions used;    // next use position per register
  RegPositions blocked; // next position each register is hard-blocked
  auto start = current->start();
  // compute next use of active registers, so we can pick the furthest one
  for (auto ivl : m_active) {
    if (ivl->blocked) {
      // hard-blocked registers are unusable from the start
      blocked.setPos(ivl, 0);
      used.setPos(ivl, 0);
    } else {
      used.setPos(ivl, ivl->nextUseAfter(start));
    }
  }
  // compute next intersection/use of inactive regs to find what's free longest
  for (auto ivl : m_inactive) {
    auto intersectPos = current->nextIntersect(ivl);
    if (intersectPos == kMaximalPos) continue; // never conflicts
    if (ivl->blocked) {
      blocked.setPos(ivl, intersectPos);
      used.setPos(ivl, blocked[ivl->regs().first]);
    } else {
      used.setPos(ivl, ivl->nextUseAfter(start));
    }
  }
  auto r = used.max2Reg(current->allow);
  auto usedPos = current->need == 1 ? used[r.first] : used[r.second];
  if (usedPos < current->firstUse()) {
    // all active+inactive intervals used before current: spill current
    auto second = goodSplit(current, current->firstRegUse());
    assert(second != current);
    spill(current);
    enqueue(second);
    return;
  }
  auto blockPos = current->need == 1 ? blocked[r.first] : blocked[r.second];
  assert(blockPos >= usedPos);
  if (blockPos < current->end()) {
    // spilling made a register free for first part of current
    auto second = goodSplit(current, blockPos);
    assert(second != current);
    enqueue(second);
  }
  assign(current, r);
  // evict whoever previously held r
  spillActive(current);
  spillInactive(current);
  m_active.push_back(current);
}
761 // return true if r1 and r2 have any registers in common
762 bool conflict(RegPair r1, RegPair r2) {
763 assert(r1.first != InvalidReg && r2.first != InvalidReg);
764 assert(r1.first != r1.second && r2.first != r2.second);
765 return r1.first == r2.first ||
766 r1.first == r2.second ||
767 (r1.second != InvalidReg &&
768 (r1.second == r2.first || r1.second == r2.second));
// Split and spill other active intervals that conflict with current for
// register r, at current->start(). If necessary, split the victims
// again before their first use position that requires a register.
void XLS::spillActive(Interval* current) {
  auto start = current->start();
  auto r = current->regs();
  // iterate with a saved copy (i2) so erase() doesn't invalidate i
  for (auto i = m_active.begin(); i != m_active.end();) {
    auto i2 = i++;
    Interval* first = *i2;
    if (first->scratch) continue;       // scratch intervals can't be spilled
    auto r2 = first->regs();
    if (!conflict(r, r2)) continue;     // different registers; leave it
    // split and spill first at current.start
    i = m_active.erase(i2);
    auto second = goodSplit(first, start);
    auto reloadPos = second->firstRegUse();
    if (reloadPos <= start) {
      // not enough registers to hold srcs that must be in a register.
      PUNT(RegSpill);
    }
    if (reloadPos < kMaximalPos) {
      if (reloadPos > second->start()) {
        // spill the gap, re-queue the part starting at the next use
        auto third = goodSplit(second, reloadPos);
        spill(second);
        enqueue(third);
      } else {
        // used immediately; just re-queue it for another register
        enqueue(second);
      }
    } else {
      // no further uses; spill the remainder entirely
      spill(second);
    }
  }
}
// Split and spill other inactive intervals that conflict with current
// for register r, at current->start(). If necessary, split the victims
// again before their first use position that requires a register.
// (Same structure as spillActive, plus an intersection test: an inactive
// interval in a lifetime hole may never actually overlap current.)
void XLS::spillInactive(Interval* current) {
  auto start = current->start();
  auto r = current->regs();
  // iterate with a saved copy (i2) so erase() doesn't invalidate i
  for (auto i = m_inactive.begin(); i != m_inactive.end();) {
    auto i2 = i++;
    Interval* first = *i2;
    if (first->scratch) continue;       // scratch intervals can't be spilled
    auto r2 = first->regs();
    if (!conflict(r, r2)) continue;     // different registers; leave it
    auto intersect = current->nextIntersect(first);
    if (intersect == kMaximalPos) continue; // never overlaps current
    // split and spill the rest of it starting @current
    i = m_inactive.erase(i2);
    auto second = goodSplit(first, start);
    auto reloadPos = second->firstRegUse();
    if (reloadPos <= start) {
      // not enough registers to hold srcs that must be in a register.
      PUNT(RegSpill);
    }
    if (reloadPos < kMaximalPos) {
      if (reloadPos > second->start()) {
        // spill the gap, re-queue the part starting at the next use
        auto third = goodSplit(second, reloadPos);
        spill(second);
        enqueue(third);
      } else {
        enqueue(second);
      }
    } else {
      // no further uses; spill the remainder entirely
      spill(second);
    }
  }
}
// split interval at or before pos, then return the new child.
// If the position is at ivl.start, return the whole ivl without
// splitting it.
Interval* XLS::goodSplit(Interval* ivl, unsigned pos) {
  m_numSplits++;
  if (pos <= ivl->start()) return ivl;
  if (pos % 2 == 1 && m_insts[pos/2]->op() == DefLabel) {
    // correctness: move split point to start of block instead of
    // in the middle of the DefLabel instruction.
    pos--;
  }
  return ivl->split(pos);
}
// Update active/inactive sets based on pos
void XLS::update(unsigned pos) {
  // check for intervals in active that are expired or inactive
  for (auto i = m_active.begin(); i != m_active.end();) {
    auto ivl = *i;
    if (ivl->end() <= pos) {
      i = m_active.erase(i); // active is now handled.
    } else if (!ivl->covers(pos)) {
      // in a lifetime hole at pos; move to inactive
      i = m_active.erase(i);
      m_inactive.push_back(ivl);
    } else {
      i++;
    }
  }
  // check for intervals in inactive that are expired or active again
  for (auto i = m_inactive.begin(); i != m_inactive.end();) {
    auto ivl = *i;
    if (ivl->end() <= pos) {
      i = m_inactive.erase(i); // inactive is now handled.
    } else if (ivl->covers(pos)) {
      i = m_inactive.erase(i);
      m_active.push_back(ivl);
    } else {
      i++;
    }
  }
}
// assign registers to intervals, split & spill where needed.
void XLS::walkIntervals() {
  // fill the pending queue with nonempty intervals in order of start position
  for (auto ivl : m_intervals) {
    if (ivl) m_pending.push(ivl);
  }
  // fixed scratch/blocked intervals start out inactive
  for (auto r : m_scratch) {
    if (!m_scratch[r].empty()) m_inactive.push_back(&m_scratch[r]);
  }
  for (auto r : m_blocked) {
    if (!m_blocked[r].empty()) m_inactive.push_back(&m_blocked[r]);
  }
  // main loop: process intervals in order of increasing start position
  while (!m_pending.empty()) {
    Interval* current = m_pending.top();
    m_pending.pop();
    update(current->start());
    allocOne(current);
    assert(current->handled() && current->loc.numWords() == current->need);
  }
  if (dumpIREnabled(kRegAllocLevel)) {
    if (m_numSplits) dumpIntervals();
    print(" after walking intervals ");
  }
}
// Assign PhysLocs (registers or spill slots) to every src and dst ssatmp
void XLS::assignLocations() {
  for (auto b : m_blocks) {
    for (auto& inst : *b) {
      auto spos = m_posns[inst];
      for (auto s : inst.srcs()) {
        if (!m_intervals[s]) continue;
        // use the location of whichever split child covers the read position
        m_regs[inst][s] = m_intervals[s]->childAt(spos)->loc;
      }
      for (auto& d : inst.dsts()) {
        if (!m_intervals[d]) continue;
        // dsts are defined at the parent interval's start; use its location
        m_regs[inst][d] = m_intervals[d]->loc;
      }
    }
  }
}
// Add a copy of tmp from rs to rd to the Shuffle instruction shuffle,
// if it exists. Otherwise, create a new one and insert it at pos in block b.
void XLS::insertCopy(Block* b, Block::iterator pos, IRInstruction*& shuffle,
                     SSATmp* src, const PhysLoc& rs, const PhysLoc& rd) {
  if (rs == rd) return; // no-op move
  if (shuffle) {
    // already have shuffle here
    shuffle->addCopy(m_unit, src, rd);
  } else {
    auto cap = 1;
    auto dests = new (m_unit.arena()) PhysLoc[cap];
    dests[0] = rd;
    // borrow a marker from the insertion point (or the block's last inst)
    auto& marker = pos != b->end() ? pos->marker() : b->back().marker();
    shuffle = m_unit.gen(Shuffle, marker, ShuffleData(dests, 1, cap), src);
    b->insert(pos, shuffle);
  }
  // record the source location for this copy
  m_regs[shuffle][src] = rs;
}
944 // Return the first instruction in block b that is not a Shuffle
945 IRInstruction* skipShuffle(Block* b) {
946 auto i = b->begin();
947 while (i->op() == Shuffle) ++i;
948 return i != b->end() ? &*i : nullptr;
// Insert a spill-store Shuffle after the instruction that defines ivl->tmp.
// If the instruction is a branch, do the store at the beginning of the
// next block.
void XLS::insertSpill(Interval* ivl) {
  auto inst = ivl->tmp->inst();
  if (inst->isBlockEnd()) {
    // can't insert after a block-ending instruction; store at the top of
    // the fall-through successor instead
    auto succ = inst->next();
    auto pos = succ->skipHeader();
    insertCopy(succ, pos, m_before[succ], ivl->tmp, ivl->loc, ivl->spill);
  } else {
    auto block = inst->block();
    auto pos = block->iteratorTo(inst);
    // reuse a shuffle before the following inst, or at block end if this
    // was the last instruction
    auto& shuffle = (++pos) != block->end() ? m_between[*pos] :
                    m_after[block];
    insertCopy(block, pos, shuffle, ivl->tmp, ivl->loc, ivl->spill);
  }
}
970 * Insert Shuffle instructions. Each Shuffle at a given position implements
971 * a parallel copy: all sources are read before any destination is written.
972 * 1. If any part of interval was spilled, insert a copy (store) to the spill
973 * slot after the defining instruction.
974 * 2. For intervals split in the middle of a block, connect the two
975 * sub-intervals by inserting a copy at the split point. "middle" means any
976 * program point after the first instruction and before the last instruction
977 * in the block. Intervals that were split on block boundaries are handled
978 * in step 3.
979 * 3. (resolveSplits) When a sub-interval is live at the start of a block
980 * (i.e. live-in), and a different sub-interval was live at the end of the
981 * predecessor, insert a copy on the edge connecting the two blocks. If that
982 * edge is a critical edge, split it first.
983 * 4. Resolve "phi"s by inserting copies on Jmp->DefLabel edges.
984 * These are similar to case 3, except they are two different intervals
985 * logically connected by the phi (Jmp->DefLabel) copy. This is done at the
986 * same time as resolving edges.
// Insert spills and copies that connect sub-intervals that were split between
// instructions
void XLS::resolveSplits() {
  if (dumpIREnabled(kRegAllocLevel)) dumpIntervals();
  for (auto i1 : m_intervals) {
    if (!i1) continue;
    // If any part of the interval lives in a spill slot, emit one store
    // after the definition; SSA form means the def dominates every reload.
    if (i1->spill.spilled()) insertSpill(i1);
    // Walk consecutive sub-interval pairs (i1, i2) along the split chain;
    // note i1 advances with i2 so each iteration compares neighbors.
    for (auto i2 = i1->next; i2; i1 = i2, i2 = i2->next) {
      auto pos1 = i1->end();
      auto pos2 = i2->start();
      // Adjacent sub-intervals in different locations need a connecting
      // copy, unless the second part is spilled (covered by the store above).
      if (!i2->loc.spilled() && pos1 == pos2 && i1->loc != i2->loc) {
        auto inst = m_insts[pos2 / 2];
        auto block = inst->block();
        // Only splits in the middle of a block are handled here; splits on
        // block boundaries are resolved by resolveEdges().
        if (inst != skipShuffle(block)) {
          assert(pos2 % 2 == 0);
          insertCopy(block, block->iteratorTo(inst), m_between[inst], i1->tmp,
                     i1->loc, i2->loc);
// Insert copies on control-flow edges, and turn Jmps into Shuffles
void XLS::resolveEdges() {
  const PhysLoc invalid_loc;
  for (auto succ : m_blocks) {
    auto& inst2 = *skipShuffle(succ);
    auto pos2 = m_posns[inst2]; // pos of first inst in succ.
    succ->forEachPred([&](Block* pred) {
      // Find the last non-Shuffle instruction in the predecessor.
      auto it1 = pred->end();
      while ((--it1)->op() == Shuffle) {}
      auto& inst1 = *it1;
      auto pos1 = m_posns[inst1]; // pos of last inst in pred
      if (inst1.op() == Jmp) {
        // resolve Jmp->DefLabel copies: each Jmp src feeds the DefLabel dst
        // with the same index (the "phi" of SSA form).
        for (unsigned i = 0, n = inst1.numSrcs(); i < n; ++i) {
          auto i1 = m_intervals[inst1.src(i)];
          auto i2 = m_intervals[inst2.dst(i)];
          // Use the sub-interval live at the end of the predecessor.
          if (i1) i1 = i1->childAt(pos1);
          insertCopy(pred, it1, m_after[pred], inst1.src(i),
                     i1 ? i1->loc : invalid_loc,
                     i2 ? i2->loc : invalid_loc);
      // Connect sub-intervals of tmps live into succ that changed location
      // across this edge.
      m_liveIn[succ].forEach([&](uint32_t id) {
        auto ivl = m_intervals[id];
        auto i1 = ivl->childAt(pos1 + 1);
        auto i2 = ivl->childAt(pos2);
        assert(i1 && i2);
        if (i2->loc.spilled()) return; // we did spill store after def.
        if (pred->taken() && pred->next()) {
          // pred has two successors, so this edge would be critical;
          // insert copy at start of successor instead.
          insertCopy(succ, succ->skipHeader(), m_before[succ],
                     i1->tmp, i1->loc, i2->loc);
        } else {
          // insert copy at end of predecessor (before a Jmp, if any).
          auto pos = inst1.op() == Jmp ? it1 : pred->end();
          insertCopy(pred, pos, m_after[pred], i1->tmp, i1->loc, i2->loc);
  // Verify we never put shuffles on both ends of one edge.
  assert(checkEdgeShuffles());
  if (dumpIREnabled(kRegAllocLevel)) {
    print(" after resolving intervals ");
1059 * Extended Linear Scan is based on Wimmer & Franz "Linear Scan Register
1060 * Allocation on SSA Form".
1062 * 1. Sort blocks such that all predecessors of B come before B, except
1063 * loop-edge predecessors. Because the input IR is in SSA form, this also
1064 * implies the definition of each SSATmp comes before all uses.
1066 * 2. Assign an even numbered position to every instruction. Uses where
1067 * inputs are read occur on even positions, and definition where outputs
1068 * are written occur on odd positions. We can only insert new instructions
1069 * after odd positions and before even positions. Points after even positions
1070 * and before odd positions are "inside" existing instructions.
1072 * 3. Create one interval I for each SSATmp T that requires register allocation,
1073 * by iterating blocks and instructions in reverse order, computing live
1074 * SSATmps as we go. Each interval consists of a sorted list of disjoint,
1075 * live ranges covering the positions where T must be in a register or
1076 * spill location. SSATmps that are constants or have forced registers
1077 * (e.g. VmSp) are skipped. Because of SSA form, the start position of each
1078 * interval dominates every live range and use position in the interval.
1080 * 4. Process intervals in order of start position, maintaining the set of
1081 * active (live) and inactive (not live, but with live ranges that start
1082 * after the current interval). When choosing a register, prefer the one
1083 * available furthest into the future. If necessary, split the current
1084 * interval so the first part gets a register, and enqueue the rest.
1085 * When no registers are available, choose either the current interval or
1086 * another one to spill, trying to free up the longest-available register.
1088 * Split positions must be after an interval's start position, and on or before
 * the chosen split point. We're free to choose a good position in between,
1090 * for example block boundaries and cold blocks.
1092 * 5. Once intervals have been walked and split, every interval has an assigned
1093 * operand (register or spill location) for all positions where it's alive.
1094 * visit every instruction and store the position of its sources and
1095 * destinations in the RegAllocInfo structure that we pass onto CodeGenerator.
1097 * 6. Splitting creates sub-intervals that are assigned to different registers
1098 * or spill locations, so we must insert resolving copies at the split
1099 * positions between intervals that were split in a block, and copies on
1100 * control-flow edges connecting different sub-intervals. When more than one
1101 * copy occurs in a position, they are parallel-copies (all sources read before
1102 * any dest is written).
 * If any sub-interval was spilled, a single store is generated after the
1105 * definition point. SSA form ensures this position dominates all uses, so
1106 * therefore it dominates all reloads.
1108 * Copies for Jmp->DefLabel edges are also converted to Shuffles, which are
1109 * combined with any other resolving copies on the same edges.
// Top-level driver: run the allocation phases in order (see the algorithm
// description above for what each step does).
void XLS::allocate() {
  prepareBlocks();      // step 1: order blocks so preds precede succs
  computePositions();   // step 2: number instructions with even positions
  buildIntervals();     // step 3: build one live interval per SSATmp
  walkIntervals();      // step 4: assign registers/spills, splitting as needed
  assignLocations();    // step 5: record each src/dst PhysLoc in m_regs
  resolveSplits();      // step 6: spill stores + intra-block split copies
  resolveEdges();       // step 6: edge copies and Jmp->DefLabel shuffles
  assert(checkRegisters(m_unit, m_regs));
1123 //////////////////////////////////////////////////////////////////////////////
1125 void XLS::dumpIntervals() {
1126 HPHP::Trace::traceRelease("Splits %d Spills %d\n", m_numSplits, m_nextSpill);
1127 for (auto ivl : m_intervals) {
1128 if (!ivl) continue;
1129 HPHP::Trace::traceRelease("i%-2d %s\n", ivl->tmp->id(),
1130 ivl->toString().c_str());
1131 for (ivl = ivl->next; ivl; ivl = ivl->next) {
1132 HPHP::Trace::traceRelease(" %s\n", ivl->toString().c_str());
1137 template<class F>
1138 void forEachInterval(Intervals& intervals, F f) {
1139 for (auto ivl : intervals) {
1140 if (ivl) f(ivl);
enum Mode { Light, Heavy };
// Return a one-column box-drawing glyph describing interval liveness at
// position pos: f(pos) selects the upper half, f(pos+1) the lower half.
// Mode picks the light or heavy variant of each glyph.
template<class F>
const char* draw(unsigned pos, Mode m, F f) {
  //                               Light     Heavy
  static const char* top[]    = { "\u2575", "\u2579" };
  static const char* bottom[] = { "\u2577", "\u257B" };
  static const char* both[]   = { "\u2502", "\u2503" };
  static const char* empty[]  = { " ", " " };
  auto const above = f(pos);
  auto const below = f(pos + 1);
  if (above) return below ? both[m] : top[m];
  return below ? bottom[m] : empty[m];
}
// Render a textual picture of all intervals alongside the instruction
// listing: one glyph column per interval (light = live, heavy = used at
// that position), then block id, position, and the printed instruction.
void XLS::print(const char* caption) {
  std::ostringstream str;
  str << "Intervals " << caption << "\n";
  // Header row: one column per interval, labeled with the tmp id.
  forEachInterval(m_intervals, [&] (Interval* ivl) {
    str << folly::format(" {: <2}", ivl->tmp->id());
  str << "\n";
  for (auto& b : m_blocks) {
    for (auto& i : *b) {
      auto pos = m_posns[i];
      if (!pos) {
        // Instruction has no assigned position; emit blank columns.
        forEachInterval(m_intervals, [&](Interval* ivl) {
          str << " ";
      } else {
        forEachInterval(m_intervals, [&] (Interval* ivl) {
          str << " ";
          // Light glyph: the interval covers (is live at) this position.
          str << draw(pos, Light, [&](unsigned p) {
            auto c = ivl->childAt(p);
            return c && c->covers(p);
          // Heavy glyph: the interval has a use at this position.
          str << draw(pos, Heavy, [&](unsigned p) {
            auto c = ivl->childAt(p);
            return c && c->usedAt(p);
      // Label the first instruction of each block with the block id.
      if (&i == &b->front()) {
        str << folly::format(" B{: <2}", b->id());
      } else {
        str << " ";
      // Native (machine-level) instructions get a trailing dash marker.
      if (i.isNative()) {
        str << folly::format(" {: <3}-", pos);
      } else {
        str << folly::format(" {: <3} ", pos);
      JIT::printOpcode(str, &i, nullptr);
      JIT::printSrcs(str, &i, &m_regs, nullptr);
      if (i.numDsts()) {
        str << " => ";
        JIT::printDsts(str, &i, &m_regs, nullptr);
      // After the last instruction, show the control-flow successors.
      if (&i == &b->back()) {
        if (auto next = b->next()) {
          str << folly::format(" next->B{}", next->id());
        if (auto taken = b->taken()) {
          str << folly::format(" taken->B{}", taken->id());
      str << "\n";
  HPHP::Trace::traceRelease("%s\n", str.str().c_str());
1217 std::string Interval::toString() {
1218 std::ostringstream out;
1219 auto delim = "";
1220 out << loc << " [";
1221 for (auto r : ranges) {
1222 out << delim << folly::format("{}-{}", r.start, r.end);
1223 delim = ",";
1225 out << ") {";
1226 delim = "";
1227 for (auto u : uses) {
1228 out << delim << u.pos;
1229 delim = ",";
1231 out << "}";
1232 return out.str();
1235 //////////////////////////////////////////////////////////////////////////////
1237 // During resolveEdges, we can insert shuffles "on edges", which comes down
1238 // to either before a successor or after a predecessor. We must never have
1239 // a shuffle in both places, because that would be two sequential shuffles
1240 // along one edge.
1241 bool XLS::checkEdgeShuffles() {
1242 for (auto b : m_blocks) {
1243 DEBUG_ONLY auto numSucc = (!!b->next()) + (!!b->taken());
1244 DEBUG_ONLY auto numPred = b->numPreds();
1245 if (m_after[b]) {
1246 assert(numSucc == 1);
1247 assert(!m_before[b->next() ? b->next() : b->taken()]);
1249 if (m_before[b]) {
1250 assert(numPred == 1);
1251 assert(!m_after[b->preds().front().inst()->block()]);
1253 if (numSucc != 1) assert(!m_after[b]);
1254 if (numPred != 1) assert(!m_before[b]);
1256 return true;
1259 // Check validity of this interval
1260 // 1. split-children cannot have more children, nor can the parent
1261 // be a child of another interval.
1262 // 2. live ranges must be sorted and disjoint
1263 // 3. every use position must be inside a live range
1264 // 4. parent and children must all be sorted and disjoint
1265 bool Interval::checkInvariants() const {
1266 assert(!parent || !parent->parent); // 1: no crazy nesting
1267 DEBUG_ONLY auto pos = 0;
1268 for (auto r : ranges) {
1269 assert(r.start <= r.end); // valid range
1270 assert(r.start >= pos); // 2: ranges are sorted
1271 pos = r.end + 1; // 2: ranges are disjoint with no empty holes
1273 for (DEBUG_ONLY auto u : uses) {
1274 assert(covers(u.pos)); // 3: every use must be in a live range
1276 return true;
1279 // check that each block comes after all its predecessors (XXX won't be
1280 // true once we have loops).
1281 bool checkBlockOrder(IRUnit& unit, BlockList& blocks) {
1282 StateVector<Block, bool> seen(unit, false);
1283 for (auto b : blocks) {
1284 b->forEachPred([&](Block* p) {
1285 assert(seen[p]);
1287 seen[b] = true;
1289 return true;
1292 //////////////////////////////////////////////////////////////////////////////
namespace {
// Register sets available to the allocator on x64: all non-XMM registers
// for general-purpose values, the XMM set for fp/simd, plus the
// callee-saved set.
const Abi x64_abi {
  X64::kAllRegs - X64::kXMMRegs, // general purpose
  X64::kAllRegs & X64::kXMMRegs, // fp/simd
  X64::kCalleeSaved
const Abi arm_abi {
  // For now this is the same as x64, since we're pretending arm
  // has the same register conventions as x64.
  ARM::kCallerSaved | ARM::kCalleeSaved,
  RegSet(), // fp/simd registers unused on ARM for now
  ARM::kCalleeSaved
// This is the public entry-point: run extended linear scan over unit and
// return the per-instruction register/spill assignments.
RegAllocInfo allocateRegs(IRUnit& unit) {
  RegAllocInfo regs(unit);
  // Select the register-set ABI to allocate from based on the target arch.
  XLS xls(unit, regs, arch() == Arch::ARM ? arm_abi : x64_abi);
  xls.allocate();
  if (dumpIREnabled()) {
    dumpTrace(kRegAllocLevel, unit, " after extended alloc ", &regs,
              nullptr, nullptr, nullptr);
  return regs;