2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/runtime/vm/jit/linear-scan.h"
18 #include "hphp/runtime/vm/jit/state-vector.h"
19 #include "hphp/runtime/vm/jit/id-set.h"
20 #include "hphp/runtime/vm/jit/block.h"
21 #include "hphp/runtime/vm/jit/cfg.h"
22 #include "hphp/runtime/vm/jit/print.h"
23 #include "hphp/runtime/vm/jit/abi-x64.h"
24 #include "hphp/runtime/vm/jit/abi-arm.h"
25 #include "hphp/runtime/vm/jit/arch.h"
26 #include "hphp/runtime/vm/jit/check.h"
28 #include <unordered_set>
32 // - #3098109 dests of branch instructions start in next block
33 // - #3098509 streamline code, vectors vs linked lists, etc
34 // - #3098661 generate spill stats so we can compare side/by/side
35 // - #3098678 EvalHHIREnablePreColoring
36 // - #3098685 EvalHHIREnableCoalescing, by using hints
37 // - #3409409 Enable use of SIMD at least for doubles, for packed_tv
38 // - #3098685 Optimize lifetime splitting
39 // - #3098712 reuse spill slots
40 // - #3098739 new features now possible with XLS
41 // - #3099647 support loops
43 namespace HPHP
{ namespace JIT
{
49 // machine-specific register conventions
51 RegSet gp
; // general purpose 64-bit registers
52 RegSet simd
; // floating point / simd 128-bit registers
53 RegSet saved
; // callee-saved (gp and simd)
55 // convenience methods
56 RegSet
all() const { return gp
| simd
; }
59 typedef StateVector
<SSATmp
, Interval
*> Intervals
;
60 typedef IdSet
<SSATmp
> LiveSet
;
61 typedef std::pair
<PhysReg
,PhysReg
> RegPair
;
63 // A Use refers to the position where an interval is read or written.
68 // A LiveRange is an open-ended range of positions where an interval is live.
70 LiveRange(unsigned s
, unsigned e
) : start(s
), end(e
) { assert(s
<= e
); }
71 bool contains(unsigned pos
) const { return pos
>= start
&& pos
< end
; }
72 bool intersects(LiveRange r
) const { return r
.start
< end
&& start
< r
.end
; }
73 bool contains(LiveRange r
) const;
78 // return [first, last+1)
79 LiveRange
closedRange(unsigned first
, unsigned last
);
81 // An Interval stores the lifetime of an SSATmp as a sorted list of disjoint
82 // ranges, and a sorted list of use positions. If this interval was split,
83 // then the first interval is deemed "parent" and the rest are "children",
84 // and they're all connected as a singly linked list sorted by start.
87 explicit Interval(Interval
* parent
);
89 bool empty() const { return ranges
.empty(); }
90 unsigned start() const { return ranges
.front().start
; }
91 unsigned end() const { return ranges
.back().end
; }
92 Interval
* leader() { return parent
? parent
: this; }
93 const Interval
* leader() const { return parent
? parent
: this; }
94 bool isChild() const { return parent
!= nullptr; }
96 bool covers(unsigned pos
) const;
97 bool usedAt(unsigned pos
) const;
98 Interval
* childAt(unsigned pos
);
99 unsigned nextIntersect(Interval
*) const;
100 unsigned nextUseAfter(unsigned pos
) const;
101 unsigned firstRegUse() const;
102 unsigned firstUse() const { return uses
.front().pos
; }
104 void add(LiveRange r
);
105 void setStart(unsigned start
);
106 void addUse(unsigned pos
) { uses
.push_back(Use
{pos
}); }
107 Interval
* split(unsigned pos
);
108 // register/spill assignment
109 RegPair
regs() const;
110 bool handled() const { return loc
.hasReg(0) || loc
.spilled(); }
112 std::string
toString();
113 bool checkInvariants() const;
114 uint32_t id() const { return leader()->tmp
->id(); }
116 bool blocked
{ false }; // cannot be spilled
117 bool scratch
{ false }; // used as scratch or arg, cannot be spilled
118 uint8_t need
{ 0 }; // number of required registers (exactly 1 or 2)
121 SSATmp
* tmp
{ nullptr };
122 Interval
* parent
{ nullptr }; // if this is a split-off child
123 Interval
* next
{ nullptr }; // next split child or nullptr
124 smart::vector
<LiveRange
> ranges
;
125 smart::vector
<Use
> uses
;
126 PhysLoc loc
; // current location assigned to this interval
127 PhysLoc spill
; // spill location (parent only)
130 // Extended Linear Scan. This just encapsulates the intermediate
131 // data structures we use during the algorithm so we don't have
132 // to pass them around everywhere.
134 XLS(IRUnit
& unit
, RegAllocInfo
& regs
, const Abi
&);
138 void prepareBlocks();
139 void computePositions();
140 void buildIntervals();
141 void walkIntervals();
142 void assignLocations();
143 void resolveSplits();
146 void enqueue(Interval
* interval
);
147 bool allocPrefer(Interval
* current
, const RegPositions
& until
);
148 void allocOne(Interval
* current
);
149 void allocBlocked(Interval
* current
);
150 Interval
* goodSplit(Interval
*, unsigned pos
);
151 void spill(Interval
*);
152 void assign(Interval
*, RegPair r
);
153 void update(unsigned pos
);
154 void spillActive(Interval
* current
);
155 void spillInactive(Interval
* current
);
156 bool force(IRInstruction
&, SSATmp
& t
);
157 void insertCopy(Block
* b
, Block::iterator
, IRInstruction
*& shuffle
,
158 SSATmp
* src
, const PhysLoc
& rs
, const PhysLoc
& rd
);
159 void insertSpill(Interval
* ivl
);
161 void print(const char* caption
);
162 void dumpIntervals();
163 bool checkEdgeShuffles();
165 struct Compare
{ bool operator()(const Interval
*, const Interval
*); };
167 Intervals m_intervals
; // parent intervals indexed by ssatmp
168 int m_nextSpill
{ 0 };
169 int m_numSplits
{ 0 };
171 RegAllocInfo
& m_regs
;
173 StateVector
<IRInstruction
, unsigned> m_posns
;
174 smart::vector
<IRInstruction
*> m_insts
;
176 PhysReg::Map
<Interval
> m_scratch
;
177 PhysReg::Map
<Interval
> m_blocked
;
178 StateVector
<Block
,LiveSet
> m_liveIn
;
179 smart::priority_queue
<Interval
*,Compare
> m_pending
;
180 smart::vector
<Interval
*> m_active
;
181 smart::vector
<Interval
*> m_inactive
;
182 StateVector
<IRInstruction
,IRInstruction
*> m_between
;
183 StateVector
<Block
,IRInstruction
*> m_before
;
184 StateVector
<Block
,IRInstruction
*> m_after
;
187 const uint32_t kMaximalPos
= UINT32_MAX
; // "infinity" use position
189 // Keep track of a future use or conflict position for each register.
190 // Initially all usable registers have kMaximalPos
191 struct RegPositions
{
192 explicit RegPositions();
193 RegPair
max2Reg(RegSet regs
) const;
194 unsigned operator[](PhysReg r
) const { return posns
[r
]; }
195 void setPos(Interval
*, unsigned pos
);
197 PhysReg::Map
<unsigned> posns
;
200 bool checkBlockOrder(IRUnit
& unit
, BlockList
& blocks
);
202 //////////////////////////////////////////////////////////////////////////////
204 // returns true if this range contains r
205 bool LiveRange::contains(LiveRange r
) const {
206 return r
.start
>= start
&& r
.end
<= end
;
209 // Return a range for the closed interval [first,last], i.e. [first, last+1)
210 LiveRange
closedRange(unsigned first
, unsigned last
) {
211 return LiveRange(first
, last
+ 1);
214 //////////////////////////////////////////////////////////////////////////////
216 Interval::Interval(Interval
* parent
)
218 , allow(parent
->allow
)
219 , prefer(parent
->prefer
)
224 // Add r to this interval, merging r with any existing overlapping ranges
225 void Interval::add(LiveRange r
) {
226 while (!ranges
.empty() && r
.contains(ranges
.back())) {
229 if (ranges
.empty()) {
230 return ranges
.push_back(r
);
232 auto& first
= ranges
.back();
233 if (first
.contains(r
)) return;
234 if (r
.end
>= first
.start
) {
235 first
.start
= r
.start
;
241 // Update the start-position of the earliest range in this interval,
242 // which is ranges.back() during buildIntervals().
243 void Interval::setStart(unsigned start
) {
244 assert(!ranges
.empty() && ranges
.back().contains(start
));
245 ranges
.back().start
= start
;
248 // Return true if one of the ranges in this interval includes pos
249 bool Interval::covers(unsigned pos
) const {
250 if (pos
< start() || pos
>= end()) return false;
251 for (auto r
: ranges
) {
252 if (pos
< r
.start
) return false;
253 if (pos
< r
.end
) return true;
258 // Return true if there is a use position at pos
259 bool Interval::usedAt(unsigned pos
) const {
260 if (pos
< start() || pos
>= end()) return false;
261 for (auto& u
: uses
) if (u
.pos
== pos
) return true;
265 // Return the interval which includes pos (this one or a child)
266 Interval
* Interval::childAt(unsigned pos
) {
268 for (auto ivl
= this; ivl
; ivl
= ivl
->next
) {
269 if (pos
< ivl
->start()) return nullptr;
270 if (ivl
->covers(pos
)) return ivl
;
275 // return the next intersection point between this and ivl, or MAX_UINT32
276 // if they never intersect.
277 unsigned Interval::nextIntersect(Interval
* ivl
) const {
278 assert(!ranges
.empty() && !ivl
->ranges
.empty());
279 auto r1
= ranges
.begin(), e1
= ranges
.end();
280 auto r2
= ivl
->ranges
.begin(), e2
= ivl
->ranges
.end();
282 if (r1
->start
< r2
->start
) {
283 if (r2
->start
< r1
->end
) return r2
->start
;
284 if (++r1
== e1
) return kMaximalPos
;
286 if (r1
->start
< r2
->end
) return r1
->start
;
287 if (++r2
== e2
) return kMaximalPos
;
293 // Split this interval at pos and return the rest. Pos must be
294 // a location that ensures both shorter intervals are nonempty.
295 // Pos must also be even, indicating a position between instructions.
296 Interval
* Interval::split(unsigned pos
) {
297 assert(pos
> start() && pos
< end());
298 auto leader
= this->leader();
299 Interval
* child
= smart_new
<Interval
>(leader
);
302 // advance r1 to the first range we want in child; maybe split a range.
303 auto r1
= ranges
.begin(), r2
= ranges
.end();
304 while (r1
->end
<= pos
) r1
++;
305 if (pos
> r1
->start
) { // split r at pos
306 child
->ranges
.push_back(LiveRange(pos
, r1
->end
));
310 child
->ranges
.insert(child
->ranges
.end(), r1
, r2
);
311 ranges
.erase(r1
, r2
);
312 // advance u1 to the first use position in child, then copy u1..end to child.
313 auto u1
= uses
.begin(), u2
= uses
.end();
314 while (u1
!= u2
&& u1
->pos
< pos
) u1
++;
315 child
->uses
.insert(child
->uses
.end(), u1
, u2
);
317 assert(checkInvariants() && child
->checkInvariants());
321 // Return the position of the next use of this interval after (or equal to)
322 // pos. If there are no more uses after pos, return MAX.
323 unsigned Interval::nextUseAfter(unsigned pos
) const {
324 for (auto& u
: uses
) {
325 if (pos
<= u
.pos
) return u
.pos
;
330 // return the position of the first use that requires a register,
331 // or kMaximalPos if no remaining uses need registers.
332 unsigned Interval::firstRegUse() const {
333 return uses
.empty() ? kMaximalPos
: uses
.front().pos
;
336 // Return the register(s) assigned to this interval as a pair.
337 RegPair
Interval::regs() const {
338 return RegPair(loc
.reg(0), loc
.reg(1));
341 //////////////////////////////////////////////////////////////////////////////
343 XLS::XLS(IRUnit
& unit
, RegAllocInfo
& regs
, const Abi
& abi
)
344 : m_intervals(unit
, nullptr)
349 , m_liveIn(unit
, LiveSet())
350 , m_between(unit
, nullptr)
351 , m_before(unit
, nullptr)
352 , m_after(unit
, nullptr) {
353 auto all
= abi
.all();
354 for (auto r
: m_blocked
) {
355 m_blocked
[r
].blocked
= true;
356 m_blocked
[r
].need
= 1;
357 m_blocked
[r
].loc
.setReg(r
, 0);
358 if (!all
.contains(r
)) {
359 // r is never available
360 m_blocked
[r
].add(LiveRange(0, kMaximalPos
));
362 m_scratch
[r
].scratch
= true;
363 m_scratch
[r
].need
= 1;
364 m_scratch
[r
].loc
.setReg(r
, 0);
369 for (auto ivl
: m_intervals
) {
370 for (Interval
* next
; ivl
; ivl
= next
) {
377 // Split critical edges, remove dead predecessor edges, and put blocks
378 // in a sutiable order.
379 void XLS::prepareBlocks() {
380 splitCriticalEdges(m_unit
);
381 m_blocks
= rpoSortCfg(m_unit
);
384 // compute the position number for each instruction. Each instruction
385 // gets two positions: the position where srcs are read, and the position
386 // where dests are written.
387 void XLS::computePositions() {
388 m_insts
.reserve(m_unit
.numInsts());
390 for (auto b
: m_blocks
) {
391 for (auto& inst
: *b
) {
392 m_insts
[pos
/2] = &inst
;
399 // Return true if t was forced into a register.
400 bool XLS::force(IRInstruction
& inst
, SSATmp
& t
) {
401 auto reg
= forceAlloc(t
);
402 if (reg
!= InvalidReg
) {
403 m_regs
[inst
][t
].setReg(reg
, 0);
409 bool allocUnusedDest(IRInstruction
&) {
413 // Reduce the allow and prefer sets according to this particular use
414 void srcConstraints(IRInstruction
& inst
, int i
, SSATmp
* src
, Interval
* ivl
,
416 if (!packed_tv
&& RuntimeOption::EvalHHIRAllocSIMDRegs
) {
417 if (src
->type() <= Type::Dbl
) {
418 ivl
->prefer
&= abi
.simd
;
421 if (inst
.storesCell(i
) && src
->numWords() == 2) {
422 ivl
->prefer
&= abi
.simd
;
426 ivl
->allow
&= abi
.gp
;
427 ivl
->prefer
&= abi
.gp
;
430 // Reduce the allow and prefer constraints based on this definition
431 void dstConstraints(IRInstruction
& inst
, int i
, SSATmp
* dst
, Interval
* ivl
,
433 if (!packed_tv
&& RuntimeOption::EvalHHIRAllocSIMDRegs
) {
434 if (dst
->type() <= Type::Dbl
) {
435 ivl
->prefer
&= abi
.simd
;
438 if (inst
.isLoad() && !inst
.isControlFlow() && dst
->numWords() == 2) {
439 ivl
->prefer
&= abi
.simd
;
440 if (!(ivl
->allow
& ivl
->prefer
).empty()) {
441 // we prefer simd, but allow gp and simd, so restrict allow to just
442 // simd to avoid (simd)<->(gpr,gpr) shuffles.
443 ivl
->allow
= ivl
->prefer
;
448 ivl
->allow
&= abi
.gp
;
449 ivl
->prefer
&= abi
.gp
;
452 // build intervals in one pass by walking the block list backwards.
453 // no special handling is needed for goto/label (phi) instructions because
454 // they use/define tmps in the expected locations.
455 void XLS::buildIntervals() {
456 assert(checkBlockOrder(m_unit
, m_blocks
));
458 for (auto blockIt
= m_blocks
.end(); blockIt
!= m_blocks
.begin();) {
459 auto block
= *--blockIt
;
460 // compute initial live set from liveIn[successors]
462 if (auto taken
= block
->taken()) live
|= m_liveIn
[taken
];
463 if (auto next
= block
->next()) live
|= m_liveIn
[next
];
464 // initialize live range for each live tmp to whole block
465 auto blockRange
= closedRange(m_posns
[block
->front()],
466 m_posns
[block
->back()] + 1);
467 live
.forEach([&](uint32_t id
) {
468 m_intervals
[id
]->add(blockRange
);
470 for (auto instIt
= block
->end(); instIt
!= block
->begin();) {
471 auto& inst
= *--instIt
;
472 auto spos
= m_posns
[inst
]; // position where srcs are read
473 auto dpos
= spos
+ 1; // position where dsts are written
474 unsigned dst_need
= 0;
475 for (unsigned i
= 0, n
= inst
.numDsts(); i
< n
; ++i
) {
476 auto d
= inst
.dst(i
);
477 auto dest
= m_intervals
[d
];
479 // dest is not live; give it a register anyway.
481 if (d
->numWords() == 0) continue;
482 if (force(inst
, *d
)) continue;
483 if (!allocUnusedDest(inst
)) continue;
484 m_intervals
[d
] = dest
= smart_new
<Interval
>();
485 dest
->add(closedRange(dpos
, dpos
));
487 dest
->need
= d
->numWords();
488 dest
->allow
= dest
->prefer
= m_abi
.all();
490 // adjust start pos for live intervals defined by this instruction
491 assert(dest
->tmp
== d
);
492 dest
->setStart(dpos
);
495 dst_need
+= dest
->need
;
497 dstConstraints(inst
, i
, d
, dest
, m_abi
);
499 stress
= std::max(dst_need
, stress
);
500 if (inst
.isNative()) {
501 if (RuntimeOption::EvalHHIREnableCalleeSavedOpt
) {
502 auto scratch
= m_abi
.gp
- m_abi
.saved
;
503 scratch
.forEach([&](PhysReg r
) {
504 m_scratch
[r
].add(LiveRange(spos
, dpos
));
508 // add live ranges for tmps used by this inst
509 unsigned src_need
= 0;
510 for (unsigned i
= 0, n
= inst
.numSrcs(); i
< n
; ++i
) {
511 auto s
= inst
.src(i
);
512 if (s
->inst()->op() == DefConst
) continue;
513 auto need
= s
->numWords();
514 if (need
== 0) continue;
515 if (force(inst
, *s
)) continue;
516 auto src
= m_intervals
[s
];
517 if (!src
) m_intervals
[s
] = src
= smart_new
<Interval
>();
518 src
->add(closedRange(blockRange
.start
, spos
));
523 src
->allow
= src
->prefer
= m_abi
.all();
525 assert(src
->tmp
== s
);
527 srcConstraints(inst
, i
, s
, src
, m_abi
);
528 src_need
+= src
->need
;
531 stress
= std::max(src_need
, stress
);
533 m_liveIn
[block
] = live
;
535 // Implement stress mode by blocking more registers.
536 stress
+= RuntimeOption::EvalHHIRNumFreeRegs
;
537 assert(stress
>= RuntimeOption::EvalHHIRNumFreeRegs
); // no wraparound.
538 for (auto r
: m_blocked
) {
539 auto& blocked
= m_blocked
[r
];
540 if (blocked
.empty() || blocked
.start() == 0) continue;
541 // r is not already blocked
544 std::reverse(blocked
.ranges
.begin(), blocked
.ranges
.end());
546 blocked
.add(LiveRange(0, kMaximalPos
));
547 assert(blocked
.ranges
.size() == 1);
550 for (auto r
: m_scratch
) {
551 auto& scratch
= m_scratch
[r
];
552 if (scratch
.empty()) continue;
553 std::reverse(scratch
.ranges
.begin(), scratch
.ranges
.end());
555 // We built the use list of each interval by appending. Now reverse those
556 // lists so they are in forwards order.
557 for (auto ivl
: m_intervals
) {
559 std::reverse(ivl
->uses
.begin(), ivl
->uses
.end());
560 std::reverse(ivl
->ranges
.begin(), ivl
->ranges
.end());
562 if (dumpIREnabled(kRegAllocLevel
)) {
563 print(" after building intervals ");
567 // comparison function for pending priority queue. std::priority_queue
568 // requies a less operation, but sorts the heap highest-first; we
569 // need the opposite (lowest-first), so use greater-than.
570 bool XLS::Compare::operator()(const Interval
* i1
, const Interval
* i2
) {
571 return i1
->start() > i2
->start();
574 // insert interval into pending list in order of start position
575 void XLS::enqueue(Interval
* ivl
) {
576 assert(ivl
->checkInvariants() && !ivl
->handled());
580 // Assign the next available spill slot to interval
581 void XLS::spill(Interval
* ivl
) {
582 assert(!ivl
->blocked
&& !ivl
->scratch
&& ivl
->isChild());
583 assert(ivl
->need
== 1 || ivl
->need
== 2);
584 auto leader
= ivl
->leader();
585 if (!leader
->spill
.spilled()) {
586 if (ivl
->need
== 1) {
587 leader
->spill
.setSlot(0, m_nextSpill
++);
589 if (!PhysLoc::isAligned(m_nextSpill
)) m_nextSpill
++;
590 leader
->spill
.setSlot(0, m_nextSpill
++);
591 leader
->spill
.setSlot(1, m_nextSpill
++);
593 if (m_nextSpill
> NumPreAllocatedSpillLocs
) {
594 PUNT(LinearScan_TooManySpills
);
597 ivl
->loc
= leader
->spill
;
600 // Assign one or both of the registers in r to this interval.
601 void XLS::assign(Interval
* ivl
, RegPair r
) {
602 assert(!ivl
->blocked
&& !ivl
->scratch
);
603 auto r0
= PhysReg(r
.first
);
604 auto r1
= PhysReg(r
.second
);
605 if (ivl
->need
== 1) {
606 ivl
->loc
.setReg(r0
, 0);
609 ivl
->loc
.setRegFullSIMD(r0
);
610 } else if (r1
.isSIMD()) {
611 ivl
->loc
.setRegFullSIMD(r1
);
614 ivl
->loc
.setReg(r0
, 0);
615 ivl
->loc
.setReg(r1
, 1);
620 // initialize the positions array with maximal use positions.
621 RegPositions::RegPositions() {
622 for (auto r
: posns
) posns
[r
] = kMaximalPos
;
625 // Find the two registers used furthest in the future, but only
626 // consider registers in the given set
627 RegPair
RegPositions::max2Reg(const RegSet regs
) const {
628 unsigned max1
= 0, max2
= 0;
629 PhysReg r1
= *posns
.begin(), r2
= *posns
.begin();
630 regs
.forEach([&](PhysReg r
) {
631 if (posns
[r
] > max2
) {
632 if (posns
[r
] > max1
) {
633 r2
= r1
; max2
= max1
;
634 r1
= r
; max1
= posns
[r
];
636 r2
= r
; max2
= posns
[r
];
640 assert(posns
[r1
] >= posns
[r2
]);
644 // Update the position associated with the registers assigned to ivl,
645 // to the minimum of pos and the existing position.
646 void RegPositions::setPos(Interval
* ivl
, unsigned pos
) {
647 assert(ivl
->loc
.numAllocated() >= 1);
648 auto r0
= ivl
->loc
.reg(0);
649 posns
[r0
] = std::min(pos
, posns
[r0
]);
650 if (ivl
->loc
.numAllocated() == 2) {
651 auto r1
= ivl
->loc
.reg(1);
652 posns
[r1
] = std::min(pos
, posns
[r1
]);
656 bool XLS::allocPrefer(Interval
* current
, const RegPositions
& until
) {
657 auto r
= until
.max2Reg(current
->prefer
);
658 auto untilPos
= current
->need
== 1 ? until
[r
.first
] : // 1 needed, 1 found
659 r
.second
!= r
.first
? until
[r
.second
] : // 2 needed, 2 found
660 0; // 2 needed, 1 found
661 if (untilPos
> current
->end()) {
663 m_active
.push_back(current
);
669 // Allocate one register for the current interval.
670 // First try to find a register to assign to current, or its first part.
671 // If none can be found, tail-call to allocBlocked which will spill
672 // something, maybe current or another interval.
674 // SSA form guarantees that two SSA intervals that intersect,
675 // must intersect at one or the other's start position. Current
676 // does not intersect with inactive intervals at current->start(),
677 // and inactive intervals must have started earlier. Thus they
678 // cannot intersect. But we can only skip the intersection test
679 // for the original interval -- split intervals no longer have
681 void XLS::allocOne(Interval
* current
) {
682 assert(!current
->handled());
684 for (auto ivl
: m_active
) {
685 until
.setPos(ivl
, 0);
687 for (auto ivl
: m_inactive
) {
688 until
.setPos(ivl
, current
->nextIntersect(ivl
));
690 if (allocPrefer(current
, until
)) {
693 // find the register(s) that are free for the longest time.
694 auto r
= until
.max2Reg(current
->allow
);
695 auto untilPos
= current
->need
== 1 ? until
[r
.first
] : // 1 needed, 1 found
696 r
.second
!= r
.first
? until
[r
.second
] : // 2 needed, 2 found
697 0; // 2 needed, 1 found
698 if (untilPos
<= current
->start()) {
699 return allocBlocked(current
);
701 if (untilPos
< current
->end()) {
702 auto second
= goodSplit(current
, untilPos
); // got register for first part
706 m_active
.push_back(current
);
709 // When all registers are in use, find a good interval to split and spill,
710 // which could be the current interval. When an interval is split and the
711 // second part is spilled, possibly split the second part again before the
712 // next use-pos that requires a register, and enqueue the third part.
713 void XLS::allocBlocked(Interval
* current
) {
715 RegPositions blocked
;
716 auto start
= current
->start();
717 // compute next use of active registers, so we can pick the furthest one
718 for (auto ivl
: m_active
) {
720 blocked
.setPos(ivl
, 0);
723 used
.setPos(ivl
, ivl
->nextUseAfter(start
));
726 // compute next intersection/use of inactive regs to find whats free longest
727 for (auto ivl
: m_inactive
) {
728 auto intersectPos
= current
->nextIntersect(ivl
);
729 if (intersectPos
== kMaximalPos
) continue;
731 blocked
.setPos(ivl
, intersectPos
);
732 used
.setPos(ivl
, blocked
[ivl
->regs().first
]);
734 used
.setPos(ivl
, ivl
->nextUseAfter(start
));
737 auto r
= used
.max2Reg(current
->allow
);
738 auto usedPos
= current
->need
== 1 ? used
[r
.first
] : used
[r
.second
];
739 if (usedPos
< current
->firstUse()) {
740 // all active+inactive intervals used before current: spill current
741 auto second
= goodSplit(current
, current
->firstRegUse());
742 assert(second
!= current
);
747 auto blockPos
= current
->need
== 1 ? blocked
[r
.first
] : blocked
[r
.second
];
748 assert(blockPos
>= usedPos
);
749 if (blockPos
< current
->end()) {
750 // spilling made a register free for first part of current
751 auto second
= goodSplit(current
, blockPos
);
752 assert(second
!= current
);
756 spillActive(current
);
757 spillInactive(current
);
758 m_active
.push_back(current
);
761 // return true if r1 and r2 have any registers in common
762 bool conflict(RegPair r1
, RegPair r2
) {
763 assert(r1
.first
!= InvalidReg
&& r2
.first
!= InvalidReg
);
764 assert(r1
.first
!= r1
.second
&& r2
.first
!= r2
.second
);
765 return r1
.first
== r2
.first
||
766 r1
.first
== r2
.second
||
767 (r1
.second
!= InvalidReg
&&
768 (r1
.second
== r2
.first
|| r1
.second
== r2
.second
));
771 // Split and spill other active intervals that conflict with current for
772 // register r, at current->start(). If necessary, split the victims
773 // again before their first use position that requires a register.
774 void XLS::spillActive(Interval
* current
) {
775 auto start
= current
->start();
776 auto r
= current
->regs();
777 for (auto i
= m_active
.begin(); i
!= m_active
.end();) {
779 Interval
* first
= *i2
;
780 if (first
->scratch
) continue;
781 auto r2
= first
->regs();
782 if (!conflict(r
, r2
)) continue;
783 // split and spill first at current.start
784 i
= m_active
.erase(i2
);
785 auto second
= goodSplit(first
, start
);
786 auto reloadPos
= second
->firstRegUse();
787 if (reloadPos
<= start
) {
788 // not enough registers to hold srcs that must be in a register.
791 if (reloadPos
< kMaximalPos
) {
792 if (reloadPos
> second
->start()) {
793 auto third
= goodSplit(second
, reloadPos
);
805 // Split and spill other inactive intervals that conflict with current
806 // for register r, at current->start(). If necessary, split the victims
807 // again before their first use position that requires a register.
808 void XLS::spillInactive(Interval
* current
) {
809 auto start
= current
->start();
810 auto r
= current
->regs();
811 for (auto i
= m_inactive
.begin(); i
!= m_inactive
.end();) {
813 Interval
* first
= *i2
;
814 if (first
->scratch
) continue;
815 auto r2
= first
->regs();
816 if (!conflict(r
, r2
)) continue;
817 auto intersect
= current
->nextIntersect(first
);
818 if (intersect
== kMaximalPos
) continue;
819 // split and spill the rest of it starting @current
820 i
= m_inactive
.erase(i2
);
821 auto second
= goodSplit(first
, start
);
822 auto reloadPos
= second
->firstRegUse();
823 if (reloadPos
<= start
) {
824 // not enough registers to hold srcs that must be in a register.
827 if (reloadPos
< kMaximalPos
) {
828 if (reloadPos
> second
->start()) {
829 auto third
= goodSplit(second
, reloadPos
);
841 // split interval at or before pos, then return the new child.
842 // If the position is at ivl.start, return the whole ivl without
844 Interval
* XLS::goodSplit(Interval
* ivl
, unsigned pos
) {
846 if (pos
<= ivl
->start()) return ivl
;
847 if (pos
% 2 == 1 && m_insts
[pos
/2]->op() == DefLabel
) {
848 // correctness: move split point to start of block instead of
849 // in the middle of the DefLabel instruction.
852 return ivl
->split(pos
);
855 // Update active/inactive sets based on pos
856 void XLS::update(unsigned pos
) {
857 // check for intervals in active that are expired or inactive
858 for (auto i
= m_active
.begin(); i
!= m_active
.end();) {
860 if (ivl
->end() <= pos
) {
861 i
= m_active
.erase(i
); // active is now handled.
862 } else if (!ivl
->covers(pos
)) {
863 i
= m_active
.erase(i
);
864 m_inactive
.push_back(ivl
);
869 // check for intervals that are expired or active
870 for (auto i
= m_inactive
.begin(); i
!= m_inactive
.end();) {
872 if (ivl
->end() <= pos
) {
873 i
= m_inactive
.erase(i
); // inactive is now handled.
874 } else if (ivl
->covers(pos
)) {
875 i
= m_inactive
.erase(i
);
876 m_active
.push_back(ivl
);
883 // assign registers to intervals, split & spill where needed.
884 void XLS::walkIntervals() {
885 // fill the pending queue with nonempty intervals in order of start position
886 for (auto ivl
: m_intervals
) {
887 if (ivl
) m_pending
.push(ivl
);
889 for (auto r
: m_scratch
) {
890 if (!m_scratch
[r
].empty()) m_inactive
.push_back(&m_scratch
[r
]);
892 for (auto r
: m_blocked
) {
893 if (!m_blocked
[r
].empty()) m_inactive
.push_back(&m_blocked
[r
]);
895 while (!m_pending
.empty()) {
896 Interval
* current
= m_pending
.top();
898 update(current
->start());
900 assert(current
->handled() && current
->loc
.numWords() == current
->need
);
902 if (dumpIREnabled(kRegAllocLevel
)) {
903 if (m_numSplits
) dumpIntervals();
904 print(" after walking intervals ");
908 // Assign PhysLocs (registers or spill slots) to every src and dst ssatmp
909 void XLS::assignLocations() {
910 for (auto b
: m_blocks
) {
911 for (auto& inst
: *b
) {
912 auto spos
= m_posns
[inst
];
913 for (auto s
: inst
.srcs()) {
914 if (!m_intervals
[s
]) continue;
915 m_regs
[inst
][s
] = m_intervals
[s
]->childAt(spos
)->loc
;
917 for (auto& d
: inst
.dsts()) {
918 if (!m_intervals
[d
]) continue;
919 m_regs
[inst
][d
] = m_intervals
[d
]->loc
;
925 // Add a copy of tmp from rs to rd to the Shuffle instruction shuffle,
926 // if it exists. Otherwise, create a new one and insert it at pos in block b.
927 void XLS::insertCopy(Block
* b
, Block::iterator pos
, IRInstruction
* &shuffle
,
928 SSATmp
* src
, const PhysLoc
& rs
, const PhysLoc
& rd
) {
929 if (rs
== rd
) return;
931 // already have shuffle here
932 shuffle
->addCopy(m_unit
, src
, rd
);
935 auto dests
= new (m_unit
.arena()) PhysLoc
[cap
];
937 auto& marker
= pos
!= b
->end() ? pos
->marker() : b
->back().marker();
938 shuffle
= m_unit
.gen(Shuffle
, marker
, ShuffleData(dests
, 1, cap
), src
);
939 b
->insert(pos
, shuffle
);
941 m_regs
[shuffle
][src
] = rs
;
944 // Return the first instruction in block b that is not a Shuffle
945 IRInstruction
* skipShuffle(Block
* b
) {
947 while (i
->op() == Shuffle
) ++i
;
948 return i
!= b
->end() ? &*i
: nullptr;
951 // Insert a spill-store Shuffle after the instruction that defines ivl->tmp.
952 // If the instruction is a branch, do the store at the beginning of the
954 void XLS::insertSpill(Interval
* ivl
) {
955 auto inst
= ivl
->tmp
->inst();
956 if (inst
->isBlockEnd()) {
957 auto succ
= inst
->next();
958 auto pos
= succ
->skipHeader();
959 insertCopy(succ
, pos
, m_before
[succ
], ivl
->tmp
, ivl
->loc
, ivl
->spill
);
961 auto block
= inst
->block();
962 auto pos
= block
->iteratorTo(inst
);
963 auto& shuffle
= (++pos
) != block
->end() ? m_between
[*pos
] :
965 insertCopy(block
, pos
, shuffle
, ivl
->tmp
, ivl
->loc
, ivl
->spill
);
970 * Insert Shuffle instructions. Each Shuffle at a given position implements
971 * a parallel copy: all sources are read before any destination is written.
972 * 1. If any part of interval was spilled, insert a copy (store) to the spill
973 * slot after the defining instruction.
974 * 2. For intervals split in the middle of a block, connect the two
975 * sub-intervals by inserting a copy at the split point. "middle" means any
976 * program point after the first instruction and before the last instruction
977 * in the block. Intervals that were split on block boundaries are handled
979 * 3. (resolveSplits) When a sub-interval is live at the start of a block
980 * (i.e. live-in), and a different sub-interval was live at the end of the
981 * predecessor, insert a copy on the edge connecting the two blocks. If that
982 * edge is a critical edge, split it first.
983 * 4. Resolve "phi"s by inserting copies on Jmp->DefLabel edges.
984 * These are similar to case 3, except they are two different intervals
985 * logically connected by the phi (Jmp->DefLabel) copy. This is done at the
986 * same time as resolving edges.
989 // Insert spills and copies that connect sub-intervals that were split between
991 void XLS::resolveSplits() {
992 if (dumpIREnabled(kRegAllocLevel
)) dumpIntervals();
993 for (auto i1
: m_intervals
) {
995 if (i1
->spill
.spilled()) insertSpill(i1
);
996 for (auto i2
= i1
->next
; i2
; i1
= i2
, i2
= i2
->next
) {
997 auto pos1
= i1
->end();
998 auto pos2
= i2
->start();
999 if (!i2
->loc
.spilled() && pos1
== pos2
&& i1
->loc
!= i2
->loc
) {
1000 auto inst
= m_insts
[pos2
/ 2];
1001 auto block
= inst
->block();
1002 if (inst
!= skipShuffle(block
)) {
1003 assert(pos2
% 2 == 0);
1004 insertCopy(block
, block
->iteratorTo(inst
), m_between
[inst
], i1
->tmp
,
1012 // Insert copies on control-flow edges, and turn Jmps into Shuffles
1013 void XLS::resolveEdges() {
1014 const PhysLoc invalid_loc
;
1015 for (auto succ
: m_blocks
) {
1016 auto& inst2
= *skipShuffle(succ
);
1017 auto pos2
= m_posns
[inst2
]; // pos of first inst in succ.
1018 succ
->forEachPred([&](Block
* pred
) {
1019 auto it1
= pred
->end();
1020 while ((--it1
)->op() == Shuffle
) {}
1022 auto pos1
= m_posns
[inst1
]; // pos of last inst in pred
1023 if (inst1
.op() == Jmp
) {
1024 // resolve Jmp->DefLabel copies
1025 for (unsigned i
= 0, n
= inst1
.numSrcs(); i
< n
; ++i
) {
1026 auto i1
= m_intervals
[inst1
.src(i
)];
1027 auto i2
= m_intervals
[inst2
.dst(i
)];
1028 if (i1
) i1
= i1
->childAt(pos1
);
1029 insertCopy(pred
, it1
, m_after
[pred
], inst1
.src(i
),
1030 i1
? i1
->loc
: invalid_loc
,
1031 i2
? i2
->loc
: invalid_loc
);
1034 m_liveIn
[succ
].forEach([&](uint32_t id
) {
1035 auto ivl
= m_intervals
[id
];
1036 auto i1
= ivl
->childAt(pos1
+ 1);
1037 auto i2
= ivl
->childAt(pos2
);
1039 if (i2
->loc
.spilled()) return; // we did spill store after def.
1040 if (pred
->taken() && pred
->next()) {
1041 // insert copy at start of succesor
1042 insertCopy(succ
, succ
->skipHeader(), m_before
[succ
],
1043 i1
->tmp
, i1
->loc
, i2
->loc
);
1045 // insert copy at end of predecessor
1046 auto pos
= inst1
.op() == Jmp
? it1
: pred
->end();
1047 insertCopy(pred
, pos
, m_after
[pred
], i1
->tmp
, i1
->loc
, i2
->loc
);
1052 assert(checkEdgeShuffles());
1053 if (dumpIREnabled(kRegAllocLevel
)) {
1054 print(" after resolving intervals ");
1059 * Extended Linear Scan is based on Wimmer & Franz "Linear Scan Register
1060 * Allocation on SSA Form".
1062 * 1. Sort blocks such that all predecessors of B come before B, except
1063 * loop-edge predecessors. Because the input IR is in SSA form, this also
1064 * implies the definition of each SSATmp comes before all uses.
1066 * 2. Assign an even numbered position to every instruction. Uses where
1067 * inputs are read occur on even positions, and definition where outputs
1068 * are written occur on odd positions. We can only insert new instructions
1069 * after odd positions and before even positions. Points after even positions
1070 * and before odd positions are "inside" existing instructions.
1072 * 3. Create one interval I for each SSATmp T that requires register allocation,
1073 * by iterating blocks and instructions in reverse order, computing live
1074 * SSATmps as we go. Each interval consists of a sorted list of disjoint,
1075 * live ranges covering the positions where T must be in a register or
1076 * spill location. SSATmps that are constants or have forced registers
1077 * (e.g. VmSp) are skipped. Because of SSA form, the start position of each
1078 * interval dominates every live range and use position in the interval.
1080 * 4. Process intervals in order of start position, maintaining the set of
1081 * active (live) and inactive (not live, but with live ranges that start
1082 * after the current interval). When choosing a register, prefer the one
1083 * available furthest into the future. If necessary, split the current
1084 * interval so the first part gets a register, and enqueue the rest.
1085 * When no registers are available, choose either the current interval or
1086 * another one to spill, trying to free up the longest-available register.
1088 * Split positions must be after an interval's start position, and on or before
1089 * the chosen split point. We're free to try to choose a good position in between,
1090 * for example block boundaries and cold blocks.
1092 * 5. Once intervals have been walked and split, every interval has an assigned
1093 * operand (register or spill location) for all positions where it's alive.
1094 * Visit every instruction and store the position of its sources and
1095 * destinations in the RegAllocInfo structure that we pass onto CodeGenerator.
1097 * 6. Splitting creates sub-intervals that are assigned to different registers
1098 * or spill locations, so we must insert resolving copies at the split
1099 * positions between intervals that were split in a block, and copies on
1100 * control-flow edges connecting different sub-intervals. When more than one
1101 * copy occurs in a position, they are parallel-copies (all sources read before
1102 * any dest is written).
1104 * If any sub-interval was spilled, a single store is generated after the
1105 * definition point. SSA form ensures this position dominates all uses, so
1106 * therefore it dominates all reloads.
1108 * Copies for Jmp->DefLabel edges are also converted to Shuffles, which are
1109 * combined with any other resolving copies on the same edges.
// Top-level driver for the allocator.
// NOTE(review): the body of this function — the sequence of allocation
// phase calls — was lost from this excerpt; only the final consistency
// check is visible.  Restore the phase pipeline from upstream before use.
void XLS::allocate() {
  /* …excerpt gap: allocation phases elided… */
  // Final sanity check: every use/def got a consistent physical location.
  assert(checkRegisters(m_unit, m_regs));
1123 //////////////////////////////////////////////////////////////////////////////
1125 void XLS::dumpIntervals() {
1126 HPHP::Trace::traceRelease("Splits %d Spills %d\n", m_numSplits
, m_nextSpill
);
1127 for (auto ivl
: m_intervals
) {
1129 HPHP::Trace::traceRelease("i%-2d %s\n", ivl
->tmp
->id(),
1130 ivl
->toString().c_str());
1131 for (ivl
= ivl
->next
; ivl
; ivl
= ivl
->next
) {
1132 HPHP::Trace::traceRelease(" %s\n", ivl
->toString().c_str());
1138 void forEachInterval(Intervals
& intervals
, F f
) {
1139 for (auto ivl
: intervals
) {
// Glyph weight for the interval columns printed by XLS::print():
// Light marks plain liveness, Heavy marks positions with actual uses.
enum Mode { Light, Heavy };

// Pick the box-drawing glyph for one interval column at position `pos`.
// f(p) reports whether the interval is live/used at p, so f(pos) is the
// state entering the instruction and f(pos + 1) the state leaving it:
// live only before -> top stub, only after -> bottom stub, both -> full
// bar, neither -> blank.
// NOTE(review): the template header, the s/d reads, and the empty[m]
// fallback were dropped by the excerpt and have been restored.
template <class F>
const char* draw(unsigned pos, Mode m, F f) {
  static const char* top[]    = { "\u2575", "\u2579" };
  static const char* bottom[] = { "\u2577", "\u257B" };
  static const char* both[]   = { "\u2502", "\u2503" };
  static const char* empty[]  = { " ", " " };
  auto s = f(pos);     // live/used entering the position
  auto d = f(pos + 1); // live/used leaving the position
  return ( s && !d) ? top[m] :
         ( s &&  d) ? both[m] :
         (!s &&  d) ? bottom[m] :
                      empty[m];
}
// Debug dump of the whole unit: a header row with one text column per
// interval, then each instruction with its position, opcode, srcs/dsts,
// and per-interval liveness glyphs (Light = live/covers, Heavy = used,
// drawn via draw()).
// NOTE(review): many interior lines of this function were lost from the
// excerpt (conditions, else-branches, and closing braces).  Each gap is
// marked below; the brace structure is incomplete as a result — restore
// from upstream before building.
void XLS::print(const char* caption) {
  std::ostringstream str;
  str << "Intervals " << caption << "\n";
  // header row: one column per interval, labeled with its tmp id
  forEachInterval(m_intervals, [&] (Interval* ivl) {
    str << folly::format(" {: <2}", ivl->tmp->id());
  /* …excerpt gap… */
  for (auto& b : m_blocks) {
    for (auto& i : *b) {
      auto pos = m_posns[i];
      /* …excerpt gap: condition on pos elided… */
      forEachInterval(m_intervals, [&](Interval* ivl) {
      /* …excerpt gap: blank-column branch elided… */
      forEachInterval(m_intervals, [&] (Interval* ivl) {
        /* …excerpt gap… */
        // light glyph: interval covers (is live at) the position
        str << draw(pos, Light, [&](unsigned p) {
          auto c = ivl->childAt(p);
          return c && c->covers(p);
        /* …excerpt gap… */
        // heavy glyph: interval has a use at the position
        str << draw(pos, Heavy, [&](unsigned p) {
          auto c = ivl->childAt(p);
          return c && c->usedAt(p);
      /* …excerpt gap… */
      // block label printed on the first instruction of each block
      if (&i == &b->front()) {
        str << folly::format(" B{: <2}", b->id());
      /* …excerpt gap: else-branch and position condition elided… */
      str << folly::format(" {: <3}-", pos);
      /* …excerpt gap… */
      str << folly::format(" {: <3} ", pos);
      /* …excerpt gap… */
      JIT::printOpcode(str, &i, nullptr);
      JIT::printSrcs(str, &i, &m_regs, nullptr);
      /* …excerpt gap… */
      JIT::printDsts(str, &i, &m_regs, nullptr);
      /* …excerpt gap… */
      // successor annotations on the last instruction of each block
      if (&i == &b->back()) {
        if (auto next = b->next()) {
          str << folly::format(" next->B{}", next->id());
        /* …excerpt gap… */
        if (auto taken = b->taken()) {
          str << folly::format(" taken->B{}", taken->id());
  /* …excerpt gap: loop/branch closers elided… */
  HPHP::Trace::traceRelease("%s\n", str.str().c_str());
// Debug representation of this interval: its live ranges rendered as
// "start-end" pairs, followed by its use positions.
// NOTE(review): several lines were lost from the excerpt — the setup of
// `delim` (used but not declared in the visible text), the separators
// between the two lists, and the final return.  Gaps are marked below;
// restore from upstream before building.
std::string Interval::toString() {
  std::ostringstream out;
  /* …excerpt gap: delim / location prefix elided… */
  for (auto r : ranges) {
    // each live range prints as "start-end", separated by delim
    out << delim << folly::format("{}-{}", r.start, r.end);
  /* …excerpt gap… */
  for (auto u : uses) {
    // each use position, separated by delim
    out << delim << u.pos;
  /* …excerpt gap: closers and return elided… */
1235 //////////////////////////////////////////////////////////////////////////////
1237 // During resolveEdges, we can insert shuffles "on edges", which comes down
1238 // to either before a successor or after a predecessor. We must never have
1239 // a shuffle in both places, because that would be two sequential shuffles
1241 bool XLS::checkEdgeShuffles() {
1242 for (auto b
: m_blocks
) {
1243 DEBUG_ONLY
auto numSucc
= (!!b
->next()) + (!!b
->taken());
1244 DEBUG_ONLY
auto numPred
= b
->numPreds();
1246 assert(numSucc
== 1);
1247 assert(!m_before
[b
->next() ? b
->next() : b
->taken()]);
1250 assert(numPred
== 1);
1251 assert(!m_after
[b
->preds().front().inst()->block()]);
1253 if (numSucc
!= 1) assert(!m_after
[b
]);
1254 if (numPred
!= 1) assert(!m_before
[b
]);
1259 // Check validity of this interval
1260 // 1. split-children cannot have more children, nor can the parent
1261 // be a child of another interval.
1262 // 2. live ranges must be sorted and disjoint
1263 // 3. every use position must be inside a live range
1264 // 4. parent and children must all be sorted and disjoint
1265 bool Interval::checkInvariants() const {
1266 assert(!parent
|| !parent
->parent
); // 1: no crazy nesting
1267 DEBUG_ONLY
auto pos
= 0;
1268 for (auto r
: ranges
) {
1269 assert(r
.start
<= r
.end
); // valid range
1270 assert(r
.start
>= pos
); // 2: ranges are sorted
1271 pos
= r
.end
+ 1; // 2: ranges are disjoint with no empty holes
1273 for (DEBUG_ONLY
auto u
: uses
) {
1274 assert(covers(u
.pos
)); // 3: every use must be in a live range
1279 // check that each block comes after all its predecessors (XXX won't be
1280 // true once we have loops).
1281 bool checkBlockOrder(IRUnit
& unit
, BlockList
& blocks
) {
1282 StateVector
<Block
, bool> seen(unit
, false);
1283 for (auto b
: blocks
) {
1284 b
->forEachPred([&](Block
* p
) {
1292 //////////////////////////////////////////////////////////////////////////////
1296 X64::kAllRegs
- X64::kXMMRegs
, // general purpose
1297 X64::kAllRegs
& X64::kXMMRegs
, // fp/simd
1302 // For now this is the same as x64, since we're pretending arm
1303 // has the same register conventions as x64.
1304 ARM::kCallerSaved
| ARM::kCalleeSaved
,
1305 RegSet(), // fp/simd
1310 // This is the public entry-point
1311 RegAllocInfo
allocateRegs(IRUnit
& unit
) {
1312 RegAllocInfo
regs(unit
);
1313 XLS
xls(unit
, regs
, arch() == Arch::ARM
? arm_abi
: x64_abi
);
1315 if (dumpIREnabled()) {
1316 dumpTrace(kRegAllocLevel
, unit
, " after extended alloc ", ®s
,
1317 nullptr, nullptr, nullptr);