/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com)     |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/

#include "hphp/runtime/vm/jit/linearscan.h"

#include "hphp/runtime/base/memory/smart_containers.h"
#include "hphp/runtime/vm/jit/irfactory.h"
#include "hphp/runtime/vm/jit/nativecalls.h"
#include "hphp/runtime/vm/jit/print.h"
#include "hphp/runtime/vm/jit/ir.h"
#include "hphp/runtime/vm/jit/tracebuilder.h"
#include "hphp/runtime/vm/jit/codegen.h"
#include "hphp/runtime/vm/jit/state_vector.h"
#include "hphp/runtime/vm/jit/check.h"
#include "hphp/runtime/vm/jit/physreg.h"
#include "hphp/runtime/vm/jit/abi-x64.h"
#include <boost/noncopyable.hpp>

namespace HPHP {
namespace JIT {

using namespace Transl::reg;

TRACE_SET_MOD(hhir);

int RegisterInfo::numAllocatedRegs() const {
  // Return the number of register slots that actually have an allocated
  // register or spill slot. We may not have allocated a full numNeededRegs()
  // worth of registers in some cases (if the value of this tmp wasn't used).
  // We rely on InvalidReg (-1) never being equal to a spill slot number.
  int i = 0;
  while (i < kMaxNumRegs && m_regs[i] != InvalidReg) {
    ++i;
  }
  return i;
}

RegSet RegisterInfo::regs() const {
  RegSet regs;
  for (int i = 0, n = numAllocatedRegs(); i < n; ++i) {
    if (hasReg(i)) regs.add(reg(i));
  }
  return regs;
}

struct LinearScan : private boost::noncopyable {
  static const int NumRegs = kNumRegs;

  explicit LinearScan(IRFactory*);
  RegAllocInfo allocRegs(IRTrace*, LifetimeInfo*);

private:
  class RegState {
    friend class LinearScan;

  public:
    bool isReserved() const { return m_reserved; }
    bool isCallerSaved() const {
      return kCallerSaved.contains(m_reg);
    }
    bool isCalleeSaved() const { return !isCallerSaved(); }
    bool isAllocated() const { return m_ssaTmp != nullptr; }
    bool isPinned() const { return m_pinned; }
    bool isRetAddr() const {
      if (!m_ssaTmp) return false;
      Type type = m_ssaTmp->type();
      return type == Type::RetAddr;
    }
    PhysReg::Type type() const { return m_reg.type(); }

  private:
    SSATmp* m_ssaTmp; // non-null when allocated
    // Maintain the position of this register so that we can quickly
    // remove it from the lists.
    // A non-reserved reg is in either LinearScan::m_freeCallerSaved,
    // LinearScan::m_freeCalleeSaved, or LinearScan::m_allocatedRegs.
    // <m_pos> of a reserved reg is undefined.
    smart::list<RegState*>::iterator m_pos;
    PhysReg m_reg;
    bool m_pinned; // do not free this register if pinned
    // We stress test register allocation by reducing the number of
    // free registers.
    // <m_reserved> is true if the register is a reserved register
    // (i.e., rbx, rsp, rbp, r10, and r12) or it is marked as not free for
    // stress testing.
    bool m_reserved;
  };

  struct SlotInfo {
    // the SSATmp that represents this spill location
    SSATmp* spillTmp;
    // The latest SSATmp that has the most recent reloaded spilled value
    // If it's NULL, we have to reload this slot before using it.
    SSATmp* latestReload;
  };

  class PreColoringHint {
  public:
    PreColoringHint() { clear(); }
    bool preColorsTmp(RegState* reg) const;
    RegNumber getPreColoringReg(SSATmp* tmp, uint32_t index) const;
    void clear();
    void add(SSATmp* tmp, uint32_t index, int argNum);
  private:
    // indexed by register number
    std::pair<SSATmp*, uint32_t> m_preColoredTmps[LinearScan::NumRegs];
  };

  class StateSave {
  public:
    StateSave() {}
    void save(LinearScan* ls);
    void restore(LinearScan* ls);
  private:
    RegState m_regs[NumRegs];
  };
  typedef smart::map<Block*, StateSave> ExitTraceMap;

private:
  void allocRegToInstruction(InstructionList::iterator it);
  int allocRegToTmp(SSATmp* ssaTmp, uint32_t index);
  void assignRegToTmp(RegState* reg, SSATmp* ssaTmp, uint32_t index);
  void freeRegsAtId(uint32_t id);
  void spill(SSATmp* tmp);
  void numberInstructions(const BlockList& blocks);

  template<typename T> SSATmp* cns(T val) {
    return m_irFactory->cns(val);
  }
  void initFreeList();
  void coalesce(IRTrace* trace);
  void genSpillStats(IRTrace* trace, int numSpillLocs);
  void allocRegsOneTrace(BlockList::iterator& blockIt,
                         ExitTraceMap& etm);
  void allocRegsToTrace();
  uint32_t createSpillSlot(SSATmp* tmp);
  static SSATmp* getSpilledTmp(SSATmp* tmp);
  static SSATmp* getOrigTmp(SSATmp* tmp);
  uint32_t assignSpillLoc();
  void collectInfo(BlockList::iterator it, IRTrace* trace);
  RegNumber getJmpPreColor(SSATmp* tmp, uint32_t regIndx, bool isReload);
  void computePreColoringHint();
  void findFullXMMCandidates();
  IRInstruction* nextNative() const;
  uint32_t nextNativeId() const;

  void pushFreeReg(RegState* reg);
  RegState* popFreeReg(smart::list<RegState*>& freeList);
  void freeReg(RegState* reg);
  RegState* getFreeReg(PhysReg::Type type, bool preferCallerSaved);
  RegState* getReg(RegState* reg);
  PhysReg::Type getRegType(const SSATmp* tmp, int locIdx) const;
  bool crossNativeCall(const SSATmp* tmp) const;

  template<typename Inner, int DumpVal=4>
  void dumpIR(const Inner* in, const char* msg) {
    if (HPHP::Trace::moduleEnabled(HPHP::Trace::hhir, DumpVal)) {
      std::ostringstream str;
      print(str, in, &m_allocInfo, &m_lifetime);
      HPHP::Trace::traceRelease("--- %s: %s\n", msg, str.str().c_str());
    }
  }

private:
  // Register allocation may generate Spill/Reload.
  IRFactory* const m_irFactory;
  RegState m_regs[NumRegs];
  // Lists of free caller and callee-saved registers, respectively.
  smart::list<RegState*> m_freeCallerSaved[PhysReg::kNumTypes];
  smart::list<RegState*> m_freeCalleeSaved[PhysReg::kNumTypes];
  // List of assigned registers, sorted high to low by lastUseId.
  smart::list<RegState*> m_allocatedRegs;

  smart::vector<SlotInfo> m_slots; // Spill info indexed by slot id
  BlockList m_blocks;              // all basic blocks in reverse postorder
  IdomVector m_idoms;              // immediate dominator vector

  // Any tmp that has been spilled has an entry in this array with
  // the spill-slot number, which is an index into m_slots[]. Tmps that
  // have not been spilled have -1.
  StateVector<SSATmp, int32_t> m_spillSlots;

  LifetimeInfo m_lifetime;   // Internal lifetime state
  LinearIdVector& m_linear;  // linear id for each inst
  UsesVector& m_uses;        // use count of each tmp

  // the list of native instructions in the trace sorted by instruction ID;
  // i.e. a filtered list in the same order as visited by m_blocks.
  smart::list<IRInstruction*> m_natives;

  // stores pre-coloring hints
  PreColoringHint m_preColoringHint;

  // a map from SSATmp* to a list of Jmp_ instructions that have it as
  // a source.
  typedef smart::vector<IRInstruction*> JmpList;
  StateVector<SSATmp, JmpList> m_jmps;

  RegAllocInfo m_allocInfo; // final allocation for each SSATmp

  // SSATmps requiring 2 64-bit registers that are eligible for
  // allocation to a single XMM register
  boost::dynamic_bitset<> m_fullXMMCandidates;
};

static_assert(kReservedRSPSpillSpace == NumPreAllocatedSpillLocs * sizeof(void*),
              "kReservedRSPSpillSpace changes require updates in "
              "LinearScan");

// The dst of IncRef, Mov, StRef, and StRefNT has the same value
// as the src. For analysis purposes, we put them in one equivalence class.
// This canonicalize function returns the representative of <tmp>'s
// equivalence class. The function computes the representative by
// following the dst-src chain.
static SSATmp* canonicalize(SSATmp* tmp) {
  while (true) {
    IRInstruction* inst = tmp->inst();
    Opcode opc = inst->op();
    // The dst of IncRef, Mov, StRef, and StRefNT has the same value
    // as the src.
    // We follow these instructions to canonicalize an SSATmp.
    switch (opc) {
      case IncRef:
      case Mov:
      case StRef:
      case StRefNT:
        tmp = inst->src(0);
        break;

      default:
        return tmp;
    }
  }
}

void LinearScan::StateSave::save(LinearScan* ls) {
  std::copy(ls->m_regs, ls->m_regs + NumRegs, m_regs);
}

void LinearScan::StateSave::restore(LinearScan* ls) {
  ls->m_allocatedRegs.clear();
  for (int i = 0; i < PhysReg::kNumTypes; i++) {
    ls->m_freeCalleeSaved[i].clear();
    ls->m_freeCallerSaved[i].clear();
  }

  for (size_t i = 0; i < NumRegs; i++) {
    ls->m_regs[i] = m_regs[i];
    RegState* reg = &ls->m_regs[i];
    if (reg->isReserved()) continue;
    if (reg->isAllocated()) {
      SSATmp* tmp = reg->m_ssaTmp;
      for (int r = 0; r < ls->m_allocInfo[tmp].numAllocatedRegs(); r++) {
        if (ls->m_allocInfo[tmp].reg(r) == PhysReg(i)) {
          ls->assignRegToTmp(reg, tmp, r);
        }
      }
    } else {
      ls->pushFreeReg(reg);
    }
  }
}

LinearScan::LinearScan(IRFactory* irFactory)
  : m_irFactory(irFactory)
  , m_spillSlots(irFactory, -1)
  , m_lifetime(irFactory)
  , m_linear(m_lifetime.linear)
  , m_uses(m_lifetime.uses)
  , m_jmps(irFactory, JmpList())
  , m_allocInfo(irFactory)
  , m_fullXMMCandidates(irFactory->numTmps())
{
  for (int i = 0; i < kNumRegs; i++) {
    m_regs[i].m_ssaTmp = nullptr;
    m_regs[i].m_reg = PhysReg(i);
    m_regs[i].m_pinned = false;
    m_regs[i].m_reserved = false;
  }

  // Mark reserved regs.
  m_regs[int(PhysReg(rVmSp))]  .m_reserved = true;
  m_regs[int(PhysReg(rsp))]    .m_reserved = true;
  m_regs[int(PhysReg(rVmFp))]  .m_reserved = true;
  m_regs[int(PhysReg(rAsm))]   .m_reserved = true;
  m_regs[int(PhysReg(rVmTl))]  .m_reserved = true;
  m_regs[int(PhysReg(rCgGP))]  .m_reserved = true;
  m_regs[int(PhysReg(rCgXMM0))].m_reserved = true;
  m_regs[int(PhysReg(rCgXMM1))].m_reserved = true;

  // Reserve extra regs for testing purposes.
  uint32_t numFreeRegs = RuntimeOption::EvalHHIRNumFreeRegs;
  for (int i = kNumRegs - 1; i >= 0; i--) {
    if (!m_regs[i].m_reserved) {
      if (numFreeRegs == 0) {
        m_regs[i].m_reserved = true;
      } else {
        --numFreeRegs;
      }
    }
  }
}

PhysReg::Type LinearScan::getRegType(const SSATmp* tmp, int locIdx) const {
  if (!RuntimeOption::EvalHHIRAllocXMMRegs) return PhysReg::GP;

  // If we're selecting a register for the type, it means this SSATmp
  // didn't get its value allocated to an XMM register, which
  // otherwise would store the type too.
  if (locIdx == 1) return PhysReg::GP;

  if (tmp->isA(Type::Dbl)) return PhysReg::XMM;

  if (packed_tv) return PhysReg::GP;

  DEBUG_ONLY Type tmpType = tmp->type();

  uint32_t tmpId = tmp->id();

  if (tmp->inst()->op() == Reload) {
    // We don't have an entry for reloaded SSATmps in
    // m_fullXMMCandidates, since they're inserted after this set is
    // computed. So we approximate this property for the reloaded
    // SSATmp using the original SSATmp that was spilled. In other
    // words, if the original SSATmp was a candidate to be allocated
    // to a full XMM register, then so is the reloaded SSATmp. This
    // might be a bit conservative, but avoids recomputing the analysis.
    auto* reload = tmp->inst();
    auto* spill = reload->src(0)->inst();
    tmpId = spill->src(0)->id();
  }

  if (m_fullXMMCandidates[tmpId]) {
    FTRACE(6,
           "getRegType(SSATmp {} : {}): it's a candidate for full XMM register\n",
           tmpId, tmpType.toString());
    FTRACE(6,
           "getRegType(SSATmp {}): crossNative = {} ; # freeCalleeSaved[GP] = {}\n",
           tmpId, crossNativeCall(tmp), m_freeCalleeSaved[PhysReg::GP].size());

    // Note that there are no callee-saved XMM registers in the x64
    // ABI. So, if tmp crosses native calls and there are 2 free GP
    // callee-saved registers, then allocate tmp to GP registers.
    if (crossNativeCall(tmp) && m_freeCalleeSaved[PhysReg::GP].size() >= 2) {
      return PhysReg::GP;
    }
    return PhysReg::XMM;
  }
  return PhysReg::GP;
}

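// Allocate registers for a single instruction: reload any spilled sources,
// release registers whose live ranges end here, and then assign registers
// to the instruction's dsts.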
void LinearScan::allocRegToInstruction(InstructionList::iterator it) {
  IRInstruction* inst = &*it;
  dumpIR<IRInstruction, kExtraLevel>(inst, "allocating to instruction");

  // Reload all source operands if necessary.
  // Mark registers as unpinned.
  for (int regNo = 0; regNo < kNumRegs; ++regNo) {
    m_regs[regNo].m_pinned = false;
  }
  smart::vector<bool> needsReloading(inst->numSrcs(), true);
  for (uint32_t i = 0; i < inst->numSrcs(); ++i) {
    SSATmp* tmp = inst->src(i);
    int32_t slotId = m_spillSlots[tmp];
    if (slotId == -1) {
      needsReloading[i] = false;
    } else if ((tmp = m_slots[slotId].latestReload)) {
      needsReloading[i] = false;
      inst->setSrc(i, tmp);
    }
    if (!needsReloading[i]) {
      for (int i = 0, n = m_allocInfo[tmp].numAllocatedRegs(); i < n; ++i) {
        m_regs[int(m_allocInfo[tmp].reg(i))].m_pinned = true;
      }
    }
  }
  for (uint32_t i = 0; i < inst->numSrcs(); ++i) {
    if (needsReloading[i]) {
      SSATmp* tmp = inst->src(i);
      int32_t slotId = m_spillSlots[tmp];
      // <tmp> is spilled, and not reloaded.
      // Therefore, we need to reload the value into a new SSATmp.

      // Insert the Reload instruction.
      SSATmp* spillTmp = m_slots[slotId].spillTmp;
      IRInstruction* reload = m_irFactory->gen(Reload, inst->marker(),
                                               spillTmp);
      inst->block()->insert(it, reload);

      // Create <reloadTmp> which inherits <tmp>'s slot ID and
      // <spillTmp>'s last use ID.
      // Replace <tmp> with <reloadTmp> in <inst>.
      SSATmp* reloadTmp = reload->dst();
      m_uses[reloadTmp].lastUse = m_uses[spillTmp].lastUse;
      m_spillSlots[reloadTmp] = slotId;
      inst->setSrc(i, reloadTmp);
      // reloadTmp and tmp share the same type. Since it was spilled, it
      // must be using its entire needed-count of registers.
      assert(reloadTmp->type() == tmp->type());
      for (int locIndex = 0; locIndex < tmp->numNeededRegs();) {
        locIndex += allocRegToTmp(reloadTmp, locIndex);
      }
      // Remember this reload tmp in case we can reuse it in later blocks.
      m_slots[slotId].latestReload = reloadTmp;
      dumpIR<IRInstruction, kExtraLevel>(reload, "created reload");
    }
  }

  freeRegsAtId(m_linear[inst]);
  // Update next native.
  if (nextNative() == inst) {
    assert(!m_natives.empty());
    m_natives.pop_front();
    computePreColoringHint();
  }

  Range<SSATmp*> dsts = inst->dsts();
  if (dsts.empty()) return;

  Opcode opc = inst->op();
  if (opc == DefMIStateBase) {
    assert(dsts[0].isA(Type::PtrToCell));
    assignRegToTmp(&m_regs[int(rsp)], &dsts[0], 0);
    return;
  }

  for (SSATmp& dst : dsts) {
    for (int numAllocated = 0, n = dst.numNeededRegs(); numAllocated < n; ) {
      // LdRaw, loading a generator's embedded AR, is the only time we have a
      // pointer to an AR that is not in rVmFp.
      const bool abnormalFramePtr =
        (opc == LdRaw &&
          inst->src(1)->getValInt() == RawMemSlot::ContARPtr);

      // Note that the point of StashGeneratorSP is to save a StkPtr
      // somewhere other than rVmSp. (TODO(#2288359): make rbx not
      // special.)
      const bool abnormalStkPtr = opc == StashGeneratorSP;

      if (!abnormalStkPtr && dst.isA(Type::StkPtr)) {
        assert(opc == DefSP ||
               opc == ReDefSP ||
               opc == ReDefGeneratorSP ||
               opc == Call ||
               opc == CallArray ||
               opc == SpillStack ||
               opc == SpillFrame ||
               opc == CufIterSpillFrame ||
               opc == ExceptionBarrier ||
               opc == RetAdjustStack ||
               opc == InterpOne ||
               opc == GenericRetDecRefs ||
               opc == CheckStk ||
               opc == GuardStk ||
               opc == AssertStk ||
               opc == CastStk ||
               opc == CoerceStk ||
               opc == SideExitGuardStk ||
               VectorEffects::supported(opc));
        assignRegToTmp(&m_regs[int(rVmSp)], &dst, 0);
        numAllocated++;
        continue;
      }
      if (!abnormalFramePtr && dst.isA(Type::FramePtr)) {
        assert(opc == DefFP || opc == FreeActRec || opc == DefInlineFP);
        assignRegToTmp(&m_regs[int(rVmFp)], &dst, 0);
        numAllocated++;
        continue;
      }

      // Generally speaking, StkPtrs are pretty special due to
      // tracelet ABI registers. Keep track here of the allowed uses
      // that don't use the above allocation.
      assert(!dst.isA(Type::FramePtr) || abnormalFramePtr);
      assert(!dst.isA(Type::StkPtr) || abnormalStkPtr);

      if (!RuntimeOption::EvalHHIRDeadCodeElim || m_uses[dst].lastUse != 0) {
        numAllocated += allocRegToTmp(&dst, numAllocated);
      } else {
        numAllocated++;
      }
    }
  }
  if (!RuntimeOption::EvalHHIRDeadCodeElim) {
    // if any outputs were unused, free regs now.
    freeRegsAtId(m_linear[inst]);
  }
}

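// Returns true if <tmp>'s live range extends past the next native call,
// i.e. its last use comes after the next native instruction's id.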
bool LinearScan::crossNativeCall(const SSATmp* tmp) const {
  return m_uses[tmp].lastUse > nextNativeId();
}

/*
 * Allocates a register to ssaTmp's index component (0 for value, 1 for type).
 * Returns the number of 64-bit register-space allocated. This is normally 1,
 * but it's 2 when both the type and value need registers and they're allocated
 * together to one 128-bit XMM register.
 */
int LinearScan::allocRegToTmp(SSATmp* ssaTmp, uint32_t index) {
  bool preferCallerSaved = true;
  PhysReg::Type regType = getRegType(ssaTmp, index);
  FTRACE(6, "getRegType(SSATmp {}, {}) = {}\n", ssaTmp->id(),
         index, int(regType));
  assert(regType == PhysReg::GP || index == 0); // no type-only in XMM regs

  if (RuntimeOption::EvalHHIREnableCalleeSavedOpt) {
    preferCallerSaved = !crossNativeCall(ssaTmp);
  }

  RegState* reg = nullptr;
  if (!preferCallerSaved) {
    reg = getFreeReg(regType, false);
    if (reg->isCallerSaved()) {
      // If we are out of callee-saved registers, fall into the logic of
      // assigning a caller-saved register.
      pushFreeReg(reg);
      // getFreeReg pins the reg. Need to restore it here.
      reg->m_pinned = false;
      reg = nullptr;
    }
  }
  if (reg == nullptr && RuntimeOption::EvalHHIREnablePreColoring) {
    // Pre-colors ssaTmp if it's used as an argument of the next native.
    // Search for the original tmp instead of <ssaTmp> itself, because
    // the pre-coloring hint is not aware of reloaded tmps.
    SSATmp* orig = getOrigTmp(ssaTmp);
    RegNumber targetRegNo =
      m_preColoringHint.getPreColoringReg(orig, index);
    if (targetRegNo == reg::noreg) {
      targetRegNo = getJmpPreColor(orig, index, orig != ssaTmp);
    }
    if (targetRegNo == reg::noreg && ssaTmp->inst()->op() == AssertType) {
      targetRegNo = m_allocInfo[ssaTmp->inst()->src(0)].reg(index);
    }
    if (targetRegNo != reg::noreg) {
      reg = getReg(&m_regs[int(targetRegNo)]);
    }
  }
  if (reg == nullptr &&
      RuntimeOption::EvalHHIREnablePreColoring &&
      ssaTmp->inst()->isNative()) {
    // Pre-colors ssaTmp if it's the return value of a native.
    if (index == 0) {
      reg = getReg(&m_regs[int(rax)]);
    } else if (index == 1) {
      reg = getReg(&m_regs[int(rdx)]);
    } else {
      not_reached();
    }
  }
  if (reg == nullptr) {
    // No pre-coloring for this tmp.
    // Pick a regular caller-saved reg.
    reg = getFreeReg(regType, true);
  }

  assert(reg);
  if (!preferCallerSaved && reg->isCallerSaved()) {
    // ssaTmp spans a native call, but we failed to find a free callee-saved
    // reg. We eagerly add a spill ssaTmp, and update ssaTmp's live range
    // to end with the next native, because we know we have to spill it at
    // the next native.
    // Setting the last use ID to the next native is conservative.
    // Setting it to the last use before the next native would be more
    // precise, but that would be more expensive to compute.
    if (m_spillSlots[ssaTmp] == -1) {
      createSpillSlot(ssaTmp);
    }
    m_uses[ssaTmp].lastUse = nextNativeId();
  }

  assignRegToTmp(reg, ssaTmp, index);

  if (m_allocInfo[ssaTmp].isFullXMM()) {
    // Type and value allocated together to a single XMM register
    return 2;
  }
  return 1;
}

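// Bind <reg> to the <index> component of <ssaTmp> and record the assignment
// in m_allocInfo; non-reserved registers are also inserted into
// m_allocatedRegs, which is kept sorted by decreasing last-use id.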
void LinearScan::assignRegToTmp(RegState* reg, SSATmp* ssaTmp, uint32_t index) {
  reg->m_ssaTmp = ssaTmp;
  // mark inst as using this register
  if (ssaTmp->numNeededRegs() == 2 && reg->type() == PhysReg::XMM) {
    assert(index == 0);
    m_allocInfo[ssaTmp].setRegFullXMM(reg->m_reg);
  } else {
    m_allocInfo[ssaTmp].setReg(reg->m_reg, index);
  }
  uint32_t lastUseId = m_uses[ssaTmp].lastUse;
  if (reg->isReserved()) {
    return;
  }
  // insert into the list of assigned registers sorted by last use id
  auto it = m_allocatedRegs.begin();
  for (; it != m_allocatedRegs.end(); ++it) {
    if (lastUseId > m_uses[(*it)->m_ssaTmp].lastUse) {
      break;
    }
  }
  reg->m_pos = m_allocatedRegs.insert(it, reg);
}

class SpillLocManager {
 public:
  explicit SpillLocManager(uint32_t startSpillLoc) :
    m_nextSpillLoc(startSpillLoc) { }

  /*
   * Allocates a new spill location.
   */
  SpillInfo allocSpillLoc() {
    return SpillInfo(m_nextSpillLoc++);
  }

  void alignTo16Bytes() {
    SpillInfo spillLoc(m_nextSpillLoc);
    if (spillLoc.offset() % 16 != 0) {
      spillLoc = SpillInfo(++m_nextSpillLoc);
    }
    assert(spillLoc.offset() % 16 == 0);
  }

  uint32_t getNumSpillLocs() const {
    return m_nextSpillLoc;
  }

  void setNextSpillLoc(uint32_t nextSpillLoc) {
    m_nextSpillLoc = nextSpillLoc;
  }

 private:
  uint32_t m_nextSpillLoc;
};

// Assign spill location numbers to Spill/Reload.
uint32_t LinearScan::assignSpillLoc() {
  uint32_t maxSpillLoc = 0;
  SpillLocManager spillLocManager(0);

  // Visit blocks in reverse postorder and instructions in forward order,
  // assigning a spill slot id to each Spill. We don't reuse slot ids,
  // but both could be reused either by visiting the dominator tree in
  // preorder or by analyzing lifetimes and reusing id/registers between
  // non-conflicting spills.
  // As an intermediate step, re-use ids for exit traces.

  smart::map<Block*, uint32_t> exitLocMap;

  for (Block* block : m_blocks) {
    auto it = exitLocMap.find(block);
    if (it != exitLocMap.end()) {
      spillLocManager.setNextSpillLoc(it->second);
    }
    for (IRInstruction& inst : *block) {
      if (nextNative() == &inst) {
        assert(!m_natives.empty());
        m_natives.pop_front();
      }
      if (inst.op() == Spill) {
        SSATmp* dst = inst.dst();
        SSATmp* src = inst.src(0);
        for (int locIndex = 0;
             locIndex < src->numNeededRegs();
             ++locIndex) {
          if (!crossNativeCall(dst)) {
            TRACE(3, "[counter] 1 spill a tmp that does not span native\n");
          } else {
            TRACE(3, "[counter] 1 spill a tmp that spans native\n");
          }

          // SSATmps with 2 regs are aligned to 16 bytes because they may be
          // allocated to XMM registers, either before or after being reloaded.
          if (src->numNeededRegs() == 2 && locIndex == 0) {
            spillLocManager.alignTo16Bytes();
          }
          SpillInfo spillLoc = spillLocManager.allocSpillLoc();
          m_allocInfo[dst].setSpillInfo(locIndex, spillLoc);

          if (m_allocInfo[src].isFullXMM()) {
            // Allocate the next, consecutive spill slot for this SSATmp too
            assert(locIndex == 0);
            assert(spillLoc.offset() % 16 == 0);
            spillLoc = spillLocManager.allocSpillLoc();
            m_allocInfo[dst].setSpillInfo(locIndex + 1, spillLoc);
            break;
          }
        }
      }
      if (inst.op() == Reload) {
        SSATmp* src = inst.src(0);
        for (int locIndex = 0;
             locIndex < src->numNeededRegs();
             ++locIndex) {
          TRACE(3, "[counter] reload\n");
        }
      }
    }
    uint32_t totalSpillLocs = spillLocManager.getNumSpillLocs();
    if (totalSpillLocs > maxSpillLoc) maxSpillLoc = totalSpillLocs;
    if (block->trace()->isMain()) {
      if (Block* taken = block->taken()) {
        if (!taken->trace()->isMain()) {
          exitLocMap[taken] = totalSpillLocs;
        }
      }
    }
  }
  return maxSpillLoc;
}

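// Walk the blocks of <trace> starting at <it>, recording the last-use id of
// every source, collecting the trace's native-call instructions in m_natives,
// and building the per-tmp lists of incoming Jmp_ instructions in m_jmps.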
void LinearScan::collectInfo(BlockList::iterator it, IRTrace* trace) {
  m_natives.clear();
  m_uses.reset(); // TODO(#2536764): serious time sink

  while (it != m_blocks.end()) {
    Block* block = *it++;
    bool offTrace = block->trace() != trace;
    if (offTrace) {
      if (!trace->isMain()) return;
      int lastId = block->trace()->data();
      for (IRInstruction& inst : *block) {
        for (auto* src : inst.srcs()) {
          if (lastId > m_uses[src].lastUse) {
            m_uses[src].lastUse = lastId;
          }
        }
      }
    } else {
      for (IRInstruction& inst : *block) {
        for (auto* src : inst.srcs()) {
          m_uses[src].lastUse = m_linear[inst];
        }
        if (inst.isNative()) m_natives.push_back(&inst);
      }
    }

    IRInstruction* jmp = block->back();
    if (jmp->op() == Jmp_ && jmp->numSrcs() != 0) {
      for (SSATmp* src : jmp->srcs()) {
        m_jmps[src].push_back(jmp);
      }
    }
  }
}

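// Record pre-coloring hints that map the sources of the next native call to
// the argument registers it expects, so those tmps can be allocated directly
// to the registers the call will read them from.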
void LinearScan::computePreColoringHint() {
  m_preColoringHint.clear();
  IRInstruction* inst = nextNative();
  if (inst == nullptr) {
    return;
  }

  Opcode opc = inst->op();
  using namespace NativeCalls;
  if (CallMap::hasInfo(opc)) {
    unsigned reg = 0;
    for (auto const& arg : CallMap::info(opc).args) {
      switch (arg.type) {
        case SSA:
          m_preColoringHint.add(inst->src(arg.srcIdx), 0, reg++);
          break;
        case TV:
        case VecKeyS:
        case VecKeyIS:
          m_preColoringHint.add(inst->src(arg.srcIdx), 0, reg++);
          m_preColoringHint.add(inst->src(arg.srcIdx), 1, reg++);
          break;
      }
    }
    return;
  }

  // For instructions that want to hint a continuous increasing range
  // of sources to a continuous increasing range of argument
  // registers.
  auto normalHint = [&](int count, int srcBase, int argBase) {
    for (int i = 0; i < count; ++i) {
      m_preColoringHint.add(inst->src(i + srcBase), 0,
                            i + argBase);
    }
  };
  switch (opc) {
    case LdFunc:
      m_preColoringHint.add(inst->src(0), 0, 1);
      break;
    case NativeImpl:
      m_preColoringHint.add(inst->src(1), 0, 0);
      break;
    case Concat:
      {
        Type lType = inst->src(0)->type();
        Type rType = inst->src(1)->type();
        if ((lType.isString() && rType.isString()) ||
            (lType.isString() && rType == Type::Int) ||
            (lType == Type::Int && rType.isString())) {
          m_preColoringHint.add(inst->src(0), 0, 0);
          m_preColoringHint.add(inst->src(1), 0, 1);
        } else {
          m_preColoringHint.add(inst->src(0), 0, 1);
          m_preColoringHint.add(inst->src(1), 0, 3);
        }
      }
      break;
    case AKExists:
      normalHint(2, 0, 0);
      break;
    case OpEq:
    case OpNeq:
    case OpSame:
    case OpNSame:
      {
        auto src1 = inst->src(0);
        auto src2 = inst->src(1);

        auto type1 = src1->type();
        auto type2 = src2->type();

        if ((type1.isArray() && type2.isArray())
            || (type1.isString() && type2.isString())
            || (type1.isString() && !src1->isConst())
            || (type1 == Type::Obj && type2 == Type::Obj)) {
          m_preColoringHint.add(src1, 0, 0);
          m_preColoringHint.add(src2, 0, 1);
        }
      }
      break;
    case IterInit:
    case WIterInit:
      {
        m_preColoringHint.add(inst->src(0), 0, 1);
      }
      break;
    case InstanceOf:
      normalHint(2, 0, 0);
      break;
    case LdSSwitchDestFast:
      normalHint(1, 0, 0);
      break;
    case LdSSwitchDestSlow:
      normalHint(1, 0, 0);
      break;
    case LdGblAddr:
    case LdGblAddrDef:
      normalHint(1, 0, 0);
      break;
    case LdClsPropAddr:
      normalHint(3, 0, 0);
      break;
    case LdCls:
      m_preColoringHint.add(inst->src(0), 0, 1);
      break;
    case BoxPtr:
      normalHint(1, 0, 0);
      break;
    default:
      break;
  }
}

// Given a label, dest index for that label, and register index, scan
// the sources of all incoming Jmp_s to see if any have a register
// allocated at the specified index.
static RegNumber findLabelSrcReg(const RegAllocInfo& regs, IRInstruction* label,
                                 unsigned dstIdx, uint32_t regIndex) {
  assert(label->op() == DefLabel);
  SSATmp* withReg = label->block()->findSrc(dstIdx, [&](SSATmp* src) {
    return regs[src].reg(regIndex) != InvalidReg &&
      src->inst()->block()->hint() != Block::Hint::Unlikely;
  });
  return withReg ? regs[withReg].reg(regIndex) : reg::noreg;
}

// This function attempts to find a pre-coloring hint from two
// different sources: If tmp comes from a DefLabel, it will scan up to
// the SSATmps providing values to incoming Jmp_s to look for a
// hint. If tmp is consumed by a Jmp_, look for other incoming Jmp_s
// to its destination and see if any of them have already been given a
// register. If all of these fail, let normal register allocation
// proceed unhinted.
RegNumber LinearScan::getJmpPreColor(SSATmp* tmp, uint32_t regIndex,
                                     bool isReload) {
  IRInstruction* srcInst = tmp->inst();
  const JmpList& jmps = m_jmps[tmp];
  if (isReload && (srcInst->op() == DefLabel || !jmps.empty())) {
    // If we're precoloring a Reload of a temp that we'd normally find
    // a hint for, just return the register allocated to the spilled
    // temp.
    auto reg = m_allocInfo[tmp].reg(regIndex);
    assert(reg != reg::noreg);
    return reg;
  }

  if (srcInst->op() == DefLabel) {
    // Figure out which dst of the label is tmp
    for (unsigned i = 0, n = srcInst->numDsts(); i < n; ++i) {
      if (srcInst->dst(i) == tmp) {
        auto reg = findLabelSrcReg(m_allocInfo, srcInst, i, regIndex);
        // Until we handle loops, it's a bug to try and allocate a
        // register to a DefLabel's dest before all of its incoming
        // Jmp_s have had their srcs allocated, unless the incoming
        // block is unreachable.
        const DEBUG_ONLY bool unreachable =
          std::find(m_blocks.begin(), m_blocks.end(),
                    srcInst->block()) == m_blocks.end();
        always_assert(reg != reg::noreg || unreachable);
        return reg;
      }
    }
    not_reached();
  }

  // If srcInst wasn't a label, check if tmp is used by any Jmp_
  // instructions. If it is, trace to the Jmp_'s label and use the
  // same procedure as above.
  for (unsigned ji = 0, jn = jmps.size(); ji < jn; ++ji) {
    IRInstruction* jmp = jmps[ji];
    IRInstruction* label = jmp->taken()->front();

    // Figure out which src of the Jmp_ is tmp
    for (unsigned si = 0, sn = jmp->numSrcs(); si < sn; ++si) {
      SSATmp* src = jmp->src(si);
      if (tmp == src) {
        // For now, a DefLabel should never have a register assigned
        // to it before any of its incoming Jmp_ instructions.
        always_assert(m_allocInfo[label->dst(si)].reg(regIndex) ==
                      reg::noreg);
        auto reg = findLabelSrcReg(m_allocInfo, label, si, regIndex);
        if (reg != reg::noreg) return reg;
      }
    }
  }

  return reg::noreg;
}

// Create the initial free list.
// It must be called after computePreColoringHint, because the order of
// caller-saved regs depends on pre-coloring hints.
void LinearScan::initFreeList() {
  // reserve extra regs for testing purposes.
  for (int i = kNumRegs - 1; i >= 0; i--) {
    if (!m_regs[i].m_reserved) {
      pushFreeReg(&m_regs[i]);
    }
  }
}

void LinearScan::coalesce(IRTrace* trace) {
  forEachTraceInst(trace, [](IRInstruction* inst) {
    for (uint32_t i = 0; i < inst->numSrcs(); ++i) {
      SSATmp* src = inst->src(i);
      SSATmp* origSrc = canonicalize(src);
      if (origSrc != src) {
        // Replace every operand with its canonicalized version.
        inst->setSrc(i, origSrc);
      }
    }
  });
}

// Assign ids to each instruction in linear order.
void LinearScan::numberInstructions(const BlockList& blocks) {
  m_spillSlots.reset();
  m_uses.reset();
  uint32_t nextId = 1;
  for (auto* block : blocks) {
    for (auto& inst : *block) {
      uint32_t id = nextId++;
      m_linear[inst] = id;
      for (SSATmp* tmp : inst.srcs()) {
        m_uses[tmp].lastUse = id;
        m_uses[tmp].count++;
      }
    }
    if (block->taken() && block->isMain() && !block->taken()->isMain()) {
      // reserve a spot for the lastUseId when we're processing the main
      // trace, if the last use is really in an exit trace.
      block->taken()->trace()->setData(nextId++);
    }
  }
}

void LinearScan::genSpillStats(IRTrace* trace, int numSpillLocs) {
  if (!moduleEnabled(HPHP::Trace::statgroups, 1)) return;
  static bool enabled = getenv("HHVM_STATS_SPILLS");
  if (!enabled) return;

  int numMainSpills = 0;
  int numExitSpills = 0;
  int numMainReloads = 0;
  int numExitReloads = 0;
  forEachInst(
    m_blocks,
    [&](IRInstruction* inst) {
      if (inst->op() == Spill) {
        if (inst->block()->isMain()) {
          numMainSpills++;
        } else {
          numExitSpills++;
        }
      } else if (inst->op() == Reload) {
        if (inst->block()->isMain()) {
          numMainReloads++;
        } else {
          numExitReloads++;
        }
      }
    }
  );

  static StringData* spillStats = StringData::GetStaticString("SpillStats");
  static StringData* mainSpills = StringData::GetStaticString("MainSpills");
  static StringData* mainReloads = StringData::GetStaticString("MainReloads");
  static StringData* exitSpills = StringData::GetStaticString("ExitSpills");
  static StringData* exitReloads = StringData::GetStaticString("ExitReloads");
  static StringData* spillSpace = StringData::GetStaticString("SpillSpace");

  auto const marker = trace->front()->front()->marker();
  auto addStat = [&](const StringData* key, int value) {
    trace->front()->prepend(m_irFactory->gen(IncStatGrouped, marker,
                                             cns(spillStats), cns(key),
                                             cns(value)));
  };
  addStat(mainSpills, numMainSpills);
  addStat(mainReloads, numMainReloads);
  addStat(exitSpills, numExitSpills);
  addStat(exitReloads, numExitReloads);
  addStat(spillSpace, numSpillLocs);
}

/*
 * Finds the set of SSATmps that should be considered for allocation
 * to a full XMM register. These are the SSATmps that satisfy all the
 * following conditions:
 *   a) it requires 2 64-bit registers
 *   b) it's defined in a load instruction
 *   c) all its uses are simple stores to memory
 *
 * The computed set of SSATmps is stored in m_fullXMMCandidates.
 */
void LinearScan::findFullXMMCandidates() {
  boost::dynamic_bitset<> notCandidates(m_irFactory->numTmps());
  m_fullXMMCandidates.reset();
  for (auto* block : m_blocks) {
    for (auto& inst : *block) {
      for (SSATmp& tmp : inst.dsts()) {
        if (tmp.numNeededRegs() == 2 && inst.isLoad()) {
          m_fullXMMCandidates[tmp.id()] = true;
        }
      }
      int idx = 0;
      for (SSATmp* tmp : inst.srcs()) {
        if (tmp->numNeededRegs() == 2 && !inst.storesCell(idx)) {
          notCandidates[tmp->id()] = true;
        }
        idx++;
      }
    }
  }
  m_fullXMMCandidates -= notCandidates;
}

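// Top-level entry point: performs linear-scan register allocation over
// <trace> and its exit traces, inserting Spill/Reload instructions as
// needed, and returns the final register assignment for every SSATmp.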
RegAllocInfo LinearScan::allocRegs(IRTrace* trace, LifetimeInfo* lifetime) {
  if (RuntimeOption::EvalHHIREnableCoalescing) {
    // <coalesce> doesn't need instruction numbering.
    coalesce(trace);
  }

  m_blocks = rpoSortCfg(trace, *m_irFactory);
  m_idoms = findDominators(m_blocks);

  if (!packed_tv) {
    findFullXMMCandidates();
  }

  allocRegsToTrace();

  numberInstructions(m_blocks);

  // Make sure rsp is 16-aligned.
  uint32_t numSpillLocs = assignSpillLoc();
  if (numSpillLocs % 2) {
    static_assert(NumPreAllocatedSpillLocs % 2 == 0, "");
    ++numSpillLocs;
  }
  if (numSpillLocs > (uint32_t)NumPreAllocatedSpillLocs) {
    PUNT(LinearScan_TooManySpills);
  }

  if (m_slots.size()) genSpillStats(trace, numSpillLocs);

  if (lifetime) {
    lifetime->linear = std::move(m_linear);
    lifetime->uses = std::move(m_uses);
  }
  return m_allocInfo;
}

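// Allocate registers for the trace that starts at *blockIt, restoring the
// register state saved at this trace's entry (for exit traces) or starting
// from a fresh free list (for the main trace), then insert the Spill
// instructions for any spill slots created while walking this trace.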
void LinearScan::allocRegsOneTrace(BlockList::iterator& blockIt,
                                   ExitTraceMap& etm) {
  auto const trace = (*blockIt)->trace();

  collectInfo(blockIt, trace);
  computePreColoringHint();

  auto v = etm.find(*blockIt);
  if (v != etm.end()) {
    assert(!trace->isMain());
    v->second.restore(this);
  } else {
    assert(blockIt == m_blocks.begin() && trace->isMain());
    initFreeList();
  }

  // First, visit every instruction, allocating registers as we go,
  // and inserting Reload instructions where necessary.
  bool isMain = trace->isMain();
  size_t sz = m_slots.size();
  while (blockIt != m_blocks.end()) {
    Block* block = *blockIt;
    if (block->trace() != trace) {
      if (!isMain) {
        break;
      } else {
        ++blockIt;
        continue;
      }
    }
    FTRACE(5, "Block{}: {} ({})\n",
           trace->isMain() ? "" : " (exit trace)",
           (*blockIt)->id(), (*blockIt)->postId());

    // clear remembered reloads that don't dominate this block
    for (SlotInfo& slot : m_slots) {
      if (SSATmp* reload = slot.latestReload) {
        if (!dominates(reload->inst()->block(), block, m_idoms)) {
          slot.latestReload = nullptr;
        }
      }
    }
    for (auto it = block->begin(), end = block->end(); it != end; ++it) {
      allocRegToInstruction(it);
      dumpIR<IRInstruction, kExtraLevel>(&*it, "allocated to instruction ");
    }
    if (isMain) {
      assert(block->trace()->isMain());
      if (block->taken() &&
          !block->taken()->trace()->isMain()) {
        etm[block->taken()].save(this);
      }
    }
    ++blockIt;
  }

  // Now that we have visited all instructions on this trace,
  // and inserted Reloads for SSATmps which needed to be spilled,
  // we can go back and insert the spills.
  // On the main trace, insert the spill right after the instruction
  // that generated the value (without traversing everything else).
  // On exit traces, if the instruction that generated the value
  // is on the main trace, insert the spill at the start of the trace;
  // otherwise, insert it after the instruction that generated the value.
  size_t begin = sz;
  size_t end = m_slots.size();

  while (begin < end) {
    SlotInfo& slot = m_slots[begin++];
    IRInstruction* spill = slot.spillTmp->inst();
    IRInstruction* inst = spill->src(0)->inst();
    Block* block = inst->block();
    if (!isMain && block->trace()->isMain()) {
      // We're on an exit trace, but the def is on the
      // main trace, so put it at the start of this trace
      if (spill->block()) {
        // it's already been inserted in another exit trace
        assert(!spill->block()->trace()->isMain());
        spill = spill->clone(m_irFactory);
      }
      block->trace()->front()->prepend(spill);
    } else if (inst->isBlockEnd()) {
      block->next()->prepend(spill);
    } else {
      auto pos = block->iteratorTo(inst);
      block->insert(++pos, spill);
    }
  }
}

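// Drive allocation over the whole unit: number the instructions, allocate
// the main trace first (saving the register state at each exit), then
// allocate every exit trace using its saved entry state.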
void LinearScan::allocRegsToTrace() {
  ExitTraceMap etm;

  numberInstructions(m_blocks);

  if (HPHP::Trace::moduleEnabled(HPHP::Trace::hhir, 5)) {
    std::stringstream s;
    s << "RPO: ";
    for (auto& b : m_blocks) {
      s << folly::format("{}{} ",
                         b->isMain() ? "M" : "E",
                         b->id());
    }
    s << "\n";
    HPHP::Trace::traceRelease("%s\n", s.str().c_str());
  }

  BlockList::iterator it = m_blocks.begin();
  while (it != m_blocks.end()) {
    allocRegsOneTrace(it, etm);
  }

  for (it = m_blocks.begin(); it != m_blocks.end();) {
    if ((*it)->isMain()) {
      ++it;
      continue;
    }
    allocRegsOneTrace(it, etm);
  }
}

void LinearScan::freeRegsAtId(uint32_t id) {
  // free all registers whose lifetime ends at this id
  // Note that we free registers before we allocate a register
  // to this instruction, so we have to be careful to finish using
  // a register before over-writing it.
  for (auto it = m_allocatedRegs.begin(); it != m_allocatedRegs.end(); ) {
    auto next = it; ++next;
    RegState* reg = *it;
    assert(reg->m_ssaTmp);
    if (m_uses[reg->m_ssaTmp].lastUse <= id) {
      m_allocatedRegs.erase(it);
      freeReg(reg);
    }
    it = next;
  }
}

// Try to get a specific register.
// Returns NULL if <reg> is not in the free list;
// otherwise, return <reg> and remove it from the free list.
LinearScan::RegState* LinearScan::getReg(RegState* reg) {
  if (reg->isReserved() || reg->isAllocated()) {
    return nullptr;
  }
  auto type = reg->type();
  auto& freeList = (reg->isCallerSaved() ?
                    m_freeCallerSaved[type] : m_freeCalleeSaved[type]);
  freeList.erase(reg->m_pos);
  // Pin it so that other operands in the same instruction will not reuse it.
  reg->m_pinned = true;
  return reg;
}

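// Return a free register of the requested type, taking it from the
// caller-saved or callee-saved free list according to <preferCallerSaved>.
// If both free lists are empty, spill an allocated register (one that is
// neither pinned nor holding a return address) to make room.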
LinearScan::RegState* LinearScan::getFreeReg(PhysReg::Type type,
                                             bool preferCallerSaved) {
  if (m_freeCallerSaved[type].empty() && m_freeCalleeSaved[type].empty()) {
    assert(!m_allocatedRegs.empty());

    // no free registers --> free a register from the allocatedRegs
    // Pick the first register in <m_allocatedRegs> that is:
    // 1. not used for any source operand in the current instruction, and
    // 2. not used for the return address of a function.
    auto canSpill = [&] (RegState* reg) {
      return !reg->isPinned() && !reg->isRetAddr() && reg->type() == type;
    };
    auto pos = std::find_if(m_allocatedRegs.begin(), m_allocatedRegs.end(),
                            canSpill);
    if (pos == m_allocatedRegs.end()) {
      PUNT(RegSpill);
    }
    spill((*pos)->m_ssaTmp);
  }

  smart::list<RegState*>* preferred = nullptr;
  smart::list<RegState*>* other = nullptr;
  if (preferCallerSaved) {
    preferred = &m_freeCallerSaved[type];
    other = &m_freeCalleeSaved[type];
  } else {
    preferred = &m_freeCalleeSaved[type];
    other = &m_freeCallerSaved[type];
  }

  RegState* theFreeReg = nullptr;
  if (!preferred->empty()) {
    theFreeReg = popFreeReg(*preferred);
  } else {
    theFreeReg = popFreeReg(*other);
  }
  assert(theFreeReg);
  // Pin it so that other operands in the same instruction will not reuse it.
  theFreeReg->m_pinned = true;
  return theFreeReg;
}

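// Return <reg> to its free list. The tmp it held must not be reused, so we
// also invalidate any cached reload for that tmp's spill slot.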
void LinearScan::freeReg(RegState* reg) {
  pushFreeReg(reg);
  // The <tmp> shouldn't be reused any more.
  SSATmp* tmp = reg->m_ssaTmp;
  int32_t slotId = m_spillSlots[tmp];
  if (slotId != -1) {
    m_slots[slotId].latestReload = nullptr;
  }
  reg->m_ssaTmp = nullptr;
}

void LinearScan::pushFreeReg(RegState* reg) {
  PhysReg::Type type = reg->type();
  auto& freeList = (reg->isCallerSaved() ?
                    m_freeCallerSaved[type] : m_freeCalleeSaved[type]);
  // If the next native is going to use <reg>, put <reg> at the back of the
  // queue so that it's unlikely to be misused by irrelevant tmps.
  if (RuntimeOption::EvalHHIREnablePreColoring &&
      type == PhysReg::GP &&
      (reg->m_reg == PhysReg(rax) || m_preColoringHint.preColorsTmp(reg))) {
    freeList.push_back(reg);
    reg->m_pos = (--freeList.end());
  } else {
    freeList.push_front(reg);
    reg->m_pos = freeList.begin();
  }
}

LinearScan::RegState* LinearScan::popFreeReg(smart::list<RegState*>& freeList) {
  if (freeList.empty()) {
    return nullptr;
  }
  RegState* reg = freeList.front();
  freeList.pop_front();
  return reg;
}

void LinearScan::spill(SSATmp* tmp) {
  dumpIR<SSATmp, kExtraLevel>(tmp, "spilling");
  // If we're spilling, we better actually have registers allocated.
  assert(m_allocInfo[tmp].numAllocatedRegs() > 0);
  assert(m_allocInfo[tmp].numAllocatedRegs() == tmp->numNeededRegs());

  // Free the registers used by <tmp>.
  // We need to call freeReg and modify <m_allocatedRegs>.
  for (auto it = m_allocatedRegs.begin(); it != m_allocatedRegs.end(); ) {
    auto next = it; ++next;
    RegState* reg = *it;
    if (reg->m_ssaTmp == tmp) {
      freeReg(reg);
      m_allocatedRegs.erase(it);
    }
    it = next;
  }

  if (m_spillSlots[tmp] == -1) {
    // <tmp> hasn't been spilled before.
    // We need to create a new spill slot for it.
    uint32_t slotId = createSpillSlot(tmp);
    // createSpillSlot sets the latest reloaded value of slotId to tmp.
    // Here, we need to reset this value because tmp is spilled and no longer
    // synced with memory.
    m_slots[slotId].latestReload = nullptr;
  }
}

// Create a spill slot for <tmp>.
uint32_t LinearScan::createSpillSlot(SSATmp* tmp) {
  uint32_t slotId = m_slots.size();
  m_spillSlots[tmp] = slotId;
  auto* spillInst = m_irFactory->gen(Spill, tmp->inst()->marker(), tmp);
  SSATmp* spillTmp = spillInst->dst();
  SlotInfo si;
  si.spillTmp = spillTmp;
  si.latestReload = tmp;
  m_slots.push_back(si);
  // The spill slot inherits the last use ID of the spilled tmp.
  m_uses[si.spillTmp].lastUse = m_uses[tmp].lastUse;
  return slotId;
}

IRInstruction* LinearScan::nextNative() const {
  return m_natives.empty() ? nullptr : m_natives.front();
}

uint32_t LinearScan::nextNativeId() const {
  IRInstruction* next = nextNative();
  return next ? m_linear[next] : -1;
}

SSATmp* LinearScan::getSpilledTmp(SSATmp* tmp) {
  assert(tmp->inst()->op() == Reload);
  SSATmp* slot = tmp->inst()->src(0);
  assert(slot->inst()->op() == Spill);
  return slot->inst()->src(0);
}

// If <tmp> is a reloaded value, follow the spill-reload chain to find
// its source; otherwise, return <tmp> itself.
SSATmp* LinearScan::getOrigTmp(SSATmp* tmp) {
  if (tmp->inst()->op() == Reload)
    return getSpilledTmp(tmp);
  return tmp;
}

bool LinearScan::PreColoringHint::preColorsTmp(RegState* reg) const {
  assert(reg->m_reg.isGP());
  return m_preColoredTmps[int(reg->m_reg)].first != nullptr;
}

// Get the pre-coloring register of (<tmp>, <index>).
// A native call has at most six arguments, so the time complexity is
// not a big problem.
RegNumber LinearScan::PreColoringHint::getPreColoringReg(
    SSATmp* tmp, uint32_t index) const {
  for (int regNo = 0; regNo < kNumRegs; ++regNo) {
    if (m_preColoredTmps[regNo].first == tmp &&
        m_preColoredTmps[regNo].second == index) {
      assert(regNo < kNumGPRegs);
      return (RegNumber)regNo;
    }
  }
  return reg::noreg;
}

void LinearScan::PreColoringHint::clear() {
  for (int i = 0; i < kNumRegs; ++i) {
    m_preColoredTmps[i].first = nullptr;
    m_preColoredTmps[i].second = 0;
  }
}

// Provide a hint that (<tmp>, <index>) is used as the <argNum>-th arg
// in the next native.
void LinearScan::PreColoringHint::add(SSATmp* tmp, uint32_t index, int argNum) {
  int reg = int(argNumToRegName[argNum]);
  assert(reg >= 0 && reg < kNumGPRegs);
  m_preColoredTmps[reg].first = tmp;
  m_preColoredTmps[reg].second = index;
}

//////////////////////////////////////////////////////////////////////

RegAllocInfo allocRegsForTrace(IRTrace* trace, IRFactory* irFactory,
                               LifetimeInfo* lifetime) {
  return LinearScan(irFactory).allocRegs(trace, lifetime);
}

}} // HPHP::JIT