1 //===------ RegAllocPBQP.cpp ---- PBQP Register Allocator -------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
11 // register allocator for LLVM. This allocator works by constructing a PBQP
12 // problem representing the register allocation problem under consideration,
13 // solving this using a PBQP solver, and mapping the solution back to a
14 // register assignment. If any variables are selected for spilling then spill
15 // code is inserted and the process repeated.
17 // The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
18 // for register allocation. For more information on PBQP for register
19 // allocation, see the following papers:
21 // (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
22 // PBQP. In Proceedings of the 7th Joint Modular Languages Conference
23 // (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
25 // (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
26 // architectures. In Proceedings of the Joint Conference on Languages,
27 // Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
30 //===----------------------------------------------------------------------===//
32 #include "llvm/CodeGen/RegAllocPBQP.h"
33 #include "RegisterCoalescer.h"
35 #include "llvm/Analysis/AliasAnalysis.h"
36 #include "llvm/CodeGen/CalcSpillWeights.h"
37 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
38 #include "llvm/CodeGen/LiveRangeEdit.h"
39 #include "llvm/CodeGen/LiveStackAnalysis.h"
40 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
41 #include "llvm/CodeGen/MachineDominators.h"
42 #include "llvm/CodeGen/MachineFunctionPass.h"
43 #include "llvm/CodeGen/MachineLoopInfo.h"
44 #include "llvm/CodeGen/MachineRegisterInfo.h"
45 #include "llvm/CodeGen/RegAllocRegistry.h"
46 #include "llvm/CodeGen/VirtRegMap.h"
47 #include "llvm/IR/Module.h"
48 #include "llvm/Support/Debug.h"
49 #include "llvm/Support/FileSystem.h"
50 #include "llvm/Support/raw_ostream.h"
51 #include "llvm/Target/TargetInstrInfo.h"
52 #include "llvm/Target/TargetSubtargetInfo.h"
62 #define DEBUG_TYPE "regalloc"
64 static RegisterRegAlloc
65 RegisterPBQPRepAlloc("pbqp", "PBQP register allocator",
66 createDefaultPBQPRegisterAllocator
);
69 PBQPCoalescing("pbqp-coalescing",
70 cl::desc("Attempt coalescing during PBQP register allocation."),
71 cl::init(false), cl::Hidden
);
75 PBQPDumpGraphs("pbqp-dump-graphs",
76 cl::desc("Dump graphs for each function/round in the compilation unit."),
77 cl::init(false), cl::Hidden
);
83 /// PBQP based allocators solve the register allocation problem by mapping
84 /// register allocation problems to Partitioned Boolean Quadratic
85 /// Programming problems.
86 class RegAllocPBQP
: public MachineFunctionPass
{
91 /// Construct a PBQP register allocator.
92 RegAllocPBQP(char *cPassID
= nullptr)
93 : MachineFunctionPass(ID
), customPassID(cPassID
) {
94 initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
95 initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
96 initializeLiveStacksPass(*PassRegistry::getPassRegistry());
97 initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
100 /// Return the pass name.
101 const char* getPassName() const override
{
102 return "PBQP Register Allocator";
105 /// PBQP analysis usage.
106 void getAnalysisUsage(AnalysisUsage
&au
) const override
;
108 /// Perform register allocation
109 bool runOnMachineFunction(MachineFunction
&MF
) override
;
113 typedef std::map
<const LiveInterval
*, unsigned> LI2NodeMap
;
114 typedef std::vector
<const LiveInterval
*> Node2LIMap
;
115 typedef std::vector
<unsigned> AllowedSet
;
116 typedef std::vector
<AllowedSet
> AllowedSetMap
;
117 typedef std::pair
<unsigned, unsigned> RegPair
;
118 typedef std::map
<RegPair
, PBQP::PBQPNum
> CoalesceMap
;
119 typedef std::set
<unsigned> RegSet
;
123 RegSet VRegsToAlloc
, EmptyIntervalVRegs
;
125 /// \brief Finds the initial set of vreg intervals to allocate.
126 void findVRegIntervalsToAlloc(const MachineFunction
&MF
, LiveIntervals
&LIS
);
128 /// \brief Constructs an initial graph.
129 void initializeGraph(PBQPRAGraph
&G
, VirtRegMap
&VRM
, Spiller
&VRegSpiller
);
131 /// \brief Spill the given VReg.
132 void spillVReg(unsigned VReg
, SmallVectorImpl
<unsigned> &NewIntervals
,
133 MachineFunction
&MF
, LiveIntervals
&LIS
, VirtRegMap
&VRM
,
134 Spiller
&VRegSpiller
);
136 /// \brief Given a solved PBQP problem maps this solution back to a register
138 bool mapPBQPToRegAlloc(const PBQPRAGraph
&G
,
139 const PBQP::Solution
&Solution
,
141 Spiller
&VRegSpiller
);
143 /// \brief Postprocessing before final spilling. Sets basic block "live in"
145 void finalizeAlloc(MachineFunction
&MF
, LiveIntervals
&LIS
,
146 VirtRegMap
&VRM
) const;
150 char RegAllocPBQP::ID
= 0;
152 /// @brief Set spill costs for each node in the PBQP reg-alloc graph.
153 class SpillCosts
: public PBQPRAConstraint
{
155 void apply(PBQPRAGraph
&G
) override
{
156 LiveIntervals
&LIS
= G
.getMetadata().LIS
;
158 // A minimum spill costs, so that register constraints can can be set
159 // without normalization in the [0.0:MinSpillCost( interval.
160 const PBQP::PBQPNum MinSpillCost
= 10.0;
162 for (auto NId
: G
.nodeIds()) {
163 PBQP::PBQPNum SpillCost
=
164 LIS
.getInterval(G
.getNodeMetadata(NId
).getVReg()).weight
;
165 if (SpillCost
== 0.0)
166 SpillCost
= std::numeric_limits
<PBQP::PBQPNum
>::min();
168 SpillCost
+= MinSpillCost
;
169 PBQPRAGraph::RawVector
NodeCosts(G
.getNodeCosts(NId
));
170 NodeCosts
[PBQP::RegAlloc::getSpillOptionIdx()] = SpillCost
;
171 G
.setNodeCosts(NId
, std::move(NodeCosts
));
176 /// @brief Add interference edges between overlapping vregs.
177 class Interference
: public PBQPRAConstraint
{
180 typedef const PBQP::RegAlloc::AllowedRegVector
* AllowedRegVecPtr
;
181 typedef std::pair
<AllowedRegVecPtr
, AllowedRegVecPtr
> IKey
;
182 typedef DenseMap
<IKey
, PBQPRAGraph::MatrixPtr
> IMatrixCache
;
183 typedef DenseSet
<IKey
> DisjointAllowedRegsCache
;
184 typedef std::pair
<PBQP::GraphBase::NodeId
, PBQP::GraphBase::NodeId
> IEdgeKey
;
185 typedef DenseSet
<IEdgeKey
> IEdgeCache
;
187 bool haveDisjointAllowedRegs(const PBQPRAGraph
&G
, PBQPRAGraph::NodeId NId
,
188 PBQPRAGraph::NodeId MId
,
189 const DisjointAllowedRegsCache
&D
) const {
190 const auto *NRegs
= &G
.getNodeMetadata(NId
).getAllowedRegs();
191 const auto *MRegs
= &G
.getNodeMetadata(MId
).getAllowedRegs();
197 return D
.count(IKey(NRegs
, MRegs
)) > 0;
199 return D
.count(IKey(MRegs
, NRegs
)) > 0;
202 void setDisjointAllowedRegs(const PBQPRAGraph
&G
, PBQPRAGraph::NodeId NId
,
203 PBQPRAGraph::NodeId MId
,
204 DisjointAllowedRegsCache
&D
) {
205 const auto *NRegs
= &G
.getNodeMetadata(NId
).getAllowedRegs();
206 const auto *MRegs
= &G
.getNodeMetadata(MId
).getAllowedRegs();
208 assert(NRegs
!= MRegs
&& "AllowedRegs can not be disjoint with itself");
211 D
.insert(IKey(NRegs
, MRegs
));
213 D
.insert(IKey(MRegs
, NRegs
));
216 // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required
217 // for the fast interference graph construction algorithm. The last is there
218 // to save us from looking up node ids via the VRegToNode map in the graph
220 typedef std::tuple
<LiveInterval
*, size_t, PBQP::GraphBase::NodeId
>
223 static SlotIndex
getStartPoint(const IntervalInfo
&I
) {
224 return std::get
<0>(I
)->segments
[std::get
<1>(I
)].start
;
227 static SlotIndex
getEndPoint(const IntervalInfo
&I
) {
228 return std::get
<0>(I
)->segments
[std::get
<1>(I
)].end
;
231 static PBQP::GraphBase::NodeId
getNodeId(const IntervalInfo
&I
) {
232 return std::get
<2>(I
);
235 static bool lowestStartPoint(const IntervalInfo
&I1
,
236 const IntervalInfo
&I2
) {
237 // Condition reversed because priority queue has the *highest* element at
238 // the front, rather than the lowest.
239 return getStartPoint(I1
) > getStartPoint(I2
);
242 static bool lowestEndPoint(const IntervalInfo
&I1
,
243 const IntervalInfo
&I2
) {
244 SlotIndex E1
= getEndPoint(I1
);
245 SlotIndex E2
= getEndPoint(I2
);
253 // If two intervals end at the same point, we need a way to break the tie or
254 // the set will assume they're actually equal and refuse to insert a
255 // "duplicate". Just compare the vregs - fast and guaranteed unique.
256 return std::get
<0>(I1
)->reg
< std::get
<0>(I2
)->reg
;
259 static bool isAtLastSegment(const IntervalInfo
&I
) {
260 return std::get
<1>(I
) == std::get
<0>(I
)->size() - 1;
263 static IntervalInfo
nextSegment(const IntervalInfo
&I
) {
264 return std::make_tuple(std::get
<0>(I
), std::get
<1>(I
) + 1, std::get
<2>(I
));
269 void apply(PBQPRAGraph
&G
) override
{
270 // The following is loosely based on the linear scan algorithm introduced in
271 // "Linear Scan Register Allocation" by Poletto and Sarkar. This version
272 // isn't linear, because the size of the active set isn't bound by the
273 // number of registers, but rather the size of the largest clique in the
274 // graph. Still, we expect this to be better than N^2.
275 LiveIntervals
&LIS
= G
.getMetadata().LIS
;
277 // Interferenc matrices are incredibly regular - they're only a function of
278 // the allowed sets, so we cache them to avoid the overhead of constructing
279 // and uniquing them.
282 // Finding an edge is expensive in the worst case (O(max_clique(G))). So
283 // cache locally edges we have already seen.
286 // Cache known disjoint allowed registers pairs
287 DisjointAllowedRegsCache D
;
289 typedef std::set
<IntervalInfo
, decltype(&lowestEndPoint
)> IntervalSet
;
290 typedef std::priority_queue
<IntervalInfo
, std::vector
<IntervalInfo
>,
291 decltype(&lowestStartPoint
)> IntervalQueue
;
292 IntervalSet
Active(lowestEndPoint
);
293 IntervalQueue
Inactive(lowestStartPoint
);
295 // Start by building the inactive set.
296 for (auto NId
: G
.nodeIds()) {
297 unsigned VReg
= G
.getNodeMetadata(NId
).getVReg();
298 LiveInterval
&LI
= LIS
.getInterval(VReg
);
299 assert(!LI
.empty() && "PBQP graph contains node for empty interval");
300 Inactive
.push(std::make_tuple(&LI
, 0, NId
));
303 while (!Inactive
.empty()) {
304 // Tentatively grab the "next" interval - this choice may be overriden
306 IntervalInfo Cur
= Inactive
.top();
308 // Retire any active intervals that end before Cur starts.
309 IntervalSet::iterator RetireItr
= Active
.begin();
310 while (RetireItr
!= Active
.end() &&
311 (getEndPoint(*RetireItr
) <= getStartPoint(Cur
))) {
312 // If this interval has subsequent segments, add the next one to the
314 if (!isAtLastSegment(*RetireItr
))
315 Inactive
.push(nextSegment(*RetireItr
));
319 Active
.erase(Active
.begin(), RetireItr
);
321 // One of the newly retired segments may actually start before the
322 // Cur segment, so re-grab the front of the inactive list.
323 Cur
= Inactive
.top();
326 // At this point we know that Cur overlaps all active intervals. Add the
327 // interference edges.
328 PBQP::GraphBase::NodeId NId
= getNodeId(Cur
);
329 for (const auto &A
: Active
) {
330 PBQP::GraphBase::NodeId MId
= getNodeId(A
);
332 // Do not add an edge when the nodes' allowed registers do not
333 // intersect: there is obviously no interference.
334 if (haveDisjointAllowedRegs(G
, NId
, MId
, D
))
337 // Check that we haven't already added this edge
338 IEdgeKey
EK(std::min(NId
, MId
), std::max(NId
, MId
));
342 // This is a new edge - add it to the graph.
343 if (!createInterferenceEdge(G
, NId
, MId
, C
))
344 setDisjointAllowedRegs(G
, NId
, MId
, D
);
349 // Finally, add Cur to the Active set.
356 // Create an Interference edge and add it to the graph, unless it is
357 // a null matrix, meaning the nodes' allowed registers do not have any
358 // interference. This case occurs frequently between integer and floating
359 // point registers for example.
360 // return true iff both nodes interferes.
361 bool createInterferenceEdge(PBQPRAGraph
&G
,
362 PBQPRAGraph::NodeId NId
, PBQPRAGraph::NodeId MId
,
365 const TargetRegisterInfo
&TRI
=
366 *G
.getMetadata().MF
.getSubtarget().getRegisterInfo();
367 const auto &NRegs
= G
.getNodeMetadata(NId
).getAllowedRegs();
368 const auto &MRegs
= G
.getNodeMetadata(MId
).getAllowedRegs();
370 // Try looking the edge costs up in the IMatrixCache first.
371 IKey
K(&NRegs
, &MRegs
);
372 IMatrixCache::iterator I
= C
.find(K
);
374 G
.addEdgeBypassingCostAllocator(NId
, MId
, I
->second
);
378 PBQPRAGraph::RawMatrix
M(NRegs
.size() + 1, MRegs
.size() + 1, 0);
379 bool NodesInterfere
= false;
380 for (unsigned I
= 0; I
!= NRegs
.size(); ++I
) {
381 unsigned PRegN
= NRegs
[I
];
382 for (unsigned J
= 0; J
!= MRegs
.size(); ++J
) {
383 unsigned PRegM
= MRegs
[J
];
384 if (TRI
.regsOverlap(PRegN
, PRegM
)) {
385 M
[I
+ 1][J
+ 1] = std::numeric_limits
<PBQP::PBQPNum
>::infinity();
386 NodesInterfere
= true;
394 PBQPRAGraph::EdgeId EId
= G
.addEdge(NId
, MId
, std::move(M
));
395 C
[K
] = G
.getEdgeCostsPtr(EId
);
402 class Coalescing
: public PBQPRAConstraint
{
404 void apply(PBQPRAGraph
&G
) override
{
405 MachineFunction
&MF
= G
.getMetadata().MF
;
406 MachineBlockFrequencyInfo
&MBFI
= G
.getMetadata().MBFI
;
407 CoalescerPair
CP(*MF
.getSubtarget().getRegisterInfo());
409 // Scan the machine function and add a coalescing cost whenever CoalescerPair
411 for (const auto &MBB
: MF
) {
412 for (const auto &MI
: MBB
) {
414 // Skip not-coalescable or already coalesced copies.
415 if (!CP
.setRegisters(&MI
) || CP
.getSrcReg() == CP
.getDstReg())
418 unsigned DstReg
= CP
.getDstReg();
419 unsigned SrcReg
= CP
.getSrcReg();
421 const float Scale
= 1.0f
/ MBFI
.getEntryFreq();
422 PBQP::PBQPNum CBenefit
= MBFI
.getBlockFreq(&MBB
).getFrequency() * Scale
;
425 if (!MF
.getRegInfo().isAllocatable(DstReg
))
428 PBQPRAGraph::NodeId NId
= G
.getMetadata().getNodeIdForVReg(SrcReg
);
430 const PBQPRAGraph::NodeMetadata::AllowedRegVector
&Allowed
=
431 G
.getNodeMetadata(NId
).getAllowedRegs();
433 unsigned PRegOpt
= 0;
434 while (PRegOpt
< Allowed
.size() && Allowed
[PRegOpt
] != DstReg
)
437 if (PRegOpt
< Allowed
.size()) {
438 PBQPRAGraph::RawVector
NewCosts(G
.getNodeCosts(NId
));
439 NewCosts
[PRegOpt
+ 1] -= CBenefit
;
440 G
.setNodeCosts(NId
, std::move(NewCosts
));
443 PBQPRAGraph::NodeId N1Id
= G
.getMetadata().getNodeIdForVReg(DstReg
);
444 PBQPRAGraph::NodeId N2Id
= G
.getMetadata().getNodeIdForVReg(SrcReg
);
445 const PBQPRAGraph::NodeMetadata::AllowedRegVector
*Allowed1
=
446 &G
.getNodeMetadata(N1Id
).getAllowedRegs();
447 const PBQPRAGraph::NodeMetadata::AllowedRegVector
*Allowed2
=
448 &G
.getNodeMetadata(N2Id
).getAllowedRegs();
450 PBQPRAGraph::EdgeId EId
= G
.findEdge(N1Id
, N2Id
);
451 if (EId
== G
.invalidEdgeId()) {
452 PBQPRAGraph::RawMatrix
Costs(Allowed1
->size() + 1,
453 Allowed2
->size() + 1, 0);
454 addVirtRegCoalesce(Costs
, *Allowed1
, *Allowed2
, CBenefit
);
455 G
.addEdge(N1Id
, N2Id
, std::move(Costs
));
457 if (G
.getEdgeNode1Id(EId
) == N2Id
) {
458 std::swap(N1Id
, N2Id
);
459 std::swap(Allowed1
, Allowed2
);
461 PBQPRAGraph::RawMatrix
Costs(G
.getEdgeCosts(EId
));
462 addVirtRegCoalesce(Costs
, *Allowed1
, *Allowed2
, CBenefit
);
463 G
.updateEdgeCosts(EId
, std::move(Costs
));
472 void addVirtRegCoalesce(
473 PBQPRAGraph::RawMatrix
&CostMat
,
474 const PBQPRAGraph::NodeMetadata::AllowedRegVector
&Allowed1
,
475 const PBQPRAGraph::NodeMetadata::AllowedRegVector
&Allowed2
,
476 PBQP::PBQPNum Benefit
) {
477 assert(CostMat
.getRows() == Allowed1
.size() + 1 && "Size mismatch.");
478 assert(CostMat
.getCols() == Allowed2
.size() + 1 && "Size mismatch.");
479 for (unsigned I
= 0; I
!= Allowed1
.size(); ++I
) {
480 unsigned PReg1
= Allowed1
[I
];
481 for (unsigned J
= 0; J
!= Allowed2
.size(); ++J
) {
482 unsigned PReg2
= Allowed2
[J
];
484 CostMat
[I
+ 1][J
+ 1] -= Benefit
;
491 } // End anonymous namespace.
493 // Out-of-line destructor/anchor for PBQPRAConstraint.
494 PBQPRAConstraint::~PBQPRAConstraint() {}
495 void PBQPRAConstraint::anchor() {}
496 void PBQPRAConstraintList::anchor() {}
498 void RegAllocPBQP::getAnalysisUsage(AnalysisUsage
&au
) const {
499 au
.setPreservesCFG();
500 au
.addRequired
<AAResultsWrapperPass
>();
501 au
.addPreserved
<AAResultsWrapperPass
>();
502 au
.addRequired
<SlotIndexes
>();
503 au
.addPreserved
<SlotIndexes
>();
504 au
.addRequired
<LiveIntervals
>();
505 au
.addPreserved
<LiveIntervals
>();
506 //au.addRequiredID(SplitCriticalEdgesID);
508 au
.addRequiredID(*customPassID
);
509 au
.addRequired
<LiveStacks
>();
510 au
.addPreserved
<LiveStacks
>();
511 au
.addRequired
<MachineBlockFrequencyInfo
>();
512 au
.addPreserved
<MachineBlockFrequencyInfo
>();
513 au
.addRequired
<MachineLoopInfo
>();
514 au
.addPreserved
<MachineLoopInfo
>();
515 au
.addRequired
<MachineDominatorTree
>();
516 au
.addPreserved
<MachineDominatorTree
>();
517 au
.addRequired
<VirtRegMap
>();
518 au
.addPreserved
<VirtRegMap
>();
519 MachineFunctionPass::getAnalysisUsage(au
);
522 void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction
&MF
,
523 LiveIntervals
&LIS
) {
524 const MachineRegisterInfo
&MRI
= MF
.getRegInfo();
526 // Iterate over all live ranges.
527 for (unsigned I
= 0, E
= MRI
.getNumVirtRegs(); I
!= E
; ++I
) {
528 unsigned Reg
= TargetRegisterInfo::index2VirtReg(I
);
529 if (MRI
.reg_nodbg_empty(Reg
))
531 LiveInterval
&LI
= LIS
.getInterval(Reg
);
533 // If this live interval is non-empty we will use pbqp to allocate it.
534 // Empty intervals we allocate in a simple post-processing stage in
537 VRegsToAlloc
.insert(LI
.reg
);
539 EmptyIntervalVRegs
.insert(LI
.reg
);
544 static bool isACalleeSavedRegister(unsigned reg
, const TargetRegisterInfo
&TRI
,
545 const MachineFunction
&MF
) {
546 const MCPhysReg
*CSR
= TRI
.getCalleeSavedRegs(&MF
);
547 for (unsigned i
= 0; CSR
[i
] != 0; ++i
)
548 if (TRI
.regsOverlap(reg
, CSR
[i
]))
553 void RegAllocPBQP::initializeGraph(PBQPRAGraph
&G
, VirtRegMap
&VRM
,
554 Spiller
&VRegSpiller
) {
555 MachineFunction
&MF
= G
.getMetadata().MF
;
557 LiveIntervals
&LIS
= G
.getMetadata().LIS
;
558 const MachineRegisterInfo
&MRI
= G
.getMetadata().MF
.getRegInfo();
559 const TargetRegisterInfo
&TRI
=
560 *G
.getMetadata().MF
.getSubtarget().getRegisterInfo();
562 std::vector
<unsigned> Worklist(VRegsToAlloc
.begin(), VRegsToAlloc
.end());
564 while (!Worklist
.empty()) {
565 unsigned VReg
= Worklist
.back();
568 const TargetRegisterClass
*TRC
= MRI
.getRegClass(VReg
);
569 LiveInterval
&VRegLI
= LIS
.getInterval(VReg
);
571 // Record any overlaps with regmask operands.
572 BitVector RegMaskOverlaps
;
573 LIS
.checkRegMaskInterference(VRegLI
, RegMaskOverlaps
);
575 // Compute an initial allowed set for the current vreg.
576 std::vector
<unsigned> VRegAllowed
;
577 ArrayRef
<MCPhysReg
> RawPRegOrder
= TRC
->getRawAllocationOrder(MF
);
578 for (unsigned I
= 0; I
!= RawPRegOrder
.size(); ++I
) {
579 unsigned PReg
= RawPRegOrder
[I
];
580 if (MRI
.isReserved(PReg
))
583 // vregLI crosses a regmask operand that clobbers preg.
584 if (!RegMaskOverlaps
.empty() && !RegMaskOverlaps
.test(PReg
))
587 // vregLI overlaps fixed regunit interference.
588 bool Interference
= false;
589 for (MCRegUnitIterator
Units(PReg
, &TRI
); Units
.isValid(); ++Units
) {
590 if (VRegLI
.overlaps(LIS
.getRegUnit(*Units
))) {
598 // preg is usable for this virtual register.
599 VRegAllowed
.push_back(PReg
);
602 // Check for vregs that have no allowed registers. These should be
603 // pre-spilled and the new vregs added to the worklist.
604 if (VRegAllowed
.empty()) {
605 SmallVector
<unsigned, 8> NewVRegs
;
606 spillVReg(VReg
, NewVRegs
, MF
, LIS
, VRM
, VRegSpiller
);
607 Worklist
.insert(Worklist
.end(), NewVRegs
.begin(), NewVRegs
.end());
611 PBQPRAGraph::RawVector
NodeCosts(VRegAllowed
.size() + 1, 0);
613 // Tweak cost of callee saved registers, as using then force spilling and
614 // restoring them. This would only happen in the prologue / epilogue though.
615 for (unsigned i
= 0; i
!= VRegAllowed
.size(); ++i
)
616 if (isACalleeSavedRegister(VRegAllowed
[i
], TRI
, MF
))
617 NodeCosts
[1 + i
] += 1.0;
619 PBQPRAGraph::NodeId NId
= G
.addNode(std::move(NodeCosts
));
620 G
.getNodeMetadata(NId
).setVReg(VReg
);
621 G
.getNodeMetadata(NId
).setAllowedRegs(
622 G
.getMetadata().getAllowedRegs(std::move(VRegAllowed
)));
623 G
.getMetadata().setNodeIdForVReg(VReg
, NId
);
627 void RegAllocPBQP::spillVReg(unsigned VReg
,
628 SmallVectorImpl
<unsigned> &NewIntervals
,
629 MachineFunction
&MF
, LiveIntervals
&LIS
,
630 VirtRegMap
&VRM
, Spiller
&VRegSpiller
) {
632 VRegsToAlloc
.erase(VReg
);
633 LiveRangeEdit
LRE(&LIS
.getInterval(VReg
), NewIntervals
, MF
, LIS
, &VRM
);
634 VRegSpiller
.spill(LRE
);
636 const TargetRegisterInfo
&TRI
= *MF
.getSubtarget().getRegisterInfo();
638 DEBUG(dbgs() << "VREG " << PrintReg(VReg
, &TRI
) << " -> SPILLED (Cost: "
639 << LRE
.getParent().weight
<< ", New vregs: ");
641 // Copy any newly inserted live intervals into the list of regs to
643 for (LiveRangeEdit::iterator I
= LRE
.begin(), E
= LRE
.end();
645 const LiveInterval
&LI
= LIS
.getInterval(*I
);
646 assert(!LI
.empty() && "Empty spill range.");
647 DEBUG(dbgs() << PrintReg(LI
.reg
, &TRI
) << " ");
648 VRegsToAlloc
.insert(LI
.reg
);
651 DEBUG(dbgs() << ")\n");
654 bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph
&G
,
655 const PBQP::Solution
&Solution
,
657 Spiller
&VRegSpiller
) {
658 MachineFunction
&MF
= G
.getMetadata().MF
;
659 LiveIntervals
&LIS
= G
.getMetadata().LIS
;
660 const TargetRegisterInfo
&TRI
= *MF
.getSubtarget().getRegisterInfo();
663 // Set to true if we have any spills
664 bool AnotherRoundNeeded
= false;
666 // Clear the existing allocation.
669 // Iterate over the nodes mapping the PBQP solution to a register
671 for (auto NId
: G
.nodeIds()) {
672 unsigned VReg
= G
.getNodeMetadata(NId
).getVReg();
673 unsigned AllocOption
= Solution
.getSelection(NId
);
675 if (AllocOption
!= PBQP::RegAlloc::getSpillOptionIdx()) {
676 unsigned PReg
= G
.getNodeMetadata(NId
).getAllowedRegs()[AllocOption
- 1];
677 DEBUG(dbgs() << "VREG " << PrintReg(VReg
, &TRI
) << " -> "
678 << TRI
.getName(PReg
) << "\n");
679 assert(PReg
!= 0 && "Invalid preg selected.");
680 VRM
.assignVirt2Phys(VReg
, PReg
);
682 // Spill VReg. If this introduces new intervals we'll need another round
684 SmallVector
<unsigned, 8> NewVRegs
;
685 spillVReg(VReg
, NewVRegs
, MF
, LIS
, VRM
, VRegSpiller
);
686 AnotherRoundNeeded
|= !NewVRegs
.empty();
690 return !AnotherRoundNeeded
;
693 void RegAllocPBQP::finalizeAlloc(MachineFunction
&MF
,
695 VirtRegMap
&VRM
) const {
696 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
698 // First allocate registers for the empty intervals.
699 for (RegSet::const_iterator
700 I
= EmptyIntervalVRegs
.begin(), E
= EmptyIntervalVRegs
.end();
702 LiveInterval
&LI
= LIS
.getInterval(*I
);
704 unsigned PReg
= MRI
.getSimpleHint(LI
.reg
);
707 const TargetRegisterClass
&RC
= *MRI
.getRegClass(LI
.reg
);
708 PReg
= RC
.getRawAllocationOrder(MF
).front();
711 VRM
.assignVirt2Phys(LI
.reg
, PReg
);
715 static inline float normalizePBQPSpillWeight(float UseDefFreq
, unsigned Size
,
717 // All intervals have a spill weight that is mostly proportional to the number
718 // of uses, with uses in loops having a bigger weight.
719 return NumInstr
* normalizeSpillWeight(UseDefFreq
, Size
, 1);
722 bool RegAllocPBQP::runOnMachineFunction(MachineFunction
&MF
) {
723 LiveIntervals
&LIS
= getAnalysis
<LiveIntervals
>();
724 MachineBlockFrequencyInfo
&MBFI
=
725 getAnalysis
<MachineBlockFrequencyInfo
>();
727 VirtRegMap
&VRM
= getAnalysis
<VirtRegMap
>();
729 calculateSpillWeightsAndHints(LIS
, MF
, &VRM
, getAnalysis
<MachineLoopInfo
>(),
730 MBFI
, normalizePBQPSpillWeight
);
732 std::unique_ptr
<Spiller
> VRegSpiller(createInlineSpiller(*this, MF
, VRM
));
734 MF
.getRegInfo().freezeReservedRegs(MF
);
736 DEBUG(dbgs() << "PBQP Register Allocating for " << MF
.getName() << "\n");
738 // Allocator main loop:
740 // * Map current regalloc problem to a PBQP problem
741 // * Solve the PBQP problem
742 // * Map the solution back to a register allocation
743 // * Spill if necessary
745 // This process is continued till no more spills are generated.
747 // Find the vreg intervals in need of allocation.
748 findVRegIntervalsToAlloc(MF
, LIS
);
751 const Function
&F
= *MF
.getFunction();
752 std::string FullyQualifiedName
=
753 F
.getParent()->getModuleIdentifier() + "." + F
.getName().str();
756 // If there are non-empty intervals allocate them using pbqp.
757 if (!VRegsToAlloc
.empty()) {
759 const TargetSubtargetInfo
&Subtarget
= MF
.getSubtarget();
760 std::unique_ptr
<PBQPRAConstraintList
> ConstraintsRoot
=
761 llvm::make_unique
<PBQPRAConstraintList
>();
762 ConstraintsRoot
->addConstraint(llvm::make_unique
<SpillCosts
>());
763 ConstraintsRoot
->addConstraint(llvm::make_unique
<Interference
>());
765 ConstraintsRoot
->addConstraint(llvm::make_unique
<Coalescing
>());
766 ConstraintsRoot
->addConstraint(Subtarget
.getCustomPBQPConstraints());
768 bool PBQPAllocComplete
= false;
771 while (!PBQPAllocComplete
) {
772 DEBUG(dbgs() << " PBQP Regalloc round " << Round
<< ":\n");
774 PBQPRAGraph
G(PBQPRAGraph::GraphMetadata(MF
, LIS
, MBFI
));
775 initializeGraph(G
, VRM
, *VRegSpiller
);
776 ConstraintsRoot
->apply(G
);
779 if (PBQPDumpGraphs
) {
780 std::ostringstream RS
;
782 std::string GraphFileName
= FullyQualifiedName
+ "." + RS
.str() +
785 raw_fd_ostream
OS(GraphFileName
, EC
, sys::fs::F_Text
);
786 DEBUG(dbgs() << "Dumping graph for round " << Round
<< " to \""
787 << GraphFileName
<< "\"\n");
792 PBQP::Solution Solution
= PBQP::RegAlloc::solve(G
);
793 PBQPAllocComplete
= mapPBQPToRegAlloc(G
, Solution
, VRM
, *VRegSpiller
);
798 // Finalise allocation, allocate empty ranges.
799 finalizeAlloc(MF
, LIS
, VRM
);
800 VRegsToAlloc
.clear();
801 EmptyIntervalVRegs
.clear();
803 DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM
<< "\n");
809 // A helper class for printing node and register info in a consistent way
810 class PrintNodeInfo
{
812 typedef PBQP::RegAlloc::PBQPRAGraph Graph
;
813 typedef PBQP::RegAlloc::PBQPRAGraph::NodeId NodeId
;
815 PrintNodeInfo(NodeId NId
, const Graph
&G
) : G(G
), NId(NId
) {}
817 void print(raw_ostream
&OS
) const {
818 const MachineRegisterInfo
&MRI
= G
.getMetadata().MF
.getRegInfo();
819 const TargetRegisterInfo
*TRI
= MRI
.getTargetRegisterInfo();
820 unsigned VReg
= G
.getNodeMetadata(NId
).getVReg();
821 const char *RegClassName
= TRI
->getRegClassName(MRI
.getRegClass(VReg
));
822 OS
<< NId
<< " (" << RegClassName
<< ':' << PrintReg(VReg
, TRI
) << ')';
830 inline raw_ostream
&operator<<(raw_ostream
&OS
, const PrintNodeInfo
&PR
) {
834 } // anonymous namespace
836 void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream
&OS
) const {
837 for (auto NId
: nodeIds()) {
838 const Vector
&Costs
= getNodeCosts(NId
);
839 assert(Costs
.getLength() != 0 && "Empty vector in graph.");
840 OS
<< PrintNodeInfo(NId
, *this) << ": " << Costs
<< '\n';
844 for (auto EId
: edgeIds()) {
845 NodeId N1Id
= getEdgeNode1Id(EId
);
846 NodeId N2Id
= getEdgeNode2Id(EId
);
847 assert(N1Id
!= N2Id
&& "PBQP graphs should not have self-edges.");
848 const Matrix
&M
= getEdgeCosts(EId
);
849 assert(M
.getRows() != 0 && "No rows in matrix.");
850 assert(M
.getCols() != 0 && "No cols in matrix.");
851 OS
<< PrintNodeInfo(N1Id
, *this) << ' ' << M
.getRows() << " rows / ";
852 OS
<< PrintNodeInfo(N2Id
, *this) << ' ' << M
.getCols() << " cols:\n";
857 void PBQP::RegAlloc::PBQPRAGraph::dump() const { dump(dbgs()); }
859 void PBQP::RegAlloc::PBQPRAGraph::printDot(raw_ostream
&OS
) const {
861 for (auto NId
: nodeIds()) {
862 OS
<< " node" << NId
<< " [ label=\""
863 << PrintNodeInfo(NId
, *this) << "\\n"
864 << getNodeCosts(NId
) << "\" ]\n";
867 OS
<< " edge [ len=" << nodeIds().size() << " ]\n";
868 for (auto EId
: edgeIds()) {
869 OS
<< " node" << getEdgeNode1Id(EId
)
870 << " -- node" << getEdgeNode2Id(EId
)
872 const Matrix
&EdgeCosts
= getEdgeCosts(EId
);
873 for (unsigned i
= 0; i
< EdgeCosts
.getRows(); ++i
) {
874 OS
<< EdgeCosts
.getRowAsVector(i
) << "\\n";
881 FunctionPass
*llvm::createPBQPRegisterAllocator(char *customPassID
) {
882 return new RegAllocPBQP(customPassID
);
885 FunctionPass
* llvm::createDefaultPBQPRegisterAllocator() {
886 return createPBQPRegisterAllocator();