//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface -------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the generic RegisterCoalescer interface which
11 // is used as the common interface used by all clients and
12 // implementations of register coalescing.
14 //===----------------------------------------------------------------------===//
16 #include "RegisterCoalescer.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallSet.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/Analysis/AliasAnalysis.h"
21 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
22 #include "llvm/CodeGen/LiveRangeEdit.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineInstr.h"
25 #include "llvm/CodeGen/MachineLoopInfo.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/Passes.h"
28 #include "llvm/CodeGen/RegisterClassInfo.h"
29 #include "llvm/CodeGen/VirtRegMap.h"
30 #include "llvm/IR/Value.h"
31 #include "llvm/Pass.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include "llvm/Target/TargetInstrInfo.h"
37 #include "llvm/Target/TargetMachine.h"
38 #include "llvm/Target/TargetRegisterInfo.h"
39 #include "llvm/Target/TargetSubtargetInfo.h"
44 #define DEBUG_TYPE "regalloc"
// Pass-wide statistics, emitted with -stats. Counter names double as the
// identifiers bumped throughout this file.
STATISTIC(numJoins    , "Number of interval joins performed");
STATISTIC(numCrossRCs , "Number of cross class joins performed");
STATISTIC(numCommutes , "Number of instruction commuting performed");
STATISTIC(numExtends  , "Number of copies extended");
STATISTIC(NumReMats   , "Number of instructions re-materialized");
STATISTIC(NumInflated , "Number of register classes inflated");
STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
STATISTIC(NumLaneResolves,  "Number of dead lane conflicts resolved");
56 EnableJoining("join-liveintervals",
57 cl::desc("Coalesce copies (default=true)"),
60 static cl::opt
<bool> UseTerminalRule("terminal-rule",
61 cl::desc("Apply the terminal rule"),
62 cl::init(false), cl::Hidden
);
64 /// Temporary flag to test critical edge unsplitting.
66 EnableJoinSplits("join-splitedges",
67 cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden
);
/// Temporary flag to test global copy optimization.
// Tri-state: BOU_UNSET defers the decision to the subtarget's scheduling
// policy (see the uses of JoinGlobalCopies elsewhere in this pass).
static cl::opt<cl::boolOrDefault>
EnableGlobalCopies("join-globalcopies",
  cl::desc("Coalesce copies that span blocks (default=subtarget)"),
  cl::init(cl::BOU_UNSET), cl::Hidden);
76 VerifyCoalescing("verify-coalescing",
77 cl::desc("Verify machine instrs before and after register coalescing"),
81 class RegisterCoalescer
: public MachineFunctionPass
,
82 private LiveRangeEdit::Delegate
{
84 MachineRegisterInfo
* MRI
;
85 const TargetMachine
* TM
;
86 const TargetRegisterInfo
* TRI
;
87 const TargetInstrInfo
* TII
;
89 const MachineLoopInfo
* Loops
;
91 RegisterClassInfo RegClassInfo
;
93 /// A LaneMask to remember on which subregister live ranges we need to call
94 /// shrinkToUses() later.
95 LaneBitmask ShrinkMask
;
97 /// True if the main range of the currently coalesced intervals should be
98 /// checked for smaller live intervals.
101 /// \brief True if the coalescer should aggressively coalesce global copies
102 /// in favor of keeping local copies.
103 bool JoinGlobalCopies
;
105 /// \brief True if the coalescer should aggressively coalesce fall-thru
106 /// blocks exclusively containing copies.
109 /// Copy instructions yet to be coalesced.
110 SmallVector
<MachineInstr
*, 8> WorkList
;
111 SmallVector
<MachineInstr
*, 8> LocalWorkList
;
113 /// Set of instruction pointers that have been erased, and
114 /// that may be present in WorkList.
115 SmallPtrSet
<MachineInstr
*, 8> ErasedInstrs
;
117 /// Dead instructions that are about to be deleted.
118 SmallVector
<MachineInstr
*, 8> DeadDefs
;
120 /// Virtual registers to be considered for register class inflation.
121 SmallVector
<unsigned, 8> InflateRegs
;
123 /// Recursively eliminate dead defs in DeadDefs.
124 void eliminateDeadDefs();
126 /// LiveRangeEdit callback for eliminateDeadDefs().
127 void LRE_WillEraseInstruction(MachineInstr
*MI
) override
;
129 /// Coalesce the LocalWorkList.
130 void coalesceLocals();
132 /// Join compatible live intervals
133 void joinAllIntervals();
135 /// Coalesce copies in the specified MBB, putting
136 /// copies that cannot yet be coalesced into WorkList.
137 void copyCoalesceInMBB(MachineBasicBlock
*MBB
);
139 /// Tries to coalesce all copies in CurrList. Returns true if any progress
141 bool copyCoalesceWorkList(MutableArrayRef
<MachineInstr
*> CurrList
);
143 /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
144 /// src/dst of the copy instruction CopyMI. This returns true if the copy
145 /// was successfully coalesced away. If it is not currently possible to
146 /// coalesce this interval, but it may be possible if other things get
147 /// coalesced, then it returns true by reference in 'Again'.
148 bool joinCopy(MachineInstr
*TheCopy
, bool &Again
);
150 /// Attempt to join these two intervals. On failure, this
151 /// returns false. The output "SrcInt" will not have been modified, so we
152 /// can use this information below to update aliases.
153 bool joinIntervals(CoalescerPair
&CP
);
155 /// Attempt joining two virtual registers. Return true on success.
156 bool joinVirtRegs(CoalescerPair
&CP
);
158 /// Attempt joining with a reserved physreg.
159 bool joinReservedPhysReg(CoalescerPair
&CP
);
161 /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI.
162 /// Subranges in @p LI which only partially interfere with the desired
163 /// LaneMask are split as necessary. @p LaneMask are the lanes that
164 /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
165 /// lanemasks already adjusted to the coalesced register.
166 /// @returns false if live range conflicts couldn't get resolved.
167 bool mergeSubRangeInto(LiveInterval
&LI
, const LiveRange
&ToMerge
,
168 LaneBitmask LaneMask
, CoalescerPair
&CP
);
170 /// Join the liveranges of two subregisters. Joins @p RRange into
171 /// @p LRange, @p RRange may be invalid afterwards.
172 /// @returns false if live range conflicts couldn't get resolved.
173 bool joinSubRegRanges(LiveRange
&LRange
, LiveRange
&RRange
,
174 LaneBitmask LaneMask
, const CoalescerPair
&CP
);
176 /// We found a non-trivially-coalescable copy. If the source value number is
177 /// defined by a copy from the destination reg see if we can merge these two
178 /// destination reg valno# into a single value number, eliminating a copy.
179 /// This returns true if an interval was modified.
180 bool adjustCopiesBackFrom(const CoalescerPair
&CP
, MachineInstr
*CopyMI
);
182 /// Return true if there are definitions of IntB
183 /// other than BValNo val# that can reach uses of AValno val# of IntA.
184 bool hasOtherReachingDefs(LiveInterval
&IntA
, LiveInterval
&IntB
,
185 VNInfo
*AValNo
, VNInfo
*BValNo
);
187 /// We found a non-trivially-coalescable copy.
188 /// If the source value number is defined by a commutable instruction and
189 /// its other operand is coalesced to the copy dest register, see if we
190 /// can transform the copy into a noop by commuting the definition.
191 /// This returns true if an interval was modified.
192 bool removeCopyByCommutingDef(const CoalescerPair
&CP
,MachineInstr
*CopyMI
);
194 /// If the source of a copy is defined by a
195 /// trivial computation, replace the copy by rematerialize the definition.
196 bool reMaterializeTrivialDef(const CoalescerPair
&CP
, MachineInstr
*CopyMI
,
199 /// Return true if a copy involving a physreg should be joined.
200 bool canJoinPhys(const CoalescerPair
&CP
);
202 /// Replace all defs and uses of SrcReg to DstReg and update the subregister
203 /// number if it is not zero. If DstReg is a physical register and the
204 /// existing subregister number of the def / use being updated is not zero,
205 /// make sure to set it to the correct physical subregister.
206 void updateRegDefsUses(unsigned SrcReg
, unsigned DstReg
, unsigned SubIdx
);
208 /// Handle copies of undef values.
209 /// Returns true if @p CopyMI was a copy of an undef value and eliminated.
210 bool eliminateUndefCopy(MachineInstr
*CopyMI
);
212 /// Check whether or not we should apply the terminal rule on the
213 /// destination (Dst) of \p Copy.
214 /// When the terminal rule applies, Copy is not profitable to
216 /// Dst is terminal if it has exactly one affinity (Dst, Src) and
217 /// at least one interference (Dst, Dst2). If Dst is terminal, the
218 /// terminal rule consists in checking that at least one of
219 /// interfering node, say Dst2, has an affinity of equal or greater
221 /// In that case, Dst2 and Dst will not be able to be both coalesced
222 /// with Src. Since Dst2 exposes more coalescing opportunities than
223 /// Dst, we can drop \p Copy.
224 bool applyTerminalRule(const MachineInstr
&Copy
) const;
226 /// Wrapper method for \see LiveIntervals::shrinkToUses.
227 /// This method does the proper fixing of the live-ranges when the afore
228 /// mentioned method returns true.
229 void shrinkToUses(LiveInterval
*LI
,
230 SmallVectorImpl
<MachineInstr
* > *Dead
= nullptr) {
231 if (LIS
->shrinkToUses(LI
, Dead
)) {
232 /// Check whether or not \p LI is composed by multiple connected
233 /// components and if that is the case, fix that.
234 SmallVector
<LiveInterval
*, 8> SplitLIs
;
235 LIS
->splitSeparateComponents(*LI
, SplitLIs
);
240 static char ID
; ///< Class identification, replacement for typeinfo
241 RegisterCoalescer() : MachineFunctionPass(ID
) {
242 initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
245 void getAnalysisUsage(AnalysisUsage
&AU
) const override
;
247 void releaseMemory() override
;
249 /// This is the pass entry point.
250 bool runOnMachineFunction(MachineFunction
&) override
;
252 /// Implement the dump method.
253 void print(raw_ostream
&O
, const Module
* = nullptr) const override
;
255 } // end anonymous namespace
// Public handle other passes use to request/schedule this pass.
char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;

// Register the pass and its analysis dependencies with the PassRegistry.
INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
                      "Simple Register Coalescing", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
                    "Simple Register Coalescing", false, false)

char RegisterCoalescer::ID = 0;
270 static bool isMoveInstr(const TargetRegisterInfo
&tri
, const MachineInstr
*MI
,
271 unsigned &Src
, unsigned &Dst
,
272 unsigned &SrcSub
, unsigned &DstSub
) {
274 Dst
= MI
->getOperand(0).getReg();
275 DstSub
= MI
->getOperand(0).getSubReg();
276 Src
= MI
->getOperand(1).getReg();
277 SrcSub
= MI
->getOperand(1).getSubReg();
278 } else if (MI
->isSubregToReg()) {
279 Dst
= MI
->getOperand(0).getReg();
280 DstSub
= tri
.composeSubRegIndices(MI
->getOperand(0).getSubReg(),
281 MI
->getOperand(3).getImm());
282 Src
= MI
->getOperand(2).getReg();
283 SrcSub
= MI
->getOperand(2).getSubReg();
289 /// Return true if this block should be vacated by the coalescer to eliminate
290 /// branches. The important cases to handle in the coalescer are critical edges
291 /// split during phi elimination which contain only copies. Simple blocks that
292 /// contain non-branches should also be vacated, but this can be handled by an
293 /// earlier pass similar to early if-conversion.
294 static bool isSplitEdge(const MachineBasicBlock
*MBB
) {
295 if (MBB
->pred_size() != 1 || MBB
->succ_size() != 1)
298 for (const auto &MI
: *MBB
) {
299 if (!MI
.isCopyLike() && !MI
.isUnconditionalBranch())
305 bool CoalescerPair::setRegisters(const MachineInstr
*MI
) {
309 Flipped
= CrossClass
= false;
311 unsigned Src
, Dst
, SrcSub
, DstSub
;
312 if (!isMoveInstr(TRI
, MI
, Src
, Dst
, SrcSub
, DstSub
))
314 Partial
= SrcSub
|| DstSub
;
316 // If one register is a physreg, it must be Dst.
317 if (TargetRegisterInfo::isPhysicalRegister(Src
)) {
318 if (TargetRegisterInfo::isPhysicalRegister(Dst
))
321 std::swap(SrcSub
, DstSub
);
325 const MachineRegisterInfo
&MRI
= MI
->getParent()->getParent()->getRegInfo();
327 if (TargetRegisterInfo::isPhysicalRegister(Dst
)) {
328 // Eliminate DstSub on a physreg.
330 Dst
= TRI
.getSubReg(Dst
, DstSub
);
331 if (!Dst
) return false;
335 // Eliminate SrcSub by picking a corresponding Dst superregister.
337 Dst
= TRI
.getMatchingSuperReg(Dst
, SrcSub
, MRI
.getRegClass(Src
));
338 if (!Dst
) return false;
339 } else if (!MRI
.getRegClass(Src
)->contains(Dst
)) {
343 // Both registers are virtual.
344 const TargetRegisterClass
*SrcRC
= MRI
.getRegClass(Src
);
345 const TargetRegisterClass
*DstRC
= MRI
.getRegClass(Dst
);
347 // Both registers have subreg indices.
348 if (SrcSub
&& DstSub
) {
349 // Copies between different sub-registers are never coalescable.
350 if (Src
== Dst
&& SrcSub
!= DstSub
)
353 NewRC
= TRI
.getCommonSuperRegClass(SrcRC
, SrcSub
, DstRC
, DstSub
,
358 // SrcReg will be merged with a sub-register of DstReg.
360 NewRC
= TRI
.getMatchingSuperRegClass(DstRC
, SrcRC
, DstSub
);
362 // DstReg will be merged with a sub-register of SrcReg.
364 NewRC
= TRI
.getMatchingSuperRegClass(SrcRC
, DstRC
, SrcSub
);
366 // This is a straight copy without sub-registers.
367 NewRC
= TRI
.getCommonSubClass(DstRC
, SrcRC
);
370 // The combined constraint may be impossible to satisfy.
374 // Prefer SrcReg to be a sub-register of DstReg.
375 // FIXME: Coalescer should support subregs symmetrically.
376 if (DstIdx
&& !SrcIdx
) {
378 std::swap(SrcIdx
, DstIdx
);
382 CrossClass
= NewRC
!= DstRC
|| NewRC
!= SrcRC
;
384 // Check our invariants
385 assert(TargetRegisterInfo::isVirtualRegister(Src
) && "Src must be virtual");
386 assert(!(TargetRegisterInfo::isPhysicalRegister(Dst
) && DstSub
) &&
387 "Cannot have a physical SubIdx");
393 bool CoalescerPair::flip() {
394 if (TargetRegisterInfo::isPhysicalRegister(DstReg
))
396 std::swap(SrcReg
, DstReg
);
397 std::swap(SrcIdx
, DstIdx
);
402 bool CoalescerPair::isCoalescable(const MachineInstr
*MI
) const {
405 unsigned Src
, Dst
, SrcSub
, DstSub
;
406 if (!isMoveInstr(TRI
, MI
, Src
, Dst
, SrcSub
, DstSub
))
409 // Find the virtual register that is SrcReg.
412 std::swap(SrcSub
, DstSub
);
413 } else if (Src
!= SrcReg
) {
417 // Now check that Dst matches DstReg.
418 if (TargetRegisterInfo::isPhysicalRegister(DstReg
)) {
419 if (!TargetRegisterInfo::isPhysicalRegister(Dst
))
421 assert(!DstIdx
&& !SrcIdx
&& "Inconsistent CoalescerPair state.");
422 // DstSub could be set for a physreg from INSERT_SUBREG.
424 Dst
= TRI
.getSubReg(Dst
, DstSub
);
427 return DstReg
== Dst
;
428 // This is a partial register copy. Check that the parts match.
429 return TRI
.getSubReg(DstReg
, SrcSub
) == Dst
;
431 // DstReg is virtual.
434 // Registers match, do the subregisters line up?
435 return TRI
.composeSubRegIndices(SrcIdx
, SrcSub
) ==
436 TRI
.composeSubRegIndices(DstIdx
, DstSub
);
440 void RegisterCoalescer::getAnalysisUsage(AnalysisUsage
&AU
) const {
441 AU
.setPreservesCFG();
442 AU
.addRequired
<AAResultsWrapperPass
>();
443 AU
.addRequired
<LiveIntervals
>();
444 AU
.addPreserved
<LiveIntervals
>();
445 AU
.addPreserved
<SlotIndexes
>();
446 AU
.addRequired
<MachineLoopInfo
>();
447 AU
.addPreserved
<MachineLoopInfo
>();
448 AU
.addPreservedID(MachineDominatorsID
);
449 MachineFunctionPass::getAnalysisUsage(AU
);
452 void RegisterCoalescer::eliminateDeadDefs() {
453 SmallVector
<unsigned, 8> NewRegs
;
454 LiveRangeEdit(nullptr, NewRegs
, *MF
, *LIS
,
455 nullptr, this).eliminateDeadDefs(DeadDefs
);
458 void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr
*MI
) {
459 // MI may be in WorkList. Make sure we don't visit it.
460 ErasedInstrs
.insert(MI
);
// NOTE(review): this function body has lost a number of lines in this
// extraction (missing left-hand sides on the getInterval() calls, dangling
// if-conditions whose early-return bodies are gone, missing braces, and a
// truncated tail after the final `if`). Tokens below are preserved exactly as
// found; reconcile against upstream LLVM before compiling.
bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
                                             MachineInstr *CopyMI) {
  assert(!CP.isPartial() && "This doesn't work for partial copies.");
  assert(!CP.isPhys() && "This doesn't work for physreg copies.");
  // NOTE(review): the `LiveInterval &IntA =` / `LiveInterval &IntB =`
  // declarations appear to be missing before these two calls.
    LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
    LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
  SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();

  // We have a non-trivially-coalescable copy with IntA being the source and
  // IntB being the dest, thus this defines a value number in IntB. If the
  // source value number (in IntA) is defined by a copy from B, see if we can
  // merge these two pieces of B into a single value number, eliminating a copy.
  //
  //  B1 = A3      <- this copy
  //
  // In this case, B0 can be extended to where the B1 copy lives, allowing the
  // B1 value number to be replaced with B0 (which simplifies the B
  // liveinterval).

  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
  // the example above.
  LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx);
  if (BS == IntB.end()) return false;
  VNInfo *BValNo = BS->valno;

  // Get the location that B is defined at. Two options: either this value has
  // an unknown definition point or it is defined at CopyIdx. If unknown, we
  // can't process it.
  if (BValNo->def != CopyIdx) return false;

  // AValNo is the value number in A that defines the copy, A3 in the example.
  SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
  LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx);
  // The live segment might not exist after fun with physreg coalescing.
  if (AS == IntA.end()) return false;
  VNInfo *AValNo = AS->valno;

  // If AValNo is defined as a copy from IntB, we can potentially process this.
  // Get the instruction that defines this value number.
  MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
  // Don't allow any partial copies, even if isCoalescable() allows them.
  // NOTE(review): the early-return body of this condition is missing.
  if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy())

  // Get the Segment in IntB that this value number starts with.
  LiveInterval::iterator ValS =
    IntB.FindSegmentContaining(AValNo->def.getPrevSlot());
  // NOTE(review): the early-return body of this condition is missing.
  if (ValS == IntB.end())

  // Make sure that the end of the live segment is inside the same block as
  // the copy.
  MachineInstr *ValSEndInst =
    LIS->getInstructionFromIndex(ValS->end.getPrevSlot());
  // NOTE(review): the early-return body of this condition is missing.
  if (!ValSEndInst || ValSEndInst->getParent() != CopyMI->getParent())

  // Okay, we now know that ValS ends in the same block that the CopyMI
  // live-range starts. If there are no intervening live segments between them
  // in IntB, we can merge them.
  if (ValS+1 != BS) return false;

  DEBUG(dbgs() << "Extending: " << PrintReg(IntB.reg, TRI));

  SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;
  // We are about to delete CopyMI, so need to remove it as the 'instruction
  // that defines this value #'. Update the valnum with the new defining
  // instruction index.
  BValNo->def = FillerStart;

  // Okay, we can merge them. We need to insert a new liverange:
  // [ValS.end, BS.begin) of either value number, then we merge the
  // two value numbers.
  IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo));

  // Okay, merge "B1" into the same value number as "B0".
  if (BValNo != ValS->valno)
    IntB.MergeValueNumberInto(BValNo, ValS->valno);

  // Do the same for the subregister segments.
  // NOTE(review): the closing brace of this loop appears to be missing.
  for (LiveInterval::SubRange &S : IntB.subranges()) {
    VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
    S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo));
    VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot());
    if (SubBValNo != SubValSNo)
      S.MergeValueNumberInto(SubBValNo, SubValSNo);

  DEBUG(dbgs() << "   result = " << IntB << '\n');

  // If the source instruction was killing the source register before the
  // merge, unset the isKill marker given the live range has been extended.
  // NOTE(review): the `UIdx != -1` guard before setIsKill appears missing.
  int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg, true);
    ValSEndInst->getOperand(UIdx).setIsKill(false);

  // Rewrite the copy. If the copy instruction was killing the destination
  // register before the merge, find the last use and trim the live range. That
  // will also add the isKill marker.
  CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI);
  // NOTE(review): the function tail (body of this `if`, statistics bump and
  // return) is truncated here.
  if (AS->end == CopyIdx)
577 bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval
&IntA
,
581 // If AValNo has PHI kills, conservatively assume that IntB defs can reach
583 if (LIS
->hasPHIKill(IntA
, AValNo
))
586 for (LiveRange::Segment
&ASeg
: IntA
.segments
) {
587 if (ASeg
.valno
!= AValNo
) continue;
588 LiveInterval::iterator BI
=
589 std::upper_bound(IntB
.begin(), IntB
.end(), ASeg
.start
);
590 if (BI
!= IntB
.begin())
592 for (; BI
!= IntB
.end() && ASeg
.end
>= BI
->start
; ++BI
) {
593 if (BI
->valno
== BValNo
)
595 if (BI
->start
<= ASeg
.start
&& BI
->end
> ASeg
.start
)
597 if (BI
->start
> ASeg
.start
&& BI
->start
< ASeg
.end
)
604 /// Copy segements with value number @p SrcValNo from liverange @p Src to live
605 /// range @Dst and use value number @p DstValNo there.
606 static void addSegmentsWithValNo(LiveRange
&Dst
, VNInfo
*DstValNo
,
607 const LiveRange
&Src
, const VNInfo
*SrcValNo
)
609 for (const LiveRange::Segment
&S
: Src
.segments
) {
610 if (S
.valno
!= SrcValNo
)
612 Dst
.addSegment(LiveRange::Segment(S
.start
, S
.end
, DstValNo
));
// NOTE(review): this function body has lost many lines in this extraction
// (missing left-hand sides, dangling if-conditions whose bodies are gone,
// missing local declarations such as `UseOpIdx`, missing braces, and a
// truncated tail). Tokens below are preserved exactly as found; reconcile
// against upstream LLVM before compiling.
bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
                                                 MachineInstr *CopyMI) {
  assert(!CP.isPhys());
  // NOTE(review): the `LiveInterval &IntA =` / `LiveInterval &IntB =`
  // declarations appear to be missing before these two calls.
    LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
    LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());

  // We found a non-trivially-coalescable copy with IntA being the source and
  // IntB being the dest, thus this defines a value number in IntB. If the
  // source value number (in IntA) is defined by a commutable instruction and
  // its other operand is coalesced to the copy dest register, see if we can
  // transform the copy into a noop by commuting the definition. For example,
  //
  //  A3 = op A2 B0<kill>
  //  B1 = A3      <- this copy
  //   = op A3     <- more uses
  //
  // ==>
  //
  //  B2 = op B0 A2<kill>
  //  B1 = B2      <- now an identity copy
  //   = op B2     <- more uses

  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
  // the example above.
  SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
  VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
  assert(BValNo != nullptr && BValNo->def == CopyIdx);

  // AValNo is the value number in A that defines the copy, A3 in the example.
  VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
  assert(AValNo && !AValNo->isUnused() && "COPY source not live");
  // NOTE(review): the early-return bodies of the next several conditions are
  // missing in this chunk.
  if (AValNo->isPHIDef())
  MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
  if (!DefMI->isCommutable())
  // If DefMI is a two-address instruction then commuting it will change the
  // destination register.
  int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
  assert(DefIdx != -1);
  // NOTE(review): the `unsigned UseOpIdx;` declaration appears missing here.
  if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))

  // FIXME: The code below tries to commute 'UseOpIdx' operand with some other
  // commutable operand which is expressed by 'CommuteAnyOperandIndex'value
  // passed to the method. That _other_ operand is chosen by
  // the findCommutedOpIndices() method.
  //
  // That is obviously an area for improvement in case of instructions having
  // more than 2 operands. For example, if some instruction has 3 commutable
  // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3,
  // op#2<->op#3) of commute transformation should be considered/tried here.
  unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex;
  if (!TII->findCommutedOpIndices(DefMI, UseOpIdx, NewDstIdx))

  MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
  unsigned NewReg = NewDstMO.getReg();
  if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill())

  // Make sure there are no other definitions of IntB that would reach the
  // uses which the new definition can reach.
  if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))

  // If some of the uses of IntA.reg is already coalesced away, return false.
  // It's not possible to determine whether it's safe to perform the coalescing.
  for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg)) {
    MachineInstr *UseMI = MO.getParent();
    unsigned OpNo = &MO - &UseMI->getOperand(0);
    SlotIndex UseIdx = LIS->getInstructionIndex(UseMI);
    LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
    if (US == IntA.end() || US->valno != AValNo)
    // If this use is tied to a def, we can't rewrite the register.
    if (UseMI->isRegTiedToDefOperand(OpNo))

  DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'

  // At this point we have decided that it is legal to do this
  // transformation.  Start by commuting the instruction.
  MachineBasicBlock *MBB = DefMI->getParent();
  MachineInstr *NewMI =
    TII->commuteInstruction(DefMI, false, UseOpIdx, NewDstIdx);
  if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
      TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
      !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg)))
  if (NewMI != DefMI) {
    LIS->ReplaceMachineInstrInMaps(DefMI, NewMI);
    MachineBasicBlock::iterator Pos = DefMI;
    MBB->insert(Pos, NewMI);

  // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.

  // Update uses of IntA of the specific Val# with IntB.
  // NOTE(review): the `UE = MRI->use_end()` initializer and the loop's `++UI`
  // appear to be missing.
  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg),
       UI != UE; /* ++UI is below because of possible MI removal */) {
    MachineOperand &UseMO = *UI;
    MachineInstr *UseMI = UseMO.getParent();
    if (UseMI->isDebugValue()) {
      // FIXME These don't have an instruction index.  Not clear we have enough
      // info to decide whether to do this replacement or not.  For now do it.
      UseMO.setReg(NewReg);
    SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true);
    LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
    assert(US != IntA.end() && "Use must be live");
    if (US->valno != AValNo)
    // Kill flags are no longer accurate. They are recomputed after RA.
    UseMO.setIsKill(false);
    if (TargetRegisterInfo::isPhysicalRegister(NewReg))
      UseMO.substPhysReg(NewReg, *TRI);
      UseMO.setReg(NewReg);
    if (!UseMI->isCopy())
    if (UseMI->getOperand(0).getReg() != IntB.reg ||
        UseMI->getOperand(0).getSubReg())

    // This copy will become a noop. If it's defining a new val#, merge it into
    // BValNo.
    SlotIndex DefIdx = UseIdx.getRegSlot();
    VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
    DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
    assert(DVNI->def == DefIdx);
    BValNo = IntB.MergeValueNumberInto(DVNI, BValNo);
    for (LiveInterval::SubRange &S : IntB.subranges()) {
      VNInfo *SubDVNI = S.getVNInfoAt(DefIdx);
      VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
      assert(SubBValNo->def == CopyIdx);
      S.MergeValueNumberInto(SubDVNI, SubBValNo);
    ErasedInstrs.insert(UseMI);
    LIS->RemoveMachineInstrFromMaps(UseMI);
    UseMI->eraseFromParent();

  // Extend BValNo by merging in IntA live segments of AValNo. Val# definition
  // is updated.
  BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
  if (IntB.hasSubRanges()) {
    if (!IntA.hasSubRanges()) {
      LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg);
      IntA.createSubRangeFrom(Allocator, Mask, IntA);
    SlotIndex AIdx = CopyIdx.getRegSlot(true);
    for (LiveInterval::SubRange &SA : IntA.subranges()) {
      VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
      assert(ASubValNo != nullptr);
      LaneBitmask AMask = SA.LaneMask;
      for (LiveInterval::SubRange &SB : IntB.subranges()) {
        LaneBitmask BMask = SB.LaneMask;
        LaneBitmask Common = BMask & AMask;
        DEBUG( dbgs() << "\t\tCopy_Merge " << PrintLaneMask(BMask)
                      << " into " << PrintLaneMask(Common) << '\n');
        LaneBitmask BRest = BMask & ~AMask;
        LiveInterval::SubRange *CommonRange;
          DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(BRest)
          // Duplicate SubRange for newly merged common stuff.
          CommonRange = IntB.createSubRangeFrom(Allocator, Common, SB);
          // We can reuse the L SubRange.
          SB.LaneMask = Common;
        LiveRange RangeCopy(SB, Allocator);
        VNInfo *BSubValNo = CommonRange->getVNInfoAt(CopyIdx);
        assert(BSubValNo->def == CopyIdx);
        BSubValNo->def = ASubValNo->def;
        addSegmentsWithValNo(*CommonRange, BSubValNo, SA, ASubValNo);
        DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(AMask) << '\n');
        LiveRange *NewRange = IntB.createSubRange(Allocator, AMask);
        VNInfo *BSubValNo = NewRange->getNextValue(CopyIdx, Allocator);
        addSegmentsWithValNo(*NewRange, BSubValNo, SA, ASubValNo);
  BValNo->def = AValNo->def;
  addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
  DEBUG(dbgs() << "\t\textended: " << IntB << '\n');

  LIS->removeVRegDefAt(IntA, AValNo->def);

  DEBUG(dbgs() << "\t\ttrimmed:  " << IntA << '\n');
856 /// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
857 /// defining a subregister.
858 static bool definesFullReg(const MachineInstr
&MI
, unsigned Reg
) {
859 assert(!TargetRegisterInfo::isPhysicalRegister(Reg
) &&
860 "This code cannot handle physreg aliasing");
861 for (const MachineOperand
&Op
: MI
.operands()) {
862 if (!Op
.isReg() || !Op
.isDef() || Op
.getReg() != Reg
)
864 // Return true if we define the full register or don't care about the value
865 // inside other subregisters.
866 if (Op
.getSubReg() == 0 || Op
.isUndef())
// NOTE(review): this listing is a mangled extraction — original source lines
// are split across physical lines and several lines are missing entirely
// (see the gaps in the embedded numbering, e.g. 863, 867-871, 881-882).
// Do NOT hand-edit logic here; verify against the upstream LLVM
// RegisterCoalescer.cpp before making changes.
//
// Purpose (from visible code): if the value copied by CopyMI is defined by a
// trivially rematerializable instruction (DefMI), re-emit that instruction
// right after CopyMI (TII->reMaterialize), erase the copy, fix up register
// classes / subregister indices on the new def, transfer implicit operands,
// and shrink the source interval now that a use is gone.
872 bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair
&CP
,
873 MachineInstr
*CopyMI
,
// Normalize the pair so SrcReg/SrcIdx always name the copy source even when
// the CoalescerPair was flipped.
876 unsigned SrcReg
= CP
.isFlipped() ? CP
.getDstReg() : CP
.getSrcReg();
877 unsigned SrcIdx
= CP
.isFlipped() ? CP
.getDstIdx() : CP
.getSrcIdx();
878 unsigned DstReg
= CP
.isFlipped() ? CP
.getSrcReg() : CP
.getDstReg();
879 unsigned DstIdx
= CP
.isFlipped() ? CP
.getSrcIdx() : CP
.getDstIdx();
// Physical source registers cannot be rematerialized this way.
880 if (TargetRegisterInfo::isPhysicalRegister(SrcReg
))
// Find the value the copy reads and the instruction that defines it.
883 LiveInterval
&SrcInt
= LIS
->getInterval(SrcReg
);
884 SlotIndex CopyIdx
= LIS
->getInstructionIndex(CopyMI
);
885 VNInfo
*ValNo
= SrcInt
.Query(CopyIdx
).valueIn();
886 assert(ValNo
&& "CopyMI input register not live");
887 if (ValNo
->isPHIDef() || ValNo
->isUnused())
889 MachineInstr
*DefMI
= LIS
->getInstructionFromIndex(ValNo
->def
);
892 if (DefMI
->isCopyLike()) {
// Gate rematerialization on the target's cost model and safety checks.
896 if (!TII
->isAsCheapAsAMove(DefMI
))
898 if (!TII
->isTriviallyReMaterializable(DefMI
, AA
))
900 if (!definesFullReg(*DefMI
, SrcReg
))
902 bool SawStore
= false;
903 if (!DefMI
->isSafeToMove(AA
, SawStore
))
905 const MCInstrDesc
&MCID
= DefMI
->getDesc();
906 if (MCID
.getNumDefs() != 1)
908 // Only support subregister destinations when the def is read-undef.
909 MachineOperand
&DstOperand
= CopyMI
->getOperand(0);
910 unsigned CopyDstReg
= DstOperand
.getReg();
911 if (DstOperand
.getSubReg() && !DstOperand
.isUndef())
914 // If both SrcIdx and DstIdx are set, correct rematerialization would widen
915 // the register substantially (beyond both source and dest size). This is bad
916 // for performance since it can cascade through a function, introducing many
917 // extra spills and fills (e.g. ARM can easily end up copying QQQQPR registers
918 // around after a few subreg copies).
919 if (SrcIdx
&& DstIdx
)
922 const TargetRegisterClass
*DefRC
= TII
->getRegClass(MCID
, 0, TRI
, *MF
);
923 if (!DefMI
->isImplicitDef()) {
// For a physical destination, compose subreg indices to find the actual
// physical (sub)register the remat will define, and check it is legal
// for the instruction.
924 if (TargetRegisterInfo::isPhysicalRegister(DstReg
)) {
925 unsigned NewDstReg
= DstReg
;
927 unsigned NewDstIdx
= TRI
->composeSubRegIndices(CP
.getSrcIdx(),
928 DefMI
->getOperand(0).getSubReg());
930 NewDstReg
= TRI
->getSubReg(DstReg
, NewDstIdx
);
932 // Finally, make sure that the physical subregister that will be
933 // constructed later is permitted for the instruction.
934 if (!DefRC
->contains(NewDstReg
))
937 // Theoretically, some stack frame reference could exist. Just make sure
938 // it hasn't actually happened.
939 assert(TargetRegisterInfo::isVirtualRegister(DstReg
) &&
940 "Only expect to deal with virtual or physical registers");
// Re-emit DefMI immediately after the copy, then grab the new instruction.
944 MachineBasicBlock
*MBB
= CopyMI
->getParent();
945 MachineBasicBlock::iterator MII
=
946 std::next(MachineBasicBlock::iterator(CopyMI
));
947 TII
->reMaterialize(*MBB
, MII
, DstReg
, SrcIdx
, DefMI
, *TRI
);
948 MachineInstr
*NewMI
= std::prev(MII
);
950 // In a situation like the following:
951 // %vreg0:subreg = instr ; DefMI, subreg = DstIdx
952 // %vreg1 = copy %vreg0:subreg ; CopyMI, SrcIdx = 0
953 // instead of widening %vreg1 to the register class of %vreg0 simply do:
955 const TargetRegisterClass
*NewRC
= CP
.getNewRC();
957 MachineOperand
&DefMO
= NewMI
->getOperand(0);
958 if (DefMO
.getSubReg() == DstIdx
) {
959 assert(SrcIdx
== 0 && CP
.isFlipped()
960 && "Shouldn't have SrcIdx+DstIdx at this point");
961 const TargetRegisterClass
*DstRC
= MRI
->getRegClass(DstReg
);
962 const TargetRegisterClass
*CommonRC
=
963 TRI
->getCommonSubClass(DefRC
, DstRC
);
964 if (CommonRC
!= nullptr) {
// The copy is now redundant: replace it with NewMI in the slot index
// maps and erase it.
972 LIS
->ReplaceMachineInstrInMaps(CopyMI
, NewMI
);
973 CopyMI
->eraseFromParent();
974 ErasedInstrs
.insert(CopyMI
);
976 // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
977 // We need to remember these so we can add intervals once we insert
978 // NewMI into SlotIndexes.
979 SmallVector
<unsigned, 4> NewMIImplDefs
;
980 for (unsigned i
= NewMI
->getDesc().getNumOperands(),
981 e
= NewMI
->getNumOperands(); i
!= e
; ++i
) {
982 MachineOperand
&MO
= NewMI
->getOperand(i
);
983 if (MO
.isReg() && MO
.isDef()) {
984 assert(MO
.isImplicit() && MO
.isDead() &&
985 TargetRegisterInfo::isPhysicalRegister(MO
.getReg()));
986 NewMIImplDefs
.push_back(MO
.getReg());
// Virtual destination: the remat may define a sub-register, so constrain
// DstReg's register class accordingly and propagate the new subreg index.
990 if (TargetRegisterInfo::isVirtualRegister(DstReg
)) {
991 unsigned NewIdx
= NewMI
->getOperand(0).getSubReg();
993 if (DefRC
!= nullptr) {
995 NewRC
= TRI
->getMatchingSuperRegClass(NewRC
, DefRC
, NewIdx
);
997 NewRC
= TRI
->getCommonSubClass(NewRC
, DefRC
);
998 assert(NewRC
&& "subreg chosen for remat incompatible with instruction");
1000 MRI
->setRegClass(DstReg
, NewRC
);
1002 updateRegDefsUses(DstReg
, DstReg
, DstIdx
);
1003 NewMI
->getOperand(0).setSubReg(NewIdx
);
1004 } else if (NewMI
->getOperand(0).getReg() != CopyDstReg
) {
1005 // The New instruction may be defining a sub-register of what's actually
1006 // been asked for. If so it must implicitly define the whole thing.
1007 assert(TargetRegisterInfo::isPhysicalRegister(DstReg
) &&
1008 "Only expect virtual or physical registers in remat");
1009 NewMI
->getOperand(0).setIsDead(true);
1010 NewMI
->addOperand(MachineOperand::CreateReg(CopyDstReg
,
1014 // Record small dead def live-ranges for all the subregisters
1015 // of the destination register.
1016 // Otherwise, variables that live through may miss some
1017 // interferences, thus creating invalid allocation.
1019 // vreg1 = somedef ; vreg1 GR8
1020 // vreg2 = remat ; vreg2 GR32
1021 // CL = COPY vreg2.sub_8bit
1022 // = somedef vreg1 ; vreg1 GR8
1024 // vreg1 = somedef ; vreg1 GR8
1025 // ECX<def, dead> = remat ; CL<imp-def>
1026 // = somedef vreg1 ; vreg1 GR8
1027 // vreg1 will see the interferences with CL but not with CH since
1028 // no live-ranges would have been created for ECX.
1030 SlotIndex NewMIIdx
= LIS
->getInstructionIndex(NewMI
);
1031 for (MCRegUnitIterator
Units(NewMI
->getOperand(0).getReg(), TRI
);
1032 Units
.isValid(); ++Units
)
1033 if (LiveRange
*LR
= LIS
->getCachedRegUnit(*Units
))
1034 LR
->createDeadDef(NewMIIdx
.getRegSlot(), LIS
->getVNInfoAllocator());
1037 if (NewMI
->getOperand(0).getSubReg())
1038 NewMI
->getOperand(0).setIsUndef();
1040 // CopyMI may have implicit operands, transfer them over to the newly
1041 // rematerialized instruction. And update implicit def interval valnos.
1042 for (unsigned i
= CopyMI
->getDesc().getNumOperands(),
1043 e
= CopyMI
->getNumOperands(); i
!= e
; ++i
) {
1044 MachineOperand
&MO
= CopyMI
->getOperand(i
);
1046 assert(MO
.isImplicit() && "No explicit operands after implict operands.");
1047 // Discard VReg implicit defs.
1048 if (TargetRegisterInfo::isPhysicalRegister(MO
.getReg())) {
1049 NewMI
->addOperand(MO
);
// Give each dead implicit physreg def on NewMI a dead def in its
// register-unit live ranges.
1054 SlotIndex NewMIIdx
= LIS
->getInstructionIndex(NewMI
);
1055 for (unsigned i
= 0, e
= NewMIImplDefs
.size(); i
!= e
; ++i
) {
1056 unsigned Reg
= NewMIImplDefs
[i
];
1057 for (MCRegUnitIterator
Units(Reg
, TRI
); Units
.isValid(); ++Units
)
1058 if (LiveRange
*LR
= LIS
->getCachedRegUnit(*Units
))
1059 LR
->createDeadDef(NewMIIdx
.getRegSlot(), LIS
->getVNInfoAllocator());
1062 DEBUG(dbgs() << "Remat: " << *NewMI
);
1065 // The source interval can become smaller because we removed a use.
1066 shrinkToUses(&SrcInt
, &DeadDefs
);
1067 if (!DeadDefs
.empty()) {
1068 // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
1069 // to describe DstReg instead.
1070 for (MachineOperand
&UseMO
: MRI
->use_operands(SrcReg
)) {
1071 MachineInstr
*UseMI
= UseMO
.getParent();
1072 if (UseMI
->isDebugValue()) {
1073 UseMO
.setReg(DstReg
);
1074 DEBUG(dbgs() << "\t\tupdated: " << *UseMI
);
1077 eliminateDeadDefs();
// NOTE(review): mangled extraction — lines split and some source lines
// missing (gaps in the embedded numbering, e.g. 1086, 1088, 1105-1108).
// Verify against upstream LLVM before editing.
//
// Purpose (from visible code): detect a COPY whose source value is not live
// (i.e. an <undef> copy), remove the value it defines from DstReg's live
// interval (and affected subranges), and mark remaining uses of that value
// as <undef>. Returns true when such a copy was eliminated (per caller in
// joinCopy, which then erases the instruction).
1083 bool RegisterCoalescer::eliminateUndefCopy(MachineInstr
*CopyMI
) {
1084 // ProcessImplicitDefs may leave some copies of <undef> values, it only removes
1085 // local variables. When we have a copy like:
1087 // %vreg1 = COPY %vreg2<undef>
1089 // We delete the copy and remove the corresponding value number from %vreg1.
1090 // Any uses of that value number are marked as <undef>.
1092 // Note that we do not query CoalescerPair here but redo isMoveInstr as the
1093 // CoalescerPair may have a new register class with adjusted subreg indices
1095 unsigned SrcReg
, DstReg
, SrcSubIdx
, DstSubIdx
;
1096 isMoveInstr(*TRI
, CopyMI
, SrcReg
, DstReg
, SrcSubIdx
, DstSubIdx
);
1098 SlotIndex Idx
= LIS
->getInstructionIndex(CopyMI
);
1099 const LiveInterval
&SrcLI
= LIS
->getInterval(SrcReg
);
1100 // CopyMI is undef iff SrcReg is not live before the instruction.
1101 if (SrcSubIdx
!= 0 && SrcLI
.hasSubRanges()) {
// With subrange tracking, only the lanes actually read via SrcSubIdx
// decide liveness.
1102 LaneBitmask SrcMask
= TRI
->getSubRegIndexLaneMask(SrcSubIdx
);
1103 for (const LiveInterval::SubRange
&SR
: SrcLI
.subranges()) {
1104 if ((SR
.LaneMask
& SrcMask
) == 0)
1109 } else if (SrcLI
.liveAt(Idx
))
1112 DEBUG(dbgs() << "\tEliminating copy of <undef> value\n");
1114 // Remove any DstReg segments starting at the instruction.
1115 LiveInterval
&DstLI
= LIS
->getInterval(DstReg
);
1116 SlotIndex RegIndex
= Idx
.getRegSlot();
1117 // Remove value or merge with previous one in case of a subregister def.
1118 if (VNInfo
*PrevVNI
= DstLI
.getVNInfoAt(Idx
)) {
1119 VNInfo
*VNI
= DstLI
.getVNInfoAt(RegIndex
);
1120 DstLI
.MergeValueNumberInto(VNI
, PrevVNI
);
1122 // The affected subregister segments can be removed.
1123 LaneBitmask DstMask
= TRI
->getSubRegIndexLaneMask(DstSubIdx
);
1124 for (LiveInterval::SubRange
&SR
: DstLI
.subranges()) {
1125 if ((SR
.LaneMask
& DstMask
) == 0)
1128 VNInfo
*SVNI
= SR
.getVNInfoAt(RegIndex
);
1129 assert(SVNI
!= nullptr && SlotIndex::isSameInstr(SVNI
->def
, RegIndex
));
1130 SR
.removeValNo(SVNI
);
1132 DstLI
.removeEmptySubRanges();
1134 LIS
->removeVRegDefAt(DstLI
, RegIndex
);
1136 // Mark uses as undef.
1137 for (MachineOperand
&MO
: MRI
->reg_nodbg_operands(DstReg
)) {
1138 if (MO
.isDef() /*|| MO.isUndef()*/)
1140 const MachineInstr
&MI
= *MO
.getParent();
1141 SlotIndex UseIdx
= LIS
->getInstructionIndex(&MI
);
1142 LaneBitmask UseMask
= TRI
->getSubRegIndexLaneMask(MO
.getSubReg());
// When subranges exist and the use touches only some lanes, liveness is
// decided per-lane; otherwise fall back to the main range query below.
1144 if (UseMask
!= ~0u && DstLI
.hasSubRanges()) {
1146 for (const LiveInterval::SubRange
&SR
: DstLI
.subranges()) {
1147 if ((SR
.LaneMask
& UseMask
) == 0)
1149 if (SR
.liveAt(UseIdx
)) {
1155 isLive
= DstLI
.liveAt(UseIdx
);
1158 MO
.setIsUndef(true);
1159 DEBUG(dbgs() << "\tnew undef: " << UseIdx
<< '\t' << MI
);
// NOTE(review): mangled extraction — lines split and some source lines
// missing (gaps in the embedded numbering, e.g. 1165-1166, 1173, 1182-1183).
// Verify against upstream LLVM before editing.
//
// Purpose (from visible code): rewrite every operand mentioning SrcReg to
// use DstReg (with subregister index SubIdx composed in via substVirtReg /
// substPhysReg), fixing <undef> flags for sub-register joins and creating
// subranges on DstReg's interval when subreg liveness is tracked.
1164 void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg
,
1167 bool DstIsPhys
= TargetRegisterInfo::isPhysicalRegister(DstReg
);
1168 LiveInterval
*DstInt
= DstIsPhys
? nullptr : &LIS
->getInterval(DstReg
);
1170 SmallPtrSet
<MachineInstr
*, 8> Visited
;
1171 for (MachineRegisterInfo::reg_instr_iterator
1172 I
= MRI
->reg_instr_begin(SrcReg
), E
= MRI
->reg_instr_end();
// Advance the iterator before rewriting: rewriting removes UseMI from
// SrcReg's use-def chain.
1174 MachineInstr
*UseMI
= &*(I
++);
1176 // Each instruction can only be rewritten once because sub-register
1177 // composition is not always idempotent. When SrcReg != DstReg, rewriting
1178 // the UseMI operands removes them from the SrcReg use-def chain, but when
1179 // SrcReg is DstReg we could encounter UseMI twice if it has multiple
1180 // operands mentioning the virtual register.
1181 if (SrcReg
== DstReg
&& !Visited
.insert(UseMI
).second
)
1184 SmallVector
<unsigned,8> Ops
;
1186 std::tie(Reads
, Writes
) = UseMI
->readsWritesVirtualRegister(SrcReg
, &Ops
);
1188 // If SrcReg wasn't read, it may still be the case that DstReg is live-in
1189 // because SrcReg is a sub-register.
1190 if (DstInt
&& !Reads
&& SubIdx
)
1191 Reads
= DstInt
->liveAt(LIS
->getInstructionIndex(UseMI
));
1193 // Replace SrcReg with DstReg in all UseMI operands.
1194 for (unsigned i
= 0, e
= Ops
.size(); i
!= e
; ++i
) {
1195 MachineOperand
&MO
= UseMI
->getOperand(Ops
[i
]);
1197 // Adjust <undef> flags in case of sub-register joins. We don't want to
1198 // turn a full def into a read-modify-write sub-register def and vice
1200 if (SubIdx
&& MO
.isDef())
1201 MO
.setIsUndef(!Reads
);
1203 // A subreg use of a partially undef (super) register may be a complete
1204 // undef use now and then has to be marked that way.
1205 if (SubIdx
!= 0 && MO
.isUse() && MRI
->shouldTrackSubRegLiveness(DstReg
)) {
// Lazily create subranges covering all lanes the first time they are
// needed on DstReg's interval.
1206 if (!DstInt
->hasSubRanges()) {
1207 BumpPtrAllocator
&Allocator
= LIS
->getVNInfoAllocator();
1208 LaneBitmask Mask
= MRI
->getMaxLaneMaskForVReg(DstInt
->reg
);
1209 DstInt
->createSubRangeFrom(Allocator
, Mask
, *DstInt
);
1211 LaneBitmask Mask
= TRI
->getSubRegIndexLaneMask(SubIdx
);
1212 bool IsUndef
= true;
// DBG_VALUEs have no slot index of their own; query just before them.
1213 SlotIndex MIIdx
= UseMI
->isDebugValue()
1214 ? LIS
->getSlotIndexes()->getIndexBefore(UseMI
)
1215 : LIS
->getInstructionIndex(UseMI
);
1216 SlotIndex UseIdx
= MIIdx
.getRegSlot(true);
1217 for (LiveInterval::SubRange
&S
: DstInt
->subranges()) {
1218 if ((S
.LaneMask
& Mask
) == 0)
1220 if (S
.liveAt(UseIdx
)) {
1226 MO
.setIsUndef(true);
1227 // We found out some subregister use is actually reading an undefined
1228 // value. In some cases the whole vreg has become undefined at this
1229 // point so we have to potentially shrink the main range if the
1230 // use was ending a live segment there.
1231 LiveQueryResult Q
= DstInt
->Query(MIIdx
);
1232 if (Q
.valueOut() == nullptr)
1233 ShrinkMainRange
= true;
// Perform the actual operand substitution (physreg vs virtreg form).
1238 MO
.substPhysReg(DstReg
, *TRI
);
1240 MO
.substVirtReg(DstReg
, SubIdx
, *TRI
);
1244 dbgs() << "\t\tupdated: ";
1245 if (!UseMI
->isDebugValue())
1246 dbgs() << LIS
->getInstructionIndex(UseMI
) << "\t";
// NOTE(review): mangled extraction with missing lines (e.g. 1258-1260,
// 1263-1264); the early-return bodies are not visible. Verify against
// upstream LLVM before editing.
//
// Purpose (from visible code): decide whether a copy involving a physical
// register may be joined — only merges into reserved registers whose source
// interval contains a single value are accepted.
1252 bool RegisterCoalescer::canJoinPhys(const CoalescerPair
&CP
) {
1253 // Always join simple intervals that are defined by a single copy from a
1254 // reserved register. This doesn't increase register pressure, so it is
1255 // always beneficial.
1256 if (!MRI
->isReserved(CP
.getDstReg())) {
1257 DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
1261 LiveInterval
&JoinVInt
= LIS
->getInterval(CP
.getSrcReg());
1262 if (JoinVInt
.containsOneValue())
1265 DEBUG(dbgs() << "\tCannot join complex intervals into reserved register.\n");
// NOTE(review): mangled extraction — lines split and many source lines
// missing (gaps in the embedded numbering, e.g. 1270-1271, 1277-1279,
// 1290-1295). Verify against upstream LLVM before editing.
//
// Purpose (from visible code): the main per-copy driver. Sets up a
// CoalescerPair from CopyMI, handles the easy cases (dead copies, <undef>
// copies, already-identical src/dst), asks the target whether to coalesce,
// attempts joinIntervals(), falls back to rematerialization or local copy
// elimination on failure, and finally rewrites operands and shrinks ranges
// after a successful join. Sets Again=true when a later retry might succeed.
1269 bool RegisterCoalescer::joinCopy(MachineInstr
*CopyMI
, bool &Again
) {
1272 DEBUG(dbgs() << LIS
->getInstructionIndex(CopyMI
) << '\t' << *CopyMI
);
1274 CoalescerPair
CP(*TRI
);
1275 if (!CP
.setRegisters(CopyMI
)) {
1276 DEBUG(dbgs() << "\tNot coalescable.\n");
// Let the subtarget veto the coalesce based on the (possibly flipped)
// register classes and subreg indices.
1280 if (CP
.getNewRC()) {
1281 auto SrcRC
= MRI
->getRegClass(CP
.getSrcReg());
1282 auto DstRC
= MRI
->getRegClass(CP
.getDstReg());
1283 unsigned SrcIdx
= CP
.getSrcIdx();
1284 unsigned DstIdx
= CP
.getDstIdx();
1285 if (CP
.isFlipped()) {
1286 std::swap(SrcIdx
, DstIdx
);
1287 std::swap(SrcRC
, DstRC
);
1289 if (!TRI
->shouldCoalesce(CopyMI
, SrcRC
, SrcIdx
, DstRC
, DstIdx
,
1291 DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");
1296 // Dead code elimination. This really should be handled by MachineDCE, but
1297 // sometimes dead copies slip through, and we can't generate invalid live
1299 if (!CP
.isPhys() && CopyMI
->allDefsAreDead()) {
1300 DEBUG(dbgs() << "\tCopy is dead.\n");
1301 DeadDefs
.push_back(CopyMI
);
1302 eliminateDeadDefs();
1306 // Eliminate undefs.
1307 if (!CP
.isPhys() && eliminateUndefCopy(CopyMI
)) {
1308 LIS
->RemoveMachineInstrFromMaps(CopyMI
);
1309 CopyMI
->eraseFromParent();
1310 return false; // Not coalescable.
1313 // Coalesced copies are normally removed immediately, but transformations
1314 // like removeCopyByCommutingDef() can inadvertently create identity copies.
1315 // When that happens, just join the values and remove the copy.
1316 if (CP
.getSrcReg() == CP
.getDstReg()) {
1317 LiveInterval
&LI
= LIS
->getInterval(CP
.getSrcReg());
1318 DEBUG(dbgs() << "\tCopy already coalesced: " << LI
<< '\n');
1319 const SlotIndex CopyIdx
= LIS
->getInstructionIndex(CopyMI
);
1320 LiveQueryResult LRQ
= LI
.Query(CopyIdx
);
1321 if (VNInfo
*DefVNI
= LRQ
.valueDefined()) {
1322 VNInfo
*ReadVNI
= LRQ
.valueIn();
1323 assert(ReadVNI
&& "No value before copy and no <undef> flag.");
1324 assert(ReadVNI
!= DefVNI
&& "Cannot read and define the same value.");
1325 LI
.MergeValueNumberInto(DefVNI
, ReadVNI
);
1327 // Process subregister liveranges.
1328 for (LiveInterval::SubRange
&S
: LI
.subranges()) {
1329 LiveQueryResult SLRQ
= S
.Query(CopyIdx
);
1330 if (VNInfo
*SDefVNI
= SLRQ
.valueDefined()) {
1331 VNInfo
*SReadVNI
= SLRQ
.valueIn();
1332 S
.MergeValueNumberInto(SDefVNI
, SReadVNI
);
1335 DEBUG(dbgs() << "\tMerged values: " << LI
<< '\n');
1337 LIS
->RemoveMachineInstrFromMaps(CopyMI
);
1338 CopyMI
->eraseFromParent();
1342 // Enforce policies.
1344 DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP
.getSrcReg(), TRI
)
1345 << " with " << PrintReg(CP
.getDstReg(), TRI
, CP
.getSrcIdx())
1347 if (!canJoinPhys(CP
)) {
1348 // Before giving up coalescing, if definition of source is defined by
1349 // trivial computation, try rematerializing it.
1351 if (reMaterializeTrivialDef(CP
, CopyMI
, IsDefCopy
))
1354 Again
= true; // May be possible to coalesce later.
1358 // When possible, let DstReg be the larger interval.
1359 if (!CP
.isPartial() && LIS
->getInterval(CP
.getSrcReg()).size() >
1360 LIS
->getInterval(CP
.getDstReg()).size())
1364 dbgs() << "\tConsidering merging to "
1365 << TRI
->getRegClassName(CP
.getNewRC()) << " with ";
1366 if (CP
.getDstIdx() && CP
.getSrcIdx())
1367 dbgs() << PrintReg(CP
.getDstReg()) << " in "
1368 << TRI
->getSubRegIndexName(CP
.getDstIdx()) << " and "
1369 << PrintReg(CP
.getSrcReg()) << " in "
1370 << TRI
->getSubRegIndexName(CP
.getSrcIdx()) << '\n';
1372 dbgs() << PrintReg(CP
.getSrcReg(), TRI
) << " in "
1373 << PrintReg(CP
.getDstReg(), TRI
, CP
.getSrcIdx()) << '\n';
1378 ShrinkMainRange
= false;
1380 // Okay, attempt to join these two intervals. On failure, this returns false.
1381 // Otherwise, if one of the intervals being joined is a physreg, this method
1382 // always canonicalizes DstInt to be it. The output "SrcInt" will not have
1383 // been modified, so we can use this information below to update aliases.
1384 if (!joinIntervals(CP
)) {
1385 // Coalescing failed.
1387 // If definition of source is defined by trivial computation, try
1388 // rematerializing it.
1390 if (reMaterializeTrivialDef(CP
, CopyMI
, IsDefCopy
))
1393 // If we can eliminate the copy without merging the live segments, do so
1395 if (!CP
.isPartial() && !CP
.isPhys()) {
1396 if (adjustCopiesBackFrom(CP
, CopyMI
) ||
1397 removeCopyByCommutingDef(CP
, CopyMI
)) {
1398 LIS
->RemoveMachineInstrFromMaps(CopyMI
);
1399 CopyMI
->eraseFromParent();
1400 DEBUG(dbgs() << "\tTrivial!\n");
1405 // Otherwise, we are unable to join the intervals.
1406 DEBUG(dbgs() << "\tInterference!\n");
1407 Again
= true; // May be possible to coalesce later.
1411 // Coalescing to a virtual register that is of a sub-register class of the
1412 // other. Make sure the resulting register is set to the right register class.
1413 if (CP
.isCrossClass()) {
1415 MRI
->setRegClass(CP
.getDstReg(), CP
.getNewRC());
1418 // Removing sub-register copies can ease the register class constraints.
1419 // Make sure we attempt to inflate the register class of DstReg.
1420 if (!CP
.isPhys() && RegClassInfo
.isProperSubClass(CP
.getNewRC()))
1421 InflateRegs
.push_back(CP
.getDstReg());
1423 // CopyMI has been erased by joinIntervals at this point. Remove it from
1424 // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back
1425 // to the work list. This keeps ErasedInstrs from growing needlessly.
1426 ErasedInstrs
.erase(CopyMI
);
1428 // Rewrite all SrcReg operands to DstReg.
1429 // Also update DstReg operands to include DstIdx if it is set.
1431 updateRegDefsUses(CP
.getDstReg(), CP
.getDstReg(), CP
.getDstIdx());
1432 updateRegDefsUses(CP
.getSrcReg(), CP
.getDstReg(), CP
.getSrcIdx());
1434 // Shrink subregister ranges if necessary.
1435 if (ShrinkMask
!= 0) {
1436 LiveInterval
&LI
= LIS
->getInterval(CP
.getDstReg());
1437 for (LiveInterval::SubRange
&S
: LI
.subranges()) {
1438 if ((S
.LaneMask
& ShrinkMask
) == 0)
1440 DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S
.LaneMask
)
1442 LIS
->shrinkToUses(S
, LI
.reg
);
1444 LI
.removeEmptySubRanges();
1446 if (ShrinkMainRange
) {
1447 LiveInterval
&LI
= LIS
->getInterval(CP
.getDstReg());
1451 // SrcReg is guaranteed to be the register whose live interval that is
1453 LIS
->removeInterval(CP
.getSrcReg());
1455 // Update regalloc hint.
1456 TRI
->updateRegAllocHint(CP
.getSrcReg(), CP
.getDstReg(), *MF
);
1459 dbgs() << "\tSuccess: " << PrintReg(CP
.getSrcReg(), TRI
, CP
.getSrcIdx())
1460 << " -> " << PrintReg(CP
.getDstReg(), TRI
, CP
.getDstIdx()) << '\n';
1461 dbgs() << "\tResult = ";
1463 dbgs() << PrintReg(CP
.getDstReg(), TRI
);
1465 dbgs() << LIS
->getInterval(CP
.getDstReg());
// NOTE(review): mangled extraction — lines split and some source lines
// missing (gaps in the embedded numbering, e.g. 1479, 1492-1494, 1504,
// 1507-1509, 1546-1548). Verify against upstream LLVM before editing.
//
// Purpose (from visible code): join a virtual register with a reserved
// physical register. Requires the vreg interval (RHS) to contain exactly one
// value and to not overlap any of DstReg's register-unit live ranges. In the
// non-flipped case the physreg def is moved up to the vreg def's slot, after
// checking the intervening instructions for reads or regmask clobbers of
// DstReg. The identity copy is then deleted.
1473 bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair
&CP
) {
1474 unsigned DstReg
= CP
.getDstReg();
1475 assert(CP
.isPhys() && "Must be a physreg copy");
1476 assert(MRI
->isReserved(DstReg
) && "Not a reserved register");
1477 LiveInterval
&RHS
= LIS
->getInterval(CP
.getSrcReg());
1478 DEBUG(dbgs() << "\t\tRHS = " << RHS
<< '\n');
1480 assert(RHS
.containsOneValue() && "Invalid join with reserved register");
1482 // Optimization for reserved registers like ESP. We can only merge with a
1483 // reserved physreg if RHS has a single value that is a copy of DstReg.
1484 // The live range of the reserved register will look like a set of dead defs
1485 // - we don't properly track the live range of reserved registers.
1487 // Deny any overlapping intervals. This depends on all the reserved
1488 // register live ranges to look like dead defs.
1489 for (MCRegUnitIterator
UI(DstReg
, TRI
); UI
.isValid(); ++UI
)
1490 if (RHS
.overlaps(LIS
->getRegUnit(*UI
))) {
1491 DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI
, TRI
) << '\n');
1495 // Skip any value computations, we are not adding new values to the
1496 // reserved register. Also skip merging the live ranges, the reserved
1497 // register live range doesn't need to be accurate as long as all the
1500 // Delete the identity copy.
1501 MachineInstr
*CopyMI
;
1502 if (CP
.isFlipped()) {
1503 CopyMI
= MRI
->getVRegDef(RHS
.reg
);
// Non-flipped case: the single non-debug use of the vreg is the copy into
// the reserved physreg; moving the physreg def up needs extra checks.
1505 if (!MRI
->hasOneNonDBGUse(RHS
.reg
)) {
1506 DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
1510 MachineInstr
*DestMI
= MRI
->getVRegDef(RHS
.reg
);
1511 CopyMI
= &*MRI
->use_instr_nodbg_begin(RHS
.reg
);
1512 const SlotIndex CopyRegIdx
= LIS
->getInstructionIndex(CopyMI
).getRegSlot();
1513 const SlotIndex DestRegIdx
= LIS
->getInstructionIndex(DestMI
).getRegSlot();
1515 // We checked above that there are no interfering defs of the physical
1516 // register. However, for this case, where we intend to move up the def of
1517 // the physical register, we also need to check for interfering uses.
1518 SlotIndexes
*Indexes
= LIS
->getSlotIndexes();
1519 for (SlotIndex SI
= Indexes
->getNextNonNullIndex(DestRegIdx
);
1520 SI
!= CopyRegIdx
; SI
= Indexes
->getNextNonNullIndex(SI
)) {
1521 MachineInstr
*MI
= LIS
->getInstructionFromIndex(SI
);
1522 if (MI
->readsRegister(DstReg
, TRI
)) {
1523 DEBUG(dbgs() << "\t\tInterference (read): " << *MI
);
1527 // We must also check for clobbers caused by regmasks.
1528 for (const auto &MO
: MI
->operands()) {
1529 if (MO
.isRegMask() && MO
.clobbersPhysReg(DstReg
)) {
1530 DEBUG(dbgs() << "\t\tInterference (regmask clobber): " << *MI
);
1536 // We're going to remove the copy which defines a physical reserved
1537 // register, so remove its valno, etc.
1538 DEBUG(dbgs() << "\t\tRemoving phys reg def of " << DstReg
<< " at "
1539 << CopyRegIdx
<< "\n");
1541 LIS
->removePhysRegDefAt(DstReg
, CopyRegIdx
);
1542 // Create a new dead def at the new def location.
1543 for (MCRegUnitIterator
UI(DstReg
, TRI
); UI
.isValid(); ++UI
) {
1544 LiveRange
&LR
= LIS
->getRegUnit(*UI
);
1545 LR
.createDeadDef(DestRegIdx
, LIS
->getVNInfoAllocator());
1549 LIS
->RemoveMachineInstrFromMaps(CopyMI
);
1550 CopyMI
->eraseFromParent();
1552 // We don't track kills for reserved registers.
1553 MRI
->clearKillFlags(CP
.getSrcReg());
1558 //===----------------------------------------------------------------------===//
1559 // Interference checking and interval joining
1560 //===----------------------------------------------------------------------===//
1562 // In the easiest case, the two live ranges being joined are disjoint, and
1563 // there is no interference to consider. It is quite common, though, to have
1564 // overlapping live ranges, and we need to check if the interference can be
1567 // The live range of a single SSA value forms a sub-tree of the dominator tree.
1568 // This means that two SSA values overlap if and only if the def of one value
1569 // is contained in the live range of the other value. As a special case, the
1570 // overlapping values can be defined at the same index.
1572 // The interference from an overlapping def can be resolved in these cases:
1574 // 1. Coalescable copies. The value is defined by a copy that would become an
1575 // identity copy after joining SrcReg and DstReg. The copy instruction will
1576 // be removed, and the value will be merged with the source value.
1578 // There can be several copies back and forth, causing many values to be
1579 // merged into one. We compute a list of ultimate values in the joined live
1580 // range as well as mappings from the old value numbers.
1582 // 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI
1583 // predecessors have a live out value. It doesn't cause real interference,
1584 // and can be merged into the value it overlaps. Like a coalescable copy, it
1585 // can be erased after joining.
1587 // 3. Copy of external value. The overlapping def may be a copy of a value that
1588 // is already in the other register. This is like a coalescable copy, but
1589 // the live range of the source register must be trimmed after erasing the
1590 // copy instruction:
1593 // %dst = COPY %ext <-- Remove this COPY, trim the live range of %ext.
1595 // 4. Clobbering undefined lanes. Vector registers are sometimes built by
1596 // defining one lane at a time:
1598 // %dst:ssub0<def,read-undef> = FOO
1600 // %dst:ssub1<def> = COPY %src
1602 // The live range of %src overlaps the %dst value defined by FOO, but
1603 // merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
1604 // which was undef anyway.
1606 // The value mapping is more complicated in this case. The final live range
1607 // will have different value numbers for both FOO and BAR, but there is no
1608 // simple mapping from old to new values. It may even be necessary to add
1611 // 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
1612 // is live, but never read. This can happen because we don't compute
1613 // individual live ranges per lane.
1617 // %dst:ssub1<def> = COPY %src
1619 // This kind of interference is only resolved locally. If the clobbered
1620 // lane value escapes the block, the join is aborted.
// NOTE(review): mangled extraction — the `class JoinVals {` line itself (and
// several member declarations, e.g. LR, Reg, RedefVNI, OtherVNI, Pruned, and
// the enum's named constants besides CR_Keep) were dropped by the extraction
// (gaps in the embedded numbering). This span is the remaining class body of
// the anonymous-namespace helper class used by joinIntervals(); verify
// against upstream LLVM before editing.
1623 /// Track information about values in a single virtual register about to be
1624 /// joined. Objects of this class are always created in pairs - one for each
1625 /// side of the CoalescerPair (or one for each lane of a side of the coalescer
1628 /// Live range we work on.
1630 /// (Main) register we work on.
1633 /// Reg (and therefore the values in this liverange) will end up as
1634 /// subregister SubIdx in the coalesced register. Either CP.DstIdx or
1636 const unsigned SubIdx
;
1637 /// The LaneMask that this liverange will occupy the coalesced register. May
1638 /// be smaller than the lanemask produced by SubIdx when merging subranges.
1639 const LaneBitmask LaneMask
;
1641 /// This is true when joining sub register ranges, false when joining main
1643 const bool SubRangeJoin
;
1644 /// Whether the current LiveInterval tracks subregister liveness.
1645 const bool TrackSubRegLiveness
;
1647 /// Values that will be present in the final live range.
1648 SmallVectorImpl
<VNInfo
*> &NewVNInfo
;
1650 const CoalescerPair
&CP
;
1652 SlotIndexes
*Indexes
;
1653 const TargetRegisterInfo
*TRI
;
1655 /// Value number assignments. Maps value numbers in LI to entries in
1656 /// NewVNInfo. This is suitable for passing to LiveInterval::join().
1657 SmallVector
<int, 8> Assignments
;
1659 /// Conflict resolution for overlapping values.
1660 enum ConflictResolution
{
1661 /// No overlap, simply keep this value.
1664 /// Merge this value into OtherVNI and erase the defining instruction.
1665 /// Used for IMPLICIT_DEF, coalescable copies, and copies from external
1669 /// Merge this value into OtherVNI but keep the defining instruction.
1670 /// This is for the special case where OtherVNI is defined by the same
1674 /// Keep this value, and have it replace OtherVNI where possible. This
1675 /// complicates value mapping since OtherVNI maps to two different values
1676 /// before and after this def.
1677 /// Used when clobbering undefined or dead lanes.
1680 /// Unresolved conflict. Visit later when all values have been mapped.
1683 /// Unresolvable conflict. Abort the join.
1687 /// Per-value info for LI. The lane bit masks are all relative to the final
1688 /// joined register, so they can be compared directly between SrcReg and
1691 ConflictResolution Resolution
;
1693 /// Lanes written by this def, 0 for unanalyzed values.
1694 LaneBitmask WriteLanes
;
1696 /// Lanes with defined values in this register. Other lanes are undef and
1697 /// safe to clobber.
1698 LaneBitmask ValidLanes
;
1700 /// Value in LI being redefined by this def.
1703 /// Value in the other live range that overlaps this def, if any.
1706 /// Is this value an IMPLICIT_DEF that can be erased?
1708 /// IMPLICIT_DEF values should only exist at the end of a basic block that
1709 /// is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
1710 /// safely erased if they are overlapping a live value in the other live
1713 /// Weird control flow graphs and incomplete PHI handling in
1714 /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
1715 /// longer live ranges. Such IMPLICIT_DEF values should be treated like
1717 bool ErasableImplicitDef
;
1719 /// True when the live range of this value will be pruned because of an
1720 /// overlapping CR_Replace value in the other live range.
1723 /// True once Pruned above has been computed.
1724 bool PrunedComputed
;
// Default-construct as "keep, unanalyzed" — isAnalyzed() below keys off
// WriteLanes == 0.
1726 Val() : Resolution(CR_Keep
), WriteLanes(0), ValidLanes(0),
1727 RedefVNI(nullptr), OtherVNI(nullptr), ErasableImplicitDef(false),
1728 Pruned(false), PrunedComputed(false) {}
1730 bool isAnalyzed() const { return WriteLanes
!= 0; }
1733 /// One entry per value number in LI.
1734 SmallVector
<Val
, 8> Vals
;
1736 /// Compute the bitmask of lanes actually written by DefMI.
1737 /// Set Redef if there are any partial register definitions that depend on the
1738 /// previous value of the register.
1739 LaneBitmask
computeWriteLanes(const MachineInstr
*DefMI
, bool &Redef
) const;
1741 /// Find the ultimate value that VNI was copied from.
1742 std::pair
<const VNInfo
*,unsigned> followCopyChain(const VNInfo
*VNI
) const;
1744 bool valuesIdentical(VNInfo
*Val0
, VNInfo
*Val1
, const JoinVals
&Other
) const;
1746 /// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
1747 /// Return a conflict resolution when possible, but leave the hard cases as
1749 /// Recursively calls computeAssignment() on this and Other, guaranteeing that
1750 /// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
1751 /// The recursion always goes upwards in the dominator tree, making loops
1753 ConflictResolution
analyzeValue(unsigned ValNo
, JoinVals
&Other
);
1755 /// Compute the value assignment for ValNo in RI.
1756 /// This may be called recursively by analyzeValue(), but never for a ValNo on
1758 void computeAssignment(unsigned ValNo
, JoinVals
&Other
);
1760 /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute
1761 /// the extent of the tainted lanes in the block.
1763 /// Multiple values in Other.LR can be affected since partial redefinitions
1764 /// can preserve previously tainted lanes.
1766 /// 1 %dst = VLOAD <-- Define all lanes in %dst
1767 /// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0
1768 /// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0
1769 /// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
1771 /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
1772 /// entry to TaintedVals.
1774 /// Returns false if the tainted lanes extend beyond the basic block.
1775 bool taintExtent(unsigned, LaneBitmask
, JoinVals
&,
1776 SmallVectorImpl
<std::pair
<SlotIndex
, LaneBitmask
> >&);
1778 /// Return true if MI uses any of the given Lanes from Reg.
1779 /// This does not include partial redefinitions of Reg.
1780 bool usesLanes(const MachineInstr
*MI
, unsigned, unsigned, LaneBitmask
) const;
1782 /// Determine if ValNo is a copy of a value number in LR or Other.LR that will
1785 /// %dst = COPY %src
1786 /// %src = COPY %dst <-- This value to be pruned.
1787 /// %dst = COPY %src <-- This value is a copy of a pruned value.
1788 bool isPrunedValue(unsigned ValNo
, JoinVals
&Other
);
1791 JoinVals(LiveRange
&LR
, unsigned Reg
, unsigned SubIdx
, LaneBitmask LaneMask
,
1792 SmallVectorImpl
<VNInfo
*> &newVNInfo
, const CoalescerPair
&cp
,
1793 LiveIntervals
*lis
, const TargetRegisterInfo
*TRI
, bool SubRangeJoin
,
1794 bool TrackSubRegLiveness
)
1795 : LR(LR
), Reg(Reg
), SubIdx(SubIdx
), LaneMask(LaneMask
),
1796 SubRangeJoin(SubRangeJoin
), TrackSubRegLiveness(TrackSubRegLiveness
),
1797 NewVNInfo(newVNInfo
), CP(cp
), LIS(lis
), Indexes(LIS
->getSlotIndexes()),
1798 TRI(TRI
), Assignments(LR
.getNumValNums(), -1), Vals(LR
.getNumValNums())
1801 /// Analyze defs in LR and compute a value mapping in NewVNInfo.
1802 /// Returns false if any conflicts were impossible to resolve.
1803 bool mapValues(JoinVals
&Other
);
1805 /// Try to resolve conflicts that require all values to be mapped.
1806 /// Returns false if any conflicts were impossible to resolve.
1807 bool resolveConflicts(JoinVals
&Other
);
1809 /// Prune the live range of values in Other.LR where they would conflict with
1810 /// CR_Replace values in LR. Collect end points for restoring the live range
1812 void pruneValues(JoinVals
&Other
, SmallVectorImpl
<SlotIndex
> &EndPoints
,
1815 /// Removes subranges starting at copies that get removed. This sometimes
1816 /// happens when undefined subranges are copied around. These ranges contain
1817 /// no useful information and can be removed.
1818 void pruneSubRegValues(LiveInterval
&LI
, LaneBitmask
&ShrinkMask
);
1820 /// Erase any machine instructions that have been coalesced away.
1821 /// Add erased instructions to ErasedInstrs.
1822 /// Add foreign virtual registers to ShrinkRegs if their live range ended at
1823 /// the erased instrs.
1824 void eraseInstrs(SmallPtrSetImpl
<MachineInstr
*> &ErasedInstrs
,
1825 SmallVectorImpl
<unsigned> &ShrinkRegs
);
1827 /// Remove liverange defs at places where implicit defs will be removed.
1828 void removeImplicitDefs();
1830 /// Get the value assignments suitable for passing to LiveInterval::join.
1831 const int *getAssignments() const { return Assignments
.data(); }
1833 } // end anonymous namespace
1835 LaneBitmask
JoinVals::computeWriteLanes(const MachineInstr
*DefMI
, bool &Redef
)
1838 for (const MachineOperand
&MO
: DefMI
->operands()) {
1839 if (!MO
.isReg() || MO
.getReg() != Reg
|| !MO
.isDef())
1841 L
|= TRI
->getSubRegIndexLaneMask(
1842 TRI
->composeSubRegIndices(SubIdx
, MO
.getSubReg()));
1849 std::pair
<const VNInfo
*, unsigned> JoinVals::followCopyChain(
1850 const VNInfo
*VNI
) const {
1851 unsigned Reg
= this->Reg
;
1853 while (!VNI
->isPHIDef()) {
1854 SlotIndex Def
= VNI
->def
;
1855 MachineInstr
*MI
= Indexes
->getInstructionFromIndex(Def
);
1856 assert(MI
&& "No defining instruction");
1857 if (!MI
->isFullCopy())
1858 return std::make_pair(VNI
, Reg
);
1859 unsigned SrcReg
= MI
->getOperand(1).getReg();
1860 if (!TargetRegisterInfo::isVirtualRegister(SrcReg
))
1861 return std::make_pair(VNI
, Reg
);
1863 const LiveInterval
&LI
= LIS
->getInterval(SrcReg
);
1864 const VNInfo
*ValueIn
;
1865 // No subrange involved.
1866 if (!SubRangeJoin
|| !LI
.hasSubRanges()) {
1867 LiveQueryResult LRQ
= LI
.Query(Def
);
1868 ValueIn
= LRQ
.valueIn();
1870 // Query subranges. Pick the first matching one.
1872 for (const LiveInterval::SubRange
&S
: LI
.subranges()) {
1873 // Transform lanemask to a mask in the joined live interval.
1874 LaneBitmask SMask
= TRI
->composeSubRegIndexLaneMask(SubIdx
, S
.LaneMask
);
1875 if ((SMask
& LaneMask
) == 0)
1877 LiveQueryResult LRQ
= S
.Query(Def
);
1878 ValueIn
= LRQ
.valueIn();
1882 if (ValueIn
== nullptr)
1887 return std::make_pair(VNI
, Reg
);
1890 bool JoinVals::valuesIdentical(VNInfo
*Value0
, VNInfo
*Value1
,
1891 const JoinVals
&Other
) const {
1892 const VNInfo
*Orig0
;
1894 std::tie(Orig0
, Reg0
) = followCopyChain(Value0
);
1895 if (Orig0
== Value1
)
1898 const VNInfo
*Orig1
;
1900 std::tie(Orig1
, Reg1
) = Other
.followCopyChain(Value1
);
1902 // The values are equal if they are defined at the same place and use the
1903 // same register. Note that we cannot compare VNInfos directly as some of
1904 // them might be from a copy created in mergeSubRangeInto() while the other
1905 // is from the original LiveInterval.
1906 return Orig0
->def
== Orig1
->def
&& Reg0
== Reg1
;
1909 JoinVals::ConflictResolution
1910 JoinVals::analyzeValue(unsigned ValNo
, JoinVals
&Other
) {
1911 Val
&V
= Vals
[ValNo
];
1912 assert(!V
.isAnalyzed() && "Value has already been analyzed!");
1913 VNInfo
*VNI
= LR
.getValNumInfo(ValNo
);
1914 if (VNI
->isUnused()) {
1919 // Get the instruction defining this value, compute the lanes written.
1920 const MachineInstr
*DefMI
= nullptr;
1921 if (VNI
->isPHIDef()) {
1922 // Conservatively assume that all lanes in a PHI are valid.
1923 LaneBitmask Lanes
= SubRangeJoin
? 1 : TRI
->getSubRegIndexLaneMask(SubIdx
);
1924 V
.ValidLanes
= V
.WriteLanes
= Lanes
;
1926 DefMI
= Indexes
->getInstructionFromIndex(VNI
->def
);
1927 assert(DefMI
!= nullptr);
1929 // We don't care about the lanes when joining subregister ranges.
1930 V
.WriteLanes
= V
.ValidLanes
= 1;
1931 if (DefMI
->isImplicitDef()) {
1933 V
.ErasableImplicitDef
= true;
1937 V
.ValidLanes
= V
.WriteLanes
= computeWriteLanes(DefMI
, Redef
);
1939 // If this is a read-modify-write instruction, there may be more valid
1940 // lanes than the ones written by this instruction.
1941 // This only covers partial redef operands. DefMI may have normal use
1942 // operands reading the register. They don't contribute valid lanes.
1944 // This adds ssub1 to the set of valid lanes in %src:
1946 // %src:ssub1<def> = FOO
1948 // This leaves only ssub1 valid, making any other lanes undef:
1950 // %src:ssub1<def,read-undef> = FOO %src:ssub2
1952 // The <read-undef> flag on the def operand means that old lane values are
1955 V
.RedefVNI
= LR
.Query(VNI
->def
).valueIn();
1956 assert((TrackSubRegLiveness
|| V
.RedefVNI
) &&
1957 "Instruction is reading nonexistent value");
1958 if (V
.RedefVNI
!= nullptr) {
1959 computeAssignment(V
.RedefVNI
->id
, Other
);
1960 V
.ValidLanes
|= Vals
[V
.RedefVNI
->id
].ValidLanes
;
1964 // An IMPLICIT_DEF writes undef values.
1965 if (DefMI
->isImplicitDef()) {
1966 // We normally expect IMPLICIT_DEF values to be live only until the end
1967 // of their block. If the value is really live longer and gets pruned in
1968 // another block, this flag is cleared again.
1969 V
.ErasableImplicitDef
= true;
1970 V
.ValidLanes
&= ~V
.WriteLanes
;
1975 // Find the value in Other that overlaps VNI->def, if any.
1976 LiveQueryResult OtherLRQ
= Other
.LR
.Query(VNI
->def
);
1978 // It is possible that both values are defined by the same instruction, or
1979 // the values are PHIs defined in the same block. When that happens, the two
1980 // values should be merged into one, but not into any preceding value.
1981 // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
1982 if (VNInfo
*OtherVNI
= OtherLRQ
.valueDefined()) {
1983 assert(SlotIndex::isSameInstr(VNI
->def
, OtherVNI
->def
) && "Broken LRQ");
1985 // One value stays, the other is merged. Keep the earlier one, or the first
1987 if (OtherVNI
->def
< VNI
->def
)
1988 Other
.computeAssignment(OtherVNI
->id
, *this);
1989 else if (VNI
->def
< OtherVNI
->def
&& OtherLRQ
.valueIn()) {
1990 // This is an early-clobber def overlapping a live-in value in the other
1991 // register. Not mergeable.
1992 V
.OtherVNI
= OtherLRQ
.valueIn();
1993 return CR_Impossible
;
1995 V
.OtherVNI
= OtherVNI
;
1996 Val
&OtherV
= Other
.Vals
[OtherVNI
->id
];
1997 // Keep this value, check for conflicts when analyzing OtherVNI.
1998 if (!OtherV
.isAnalyzed())
2000 // Both sides have been analyzed now.
2001 // Allow overlapping PHI values. Any real interference would show up in a
2002 // predecessor, the PHI itself can't introduce any conflicts.
2003 if (VNI
->isPHIDef())
2005 if (V
.ValidLanes
& OtherV
.ValidLanes
)
2006 // Overlapping lanes can't be resolved.
2007 return CR_Impossible
;
2012 // No simultaneous def. Is Other live at the def?
2013 V
.OtherVNI
= OtherLRQ
.valueIn();
2015 // No overlap, no conflict.
2018 assert(!SlotIndex::isSameInstr(VNI
->def
, V
.OtherVNI
->def
) && "Broken LRQ");
2020 // We have overlapping values, or possibly a kill of Other.
2021 // Recursively compute assignments up the dominator tree.
2022 Other
.computeAssignment(V
.OtherVNI
->id
, *this);
2023 Val
&OtherV
= Other
.Vals
[V
.OtherVNI
->id
];
2025 // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
2026 // This shouldn't normally happen, but ProcessImplicitDefs can leave such
2027 // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
2030 // WHen it happens, treat that IMPLICIT_DEF as a normal value, and don't try
2031 // to erase the IMPLICIT_DEF instruction.
2032 if (OtherV
.ErasableImplicitDef
&& DefMI
&&
2033 DefMI
->getParent() != Indexes
->getMBBFromIndex(V
.OtherVNI
->def
)) {
2034 DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V
.OtherVNI
->def
2035 << " extends into BB#" << DefMI
->getParent()->getNumber()
2036 << ", keeping it.\n");
2037 OtherV
.ErasableImplicitDef
= false;
2040 // Allow overlapping PHI values. Any real interference would show up in a
2041 // predecessor, the PHI itself can't introduce any conflicts.
2042 if (VNI
->isPHIDef())
2045 // Check for simple erasable conflicts.
2046 if (DefMI
->isImplicitDef()) {
2047 // We need the def for the subregister if there is nothing else live at the
2048 // subrange at this point.
2049 if (TrackSubRegLiveness
2050 && (V
.WriteLanes
& (OtherV
.ValidLanes
| OtherV
.WriteLanes
)) == 0)
2055 // Include the non-conflict where DefMI is a coalescable copy that kills
2056 // OtherVNI. We still want the copy erased and value numbers merged.
2057 if (CP
.isCoalescable(DefMI
)) {
2058 // Some of the lanes copied from OtherVNI may be undef, making them undef
2060 V
.ValidLanes
&= ~V
.WriteLanes
| OtherV
.ValidLanes
;
2064 // This may not be a real conflict if DefMI simply kills Other and defines
2066 if (OtherLRQ
.isKill() && OtherLRQ
.endPoint() <= VNI
->def
)
2069 // Handle the case where VNI and OtherVNI can be proven to be identical:
2071 // %other = COPY %ext
2072 // %this = COPY %ext <-- Erase this copy
2074 if (DefMI
->isFullCopy() && !CP
.isPartial()
2075 && valuesIdentical(VNI
, V
.OtherVNI
, Other
))
2078 // If the lanes written by this instruction were all undef in OtherVNI, it is
2079 // still safe to join the live ranges. This can't be done with a simple value
2080 // mapping, though - OtherVNI will map to multiple values:
2082 // 1 %dst:ssub0 = FOO <-- OtherVNI
2083 // 2 %src = BAR <-- VNI
2084 // 3 %dst:ssub1 = COPY %src<kill> <-- Eliminate this copy.
2086 // 5 QUUX %src<kill>
2088 // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
2089 // handles this complex value mapping.
2090 if ((V
.WriteLanes
& OtherV
.ValidLanes
) == 0)
2093 // If the other live range is killed by DefMI and the live ranges are still
2094 // overlapping, it must be because we're looking at an early clobber def:
2096 // %dst<def,early-clobber> = ASM %src<kill>
2098 // In this case, it is illegal to merge the two live ranges since the early
2099 // clobber def would clobber %src before it was read.
2100 if (OtherLRQ
.isKill()) {
2101 // This case where the def doesn't overlap the kill is handled above.
2102 assert(VNI
->def
.isEarlyClobber() &&
2103 "Only early clobber defs can overlap a kill");
2104 return CR_Impossible
;
2107 // VNI is clobbering live lanes in OtherVNI, but there is still the
2108 // possibility that no instructions actually read the clobbered lanes.
2109 // If we're clobbering all the lanes in OtherVNI, at least one must be read.
2110 // Otherwise Other.RI wouldn't be live here.
2111 if ((TRI
->getSubRegIndexLaneMask(Other
.SubIdx
) & ~V
.WriteLanes
) == 0)
2112 return CR_Impossible
;
2114 // We need to verify that no instructions are reading the clobbered lanes. To
2115 // save compile time, we'll only check that locally. Don't allow the tainted
2116 // value to escape the basic block.
2117 MachineBasicBlock
*MBB
= Indexes
->getMBBFromIndex(VNI
->def
);
2118 if (OtherLRQ
.endPoint() >= Indexes
->getMBBEndIdx(MBB
))
2119 return CR_Impossible
;
2121 // There are still some things that could go wrong besides clobbered lanes
2122 // being read, for example OtherVNI may be only partially redefined in MBB,
2123 // and some clobbered lanes could escape the block. Save this analysis for
2124 // resolveConflicts() when all values have been mapped. We need to know
2125 // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute
2126 // that now - the recursive analyzeValue() calls must go upwards in the
2128 return CR_Unresolved
;
2131 void JoinVals::computeAssignment(unsigned ValNo
, JoinVals
&Other
) {
2132 Val
&V
= Vals
[ValNo
];
2133 if (V
.isAnalyzed()) {
2134 // Recursion should always move up the dominator tree, so ValNo is not
2135 // supposed to reappear before it has been assigned.
2136 assert(Assignments
[ValNo
] != -1 && "Bad recursion?");
2139 switch ((V
.Resolution
= analyzeValue(ValNo
, Other
))) {
2142 // Merge this ValNo into OtherVNI.
2143 assert(V
.OtherVNI
&& "OtherVNI not assigned, can't merge.");
2144 assert(Other
.Vals
[V
.OtherVNI
->id
].isAnalyzed() && "Missing recursion");
2145 Assignments
[ValNo
] = Other
.Assignments
[V
.OtherVNI
->id
];
2146 DEBUG(dbgs() << "\t\tmerge " << PrintReg(Reg
) << ':' << ValNo
<< '@'
2147 << LR
.getValNumInfo(ValNo
)->def
<< " into "
2148 << PrintReg(Other
.Reg
) << ':' << V
.OtherVNI
->id
<< '@'
2149 << V
.OtherVNI
->def
<< " --> @"
2150 << NewVNInfo
[Assignments
[ValNo
]]->def
<< '\n');
2153 case CR_Unresolved
: {
2154 // The other value is going to be pruned if this join is successful.
2155 assert(V
.OtherVNI
&& "OtherVNI not assigned, can't prune");
2156 Val
&OtherV
= Other
.Vals
[V
.OtherVNI
->id
];
2157 // We cannot erase an IMPLICIT_DEF if we don't have valid values for all
2159 if ((OtherV
.WriteLanes
& ~V
.ValidLanes
) != 0 && TrackSubRegLiveness
)
2160 OtherV
.ErasableImplicitDef
= false;
2161 OtherV
.Pruned
= true;
2165 // This value number needs to go in the final joined live range.
2166 Assignments
[ValNo
] = NewVNInfo
.size();
2167 NewVNInfo
.push_back(LR
.getValNumInfo(ValNo
));
2172 bool JoinVals::mapValues(JoinVals
&Other
) {
2173 for (unsigned i
= 0, e
= LR
.getNumValNums(); i
!= e
; ++i
) {
2174 computeAssignment(i
, Other
);
2175 if (Vals
[i
].Resolution
== CR_Impossible
) {
2176 DEBUG(dbgs() << "\t\tinterference at " << PrintReg(Reg
) << ':' << i
2177 << '@' << LR
.getValNumInfo(i
)->def
<< '\n');
2185 taintExtent(unsigned ValNo
, LaneBitmask TaintedLanes
, JoinVals
&Other
,
2186 SmallVectorImpl
<std::pair
<SlotIndex
, LaneBitmask
> > &TaintExtent
) {
2187 VNInfo
*VNI
= LR
.getValNumInfo(ValNo
);
2188 MachineBasicBlock
*MBB
= Indexes
->getMBBFromIndex(VNI
->def
);
2189 SlotIndex MBBEnd
= Indexes
->getMBBEndIdx(MBB
);
2191 // Scan Other.LR from VNI.def to MBBEnd.
2192 LiveInterval::iterator OtherI
= Other
.LR
.find(VNI
->def
);
2193 assert(OtherI
!= Other
.LR
.end() && "No conflict?");
2195 // OtherI is pointing to a tainted value. Abort the join if the tainted
2196 // lanes escape the block.
2197 SlotIndex End
= OtherI
->end
;
2198 if (End
>= MBBEnd
) {
2199 DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other
.Reg
) << ':'
2200 << OtherI
->valno
->id
<< '@' << OtherI
->start
<< '\n');
2203 DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other
.Reg
) << ':'
2204 << OtherI
->valno
->id
<< '@' << OtherI
->start
2205 << " to " << End
<< '\n');
2206 // A dead def is not a problem.
2209 TaintExtent
.push_back(std::make_pair(End
, TaintedLanes
));
2211 // Check for another def in the MBB.
2212 if (++OtherI
== Other
.LR
.end() || OtherI
->start
>= MBBEnd
)
2215 // Lanes written by the new def are no longer tainted.
2216 const Val
&OV
= Other
.Vals
[OtherI
->valno
->id
];
2217 TaintedLanes
&= ~OV
.WriteLanes
;
2220 } while (TaintedLanes
);
2224 bool JoinVals::usesLanes(const MachineInstr
*MI
, unsigned Reg
, unsigned SubIdx
,
2225 LaneBitmask Lanes
) const {
2226 if (MI
->isDebugValue())
2228 for (const MachineOperand
&MO
: MI
->operands()) {
2229 if (!MO
.isReg() || MO
.isDef() || MO
.getReg() != Reg
)
2233 if (Lanes
& TRI
->getSubRegIndexLaneMask(
2234 TRI
->composeSubRegIndices(SubIdx
, MO
.getSubReg())))
2240 bool JoinVals::resolveConflicts(JoinVals
&Other
) {
2241 for (unsigned i
= 0, e
= LR
.getNumValNums(); i
!= e
; ++i
) {
2243 assert (V
.Resolution
!= CR_Impossible
&& "Unresolvable conflict");
2244 if (V
.Resolution
!= CR_Unresolved
)
2246 DEBUG(dbgs() << "\t\tconflict at " << PrintReg(Reg
) << ':' << i
2247 << '@' << LR
.getValNumInfo(i
)->def
<< '\n');
2252 assert(V
.OtherVNI
&& "Inconsistent conflict resolution.");
2253 VNInfo
*VNI
= LR
.getValNumInfo(i
);
2254 const Val
&OtherV
= Other
.Vals
[V
.OtherVNI
->id
];
2256 // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
2257 // join, those lanes will be tainted with a wrong value. Get the extent of
2258 // the tainted lanes.
2259 LaneBitmask TaintedLanes
= V
.WriteLanes
& OtherV
.ValidLanes
;
2260 SmallVector
<std::pair
<SlotIndex
, LaneBitmask
>, 8> TaintExtent
;
2261 if (!taintExtent(i
, TaintedLanes
, Other
, TaintExtent
))
2262 // Tainted lanes would extend beyond the basic block.
2265 assert(!TaintExtent
.empty() && "There should be at least one conflict.");
2267 // Now look at the instructions from VNI->def to TaintExtent (inclusive).
2268 MachineBasicBlock
*MBB
= Indexes
->getMBBFromIndex(VNI
->def
);
2269 MachineBasicBlock::iterator MI
= MBB
->begin();
2270 if (!VNI
->isPHIDef()) {
2271 MI
= Indexes
->getInstructionFromIndex(VNI
->def
);
2272 // No need to check the instruction defining VNI for reads.
2275 assert(!SlotIndex::isSameInstr(VNI
->def
, TaintExtent
.front().first
) &&
2276 "Interference ends on VNI->def. Should have been handled earlier");
2277 MachineInstr
*LastMI
=
2278 Indexes
->getInstructionFromIndex(TaintExtent
.front().first
);
2279 assert(LastMI
&& "Range must end at a proper instruction");
2280 unsigned TaintNum
= 0;
2282 assert(MI
!= MBB
->end() && "Bad LastMI");
2283 if (usesLanes(MI
, Other
.Reg
, Other
.SubIdx
, TaintedLanes
)) {
2284 DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI
);
2287 // LastMI is the last instruction to use the current value.
2288 if (&*MI
== LastMI
) {
2289 if (++TaintNum
== TaintExtent
.size())
2291 LastMI
= Indexes
->getInstructionFromIndex(TaintExtent
[TaintNum
].first
);
2292 assert(LastMI
&& "Range must end at a proper instruction");
2293 TaintedLanes
= TaintExtent
[TaintNum
].second
;
2298 // The tainted lanes are unused.
2299 V
.Resolution
= CR_Replace
;
2305 bool JoinVals::isPrunedValue(unsigned ValNo
, JoinVals
&Other
) {
2306 Val
&V
= Vals
[ValNo
];
2307 if (V
.Pruned
|| V
.PrunedComputed
)
2310 if (V
.Resolution
!= CR_Erase
&& V
.Resolution
!= CR_Merge
)
2313 // Follow copies up the dominator tree and check if any intermediate value
2315 V
.PrunedComputed
= true;
2316 V
.Pruned
= Other
.isPrunedValue(V
.OtherVNI
->id
, *this);
2320 void JoinVals::pruneValues(JoinVals
&Other
,
2321 SmallVectorImpl
<SlotIndex
> &EndPoints
,
2322 bool changeInstrs
) {
2323 for (unsigned i
= 0, e
= LR
.getNumValNums(); i
!= e
; ++i
) {
2324 SlotIndex Def
= LR
.getValNumInfo(i
)->def
;
2325 switch (Vals
[i
].Resolution
) {
2329 // This value takes precedence over the value in Other.LR.
2330 LIS
->pruneValue(Other
.LR
, Def
, &EndPoints
);
2331 // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
2332 // instructions are only inserted to provide a live-out value for PHI
2333 // predecessors, so the instruction should simply go away once its value
2334 // has been replaced.
2335 Val
&OtherV
= Other
.Vals
[Vals
[i
].OtherVNI
->id
];
2336 bool EraseImpDef
= OtherV
.ErasableImplicitDef
&&
2337 OtherV
.Resolution
== CR_Keep
;
2338 if (!Def
.isBlock()) {
2340 // Remove <def,read-undef> flags. This def is now a partial redef.
2341 // Also remove <def,dead> flags since the joined live range will
2342 // continue past this instruction.
2343 for (MachineOperand
&MO
:
2344 Indexes
->getInstructionFromIndex(Def
)->operands()) {
2345 if (MO
.isReg() && MO
.isDef() && MO
.getReg() == Reg
) {
2346 MO
.setIsUndef(EraseImpDef
);
2347 MO
.setIsDead(false);
2351 // This value will reach instructions below, but we need to make sure
2352 // the live range also reaches the instruction at Def.
2354 EndPoints
.push_back(Def
);
2356 DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other
.Reg
) << " at " << Def
2357 << ": " << Other
.LR
<< '\n');
2362 if (isPrunedValue(i
, Other
)) {
2363 // This value is ultimately a copy of a pruned value in LR or Other.LR.
2364 // We can no longer trust the value mapping computed by
2365 // computeAssignment(), the value that was originally copied could have
2367 LIS
->pruneValue(LR
, Def
, &EndPoints
);
2368 DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(Reg
) << " at "
2369 << Def
<< ": " << LR
<< '\n');
2374 llvm_unreachable("Unresolved conflicts");
2379 void JoinVals::pruneSubRegValues(LiveInterval
&LI
, LaneBitmask
&ShrinkMask
)
2381 // Look for values being erased.
2382 bool DidPrune
= false;
2383 for (unsigned i
= 0, e
= LR
.getNumValNums(); i
!= e
; ++i
) {
2384 if (Vals
[i
].Resolution
!= CR_Erase
)
2387 // Check subranges at the point where the copy will be removed.
2388 SlotIndex Def
= LR
.getValNumInfo(i
)->def
;
2389 for (LiveInterval::SubRange
&S
: LI
.subranges()) {
2390 LiveQueryResult Q
= S
.Query(Def
);
2392 // If a subrange starts at the copy then an undefined value has been
2393 // copied and we must remove that subrange value as well.
2394 VNInfo
*ValueOut
= Q
.valueOutOrDead();
2395 if (ValueOut
!= nullptr && Q
.valueIn() == nullptr) {
2396 DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S
.LaneMask
)
2397 << " at " << Def
<< "\n");
2398 LIS
->pruneValue(S
, Def
, nullptr);
2400 // Mark value number as unused.
2401 ValueOut
->markUnused();
2404 // If a subrange ends at the copy, then a value was copied but only
2405 // partially used later. Shrink the subregister range appropriately.
2406 if (Q
.valueIn() != nullptr && Q
.valueOut() == nullptr) {
2407 DEBUG(dbgs() << "\t\tDead uses at sublane " << PrintLaneMask(S
.LaneMask
)
2408 << " at " << Def
<< "\n");
2409 ShrinkMask
|= S
.LaneMask
;
2414 LI
.removeEmptySubRanges();
2417 void JoinVals::removeImplicitDefs() {
2418 for (unsigned i
= 0, e
= LR
.getNumValNums(); i
!= e
; ++i
) {
2420 if (V
.Resolution
!= CR_Keep
|| !V
.ErasableImplicitDef
|| !V
.Pruned
)
2423 VNInfo
*VNI
= LR
.getValNumInfo(i
);
2425 LR
.removeValNo(VNI
);
2429 void JoinVals::eraseInstrs(SmallPtrSetImpl
<MachineInstr
*> &ErasedInstrs
,
2430 SmallVectorImpl
<unsigned> &ShrinkRegs
) {
2431 for (unsigned i
= 0, e
= LR
.getNumValNums(); i
!= e
; ++i
) {
2432 // Get the def location before markUnused() below invalidates it.
2433 SlotIndex Def
= LR
.getValNumInfo(i
)->def
;
2434 switch (Vals
[i
].Resolution
) {
2436 // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
2437 // longer. The IMPLICIT_DEF instructions are only inserted by
2438 // PHIElimination to guarantee that all PHI predecessors have a value.
2439 if (!Vals
[i
].ErasableImplicitDef
|| !Vals
[i
].Pruned
)
2441 // Remove value number i from LR.
2442 VNInfo
*VNI
= LR
.getValNumInfo(i
);
2443 LR
.removeValNo(VNI
);
2444 // Note that this VNInfo is reused and still referenced in NewVNInfo,
2445 // make it appear like an unused value number.
2447 DEBUG(dbgs() << "\t\tremoved " << i
<< '@' << Def
<< ": " << LR
<< '\n');
2452 MachineInstr
*MI
= Indexes
->getInstructionFromIndex(Def
);
2453 assert(MI
&& "No instruction to erase");
2455 unsigned Reg
= MI
->getOperand(1).getReg();
2456 if (TargetRegisterInfo::isVirtualRegister(Reg
) &&
2457 Reg
!= CP
.getSrcReg() && Reg
!= CP
.getDstReg())
2458 ShrinkRegs
.push_back(Reg
);
2460 ErasedInstrs
.insert(MI
);
2461 DEBUG(dbgs() << "\t\terased:\t" << Def
<< '\t' << *MI
);
2462 LIS
->RemoveMachineInstrFromMaps(MI
);
2463 MI
->eraseFromParent();
2472 bool RegisterCoalescer::joinSubRegRanges(LiveRange
&LRange
, LiveRange
&RRange
,
2473 LaneBitmask LaneMask
,
2474 const CoalescerPair
&CP
) {
2475 SmallVector
<VNInfo
*, 16> NewVNInfo
;
2476 JoinVals
RHSVals(RRange
, CP
.getSrcReg(), CP
.getSrcIdx(), LaneMask
,
2477 NewVNInfo
, CP
, LIS
, TRI
, true, true);
2478 JoinVals
LHSVals(LRange
, CP
.getDstReg(), CP
.getDstIdx(), LaneMask
,
2479 NewVNInfo
, CP
, LIS
, TRI
, true, true);
2481 // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs())
2482 // We should be able to resolve all conflicts here as we could successfully do
2483 // it on the mainrange already. There is however a problem when multiple
2484 // ranges get mapped to the "overflow" lane mask bit which creates unexpected
2486 if (!LHSVals
.mapValues(RHSVals
) || !RHSVals
.mapValues(LHSVals
)) {
2487 DEBUG(dbgs() << "*** Couldn't join subrange!\n");
2490 if (!LHSVals
.resolveConflicts(RHSVals
) ||
2491 !RHSVals
.resolveConflicts(LHSVals
)) {
2492 DEBUG(dbgs() << "*** Couldn't join subrange!\n");
2496 // The merging algorithm in LiveInterval::join() can't handle conflicting
2497 // value mappings, so we need to remove any live ranges that overlap a
2498 // CR_Replace resolution. Collect a set of end points that can be used to
2499 // restore the live range after joining.
2500 SmallVector
<SlotIndex
, 8> EndPoints
;
2501 LHSVals
.pruneValues(RHSVals
, EndPoints
, false);
2502 RHSVals
.pruneValues(LHSVals
, EndPoints
, false);
2504 LHSVals
.removeImplicitDefs();
2505 RHSVals
.removeImplicitDefs();
2510 // Join RRange into LHS.
2511 LRange
.join(RRange
, LHSVals
.getAssignments(), RHSVals
.getAssignments(),
2514 DEBUG(dbgs() << "\t\tjoined lanes: " << LRange
<< "\n");
2515 if (EndPoints
.empty())
2518 // Recompute the parts of the live range we had to remove because of
2519 // CR_Replace conflicts.
2520 DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints
.size()
2521 << " points: " << LRange
<< '\n');
2522 LIS
->extendToIndices(LRange
, EndPoints
);
2526 bool RegisterCoalescer::mergeSubRangeInto(LiveInterval
&LI
,
2527 const LiveRange
&ToMerge
,
2528 LaneBitmask LaneMask
,
2529 CoalescerPair
&CP
) {
2530 BumpPtrAllocator
&Allocator
= LIS
->getVNInfoAllocator();
2531 for (LiveInterval::SubRange
&R
: LI
.subranges()) {
2532 LaneBitmask RMask
= R
.LaneMask
;
2533 // LaneMask of subregisters common to subrange R and ToMerge.
2534 LaneBitmask Common
= RMask
& LaneMask
;
2535 // There is nothing to do without common subregs.
2539 DEBUG(dbgs() << "\t\tCopy+Merge " << PrintLaneMask(RMask
) << " into "
2540 << PrintLaneMask(Common
) << '\n');
2541 // LaneMask of subregisters contained in the R range but not in ToMerge,
2542 // they have to split into their own subrange.
2543 LaneBitmask LRest
= RMask
& ~LaneMask
;
2544 LiveInterval::SubRange
*CommonRange
;
2547 DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(LRest
) << '\n');
2548 // Duplicate SubRange for newly merged common stuff.
2549 CommonRange
= LI
.createSubRangeFrom(Allocator
, Common
, R
);
2551 // Reuse the existing range.
2552 R
.LaneMask
= Common
;
2555 LiveRange
RangeCopy(ToMerge
, Allocator
);
2556 if (!joinSubRegRanges(*CommonRange
, RangeCopy
, Common
, CP
))
2561 if (LaneMask
!= 0) {
2562 DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(LaneMask
) << '\n');
2563 LI
.createSubRangeFrom(Allocator
, LaneMask
, ToMerge
);
2568 bool RegisterCoalescer::joinVirtRegs(CoalescerPair
&CP
) {
2569 SmallVector
<VNInfo
*, 16> NewVNInfo
;
2570 LiveInterval
&RHS
= LIS
->getInterval(CP
.getSrcReg());
2571 LiveInterval
&LHS
= LIS
->getInterval(CP
.getDstReg());
2572 bool TrackSubRegLiveness
= MRI
->shouldTrackSubRegLiveness(*CP
.getNewRC());
2573 JoinVals
RHSVals(RHS
, CP
.getSrcReg(), CP
.getSrcIdx(), 0, NewVNInfo
, CP
, LIS
,
2574 TRI
, false, TrackSubRegLiveness
);
2575 JoinVals
LHSVals(LHS
, CP
.getDstReg(), CP
.getDstIdx(), 0, NewVNInfo
, CP
, LIS
,
2576 TRI
, false, TrackSubRegLiveness
);
2578 DEBUG(dbgs() << "\t\tRHS = " << RHS
2579 << "\n\t\tLHS = " << LHS
2582 // First compute NewVNInfo and the simple value mappings.
2583 // Detect impossible conflicts early.
2584 if (!LHSVals
.mapValues(RHSVals
) || !RHSVals
.mapValues(LHSVals
))
2587 // Some conflicts can only be resolved after all values have been mapped.
2588 if (!LHSVals
.resolveConflicts(RHSVals
) || !RHSVals
.resolveConflicts(LHSVals
))
2591 // All clear, the live ranges can be merged.
2592 if (RHS
.hasSubRanges() || LHS
.hasSubRanges()) {
2593 BumpPtrAllocator
&Allocator
= LIS
->getVNInfoAllocator();
2595 // Transform lanemasks from the LHS to masks in the coalesced register and
2596 // create initial subranges if necessary.
2597 unsigned DstIdx
= CP
.getDstIdx();
2598 if (!LHS
.hasSubRanges()) {
2599 LaneBitmask Mask
= DstIdx
== 0 ? CP
.getNewRC()->getLaneMask()
2600 : TRI
->getSubRegIndexLaneMask(DstIdx
);
2601 // LHS must support subregs or we wouldn't be in this codepath.
2603 LHS
.createSubRangeFrom(Allocator
, Mask
, LHS
);
2604 } else if (DstIdx
!= 0) {
2605 // Transform LHS lanemasks to new register class if necessary.
2606 for (LiveInterval::SubRange
&R
: LHS
.subranges()) {
2607 LaneBitmask Mask
= TRI
->composeSubRegIndexLaneMask(DstIdx
, R
.LaneMask
);
2611 DEBUG(dbgs() << "\t\tLHST = " << PrintReg(CP
.getDstReg())
2612 << ' ' << LHS
<< '\n');
2614 // Determine lanemasks of RHS in the coalesced register and merge subranges.
2615 unsigned SrcIdx
= CP
.getSrcIdx();
2617 if (!RHS
.hasSubRanges()) {
2618 LaneBitmask Mask
= SrcIdx
== 0 ? CP
.getNewRC()->getLaneMask()
2619 : TRI
->getSubRegIndexLaneMask(SrcIdx
);
2620 if (!mergeSubRangeInto(LHS
, RHS
, Mask
, CP
))
2623 // Pair up subranges and merge.
2624 for (LiveInterval::SubRange
&R
: RHS
.subranges()) {
2625 LaneBitmask Mask
= TRI
->composeSubRegIndexLaneMask(SrcIdx
, R
.LaneMask
);
2626 if (!mergeSubRangeInto(LHS
, R
, Mask
, CP
)) {
2633 // This shouldn't have happened :-(
2634 // However we are aware of at least one existing problem where we
2635 // can't merge subranges when multiple ranges end up in the
2636 // "overflow bit" 32. As a workaround we drop all subregister ranges
2637 // which means we loose some precision but are back to a well defined
2639 assert(TargetRegisterInfo::isImpreciseLaneMask(
2640 CP
.getNewRC()->getLaneMask())
2641 && "SubRange merge should only fail when merging into bit 32.");
2642 DEBUG(dbgs() << "\tSubrange join aborted!\n");
2643 LHS
.clearSubRanges();
2644 RHS
.clearSubRanges();
2646 DEBUG(dbgs() << "\tJoined SubRanges " << LHS
<< "\n");
2648 LHSVals
.pruneSubRegValues(LHS
, ShrinkMask
);
2649 RHSVals
.pruneSubRegValues(LHS
, ShrinkMask
);
2653 // The merging algorithm in LiveInterval::join() can't handle conflicting
2654 // value mappings, so we need to remove any live ranges that overlap a
2655 // CR_Replace resolution. Collect a set of end points that can be used to
2656 // restore the live range after joining.
2657 SmallVector
<SlotIndex
, 8> EndPoints
;
2658 LHSVals
.pruneValues(RHSVals
, EndPoints
, true);
2659 RHSVals
.pruneValues(LHSVals
, EndPoints
, true);
2661 // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
2662 // registers to require trimming.
2663 SmallVector
<unsigned, 8> ShrinkRegs
;
2664 LHSVals
.eraseInstrs(ErasedInstrs
, ShrinkRegs
);
2665 RHSVals
.eraseInstrs(ErasedInstrs
, ShrinkRegs
);
2666 while (!ShrinkRegs
.empty())
2667 shrinkToUses(&LIS
->getInterval(ShrinkRegs
.pop_back_val()));
2669 // Join RHS into LHS.
2670 LHS
.join(RHS
, LHSVals
.getAssignments(), RHSVals
.getAssignments(), NewVNInfo
);
2672 // Kill flags are going to be wrong if the live ranges were overlapping.
2673 // Eventually, we should simply clear all kill flags when computing live
2674 // ranges. They are reinserted after register allocation.
2675 MRI
->clearKillFlags(LHS
.reg
);
2676 MRI
->clearKillFlags(RHS
.reg
);
2678 if (!EndPoints
.empty()) {
2679 // Recompute the parts of the live range we had to remove because of
2680 // CR_Replace conflicts.
2681 DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints
.size()
2682 << " points: " << LHS
<< '\n');
2683 LIS
->extendToIndices((LiveRange
&)LHS
, EndPoints
);
2689 bool RegisterCoalescer::joinIntervals(CoalescerPair
&CP
) {
2690 return CP
.isPhys() ? joinReservedPhysReg(CP
) : joinVirtRegs(CP
);
2694 /// Information concerning MBB coalescing priority.
2695 struct MBBPriorityInfo
{
2696 MachineBasicBlock
*MBB
;
2700 MBBPriorityInfo(MachineBasicBlock
*mbb
, unsigned depth
, bool issplit
)
2701 : MBB(mbb
), Depth(depth
), IsSplit(issplit
) {}
2705 /// C-style comparator that sorts first based on the loop depth of the basic
2706 /// block (the unsigned), and then on the MBB number.
2708 /// EnableGlobalCopies assumes that the primary sort key is loop depth.
2709 static int compareMBBPriority(const MBBPriorityInfo
*LHS
,
2710 const MBBPriorityInfo
*RHS
) {
2711 // Deeper loops first
2712 if (LHS
->Depth
!= RHS
->Depth
)
2713 return LHS
->Depth
> RHS
->Depth
? -1 : 1;
2715 // Try to unsplit critical edges next.
2716 if (LHS
->IsSplit
!= RHS
->IsSplit
)
2717 return LHS
->IsSplit
? -1 : 1;
2719 // Prefer blocks that are more connected in the CFG. This takes care of
2720 // the most difficult copies first while intervals are short.
2721 unsigned cl
= LHS
->MBB
->pred_size() + LHS
->MBB
->succ_size();
2722 unsigned cr
= RHS
->MBB
->pred_size() + RHS
->MBB
->succ_size();
2724 return cl
> cr
? -1 : 1;
2726 // As a last resort, sort by block number.
2727 return LHS
->MBB
->getNumber() < RHS
->MBB
->getNumber() ? -1 : 1;
2730 /// \returns true if the given copy uses or defines a local live range.
2731 static bool isLocalCopy(MachineInstr
*Copy
, const LiveIntervals
*LIS
) {
2732 if (!Copy
->isCopy())
2735 if (Copy
->getOperand(1).isUndef())
2738 unsigned SrcReg
= Copy
->getOperand(1).getReg();
2739 unsigned DstReg
= Copy
->getOperand(0).getReg();
2740 if (TargetRegisterInfo::isPhysicalRegister(SrcReg
)
2741 || TargetRegisterInfo::isPhysicalRegister(DstReg
))
2744 return LIS
->intervalIsInOneMBB(LIS
->getInterval(SrcReg
))
2745 || LIS
->intervalIsInOneMBB(LIS
->getInterval(DstReg
));
2748 bool RegisterCoalescer::
2749 copyCoalesceWorkList(MutableArrayRef
<MachineInstr
*> CurrList
) {
2750 bool Progress
= false;
2751 for (unsigned i
= 0, e
= CurrList
.size(); i
!= e
; ++i
) {
2754 // Skip instruction pointers that have already been erased, for example by
2755 // dead code elimination.
2756 if (ErasedInstrs
.erase(CurrList
[i
])) {
2757 CurrList
[i
] = nullptr;
2761 bool Success
= joinCopy(CurrList
[i
], Again
);
2762 Progress
|= Success
;
2763 if (Success
|| !Again
)
2764 CurrList
[i
] = nullptr;
2769 /// Check if DstReg is a terminal node.
2770 /// I.e., it does not have any affinity other than \p Copy.
2771 static bool isTerminalReg(unsigned DstReg
, const MachineInstr
&Copy
,
2772 const MachineRegisterInfo
*MRI
) {
2773 assert(Copy
.isCopyLike());
2774 // Check if the destination of this copy as any other affinity.
2775 for (const MachineInstr
&MI
: MRI
->reg_nodbg_instructions(DstReg
))
2776 if (&MI
!= &Copy
&& MI
.isCopyLike())
2781 bool RegisterCoalescer::applyTerminalRule(const MachineInstr
&Copy
) const {
2782 assert(Copy
.isCopyLike());
2783 if (!UseTerminalRule
)
2785 unsigned DstReg
, DstSubReg
, SrcReg
, SrcSubReg
;
2786 isMoveInstr(*TRI
, &Copy
, SrcReg
, DstReg
, SrcSubReg
, DstSubReg
);
2787 // Check if the destination of this copy has any other affinity.
2788 if (TargetRegisterInfo::isPhysicalRegister(DstReg
) ||
2789 // If SrcReg is a physical register, the copy won't be coalesced.
2790 // Ignoring it may have other side effect (like missing
2791 // rematerialization). So keep it.
2792 TargetRegisterInfo::isPhysicalRegister(SrcReg
) ||
2793 !isTerminalReg(DstReg
, Copy
, MRI
))
2796 // DstReg is a terminal node. Check if it interferes with any other
2797 // copy involving SrcReg.
2798 const MachineBasicBlock
*OrigBB
= Copy
.getParent();
2799 const LiveInterval
&DstLI
= LIS
->getInterval(DstReg
);
2800 for (const MachineInstr
&MI
: MRI
->reg_nodbg_instructions(SrcReg
)) {
2801 // Technically we should check if the weight of the new copy is
2802 // interesting compared to the other one and update the weight
2803 // of the copies accordingly. However, this would only work if
2804 // we would gather all the copies first then coalesce, whereas
2805 // right now we interleave both actions.
2806 // For now, just consider the copies that are in the same block.
2807 if (&MI
== &Copy
|| !MI
.isCopyLike() || MI
.getParent() != OrigBB
)
2809 unsigned OtherReg
, OtherSubReg
, OtherSrcReg
, OtherSrcSubReg
;
2810 isMoveInstr(*TRI
, &Copy
, OtherSrcReg
, OtherReg
, OtherSrcSubReg
,
2812 if (OtherReg
== SrcReg
)
2813 OtherReg
= OtherSrcReg
;
2814 // Check if OtherReg is a non-terminal.
2815 if (TargetRegisterInfo::isPhysicalRegister(OtherReg
) ||
2816 isTerminalReg(OtherReg
, MI
, MRI
))
2818 // Check that OtherReg interfere with DstReg.
2819 if (LIS
->getInterval(OtherReg
).overlaps(DstLI
)) {
2820 DEBUG(dbgs() << "Apply terminal rule for: " << PrintReg(DstReg
) << '\n');
2828 RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock
*MBB
) {
2829 DEBUG(dbgs() << MBB
->getName() << ":\n");
2831 // Collect all copy-like instructions in MBB. Don't start coalescing anything
2832 // yet, it might invalidate the iterator.
2833 const unsigned PrevSize
= WorkList
.size();
2834 if (JoinGlobalCopies
) {
2835 SmallVector
<MachineInstr
*, 2> LocalTerminals
;
2836 SmallVector
<MachineInstr
*, 2> GlobalTerminals
;
2837 // Coalesce copies bottom-up to coalesce local defs before local uses. They
2838 // are not inherently easier to resolve, but slightly preferable until we
2839 // have local live range splitting. In particular this is required by
2840 // cmp+jmp macro fusion.
2841 for (MachineBasicBlock::iterator MII
= MBB
->begin(), E
= MBB
->end();
2843 if (!MII
->isCopyLike())
2845 bool ApplyTerminalRule
= applyTerminalRule(*MII
);
2846 if (isLocalCopy(&(*MII
), LIS
)) {
2847 if (ApplyTerminalRule
)
2848 LocalTerminals
.push_back(&(*MII
));
2850 LocalWorkList
.push_back(&(*MII
));
2852 if (ApplyTerminalRule
)
2853 GlobalTerminals
.push_back(&(*MII
));
2855 WorkList
.push_back(&(*MII
));
2858 // Append the copies evicted by the terminal rule at the end of the list.
2859 LocalWorkList
.append(LocalTerminals
.begin(), LocalTerminals
.end());
2860 WorkList
.append(GlobalTerminals
.begin(), GlobalTerminals
.end());
2863 SmallVector
<MachineInstr
*, 2> Terminals
;
2864 for (MachineBasicBlock::iterator MII
= MBB
->begin(), E
= MBB
->end();
2866 if (MII
->isCopyLike()) {
2867 if (applyTerminalRule(*MII
))
2868 Terminals
.push_back(&(*MII
));
2870 WorkList
.push_back(MII
);
2872 // Append the copies evicted by the terminal rule at the end of the list.
2873 WorkList
.append(Terminals
.begin(), Terminals
.end());
2875 // Try coalescing the collected copies immediately, and remove the nulls.
2876 // This prevents the WorkList from getting too large since most copies are
2877 // joinable on the first attempt.
2878 MutableArrayRef
<MachineInstr
*>
2879 CurrList(WorkList
.begin() + PrevSize
, WorkList
.end());
2880 if (copyCoalesceWorkList(CurrList
))
2881 WorkList
.erase(std::remove(WorkList
.begin() + PrevSize
, WorkList
.end(),
2882 (MachineInstr
*)nullptr), WorkList
.end());
2885 void RegisterCoalescer::coalesceLocals() {
2886 copyCoalesceWorkList(LocalWorkList
);
2887 for (unsigned j
= 0, je
= LocalWorkList
.size(); j
!= je
; ++j
) {
2888 if (LocalWorkList
[j
])
2889 WorkList
.push_back(LocalWorkList
[j
]);
2891 LocalWorkList
.clear();
2894 void RegisterCoalescer::joinAllIntervals() {
2895 DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
2896 assert(WorkList
.empty() && LocalWorkList
.empty() && "Old data still around.");
2898 std::vector
<MBBPriorityInfo
> MBBs
;
2899 MBBs
.reserve(MF
->size());
2900 for (MachineFunction::iterator I
= MF
->begin(), E
= MF
->end();I
!= E
;++I
){
2901 MachineBasicBlock
*MBB
= &*I
;
2902 MBBs
.push_back(MBBPriorityInfo(MBB
, Loops
->getLoopDepth(MBB
),
2903 JoinSplitEdges
&& isSplitEdge(MBB
)));
2905 array_pod_sort(MBBs
.begin(), MBBs
.end(), compareMBBPriority
);
2907 // Coalesce intervals in MBB priority order.
2908 unsigned CurrDepth
= UINT_MAX
;
2909 for (unsigned i
= 0, e
= MBBs
.size(); i
!= e
; ++i
) {
2910 // Try coalescing the collected local copies for deeper loops.
2911 if (JoinGlobalCopies
&& MBBs
[i
].Depth
< CurrDepth
) {
2913 CurrDepth
= MBBs
[i
].Depth
;
2915 copyCoalesceInMBB(MBBs
[i
].MBB
);
2919 // Joining intervals can allow other intervals to be joined. Iteratively join
2920 // until we make no progress.
2921 while (copyCoalesceWorkList(WorkList
))
2925 void RegisterCoalescer::releaseMemory() {
2926 ErasedInstrs
.clear();
2929 InflateRegs
.clear();
2932 bool RegisterCoalescer::runOnMachineFunction(MachineFunction
&fn
) {
2934 MRI
= &fn
.getRegInfo();
2935 TM
= &fn
.getTarget();
2936 const TargetSubtargetInfo
&STI
= fn
.getSubtarget();
2937 TRI
= STI
.getRegisterInfo();
2938 TII
= STI
.getInstrInfo();
2939 LIS
= &getAnalysis
<LiveIntervals
>();
2940 AA
= &getAnalysis
<AAResultsWrapperPass
>().getAAResults();
2941 Loops
= &getAnalysis
<MachineLoopInfo
>();
2942 if (EnableGlobalCopies
== cl::BOU_UNSET
)
2943 JoinGlobalCopies
= STI
.enableJoinGlobalCopies();
2945 JoinGlobalCopies
= (EnableGlobalCopies
== cl::BOU_TRUE
);
2947 // The MachineScheduler does not currently require JoinSplitEdges. This will
2948 // either be enabled unconditionally or replaced by a more general live range
2949 // splitting optimization.
2950 JoinSplitEdges
= EnableJoinSplits
;
2952 DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
2953 << "********** Function: " << MF
->getName() << '\n');
2955 if (VerifyCoalescing
)
2956 MF
->verify(this, "Before register coalescing");
2958 RegClassInfo
.runOnMachineFunction(fn
);
2960 // Join (coalesce) intervals if requested.
2964 // After deleting a lot of copies, register classes may be less constrained.
2965 // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
2967 array_pod_sort(InflateRegs
.begin(), InflateRegs
.end());
2968 InflateRegs
.erase(std::unique(InflateRegs
.begin(), InflateRegs
.end()),
2970 DEBUG(dbgs() << "Trying to inflate " << InflateRegs
.size() << " regs.\n");
2971 for (unsigned i
= 0, e
= InflateRegs
.size(); i
!= e
; ++i
) {
2972 unsigned Reg
= InflateRegs
[i
];
2973 if (MRI
->reg_nodbg_empty(Reg
))
2975 if (MRI
->recomputeRegClass(Reg
)) {
2976 DEBUG(dbgs() << PrintReg(Reg
) << " inflated to "
2977 << TRI
->getRegClassName(MRI
->getRegClass(Reg
)) << '\n');
2980 LiveInterval
&LI
= LIS
->getInterval(Reg
);
2981 if (LI
.hasSubRanges()) {
2982 // If the inflated register class does not support subregisters anymore
2983 // remove the subranges.
2984 if (!MRI
->shouldTrackSubRegLiveness(Reg
)) {
2985 LI
.clearSubRanges();
2988 LaneBitmask MaxMask
= MRI
->getMaxLaneMaskForVReg(Reg
);
2989 // If subranges are still supported, then the same subregs
2990 // should still be supported.
2991 for (LiveInterval::SubRange
&S
: LI
.subranges()) {
2992 assert((S
.LaneMask
& ~MaxMask
) == 0);
3001 if (VerifyCoalescing
)
3002 MF
->verify(this, "After register coalescing");
3006 void RegisterCoalescer::print(raw_ostream
&O
, const Module
* m
) const {