1 //===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This pass duplicates basic blocks ending in unconditional branches into
11 // the tails of their predecessors.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/CodeGen/Passes.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/SetVector.h"
18 #include "llvm/ADT/SmallSet.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineModuleInfo.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/MachineSSAUpdater.h"
26 #include "llvm/CodeGen/RegisterScavenging.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/Support/CommandLine.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include "llvm/Support/raw_ostream.h"
32 #include "llvm/Target/TargetInstrInfo.h"
33 #include "llvm/Target/TargetRegisterInfo.h"
34 #include "llvm/Target/TargetSubtargetInfo.h"
37 #define DEBUG_TYPE "tailduplication"
39 STATISTIC(NumTails
, "Number of tails duplicated");
40 STATISTIC(NumTailDups
, "Number of tail duplicated blocks");
41 STATISTIC(NumInstrDups
, "Additional instructions due to tail duplication");
42 STATISTIC(NumDeadBlocks
, "Number of dead blocks removed");
43 STATISTIC(NumAddedPHIs
, "Number of phis added");
45 // Heuristic for tail duplication.
46 static cl::opt
<unsigned>
47 TailDuplicateSize("tail-dup-size",
48 cl::desc("Maximum instructions to consider tail duplicating"),
49 cl::init(2), cl::Hidden
);
52 TailDupVerify("tail-dup-verify",
53 cl::desc("Verify sanity of PHI instructions during taildup"),
54 cl::init(false), cl::Hidden
);
56 static cl::opt
<unsigned>
57 TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden
);
59 typedef std::vector
<std::pair
<MachineBasicBlock
*,unsigned> > AvailableValsTy
;
62 /// Perform tail duplication.
63 class TailDuplicatePass
: public MachineFunctionPass
{
64 const TargetInstrInfo
*TII
;
65 const TargetRegisterInfo
*TRI
;
66 const MachineBranchProbabilityInfo
*MBPI
;
67 MachineModuleInfo
*MMI
;
68 MachineRegisterInfo
*MRI
;
69 std::unique_ptr
<RegScavenger
> RS
;
72 // A list of virtual registers for which to update SSA form.
73 SmallVector
<unsigned, 16> SSAUpdateVRs
;
75 // For each virtual register in SSAUpdateVals keep a list of source virtual
77 DenseMap
<unsigned, AvailableValsTy
> SSAUpdateVals
;
81 explicit TailDuplicatePass() :
82 MachineFunctionPass(ID
), PreRegAlloc(false) {}
84 bool runOnMachineFunction(MachineFunction
&MF
) override
;
86 void getAnalysisUsage(AnalysisUsage
&AU
) const override
;
89 void AddSSAUpdateEntry(unsigned OrigReg
, unsigned NewReg
,
90 MachineBasicBlock
*BB
);
91 void ProcessPHI(MachineInstr
*MI
, MachineBasicBlock
*TailBB
,
92 MachineBasicBlock
*PredBB
,
93 DenseMap
<unsigned, unsigned> &LocalVRMap
,
94 SmallVectorImpl
<std::pair
<unsigned,unsigned> > &Copies
,
95 const DenseSet
<unsigned> &UsedByPhi
,
97 void DuplicateInstruction(MachineInstr
*MI
,
98 MachineBasicBlock
*TailBB
,
99 MachineBasicBlock
*PredBB
,
101 DenseMap
<unsigned, unsigned> &LocalVRMap
,
102 const DenseSet
<unsigned> &UsedByPhi
);
103 void UpdateSuccessorsPHIs(MachineBasicBlock
*FromBB
, bool isDead
,
104 SmallVectorImpl
<MachineBasicBlock
*> &TDBBs
,
105 SmallSetVector
<MachineBasicBlock
*, 8> &Succs
);
106 bool TailDuplicateBlocks(MachineFunction
&MF
);
107 bool shouldTailDuplicate(const MachineFunction
&MF
,
108 bool IsSimple
, MachineBasicBlock
&TailBB
);
109 bool isSimpleBB(MachineBasicBlock
*TailBB
);
110 bool canCompletelyDuplicateBB(MachineBasicBlock
&BB
);
111 bool duplicateSimpleBB(MachineBasicBlock
*TailBB
,
112 SmallVectorImpl
<MachineBasicBlock
*> &TDBBs
,
113 const DenseSet
<unsigned> &RegsUsedByPhi
,
114 SmallVectorImpl
<MachineInstr
*> &Copies
);
115 bool TailDuplicate(MachineBasicBlock
*TailBB
,
118 SmallVectorImpl
<MachineBasicBlock
*> &TDBBs
,
119 SmallVectorImpl
<MachineInstr
*> &Copies
);
120 bool TailDuplicateAndUpdate(MachineBasicBlock
*MBB
,
122 MachineFunction
&MF
);
124 void RemoveDeadBlock(MachineBasicBlock
*MBB
);
127 char TailDuplicatePass::ID
= 0;
130 char &llvm::TailDuplicateID
= TailDuplicatePass::ID
;
132 INITIALIZE_PASS(TailDuplicatePass
, "tailduplication", "Tail Duplication",
135 bool TailDuplicatePass::runOnMachineFunction(MachineFunction
&MF
) {
136 if (skipOptnoneFunction(*MF
.getFunction()))
139 TII
= MF
.getSubtarget().getInstrInfo();
140 TRI
= MF
.getSubtarget().getRegisterInfo();
141 MRI
= &MF
.getRegInfo();
142 MMI
= getAnalysisIfAvailable
<MachineModuleInfo
>();
143 MBPI
= &getAnalysis
<MachineBranchProbabilityInfo
>();
145 PreRegAlloc
= MRI
->isSSA();
147 if (MRI
->tracksLiveness() && TRI
->trackLivenessAfterRegAlloc(MF
))
148 RS
.reset(new RegScavenger());
150 bool MadeChange
= false;
151 while (TailDuplicateBlocks(MF
))
157 void TailDuplicatePass::getAnalysisUsage(AnalysisUsage
&AU
) const {
158 AU
.addRequired
<MachineBranchProbabilityInfo
>();
159 MachineFunctionPass::getAnalysisUsage(AU
);
162 static void VerifyPHIs(MachineFunction
&MF
, bool CheckExtra
) {
163 for (MachineFunction::iterator I
= ++MF
.begin(), E
= MF
.end(); I
!= E
; ++I
) {
164 MachineBasicBlock
*MBB
= &*I
;
165 SmallSetVector
<MachineBasicBlock
*, 8> Preds(MBB
->pred_begin(),
167 MachineBasicBlock::iterator MI
= MBB
->begin();
168 while (MI
!= MBB
->end()) {
171 for (SmallSetVector
<MachineBasicBlock
*, 8>::iterator PI
= Preds
.begin(),
172 PE
= Preds
.end(); PI
!= PE
; ++PI
) {
173 MachineBasicBlock
*PredBB
= *PI
;
175 for (unsigned i
= 1, e
= MI
->getNumOperands(); i
!= e
; i
+= 2) {
176 MachineBasicBlock
*PHIBB
= MI
->getOperand(i
+1).getMBB();
177 if (PHIBB
== PredBB
) {
183 dbgs() << "Malformed PHI in BB#" << MBB
->getNumber() << ": " << *MI
;
184 dbgs() << " missing input from predecessor BB#"
185 << PredBB
->getNumber() << '\n';
186 llvm_unreachable(nullptr);
190 for (unsigned i
= 1, e
= MI
->getNumOperands(); i
!= e
; i
+= 2) {
191 MachineBasicBlock
*PHIBB
= MI
->getOperand(i
+1).getMBB();
192 if (CheckExtra
&& !Preds
.count(PHIBB
)) {
193 dbgs() << "Warning: malformed PHI in BB#" << MBB
->getNumber()
195 dbgs() << " extra input from predecessor BB#"
196 << PHIBB
->getNumber() << '\n';
197 llvm_unreachable(nullptr);
199 if (PHIBB
->getNumber() < 0) {
200 dbgs() << "Malformed PHI in BB#" << MBB
->getNumber() << ": " << *MI
;
201 dbgs() << " non-existing BB#" << PHIBB
->getNumber() << '\n';
202 llvm_unreachable(nullptr);
210 /// Tail duplicate the block and cleanup.
212 TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock
*MBB
,
214 MachineFunction
&MF
) {
215 // Save the successors list.
216 SmallSetVector
<MachineBasicBlock
*, 8> Succs(MBB
->succ_begin(),
219 SmallVector
<MachineBasicBlock
*, 8> TDBBs
;
220 SmallVector
<MachineInstr
*, 16> Copies
;
221 if (!TailDuplicate(MBB
, IsSimple
, MF
, TDBBs
, Copies
))
226 SmallVector
<MachineInstr
*, 8> NewPHIs
;
227 MachineSSAUpdater
SSAUpdate(MF
, &NewPHIs
);
229 // TailBB's immediate successors are now successors of those predecessors
230 // which duplicated TailBB. Add the predecessors as sources to the PHI
232 bool isDead
= MBB
->pred_empty() && !MBB
->hasAddressTaken();
234 UpdateSuccessorsPHIs(MBB
, isDead
, TDBBs
, Succs
);
236 // If it is dead, remove it.
238 NumInstrDups
-= MBB
->size();
239 RemoveDeadBlock(MBB
);
244 if (!SSAUpdateVRs
.empty()) {
245 for (unsigned i
= 0, e
= SSAUpdateVRs
.size(); i
!= e
; ++i
) {
246 unsigned VReg
= SSAUpdateVRs
[i
];
247 SSAUpdate
.Initialize(VReg
);
249 // If the original definition is still around, add it as an available
251 MachineInstr
*DefMI
= MRI
->getVRegDef(VReg
);
252 MachineBasicBlock
*DefBB
= nullptr;
254 DefBB
= DefMI
->getParent();
255 SSAUpdate
.AddAvailableValue(DefBB
, VReg
);
258 // Add the new vregs as available values.
259 DenseMap
<unsigned, AvailableValsTy
>::iterator LI
=
260 SSAUpdateVals
.find(VReg
);
261 for (unsigned j
= 0, ee
= LI
->second
.size(); j
!= ee
; ++j
) {
262 MachineBasicBlock
*SrcBB
= LI
->second
[j
].first
;
263 unsigned SrcReg
= LI
->second
[j
].second
;
264 SSAUpdate
.AddAvailableValue(SrcBB
, SrcReg
);
267 // Rewrite uses that are outside of the original def's block.
268 MachineRegisterInfo::use_iterator UI
= MRI
->use_begin(VReg
);
269 while (UI
!= MRI
->use_end()) {
270 MachineOperand
&UseMO
= *UI
;
271 MachineInstr
*UseMI
= UseMO
.getParent();
273 if (UseMI
->isDebugValue()) {
274 // SSAUpdate can replace the use with an undef. That creates
275 // a debug instruction that is a kill.
276 // FIXME: Should it SSAUpdate job to delete debug instructions
277 // instead of replacing the use with undef?
278 UseMI
->eraseFromParent();
281 if (UseMI
->getParent() == DefBB
&& !UseMI
->isPHI())
283 SSAUpdate
.RewriteUse(UseMO
);
287 SSAUpdateVRs
.clear();
288 SSAUpdateVals
.clear();
291 // Eliminate some of the copies inserted by tail duplication to maintain
293 for (unsigned i
= 0, e
= Copies
.size(); i
!= e
; ++i
) {
294 MachineInstr
*Copy
= Copies
[i
];
297 unsigned Dst
= Copy
->getOperand(0).getReg();
298 unsigned Src
= Copy
->getOperand(1).getReg();
299 if (MRI
->hasOneNonDBGUse(Src
) &&
300 MRI
->constrainRegClass(Src
, MRI
->getRegClass(Dst
))) {
301 // Copy is the only use. Do trivial copy propagation here.
302 MRI
->replaceRegWith(Dst
, Src
);
303 Copy
->eraseFromParent();
308 NumAddedPHIs
+= NewPHIs
.size();
313 /// Look for small blocks that are unconditionally branched to and do not fall
314 /// through. Tail-duplicate their instructions into their predecessors to
315 /// eliminate (dynamic) branches.
316 bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction
&MF
) {
317 bool MadeChange
= false;
319 if (PreRegAlloc
&& TailDupVerify
) {
320 DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
321 VerifyPHIs(MF
, true);
324 for (MachineFunction::iterator I
= ++MF
.begin(), E
= MF
.end(); I
!= E
; ) {
325 MachineBasicBlock
*MBB
= &*I
++;
327 if (NumTails
== TailDupLimit
)
330 bool IsSimple
= isSimpleBB(MBB
);
332 if (!shouldTailDuplicate(MF
, IsSimple
, *MBB
))
335 MadeChange
|= TailDuplicateAndUpdate(MBB
, IsSimple
, MF
);
338 if (PreRegAlloc
&& TailDupVerify
)
339 VerifyPHIs(MF
, false);
344 static bool isDefLiveOut(unsigned Reg
, MachineBasicBlock
*BB
,
345 const MachineRegisterInfo
*MRI
) {
346 for (MachineInstr
&UseMI
: MRI
->use_instructions(Reg
)) {
347 if (UseMI
.isDebugValue())
349 if (UseMI
.getParent() != BB
)
355 static unsigned getPHISrcRegOpIdx(MachineInstr
*MI
, MachineBasicBlock
*SrcBB
) {
356 for (unsigned i
= 1, e
= MI
->getNumOperands(); i
!= e
; i
+= 2)
357 if (MI
->getOperand(i
+1).getMBB() == SrcBB
)
363 // Remember which registers are used by phis in this block. This is
364 // used to determine which registers are liveout while modifying the
365 // block (which is why we need to copy the information).
366 static void getRegsUsedByPHIs(const MachineBasicBlock
&BB
,
367 DenseSet
<unsigned> *UsedByPhi
) {
368 for (const auto &MI
: BB
) {
371 for (unsigned i
= 1, e
= MI
.getNumOperands(); i
!= e
; i
+= 2) {
372 unsigned SrcReg
= MI
.getOperand(i
).getReg();
373 UsedByPhi
->insert(SrcReg
);
378 /// Add a definition and source virtual registers pair for SSA update.
379 void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg
, unsigned NewReg
,
380 MachineBasicBlock
*BB
) {
381 DenseMap
<unsigned, AvailableValsTy
>::iterator LI
= SSAUpdateVals
.find(OrigReg
);
382 if (LI
!= SSAUpdateVals
.end())
383 LI
->second
.push_back(std::make_pair(BB
, NewReg
));
385 AvailableValsTy Vals
;
386 Vals
.push_back(std::make_pair(BB
, NewReg
));
387 SSAUpdateVals
.insert(std::make_pair(OrigReg
, Vals
));
388 SSAUpdateVRs
.push_back(OrigReg
);
392 /// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the
393 /// source register that's contributed by PredBB and update SSA update map.
394 void TailDuplicatePass::ProcessPHI(
395 MachineInstr
*MI
, MachineBasicBlock
*TailBB
, MachineBasicBlock
*PredBB
,
396 DenseMap
<unsigned, unsigned> &LocalVRMap
,
397 SmallVectorImpl
<std::pair
<unsigned, unsigned> > &Copies
,
398 const DenseSet
<unsigned> &RegsUsedByPhi
, bool Remove
) {
399 unsigned DefReg
= MI
->getOperand(0).getReg();
400 unsigned SrcOpIdx
= getPHISrcRegOpIdx(MI
, PredBB
);
401 assert(SrcOpIdx
&& "Unable to find matching PHI source?");
402 unsigned SrcReg
= MI
->getOperand(SrcOpIdx
).getReg();
403 const TargetRegisterClass
*RC
= MRI
->getRegClass(DefReg
);
404 LocalVRMap
.insert(std::make_pair(DefReg
, SrcReg
));
406 // Insert a copy from source to the end of the block. The def register is the
407 // available value liveout of the block.
408 unsigned NewDef
= MRI
->createVirtualRegister(RC
);
409 Copies
.push_back(std::make_pair(NewDef
, SrcReg
));
410 if (isDefLiveOut(DefReg
, TailBB
, MRI
) || RegsUsedByPhi
.count(DefReg
))
411 AddSSAUpdateEntry(DefReg
, NewDef
, PredBB
);
416 // Remove PredBB from the PHI node.
417 MI
->RemoveOperand(SrcOpIdx
+1);
418 MI
->RemoveOperand(SrcOpIdx
);
419 if (MI
->getNumOperands() == 1)
420 MI
->eraseFromParent();
423 /// Duplicate a TailBB instruction to PredBB and update
424 /// the source operands due to earlier PHI translation.
425 void TailDuplicatePass::DuplicateInstruction(MachineInstr
*MI
,
426 MachineBasicBlock
*TailBB
,
427 MachineBasicBlock
*PredBB
,
429 DenseMap
<unsigned, unsigned> &LocalVRMap
,
430 const DenseSet
<unsigned> &UsedByPhi
) {
431 MachineInstr
*NewMI
= TII
->duplicate(MI
, MF
);
432 for (unsigned i
= 0, e
= NewMI
->getNumOperands(); i
!= e
; ++i
) {
433 MachineOperand
&MO
= NewMI
->getOperand(i
);
436 unsigned Reg
= MO
.getReg();
437 if (!TargetRegisterInfo::isVirtualRegister(Reg
))
440 const TargetRegisterClass
*RC
= MRI
->getRegClass(Reg
);
441 unsigned NewReg
= MRI
->createVirtualRegister(RC
);
443 LocalVRMap
.insert(std::make_pair(Reg
, NewReg
));
444 if (isDefLiveOut(Reg
, TailBB
, MRI
) || UsedByPhi
.count(Reg
))
445 AddSSAUpdateEntry(Reg
, NewReg
, PredBB
);
447 DenseMap
<unsigned, unsigned>::iterator VI
= LocalVRMap
.find(Reg
);
448 if (VI
!= LocalVRMap
.end()) {
449 MO
.setReg(VI
->second
);
450 // Clear any kill flags from this operand. The new register could have
451 // uses after this one, so kills are not valid here.
453 MRI
->constrainRegClass(VI
->second
, MRI
->getRegClass(Reg
));
457 PredBB
->insert(PredBB
->instr_end(), NewMI
);
460 /// After FromBB is tail duplicated into its predecessor blocks, the successors
461 /// have gained new predecessors. Update the PHI instructions in them
464 TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock
*FromBB
, bool isDead
,
465 SmallVectorImpl
<MachineBasicBlock
*> &TDBBs
,
466 SmallSetVector
<MachineBasicBlock
*,8> &Succs
) {
467 for (SmallSetVector
<MachineBasicBlock
*, 8>::iterator SI
= Succs
.begin(),
468 SE
= Succs
.end(); SI
!= SE
; ++SI
) {
469 MachineBasicBlock
*SuccBB
= *SI
;
470 for (MachineBasicBlock::iterator II
= SuccBB
->begin(), EE
= SuccBB
->end();
474 MachineInstrBuilder
MIB(*FromBB
->getParent(), II
);
476 for (unsigned i
= 1, e
= II
->getNumOperands(); i
!= e
; i
+= 2) {
477 MachineOperand
&MO
= II
->getOperand(i
+1);
478 if (MO
.getMBB() == FromBB
) {
485 MachineOperand
&MO0
= II
->getOperand(Idx
);
486 unsigned Reg
= MO0
.getReg();
488 // Folded into the previous BB.
489 // There could be duplicate phi source entries. FIXME: Should sdisel
490 // or earlier pass fixed this?
491 for (unsigned i
= II
->getNumOperands()-2; i
!= Idx
; i
-= 2) {
492 MachineOperand
&MO
= II
->getOperand(i
+1);
493 if (MO
.getMBB() == FromBB
) {
494 II
->RemoveOperand(i
+1);
495 II
->RemoveOperand(i
);
501 // If Idx is set, the operands at Idx and Idx+1 must be removed.
502 // We reuse the location to avoid expensive RemoveOperand calls.
504 DenseMap
<unsigned,AvailableValsTy
>::iterator LI
=SSAUpdateVals
.find(Reg
);
505 if (LI
!= SSAUpdateVals
.end()) {
506 // This register is defined in the tail block.
507 for (unsigned j
= 0, ee
= LI
->second
.size(); j
!= ee
; ++j
) {
508 MachineBasicBlock
*SrcBB
= LI
->second
[j
].first
;
509 // If we didn't duplicate a bb into a particular predecessor, we
510 // might still have added an entry to SSAUpdateVals to correcly
511 // recompute SSA. If that case, avoid adding a dummy extra argument
513 if (!SrcBB
->isSuccessor(SuccBB
))
516 unsigned SrcReg
= LI
->second
[j
].second
;
518 II
->getOperand(Idx
).setReg(SrcReg
);
519 II
->getOperand(Idx
+1).setMBB(SrcBB
);
522 MIB
.addReg(SrcReg
).addMBB(SrcBB
);
526 // Live in tail block, must also be live in predecessors.
527 for (unsigned j
= 0, ee
= TDBBs
.size(); j
!= ee
; ++j
) {
528 MachineBasicBlock
*SrcBB
= TDBBs
[j
];
530 II
->getOperand(Idx
).setReg(Reg
);
531 II
->getOperand(Idx
+1).setMBB(SrcBB
);
534 MIB
.addReg(Reg
).addMBB(SrcBB
);
539 II
->RemoveOperand(Idx
+1);
540 II
->RemoveOperand(Idx
);
546 /// Determine if it is profitable to duplicate this block.
548 TailDuplicatePass::shouldTailDuplicate(const MachineFunction
&MF
,
550 MachineBasicBlock
&TailBB
) {
551 // Only duplicate blocks that end with unconditional branches.
552 if (TailBB
.canFallThrough())
555 // Don't try to tail-duplicate single-block loops.
556 if (TailBB
.isSuccessor(&TailBB
))
559 // Set the limit on the cost to duplicate. When optimizing for size,
560 // duplicate only one, because one branch instruction can be eliminated to
561 // compensate for the duplication.
562 unsigned MaxDuplicateCount
;
563 if (TailDuplicateSize
.getNumOccurrences() == 0 &&
564 // FIXME: Use Function::optForSize().
565 MF
.getFunction()->hasFnAttribute(Attribute::OptimizeForSize
))
566 MaxDuplicateCount
= 1;
568 MaxDuplicateCount
= TailDuplicateSize
;
570 // If the target has hardware branch prediction that can handle indirect
571 // branches, duplicating them can often make them predictable when there
572 // are common paths through the code. The limit needs to be high enough
573 // to allow undoing the effects of tail merging and other optimizations
574 // that rearrange the predecessors of the indirect branch.
576 bool HasIndirectbr
= false;
578 HasIndirectbr
= TailBB
.back().isIndirectBranch();
580 if (HasIndirectbr
&& PreRegAlloc
)
581 MaxDuplicateCount
= 20;
583 // Check the instructions in the block to determine whether tail-duplication
584 // is invalid or unlikely to be profitable.
585 unsigned InstrCount
= 0;
586 for (MachineInstr
&MI
: TailBB
) {
587 // Non-duplicable things shouldn't be tail-duplicated.
588 if (MI
.isNotDuplicable())
591 // Do not duplicate 'return' instructions if this is a pre-regalloc run.
592 // A return may expand into a lot more instructions (e.g. reload of callee
593 // saved registers) after PEI.
594 if (PreRegAlloc
&& MI
.isReturn())
597 // Avoid duplicating calls before register allocation. Calls presents a
598 // barrier to register allocation so duplicating them may end up increasing
600 if (PreRegAlloc
&& MI
.isCall())
603 if (!MI
.isPHI() && !MI
.isDebugValue())
606 if (InstrCount
> MaxDuplicateCount
)
610 // Check if any of the successors of TailBB has a PHI node in which the
611 // value corresponding to TailBB uses a subregister.
612 // If a phi node uses a register paired with a subregister, the actual
613 // "value type" of the phi may differ from the type of the register without
614 // any subregisters. Due to a bug, tail duplication may add a new operand
615 // without a necessary subregister, producing an invalid code. This is
616 // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll.
617 // Disable tail duplication for this case for now, until the problem is
619 for (auto SB
: TailBB
.successors()) {
620 for (auto &I
: *SB
) {
623 unsigned Idx
= getPHISrcRegOpIdx(&I
, &TailBB
);
625 MachineOperand
&PU
= I
.getOperand(Idx
);
626 if (PU
.getSubReg() != 0)
631 if (HasIndirectbr
&& PreRegAlloc
)
640 return canCompletelyDuplicateBB(TailBB
);
643 /// True if this BB has only one unconditional jump.
645 TailDuplicatePass::isSimpleBB(MachineBasicBlock
*TailBB
) {
646 if (TailBB
->succ_size() != 1)
648 if (TailBB
->pred_empty())
650 MachineBasicBlock::iterator I
= TailBB
->getFirstNonDebugInstr();
651 if (I
== TailBB
->end())
653 return I
->isUnconditionalBranch();
657 bothUsedInPHI(const MachineBasicBlock
&A
,
658 SmallPtrSet
<MachineBasicBlock
*, 8> SuccsB
) {
659 for (MachineBasicBlock
*BB
: A
.successors())
660 if (SuccsB
.count(BB
) && !BB
->empty() && BB
->begin()->isPHI())
667 TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock
&BB
) {
668 for (MachineBasicBlock
*PredBB
: BB
.predecessors()) {
669 if (PredBB
->succ_size() > 1)
672 MachineBasicBlock
*PredTBB
= nullptr, *PredFBB
= nullptr;
673 SmallVector
<MachineOperand
, 4> PredCond
;
674 if (TII
->AnalyzeBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
, true))
677 if (!PredCond
.empty())
684 TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock
*TailBB
,
685 SmallVectorImpl
<MachineBasicBlock
*> &TDBBs
,
686 const DenseSet
<unsigned> &UsedByPhi
,
687 SmallVectorImpl
<MachineInstr
*> &Copies
) {
688 SmallPtrSet
<MachineBasicBlock
*, 8> Succs(TailBB
->succ_begin(),
690 SmallVector
<MachineBasicBlock
*, 8> Preds(TailBB
->pred_begin(),
692 bool Changed
= false;
693 for (SmallSetVector
<MachineBasicBlock
*, 8>::iterator PI
= Preds
.begin(),
694 PE
= Preds
.end(); PI
!= PE
; ++PI
) {
695 MachineBasicBlock
*PredBB
= *PI
;
697 if (PredBB
->hasEHPadSuccessor())
700 if (bothUsedInPHI(*PredBB
, Succs
))
703 MachineBasicBlock
*PredTBB
= nullptr, *PredFBB
= nullptr;
704 SmallVector
<MachineOperand
, 4> PredCond
;
705 if (TII
->AnalyzeBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
, true))
709 DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
710 << "From simple Succ: " << *TailBB
);
712 MachineBasicBlock
*NewTarget
= *TailBB
->succ_begin();
713 MachineBasicBlock
*NextBB
= &*std::next(PredBB
->getIterator());
715 // Make PredFBB explicit.
716 if (PredCond
.empty())
719 // Make fall through explicit.
726 if (PredFBB
== TailBB
)
728 if (PredTBB
== TailBB
)
731 // Make the branch unconditional if possible
732 if (PredTBB
== PredFBB
) {
737 // Avoid adding fall through branches.
738 if (PredFBB
== NextBB
)
740 if (PredTBB
== NextBB
&& PredFBB
== nullptr)
743 TII
->RemoveBranch(*PredBB
);
746 TII
->InsertBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
, DebugLoc());
748 uint32_t Weight
= MBPI
->getEdgeWeight(PredBB
, TailBB
);
749 PredBB
->removeSuccessor(TailBB
);
750 unsigned NumSuccessors
= PredBB
->succ_size();
751 assert(NumSuccessors
<= 1);
752 if (NumSuccessors
== 0 || *PredBB
->succ_begin() != NewTarget
)
753 PredBB
->addSuccessor(NewTarget
, Weight
);
755 TDBBs
.push_back(PredBB
);
760 /// If it is profitable, duplicate TailBB's contents in each
761 /// of its predecessors.
763 TailDuplicatePass::TailDuplicate(MachineBasicBlock
*TailBB
,
766 SmallVectorImpl
<MachineBasicBlock
*> &TDBBs
,
767 SmallVectorImpl
<MachineInstr
*> &Copies
) {
768 DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB
->getNumber() << '\n');
770 DenseSet
<unsigned> UsedByPhi
;
771 getRegsUsedByPHIs(*TailBB
, &UsedByPhi
);
774 return duplicateSimpleBB(TailBB
, TDBBs
, UsedByPhi
, Copies
);
776 // Iterate through all the unique predecessors and tail-duplicate this
777 // block into them, if possible. Copying the list ahead of time also
778 // avoids trouble with the predecessor list reallocating.
779 bool Changed
= false;
780 SmallSetVector
<MachineBasicBlock
*, 8> Preds(TailBB
->pred_begin(),
782 for (SmallSetVector
<MachineBasicBlock
*, 8>::iterator PI
= Preds
.begin(),
783 PE
= Preds
.end(); PI
!= PE
; ++PI
) {
784 MachineBasicBlock
*PredBB
= *PI
;
786 assert(TailBB
!= PredBB
&&
787 "Single-block loop should have been rejected earlier!");
788 // EH edges are ignored by AnalyzeBranch.
789 if (PredBB
->succ_size() > 1)
792 MachineBasicBlock
*PredTBB
, *PredFBB
;
793 SmallVector
<MachineOperand
, 4> PredCond
;
794 if (TII
->AnalyzeBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
, true))
796 if (!PredCond
.empty())
798 // Don't duplicate into a fall-through predecessor (at least for now).
799 if (PredBB
->isLayoutSuccessor(TailBB
) && PredBB
->canFallThrough())
802 DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
803 << "From Succ: " << *TailBB
);
805 TDBBs
.push_back(PredBB
);
807 // Remove PredBB's unconditional branch.
808 TII
->RemoveBranch(*PredBB
);
810 if (RS
&& !TailBB
->livein_empty()) {
811 // Update PredBB livein.
812 RS
->enterBasicBlock(PredBB
);
813 if (!PredBB
->empty())
814 RS
->forward(std::prev(PredBB
->end()));
815 for (const auto &LI
: TailBB
->liveins()) {
816 if (!RS
->isRegUsed(LI
.PhysReg
, false))
817 // If a register is previously livein to the tail but it's not live
818 // at the end of predecessor BB, then it should be added to its
820 PredBB
->addLiveIn(LI
);
824 // Clone the contents of TailBB into PredBB.
825 DenseMap
<unsigned, unsigned> LocalVRMap
;
826 SmallVector
<std::pair
<unsigned,unsigned>, 4> CopyInfos
;
827 // Use instr_iterator here to properly handle bundles, e.g.
828 // ARM Thumb2 IT block.
829 MachineBasicBlock::instr_iterator I
= TailBB
->instr_begin();
830 while (I
!= TailBB
->instr_end()) {
831 MachineInstr
*MI
= &*I
;
834 // Replace the uses of the def of the PHI with the register coming
836 ProcessPHI(MI
, TailBB
, PredBB
, LocalVRMap
, CopyInfos
, UsedByPhi
, true);
838 // Replace def of virtual registers with new registers, and update
839 // uses with PHI source register or the new registers.
840 DuplicateInstruction(MI
, TailBB
, PredBB
, MF
, LocalVRMap
, UsedByPhi
);
843 MachineBasicBlock::iterator Loc
= PredBB
->getFirstTerminator();
844 for (unsigned i
= 0, e
= CopyInfos
.size(); i
!= e
; ++i
) {
845 Copies
.push_back(BuildMI(*PredBB
, Loc
, DebugLoc(),
846 TII
->get(TargetOpcode::COPY
),
847 CopyInfos
[i
].first
).addReg(CopyInfos
[i
].second
));
851 TII
->AnalyzeBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
, true);
853 NumInstrDups
+= TailBB
->size() - 1; // subtract one for removed branch
856 PredBB
->removeSuccessor(PredBB
->succ_begin());
857 assert(PredBB
->succ_empty() &&
858 "TailDuplicate called on block with multiple successors!");
859 for (MachineBasicBlock::succ_iterator I
= TailBB
->succ_begin(),
860 E
= TailBB
->succ_end(); I
!= E
; ++I
)
861 PredBB
->addSuccessor(*I
, MBPI
->getEdgeWeight(TailBB
, I
));
867 // If TailBB was duplicated into all its predecessors except for the prior
868 // block, which falls through unconditionally, move the contents of this
869 // block into the prior block.
870 MachineBasicBlock
*PrevBB
= &*std::prev(TailBB
->getIterator());
871 MachineBasicBlock
*PriorTBB
= nullptr, *PriorFBB
= nullptr;
872 SmallVector
<MachineOperand
, 4> PriorCond
;
873 // This has to check PrevBB->succ_size() because EH edges are ignored by
875 if (PrevBB
->succ_size() == 1 &&
876 !TII
->AnalyzeBranch(*PrevBB
, PriorTBB
, PriorFBB
, PriorCond
, true) &&
877 PriorCond
.empty() && !PriorTBB
&& TailBB
->pred_size() == 1 &&
878 !TailBB
->hasAddressTaken()) {
879 DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
880 << "From MBB: " << *TailBB
);
882 DenseMap
<unsigned, unsigned> LocalVRMap
;
883 SmallVector
<std::pair
<unsigned,unsigned>, 4> CopyInfos
;
884 MachineBasicBlock::iterator I
= TailBB
->begin();
885 // Process PHI instructions first.
886 while (I
!= TailBB
->end() && I
->isPHI()) {
887 // Replace the uses of the def of the PHI with the register coming
889 MachineInstr
*MI
= &*I
++;
890 ProcessPHI(MI
, TailBB
, PrevBB
, LocalVRMap
, CopyInfos
, UsedByPhi
, true);
892 MI
->eraseFromParent();
895 // Now copy the non-PHI instructions.
896 while (I
!= TailBB
->end()) {
897 // Replace def of virtual registers with new registers, and update
898 // uses with PHI source register or the new registers.
899 MachineInstr
*MI
= &*I
++;
900 assert(!MI
->isBundle() && "Not expecting bundles before regalloc!");
901 DuplicateInstruction(MI
, TailBB
, PrevBB
, MF
, LocalVRMap
, UsedByPhi
);
902 MI
->eraseFromParent();
904 MachineBasicBlock::iterator Loc
= PrevBB
->getFirstTerminator();
905 for (unsigned i
= 0, e
= CopyInfos
.size(); i
!= e
; ++i
) {
906 Copies
.push_back(BuildMI(*PrevBB
, Loc
, DebugLoc(),
907 TII
->get(TargetOpcode::COPY
),
909 .addReg(CopyInfos
[i
].second
));
912 // No PHIs to worry about, just splice the instructions over.
913 PrevBB
->splice(PrevBB
->end(), TailBB
, TailBB
->begin(), TailBB
->end());
915 PrevBB
->removeSuccessor(PrevBB
->succ_begin());
916 assert(PrevBB
->succ_empty());
917 PrevBB
->transferSuccessors(TailBB
);
918 TDBBs
.push_back(PrevBB
);
922 // If this is after register allocation, there are no phis to fix.
926 // If we made no changes so far, we are safe.
931 // Handle the nasty case in that we duplicated a block that is part of a loop
932 // into some but not all of its predecessors. For example:
936 // if we duplicate 2 into 1 but not into 3, we end up with
937 // 12 -> 3 <-> 2 -> rest |
940 // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced
941 // with a phi in 3 (which now dominates 2).
942 // What we do here is introduce a copy in 3 of the register defined by the
943 // phi, just like when we are duplicating 2 into 3, but we don't copy any
944 // real instructions or remove the 3 -> 2 edge from the phi in 2.
945 for (SmallSetVector
<MachineBasicBlock
*, 8>::iterator PI
= Preds
.begin(),
946 PE
= Preds
.end(); PI
!= PE
; ++PI
) {
947 MachineBasicBlock
*PredBB
= *PI
;
948 if (std::find(TDBBs
.begin(), TDBBs
.end(), PredBB
) != TDBBs
.end())
952 if (PredBB
->succ_size() != 1)
955 DenseMap
<unsigned, unsigned> LocalVRMap
;
956 SmallVector
<std::pair
<unsigned,unsigned>, 4> CopyInfos
;
957 MachineBasicBlock::iterator I
= TailBB
->begin();
958 // Process PHI instructions first.
959 while (I
!= TailBB
->end() && I
->isPHI()) {
960 // Replace the uses of the def of the PHI with the register coming
962 MachineInstr
*MI
= &*I
++;
963 ProcessPHI(MI
, TailBB
, PredBB
, LocalVRMap
, CopyInfos
, UsedByPhi
, false);
965 MachineBasicBlock::iterator Loc
= PredBB
->getFirstTerminator();
966 for (unsigned i
= 0, e
= CopyInfos
.size(); i
!= e
; ++i
) {
967 Copies
.push_back(BuildMI(*PredBB
, Loc
, DebugLoc(),
968 TII
->get(TargetOpcode::COPY
),
969 CopyInfos
[i
].first
).addReg(CopyInfos
[i
].second
));
976 /// Remove the specified dead machine basic block from the function, updating
978 void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock
*MBB
) {
979 assert(MBB
->pred_empty() && "MBB must be dead!");
980 DEBUG(dbgs() << "\nRemoving MBB: " << *MBB
);
982 // Remove all successors.
983 while (!MBB
->succ_empty())
984 MBB
->removeSuccessor(MBB
->succ_end()-1);
987 MBB
->eraseFromParent();