1 //===- InlineFunction.cpp - Code to perform function inlining -------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements inlining of a function into a call site, resolving
11 // parameters and the return value as appropriate.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/Transforms/Utils/Cloning.h"
16 #include "llvm/Constants.h"
17 #include "llvm/DerivedTypes.h"
18 #include "llvm/Module.h"
19 #include "llvm/Instructions.h"
20 #include "llvm/IntrinsicInst.h"
21 #include "llvm/Intrinsics.h"
22 #include "llvm/Attributes.h"
23 #include "llvm/Analysis/CallGraph.h"
24 #include "llvm/Analysis/DebugInfo.h"
25 #include "llvm/Analysis/InstructionSimplify.h"
26 #include "llvm/Target/TargetData.h"
27 #include "llvm/Transforms/Utils/Local.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/Support/CallSite.h"
31 #include "llvm/Support/IRBuilder.h"
34 bool llvm::InlineFunction(CallInst
*CI
, InlineFunctionInfo
&IFI
) {
35 return InlineFunction(CallSite(CI
), IFI
);
37 bool llvm::InlineFunction(InvokeInst
*II
, InlineFunctionInfo
&IFI
) {
38 return InlineFunction(CallSite(II
), IFI
);
42 /// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
43 /// an invoke, we have to turn all of the calls that can throw into
44 /// invokes. This function analyze BB to see if there are any calls, and if so,
45 /// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
46 /// nodes in that block with the values specified in InvokeDestPHIValues.
48 static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock
*BB
,
49 BasicBlock
*InvokeDest
,
50 const SmallVectorImpl
<Value
*> &InvokeDestPHIValues
) {
51 for (BasicBlock::iterator BBI
= BB
->begin(), E
= BB
->end(); BBI
!= E
; ) {
52 Instruction
*I
= BBI
++;
54 // We only need to check for function calls: inlined invoke
55 // instructions require no special handling.
56 CallInst
*CI
= dyn_cast
<CallInst
>(I
);
57 if (CI
== 0) continue;
59 // If this call cannot unwind, don't convert it to an invoke.
60 if (CI
->doesNotThrow())
63 // Convert this function call into an invoke instruction.
64 // First, split the basic block.
65 BasicBlock
*Split
= BB
->splitBasicBlock(CI
, CI
->getName()+".noexc");
67 // Next, create the new invoke instruction, inserting it at the end
68 // of the old basic block.
69 ImmutableCallSite
CS(CI
);
70 SmallVector
<Value
*, 8> InvokeArgs(CS
.arg_begin(), CS
.arg_end());
72 InvokeInst::Create(CI
->getCalledValue(), Split
, InvokeDest
,
73 InvokeArgs
.begin(), InvokeArgs
.end(),
74 CI
->getName(), BB
->getTerminator());
75 II
->setCallingConv(CI
->getCallingConv());
76 II
->setAttributes(CI
->getAttributes());
78 // Make sure that anything using the call now uses the invoke! This also
79 // updates the CallGraph if present, because it uses a WeakVH.
80 CI
->replaceAllUsesWith(II
);
82 // Delete the unconditional branch inserted by splitBasicBlock
83 BB
->getInstList().pop_back();
84 Split
->getInstList().pop_front(); // Delete the original call
86 // Update any PHI nodes in the exceptional block to indicate that
87 // there is now a new entry in them.
89 for (BasicBlock::iterator I
= InvokeDest
->begin();
90 isa
<PHINode
>(I
); ++I
, ++i
)
91 cast
<PHINode
>(I
)->addIncoming(InvokeDestPHIValues
[i
], BB
);
93 // This basic block is now complete, the caller will continue scanning the
100 /// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls
101 /// in the body of the inlined function into invokes and turn unwind
102 /// instructions into branches to the invoke unwind dest.
104 /// II is the invoke instruction being inlined. FirstNewBlock is the first
105 /// block of the inlined code (the last block is the end of the function),
106 /// and InlineCodeInfo is information about the code that got inlined.
107 static void HandleInlinedInvoke(InvokeInst
*II
, BasicBlock
*FirstNewBlock
,
108 ClonedCodeInfo
&InlinedCodeInfo
) {
109 BasicBlock
*InvokeDest
= II
->getUnwindDest();
110 SmallVector
<Value
*, 8> InvokeDestPHIValues
;
112 // If there are PHI nodes in the unwind destination block, we need to
113 // keep track of which values came into them from this invoke, then remove
114 // the entry for this block.
115 BasicBlock
*InvokeBlock
= II
->getParent();
116 for (BasicBlock::iterator I
= InvokeDest
->begin(); isa
<PHINode
>(I
); ++I
) {
117 PHINode
*PN
= cast
<PHINode
>(I
);
118 // Save the value to use for this edge.
119 InvokeDestPHIValues
.push_back(PN
->getIncomingValueForBlock(InvokeBlock
));
122 Function
*Caller
= FirstNewBlock
->getParent();
124 // The inlined code is currently at the end of the function, scan from the
125 // start of the inlined code to its end, checking for stuff we need to
126 // rewrite. If the code doesn't have calls or unwinds, we know there is
127 // nothing to rewrite.
128 if (!InlinedCodeInfo
.ContainsCalls
&& !InlinedCodeInfo
.ContainsUnwinds
) {
129 // Now that everything is happy, we have one final detail. The PHI nodes in
130 // the exception destination block still have entries due to the original
131 // invoke instruction. Eliminate these entries (which might even delete the
133 InvokeDest
->removePredecessor(II
->getParent());
137 for (Function::iterator BB
= FirstNewBlock
, E
= Caller
->end(); BB
!= E
; ++BB
){
138 if (InlinedCodeInfo
.ContainsCalls
)
139 HandleCallsInBlockInlinedThroughInvoke(BB
, InvokeDest
,
140 InvokeDestPHIValues
);
142 if (UnwindInst
*UI
= dyn_cast
<UnwindInst
>(BB
->getTerminator())) {
143 // An UnwindInst requires special handling when it gets inlined into an
144 // invoke site. Once this happens, we know that the unwind would cause
145 // a control transfer to the invoke exception destination, so we can
146 // transform it into a direct branch to the exception destination.
147 BranchInst::Create(InvokeDest
, UI
);
149 // Delete the unwind instruction!
150 UI
->eraseFromParent();
152 // Update any PHI nodes in the exceptional block to indicate that
153 // there is now a new entry in them.
155 for (BasicBlock::iterator I
= InvokeDest
->begin();
156 isa
<PHINode
>(I
); ++I
, ++i
) {
157 PHINode
*PN
= cast
<PHINode
>(I
);
158 PN
->addIncoming(InvokeDestPHIValues
[i
], BB
);
163 // Now that everything is happy, we have one final detail. The PHI nodes in
164 // the exception destination block still have entries due to the original
165 // invoke instruction. Eliminate these entries (which might even delete the
167 InvokeDest
->removePredecessor(II
->getParent());
170 /// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee
171 /// into the caller, update the specified callgraph to reflect the changes we
172 /// made. Note that it's possible that not all code was copied over, so only
173 /// some edges of the callgraph may remain.
174 static void UpdateCallGraphAfterInlining(CallSite CS
,
175 Function::iterator FirstNewBlock
,
176 ValueToValueMapTy
&VMap
,
177 InlineFunctionInfo
&IFI
) {
178 CallGraph
&CG
= *IFI
.CG
;
179 const Function
*Caller
= CS
.getInstruction()->getParent()->getParent();
180 const Function
*Callee
= CS
.getCalledFunction();
181 CallGraphNode
*CalleeNode
= CG
[Callee
];
182 CallGraphNode
*CallerNode
= CG
[Caller
];
184 // Since we inlined some uninlined call sites in the callee into the caller,
185 // add edges from the caller to all of the callees of the callee.
186 CallGraphNode::iterator I
= CalleeNode
->begin(), E
= CalleeNode
->end();
188 // Consider the case where CalleeNode == CallerNode.
189 CallGraphNode::CalledFunctionsVector CallCache
;
190 if (CalleeNode
== CallerNode
) {
191 CallCache
.assign(I
, E
);
192 I
= CallCache
.begin();
196 for (; I
!= E
; ++I
) {
197 const Value
*OrigCall
= I
->first
;
199 ValueToValueMapTy::iterator VMI
= VMap
.find(OrigCall
);
200 // Only copy the edge if the call was inlined!
201 if (VMI
== VMap
.end() || VMI
->second
== 0)
204 // If the call was inlined, but then constant folded, there is no edge to
205 // add. Check for this case.
206 Instruction
*NewCall
= dyn_cast
<Instruction
>(VMI
->second
);
207 if (NewCall
== 0) continue;
209 // Remember that this call site got inlined for the client of
211 IFI
.InlinedCalls
.push_back(NewCall
);
213 // It's possible that inlining the callsite will cause it to go from an
214 // indirect to a direct call by resolving a function pointer. If this
215 // happens, set the callee of the new call site to a more precise
216 // destination. This can also happen if the call graph node of the caller
217 // was just unnecessarily imprecise.
218 if (I
->second
->getFunction() == 0)
219 if (Function
*F
= CallSite(NewCall
).getCalledFunction()) {
220 // Indirect call site resolved to direct call.
221 CallerNode
->addCalledFunction(CallSite(NewCall
), CG
[F
]);
226 CallerNode
->addCalledFunction(CallSite(NewCall
), I
->second
);
229 // Update the call graph by deleting the edge from Callee to Caller. We must
230 // do this after the loop above in case Caller and Callee are the same.
231 CallerNode
->removeCallEdgeFor(CS
);
234 /// HandleByValArgument - When inlining a call site that has a byval argument,
235 /// we have to make the implicit memcpy explicit by adding it.
236 static Value
*HandleByValArgument(Value
*Arg
, Instruction
*TheCall
,
237 const Function
*CalledFunc
,
238 InlineFunctionInfo
&IFI
,
239 unsigned ByValAlignment
) {
240 const Type
*AggTy
= cast
<PointerType
>(Arg
->getType())->getElementType();
242 // If the called function is readonly, then it could not mutate the caller's
243 // copy of the byval'd memory. In this case, it is safe to elide the copy and
245 if (CalledFunc
->onlyReadsMemory()) {
246 // If the byval argument has a specified alignment that is greater than the
247 // passed in pointer, then we either have to round up the input pointer or
248 // give up on this transformation.
249 if (ByValAlignment
<= 1) // 0 = unspecified, 1 = no particular alignment.
252 // If the pointer is already known to be sufficiently aligned, or if we can
253 // round it up to a larger alignment, then we don't need a temporary.
254 if (getOrEnforceKnownAlignment(Arg
, ByValAlignment
,
255 IFI
.TD
) >= ByValAlignment
)
258 // Otherwise, we have to make a memcpy to get a safe alignment. This is bad
259 // for code quality, but rarely happens and is required for correctness.
262 LLVMContext
&Context
= Arg
->getContext();
264 const Type
*VoidPtrTy
= Type::getInt8PtrTy(Context
);
266 // Create the alloca. If we have TargetData, use nice alignment.
269 Align
= IFI
.TD
->getPrefTypeAlignment(AggTy
);
271 // If the byval had an alignment specified, we *must* use at least that
272 // alignment, as it is required by the byval argument (and uses of the
273 // pointer inside the callee).
274 Align
= std::max(Align
, ByValAlignment
);
276 Function
*Caller
= TheCall
->getParent()->getParent();
278 Value
*NewAlloca
= new AllocaInst(AggTy
, 0, Align
, Arg
->getName(),
279 &*Caller
->begin()->begin());
281 const Type
*Tys
[3] = {VoidPtrTy
, VoidPtrTy
, Type::getInt64Ty(Context
)};
282 Function
*MemCpyFn
= Intrinsic::getDeclaration(Caller
->getParent(),
285 Value
*DestCast
= new BitCastInst(NewAlloca
, VoidPtrTy
, "tmp", TheCall
);
286 Value
*SrcCast
= new BitCastInst(Arg
, VoidPtrTy
, "tmp", TheCall
);
290 Size
= ConstantExpr::getSizeOf(AggTy
);
292 Size
= ConstantInt::get(Type::getInt64Ty(Context
),
293 IFI
.TD
->getTypeStoreSize(AggTy
));
295 // Always generate a memcpy of alignment 1 here because we don't know
296 // the alignment of the src pointer. Other optimizations can infer
298 Value
*CallArgs
[] = {
299 DestCast
, SrcCast
, Size
,
300 ConstantInt::get(Type::getInt32Ty(Context
), 1),
301 ConstantInt::getFalse(Context
) // isVolatile
303 CallInst
*TheMemCpy
=
304 CallInst::Create(MemCpyFn
, CallArgs
, CallArgs
+5, "", TheCall
);
306 // If we have a call graph, update it.
307 if (CallGraph
*CG
= IFI
.CG
) {
308 CallGraphNode
*MemCpyCGN
= CG
->getOrInsertFunction(MemCpyFn
);
309 CallGraphNode
*CallerNode
= (*CG
)[Caller
];
310 CallerNode
->addCalledFunction(TheMemCpy
, MemCpyCGN
);
313 // Uses of the argument in the function should use our new alloca
318 // isUsedByLifetimeMarker - Check whether this Value is used by a lifetime
320 static bool isUsedByLifetimeMarker(Value
*V
) {
321 for (Value::use_iterator UI
= V
->use_begin(), UE
= V
->use_end(); UI
!= UE
;
323 if (IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(*UI
)) {
324 switch (II
->getIntrinsicID()) {
326 case Intrinsic::lifetime_start
:
327 case Intrinsic::lifetime_end
:
335 // hasLifetimeMarkers - Check whether the given alloca already has
336 // lifetime.start or lifetime.end intrinsics.
337 static bool hasLifetimeMarkers(AllocaInst
*AI
) {
338 const Type
*Int8PtrTy
= Type::getInt8PtrTy(AI
->getType()->getContext());
339 if (AI
->getType() == Int8PtrTy
)
340 return isUsedByLifetimeMarker(AI
);
342 // Do a scan to find all the bitcasts to i8*.
343 for (Value::use_iterator I
= AI
->use_begin(), E
= AI
->use_end(); I
!= E
;
345 if (I
->getType() != Int8PtrTy
) continue;
346 if (!isa
<BitCastInst
>(*I
)) continue;
347 if (isUsedByLifetimeMarker(*I
))
353 // InlineFunction - This function inlines the called function into the basic
354 // block of the caller. This returns false if it is not possible to inline this
355 // call. The program is still in a well defined state if this occurs though.
357 // Note that this only does one level of inlining. For example, if the
358 // instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
359 // exists in the instruction stream. Similarly this will inline a recursive
360 // function by one level.
362 bool llvm::InlineFunction(CallSite CS
, InlineFunctionInfo
&IFI
) {
363 Instruction
*TheCall
= CS
.getInstruction();
364 LLVMContext
&Context
= TheCall
->getContext();
365 assert(TheCall
->getParent() && TheCall
->getParent()->getParent() &&
366 "Instruction not in function!");
368 // If IFI has any state in it, zap it before we fill it in.
371 const Function
*CalledFunc
= CS
.getCalledFunction();
372 if (CalledFunc
== 0 || // Can't inline external function or indirect
373 CalledFunc
->isDeclaration() || // call, or call to a vararg function!
374 CalledFunc
->getFunctionType()->isVarArg()) return false;
376 // If the call to the callee is not a tail call, we must clear the 'tail'
377 // flags on any calls that we inline.
378 bool MustClearTailCallFlags
=
379 !(isa
<CallInst
>(TheCall
) && cast
<CallInst
>(TheCall
)->isTailCall());
381 // If the call to the callee cannot throw, set the 'nounwind' flag on any
382 // calls that we inline.
383 bool MarkNoUnwind
= CS
.doesNotThrow();
385 BasicBlock
*OrigBB
= TheCall
->getParent();
386 Function
*Caller
= OrigBB
->getParent();
388 // GC poses two hazards to inlining, which only occur when the callee has GC:
389 // 1. If the caller has no GC, then the callee's GC must be propagated to the
391 // 2. If the caller has a differing GC, it is invalid to inline.
392 if (CalledFunc
->hasGC()) {
393 if (!Caller
->hasGC())
394 Caller
->setGC(CalledFunc
->getGC());
395 else if (CalledFunc
->getGC() != Caller
->getGC())
399 // Get an iterator to the last basic block in the function, which will have
400 // the new function inlined after it.
402 Function::iterator LastBlock
= &Caller
->back();
404 // Make sure to capture all of the return instructions from the cloned
406 SmallVector
<ReturnInst
*, 8> Returns
;
407 ClonedCodeInfo InlinedFunctionInfo
;
408 Function::iterator FirstNewBlock
;
410 { // Scope to destroy VMap after cloning.
411 ValueToValueMapTy VMap
;
413 assert(CalledFunc
->arg_size() == CS
.arg_size() &&
414 "No varargs calls can be inlined!");
416 // Calculate the vector of arguments to pass into the function cloner, which
417 // matches up the formal to the actual argument values.
418 CallSite::arg_iterator AI
= CS
.arg_begin();
420 for (Function::const_arg_iterator I
= CalledFunc
->arg_begin(),
421 E
= CalledFunc
->arg_end(); I
!= E
; ++I
, ++AI
, ++ArgNo
) {
422 Value
*ActualArg
= *AI
;
424 // When byval arguments actually inlined, we need to make the copy implied
425 // by them explicit. However, we don't do this if the callee is readonly
426 // or readnone, because the copy would be unneeded: the callee doesn't
427 // modify the struct.
428 if (CalledFunc
->paramHasAttr(ArgNo
+1, Attribute::ByVal
)) {
429 ActualArg
= HandleByValArgument(ActualArg
, TheCall
, CalledFunc
, IFI
,
430 CalledFunc
->getParamAlignment(ArgNo
+1));
432 // Calls that we inline may use the new alloca, so we need to clear
433 // their 'tail' flags if HandleByValArgument introduced a new alloca and
434 // the callee has calls.
435 MustClearTailCallFlags
|= ActualArg
!= *AI
;
441 // We want the inliner to prune the code as it copies. We would LOVE to
442 // have no dead or constant instructions leftover after inlining occurs
443 // (which can happen, e.g., because an argument was constant), but we'll be
444 // happy with whatever the cloner can do.
445 CloneAndPruneFunctionInto(Caller
, CalledFunc
, VMap
,
446 /*ModuleLevelChanges=*/false, Returns
, ".i",
447 &InlinedFunctionInfo
, IFI
.TD
, TheCall
);
449 // Remember the first block that is newly cloned over.
450 FirstNewBlock
= LastBlock
; ++FirstNewBlock
;
452 // Update the callgraph if requested.
454 UpdateCallGraphAfterInlining(CS
, FirstNewBlock
, VMap
, IFI
);
457 // If there are any alloca instructions in the block that used to be the entry
458 // block for the callee, move them to the entry block of the caller. First
459 // calculate which instruction they should be inserted before. We insert the
460 // instructions at the end of the current alloca list.
463 BasicBlock::iterator InsertPoint
= Caller
->begin()->begin();
464 for (BasicBlock::iterator I
= FirstNewBlock
->begin(),
465 E
= FirstNewBlock
->end(); I
!= E
; ) {
466 AllocaInst
*AI
= dyn_cast
<AllocaInst
>(I
++);
467 if (AI
== 0) continue;
469 // If the alloca is now dead, remove it. This often occurs due to code
471 if (AI
->use_empty()) {
472 AI
->eraseFromParent();
476 if (!isa
<Constant
>(AI
->getArraySize()))
479 // Keep track of the static allocas that we inline into the caller.
480 IFI
.StaticAllocas
.push_back(AI
);
482 // Scan for the block of allocas that we can move over, and move them
484 while (isa
<AllocaInst
>(I
) &&
485 isa
<Constant
>(cast
<AllocaInst
>(I
)->getArraySize())) {
486 IFI
.StaticAllocas
.push_back(cast
<AllocaInst
>(I
));
490 // Transfer all of the allocas over in a block. Using splice means
491 // that the instructions aren't removed from the symbol table, then
493 Caller
->getEntryBlock().getInstList().splice(InsertPoint
,
494 FirstNewBlock
->getInstList(),
499 // Leave lifetime markers for the static alloca's, scoping them to the
500 // function we just inlined.
501 if (!IFI
.StaticAllocas
.empty()) {
502 // Also preserve the call graph, if applicable.
503 CallGraphNode
*StartCGN
= 0, *EndCGN
= 0, *CallerNode
= 0;
504 if (CallGraph
*CG
= IFI
.CG
) {
505 Function
*Start
= Intrinsic::getDeclaration(Caller
->getParent(),
506 Intrinsic::lifetime_start
);
507 Function
*End
= Intrinsic::getDeclaration(Caller
->getParent(),
508 Intrinsic::lifetime_end
);
509 StartCGN
= CG
->getOrInsertFunction(Start
);
510 EndCGN
= CG
->getOrInsertFunction(End
);
511 CallerNode
= (*CG
)[Caller
];
514 IRBuilder
<> builder(FirstNewBlock
->begin());
515 for (unsigned ai
= 0, ae
= IFI
.StaticAllocas
.size(); ai
!= ae
; ++ai
) {
516 AllocaInst
*AI
= IFI
.StaticAllocas
[ai
];
518 // If the alloca is already scoped to something smaller than the whole
519 // function then there's no need to add redundant, less accurate markers.
520 if (hasLifetimeMarkers(AI
))
523 CallInst
*StartCall
= builder
.CreateLifetimeStart(AI
);
524 if (IFI
.CG
) CallerNode
->addCalledFunction(StartCall
, StartCGN
);
525 for (unsigned ri
= 0, re
= Returns
.size(); ri
!= re
; ++ri
) {
526 IRBuilder
<> builder(Returns
[ri
]);
527 CallInst
*EndCall
= builder
.CreateLifetimeEnd(AI
);
528 if (IFI
.CG
) CallerNode
->addCalledFunction(EndCall
, EndCGN
);
533 // If the inlined code contained dynamic alloca instructions, wrap the inlined
534 // code with llvm.stacksave/llvm.stackrestore intrinsics.
535 if (InlinedFunctionInfo
.ContainsDynamicAllocas
) {
536 Module
*M
= Caller
->getParent();
537 // Get the two intrinsics we care about.
538 Function
*StackSave
= Intrinsic::getDeclaration(M
, Intrinsic::stacksave
);
539 Function
*StackRestore
=Intrinsic::getDeclaration(M
,Intrinsic::stackrestore
);
541 // If we are preserving the callgraph, add edges to the stacksave/restore
542 // functions for the calls we insert.
543 CallGraphNode
*StackSaveCGN
= 0, *StackRestoreCGN
= 0, *CallerNode
= 0;
544 if (CallGraph
*CG
= IFI
.CG
) {
545 StackSaveCGN
= CG
->getOrInsertFunction(StackSave
);
546 StackRestoreCGN
= CG
->getOrInsertFunction(StackRestore
);
547 CallerNode
= (*CG
)[Caller
];
550 // Insert the llvm.stacksave.
551 CallInst
*SavedPtr
= CallInst::Create(StackSave
, "savedstack",
552 FirstNewBlock
->begin());
553 if (IFI
.CG
) CallerNode
->addCalledFunction(SavedPtr
, StackSaveCGN
);
555 // Insert a call to llvm.stackrestore before any return instructions in the
557 for (unsigned i
= 0, e
= Returns
.size(); i
!= e
; ++i
) {
558 CallInst
*CI
= CallInst::Create(StackRestore
, SavedPtr
, "", Returns
[i
]);
559 if (IFI
.CG
) CallerNode
->addCalledFunction(CI
, StackRestoreCGN
);
562 // Count the number of StackRestore calls we insert.
563 unsigned NumStackRestores
= Returns
.size();
565 // If we are inlining an invoke instruction, insert restores before each
566 // unwind. These unwinds will be rewritten into branches later.
567 if (InlinedFunctionInfo
.ContainsUnwinds
&& isa
<InvokeInst
>(TheCall
)) {
568 for (Function::iterator BB
= FirstNewBlock
, E
= Caller
->end();
570 if (UnwindInst
*UI
= dyn_cast
<UnwindInst
>(BB
->getTerminator())) {
571 CallInst
*CI
= CallInst::Create(StackRestore
, SavedPtr
, "", UI
);
572 if (IFI
.CG
) CallerNode
->addCalledFunction(CI
, StackRestoreCGN
);
578 // If we are inlining tail call instruction through a call site that isn't
579 // marked 'tail', we must remove the tail marker for any calls in the inlined
580 // code. Also, calls inlined through a 'nounwind' call site should be marked
582 if (InlinedFunctionInfo
.ContainsCalls
&&
583 (MustClearTailCallFlags
|| MarkNoUnwind
)) {
584 for (Function::iterator BB
= FirstNewBlock
, E
= Caller
->end();
586 for (BasicBlock::iterator I
= BB
->begin(), E
= BB
->end(); I
!= E
; ++I
)
587 if (CallInst
*CI
= dyn_cast
<CallInst
>(I
)) {
588 if (MustClearTailCallFlags
)
589 CI
->setTailCall(false);
591 CI
->setDoesNotThrow();
595 // If we are inlining through a 'nounwind' call site then any inlined 'unwind'
596 // instructions are unreachable.
597 if (InlinedFunctionInfo
.ContainsUnwinds
&& MarkNoUnwind
)
598 for (Function::iterator BB
= FirstNewBlock
, E
= Caller
->end();
600 TerminatorInst
*Term
= BB
->getTerminator();
601 if (isa
<UnwindInst
>(Term
)) {
602 new UnreachableInst(Context
, Term
);
603 BB
->getInstList().erase(Term
);
607 // If we are inlining for an invoke instruction, we must make sure to rewrite
608 // any inlined 'unwind' instructions into branches to the invoke exception
609 // destination, and call instructions into invoke instructions.
610 if (InvokeInst
*II
= dyn_cast
<InvokeInst
>(TheCall
))
611 HandleInlinedInvoke(II
, FirstNewBlock
, InlinedFunctionInfo
);
613 // If we cloned in _exactly one_ basic block, and if that block ends in a
614 // return instruction, we splice the body of the inlined callee directly into
615 // the calling basic block.
616 if (Returns
.size() == 1 && std::distance(FirstNewBlock
, Caller
->end()) == 1) {
617 // Move all of the instructions right before the call.
618 OrigBB
->getInstList().splice(TheCall
, FirstNewBlock
->getInstList(),
619 FirstNewBlock
->begin(), FirstNewBlock
->end());
620 // Remove the cloned basic block.
621 Caller
->getBasicBlockList().pop_back();
623 // If the call site was an invoke instruction, add a branch to the normal
625 if (InvokeInst
*II
= dyn_cast
<InvokeInst
>(TheCall
))
626 BranchInst::Create(II
->getNormalDest(), TheCall
);
628 // If the return instruction returned a value, replace uses of the call with
629 // uses of the returned value.
630 if (!TheCall
->use_empty()) {
631 ReturnInst
*R
= Returns
[0];
632 if (TheCall
== R
->getReturnValue())
633 TheCall
->replaceAllUsesWith(UndefValue::get(TheCall
->getType()));
635 TheCall
->replaceAllUsesWith(R
->getReturnValue());
637 // Since we are now done with the Call/Invoke, we can delete it.
638 TheCall
->eraseFromParent();
640 // Since we are now done with the return instruction, delete it also.
641 Returns
[0]->eraseFromParent();
643 // We are now done with the inlining.
647 // Otherwise, we have the normal case, of more than one block to inline or
648 // multiple return sites.
650 // We want to clone the entire callee function into the hole between the
651 // "starter" and "ender" blocks. How we accomplish this depends on whether
652 // this is an invoke instruction or a call instruction.
653 BasicBlock
*AfterCallBB
;
654 if (InvokeInst
*II
= dyn_cast
<InvokeInst
>(TheCall
)) {
656 // Add an unconditional branch to make this look like the CallInst case...
657 BranchInst
*NewBr
= BranchInst::Create(II
->getNormalDest(), TheCall
);
659 // Split the basic block. This guarantees that no PHI nodes will have to be
660 // updated due to new incoming edges, and make the invoke case more
661 // symmetric to the call case.
662 AfterCallBB
= OrigBB
->splitBasicBlock(NewBr
,
663 CalledFunc
->getName()+".exit");
665 } else { // It's a call
666 // If this is a call instruction, we need to split the basic block that
667 // the call lives in.
669 AfterCallBB
= OrigBB
->splitBasicBlock(TheCall
,
670 CalledFunc
->getName()+".exit");
673 // Change the branch that used to go to AfterCallBB to branch to the first
674 // basic block of the inlined function.
676 TerminatorInst
*Br
= OrigBB
->getTerminator();
677 assert(Br
&& Br
->getOpcode() == Instruction::Br
&&
678 "splitBasicBlock broken!");
679 Br
->setOperand(0, FirstNewBlock
);
682 // Now that the function is correct, make it a little bit nicer. In
683 // particular, move the basic blocks inserted from the end of the function
684 // into the space made by splitting the source basic block.
685 Caller
->getBasicBlockList().splice(AfterCallBB
, Caller
->getBasicBlockList(),
686 FirstNewBlock
, Caller
->end());
688 // Handle all of the return instructions that we just cloned in, and eliminate
689 // any users of the original call/invoke instruction.
690 const Type
*RTy
= CalledFunc
->getReturnType();
693 if (Returns
.size() > 1) {
694 // The PHI node should go at the front of the new basic block to merge all
695 // possible incoming values.
696 if (!TheCall
->use_empty()) {
697 PHI
= PHINode::Create(RTy
, Returns
.size(), TheCall
->getName(),
698 AfterCallBB
->begin());
699 // Anything that used the result of the function call should now use the
700 // PHI node as their operand.
701 TheCall
->replaceAllUsesWith(PHI
);
704 // Loop over all of the return instructions adding entries to the PHI node
707 for (unsigned i
= 0, e
= Returns
.size(); i
!= e
; ++i
) {
708 ReturnInst
*RI
= Returns
[i
];
709 assert(RI
->getReturnValue()->getType() == PHI
->getType() &&
710 "Ret value not consistent in function!");
711 PHI
->addIncoming(RI
->getReturnValue(), RI
->getParent());
716 // Add a branch to the merge points and remove return instructions.
717 for (unsigned i
= 0, e
= Returns
.size(); i
!= e
; ++i
) {
718 ReturnInst
*RI
= Returns
[i
];
719 BranchInst::Create(AfterCallBB
, RI
);
720 RI
->eraseFromParent();
722 } else if (!Returns
.empty()) {
723 // Otherwise, if there is exactly one return value, just replace anything
724 // using the return value of the call with the computed value.
725 if (!TheCall
->use_empty()) {
726 if (TheCall
== Returns
[0]->getReturnValue())
727 TheCall
->replaceAllUsesWith(UndefValue::get(TheCall
->getType()));
729 TheCall
->replaceAllUsesWith(Returns
[0]->getReturnValue());
732 // Splice the code from the return block into the block that it will return
733 // to, which contains the code that was after the call.
734 BasicBlock
*ReturnBB
= Returns
[0]->getParent();
735 AfterCallBB
->getInstList().splice(AfterCallBB
->begin(),
736 ReturnBB
->getInstList());
738 // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
739 ReturnBB
->replaceAllUsesWith(AfterCallBB
);
741 // Delete the return instruction now and empty ReturnBB now.
742 Returns
[0]->eraseFromParent();
743 ReturnBB
->eraseFromParent();
744 } else if (!TheCall
->use_empty()) {
745 // No returns, but something is using the return value of the call. Just
747 TheCall
->replaceAllUsesWith(UndefValue::get(TheCall
->getType()));
750 // Since we are now done with the Call/Invoke, we can delete it.
751 TheCall
->eraseFromParent();
753 // We should always be able to fold the entry block of the function into the
754 // single predecessor of the block...
755 assert(cast
<BranchInst
>(Br
)->isUnconditional() && "splitBasicBlock broken!");
756 BasicBlock
*CalleeEntry
= cast
<BranchInst
>(Br
)->getSuccessor(0);
758 // Splice the code entry block into calling block, right before the
759 // unconditional branch.
760 OrigBB
->getInstList().splice(Br
, CalleeEntry
->getInstList());
761 CalleeEntry
->replaceAllUsesWith(OrigBB
); // Update PHI nodes
763 // Remove the unconditional branch.
764 OrigBB
->getInstList().erase(Br
);
766 // Now we can remove the CalleeEntry block, which is now empty.
767 Caller
->getBasicBlockList().erase(CalleeEntry
);
769 // If we inserted a phi node, check to see if it has a single value (e.g. all
770 // the entries are the same or undef). If so, remove the PHI so it doesn't
771 // block other optimizations.
773 if (Value
*V
= SimplifyInstruction(PHI
, IFI
.TD
)) {
774 PHI
->replaceAllUsesWith(V
);
775 PHI
->eraseFromParent();