//===- InlineFunction.cpp - Code to perform function inlining -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements inlining of a function into a call site, resolving
// parameters and the return value as appropriate.
//
// The code in this file for handling inlines through invoke
// instructions preserves semantics only under some assumptions about
// the behavior of unwinders which correspond to gcc-style libUnwind
// exception personality functions.  Eventually the IR will be
// improved to make this unnecessary, but until then, this code is
// marked [LIBUNWIND].
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Intrinsics.h"
#include "llvm/Attributes.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/IRBuilder.h"
using namespace llvm;

bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI) {
  return InlineFunction(CallSite(CI), IFI);
}
bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI) {
  return InlineFunction(CallSite(II), IFI);
}

/// [LIBUNWIND] Find the (possibly absent) call to @llvm.eh.selector in
/// the given landing pad.
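///
/// In a typical gcc-style landing pad, the calls of interest look like
/// this (illustrative IR only):
///
///   %exn = call i8* @llvm.eh.exception()
///   %sel = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn,
///              i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), ...)
///
/// The selector is usually in the landing pad itself, but other passes
/// may have sunk it into a successor block; this search tolerates that.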
static EHSelectorInst *findSelectorForLandingPad(BasicBlock *lpad) {
  // The llvm.eh.exception call is required to be in the landing pad.
  for (BasicBlock::iterator i = lpad->begin(), e = lpad->end(); i != e; ++i) {
    EHExceptionInst *exn = dyn_cast<EHExceptionInst>(i);
    if (!exn) continue;

    EHSelectorInst *selector = 0;
    for (Instruction::use_iterator
           ui = exn->use_begin(), ue = exn->use_end(); ui != ue; ++ui) {
      EHSelectorInst *sel = dyn_cast<EHSelectorInst>(*ui);
      if (!sel) continue;

      // Immediately accept an eh.selector in the landing pad.
      if (sel->getParent() == lpad) return sel;

      // Otherwise, use the first selector we see.
      if (!selector) selector = sel;
    }

    return selector;
  }

  return 0;
}

namespace {
  /// A class for recording information about inlining through an invoke.
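  ///
  /// The "outer" unwind destination is the landing pad of the invoke we
  /// are inlining through; the "inner" unwind destination is created on
  /// demand (by splitting the outer landing pad) so that rewritten
  /// llvm.eh.resume calls can branch to it directly.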
  class InvokeInliningInfo {
    BasicBlock *OuterUnwindDest;
    EHSelectorInst *OuterSelector;
    BasicBlock *InnerUnwindDest;
    PHINode *InnerExceptionPHI;
    PHINode *InnerSelectorPHI;
    SmallVector<Value*, 8> UnwindDestPHIValues;

  public:
    InvokeInliningInfo(InvokeInst *II) :
      OuterUnwindDest(II->getUnwindDest()), OuterSelector(0),
      InnerUnwindDest(0), InnerExceptionPHI(0), InnerSelectorPHI(0) {

      // If there are PHI nodes in the unwind destination block, we
      // need to keep track of which values came into them from the
      // invoke before removing the edge from this block.
      llvm::BasicBlock *invokeBB = II->getParent();
      for (BasicBlock::iterator I = OuterUnwindDest->begin();
             isa<PHINode>(I); ++I) {
        // Save the value to use for this edge.
        PHINode *phi = cast<PHINode>(I);
        UnwindDestPHIValues.push_back(phi->getIncomingValueForBlock(invokeBB));
      }
    }

    /// The outer unwind destination is the target of unwind edges
    /// introduced for calls within the inlined function.
    BasicBlock *getOuterUnwindDest() const {
      return OuterUnwindDest;
    }

    EHSelectorInst *getOuterSelector() {
      if (!OuterSelector)
        OuterSelector = findSelectorForLandingPad(OuterUnwindDest);
      return OuterSelector;
    }

    BasicBlock *getInnerUnwindDest();

    bool forwardEHResume(CallInst *call, BasicBlock *src);

    /// Add incoming-PHI values to the unwind destination block for
    /// the given basic block, using the values for the original
    /// invoke's source block.
    void addIncomingPHIValuesFor(BasicBlock *BB) const {
      addIncomingPHIValuesForInto(BB, OuterUnwindDest);
    }

    void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
      BasicBlock::iterator I = dest->begin();
      for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
        PHINode *phi = cast<PHINode>(I);
        phi->addIncoming(UnwindDestPHIValues[i], src);
      }
    }
  };
}

/// Replace all the instruction uses of a value with a different value.
/// This has the advantage of not screwing up the CallGraph.
static void replaceAllInsnUsesWith(Instruction *insn, Value *replacement) {
  for (Value::use_iterator i = insn->use_begin(), e = insn->use_end();
       i != e; ) {
    Use &use = i.getUse();
    ++i;
    if (isa<Instruction>(use.getUser()))
      use.set(replacement);
  }
}

/// Get or create a target for the branch out of rewritten calls to
/// llvm.eh.resume.
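///
/// Schematically (illustrative only), a landing pad of the form
///
///   lpad:
///     %x = phi ...
///     %exn = call i8* @llvm.eh.exception()
///     %sel = call i32 (i8*, i8*, ...)* @llvm.eh.selector(...)
///     ...body...
///
/// is split so that "...body..." moves into a new lpad.body block, with
/// PHIs inserted there to merge the values flowing in from the original
/// landing pad and from any forwarded llvm.eh.resume calls.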
BasicBlock *InvokeInliningInfo::getInnerUnwindDest() {
  if (InnerUnwindDest) return InnerUnwindDest;

  // Find the llvm.eh.exception and llvm.eh.selector calls for the outer
  // landing pad and hoist them to the point immediately following the PHIs.
  EHSelectorInst *selector = getOuterSelector();
  if (!selector) return 0;

  // The call to llvm.eh.exception *must* be in the landing pad.
  Instruction *exn = cast<Instruction>(selector->getArgOperand(0));
  assert(exn->getParent() == OuterUnwindDest);

  // TODO: recognize when we've already done this, so that we don't
  // get a linear number of these when inlining calls into lots of
  // invokes with the same landing pad.

  // Do the hoisting.
  Instruction *splitPoint = exn->getParent()->getFirstNonPHI();
  assert(splitPoint != selector && "selector-on-exception dominance broken!");
  if (splitPoint == exn) {
    selector->removeFromParent();
    selector->insertAfter(exn);
    splitPoint = selector->getNextNode();
  } else {
    exn->moveBefore(splitPoint);
    selector->moveBefore(splitPoint);
  }

  // Split the landing pad.
  InnerUnwindDest = OuterUnwindDest->splitBasicBlock(splitPoint,
                                        OuterUnwindDest->getName() + ".body");

  // The number of incoming edges we expect to the inner landing pad.
  const unsigned phiCapacity = 2;

  // Create corresponding new phis for all the phis in the outer landing pad.
  BasicBlock::iterator insertPoint = InnerUnwindDest->begin();
  BasicBlock::iterator I = OuterUnwindDest->begin();
  for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
    PHINode *outerPhi = cast<PHINode>(I);
    PHINode *innerPhi = PHINode::Create(outerPhi->getType(), phiCapacity,
                                        outerPhi->getName() + ".lpad-body",
                                        insertPoint);
    outerPhi->replaceAllUsesWith(innerPhi);
    innerPhi->addIncoming(outerPhi, OuterUnwindDest);
  }

  // Create a phi for the exception value...
  InnerExceptionPHI = PHINode::Create(exn->getType(), phiCapacity,
                                      "exn.lpad-body", insertPoint);
  replaceAllInsnUsesWith(exn, InnerExceptionPHI);
  selector->setArgOperand(0, exn); // restore this use
  InnerExceptionPHI->addIncoming(exn, OuterUnwindDest);

  // ...and the selector.
  InnerSelectorPHI = PHINode::Create(selector->getType(), phiCapacity,
                                     "selector.lpad-body", insertPoint);
  replaceAllInsnUsesWith(selector, InnerSelectorPHI);
  InnerSelectorPHI->addIncoming(selector, OuterUnwindDest);

  // All done.
  return InnerUnwindDest;
}

/// [LIBUNWIND] Try to forward the given call, which logically occurs
/// at the end of the given block, as a branch to the inner unwind
/// block.  Returns true if the call was forwarded.
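///
/// Conceptually this turns (illustrative only)
///
///   call void @llvm.eh.resume(i8* %exn, i32 %sel)
///
/// into an unconditional branch to the inner unwind destination, wiring
/// %exn and %sel into the PHIs that getInnerUnwindDest created.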
bool InvokeInliningInfo::forwardEHResume(CallInst *call, BasicBlock *src) {
  // First, check whether this is a call to the intrinsic.
  Function *fn = dyn_cast<Function>(call->getCalledValue());
  if (!fn || fn->getName() != "llvm.eh.resume")
    return false;

  // At this point, we need to return true on all paths, because
  // otherwise we'll construct an invoke of the intrinsic, which is
  // not well-formed.

  // Try to find or make an inner unwind dest, which will fail if we
  // can't find a selector call for the outer unwind dest.
  BasicBlock *dest = getInnerUnwindDest();
  bool hasSelector = (dest != 0);

  // If we failed, just use the outer unwind dest, dropping the
  // exception and selector on the floor.
  if (!hasSelector)
    dest = OuterUnwindDest;

  // Make a branch.
  BranchInst::Create(dest, src);

  // Update the phis in the destination.  They were inserted in an
  // order which makes this work.
  addIncomingPHIValuesForInto(src, dest);

  if (hasSelector) {
    InnerExceptionPHI->addIncoming(call->getArgOperand(0), src);
    InnerSelectorPHI->addIncoming(call->getArgOperand(1), src);
  }

  return true;
}

/// [LIBUNWIND] Check whether this selector is "only cleanups":
///   call i32 @llvm.eh.selector(blah, blah, i32 0)
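///
/// The first two arguments of @llvm.eh.selector are the exception value
/// and the personality function; the remaining arguments are clauses,
/// where a lone "i32 0" marks a cleanup-only landing pad.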
static bool isCleanupOnlySelector(EHSelectorInst *selector) {
  if (selector->getNumArgOperands() != 3) return false;
  ConstantInt *val = dyn_cast<ConstantInt>(selector->getArgOperand(2));
  return (val && val->isZero());
}

/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
/// an invoke, we have to turn all of the calls that can throw into
/// invokes.  This function analyzes BB to see if there are any calls, and if
/// so, it rewrites them to be invokes that unwind to the outer unwind
/// destination, and fills in the PHI nodes there with the values recorded
/// from the original invoke's source block.
///
/// Returns true to indicate that the next block should be skipped.
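///
/// For example (illustrative only), a potentially-throwing call
///
///   %r = call i32 @f(i32 %a)
///
/// in an inlined block becomes
///
///   %r = invoke i32 @f(i32 %a)
///           to label %r.noexc unwind label %lpad
///
/// where %r.noexc is the split-off remainder of the block and %lpad is
/// the outer unwind destination of the invoke being inlined through.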
static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
                                                   InvokeInliningInfo &Invoke) {
  for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
    Instruction *I = BBI++;

    // We only need to check for function calls: inlined invoke
    // instructions require no special handling.
    CallInst *CI = dyn_cast<CallInst>(I);
    if (CI == 0) continue;

    // LIBUNWIND: merge selector instructions.
    if (EHSelectorInst *Inner = dyn_cast<EHSelectorInst>(CI)) {
      EHSelectorInst *Outer = Invoke.getOuterSelector();
      if (!Outer) continue;

      bool innerIsOnlyCleanup = isCleanupOnlySelector(Inner);
      bool outerIsOnlyCleanup = isCleanupOnlySelector(Outer);

      // If both selectors contain only cleanups, we don't need to do
      // anything.  TODO: this is really just a very specific instance
      // of a much more general optimization.
      if (innerIsOnlyCleanup && outerIsOnlyCleanup) continue;

      // Otherwise, we just append the outer selector to the inner selector.
      SmallVector<Value*, 16> NewSelector;
      for (unsigned i = 0, e = Inner->getNumArgOperands(); i != e; ++i)
        NewSelector.push_back(Inner->getArgOperand(i));
      for (unsigned i = 2, e = Outer->getNumArgOperands(); i != e; ++i)
        NewSelector.push_back(Outer->getArgOperand(i));

      CallInst *NewInner = CallInst::Create(Inner->getCalledValue(),
                                            NewSelector.begin(),
                                            NewSelector.end(),
                                            "",
                                            Inner);
      // No need to copy attributes, calling convention, etc.
      NewInner->takeName(Inner);
      Inner->replaceAllUsesWith(NewInner);
      Inner->eraseFromParent();
      continue;
    }

    // If this call cannot unwind, don't convert it to an invoke.
    if (CI->doesNotThrow())
      continue;

    // Convert this function call into an invoke instruction.
    // First, split the basic block.
    BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");

    // Delete the unconditional branch inserted by splitBasicBlock.
    BB->getInstList().pop_back();

    // LIBUNWIND: If this is a call to @llvm.eh.resume, just branch
    // directly to the new landing pad.
    if (Invoke.forwardEHResume(CI, BB)) {
      // TODO: 'Split' is now unreachable; clean it up.

      // We want to leave the original call intact so that the call
      // graph and other structures won't get misled.  We also have to
      // avoid processing the next block, or we'll iterate here forever.
      return true;
    }

    // Otherwise, create the new invoke instruction.
    ImmutableCallSite CS(CI);
    SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end());
    InvokeInst *II =
      InvokeInst::Create(CI->getCalledValue(), Split,
                         Invoke.getOuterUnwindDest(),
                         InvokeArgs.begin(), InvokeArgs.end(),
                         CI->getName(), BB);
    II->setCallingConv(CI->getCallingConv());
    II->setAttributes(CI->getAttributes());

    // Make sure that anything using the call now uses the invoke!  This also
    // updates the CallGraph if present, because it uses a WeakVH.
    CI->replaceAllUsesWith(II);

    Split->getInstList().pop_front();  // Delete the original call.

    // Update any PHI nodes in the exceptional block to indicate that
    // there is now a new entry in them.
    Invoke.addIncomingPHIValuesFor(BB);
    return false;
  }

  return false;
}

/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls
/// in the body of the inlined function into invokes and turn unwind
/// instructions into branches to the invoke unwind dest.
///
/// II is the invoke instruction being inlined.  FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
/// and InlinedCodeInfo is information about the code that got inlined.
static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
                                ClonedCodeInfo &InlinedCodeInfo) {
  BasicBlock *InvokeDest = II->getUnwindDest();

  Function *Caller = FirstNewBlock->getParent();

  // The inlined code is currently at the end of the function, so scan from the
  // start of the inlined code to its end, checking for stuff we need to
  // rewrite.  If the code doesn't have calls or unwinds, we know there is
  // nothing to rewrite.
  if (!InlinedCodeInfo.ContainsCalls && !InlinedCodeInfo.ContainsUnwinds) {
    // Now that everything is happy, we have one final detail.  The PHI nodes in
    // the exception destination block still have entries due to the original
    // invoke instruction.  Eliminate these entries (which might even delete the
    // PHI node) now.
    InvokeDest->removePredecessor(II->getParent());
    return;
  }

  InvokeInliningInfo Invoke(II);

  for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
    if (InlinedCodeInfo.ContainsCalls)
      if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) {
        // Honor a request to skip the next block.  We don't need to
        // consider UnwindInsts in this case either.
        ++BB;
        continue;
      }

    if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
      // An UnwindInst requires special handling when it gets inlined into an
      // invoke site.  Once this happens, we know that the unwind would cause
      // a control transfer to the invoke exception destination, so we can
      // transform it into a direct branch to the exception destination.
      BranchInst::Create(InvokeDest, UI);

      // Delete the unwind instruction!
      UI->eraseFromParent();

      // Update any PHI nodes in the exceptional block to indicate that
      // there is now a new entry in them.
      Invoke.addIncomingPHIValuesFor(BB);
    }
  }

  // Now that everything is happy, we have one final detail.  The PHI nodes in
  // the exception destination block still have entries due to the original
  // invoke instruction.  Eliminate these entries (which might even delete the
  // PHI node) now.
  InvokeDest->removePredecessor(II->getParent());
}

/// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee
/// into the caller, update the specified callgraph to reflect the changes we
/// made.  Note that it's possible that not all code was copied over, so only
/// some edges of the callgraph may remain.
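///
/// For example, if B calls C and we inline B into A, the edge A->B must be
/// replaced by an edge A->C (assuming the call to C survived cloning and
/// wasn't constant-folded away).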
static void UpdateCallGraphAfterInlining(CallSite CS,
                                         Function::iterator FirstNewBlock,
                                         ValueToValueMapTy &VMap,
                                         InlineFunctionInfo &IFI) {
  CallGraph &CG = *IFI.CG;
  const Function *Caller = CS.getInstruction()->getParent()->getParent();
  const Function *Callee = CS.getCalledFunction();
  CallGraphNode *CalleeNode = CG[Callee];
  CallGraphNode *CallerNode = CG[Caller];

  // Since we inlined some uninlined call sites in the callee into the caller,
  // add edges from the caller to all of the callees of the callee.
  CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end();

  // Consider the case where CalleeNode == CallerNode.
  CallGraphNode::CalledFunctionsVector CallCache;
  if (CalleeNode == CallerNode) {
    CallCache.assign(I, E);
    I = CallCache.begin();
    E = CallCache.end();
  }

  for (; I != E; ++I) {
    const Value *OrigCall = I->first;

    ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
    // Only copy the edge if the call was inlined!
    if (VMI == VMap.end() || VMI->second == 0)
      continue;

    // If the call was inlined, but then constant folded, there is no edge to
    // add.  Check for this case.
    Instruction *NewCall = dyn_cast<Instruction>(VMI->second);
    if (NewCall == 0) continue;

    // Remember that this call site got inlined for the client of
    // InlineFunction.
    IFI.InlinedCalls.push_back(NewCall);

    // It's possible that inlining the callsite will cause it to go from an
    // indirect to a direct call by resolving a function pointer.  If this
    // happens, set the callee of the new call site to a more precise
    // destination.  This can also happen if the call graph node of the caller
    // was just unnecessarily imprecise.
    if (I->second->getFunction() == 0)
      if (Function *F = CallSite(NewCall).getCalledFunction()) {
        // Indirect call site resolved to direct call.
        CallerNode->addCalledFunction(CallSite(NewCall), CG[F]);

        continue;
      }

    CallerNode->addCalledFunction(CallSite(NewCall), I->second);
  }

  // Update the call graph by deleting the edge from Caller to Callee for the
  // inlined call site.  We must do this after the loop above in case Caller
  // and Callee are the same.
  CallerNode->removeCallEdgeFor(CS);
}

/// HandleByValArgument - When inlining a call site that has a byval argument,
/// we have to make the implicit memcpy explicit by adding it.
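///
/// Conceptually (illustrative only), inlining a call like
///
///   call void @f(%struct.S* byval %p)
///
/// introduces
///
///   %p.copy = alloca %struct.S
///   call void @llvm.memcpy.p0i8.p0i8.i64(...)   ; %p.copy <- %p
///
/// so the inlined body mutates its own private copy, preserving byval
/// semantics.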
static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
                                  const Function *CalledFunc,
                                  InlineFunctionInfo &IFI,
                                  unsigned ByValAlignment) {
  const Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();

  // If the called function is readonly, then it could not mutate the caller's
  // copy of the byval'd memory.  In this case, it is safe to elide the copy and
  // temporary.
  if (CalledFunc->onlyReadsMemory()) {
    // If the byval argument has a specified alignment that is greater than the
    // alignment of the passed-in pointer, then we either have to round up the
    // input pointer or give up on this transformation.
    if (ByValAlignment <= 1)  // 0 = unspecified, 1 = no particular alignment.
      return Arg;

    // If the pointer is already known to be sufficiently aligned, or if we can
    // round it up to a larger alignment, then we don't need a temporary.
    if (getOrEnforceKnownAlignment(Arg, ByValAlignment,
                                   IFI.TD) >= ByValAlignment)
      return Arg;

    // Otherwise, we have to make a memcpy to get a safe alignment.  This is bad
    // for code quality, but rarely happens and is required for correctness.
  }

  LLVMContext &Context = Arg->getContext();

  const Type *VoidPtrTy = Type::getInt8PtrTy(Context);

  // Create the alloca.  If we have TargetData, use nice alignment.
  unsigned Align = 1;
  if (IFI.TD)
    Align = IFI.TD->getPrefTypeAlignment(AggTy);

  // If the byval had an alignment specified, we *must* use at least that
  // alignment, as it is required by the byval argument (and uses of the
  // pointer inside the callee).
  Align = std::max(Align, ByValAlignment);

  Function *Caller = TheCall->getParent()->getParent();

  Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(),
                                    &*Caller->begin()->begin());
  // Emit a memcpy.
  const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
  Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
                                                 Intrinsic::memcpy,
                                                 Tys, 3);
  Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
  Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);

  Value *Size;
  if (IFI.TD == 0)
    Size = ConstantExpr::getSizeOf(AggTy);
  else
    Size = ConstantInt::get(Type::getInt64Ty(Context),
                            IFI.TD->getTypeStoreSize(AggTy));

  // Always generate a memcpy of alignment 1 here because we don't know
  // the alignment of the src pointer.  Other optimizations can infer
  // better alignment.
  Value *CallArgs[] = {
    DestCast, SrcCast, Size,
    ConstantInt::get(Type::getInt32Ty(Context), 1),
    ConstantInt::getFalse(Context) // isVolatile
  };
  CallInst *TheMemCpy =
    CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall);

  // If we have a call graph, update it.
  if (CallGraph *CG = IFI.CG) {
    CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn);
    CallGraphNode *CallerNode = (*CG)[Caller];
    CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN);
  }

  // Uses of the argument in the function should use our new alloca
  // instead.
  return NewAlloca;
}

// isUsedByLifetimeMarker - Check whether this Value is used by a lifetime
// intrinsic.
static bool isUsedByLifetimeMarker(Value *V) {
  for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE;
       ++UI) {
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI)) {
      switch (II->getIntrinsicID()) {
      default: break;
      case Intrinsic::lifetime_start:
      case Intrinsic::lifetime_end:
        return true;
      }
    }
  }
  return false;
}

// hasLifetimeMarkers - Check whether the given alloca already has
// lifetime.start or lifetime.end intrinsics.
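//
// Lifetime intrinsics take an i8* operand, so an alloca of another type
// can only be marked through a bitcast to i8*; both forms are checked here.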
static bool hasLifetimeMarkers(AllocaInst *AI) {
  const Type *Int8PtrTy = Type::getInt8PtrTy(AI->getType()->getContext());
  if (AI->getType() == Int8PtrTy)
    return isUsedByLifetimeMarker(AI);

  // Do a scan to find all the bitcasts to i8*.
  for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); I != E;
       ++I) {
    if (I->getType() != Int8PtrTy) continue;
    if (!isa<BitCastInst>(*I)) continue;
    if (isUsedByLifetimeMarker(*I))
      return true;
  }
  return false;
}

// InlineFunction - This function inlines the called function into the basic
// block of the caller.  This returns false if it is not possible to inline
// this call.  The program is still in a well-defined state if this occurs
// though.
//
// Note that this only does one level of inlining.  For example, if the
// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
// exists in the instruction stream.  Similarly this will inline a recursive
// function by one level.
//
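// A typical client invocation looks like this (sketch only; it assumes
// the caller has an optional CallGraph and TargetData to hand to
// InlineFunctionInfo, both of which may be null):
//
//   InlineFunctionInfo IFI(CG, TD);
//   if (InlineFunction(CS, IFI)) {
//     // CS's instruction has been erased; IFI.StaticAllocas and
//     // IFI.InlinedCalls describe what was inlined.
//   }
//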
bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
  Instruction *TheCall = CS.getInstruction();
  LLVMContext &Context = TheCall->getContext();
  assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
         "Instruction not in function!");

  // If IFI has any state in it, zap it before we fill it in.
  IFI.reset();

  const Function *CalledFunc = CS.getCalledFunction();
  if (CalledFunc == 0 ||          // Can't inline external function or indirect
      CalledFunc->isDeclaration() || // call, or call to a vararg function!
      CalledFunc->getFunctionType()->isVarArg()) return false;

  // If the call to the callee is not a tail call, we must clear the 'tail'
  // flags on any calls that we inline.
  bool MustClearTailCallFlags =
    !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall());

  // If the call to the callee cannot throw, set the 'nounwind' flag on any
  // calls that we inline.
  bool MarkNoUnwind = CS.doesNotThrow();

  BasicBlock *OrigBB = TheCall->getParent();
  Function *Caller = OrigBB->getParent();

  // GC poses two hazards to inlining, which only occur when the callee has GC:
  //  1. If the caller has no GC, then the callee's GC must be propagated to the
  //     caller.
  //  2. If the caller has a differing GC, it is invalid to inline.
  if (CalledFunc->hasGC()) {
    if (!Caller->hasGC())
      Caller->setGC(CalledFunc->getGC());
    else if (CalledFunc->getGC() != Caller->getGC())
      return false;
  }

  // Get an iterator to the last basic block in the function, which will have
  // the new function inlined after it.
  //
  Function::iterator LastBlock = &Caller->back();

  // Make sure to capture all of the return instructions from the cloned
  // function.
  SmallVector<ReturnInst*, 8> Returns;
  ClonedCodeInfo InlinedFunctionInfo;
  Function::iterator FirstNewBlock;

  { // Scope to destroy VMap after cloning.
    ValueToValueMapTy VMap;

    assert(CalledFunc->arg_size() == CS.arg_size() &&
           "No varargs calls can be inlined!");

    // Calculate the vector of arguments to pass into the function cloner, which
    // matches up the formal to the actual argument values.
    CallSite::arg_iterator AI = CS.arg_begin();
    unsigned ArgNo = 0;
    for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
         E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
      Value *ActualArg = *AI;

      // When byval arguments are actually inlined, we need to make the copy
      // implied by them explicit.  However, we don't do this if the callee is
      // readonly or readnone, because the copy would be unneeded: the callee
      // doesn't modify the struct.
      if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal)) {
        ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
                                        CalledFunc->getParamAlignment(ArgNo+1));

        // Calls that we inline may use the new alloca, so we need to clear
        // their 'tail' flags if HandleByValArgument introduced a new alloca and
        // the callee has calls.
        MustClearTailCallFlags |= ActualArg != *AI;
      }

      VMap[I] = ActualArg;
    }

    // We want the inliner to prune the code as it copies.  We would LOVE to
    // have no dead or constant instructions leftover after inlining occurs
    // (which can happen, e.g., because an argument was constant), but we'll be
    // happy with whatever the cloner can do.
    CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
                              /*ModuleLevelChanges=*/false, Returns, ".i",
                              &InlinedFunctionInfo, IFI.TD, TheCall);

    // Remember the first block that is newly cloned over.
    FirstNewBlock = LastBlock; ++FirstNewBlock;

    // Update the callgraph if requested.
    if (IFI.CG)
      UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
  }

  // If there are any alloca instructions in the block that used to be the entry
  // block for the callee, move them to the entry block of the caller.  First
  // calculate which instruction they should be inserted before.  We insert the
  // instructions at the end of the current alloca list.
  //
  {
    BasicBlock::iterator InsertPoint = Caller->begin()->begin();
    for (BasicBlock::iterator I = FirstNewBlock->begin(),
         E = FirstNewBlock->end(); I != E; ) {
      AllocaInst *AI = dyn_cast<AllocaInst>(I++);
      if (AI == 0) continue;

      // If the alloca is now dead, remove it.  This often occurs due to code
      // specialization.
      if (AI->use_empty()) {
        AI->eraseFromParent();
        continue;
      }

      if (!isa<Constant>(AI->getArraySize()))
        continue;

      // Keep track of the static allocas that we inline into the caller.
      IFI.StaticAllocas.push_back(AI);

      // Scan for the block of allocas that we can move over, and move them
      // all at once.
      while (isa<AllocaInst>(I) &&
             isa<Constant>(cast<AllocaInst>(I)->getArraySize())) {
        IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
        ++I;
      }

      // Transfer all of the allocas over in a block.  Using splice means
      // that the instructions aren't removed from the symbol table, then
      // reinserted.
      Caller->getEntryBlock().getInstList().splice(InsertPoint,
                                                   FirstNewBlock->getInstList(),
                                                   AI, I);
    }
  }

  // Leave lifetime markers for the static allocas, scoping them to the
  // function we just inlined.
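  // That is (illustrative only):
  //
  //   call void @llvm.lifetime.start(i64 -1, i8* %buf)
  //   ... inlined body ...
  //   call void @llvm.lifetime.end(i64 -1, i8* %buf)    ; before each return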
  if (!IFI.StaticAllocas.empty()) {
    // Also preserve the call graph, if applicable.
    CallGraphNode *StartCGN = 0, *EndCGN = 0, *CallerNode = 0;
    if (CallGraph *CG = IFI.CG) {
      Function *Start = Intrinsic::getDeclaration(Caller->getParent(),
                                                  Intrinsic::lifetime_start);
      Function *End = Intrinsic::getDeclaration(Caller->getParent(),
                                                Intrinsic::lifetime_end);
      StartCGN = CG->getOrInsertFunction(Start);
      EndCGN = CG->getOrInsertFunction(End);
      CallerNode = (*CG)[Caller];
    }

    IRBuilder<> builder(FirstNewBlock->begin());
    for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
      AllocaInst *AI = IFI.StaticAllocas[ai];

      // If the alloca is already scoped to something smaller than the whole
      // function then there's no need to add redundant, less accurate markers.
      if (hasLifetimeMarkers(AI))
        continue;

      CallInst *StartCall = builder.CreateLifetimeStart(AI);
      if (IFI.CG) CallerNode->addCalledFunction(StartCall, StartCGN);
      for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) {
        IRBuilder<> builder(Returns[ri]);
        CallInst *EndCall = builder.CreateLifetimeEnd(AI);
        if (IFI.CG) CallerNode->addCalledFunction(EndCall, EndCGN);
      }
    }
  }

  // If the inlined code contained dynamic alloca instructions, wrap the inlined
  // code with llvm.stacksave/llvm.stackrestore intrinsics.
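  //
  // Schematically (illustrative only):
  //
  //   %savedstack = call i8* @llvm.stacksave()
  //   ... inlined body containing dynamic allocas ...
  //   call void @llvm.stackrestore(i8* %savedstack)   ; before each return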
  if (InlinedFunctionInfo.ContainsDynamicAllocas) {
    Module *M = Caller->getParent();
    // Get the two intrinsics we care about.
    Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
    Function *StackRestore =
      Intrinsic::getDeclaration(M, Intrinsic::stackrestore);

    // If we are preserving the callgraph, add edges to the stacksave/restore
    // functions for the calls we insert.
    CallGraphNode *StackSaveCGN = 0, *StackRestoreCGN = 0, *CallerNode = 0;
    if (CallGraph *CG = IFI.CG) {
      StackSaveCGN    = CG->getOrInsertFunction(StackSave);
      StackRestoreCGN = CG->getOrInsertFunction(StackRestore);
      CallerNode = (*CG)[Caller];
    }

    // Insert the llvm.stacksave.
    CallInst *SavedPtr = CallInst::Create(StackSave, "savedstack",
                                          FirstNewBlock->begin());
    if (IFI.CG) CallerNode->addCalledFunction(SavedPtr, StackSaveCGN);

    // Insert a call to llvm.stackrestore before any return instructions in the
    // inlined function.
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", Returns[i]);
      if (IFI.CG) CallerNode->addCalledFunction(CI, StackRestoreCGN);
    }

    // Count the number of StackRestore calls we insert.
    unsigned NumStackRestores = Returns.size();

    // If we are inlining an invoke instruction, insert restores before each
    // unwind.  These unwinds will be rewritten into branches later.
    if (InlinedFunctionInfo.ContainsUnwinds && isa<InvokeInst>(TheCall)) {
      for (Function::iterator BB = FirstNewBlock, E = Caller->end();
           BB != E; ++BB)
        if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
          CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", UI);
          if (IFI.CG) CallerNode->addCalledFunction(CI, StackRestoreCGN);
          ++NumStackRestores;
        }
    }
  }

  // If we are inlining a tail call instruction through a call site that isn't
  // marked 'tail', we must remove the tail marker for any calls in the inlined
  // code.  Also, calls inlined through a 'nounwind' call site should be marked
  // 'nounwind'.
  if (InlinedFunctionInfo.ContainsCalls &&
      (MustClearTailCallFlags || MarkNoUnwind)) {
    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
         BB != E; ++BB)
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
        if (CallInst *CI = dyn_cast<CallInst>(I)) {
          if (MustClearTailCallFlags)
            CI->setTailCall(false);
          if (MarkNoUnwind)
            CI->setDoesNotThrow();
        }
  }

  // If we are inlining through a 'nounwind' call site then any inlined 'unwind'
  // instructions are unreachable.
  if (InlinedFunctionInfo.ContainsUnwinds && MarkNoUnwind)
    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
         BB != E; ++BB) {
      TerminatorInst *Term = BB->getTerminator();
      if (isa<UnwindInst>(Term)) {
        new UnreachableInst(Context, Term);
        BB->getInstList().erase(Term);
      }
    }

  // If we are inlining for an invoke instruction, we must make sure to rewrite
  // any inlined 'unwind' instructions into branches to the invoke exception
  // destination, and call instructions into invoke instructions.
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
    HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);

  // If we cloned in _exactly one_ basic block, and if that block ends in a
  // return instruction, we splice the body of the inlined callee directly into
  // the calling basic block.
  if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
    // Move all of the instructions right before the call.
    OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
                                 FirstNewBlock->begin(), FirstNewBlock->end());
    // Remove the cloned basic block.
    Caller->getBasicBlockList().pop_back();

    // If the call site was an invoke instruction, add a branch to the normal
    // destination.
    if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
      BranchInst::Create(II->getNormalDest(), TheCall);

    // If the return instruction returned a value, replace uses of the call with
    // uses of the returned value.
    if (!TheCall->use_empty()) {
      ReturnInst *R = Returns[0];
      if (TheCall == R->getReturnValue())
        TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
      else
        TheCall->replaceAllUsesWith(R->getReturnValue());
    }
    // Since we are now done with the Call/Invoke, we can delete it.
    TheCall->eraseFromParent();

    // Since we are now done with the return instruction, delete it also.
    Returns[0]->eraseFromParent();

    // We are now done with the inlining.
    return true;
  }

  // Otherwise, we have the normal case, of more than one block to inline or
  // multiple return sites.

  // We want to clone the entire callee function into the hole between the
  // "starter" and "ender" blocks.  How we accomplish this depends on whether
  // this is an invoke instruction or a call instruction.
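  //
  // Schematically, OrigBB is split into OrigBB -> [inlined blocks] ->
  // AfterCallBB; returns in the inlined code are rewritten to branch to
  // AfterCallBB, merging their values through a PHI node when needed.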
  BasicBlock *AfterCallBB;
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {

    // Add an unconditional branch to make this look like the CallInst case...
    BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);

    // Split the basic block.  This guarantees that no PHI nodes will have to be
    // updated due to new incoming edges, and makes the invoke case more
    // symmetric to the call case.
    AfterCallBB = OrigBB->splitBasicBlock(NewBr,
                                          CalledFunc->getName()+".exit");

  } else {  // It's a call
    // If this is a call instruction, we need to split the basic block that
    // the call lives in.
    //
    AfterCallBB = OrigBB->splitBasicBlock(TheCall,
                                          CalledFunc->getName()+".exit");
  }

  // Change the branch that used to go to AfterCallBB to branch to the first
  // basic block of the inlined function.
  //
  TerminatorInst *Br = OrigBB->getTerminator();
  assert(Br && Br->getOpcode() == Instruction::Br &&
         "splitBasicBlock broken!");
  Br->setOperand(0, FirstNewBlock);


  // Now that the function is correct, make it a little bit nicer.  In
  // particular, move the basic blocks inserted from the end of the function
  // into the space made by splitting the source basic block.
  Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
                                     FirstNewBlock, Caller->end());

  // Handle all of the return instructions that we just cloned in, and eliminate
  // any users of the original call/invoke instruction.
  const Type *RTy = CalledFunc->getReturnType();

  PHINode *PHI = 0;
  if (Returns.size() > 1) {
    // The PHI node should go at the front of the new basic block to merge all
    // possible incoming values.
    if (!TheCall->use_empty()) {
      PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(),
                            AfterCallBB->begin());
      // Anything that used the result of the function call should now use the
      // PHI node as its operand.
      TheCall->replaceAllUsesWith(PHI);
    }

    // Loop over all of the return instructions adding entries to the PHI node
    // as appropriate.
    if (PHI) {
      for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
        ReturnInst *RI = Returns[i];
        assert(RI->getReturnValue()->getType() == PHI->getType() &&
               "Ret value not consistent in function!");
        PHI->addIncoming(RI->getReturnValue(), RI->getParent());
      }
    }


    // Add a branch to the merge points and remove return instructions.
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      ReturnInst *RI = Returns[i];
      BranchInst::Create(AfterCallBB, RI);
      RI->eraseFromParent();
    }
  } else if (!Returns.empty()) {
    // Otherwise, if there is exactly one return value, just replace anything
    // using the return value of the call with the computed value.
    if (!TheCall->use_empty()) {
      if (TheCall == Returns[0]->getReturnValue())
        TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
      else
        TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
    }

    // Splice the code from the return block into the block that it will return
    // to, which contains the code that was after the call.
    BasicBlock *ReturnBB = Returns[0]->getParent();
    AfterCallBB->getInstList().splice(AfterCallBB->begin(),
                                      ReturnBB->getInstList());

    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
    ReturnBB->replaceAllUsesWith(AfterCallBB);

    // Delete the return instruction and the now-empty ReturnBB.
    Returns[0]->eraseFromParent();
    ReturnBB->eraseFromParent();
  } else if (!TheCall->use_empty()) {
    // No returns, but something is using the return value of the call.  Just
    // nuke the result.
    TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
  }

  // Since we are now done with the Call/Invoke, we can delete it.
  TheCall->eraseFromParent();

  // We should always be able to fold the entry block of the function into the
  // single predecessor of the block...
  assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
  BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);

  // Splice the code from the callee's entry block into the calling block,
  // right before the unconditional branch.
  OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
  CalleeEntry->replaceAllUsesWith(OrigBB);  // Update PHI nodes.

  // Remove the unconditional branch.
  OrigBB->getInstList().erase(Br);

  // Now we can remove the CalleeEntry block, which is now empty.
  Caller->getBasicBlockList().erase(CalleeEntry);

  // If we inserted a phi node, check to see if it has a single value (e.g. all
  // the entries are the same or undef).  If so, remove the PHI so it doesn't
  // block other optimizations.
  if (PHI)
    if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
      PHI->replaceAllUsesWith(V);
      PHI->eraseFromParent();
    }

  return true;
}