1 //===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements inline cost analysis.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Analysis/InlineCost.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/ADT/SmallPtrSet.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/Analysis/AssumptionCache.h"
21 #include "llvm/Analysis/CodeMetrics.h"
22 #include "llvm/Analysis/ConstantFolding.h"
23 #include "llvm/Analysis/InstructionSimplify.h"
24 #include "llvm/Analysis/ProfileSummaryInfo.h"
25 #include "llvm/Analysis/TargetTransformInfo.h"
26 #include "llvm/IR/CallSite.h"
27 #include "llvm/IR/CallingConv.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/GetElementPtrTypeIterator.h"
30 #include "llvm/IR/GlobalAlias.h"
31 #include "llvm/IR/InstVisitor.h"
32 #include "llvm/IR/IntrinsicInst.h"
33 #include "llvm/IR/Operator.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/raw_ostream.h"
36 
37 using namespace llvm;
38 
39 #define DEBUG_TYPE "inline-cost"
40 
41 STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
42 
43 static cl::opt<int> InlineThreshold(
44     "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
45     cl::desc("Control the amount of inlining to perform (default = 225)"));
46 
47 static cl::opt<int> HintThreshold(
48     "inlinehint-threshold", cl::Hidden, cl::init(325),
49     cl::desc("Threshold for inlining functions with inline hint"));
50 
// We introduce this threshold to help performance of instrumentation-based
// PGO before we actually hook up the inliner with analysis passes such as BPI
// and BFI.
54 static cl::opt<int> ColdThreshold(
55     "inlinecold-threshold", cl::Hidden, cl::init(225),
56     cl::desc("Threshold for inlining functions with cold attribute"));
57 
58 static cl::opt<int>
59     HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
60                          cl::ZeroOrMore,
61                          cl::desc("Threshold for hot callsites "));
62 
63 namespace {
64 
65 class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
66   typedef InstVisitor<CallAnalyzer, bool> Base;
67   friend class InstVisitor<CallAnalyzer, bool>;
68 
69   /// The TargetTransformInfo available for this compilation.
70   const TargetTransformInfo &TTI;
71 
72   /// Getter for the cache of @llvm.assume intrinsics.
73   std::function<AssumptionCache &(Function &)> &GetAssumptionCache;
74 
75   /// Profile summary information.
76   ProfileSummaryInfo *PSI;
77 
78   /// The called function.
79   Function &F;
80 
81   /// The candidate callsite being analyzed. Please do not use this to do
82   /// analysis in the caller function; we want the inline cost query to be
83   /// easily cacheable. Instead, use the cover function paramHasAttr.
84   CallSite CandidateCS;
85 
86   /// Tunable parameters that control the analysis.
87   const InlineParams &Params;
88 
89   int Threshold;
90   int Cost;
91 
92   bool IsCallerRecursive;
93   bool IsRecursiveCall;
94   bool ExposesReturnsTwice;
95   bool HasDynamicAlloca;
96   bool ContainsNoDuplicateCall;
97   bool HasReturn;
98   bool HasIndirectBr;
99   bool HasFrameEscape;
100 
101   /// Number of bytes allocated statically by the callee.
102   uint64_t AllocatedSize;
103   unsigned NumInstructions, NumVectorInstructions;
104   int FiftyPercentVectorBonus, TenPercentVectorBonus;
105   int VectorBonus;
106 
107   /// While we walk the potentially-inlined instructions, we build up and
108   /// maintain a mapping of simplified values specific to this callsite. The
109   /// idea is to propagate any special information we have about arguments to
110   /// this call through the inlinable section of the function, and account for
111   /// likely simplifications post-inlining. The most important aspect we track
112   /// is CFG altering simplifications -- when we prove a basic block dead, that
113   /// can cause dramatic shifts in the cost of inlining a function.
114   DenseMap<Value *, Constant *> SimplifiedValues;
115 
116   /// Keep track of the values which map back (through function arguments) to
117   /// allocas on the caller stack which could be simplified through SROA.
118   DenseMap<Value *, Value *> SROAArgValues;
119 
120   /// The mapping of caller Alloca values to their accumulated cost savings. If
121   /// we have to disable SROA for one of the allocas, this tells us how much
122   /// cost must be added.
123   DenseMap<Value *, int> SROAArgCosts;
124 
125   /// Keep track of values which map to a pointer base and constant offset.
126   DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
127 
128   // Custom simplification helper routines.
129   bool isAllocaDerivedArg(Value *V);
130   bool lookupSROAArgAndCost(Value *V, Value *&Arg,
131                             DenseMap<Value *, int>::iterator &CostIt);
132   void disableSROA(DenseMap<Value *, int>::iterator CostIt);
133   void disableSROA(Value *V);
134   void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
135                           int InstructionCost);
136   bool isGEPOffsetConstant(GetElementPtrInst &GEP);
137   bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
138   bool simplifyCallSite(Function *F, CallSite CS);
139   ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
140 
141   /// Return true if the given argument to the function being considered for
142   /// inlining has the given attribute set either at the call site or the
143   /// function declaration.  Primarily used to inspect call site specific
144   /// attributes since these can be more precise than the ones on the callee
145   /// itself.
146   bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);
147 
148   /// Return true if the given value is known non null within the callee if
149   /// inlined through this particular callsite.
150   bool isKnownNonNullInCallee(Value *V);
151 
152   /// Update Threshold based on callsite properties such as callee
153   /// attributes and callee hotness for PGO builds. The Callee is explicitly
154   /// passed to support analyzing indirect calls whose target is inferred by
155   /// analysis.
156   void updateThreshold(CallSite CS, Function &Callee);
157 
158   /// Return true if size growth is allowed when inlining the callee at CS.
159   bool allowSizeGrowth(CallSite CS);
160 
161   // Custom analysis routines.
162   bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
163 
164   // Disable several entry points to the visitor so we don't accidentally use
165   // them by declaring but not defining them here.
166   void visit(Module *);
167   void visit(Module &);
168   void visit(Function *);
169   void visit(Function &);
170   void visit(BasicBlock *);
171   void visit(BasicBlock &);
172 
173   // Provide base case for our instruction visit.
174   bool visitInstruction(Instruction &I);
175 
176   // Our visit overrides.
177   bool visitAlloca(AllocaInst &I);
178   bool visitPHI(PHINode &I);
179   bool visitGetElementPtr(GetElementPtrInst &I);
180   bool visitBitCast(BitCastInst &I);
181   bool visitPtrToInt(PtrToIntInst &I);
182   bool visitIntToPtr(IntToPtrInst &I);
183   bool visitCastInst(CastInst &I);
184   bool visitUnaryInstruction(UnaryInstruction &I);
185   bool visitCmpInst(CmpInst &I);
186   bool visitSub(BinaryOperator &I);
187   bool visitBinaryOperator(BinaryOperator &I);
188   bool visitLoad(LoadInst &I);
189   bool visitStore(StoreInst &I);
190   bool visitExtractValue(ExtractValueInst &I);
191   bool visitInsertValue(InsertValueInst &I);
192   bool visitCallSite(CallSite CS);
193   bool visitReturnInst(ReturnInst &RI);
194   bool visitBranchInst(BranchInst &BI);
195   bool visitSwitchInst(SwitchInst &SI);
196   bool visitIndirectBrInst(IndirectBrInst &IBI);
197   bool visitResumeInst(ResumeInst &RI);
198   bool visitCleanupReturnInst(CleanupReturnInst &RI);
199   bool visitCatchReturnInst(CatchReturnInst &RI);
200   bool visitUnreachableInst(UnreachableInst &I);
201 
202 public:
203   CallAnalyzer(const TargetTransformInfo &TTI,
204                std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
205                ProfileSummaryInfo *PSI, Function &Callee, CallSite CSArg,
206                const InlineParams &Params)
207       : TTI(TTI), GetAssumptionCache(GetAssumptionCache), PSI(PSI), F(Callee),
208         CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
209         Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
210         ExposesReturnsTwice(false), HasDynamicAlloca(false),
211         ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
212         HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
213         NumVectorInstructions(0), FiftyPercentVectorBonus(0),
214         TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
215         NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
216         NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
217         SROACostSavings(0), SROACostSavingsLost(0) {}
218 
219   bool analyzeCall(CallSite CS);
220 
221   int getThreshold() { return Threshold; }
222   int getCost() { return Cost; }
223 
224   // Keep a bunch of stats about the cost savings found so we can print them
225   // out when debugging.
226   unsigned NumConstantArgs;
227   unsigned NumConstantOffsetPtrArgs;
228   unsigned NumAllocaArgs;
229   unsigned NumConstantPtrCmps;
230   unsigned NumConstantPtrDiffs;
231   unsigned NumInstructionsSimplified;
232   unsigned SROACostSavings;
233   unsigned SROACostSavingsLost;
234 
235   void dump();
236 };
237 
238 } // namespace
239 
240 /// \brief Test whether the given value is an Alloca-derived function argument.
241 bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
242   return SROAArgValues.count(V);
243 }
244 
245 /// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
246 /// Returns false if V does not map to a SROA-candidate.
247 bool CallAnalyzer::lookupSROAArgAndCost(
248     Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
249   if (SROAArgValues.empty() || SROAArgCosts.empty())
250     return false;
251 
252   DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
253   if (ArgIt == SROAArgValues.end())
254     return false;
255 
256   Arg = ArgIt->second;
257   CostIt = SROAArgCosts.find(Arg);
258   return CostIt != SROAArgCosts.end();
259 }
260 
261 /// \brief Disable SROA for the candidate marked by this cost iterator.
262 ///
263 /// This marks the candidate as no longer viable for SROA, and adds the cost
264 /// savings associated with it back into the inline cost measurement.
265 void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
266   // If we're no longer able to perform SROA we need to undo its cost savings
267   // and prevent subsequent analysis.
268   Cost += CostIt->second;
269   SROACostSavings -= CostIt->second;
270   SROACostSavingsLost += CostIt->second;
271   SROAArgCosts.erase(CostIt);
272 }
273 
274 /// \brief If 'V' maps to a SROA candidate, disable SROA for it.
275 void CallAnalyzer::disableSROA(Value *V) {
276   Value *SROAArg;
277   DenseMap<Value *, int>::iterator CostIt;
278   if (lookupSROAArgAndCost(V, SROAArg, CostIt))
279     disableSROA(CostIt);
280 }
281 
282 /// \brief Accumulate the given cost for a particular SROA candidate.
283 void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
284                                       int InstructionCost) {
285   CostIt->second += InstructionCost;
286   SROACostSavings += InstructionCost;
287 }
288 
289 /// \brief Check whether a GEP's indices are all constant.
290 ///
291 /// Respects any simplified values known during the analysis of this callsite.
292 bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
293   for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
294     if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
295       return false;
296 
297   return true;
298 }
299 
300 /// \brief Accumulate a constant GEP offset into an APInt if possible.
301 ///
302 /// Returns false if unable to compute the offset for any reason. Respects any
303 /// simplified values known during the analysis of this callsite.
304 bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
305   const DataLayout &DL = F.getParent()->getDataLayout();
306   unsigned IntPtrWidth = DL.getPointerSizeInBits();
307   assert(IntPtrWidth == Offset.getBitWidth());
308 
309   for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
310        GTI != GTE; ++GTI) {
311     ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
312     if (!OpC)
313       if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
314         OpC = dyn_cast<ConstantInt>(SimpleOp);
315     if (!OpC)
316       return false;
317     if (OpC->isZero())
318       continue;
319 
320     // Handle a struct index, which adds its field offset to the pointer.
321     if (StructType *STy = dyn_cast<StructType>(*GTI)) {
322       unsigned ElementIdx = OpC->getZExtValue();
323       const StructLayout *SL = DL.getStructLayout(STy);
324       Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
325       continue;
326     }
327 
328     APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
329     Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
330   }
331   return true;
332 }
333 
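/// \brief Model an alloca instruction in the callee.
///
/// Static allocas, and array allocations whose size simplifies to a constant
/// at this call site, have their size accumulated into AllocatedSize and are
/// otherwise handled by the base visitor. Any remaining dynamic alloca sets
/// HasDynamicAlloca, which aborts the analysis.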
334 bool CallAnalyzer::visitAlloca(AllocaInst &I) {
335   // Check whether inlining will turn a dynamic alloca into a static
336   // alloca and handle that case.
337   if (I.isArrayAllocation()) {
338     Constant *Size = SimplifiedValues.lookup(I.getArraySize());
339     if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
340       const DataLayout &DL = F.getParent()->getDataLayout();
341       Type *Ty = I.getAllocatedType();
342       AllocatedSize = SaturatingMultiplyAdd(
343           AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize);
344       return Base::visitAlloca(I);
345     }
346   }
347 
348   // Accumulate the allocated size.
349   if (I.isStaticAlloca()) {
350     const DataLayout &DL = F.getParent()->getDataLayout();
351     Type *Ty = I.getAllocatedType();
352     AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize);
353   }
354 
355   // We will happily inline static alloca instructions.
356   if (I.isStaticAlloca())
357     return Base::visitAlloca(I);
358 
359   // FIXME: This is overly conservative. Dynamic allocas are inefficient for
360   // a variety of reasons, and so we would like to not inline them into
361   // functions which don't currently have a dynamic alloca. This simply
362   // disables inlining altogether in the presence of a dynamic alloca.
363   HasDynamicAlloca = true;
364   return false;
365 }
366 
367 bool CallAnalyzer::visitPHI(PHINode &I) {
368   // FIXME: We should potentially be tracking values through phi nodes,
369   // especially when they collapse to a single value due to deleted CFG edges
370   // during inlining.
371 
372   // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate its bonuses. The idea is to disable
374   // SROA if it *might* be used in an inappropriate manner.
375 
376   // Phi nodes are always zero-cost.
377   return true;
378 }
379 
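/// \brief Model a GEP instruction.
///
/// Inbounds GEPs of pointers with a known base and constant offset accumulate
/// their offset; GEPs with all-constant indices are modeled as free and keep
/// any SROA candidacy alive. Variable-index GEPs are charged and disable SROA
/// for the underlying argument.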
380 bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
381   Value *SROAArg;
382   DenseMap<Value *, int>::iterator CostIt;
383   bool SROACandidate =
384       lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt);
385 
386   // Try to fold GEPs of constant-offset call site argument pointers. This
387   // requires target data and inbounds GEPs.
388   if (I.isInBounds()) {
389     // Check if we have a base + offset for the pointer.
390     Value *Ptr = I.getPointerOperand();
391     std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
392     if (BaseAndOffset.first) {
393       // Check if the offset of this GEP is constant, and if so accumulate it
394       // into Offset.
395       if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
        // Non-constant GEPs aren't folded, and they disable SROA.
397         if (SROACandidate)
398           disableSROA(CostIt);
399         return false;
400       }
401 
402       // Add the result as a new mapping to Base + Offset.
403       ConstantOffsetPtrs[&I] = BaseAndOffset;
404 
      // Also handle SROA candidates here; we already know that the GEP has
      // all-constant indices.
407       if (SROACandidate)
408         SROAArgValues[&I] = SROAArg;
409 
410       return true;
411     }
412   }
413 
414   if (isGEPOffsetConstant(I)) {
415     if (SROACandidate)
416       SROAArgValues[&I] = SROAArg;
417 
418     // Constant GEPs are modeled as free.
419     return true;
420   }
421 
422   // Variable GEPs will require math and will disable SROA.
423   if (SROACandidate)
424     disableSROA(CostIt);
425   return false;
426 }
427 
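/// \brief Bitcasts are modeled as free.
///
/// Constants, base/offset pairs, and SROA candidacy are all propagated
/// through the cast.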
428 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
429   // Propagate constants through bitcasts.
430   Constant *COp = dyn_cast<Constant>(I.getOperand(0));
431   if (!COp)
432     COp = SimplifiedValues.lookup(I.getOperand(0));
433   if (COp)
434     if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
435       SimplifiedValues[&I] = C;
436       return true;
437     }
438 
439   // Track base/offsets through casts
440   std::pair<Value *, APInt> BaseAndOffset =
441       ConstantOffsetPtrs.lookup(I.getOperand(0));
442   // Casts don't change the offset, just wrap it up.
443   if (BaseAndOffset.first)
444     ConstantOffsetPtrs[&I] = BaseAndOffset;
445 
446   // Also look for SROA candidates here.
447   Value *SROAArg;
448   DenseMap<Value *, int>::iterator CostIt;
449   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
450     SROAArgValues[&I] = SROAArg;
451 
452   // Bitcasts are always zero cost.
453   return true;
454 }
455 
456 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
457   // Propagate constants through ptrtoint.
458   Constant *COp = dyn_cast<Constant>(I.getOperand(0));
459   if (!COp)
460     COp = SimplifiedValues.lookup(I.getOperand(0));
461   if (COp)
462     if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
463       SimplifiedValues[&I] = C;
464       return true;
465     }
466 
467   // Track base/offset pairs when converted to a plain integer provided the
468   // integer is large enough to represent the pointer.
469   unsigned IntegerSize = I.getType()->getScalarSizeInBits();
470   const DataLayout &DL = F.getParent()->getDataLayout();
471   if (IntegerSize >= DL.getPointerSizeInBits()) {
472     std::pair<Value *, APInt> BaseAndOffset =
473         ConstantOffsetPtrs.lookup(I.getOperand(0));
474     if (BaseAndOffset.first)
475       ConstantOffsetPtrs[&I] = BaseAndOffset;
476   }
477 
478   // This is really weird. Technically, ptrtoint will disable SROA. However,
479   // unless that ptrtoint is *used* somewhere in the live basic blocks after
480   // inlining, it will be nuked, and SROA should proceed. All of the uses which
481   // would block SROA would also block SROA if applied directly to a pointer,
482   // and so we can just add the integer in here. The only places where SROA is
483   // preserved either cannot fire on an integer, or won't in-and-of themselves
484   // disable SROA (ext) w/o some later use that we would see and disable.
485   Value *SROAArg;
486   DenseMap<Value *, int>::iterator CostIt;
487   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
488     SROAArgValues[&I] = SROAArg;
489 
490   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
491 }
492 
493 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
  // Propagate constants through inttoptr.
495   Constant *COp = dyn_cast<Constant>(I.getOperand(0));
496   if (!COp)
497     COp = SimplifiedValues.lookup(I.getOperand(0));
498   if (COp)
499     if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
500       SimplifiedValues[&I] = C;
501       return true;
502     }
503 
  // Track base/offset pairs when they are round-tripped through an integer
  // without modification, provided the integer is not too large.
506   Value *Op = I.getOperand(0);
507   unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
508   const DataLayout &DL = F.getParent()->getDataLayout();
509   if (IntegerSize <= DL.getPointerSizeInBits()) {
510     std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
511     if (BaseAndOffset.first)
512       ConstantOffsetPtrs[&I] = BaseAndOffset;
513   }
514 
515   // "Propagate" SROA here in the same manner as we do for ptrtoint above.
516   Value *SROAArg;
517   DenseMap<Value *, int>::iterator CostIt;
518   if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
519     SROAArgValues[&I] = SROAArg;
520 
521   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
522 }
523 
524 bool CallAnalyzer::visitCastInst(CastInst &I) {
  // Propagate constants through casts.
526   Constant *COp = dyn_cast<Constant>(I.getOperand(0));
527   if (!COp)
528     COp = SimplifiedValues.lookup(I.getOperand(0));
529   if (COp)
530     if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
531       SimplifiedValues[&I] = C;
532       return true;
533     }
534 
535   // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
536   disableSROA(I.getOperand(0));
537 
538   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
539 }
540 
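/// \brief Try to constant fold a unary instruction.
///
/// If the (possibly simplified) operand folds, the instruction is free;
/// otherwise it is charged and SROA is disabled on the operand.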
541 bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
542   Value *Operand = I.getOperand(0);
543   Constant *COp = dyn_cast<Constant>(Operand);
544   if (!COp)
545     COp = SimplifiedValues.lookup(Operand);
546   if (COp) {
547     const DataLayout &DL = F.getParent()->getDataLayout();
548     if (Constant *C = ConstantFoldInstOperands(&I, COp, DL)) {
549       SimplifiedValues[&I] = C;
550       return true;
551     }
552   }
553 
554   // Disable any SROA on the argument to arbitrary unary operators.
555   disableSROA(Operand);
556 
557   return false;
558 }
559 
560 bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
561   unsigned ArgNo = A->getArgNo();
562   return CandidateCS.paramHasAttr(ArgNo + 1, Attr);
563 }
564 
565 bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
566   // Does the *call site* have the NonNull attribute set on an argument?  We
567   // use the attribute on the call site to memoize any analysis done in the
568   // caller. This will also trip if the callee function has a non-null
569   // parameter attribute, but that's a less interesting case because hopefully
570   // the callee would already have been simplified based on that.
571   if (Argument *A = dyn_cast<Argument>(V))
572     if (paramHasAttr(A, Attribute::NonNull))
573       return true;
574 
575   // Is this an alloca in the caller?  This is distinct from the attribute case
576   // above because attributes aren't updated within the inliner itself and we
577   // always want to catch the alloca derived case.
578   if (isAllocaDerivedArg(V))
579     // We can actually predict the result of comparisons between an
580     // alloca-derived value and null. Note that this fires regardless of
581     // SROA firing.
582     return true;
583 
584   return false;
585 }
586 
587 bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
588   // If the normal destination of the invoke or the parent block of the call
589   // site is unreachable-terminated, there is little point in inlining this
590   // unless there is literally zero cost.
591   // FIXME: Note that it is possible that an unreachable-terminated block has a
  // hot entry. For example, in the scenario below, inlining hot_call_X() may
  // be beneficial:
  // main() {
  //   hot_call_1();
  //   ...
  //   hot_call_N()
  //   exit(0);
  // }
  // For now, we are not handling this corner case here as it is rare in real
  // code. In the future, we should elaborate on this based on BPI and BFI in
  // more general threshold-adjusting heuristics in updateThreshold().
603   Instruction *Instr = CS.getInstruction();
604   if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
605     if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
606       return false;
607   } else if (isa<UnreachableInst>(Instr->getParent()->getTerminator()))
608     return false;
609 
610   return true;
611 }
612 
613 void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
614   // If no size growth is allowed for this inlining, set Threshold to 0.
615   if (!allowSizeGrowth(CS)) {
616     Threshold = 0;
617     return;
618   }
619 
620   Function *Caller = CS.getCaller();
621 
  // Return min(A, B) if B is valid.
623   auto MinIfValid = [](int A, Optional<int> B) {
624     return B ? std::min(A, B.getValue()) : A;
625   };
626 
  // Return max(A, B) if B is valid.
628   auto MaxIfValid = [](int A, Optional<int> B) {
629     return B ? std::max(A, B.getValue()) : A;
630   };
631 
632   // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
633   // and reduce the threshold if the caller has the necessary attribute.
634   if (Caller->optForMinSize())
635     Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
636   else if (Caller->optForSize())
637     Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
638 
639   bool HotCallsite = false;
640   uint64_t TotalWeight;
641   if (CS.getInstruction()->extractProfTotalWeight(TotalWeight) &&
642       PSI->isHotCount(TotalWeight)) {
643     HotCallsite = true;
644   }
645 
646   // Listen to the inlinehint attribute or profile based hotness information
647   // when it would increase the threshold and the caller does not need to
648   // minimize its size.
649   bool InlineHint = Callee.hasFnAttribute(Attribute::InlineHint) ||
650                     PSI->isFunctionEntryHot(&Callee);
651   if (InlineHint && !Caller->optForMinSize())
652     Threshold = MaxIfValid(Threshold, Params.HintThreshold);
653 
654   if (HotCallsite && !Caller->optForMinSize())
655     Threshold = MaxIfValid(Threshold, Params.HotCallSiteThreshold);
656 
657   bool ColdCallee = PSI->isFunctionEntryCold(&Callee);
658   // For cold callees, use the ColdThreshold knob if it is available and reduces
659   // the threshold.
660   if (ColdCallee)
661     Threshold = MinIfValid(Threshold, Params.ColdThreshold);
662 
663   // Finally, take the target-specific inlining threshold multiplier into
664   // account.
665   Threshold *= TTI.getInliningThresholdMultiplier();
666 }
667 
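/// \brief Model a comparison instruction.
///
/// Comparisons fold to constants when both operands are known constants, when
/// both are constant-offset pointers with a common base, or when comparing a
/// known-non-null argument against null. Null comparisons of SROA candidates
/// are credited to the SROA savings rather than charged.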
668 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
669   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
670   // First try to handle simplified comparisons.
671   if (!isa<Constant>(LHS))
672     if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
673       LHS = SimpleLHS;
674   if (!isa<Constant>(RHS))
675     if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
676       RHS = SimpleRHS;
677   if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
678     if (Constant *CRHS = dyn_cast<Constant>(RHS))
679       if (Constant *C =
680               ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
681         SimplifiedValues[&I] = C;
682         return true;
683       }
684   }
685 
686   if (I.getOpcode() == Instruction::FCmp)
687     return false;
688 
689   // Otherwise look for a comparison between constant offset pointers with
690   // a common base.
691   Value *LHSBase, *RHSBase;
692   APInt LHSOffset, RHSOffset;
693   std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
694   if (LHSBase) {
695     std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
696     if (RHSBase && LHSBase == RHSBase) {
697       // We have common bases, fold the icmp to a constant based on the
698       // offsets.
699       Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
700       Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
701       if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
702         SimplifiedValues[&I] = C;
703         ++NumConstantPtrCmps;
704         return true;
705       }
706     }
707   }
708 
  // If the comparison is an equality comparison with null, we can simplify it
  // if we know the value (argument) can't be null.
711   if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
712       isKnownNonNullInCallee(I.getOperand(0))) {
713     bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
714     SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
715                                       : ConstantInt::getFalse(I.getType());
716     return true;
717   }
718   // Finally check for SROA candidates in comparisons.
719   Value *SROAArg;
720   DenseMap<Value *, int>::iterator CostIt;
721   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
722     if (isa<ConstantPointerNull>(I.getOperand(1))) {
723       accumulateSROACost(CostIt, InlineConstants::InstrCost);
724       return true;
725     }
726 
727     disableSROA(CostIt);
728   }
729 
730   return false;
731 }
732 
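/// \brief Model a subtraction.
///
/// The difference of two constant-offset pointers with a common base folds to
/// a constant; everything else falls back to the generic binary operator
/// handling.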
733 bool CallAnalyzer::visitSub(BinaryOperator &I) {
734   // Try to handle a special case: we can fold computing the difference of two
735   // constant-related pointers.
736   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
737   Value *LHSBase, *RHSBase;
738   APInt LHSOffset, RHSOffset;
739   std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
740   if (LHSBase) {
741     std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
742     if (RHSBase && LHSBase == RHSBase) {
743       // We have common bases, fold the subtract to a constant based on the
744       // offsets.
745       Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
746       Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
747       if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
748         SimplifiedValues[&I] = C;
749         ++NumConstantPtrDiffs;
750         return true;
751       }
752     }
753   }
754 
755   // Otherwise, fall back to the generic logic for simplifying and handling
756   // instructions.
757   return Base::visitSub(I);
758 }
759 
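/// \brief Try to simplify a binary operator with callsite-specific constants.
///
/// Operators that fold via InstSimplify are free; anything left unsimplified
/// is charged and disables SROA on its operands.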
760 bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
761   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
762   const DataLayout &DL = F.getParent()->getDataLayout();
763   if (!isa<Constant>(LHS))
764     if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
765       LHS = SimpleLHS;
766   if (!isa<Constant>(RHS))
767     if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
768       RHS = SimpleRHS;
769   Value *SimpleV = nullptr;
770   if (auto FI = dyn_cast<FPMathOperator>(&I))
771     SimpleV =
772         SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
773   else
774     SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
775 
776   if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
777     SimplifiedValues[&I] = C;
778     return true;
779   }
780 
781   // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
782   disableSROA(LHS);
783   disableSROA(RHS);
784 
785   return false;
786 }
787 
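/// \brief Simple loads from SROA candidates are credited to the SROA savings;
/// volatile or atomic loads disable SROA and are charged as usual.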
788 bool CallAnalyzer::visitLoad(LoadInst &I) {
789   Value *SROAArg;
790   DenseMap<Value *, int>::iterator CostIt;
791   if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
792     if (I.isSimple()) {
793       accumulateSROACost(CostIt, InlineConstants::InstrCost);
794       return true;
795     }
796 
797     disableSROA(CostIt);
798   }
799 
800   return false;
801 }
802 
803 bool CallAnalyzer::visitStore(StoreInst &I) {
804   Value *SROAArg;
805   DenseMap<Value *, int>::iterator CostIt;
806   if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
807     if (I.isSimple()) {
808       accumulateSROACost(CostIt, InlineConstants::InstrCost);
809       return true;
810     }
811 
812     disableSROA(CostIt);
813   }
814 
815   return false;
816 }
817 
818 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
819   // Constant folding for extract value is trivial.
820   Constant *C = dyn_cast<Constant>(I.getAggregateOperand());
821   if (!C)
822     C = SimplifiedValues.lookup(I.getAggregateOperand());
823   if (C) {
824     SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices());
825     return true;
826   }
827 
828   // SROA can look through these but give them a cost.
829   return false;
830 }
831 
832 bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
833   // Constant folding for insert value is trivial.
834   Constant *AggC = dyn_cast<Constant>(I.getAggregateOperand());
835   if (!AggC)
836     AggC = SimplifiedValues.lookup(I.getAggregateOperand());
837   Constant *InsertedC = dyn_cast<Constant>(I.getInsertedValueOperand());
838   if (!InsertedC)
839     InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand());
840   if (AggC && InsertedC) {
841     SimplifiedValues[&I] =
842         ConstantExpr::getInsertValue(AggC, InsertedC, I.getIndices());
843     return true;
844   }
845 
846   // SROA can look through these but give them a cost.
847   return false;
848 }
849 
850 /// \brief Try to simplify a call site.
851 ///
852 /// Takes a concrete function and callsite and tries to actually simplify it by
853 /// analyzing the arguments and call itself with instsimplify. Returns true if
854 /// it has simplified the callsite to some other entity (a constant), making it
855 /// free.
856 bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
857   // FIXME: Using the instsimplify logic directly for this is inefficient
858   // because we have to continually rebuild the argument list even when no
859   // simplifications can be performed. Until that is fixed with remapping
860   // inside of instsimplify, directly constant fold calls here.
861   if (!canConstantFoldCallTo(F))
862     return false;
863 
864   // Try to re-map the arguments to constants.
865   SmallVector<Constant *, 4> ConstantArgs;
866   ConstantArgs.reserve(CS.arg_size());
867   for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E;
868        ++I) {
869     Constant *C = dyn_cast<Constant>(*I);
870     if (!C)
871       C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
872     if (!C)
873       return false; // This argument doesn't map to a constant.
874 
875     ConstantArgs.push_back(C);
876   }
877   if (Constant *C = ConstantFoldCall(F, ConstantArgs)) {
878     SimplifiedValues[CS.getInstruction()] = C;
879     return true;
880   }
881 
882   return false;
883 }
884 
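/// \brief Account for a call site inside the callee.
///
/// Direct calls are first constant folded or handled as known intrinsics;
/// recursive calls and calls to returns_twice functions abort the analysis,
/// and calls lowered to real calls are charged for argument setup plus a call
/// penalty. For an indirect call whose target is known through
/// simplification, a nested CallAnalyzer speculatively analyzes the target to
/// compute a devirtualization bonus.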
885 bool CallAnalyzer::visitCallSite(CallSite CS) {
886   if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
887       !F.hasFnAttribute(Attribute::ReturnsTwice)) {
888     // This aborts the entire analysis.
889     ExposesReturnsTwice = true;
890     return false;
891   }
892   if (CS.isCall() && cast<CallInst>(CS.getInstruction())->cannotDuplicate())
893     ContainsNoDuplicateCall = true;
894 
895   if (Function *F = CS.getCalledFunction()) {
896     // When we have a concrete function, first try to simplify it directly.
897     if (simplifyCallSite(F, CS))
898       return true;
899 
900     // Next check if it is an intrinsic we know about.
901     // FIXME: Lift this into part of the InstVisitor.
902     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
903       switch (II->getIntrinsicID()) {
904       default:
905         return Base::visitCallSite(CS);
906 
907       case Intrinsic::load_relative:
        // This is normally lowered to 4 LLVM instructions. The call
        // instruction itself is charged separately, so add the cost of the
        // other three here.
909         Cost += 3 * InlineConstants::InstrCost;
910         return false;
911 
912       case Intrinsic::memset:
913       case Intrinsic::memcpy:
914       case Intrinsic::memmove:
915         // SROA can usually chew through these intrinsics, but they aren't free.
916         return false;
917       case Intrinsic::localescape:
918         HasFrameEscape = true;
919         return false;
920       }
921     }
922 
923     if (F == CS.getInstruction()->getParent()->getParent()) {
924       // This flag will fully abort the analysis, so don't bother with anything
925       // else.
926       IsRecursiveCall = true;
927       return false;
928     }
929 
930     if (TTI.isLoweredToCall(F)) {
      // We account for an average of one instruction per call argument setup
      // here.
933       Cost += CS.arg_size() * InlineConstants::InstrCost;
934 
935       // Everything other than inline ASM will also have a significant cost
936       // merely from making the call.
937       if (!isa<InlineAsm>(CS.getCalledValue()))
938         Cost += InlineConstants::CallPenalty;
939     }
940 
941     return Base::visitCallSite(CS);
942   }
943 
944   // Otherwise we're in a very special case -- an indirect function call. See
945   // if we can be particularly clever about this.
946   Value *Callee = CS.getCalledValue();
947 
  // First, pay the price of the argument setup. We account for an average of
  // one instruction per call argument setup here.
950   Cost += CS.arg_size() * InlineConstants::InstrCost;
951 
952   // Next, check if this happens to be an indirect function call to a known
953   // function in this inline context. If not, we've done all we can.
954   Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
955   if (!F)
956     return Base::visitCallSite(CS);
957 
958   // If we have a constant that we are calling as a function, we can peer
959   // through it and see the function target. This happens not infrequently
960   // during devirtualization and so we want to give it a hefty bonus for
961   // inlining, but cap that bonus in the event that inlining wouldn't pan
962   // out. Pretend to inline the function, with a custom threshold.
963   auto IndirectCallParams = Params;
964   IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
965   CallAnalyzer CA(TTI, GetAssumptionCache, PSI, *F, CS, IndirectCallParams);
966   if (CA.analyzeCall(CS)) {
967     // We were able to inline the indirect call! Subtract the cost from the
968     // threshold to get the bonus we want to apply, but don't go below zero.
969     Cost -= std::max(0, CA.getThreshold() - CA.getCost());
970   }
971 
972   return Base::visitCallSite(CS);
973 }
974 
975 bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
976   // At least one return instruction will be free after inlining.
977   bool Free = !HasReturn;
978   HasReturn = true;
979   return Free;
980 }
981 
982 bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
983   // We model unconditional branches as essentially free -- they really
984   // shouldn't exist at all, but handling them makes the behavior of the
985   // inliner more regular and predictable. Interestingly, conditional branches
986   // which will fold away are also free.
987   return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
988          dyn_cast_or_null<ConstantInt>(
989              SimplifiedValues.lookup(BI.getCondition()));
990 }
991 
992 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
993   // We model unconditional switches as free, see the comments on handling
994   // branches.
995   if (isa<ConstantInt>(SI.getCondition()))
996     return true;
997   if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
998     if (isa<ConstantInt>(V))
999       return true;
1000 
1001   // Otherwise, we need to accumulate a cost proportional to the number of
1002   // distinct successor blocks. This fan-out in the CFG cannot be represented
1003   // for free even if we can represent the core switch as a jumptable that
1004   // takes a single instruction.
1005   //
1006   // NB: We convert large switches which are just used to initialize large phi
1007   // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
1008   // inlining those. It will prevent inlining in cases where the optimization
1009   // does not (yet) fire.
1010   SmallPtrSet<BasicBlock *, 8> SuccessorBlocks;
1011   SuccessorBlocks.insert(SI.getDefaultDest());
1012   for (auto I = SI.case_begin(), E = SI.case_end(); I != E; ++I)
1013     SuccessorBlocks.insert(I.getCaseSuccessor());
  // Add cost corresponding to the number of distinct destinations. We model
  // the first destination as free because of fallthrough.
1016   Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost;
1017   return false;
1018 }
1019 
1020 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
1021   // We never want to inline functions that contain an indirectbr.  This is
  // incorrect because all the blockaddresses (in static global initializers
1023   // for example) would be referring to the original function, and this
1024   // indirect jump would jump from the inlined copy of the function into the
1025   // original function which is extremely undefined behavior.
1026   // FIXME: This logic isn't really right; we can safely inline functions with
1027   // indirectbr's as long as no other function or global references the
1028   // blockaddress of a block within the current function.
1029   HasIndirectBr = true;
1030   return false;
1031 }
1032 
1033 bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
1034   // FIXME: It's not clear that a single instruction is an accurate model for
1035   // the inline cost of a resume instruction.
1036   return false;
1037 }
1038 
1039 bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
1040   // FIXME: It's not clear that a single instruction is an accurate model for
1041   // the inline cost of a cleanupret instruction.
1042   return false;
1043 }
1044 
1045 bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
1046   // FIXME: It's not clear that a single instruction is an accurate model for
1047   // the inline cost of a catchret instruction.
1048   return false;
1049 }
1050 
1051 bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
  // FIXME: It might be reasonable to discount the cost of instructions
  // leading to unreachable, as they have the lowest possible impact on both
  // runtime and code size.
1055   return true; // No actual code is needed for unreachable.
1056 }
1057 
1058 bool CallAnalyzer::visitInstruction(Instruction &I) {
1059   // Some instructions are free. All of the free intrinsics can also be
1060   // handled by SROA, etc.
1061   if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
1062     return true;
1063 
1064   // We found something we don't understand or can't handle. Mark any SROA-able
1065   // values in the operand list as no longer viable.
1066   for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
1067     disableSROA(*OI);
1068 
1069   return false;
1070 }
1071 
1072 /// \brief Analyze a basic block for its contribution to the inline cost.
1073 ///
1074 /// This method walks the analyzer over every instruction in the given basic
1075 /// block and accounts for their cost during inlining at this callsite. It
1076 /// aborts early if the threshold has been exceeded or an impossible to inline
1077 /// construct has been detected. It returns false if inlining is no longer
1078 /// viable, and true if inlining remains viable.
1079 bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
1080                                 SmallPtrSetImpl<const Value *> &EphValues) {
1081   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
    // FIXME: Currently, the number of instructions in a function, regardless
    // of our ability to simplify them during inlining to constants or dead
    // code, is actually used by the vector bonus heuristic. As long as that's
    // true, we have to special case debug intrinsics here to prevent
    // differences in inlining due to debug symbols. Eventually, the number of
    // unsimplified instructions shouldn't factor into the cost computation,
    // but until then, hack around it here.
1089     if (isa<DbgInfoIntrinsic>(I))
1090       continue;
1091 
1092     // Skip ephemeral values.
1093     if (EphValues.count(&*I))
1094       continue;
1095 
1096     ++NumInstructions;
1097     if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
1098       ++NumVectorInstructions;
1099 
1100     // If the instruction is floating point, and the target says this operation
1101     // is expensive or the function has the "use-soft-float" attribute, this may
1102     // eventually become a library call. Treat the cost as such.
1103     if (I->getType()->isFloatingPointTy()) {
1104       bool hasSoftFloatAttr = false;
1105 
1106       // If the function has the "use-soft-float" attribute, mark it as
1107       // expensive.
1108       if (F.hasFnAttribute("use-soft-float")) {
1109         Attribute Attr = F.getFnAttribute("use-soft-float");
1110         StringRef Val = Attr.getValueAsString();
1111         if (Val == "true")
1112           hasSoftFloatAttr = true;
1113       }
1114 
1115       if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
1116           hasSoftFloatAttr)
1117         Cost += InlineConstants::CallPenalty;
1118     }
1119 
1120     // If the instruction simplified to a constant, there is no cost to this
1121     // instruction. Visit the instructions using our InstVisitor to account for
1122     // all of the per-instruction logic. The visit tree returns true if we
1123     // consumed the instruction in any way, and false if the instruction's base
1124     // cost should count against inlining.
1125     if (Base::visit(&*I))
1126       ++NumInstructionsSimplified;
1127     else
1128       Cost += InlineConstants::InstrCost;
1129 
    // If visiting this instruction detected an uninlinable pattern, abort.
1131     if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
1132         HasIndirectBr || HasFrameEscape)
1133       return false;
1134 
1135     // If the caller is a recursive function then we don't want to inline
1136     // functions which allocate a lot of stack space because it would increase
1137     // the caller stack usage dramatically.
1138     if (IsCallerRecursive &&
1139         AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
1140       return false;
1141 
    // Check if we've passed the maximum possible threshold so we don't spin
    // in huge basic blocks that will never inline.
1144     if (Cost > Threshold)
1145       return false;
1146   }
1147 
1148   return true;
1149 }
1150 
1151 /// \brief Compute the base pointer and cumulative constant offsets for V.
1152 ///
1153 /// This strips all constant offsets off of V, leaving it the base pointer, and
/// accumulates the total constant offset applied in the returned constant. It
/// returns null if V is not a pointer, and returns the constant '0' if there
/// are no constant offsets applied.
1157 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
1158   if (!V->getType()->isPointerTy())
1159     return nullptr;
1160 
1161   const DataLayout &DL = F.getParent()->getDataLayout();
1162   unsigned IntPtrWidth = DL.getPointerSizeInBits();
1163   APInt Offset = APInt::getNullValue(IntPtrWidth);
1164 
1165   // Even though we don't look through PHI nodes, we could be called on an
1166   // instruction in an unreachable block, which may be on a cycle.
1167   SmallPtrSet<Value *, 4> Visited;
1168   Visited.insert(V);
1169   do {
1170     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
1171       if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
1172         return nullptr;
1173       V = GEP->getPointerOperand();
1174     } else if (Operator::getOpcode(V) == Instruction::BitCast) {
1175       V = cast<Operator>(V)->getOperand(0);
1176     } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
1177       if (GA->isInterposable())
1178         break;
1179       V = GA->getAliasee();
1180     } else {
1181       break;
1182     }
1183     assert(V->getType()->isPointerTy() && "Unexpected operand type!");
1184   } while (Visited.insert(V).second);
1185 
1186   Type *IntPtrTy = DL.getIntPtrType(V->getContext());
1187   return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
1188 }
1189 
1190 /// \brief Analyze a call site for potential inlining.
1191 ///
1192 /// Returns true if inlining this call is viable, and false if it is not
1193 /// viable. It computes the cost and adjusts the threshold based on numerous
1194 /// factors and heuristics. If this method returns false but the computed cost
1195 /// is below the computed threshold, then inlining was forcibly disabled by
1196 /// some artifact of the routine.
1197 bool CallAnalyzer::analyzeCall(CallSite CS) {
1198   ++NumCallsAnalyzed;
1199 
1200   // Perform some tweaks to the cost and threshold based on the direct
1201   // callsite information.
1202 
1203   // We want to more aggressively inline vector-dense kernels, so up the
1204   // threshold, and we'll lower it if the % of vector instructions gets too
  // low. Note that these bonuses are somewhat arbitrary and evolved over time
1206   // by accident as much as because they are principled bonuses.
1207   //
1208   // FIXME: It would be nice to remove all such bonuses. At least it would be
1209   // nice to base the bonus values on something more scientific.
1210   assert(NumInstructions == 0);
1211   assert(NumVectorInstructions == 0);
1212 
1213   // Update the threshold based on callsite properties
1214   updateThreshold(CS, F);
1215 
1216   FiftyPercentVectorBonus = 3 * Threshold / 2;
1217   TenPercentVectorBonus = 3 * Threshold / 4;
1218   const DataLayout &DL = F.getParent()->getDataLayout();
1219 
1220   // Track whether the post-inlining function would have more than one basic
1221   // block. A single basic block is often intended for inlining. Balloon the
1222   // threshold by 50% until we pass the single-BB phase.
1223   bool SingleBB = true;
1224   int SingleBBBonus = Threshold / 2;
1225 
  // Speculatively apply all possible bonuses to Threshold. If Cost exceeds
  // this Threshold at any point (and Cost can only increase), we can stop
  // processing the rest of the function body.
1229   Threshold += (SingleBBBonus + FiftyPercentVectorBonus);
1230 
1231   // Give out bonuses per argument, as the instructions setting them up will
1232   // be gone after inlining.
1233   for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
1234     if (CS.isByValArgument(I)) {
1235       // We approximate the number of loads and stores needed by dividing the
1236       // size of the byval type by the target's pointer size.
1237       PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
1238       unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
1239       unsigned PointerSize = DL.getPointerSizeInBits();
1240       // Ceiling division.
1241       unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
1242 
1243       // If it generates more than 8 stores it is likely to be expanded as an
1244       // inline memcpy so we take that as an upper bound. Otherwise we assume
1245       // one load and one store per word copied.
1246       // FIXME: The maxStoresPerMemcpy setting from the target should be used
1247       // here instead of a magic number of 8, but it's not available via
1248       // DataLayout.
1249       NumStores = std::min(NumStores, 8U);
1250 
1251       Cost -= 2 * NumStores * InlineConstants::InstrCost;
1252     } else {
1253       // For non-byval arguments subtract off one instruction per call
1254       // argument.
1255       Cost -= InlineConstants::InstrCost;
1256     }
1257   }
1258 
1259   // If there is only one call of the function, and it has internal linkage,
1260   // the cost of inlining it drops dramatically.
1261   bool OnlyOneCallAndLocalLinkage =
1262       F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
1263   if (OnlyOneCallAndLocalLinkage)
1264     Cost -= InlineConstants::LastCallToStaticBonus;
1265 
1266   // If this function uses the coldcc calling convention, prefer not to inline
1267   // it.
1268   if (F.getCallingConv() == CallingConv::Cold)
1269     Cost += InlineConstants::ColdccPenalty;
1270 
1271   // Check if we're done. This can happen due to bonuses and penalties.
1272   if (Cost > Threshold)
1273     return false;
1274 
1275   if (F.empty())
1276     return true;
1277 
1278   Function *Caller = CS.getInstruction()->getParent()->getParent();
1279   // Check if the caller function is recursive itself.
1280   for (User *U : Caller->users()) {
1281     CallSite Site(U);
1282     if (!Site)
1283       continue;
1284     Instruction *I = Site.getInstruction();
1285     if (I->getParent()->getParent() == Caller) {
1286       IsCallerRecursive = true;
1287       break;
1288     }
1289   }
1290 
1291   // Populate our simplified values by mapping from function arguments to call
1292   // arguments with known important simplifications.
1293   CallSite::arg_iterator CAI = CS.arg_begin();
1294   for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
1295        FAI != FAE; ++FAI, ++CAI) {
1296     assert(CAI != CS.arg_end());
1297     if (Constant *C = dyn_cast<Constant>(CAI))
1298       SimplifiedValues[&*FAI] = C;
1299 
1300     Value *PtrArg = *CAI;
1301     if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
1302       ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());
1303 
1304       // We can SROA any pointer arguments derived from alloca instructions.
1305       if (isa<AllocaInst>(PtrArg)) {
1306         SROAArgValues[&*FAI] = PtrArg;
1307         SROAArgCosts[PtrArg] = 0;
1308       }
1309     }
1310   }
1311   NumConstantArgs = SimplifiedValues.size();
1312   NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
1313   NumAllocaArgs = SROAArgValues.size();
1314 
1315   // FIXME: If a caller has multiple calls to a callee, we end up recomputing
1316   // the ephemeral values multiple times (and they're completely determined by
1317   // the callee, so this is purely duplicate work).
1318   SmallPtrSet<const Value *, 32> EphValues;
1319   CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
1320 
1321   // The worklist of live basic blocks in the callee *after* inlining. We avoid
1322   // adding basic blocks of the callee which can be proven to be dead for this
1323   // particular call site in order to get more accurate cost estimates. This
1324   // requires a somewhat heavyweight iteration pattern: we need to walk the
  // basic blocks in a breadth-first order as we insert live successors. To
  // accomplish this, and because we expect to exit early after crossing our
  // threshold, we use a small-size optimized SetVector.
1328   typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
1329                     SmallPtrSet<BasicBlock *, 16>>
1330       BBSetVector;
1331   BBSetVector BBWorklist;
1332   BBWorklist.insert(&F.getEntryBlock());
1333   // Note that we *must not* cache the size, this loop grows the worklist.
1334   for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
1335     // Bail out the moment we cross the threshold. This means we'll under-count
1336     // the cost, but only when undercounting doesn't matter.
1337     if (Cost > Threshold)
1338       break;
1339 
1340     BasicBlock *BB = BBWorklist[Idx];
1341     if (BB->empty())
1342       continue;
1343 
1344     // Disallow inlining a blockaddress. A blockaddress only has defined
1345     // behavior for an indirect branch in the same function, and we do not
1346     // currently support inlining indirect branches. But, the inliner may not
1347     // see an indirect branch that ends up being dead code at a particular call
1348     // site. If the blockaddress escapes the function, e.g., via a global
1349     // variable, inlining may lead to an invalid cross-function reference.
1350     if (BB->hasAddressTaken())
1351       return false;
1352 
1353     // Analyze the cost of this block. If we blow through the threshold, this
    // returns false, and we bail out of the analysis.
1355     if (!analyzeBlock(BB, EphValues))
1356       return false;
1357 
1358     TerminatorInst *TI = BB->getTerminator();
1359 
    // Add in the live successors by first checking whether we have a
    // terminator that may be simplified based on the values simplified by
    // this call.
1362     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1363       if (BI->isConditional()) {
1364         Value *Cond = BI->getCondition();
1365         if (ConstantInt *SimpleCond =
1366                 dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
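          // The branch condition folds to a constant, so only the taken edge
          // is live: successor 0 is the true destination and successor 1 the
          // false destination.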
1367           BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
1368           continue;
1369         }
1370       }
1371     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
1372       Value *Cond = SI->getCondition();
1373       if (ConstantInt *SimpleCond =
1374               dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
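        // Only the successor of the matching case (or of the default case
        // when no case value matches) is live.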
1375         BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
1376         continue;
1377       }
1378     }
1379 
1380     // If we're unable to select a particular successor, just count all of
1381     // them.
1382     for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
1383          ++TIdx)
1384       BBWorklist.insert(TI->getSuccessor(TIdx));
1385 
    // If we had any successors at this point, then post-inlining is likely to
1387     // have them as well. Note that we assume any basic blocks which existed
1388     // due to branches or switches which folded above will also fold after
1389     // inlining.
1390     if (SingleBB && TI->getNumSuccessors() > 1) {
1391       // Take off the bonus we applied to the threshold.
1392       Threshold -= SingleBBBonus;
1393       SingleBB = false;
1394     }
1395   }
1396 
  // If this is a noduplicate call, we can still inline as long as
  // inlining this would cause the removal of the callee (so the noduplicate
  // instruction is not actually duplicated, just moved).
1400   if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
1401     return false;
1402 
1403   // We applied the maximum possible vector bonus at the beginning. Now,
1404   // subtract the excess bonus, if any, from the Threshold before
1405   // comparing against Cost.
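  // Up to 10% vector instructions forfeits the entire bonus; between 10% and
  // 50% keeps only the smaller TenPercentVectorBonus; above 50% the full
  // bonus is kept.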
1406   if (NumVectorInstructions <= NumInstructions / 10)
1407     Threshold -= FiftyPercentVectorBonus;
1408   else if (NumVectorInstructions <= NumInstructions / 2)
1409     Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus);
1410 
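  // The threshold is clamped at 1, so even a non-positive threshold still
  // admits inlining when the accumulated cost is zero or negative.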
1411   return Cost < std::max(1, Threshold);
1412 }
1413 
1414 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1415 /// \brief Dump stats about this call's analysis.
1416 LLVM_DUMP_METHOD void CallAnalyzer::dump() {
1417 #define DEBUG_PRINT_STAT(x) dbgs() << "      " #x ": " << x << "\n"
1418   DEBUG_PRINT_STAT(NumConstantArgs);
1419   DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
1420   DEBUG_PRINT_STAT(NumAllocaArgs);
1421   DEBUG_PRINT_STAT(NumConstantPtrCmps);
1422   DEBUG_PRINT_STAT(NumConstantPtrDiffs);
1423   DEBUG_PRINT_STAT(NumInstructionsSimplified);
1424   DEBUG_PRINT_STAT(NumInstructions);
1425   DEBUG_PRINT_STAT(SROACostSavings);
1426   DEBUG_PRINT_STAT(SROACostSavingsLost);
1427   DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
1428   DEBUG_PRINT_STAT(Cost);
1429   DEBUG_PRINT_STAT(Threshold);
1430 #undef DEBUG_PRINT_STAT
1431 }
1432 #endif
1433 
/// \brief Test whether two functions either both have or both lack the given
///        attribute.
1436 template <typename AttrKind>
1437 static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) {
1438   return F1->getFnAttribute(Attr) == F2->getFnAttribute(Attr);
1439 }
1440 
1441 /// \brief Test that there are no attribute conflicts between Caller and Callee
1442 ///        that prevent inlining.
1443 static bool functionsHaveCompatibleAttributes(Function *Caller,
1444                                               Function *Callee,
1445                                               TargetTransformInfo &TTI) {
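  // Require agreement from both the target-specific check (e.g. mismatched
  // target features) and the IR-level attribute compatibility check.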
1446   return TTI.areInlineCompatible(Caller, Callee) &&
1447          AttributeFuncs::areInlineCompatible(*Caller, *Callee);
1448 }
1449 
1450 InlineCost llvm::getInlineCost(
1451     CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
1452     std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
1453     ProfileSummaryInfo *PSI) {
1454   return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
1455                        GetAssumptionCache, PSI);
1456 }
1457 
1458 InlineCost llvm::getInlineCost(
1459     CallSite CS, Function *Callee, const InlineParams &Params,
1460     TargetTransformInfo &CalleeTTI,
1461     std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
1462     ProfileSummaryInfo *PSI) {
1463 
1464   // Cannot inline indirect calls.
1465   if (!Callee)
1466     return llvm::InlineCost::getNever();
1467 
1468   // Calls to functions with always-inline attributes should be inlined
1469   // whenever possible.
1470   if (CS.hasFnAttr(Attribute::AlwaysInline)) {
1471     if (isInlineViable(*Callee))
1472       return llvm::InlineCost::getAlways();
1473     return llvm::InlineCost::getNever();
1474   }
1475 
  // Never inline functions with conflicting attributes (unless the callee has
  // the always-inline attribute).
1478   if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee, CalleeTTI))
1479     return llvm::InlineCost::getNever();
1480 
1481   // Don't inline this call if the caller has the optnone attribute.
1482   if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone))
1483     return llvm::InlineCost::getNever();
1484 
1485   // Don't inline functions which can be interposed at link-time.  Don't inline
1486   // functions marked noinline or call sites marked noinline.
  // Note: inlining non-exact non-interposable functions is fine, since we know
1488   // we have *a* correct implementation of the source level function.
1489   if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) ||
1490       CS.isNoInline())
1491     return llvm::InlineCost::getNever();
1492 
1493   DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
1494                      << "...\n");
1495 
1496   CallAnalyzer CA(CalleeTTI, GetAssumptionCache, PSI, *Callee, CS, Params);
1497   bool ShouldInline = CA.analyzeCall(CS);
1498 
1499   DEBUG(CA.dump());
1500 
  // Check whether the analysis found a reason to force or forbid inlining
  // that overrides the plain cost/threshold comparison.
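  // A 'false' answer while the cost is still below the threshold means a hard
  // bail-out (e.g. a blockaddress, dynamic alloca, or recursive call) was
  // hit, so report 'never'; a 'true' answer while the cost is at or above the
  // threshold means the analysis favored inlining regardless, so report
  // 'always'.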
1502   if (!ShouldInline && CA.getCost() < CA.getThreshold())
1503     return InlineCost::getNever();
1504   if (ShouldInline && CA.getCost() >= CA.getThreshold())
1505     return InlineCost::getAlways();
1506 
1507   return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
1508 }
1509 
1510 bool llvm::isInlineViable(Function &F) {
1511   bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
1512   for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
1513     // Disallow inlining of functions which contain indirect branches or
1514     // blockaddresses.
1515     if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
1516       return false;
1517 
1518     for (auto &II : *BI) {
1519       CallSite CS(&II);
1520       if (!CS)
1521         continue;
1522 
1523       // Disallow recursive calls.
1524       if (&F == CS.getCalledFunction())
1525         return false;
1526 
1527       // Disallow calls which expose returns-twice to a function not previously
1528       // attributed as such.
1529       if (!ReturnsTwice && CS.isCall() &&
1530           cast<CallInst>(CS.getInstruction())->canReturnTwice())
1531         return false;
1532 
1533       // Disallow inlining functions that call @llvm.localescape. Doing this
1534       // correctly would require major changes to the inliner.
1535       if (CS.getCalledFunction() &&
1536           CS.getCalledFunction()->getIntrinsicID() ==
1537               llvm::Intrinsic::localescape)
1538         return false;
1539     }
1540   }
1541 
1542   return true;
1543 }
1544 
1545 // APIs to create InlineParams based on command line flags and/or other
1546 // parameters.
1547 
1548 InlineParams llvm::getInlineParams(int Threshold) {
1549   InlineParams Params;
1550 
1551   // This field is the threshold to use for a callee by default. This is
1552   // derived from one or more of:
1553   //  * optimization or size-optimization levels,
  //  * a value passed to the createFunctionInliningPass function, or
1555   //  * the -inline-threshold flag.
1556   //  If the -inline-threshold flag is explicitly specified, that is used
1557   //  irrespective of anything else.
1558   if (InlineThreshold.getNumOccurrences() > 0)
1559     Params.DefaultThreshold = InlineThreshold;
1560   else
1561     Params.DefaultThreshold = Threshold;
1562 
1563   // Set the HintThreshold knob from the -inlinehint-threshold.
1564   Params.HintThreshold = HintThreshold;
1565 
1566   // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
1567   Params.HotCallSiteThreshold = HotCallSiteThreshold;
1568 
  // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
  // -inline-threshold command-line option is not explicitly given. If that
1572   // option is present, then its value applies even for callees with size and
1573   // minsize attributes.
1574   // If the -inline-threshold is not specified, set the ColdThreshold from the
1575   // -inlinecold-threshold even if it is not explicitly passed. If
1576   // -inline-threshold is specified, then -inlinecold-threshold needs to be
  // explicitly specified to set the ColdThreshold knob.
1578   if (InlineThreshold.getNumOccurrences() == 0) {
1579     Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
1580     Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
1581     Params.ColdThreshold = ColdThreshold;
1582   } else if (ColdThreshold.getNumOccurrences() > 0) {
1583     Params.ColdThreshold = ColdThreshold;
1584   }
1585   return Params;
1586 }
1587 
1588 InlineParams llvm::getInlineParams() {
1589   return getInlineParams(InlineThreshold);
1590 }
1591 
1592 // Compute the default threshold for inlining based on the opt level and the
1593 // size opt level.
1594 static int computeThresholdFromOptLevels(unsigned OptLevel,
1595                                          unsigned SizeOptLevel) {
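  // An OptLevel above 2 (i.e. -O3 and higher) uses the aggressive threshold.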
1596   if (OptLevel > 2)
1597     return InlineConstants::OptAggressiveThreshold;
1598   if (SizeOptLevel == 1) // -Os
1599     return InlineConstants::OptSizeThreshold;
1600   if (SizeOptLevel == 2) // -Oz
1601     return InlineConstants::OptMinSizeThreshold;
1602   return InlineThreshold;
1603 }
1604 
1605 InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
1606   return getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
1607 }
1608