1 //===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements inline cost analysis.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Analysis/InlineCost.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/ADT/SmallPtrSet.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/Analysis/AssumptionCache.h"
21 #include "llvm/Analysis/BlockFrequencyInfo.h"
22 #include "llvm/Analysis/CodeMetrics.h"
23 #include "llvm/Analysis/ConstantFolding.h"
24 #include "llvm/Analysis/InstructionSimplify.h"
25 #include "llvm/Analysis/ProfileSummaryInfo.h"
26 #include "llvm/Analysis/TargetTransformInfo.h"
27 #include "llvm/IR/CallSite.h"
28 #include "llvm/IR/CallingConv.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/GetElementPtrTypeIterator.h"
31 #include "llvm/IR/GlobalAlias.h"
32 #include "llvm/IR/InstVisitor.h"
33 #include "llvm/IR/IntrinsicInst.h"
34 #include "llvm/IR/Operator.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/raw_ostream.h"
37 
38 using namespace llvm;
39 
40 #define DEBUG_TYPE "inline-cost"
41 
42 STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
43 
44 static cl::opt<int> InlineThreshold(
45     "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
46     cl::desc("Control the amount of inlining to perform (default = 225)"));
47 
48 static cl::opt<int> HintThreshold(
49     "inlinehint-threshold", cl::Hidden, cl::init(325),
50     cl::desc("Threshold for inlining functions with inline hint"));
51 
52 static cl::opt<int>
53     ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
54                           cl::init(45),
55                           cl::desc("Threshold for inlining cold callsites"));
56 
57 static cl::opt<bool>
58     EnableGenericSwitchCost("inline-generic-switch-cost", cl::Hidden,
59                             cl::init(false),
60                             cl::desc("Enable generic switch cost model"));
61 
62 // We introduce this threshold to help performance of instrumentation based
63 // PGO before we actually hook up inliner with analysis passes such as BPI and
64 // BFI.
65 static cl::opt<int> ColdThreshold(
66     "inlinecold-threshold", cl::Hidden, cl::init(45),
67     cl::desc("Threshold for inlining functions with cold attribute"));
68 
69 static cl::opt<int>
70     HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
71                          cl::ZeroOrMore,
72                          cl::desc("Threshold for hot callsites "));
73 
74 namespace {
75 
76 class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
77   typedef InstVisitor<CallAnalyzer, bool> Base;
78   friend class InstVisitor<CallAnalyzer, bool>;
79 
80   /// The TargetTransformInfo available for this compilation.
81   const TargetTransformInfo &TTI;
82 
83   /// Getter for the cache of @llvm.assume intrinsics.
84   std::function<AssumptionCache &(Function &)> &GetAssumptionCache;
85 
86   /// Getter for BlockFrequencyInfo
87   Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI;
88 
89   /// Profile summary information.
90   ProfileSummaryInfo *PSI;
91 
92   /// The called function.
93   Function &F;
94 
95   // Cache the DataLayout since we use it a lot.
96   const DataLayout &DL;
97 
98   /// The candidate callsite being analyzed. Please do not use this to do
99   /// analysis in the caller function; we want the inline cost query to be
100   /// easily cacheable. Instead, use the cover function paramHasAttr.
101   CallSite CandidateCS;
102 
103   /// Tunable parameters that control the analysis.
104   const InlineParams &Params;
105 
106   int Threshold;
107   int Cost;
108 
109   bool IsCallerRecursive;
110   bool IsRecursiveCall;
111   bool ExposesReturnsTwice;
112   bool HasDynamicAlloca;
113   bool ContainsNoDuplicateCall;
114   bool HasReturn;
115   bool HasIndirectBr;
116   bool HasFrameEscape;
117 
118   /// Number of bytes allocated statically by the callee.
119   uint64_t AllocatedSize;
120   unsigned NumInstructions, NumVectorInstructions;
121   int FiftyPercentVectorBonus, TenPercentVectorBonus;
122   int VectorBonus;
123 
124   /// While we walk the potentially-inlined instructions, we build up and
125   /// maintain a mapping of simplified values specific to this callsite. The
126   /// idea is to propagate any special information we have about arguments to
127   /// this call through the inlinable section of the function, and account for
128   /// likely simplifications post-inlining. The most important aspect we track
129   /// is CFG altering simplifications -- when we prove a basic block dead, that
130   /// can cause dramatic shifts in the cost of inlining a function.
131   DenseMap<Value *, Constant *> SimplifiedValues;
132 
133   /// Keep track of the values which map back (through function arguments) to
134   /// allocas on the caller stack which could be simplified through SROA.
135   DenseMap<Value *, Value *> SROAArgValues;
136 
137   /// The mapping of caller Alloca values to their accumulated cost savings. If
138   /// we have to disable SROA for one of the allocas, this tells us how much
139   /// cost must be added.
140   DenseMap<Value *, int> SROAArgCosts;
141 
142   /// Keep track of values which map to a pointer base and constant offset.
143   DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
144 
145   // Custom simplification helper routines.
146   bool isAllocaDerivedArg(Value *V);
147   bool lookupSROAArgAndCost(Value *V, Value *&Arg,
148                             DenseMap<Value *, int>::iterator &CostIt);
149   void disableSROA(DenseMap<Value *, int>::iterator CostIt);
150   void disableSROA(Value *V);
151   void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
152                           int InstructionCost);
153   bool isGEPFree(GetElementPtrInst &GEP);
154   bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
155   bool simplifyCallSite(Function *F, CallSite CS);
156   template <typename Callable>
157   bool simplifyInstruction(Instruction &I, Callable Evaluate);
158   ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
159 
160   /// Return true if the given argument to the function being considered for
161   /// inlining has the given attribute set either at the call site or the
162   /// function declaration.  Primarily used to inspect call site specific
163   /// attributes since these can be more precise than the ones on the callee
164   /// itself.
165   bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);
166 
167   /// Return true if the given value is known non null within the callee if
168   /// inlined through this particular callsite.
169   bool isKnownNonNullInCallee(Value *V);
170 
171   /// Update Threshold based on callsite properties such as callee
172   /// attributes and callee hotness for PGO builds. The Callee is explicitly
173   /// passed to support analyzing indirect calls whose target is inferred by
174   /// analysis.
175   void updateThreshold(CallSite CS, Function &Callee);
176 
177   /// Return true if size growth is allowed when inlining the callee at CS.
178   bool allowSizeGrowth(CallSite CS);
179 
180   // Custom analysis routines.
181   bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
182 
183   // Disable several entry points to the visitor so we don't accidentally use
184   // them by declaring but not defining them here.
185   void visit(Module *);
186   void visit(Module &);
187   void visit(Function *);
188   void visit(Function &);
189   void visit(BasicBlock *);
190   void visit(BasicBlock &);
191 
192   // Provide base case for our instruction visit.
193   bool visitInstruction(Instruction &I);
194 
195   // Our visit overrides.
196   bool visitAlloca(AllocaInst &I);
197   bool visitPHI(PHINode &I);
198   bool visitGetElementPtr(GetElementPtrInst &I);
199   bool visitBitCast(BitCastInst &I);
200   bool visitPtrToInt(PtrToIntInst &I);
201   bool visitIntToPtr(IntToPtrInst &I);
202   bool visitCastInst(CastInst &I);
203   bool visitUnaryInstruction(UnaryInstruction &I);
204   bool visitCmpInst(CmpInst &I);
205   bool visitSub(BinaryOperator &I);
206   bool visitBinaryOperator(BinaryOperator &I);
207   bool visitLoad(LoadInst &I);
208   bool visitStore(StoreInst &I);
209   bool visitExtractValue(ExtractValueInst &I);
210   bool visitInsertValue(InsertValueInst &I);
211   bool visitCallSite(CallSite CS);
212   bool visitReturnInst(ReturnInst &RI);
213   bool visitBranchInst(BranchInst &BI);
214   bool visitSwitchInst(SwitchInst &SI);
215   bool visitIndirectBrInst(IndirectBrInst &IBI);
216   bool visitResumeInst(ResumeInst &RI);
217   bool visitCleanupReturnInst(CleanupReturnInst &RI);
218   bool visitCatchReturnInst(CatchReturnInst &RI);
219   bool visitUnreachableInst(UnreachableInst &I);
220 
221 public:
222   CallAnalyzer(const TargetTransformInfo &TTI,
223                std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
224                Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
225                ProfileSummaryInfo *PSI, Function &Callee, CallSite CSArg,
226                const InlineParams &Params)
227       : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
228         PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()),
229         CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
230         Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
231         ExposesReturnsTwice(false), HasDynamicAlloca(false),
232         ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
233         HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
234         NumVectorInstructions(0), FiftyPercentVectorBonus(0),
235         TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
236         NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
237         NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
238         SROACostSavings(0), SROACostSavingsLost(0) {}
239 
240   bool analyzeCall(CallSite CS);
241 
242   int getThreshold() { return Threshold; }
243   int getCost() { return Cost; }
244 
245   // Keep a bunch of stats about the cost savings found so we can print them
246   // out when debugging.
247   unsigned NumConstantArgs;
248   unsigned NumConstantOffsetPtrArgs;
249   unsigned NumAllocaArgs;
250   unsigned NumConstantPtrCmps;
251   unsigned NumConstantPtrDiffs;
252   unsigned NumInstructionsSimplified;
253   unsigned SROACostSavings;
254   unsigned SROACostSavingsLost;
255 
256   void dump();
257 };
258 
259 } // namespace
260 
261 /// \brief Test whether the given value is an Alloca-derived function argument.
262 bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
263   return SROAArgValues.count(V);
264 }
265 
266 /// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
267 /// Returns false if V does not map to a SROA-candidate.
268 bool CallAnalyzer::lookupSROAArgAndCost(
269     Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
270   if (SROAArgValues.empty() || SROAArgCosts.empty())
271     return false;
272 
273   DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
274   if (ArgIt == SROAArgValues.end())
275     return false;
276 
277   Arg = ArgIt->second;
278   CostIt = SROAArgCosts.find(Arg);
279   return CostIt != SROAArgCosts.end();
280 }
281 
282 /// \brief Disable SROA for the candidate marked by this cost iterator.
283 ///
284 /// This marks the candidate as no longer viable for SROA, and adds the cost
285 /// savings associated with it back into the inline cost measurement.
286 void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
287   // If we're no longer able to perform SROA we need to undo its cost savings
288   // and prevent subsequent analysis.
289   Cost += CostIt->second;
290   SROACostSavings -= CostIt->second;
291   SROACostSavingsLost += CostIt->second;
292   SROAArgCosts.erase(CostIt);
293 }
294 
295 /// \brief If 'V' maps to a SROA candidate, disable SROA for it.
296 void CallAnalyzer::disableSROA(Value *V) {
297   Value *SROAArg;
298   DenseMap<Value *, int>::iterator CostIt;
299   if (lookupSROAArgAndCost(V, SROAArg, CostIt))
300     disableSROA(CostIt);
301 }
302 
303 /// \brief Accumulate the given cost for a particular SROA candidate.
304 void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
305                                       int InstructionCost) {
306   CostIt->second += InstructionCost;
307   SROACostSavings += InstructionCost;
308 }
309 
310 /// \brief Accumulate a constant GEP offset into an APInt if possible.
311 ///
312 /// Returns false if unable to compute the offset for any reason. Respects any
313 /// simplified values known during the analysis of this callsite.
314 bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
315   unsigned IntPtrWidth = DL.getPointerSizeInBits();
316   assert(IntPtrWidth == Offset.getBitWidth());
317 
318   for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
319        GTI != GTE; ++GTI) {
320     ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
321     if (!OpC)
322       if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
323         OpC = dyn_cast<ConstantInt>(SimpleOp);
324     if (!OpC)
325       return false;
326     if (OpC->isZero())
327       continue;
328 
329     // Handle a struct index, which adds its field offset to the pointer.
330     if (StructType *STy = GTI.getStructTypeOrNull()) {
331       unsigned ElementIdx = OpC->getZExtValue();
332       const StructLayout *SL = DL.getStructLayout(STy);
333       Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
334       continue;
335     }
336 
337     APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
338     Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
339   }
340   return true;
341 }
342 
343 /// \brief Use TTI to check whether a GEP is free.
344 ///
345 /// Respects any simplified values known during the analysis of this callsite.
346 bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
347   SmallVector<Value *, 4> Indices;
348   for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
349     if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
350        Indices.push_back(SimpleOp);
351      else
352        Indices.push_back(*I);
353   return TargetTransformInfo::TCC_Free ==
354          TTI.getGEPCost(GEP.getSourceElementType(), GEP.getPointerOperand(),
355                         Indices);
356 }
357 
358 bool CallAnalyzer::visitAlloca(AllocaInst &I) {
359   // Check whether inlining will turn a dynamic alloca into a static
360   // alloca and handle that case.
361   if (I.isArrayAllocation()) {
362     Constant *Size = SimplifiedValues.lookup(I.getArraySize());
363     if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
364       Type *Ty = I.getAllocatedType();
365       AllocatedSize = SaturatingMultiplyAdd(
366           AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize);
367       return Base::visitAlloca(I);
368     }
369   }
370 
371   // Accumulate the allocated size.
372   if (I.isStaticAlloca()) {
373     Type *Ty = I.getAllocatedType();
374     AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize);
375   }
376 
377   // We will happily inline static alloca instructions.
378   if (I.isStaticAlloca())
379     return Base::visitAlloca(I);
380 
381   // FIXME: This is overly conservative. Dynamic allocas are inefficient for
382   // a variety of reasons, and so we would like to not inline them into
383   // functions which don't currently have a dynamic alloca. This simply
384   // disables inlining altogether in the presence of a dynamic alloca.
385   HasDynamicAlloca = true;
386   return false;
387 }
388 
389 bool CallAnalyzer::visitPHI(PHINode &I) {
390   // FIXME: We should potentially be tracking values through phi nodes,
391   // especially when they collapse to a single value due to deleted CFG edges
392   // during inlining.
393 
394   // FIXME: We need to propagate SROA *disabling* through phi nodes, even
395   // though we don't want to propagate it's bonuses. The idea is to disable
396   // SROA if it *might* be used in an inappropriate manner.
397 
398   // Phi nodes are always zero-cost.
399   return true;
400 }
401 
402 bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
403   Value *SROAArg;
404   DenseMap<Value *, int>::iterator CostIt;
405   bool SROACandidate =
406       lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt);
407 
408   // Try to fold GEPs of constant-offset call site argument pointers. This
409   // requires target data and inbounds GEPs.
410   if (I.isInBounds()) {
411     // Check if we have a base + offset for the pointer.
412     Value *Ptr = I.getPointerOperand();
413     std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
414     if (BaseAndOffset.first) {
415       // Check if the offset of this GEP is constant, and if so accumulate it
416       // into Offset.
417       if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
418         // Non-constant GEPs aren't folded, and disable SROA.
419         if (SROACandidate)
420           disableSROA(CostIt);
421         return isGEPFree(I);
422       }
423 
424       // Add the result as a new mapping to Base + Offset.
425       ConstantOffsetPtrs[&I] = BaseAndOffset;
426 
427       // Also handle SROA candidates here, we already know that the GEP is
428       // all-constant indexed.
429       if (SROACandidate)
430         SROAArgValues[&I] = SROAArg;
431 
432       return true;
433     }
434   }
435 
436   // Lambda to check whether a GEP's indices are all constant.
437   auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
438     for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
439       if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
440         return false;
441     return true;
442   };
443 
444   if (IsGEPOffsetConstant(I)) {
445     if (SROACandidate)
446       SROAArgValues[&I] = SROAArg;
447 
448     // Constant GEPs are modeled as free.
449     return true;
450   }
451 
452   // Variable GEPs will require math and will disable SROA.
453   if (SROACandidate)
454     disableSROA(CostIt);
455   return isGEPFree(I);
456 }
457 
458 /// Simplify \p I if its operands are constants and update SimplifiedValues.
459 /// \p Evaluate is a callable specific to instruction type that evaluates the
460 /// instruction when all the operands are constants.
461 template <typename Callable>
462 bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
463   SmallVector<Constant *, 2> COps;
464   for (Value *Op : I.operands()) {
465     Constant *COp = dyn_cast<Constant>(Op);
466     if (!COp)
467       COp = SimplifiedValues.lookup(Op);
468     if (!COp)
469       return false;
470     COps.push_back(COp);
471   }
472   auto *C = Evaluate(COps);
473   if (!C)
474     return false;
475   SimplifiedValues[&I] = C;
476   return true;
477 }
478 
479 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
480   // Propagate constants through bitcasts.
481   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
482         return ConstantExpr::getBitCast(COps[0], I.getType());
483       }))
484     return true;
485 
486   // Track base/offsets through casts
487   std::pair<Value *, APInt> BaseAndOffset =
488       ConstantOffsetPtrs.lookup(I.getOperand(0));
489   // Casts don't change the offset, just wrap it up.
490   if (BaseAndOffset.first)
491     ConstantOffsetPtrs[&I] = BaseAndOffset;
492 
493   // Also look for SROA candidates here.
494   Value *SROAArg;
495   DenseMap<Value *, int>::iterator CostIt;
496   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
497     SROAArgValues[&I] = SROAArg;
498 
499   // Bitcasts are always zero cost.
500   return true;
501 }
502 
503 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
504   // Propagate constants through ptrtoint.
505   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
506         return ConstantExpr::getPtrToInt(COps[0], I.getType());
507       }))
508     return true;
509 
510   // Track base/offset pairs when converted to a plain integer provided the
511   // integer is large enough to represent the pointer.
512   unsigned IntegerSize = I.getType()->getScalarSizeInBits();
513   if (IntegerSize >= DL.getPointerSizeInBits()) {
514     std::pair<Value *, APInt> BaseAndOffset =
515         ConstantOffsetPtrs.lookup(I.getOperand(0));
516     if (BaseAndOffset.first)
517       ConstantOffsetPtrs[&I] = BaseAndOffset;
518   }
519 
520   // This is really weird. Technically, ptrtoint will disable SROA. However,
521   // unless that ptrtoint is *used* somewhere in the live basic blocks after
522   // inlining, it will be nuked, and SROA should proceed. All of the uses which
523   // would block SROA would also block SROA if applied directly to a pointer,
524   // and so we can just add the integer in here. The only places where SROA is
525   // preserved either cannot fire on an integer, or won't in-and-of themselves
526   // disable SROA (ext) w/o some later use that we would see and disable.
527   Value *SROAArg;
528   DenseMap<Value *, int>::iterator CostIt;
529   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
530     SROAArgValues[&I] = SROAArg;
531 
532   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
533 }
534 
535 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
536   // Propagate constants through ptrtoint.
537   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
538         return ConstantExpr::getIntToPtr(COps[0], I.getType());
539       }))
540     return true;
541 
542   // Track base/offset pairs when round-tripped through a pointer without
543   // modifications provided the integer is not too large.
544   Value *Op = I.getOperand(0);
545   unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
546   if (IntegerSize <= DL.getPointerSizeInBits()) {
547     std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
548     if (BaseAndOffset.first)
549       ConstantOffsetPtrs[&I] = BaseAndOffset;
550   }
551 
552   // "Propagate" SROA here in the same manner as we do for ptrtoint above.
553   Value *SROAArg;
554   DenseMap<Value *, int>::iterator CostIt;
555   if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
556     SROAArgValues[&I] = SROAArg;
557 
558   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
559 }
560 
561 bool CallAnalyzer::visitCastInst(CastInst &I) {
562   // Propagate constants through ptrtoint.
563   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
564         return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
565       }))
566     return true;
567 
568   // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
569   disableSROA(I.getOperand(0));
570 
571   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
572 }
573 
574 bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
575   Value *Operand = I.getOperand(0);
576   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
577         return ConstantFoldInstOperands(&I, COps[0], DL);
578       }))
579     return true;
580 
581   // Disable any SROA on the argument to arbitrary unary operators.
582   disableSROA(Operand);
583 
584   return false;
585 }
586 
587 bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
588   return CandidateCS.paramHasAttr(A->getArgNo(), Attr);
589 }
590 
591 bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
592   // Does the *call site* have the NonNull attribute set on an argument?  We
593   // use the attribute on the call site to memoize any analysis done in the
594   // caller. This will also trip if the callee function has a non-null
595   // parameter attribute, but that's a less interesting case because hopefully
596   // the callee would already have been simplified based on that.
597   if (Argument *A = dyn_cast<Argument>(V))
598     if (paramHasAttr(A, Attribute::NonNull))
599       return true;
600 
601   // Is this an alloca in the caller?  This is distinct from the attribute case
602   // above because attributes aren't updated within the inliner itself and we
603   // always want to catch the alloca derived case.
604   if (isAllocaDerivedArg(V))
605     // We can actually predict the result of comparisons between an
606     // alloca-derived value and null. Note that this fires regardless of
607     // SROA firing.
608     return true;
609 
610   return false;
611 }
612 
613 bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
614   // If the normal destination of the invoke or the parent block of the call
615   // site is unreachable-terminated, there is little point in inlining this
616   // unless there is literally zero cost.
617   // FIXME: Note that it is possible that an unreachable-terminated block has a
618   // hot entry. For example, in below scenario inlining hot_call_X() may be
619   // beneficial :
620   // main() {
621   //   hot_call_1();
622   //   ...
623   //   hot_call_N()
624   //   exit(0);
625   // }
626   // For now, we are not handling this corner case here as it is rare in real
627   // code. In future, we should elaborate this based on BPI and BFI in more
628   // general threshold adjusting heuristics in updateThreshold().
629   Instruction *Instr = CS.getInstruction();
630   if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
631     if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
632       return false;
633   } else if (isa<UnreachableInst>(Instr->getParent()->getTerminator()))
634     return false;
635 
636   return true;
637 }
638 
639 void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
640   // If no size growth is allowed for this inlining, set Threshold to 0.
641   if (!allowSizeGrowth(CS)) {
642     Threshold = 0;
643     return;
644   }
645 
646   Function *Caller = CS.getCaller();
647 
648   // return min(A, B) if B is valid.
649   auto MinIfValid = [](int A, Optional<int> B) {
650     return B ? std::min(A, B.getValue()) : A;
651   };
652 
653   // return max(A, B) if B is valid.
654   auto MaxIfValid = [](int A, Optional<int> B) {
655     return B ? std::max(A, B.getValue()) : A;
656   };
657 
658   // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
659   // and reduce the threshold if the caller has the necessary attribute.
660   if (Caller->optForMinSize())
661     Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
662   else if (Caller->optForSize())
663     Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
664 
665   // Adjust the threshold based on inlinehint attribute and profile based
666   // hotness information if the caller does not have MinSize attribute.
667   if (!Caller->optForMinSize()) {
668     if (Callee.hasFnAttribute(Attribute::InlineHint))
669       Threshold = MaxIfValid(Threshold, Params.HintThreshold);
670     if (PSI) {
671       BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
672       // FIXME: After switching to the new passmanager, simplify the logic below
673       // by checking only the callsite hotness/coldness. The check for CallerBFI
674       // exists only because we do not have BFI available with the old PM.
675       //
676       // Use callee's hotness information only if we have no way of determining
677       // callsite's hotness information. Callsite hotness can be determined if
678       // sample profile is used (which adds hotness metadata to calls) or if
679       // caller's BlockFrequencyInfo is available.
680       if (CallerBFI || PSI->hasSampleProfile()) {
681         if (PSI->isHotCallSite(CS, CallerBFI)) {
682           DEBUG(dbgs() << "Hot callsite.\n");
683           Threshold = Params.HotCallSiteThreshold.getValue();
684         } else if (PSI->isColdCallSite(CS, CallerBFI)) {
685           DEBUG(dbgs() << "Cold callsite.\n");
686           Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
687         }
688       } else {
689         if (PSI->isFunctionEntryHot(&Callee)) {
690           DEBUG(dbgs() << "Hot callee.\n");
691           // If callsite hotness can not be determined, we may still know
692           // that the callee is hot and treat it as a weaker hint for threshold
693           // increase.
694           Threshold = MaxIfValid(Threshold, Params.HintThreshold);
695         } else if (PSI->isFunctionEntryCold(&Callee)) {
696           DEBUG(dbgs() << "Cold callee.\n");
697           Threshold = MinIfValid(Threshold, Params.ColdThreshold);
698         }
699       }
700     }
701   }
702 
703   // Finally, take the target-specific inlining threshold multiplier into
704   // account.
705   Threshold *= TTI.getInliningThresholdMultiplier();
706 }
707 
708 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
709   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
710   // First try to handle simplified comparisons.
711   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
712         return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
713       }))
714     return true;
715 
716   if (I.getOpcode() == Instruction::FCmp)
717     return false;
718 
719   // Otherwise look for a comparison between constant offset pointers with
720   // a common base.
721   Value *LHSBase, *RHSBase;
722   APInt LHSOffset, RHSOffset;
723   std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
724   if (LHSBase) {
725     std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
726     if (RHSBase && LHSBase == RHSBase) {
727       // We have common bases, fold the icmp to a constant based on the
728       // offsets.
729       Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
730       Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
731       if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
732         SimplifiedValues[&I] = C;
733         ++NumConstantPtrCmps;
734         return true;
735       }
736     }
737   }
738 
739   // If the comparison is an equality comparison with null, we can simplify it
740   // if we know the value (argument) can't be null
741   if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
742       isKnownNonNullInCallee(I.getOperand(0))) {
743     bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
744     SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
745                                       : ConstantInt::getFalse(I.getType());
746     return true;
747   }
748   // Finally check for SROA candidates in comparisons.
749   Value *SROAArg;
750   DenseMap<Value *, int>::iterator CostIt;
751   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
752     if (isa<ConstantPointerNull>(I.getOperand(1))) {
753       accumulateSROACost(CostIt, InlineConstants::InstrCost);
754       return true;
755     }
756 
757     disableSROA(CostIt);
758   }
759 
760   return false;
761 }
762 
763 bool CallAnalyzer::visitSub(BinaryOperator &I) {
764   // Try to handle a special case: we can fold computing the difference of two
765   // constant-related pointers.
766   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
767   Value *LHSBase, *RHSBase;
768   APInt LHSOffset, RHSOffset;
769   std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
770   if (LHSBase) {
771     std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
772     if (RHSBase && LHSBase == RHSBase) {
773       // We have common bases, fold the subtract to a constant based on the
774       // offsets.
775       Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
776       Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
777       if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
778         SimplifiedValues[&I] = C;
779         ++NumConstantPtrDiffs;
780         return true;
781       }
782     }
783   }
784 
785   // Otherwise, fall back to the generic logic for simplifying and handling
786   // instructions.
787   return Base::visitSub(I);
788 }
789 
790 bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
791   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
792   auto Evaluate = [&](SmallVectorImpl<Constant *> &COps) {
793     Value *SimpleV = nullptr;
794     if (auto FI = dyn_cast<FPMathOperator>(&I))
795       SimpleV = SimplifyFPBinOp(I.getOpcode(), COps[0], COps[1],
796                                 FI->getFastMathFlags(), DL);
797     else
798       SimpleV = SimplifyBinOp(I.getOpcode(), COps[0], COps[1], DL);
799     return dyn_cast_or_null<Constant>(SimpleV);
800   };
801 
802   if (simplifyInstruction(I, Evaluate))
803     return true;
804 
805   // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
806   disableSROA(LHS);
807   disableSROA(RHS);
808 
809   return false;
810 }
811 
812 bool CallAnalyzer::visitLoad(LoadInst &I) {
813   Value *SROAArg;
814   DenseMap<Value *, int>::iterator CostIt;
815   if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
816     if (I.isSimple()) {
817       accumulateSROACost(CostIt, InlineConstants::InstrCost);
818       return true;
819     }
820 
821     disableSROA(CostIt);
822   }
823 
824   return false;
825 }
826 
827 bool CallAnalyzer::visitStore(StoreInst &I) {
828   Value *SROAArg;
829   DenseMap<Value *, int>::iterator CostIt;
830   if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
831     if (I.isSimple()) {
832       accumulateSROACost(CostIt, InlineConstants::InstrCost);
833       return true;
834     }
835 
836     disableSROA(CostIt);
837   }
838 
839   return false;
840 }
841 
842 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
843   // Constant folding for extract value is trivial.
844   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
845         return ConstantExpr::getExtractValue(COps[0], I.getIndices());
846       }))
847     return true;
848 
849   // SROA can look through these but give them a cost.
850   return false;
851 }
852 
853 bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
854   // Constant folding for insert value is trivial.
855   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
856         return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
857                                             /*InsertedValueOperand*/ COps[1],
858                                             I.getIndices());
859       }))
860     return true;
861 
862   // SROA can look through these but give them a cost.
863   return false;
864 }
865 
866 /// \brief Try to simplify a call site.
867 ///
868 /// Takes a concrete function and callsite and tries to actually simplify it by
869 /// analyzing the arguments and call itself with instsimplify. Returns true if
870 /// it has simplified the callsite to some other entity (a constant), making it
871 /// free.
872 bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
873   // FIXME: Using the instsimplify logic directly for this is inefficient
874   // because we have to continually rebuild the argument list even when no
875   // simplifications can be performed. Until that is fixed with remapping
876   // inside of instsimplify, directly constant fold calls here.
877   if (!canConstantFoldCallTo(F))
878     return false;
879 
880   // Try to re-map the arguments to constants.
881   SmallVector<Constant *, 4> ConstantArgs;
882   ConstantArgs.reserve(CS.arg_size());
883   for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E;
884        ++I) {
885     Constant *C = dyn_cast<Constant>(*I);
886     if (!C)
887       C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
888     if (!C)
889       return false; // This argument doesn't map to a constant.
890 
891     ConstantArgs.push_back(C);
892   }
893   if (Constant *C = ConstantFoldCall(F, ConstantArgs)) {
894     SimplifiedValues[CS.getInstruction()] = C;
895     return true;
896   }
897 
898   return false;
899 }
900 
901 bool CallAnalyzer::visitCallSite(CallSite CS) {
902   if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
903       !F.hasFnAttribute(Attribute::ReturnsTwice)) {
904     // This aborts the entire analysis.
905     ExposesReturnsTwice = true;
906     return false;
907   }
908   if (CS.isCall() && cast<CallInst>(CS.getInstruction())->cannotDuplicate())
909     ContainsNoDuplicateCall = true;
910 
911   if (Function *F = CS.getCalledFunction()) {
912     // When we have a concrete function, first try to simplify it directly.
913     if (simplifyCallSite(F, CS))
914       return true;
915 
916     // Next check if it is an intrinsic we know about.
917     // FIXME: Lift this into part of the InstVisitor.
918     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
919       switch (II->getIntrinsicID()) {
920       default:
921         return Base::visitCallSite(CS);
922 
923       case Intrinsic::load_relative:
924         // This is normally lowered to 4 LLVM instructions.
925         Cost += 3 * InlineConstants::InstrCost;
926         return false;
927 
928       case Intrinsic::memset:
929       case Intrinsic::memcpy:
930       case Intrinsic::memmove:
931         // SROA can usually chew through these intrinsics, but they aren't free.
932         return false;
933       case Intrinsic::localescape:
934         HasFrameEscape = true;
935         return false;
936       }
937     }
938 
939     if (F == CS.getInstruction()->getParent()->getParent()) {
940       // This flag will fully abort the analysis, so don't bother with anything
941       // else.
942       IsRecursiveCall = true;
943       return false;
944     }
945 
946     if (TTI.isLoweredToCall(F)) {
947       // We account for the average 1 instruction per call argument setup
948       // here.
949       Cost += CS.arg_size() * InlineConstants::InstrCost;
950 
951       // Everything other than inline ASM will also have a significant cost
952       // merely from making the call.
953       if (!isa<InlineAsm>(CS.getCalledValue()))
954         Cost += InlineConstants::CallPenalty;
955     }
956 
957     return Base::visitCallSite(CS);
958   }
959 
960   // Otherwise we're in a very special case -- an indirect function call. See
961   // if we can be particularly clever about this.
962   Value *Callee = CS.getCalledValue();
963 
964   // First, pay the price of the argument setup. We account for the average
965   // 1 instruction per call argument setup here.
966   Cost += CS.arg_size() * InlineConstants::InstrCost;
967 
968   // Next, check if this happens to be an indirect function call to a known
969   // function in this inline context. If not, we've done all we can.
970   Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
971   if (!F)
972     return Base::visitCallSite(CS);
973 
974   // If we have a constant that we are calling as a function, we can peer
975   // through it and see the function target. This happens not infrequently
976   // during devirtualization and so we want to give it a hefty bonus for
977   // inlining, but cap that bonus in the event that inlining wouldn't pan
978   // out. Pretend to inline the function, with a custom threshold.
979   auto IndirectCallParams = Params;
980   IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
981   CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, *F, CS,
982                   IndirectCallParams);
983   if (CA.analyzeCall(CS)) {
984     // We were able to inline the indirect call! Subtract the cost from the
985     // threshold to get the bonus we want to apply, but don't go below zero.
986     Cost -= std::max(0, CA.getThreshold() - CA.getCost());
987   }
988 
989   return Base::visitCallSite(CS);
990 }
991 
992 bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
993   // At least one return instruction will be free after inlining.
994   bool Free = !HasReturn;
995   HasReturn = true;
996   return Free;
997 }
998 
999 bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
1000   // We model unconditional branches as essentially free -- they really
1001   // shouldn't exist at all, but handling them makes the behavior of the
1002   // inliner more regular and predictable. Interestingly, conditional branches
1003   // which will fold away are also free.
1004   return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
1005          dyn_cast_or_null<ConstantInt>(
1006              SimplifiedValues.lookup(BI.getCondition()));
1007 }
1008 
1009 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
1010   // We model unconditional switches as free, see the comments on handling
1011   // branches.
1012   if (isa<ConstantInt>(SI.getCondition()))
1013     return true;
1014   if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
1015     if (isa<ConstantInt>(V))
1016       return true;
1017 
1018   if (EnableGenericSwitchCost) {
1019     // Assume the most general case where the swith is lowered into
1020     // either a jump table, bit test, or a balanced binary tree consisting of
1021     // case clusters without merging adjacent clusters with the same
1022     // destination. We do not consider the switches that are lowered with a mix
1023     // of jump table/bit test/binary search tree. The cost of the switch is
1024     // proportional to the size of the tree or the size of jump table range.
1025 
1026     // Exit early for a large switch, assuming one case needs at least one
1027     // instruction.
1028     // FIXME: This is not true for a bit test, but ignore such case for now to
1029     // save compile-time.
1030     int64_t CostLowerBound =
1031         std::min((int64_t)INT_MAX,
1032                  (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
1033 
1034     if (CostLowerBound > Threshold) {
1035       Cost = CostLowerBound;
1036       return false;
1037     }
1038 
1039     unsigned JumpTableSize = 0;
1040     unsigned NumCaseCluster =
1041         TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);
1042 
1043     // If suitable for a jump table, consider the cost for the table size and
1044     // branch to destination.
1045     if (JumpTableSize) {
1046       int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
1047                        4 * InlineConstants::InstrCost;
1048       Cost = std::min((int64_t)INT_MAX, JTCost + Cost);
1049       return false;
1050     }
1051 
1052     // Considering forming a binary search, we should find the number of nodes
1053     // which is same as the number of comparisons when lowered. For a given
1054     // number of clusters, n, we can define a recursive function, f(n), to find
1055     // the number of nodes in the tree. The recursion is :
1056     // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3,
1057     // and f(n) = n, when n <= 3.
1058     // This will lead a binary tree where the leaf should be either f(2) or f(3)
1059     // when n > 3.  So, the number of comparisons from leaves should be n, while
1060     // the number of non-leaf should be :
1061     //   2^(log2(n) - 1) - 1
1062     //   = 2^log2(n) * 2^-1 - 1
1063     //   = n / 2 - 1.
1064     // Considering comparisons from leaf and non-leaf nodes, we can estimate the
1065     // number of comparisons in a simple closed form :
1066     //   n + n / 2 - 1 = n * 3 / 2 - 1
1067     if (NumCaseCluster <= 3) {
1068       // Suppose a comparison includes one compare and one conditional branch.
1069       Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
1070       return false;
1071     }
1072     int64_t ExpectedNumberOfCompare = 3 * (uint64_t)NumCaseCluster / 2 - 1;
1073     uint64_t SwitchCost =
1074         ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
1075     Cost = std::min((uint64_t)INT_MAX, SwitchCost + Cost);
1076     return false;
1077   }
1078 
1079   // Use a simple switch cost model where we accumulate a cost proportional to
1080   // the number of distinct successor blocks. This fan-out in the CFG cannot
1081   // be represented for free even if we can represent the core switch as a
1082   // jumptable that takes a single instruction.
1083   ///
1084   // NB: We convert large switches which are just used to initialize large phi
1085   // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
1086   // inlining those. It will prevent inlining in cases where the optimization
1087   // does not (yet) fire.
1088   SmallPtrSet<BasicBlock *, 8> SuccessorBlocks;
1089   SuccessorBlocks.insert(SI.getDefaultDest());
1090   for (auto Case : SI.cases())
1091     SuccessorBlocks.insert(Case.getCaseSuccessor());
1092   // Add cost corresponding to the number of distinct destinations. The first
1093   // we model as free because of fallthrough.
1094   Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost;
1095   return false;
1096 }
1097 
1098 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
1099   // We never want to inline functions that contain an indirectbr.  This is
1100   // incorrect because all the blockaddress's (in static global initializers
1101   // for example) would be referring to the original function, and this
1102   // indirect jump would jump from the inlined copy of the function into the
1103   // original function which is extremely undefined behavior.
1104   // FIXME: This logic isn't really right; we can safely inline functions with
1105   // indirectbr's as long as no other function or global references the
1106   // blockaddress of a block within the current function.
1107   HasIndirectBr = true;
1108   return false;
1109 }
1110 
1111 bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
1112   // FIXME: It's not clear that a single instruction is an accurate model for
1113   // the inline cost of a resume instruction.
1114   return false;
1115 }
1116 
1117 bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
1118   // FIXME: It's not clear that a single instruction is an accurate model for
1119   // the inline cost of a cleanupret instruction.
1120   return false;
1121 }
1122 
1123 bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
1124   // FIXME: It's not clear that a single instruction is an accurate model for
1125   // the inline cost of a catchret instruction.
1126   return false;
1127 }
1128 
1129 bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
1130   // FIXME: It might be reasonably to discount the cost of instructions leading
1131   // to unreachable as they have the lowest possible impact on both runtime and
1132   // code size.
1133   return true; // No actual code is needed for unreachable.
1134 }
1135 
1136 bool CallAnalyzer::visitInstruction(Instruction &I) {
1137   // Some instructions are free. All of the free intrinsics can also be
1138   // handled by SROA, etc.
1139   if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
1140     return true;
1141 
1142   // We found something we don't understand or can't handle. Mark any SROA-able
1143   // values in the operand list as no longer viable.
1144   for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
1145     disableSROA(*OI);
1146 
1147   return false;
1148 }
1149 
1150 /// \brief Analyze a basic block for its contribution to the inline cost.
1151 ///
1152 /// This method walks the analyzer over every instruction in the given basic
1153 /// block and accounts for their cost during inlining at this callsite. It
1154 /// aborts early if the threshold has been exceeded or an impossible to inline
1155 /// construct has been detected. It returns false if inlining is no longer
1156 /// viable, and true if inlining remains viable.
1157 bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
1158                                 SmallPtrSetImpl<const Value *> &EphValues) {
1159   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
1160     // FIXME: Currently, the number of instructions in a function regardless of
1161     // our ability to simplify them during inline to constants or dead code,
1162     // are actually used by the vector bonus heuristic. As long as that's true,
1163     // we have to special case debug intrinsics here to prevent differences in
1164     // inlining due to debug symbols. Eventually, the number of unsimplified
1165     // instructions shouldn't factor into the cost computation, but until then,
1166     // hack around it here.
1167     if (isa<DbgInfoIntrinsic>(I))
1168       continue;
1169 
1170     // Skip ephemeral values.
1171     if (EphValues.count(&*I))
1172       continue;
1173 
1174     ++NumInstructions;
1175     if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
1176       ++NumVectorInstructions;
1177 
1178     // If the instruction is floating point, and the target says this operation
1179     // is expensive or the function has the "use-soft-float" attribute, this may
1180     // eventually become a library call. Treat the cost as such.
1181     if (I->getType()->isFloatingPointTy()) {
1182       // If the function has the "use-soft-float" attribute, mark it as
1183       // expensive.
1184       if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
1185           (F.getFnAttribute("use-soft-float").getValueAsString() == "true"))
1186         Cost += InlineConstants::CallPenalty;
1187     }
1188 
1189     // If the instruction simplified to a constant, there is no cost to this
1190     // instruction. Visit the instructions using our InstVisitor to account for
1191     // all of the per-instruction logic. The visit tree returns true if we
1192     // consumed the instruction in any way, and false if the instruction's base
1193     // cost should count against inlining.
1194     if (Base::visit(&*I))
1195       ++NumInstructionsSimplified;
1196     else
1197       Cost += InlineConstants::InstrCost;
1198 
1199     // If the visit this instruction detected an uninlinable pattern, abort.
1200     if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
1201         HasIndirectBr || HasFrameEscape)
1202       return false;
1203 
1204     // If the caller is a recursive function then we don't want to inline
1205     // functions which allocate a lot of stack space because it would increase
1206     // the caller stack usage dramatically.
1207     if (IsCallerRecursive &&
1208         AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
1209       return false;
1210 
1211     // Check if we've past the maximum possible threshold so we don't spin in
1212     // huge basic blocks that will never inline.
1213     if (Cost > Threshold)
1214       return false;
1215   }
1216 
1217   return true;
1218 }
1219 
1220 /// \brief Compute the base pointer and cumulative constant offsets for V.
1221 ///
1222 /// This strips all constant offsets off of V, leaving it the base pointer, and
1223 /// accumulates the total constant offset applied in the returned constant. It
1224 /// returns 0 if V is not a pointer, and returns the constant '0' if there are
1225 /// no constant offsets applied.
1226 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
1227   if (!V->getType()->isPointerTy())
1228     return nullptr;
1229 
1230   unsigned IntPtrWidth = DL.getPointerSizeInBits();
1231   APInt Offset = APInt::getNullValue(IntPtrWidth);
1232 
1233   // Even though we don't look through PHI nodes, we could be called on an
1234   // instruction in an unreachable block, which may be on a cycle.
1235   SmallPtrSet<Value *, 4> Visited;
1236   Visited.insert(V);
1237   do {
1238     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
1239       if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
1240         return nullptr;
1241       V = GEP->getPointerOperand();
1242     } else if (Operator::getOpcode(V) == Instruction::BitCast) {
1243       V = cast<Operator>(V)->getOperand(0);
1244     } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
1245       if (GA->isInterposable())
1246         break;
1247       V = GA->getAliasee();
1248     } else {
1249       break;
1250     }
1251     assert(V->getType()->isPointerTy() && "Unexpected operand type!");
1252   } while (Visited.insert(V).second);
1253 
1254   Type *IntPtrTy = DL.getIntPtrType(V->getContext());
1255   return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
1256 }
1257 
1258 /// \brief Analyze a call site for potential inlining.
1259 ///
1260 /// Returns true if inlining this call is viable, and false if it is not
1261 /// viable. It computes the cost and adjusts the threshold based on numerous
1262 /// factors and heuristics. If this method returns false but the computed cost
1263 /// is below the computed threshold, then inlining was forcibly disabled by
1264 /// some artifact of the routine.
1265 bool CallAnalyzer::analyzeCall(CallSite CS) {
1266   ++NumCallsAnalyzed;
1267 
1268   // Perform some tweaks to the cost and threshold based on the direct
1269   // callsite information.
1270 
1271   // We want to more aggressively inline vector-dense kernels, so up the
1272   // threshold, and we'll lower it if the % of vector instructions gets too
1273   // low. Note that these bonuses are some what arbitrary and evolved over time
1274   // by accident as much as because they are principled bonuses.
1275   //
1276   // FIXME: It would be nice to remove all such bonuses. At least it would be
1277   // nice to base the bonus values on something more scientific.
1278   assert(NumInstructions == 0);
1279   assert(NumVectorInstructions == 0);
1280 
1281   // Update the threshold based on callsite properties
1282   updateThreshold(CS, F);
1283 
1284   FiftyPercentVectorBonus = 3 * Threshold / 2;
1285   TenPercentVectorBonus = 3 * Threshold / 4;
1286 
1287   // Track whether the post-inlining function would have more than one basic
1288   // block. A single basic block is often intended for inlining. Balloon the
1289   // threshold by 50% until we pass the single-BB phase.
1290   bool SingleBB = true;
1291   int SingleBBBonus = Threshold / 2;
1292 
1293   // Speculatively apply all possible bonuses to Threshold. If cost exceeds
1294   // this Threshold any time, and cost cannot decrease, we can stop processing
1295   // the rest of the function body.
1296   Threshold += (SingleBBBonus + FiftyPercentVectorBonus);
1297 
1298   // Give out bonuses for the callsite, as the instructions setting them up
1299   // will be gone after inlining.
1300   Cost -= getCallsiteCost(CS, DL);
1301 
1302   // If there is only one call of the function, and it has internal linkage,
1303   // the cost of inlining it drops dramatically.
1304   bool OnlyOneCallAndLocalLinkage =
1305       F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
1306   if (OnlyOneCallAndLocalLinkage)
1307     Cost -= InlineConstants::LastCallToStaticBonus;
1308 
1309   // If this function uses the coldcc calling convention, prefer not to inline
1310   // it.
1311   if (F.getCallingConv() == CallingConv::Cold)
1312     Cost += InlineConstants::ColdccPenalty;
1313 
1314   // Check if we're done. This can happen due to bonuses and penalties.
1315   if (Cost > Threshold)
1316     return false;
1317 
1318   if (F.empty())
1319     return true;
1320 
1321   Function *Caller = CS.getInstruction()->getParent()->getParent();
1322   // Check if the caller function is recursive itself.
1323   for (User *U : Caller->users()) {
1324     CallSite Site(U);
1325     if (!Site)
1326       continue;
1327     Instruction *I = Site.getInstruction();
1328     if (I->getParent()->getParent() == Caller) {
1329       IsCallerRecursive = true;
1330       break;
1331     }
1332   }
1333 
1334   // Populate our simplified values by mapping from function arguments to call
1335   // arguments with known important simplifications.
1336   CallSite::arg_iterator CAI = CS.arg_begin();
1337   for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
1338        FAI != FAE; ++FAI, ++CAI) {
1339     assert(CAI != CS.arg_end());
1340     if (Constant *C = dyn_cast<Constant>(CAI))
1341       SimplifiedValues[&*FAI] = C;
1342 
1343     Value *PtrArg = *CAI;
1344     if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
1345       ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());
1346 
1347       // We can SROA any pointer arguments derived from alloca instructions.
1348       if (isa<AllocaInst>(PtrArg)) {
1349         SROAArgValues[&*FAI] = PtrArg;
1350         SROAArgCosts[PtrArg] = 0;
1351       }
1352     }
1353   }
1354   NumConstantArgs = SimplifiedValues.size();
1355   NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
1356   NumAllocaArgs = SROAArgValues.size();
1357 
1358   // FIXME: If a caller has multiple calls to a callee, we end up recomputing
1359   // the ephemeral values multiple times (and they're completely determined by
1360   // the callee, so this is purely duplicate work).
1361   SmallPtrSet<const Value *, 32> EphValues;
1362   CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
1363 
1364   // The worklist of live basic blocks in the callee *after* inlining. We avoid
1365   // adding basic blocks of the callee which can be proven to be dead for this
1366   // particular call site in order to get more accurate cost estimates. This
1367   // requires a somewhat heavyweight iteration pattern: we need to walk the
1368   // basic blocks in a breadth-first order as we insert live successors. To
1369   // accomplish this, prioritizing for small iterations because we exit after
1370   // crossing our threshold, we use a small-size optimized SetVector.
1371   typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
1372                     SmallPtrSet<BasicBlock *, 16>>
1373       BBSetVector;
1374   BBSetVector BBWorklist;
1375   BBWorklist.insert(&F.getEntryBlock());
1376   // Note that we *must not* cache the size, this loop grows the worklist.
1377   for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
1378     // Bail out the moment we cross the threshold. This means we'll under-count
1379     // the cost, but only when undercounting doesn't matter.
1380     if (Cost > Threshold)
1381       break;
1382 
1383     BasicBlock *BB = BBWorklist[Idx];
1384     if (BB->empty())
1385       continue;
1386 
1387     // Disallow inlining a blockaddress. A blockaddress only has defined
1388     // behavior for an indirect branch in the same function, and we do not
1389     // currently support inlining indirect branches. But, the inliner may not
1390     // see an indirect branch that ends up being dead code at a particular call
1391     // site. If the blockaddress escapes the function, e.g., via a global
1392     // variable, inlining may lead to an invalid cross-function reference.
1393     if (BB->hasAddressTaken())
1394       return false;
1395 
1396     // Analyze the cost of this block. If we blow through the threshold, this
1397     // returns false, and we can bail on out.
1398     if (!analyzeBlock(BB, EphValues))
1399       return false;
1400 
1401     TerminatorInst *TI = BB->getTerminator();
1402 
1403     // Add in the live successors by first checking whether we have terminator
1404     // that may be simplified based on the values simplified by this call.
1405     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1406       if (BI->isConditional()) {
1407         Value *Cond = BI->getCondition();
1408         if (ConstantInt *SimpleCond =
1409                 dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1410           BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
1411           continue;
1412         }
1413       }
1414     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
1415       Value *Cond = SI->getCondition();
1416       if (ConstantInt *SimpleCond =
1417               dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1418         BBWorklist.insert(SI->findCaseValue(SimpleCond)->getCaseSuccessor());
1419         continue;
1420       }
1421     }
1422 
1423     // If we're unable to select a particular successor, just count all of
1424     // them.
1425     for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
1426          ++TIdx)
1427       BBWorklist.insert(TI->getSuccessor(TIdx));
1428 
1429     // If we had any successors at this point, than post-inlining is likely to
1430     // have them as well. Note that we assume any basic blocks which existed
1431     // due to branches or switches which folded above will also fold after
1432     // inlining.
1433     if (SingleBB && TI->getNumSuccessors() > 1) {
1434       // Take off the bonus we applied to the threshold.
1435       Threshold -= SingleBBBonus;
1436       SingleBB = false;
1437     }
1438   }
1439 
1440   // If this is a noduplicate call, we can still inline as long as
1441   // inlining this would cause the removal of the caller (so the instruction
1442   // is not actually duplicated, just moved).
1443   if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
1444     return false;
1445 
1446   // We applied the maximum possible vector bonus at the beginning. Now,
1447   // subtract the excess bonus, if any, from the Threshold before
1448   // comparing against Cost.
1449   if (NumVectorInstructions <= NumInstructions / 10)
1450     Threshold -= FiftyPercentVectorBonus;
1451   else if (NumVectorInstructions <= NumInstructions / 2)
1452     Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus);
1453 
1454   return Cost < std::max(1, Threshold);
1455 }
1456 
1457 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1458 /// \brief Dump stats about this call's analysis.
1459 LLVM_DUMP_METHOD void CallAnalyzer::dump() {
1460 #define DEBUG_PRINT_STAT(x) dbgs() << "      " #x ": " << x << "\n"
1461   DEBUG_PRINT_STAT(NumConstantArgs);
1462   DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
1463   DEBUG_PRINT_STAT(NumAllocaArgs);
1464   DEBUG_PRINT_STAT(NumConstantPtrCmps);
1465   DEBUG_PRINT_STAT(NumConstantPtrDiffs);
1466   DEBUG_PRINT_STAT(NumInstructionsSimplified);
1467   DEBUG_PRINT_STAT(NumInstructions);
1468   DEBUG_PRINT_STAT(SROACostSavings);
1469   DEBUG_PRINT_STAT(SROACostSavingsLost);
1470   DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
1471   DEBUG_PRINT_STAT(Cost);
1472   DEBUG_PRINT_STAT(Threshold);
1473 #undef DEBUG_PRINT_STAT
1474 }
1475 #endif
1476 
1477 /// \brief Test that there are no attribute conflicts between Caller and Callee
1478 ///        that prevent inlining.
1479 static bool functionsHaveCompatibleAttributes(Function *Caller,
1480                                               Function *Callee,
1481                                               TargetTransformInfo &TTI) {
1482   return TTI.areInlineCompatible(Caller, Callee) &&
1483          AttributeFuncs::areInlineCompatible(*Caller, *Callee);
1484 }
1485 
1486 int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) {
1487   int Cost = 0;
1488   for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
1489     if (CS.isByValArgument(I)) {
1490       // We approximate the number of loads and stores needed by dividing the
1491       // size of the byval type by the target's pointer size.
1492       PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
1493       unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
1494       unsigned PointerSize = DL.getPointerSizeInBits();
1495       // Ceiling division.
1496       unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
1497 
1498       // If it generates more than 8 stores it is likely to be expanded as an
1499       // inline memcpy so we take that as an upper bound. Otherwise we assume
1500       // one load and one store per word copied.
1501       // FIXME: The maxStoresPerMemcpy setting from the target should be used
1502       // here instead of a magic number of 8, but it's not available via
1503       // DataLayout.
1504       NumStores = std::min(NumStores, 8U);
1505 
1506       Cost += 2 * NumStores * InlineConstants::InstrCost;
1507     } else {
1508       // For non-byval arguments subtract off one instruction per call
1509       // argument.
1510       Cost += InlineConstants::InstrCost;
1511     }
1512   }
1513   // The call instruction also disappears after inlining.
1514   Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty;
1515   return Cost;
1516 }
1517 
1518 InlineCost llvm::getInlineCost(
1519     CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
1520     std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
1521     Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
1522     ProfileSummaryInfo *PSI) {
1523   return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
1524                        GetAssumptionCache, GetBFI, PSI);
1525 }
1526 
1527 InlineCost llvm::getInlineCost(
1528     CallSite CS, Function *Callee, const InlineParams &Params,
1529     TargetTransformInfo &CalleeTTI,
1530     std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
1531     Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
1532     ProfileSummaryInfo *PSI) {
1533 
1534   // Cannot inline indirect calls.
1535   if (!Callee)
1536     return llvm::InlineCost::getNever();
1537 
1538   // Calls to functions with always-inline attributes should be inlined
1539   // whenever possible.
1540   if (CS.hasFnAttr(Attribute::AlwaysInline)) {
1541     if (isInlineViable(*Callee))
1542       return llvm::InlineCost::getAlways();
1543     return llvm::InlineCost::getNever();
1544   }
1545 
1546   // Never inline functions with conflicting attributes (unless callee has
1547   // always-inline attribute).
1548   if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee, CalleeTTI))
1549     return llvm::InlineCost::getNever();
1550 
1551   // Don't inline this call if the caller has the optnone attribute.
1552   if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone))
1553     return llvm::InlineCost::getNever();
1554 
1555   // Don't inline functions which can be interposed at link-time.  Don't inline
1556   // functions marked noinline or call sites marked noinline.
1557   // Note: inlining non-exact non-interposable functions is fine, since we know
1558   // we have *a* correct implementation of the source level function.
1559   if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) ||
1560       CS.isNoInline())
1561     return llvm::InlineCost::getNever();
1562 
1563   DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
1564                      << "...\n");
1565 
1566   CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, *Callee, CS,
1567                   Params);
1568   bool ShouldInline = CA.analyzeCall(CS);
1569 
1570   DEBUG(CA.dump());
1571 
1572   // Check if there was a reason to force inlining or no inlining.
1573   if (!ShouldInline && CA.getCost() < CA.getThreshold())
1574     return InlineCost::getNever();
1575   if (ShouldInline && CA.getCost() >= CA.getThreshold())
1576     return InlineCost::getAlways();
1577 
1578   return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
1579 }
1580 
1581 bool llvm::isInlineViable(Function &F) {
1582   bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
1583   for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
1584     // Disallow inlining of functions which contain indirect branches or
1585     // blockaddresses.
1586     if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
1587       return false;
1588 
1589     for (auto &II : *BI) {
1590       CallSite CS(&II);
1591       if (!CS)
1592         continue;
1593 
1594       // Disallow recursive calls.
1595       if (&F == CS.getCalledFunction())
1596         return false;
1597 
1598       // Disallow calls which expose returns-twice to a function not previously
1599       // attributed as such.
1600       if (!ReturnsTwice && CS.isCall() &&
1601           cast<CallInst>(CS.getInstruction())->canReturnTwice())
1602         return false;
1603 
1604       // Disallow inlining functions that call @llvm.localescape. Doing this
1605       // correctly would require major changes to the inliner.
1606       if (CS.getCalledFunction() &&
1607           CS.getCalledFunction()->getIntrinsicID() ==
1608               llvm::Intrinsic::localescape)
1609         return false;
1610     }
1611   }
1612 
1613   return true;
1614 }
1615 
1616 // APIs to create InlineParams based on command line flags and/or other
1617 // parameters.
1618 
1619 InlineParams llvm::getInlineParams(int Threshold) {
1620   InlineParams Params;
1621 
1622   // This field is the threshold to use for a callee by default. This is
1623   // derived from one or more of:
1624   //  * optimization or size-optimization levels,
1625   //  * a value passed to createFunctionInliningPass function, or
1626   //  * the -inline-threshold flag.
1627   //  If the -inline-threshold flag is explicitly specified, that is used
1628   //  irrespective of anything else.
1629   if (InlineThreshold.getNumOccurrences() > 0)
1630     Params.DefaultThreshold = InlineThreshold;
1631   else
1632     Params.DefaultThreshold = Threshold;
1633 
1634   // Set the HintThreshold knob from the -inlinehint-threshold.
1635   Params.HintThreshold = HintThreshold;
1636 
1637   // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
1638   Params.HotCallSiteThreshold = HotCallSiteThreshold;
1639 
1640   // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold.
1641   Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
1642 
1643   // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
1644   // -inlinehint-threshold commandline option is not explicitly given. If that
1645   // option is present, then its value applies even for callees with size and
1646   // minsize attributes.
1647   // If the -inline-threshold is not specified, set the ColdThreshold from the
1648   // -inlinecold-threshold even if it is not explicitly passed. If
1649   // -inline-threshold is specified, then -inlinecold-threshold needs to be
1650   // explicitly specified to set the ColdThreshold knob
1651   if (InlineThreshold.getNumOccurrences() == 0) {
1652     Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
1653     Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
1654     Params.ColdThreshold = ColdThreshold;
1655   } else if (ColdThreshold.getNumOccurrences() > 0) {
1656     Params.ColdThreshold = ColdThreshold;
1657   }
1658   return Params;
1659 }
1660 
1661 InlineParams llvm::getInlineParams() {
1662   return getInlineParams(InlineThreshold);
1663 }
1664 
1665 // Compute the default threshold for inlining based on the opt level and the
1666 // size opt level.
1667 static int computeThresholdFromOptLevels(unsigned OptLevel,
1668                                          unsigned SizeOptLevel) {
1669   if (OptLevel > 2)
1670     return InlineConstants::OptAggressiveThreshold;
1671   if (SizeOptLevel == 1) // -Os
1672     return InlineConstants::OptSizeThreshold;
1673   if (SizeOptLevel == 2) // -Oz
1674     return InlineConstants::OptMinSizeThreshold;
1675   return InlineThreshold;
1676 }
1677 
1678 InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
1679   return getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
1680 }
1681