1 //===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements inline cost analysis.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Analysis/InlineCost.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/SetVector.h"
16 #include "llvm/ADT/SmallPtrSet.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/AssumptionCache.h"
20 #include "llvm/Analysis/BlockFrequencyInfo.h"
21 #include "llvm/Analysis/CodeMetrics.h"
22 #include "llvm/Analysis/ConstantFolding.h"
23 #include "llvm/Analysis/CFG.h"
24 #include "llvm/Analysis/InstructionSimplify.h"
25 #include "llvm/Analysis/LoopInfo.h"
26 #include "llvm/Analysis/ProfileSummaryInfo.h"
27 #include "llvm/Analysis/TargetTransformInfo.h"
28 #include "llvm/Analysis/ValueTracking.h"
29 #include "llvm/Config/llvm-config.h"
30 #include "llvm/IR/CallingConv.h"
31 #include "llvm/IR/DataLayout.h"
32 #include "llvm/IR/Dominators.h"
33 #include "llvm/IR/GetElementPtrTypeIterator.h"
34 #include "llvm/IR/GlobalAlias.h"
35 #include "llvm/IR/InstVisitor.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/Operator.h"
38 #include "llvm/Support/Debug.h"
39 #include "llvm/Support/raw_ostream.h"
40 
41 using namespace llvm;
42 
43 #define DEBUG_TYPE "inline-cost"
44 
45 STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
46 
47 static cl::opt<int> InlineThreshold(
48     "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
49     cl::desc("Control the amount of inlining to perform (default = 225)"));
50 
51 static cl::opt<int> HintThreshold(
52     "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore,
53     cl::desc("Threshold for inlining functions with inline hint"));
54 
55 static cl::opt<int>
56     ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
57                           cl::init(45), cl::ZeroOrMore,
58                           cl::desc("Threshold for inlining cold callsites"));
59 
// We introduce this threshold to help performance of instrumentation-based
// PGO before we actually hook up the inliner with analysis passes such as
// BPI and BFI.
63 static cl::opt<int> ColdThreshold(
64     "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore,
65     cl::desc("Threshold for inlining functions with cold attribute"));
66 
67 static cl::opt<int>
68     HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
69                          cl::ZeroOrMore,
70                          cl::desc("Threshold for hot callsites "));
71 
72 static cl::opt<int> LocallyHotCallSiteThreshold(
73     "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore,
74     cl::desc("Threshold for locally hot callsites "));
75 
76 static cl::opt<int> ColdCallSiteRelFreq(
77     "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
78     cl::desc("Maximum block frequency, expressed as a percentage of caller's "
79              "entry frequency, for a callsite to be cold in the absence of "
80              "profile information."));
81 
82 static cl::opt<int> HotCallSiteRelFreq(
83     "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore,
84     cl::desc("Minimum block frequency, expressed as a multiple of caller's "
85              "entry frequency, for a callsite to be hot in the absence of "
86              "profile information."));
87 
88 static cl::opt<bool> OptComputeFullInlineCost(
89     "inline-cost-full", cl::Hidden, cl::init(false), cl::ZeroOrMore,
90     cl::desc("Compute the full inline cost of a call site even when the cost "
91              "exceeds the threshold."));
92 
93 namespace {
94 
95 class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
96   typedef InstVisitor<CallAnalyzer, bool> Base;
97   friend class InstVisitor<CallAnalyzer, bool>;
98 
99   /// The TargetTransformInfo available for this compilation.
100   const TargetTransformInfo &TTI;
101 
102   /// Getter for the cache of @llvm.assume intrinsics.
103   std::function<AssumptionCache &(Function &)> &GetAssumptionCache;
104 
105   /// Getter for BlockFrequencyInfo
106   Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI;
107 
108   /// Profile summary information.
109   ProfileSummaryInfo *PSI;
110 
111   /// The called function.
112   Function &F;
113 
114   // Cache the DataLayout since we use it a lot.
115   const DataLayout &DL;
116 
117   /// The OptimizationRemarkEmitter available for this compilation.
118   OptimizationRemarkEmitter *ORE;
119 
120   /// The candidate callsite being analyzed. Please do not use this to do
121   /// analysis in the caller function; we want the inline cost query to be
122   /// easily cacheable. Instead, use the cover function paramHasAttr.
123   CallBase &CandidateCall;
124 
125   /// Tunable parameters that control the analysis.
126   const InlineParams &Params;
127 
128   int Threshold;
129   int Cost;
130   bool ComputeFullInlineCost;
131 
132   bool IsCallerRecursive;
133   bool IsRecursiveCall;
134   bool ExposesReturnsTwice;
135   bool HasDynamicAlloca;
136   bool ContainsNoDuplicateCall;
137   bool HasReturn;
138   bool HasIndirectBr;
139   bool HasUninlineableIntrinsic;
140   bool InitsVargArgs;
141 
142   /// Number of bytes allocated statically by the callee.
143   uint64_t AllocatedSize;
144   unsigned NumInstructions, NumVectorInstructions;
145   int VectorBonus, TenPercentVectorBonus;
146   // Bonus to be applied when the callee has only one reachable basic block.
147   int SingleBBBonus;
148 
149   /// While we walk the potentially-inlined instructions, we build up and
150   /// maintain a mapping of simplified values specific to this callsite. The
151   /// idea is to propagate any special information we have about arguments to
152   /// this call through the inlinable section of the function, and account for
153   /// likely simplifications post-inlining. The most important aspect we track
154   /// is CFG altering simplifications -- when we prove a basic block dead, that
155   /// can cause dramatic shifts in the cost of inlining a function.
156   DenseMap<Value *, Constant *> SimplifiedValues;
157 
158   /// Keep track of the values which map back (through function arguments) to
159   /// allocas on the caller stack which could be simplified through SROA.
160   DenseMap<Value *, Value *> SROAArgValues;
161 
162   /// The mapping of caller Alloca values to their accumulated cost savings. If
163   /// we have to disable SROA for one of the allocas, this tells us how much
164   /// cost must be added.
165   DenseMap<Value *, int> SROAArgCosts;
166 
167   /// Keep track of values which map to a pointer base and constant offset.
168   DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
169 
170   /// Keep track of dead blocks due to the constant arguments.
171   SetVector<BasicBlock *> DeadBlocks;
172 
173   /// The mapping of the blocks to their known unique successors due to the
174   /// constant arguments.
175   DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;
176 
  /// Model the elimination of repeated loads that is expected to happen
  /// whenever we simplify away the stores that would otherwise force those
  /// loads to remain.
180   bool EnableLoadElimination;
181   SmallPtrSet<Value *, 16> LoadAddrSet;
182   int LoadEliminationCost;
183 
184   // Custom simplification helper routines.
185   bool isAllocaDerivedArg(Value *V);
186   bool lookupSROAArgAndCost(Value *V, Value *&Arg,
187                             DenseMap<Value *, int>::iterator &CostIt);
188   void disableSROA(DenseMap<Value *, int>::iterator CostIt);
189   void disableSROA(Value *V);
190   void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
191   void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
192                           int InstructionCost);
193   void disableLoadElimination();
194   bool isGEPFree(GetElementPtrInst &GEP);
195   bool canFoldInboundsGEP(GetElementPtrInst &I);
196   bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
197   bool simplifyCallSite(Function *F, CallBase &Call);
198   template <typename Callable>
199   bool simplifyInstruction(Instruction &I, Callable Evaluate);
200   ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
201 
202   /// Return true if the given argument to the function being considered for
203   /// inlining has the given attribute set either at the call site or the
204   /// function declaration.  Primarily used to inspect call site specific
205   /// attributes since these can be more precise than the ones on the callee
206   /// itself.
207   bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);
208 
209   /// Return true if the given value is known non null within the callee if
210   /// inlined through this particular callsite.
211   bool isKnownNonNullInCallee(Value *V);
212 
213   /// Update Threshold based on callsite properties such as callee
214   /// attributes and callee hotness for PGO builds. The Callee is explicitly
215   /// passed to support analyzing indirect calls whose target is inferred by
216   /// analysis.
217   void updateThreshold(CallBase &Call, Function &Callee);
218 
219   /// Return true if size growth is allowed when inlining the callee at \p Call.
220   bool allowSizeGrowth(CallBase &Call);
221 
222   /// Return true if \p Call is a cold callsite.
223   bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI);
224 
225   /// Return a higher threshold if \p Call is a hot callsite.
226   Optional<int> getHotCallSiteThreshold(CallBase &Call,
227                                         BlockFrequencyInfo *CallerBFI);
228 
229   // Custom analysis routines.
230   InlineResult analyzeBlock(BasicBlock *BB,
231                             SmallPtrSetImpl<const Value *> &EphValues);
232 
233   // Disable several entry points to the visitor so we don't accidentally use
234   // them by declaring but not defining them here.
235   void visit(Module *);
236   void visit(Module &);
237   void visit(Function *);
238   void visit(Function &);
239   void visit(BasicBlock *);
240   void visit(BasicBlock &);
241 
242   // Provide base case for our instruction visit.
243   bool visitInstruction(Instruction &I);
244 
245   // Our visit overrides.
246   bool visitAlloca(AllocaInst &I);
247   bool visitPHI(PHINode &I);
248   bool visitGetElementPtr(GetElementPtrInst &I);
249   bool visitBitCast(BitCastInst &I);
250   bool visitPtrToInt(PtrToIntInst &I);
251   bool visitIntToPtr(IntToPtrInst &I);
252   bool visitCastInst(CastInst &I);
253   bool visitUnaryInstruction(UnaryInstruction &I);
254   bool visitCmpInst(CmpInst &I);
255   bool visitSub(BinaryOperator &I);
256   bool visitBinaryOperator(BinaryOperator &I);
257   bool visitLoad(LoadInst &I);
258   bool visitStore(StoreInst &I);
259   bool visitExtractValue(ExtractValueInst &I);
260   bool visitInsertValue(InsertValueInst &I);
261   bool visitCallBase(CallBase &Call);
262   bool visitReturnInst(ReturnInst &RI);
263   bool visitBranchInst(BranchInst &BI);
264   bool visitSelectInst(SelectInst &SI);
265   bool visitSwitchInst(SwitchInst &SI);
266   bool visitIndirectBrInst(IndirectBrInst &IBI);
267   bool visitResumeInst(ResumeInst &RI);
268   bool visitCleanupReturnInst(CleanupReturnInst &RI);
269   bool visitCatchReturnInst(CatchReturnInst &RI);
270   bool visitUnreachableInst(UnreachableInst &I);
271 
272 public:
273   CallAnalyzer(const TargetTransformInfo &TTI,
274                std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
275                Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
276                ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
277                Function &Callee, CallBase &Call, const InlineParams &Params)
278       : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
279         PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
280         CandidateCall(Call), Params(Params), Threshold(Params.DefaultThreshold),
281         Cost(0), ComputeFullInlineCost(OptComputeFullInlineCost ||
282                                        Params.ComputeFullInlineCost || ORE),
283         IsCallerRecursive(false), IsRecursiveCall(false),
284         ExposesReturnsTwice(false), HasDynamicAlloca(false),
285         ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
286         HasUninlineableIntrinsic(false), InitsVargArgs(false), AllocatedSize(0),
287         NumInstructions(0), NumVectorInstructions(0), VectorBonus(0),
288         SingleBBBonus(0), EnableLoadElimination(true), LoadEliminationCost(0),
289         NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
290         NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
291         NumInstructionsSimplified(0), SROACostSavings(0),
292         SROACostSavingsLost(0) {}
293 
294   InlineResult analyzeCall(CallBase &Call);
295 
296   int getThreshold() { return Threshold; }
297   int getCost() { return Cost; }
298 
299   // Keep a bunch of stats about the cost savings found so we can print them
300   // out when debugging.
301   unsigned NumConstantArgs;
302   unsigned NumConstantOffsetPtrArgs;
303   unsigned NumAllocaArgs;
304   unsigned NumConstantPtrCmps;
305   unsigned NumConstantPtrDiffs;
306   unsigned NumInstructionsSimplified;
307   unsigned SROACostSavings;
308   unsigned SROACostSavingsLost;
309 
310   void dump();
311 };
312 
313 } // namespace
314 
315 /// Test whether the given value is an Alloca-derived function argument.
316 bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
317   return SROAArgValues.count(V);
318 }
319 
320 /// Lookup the SROA-candidate argument and cost iterator which V maps to.
321 /// Returns false if V does not map to a SROA-candidate.
322 bool CallAnalyzer::lookupSROAArgAndCost(
323     Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
324   if (SROAArgValues.empty() || SROAArgCosts.empty())
325     return false;
326 
327   DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
328   if (ArgIt == SROAArgValues.end())
329     return false;
330 
331   Arg = ArgIt->second;
332   CostIt = SROAArgCosts.find(Arg);
333   return CostIt != SROAArgCosts.end();
334 }
335 
336 /// Disable SROA for the candidate marked by this cost iterator.
337 ///
338 /// This marks the candidate as no longer viable for SROA, and adds the cost
339 /// savings associated with it back into the inline cost measurement.
340 void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
341   // If we're no longer able to perform SROA we need to undo its cost savings
342   // and prevent subsequent analysis.
343   Cost += CostIt->second;
344   SROACostSavings -= CostIt->second;
345   SROACostSavingsLost += CostIt->second;
346   SROAArgCosts.erase(CostIt);
347   disableLoadElimination();
348 }
349 
350 /// If 'V' maps to a SROA candidate, disable SROA for it.
351 void CallAnalyzer::disableSROA(Value *V) {
352   Value *SROAArg;
353   DenseMap<Value *, int>::iterator CostIt;
354   if (lookupSROAArgAndCost(V, SROAArg, CostIt))
355     disableSROA(CostIt);
356 }
357 
358 /// Accumulate the given cost for a particular SROA candidate.
359 void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
360                                       int InstructionCost) {
361   CostIt->second += InstructionCost;
362   SROACostSavings += InstructionCost;
363 }
364 
365 void CallAnalyzer::disableLoadElimination() {
366   if (EnableLoadElimination) {
367     Cost += LoadEliminationCost;
368     LoadEliminationCost = 0;
369     EnableLoadElimination = false;
370   }
371 }
372 
373 /// Accumulate a constant GEP offset into an APInt if possible.
374 ///
375 /// Returns false if unable to compute the offset for any reason. Respects any
376 /// simplified values known during the analysis of this callsite.
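/// For illustration, an inbounds GEP such as
/// 'getelementptr inbounds {i32, i32}, {i32, i32}* %p, i64 1, i32 1'
/// accumulates a constant offset of 12 bytes (8 from the array index plus 4
/// from the field index) under a typical DataLayout.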
377 bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
378   unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
379   assert(IntPtrWidth == Offset.getBitWidth());
380 
381   for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
382        GTI != GTE; ++GTI) {
383     ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
384     if (!OpC)
385       if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
386         OpC = dyn_cast<ConstantInt>(SimpleOp);
387     if (!OpC)
388       return false;
389     if (OpC->isZero())
390       continue;
391 
392     // Handle a struct index, which adds its field offset to the pointer.
393     if (StructType *STy = GTI.getStructTypeOrNull()) {
394       unsigned ElementIdx = OpC->getZExtValue();
395       const StructLayout *SL = DL.getStructLayout(STy);
396       Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
397       continue;
398     }
399 
400     APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
401     Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
402   }
403   return true;
404 }
405 
406 /// Use TTI to check whether a GEP is free.
407 ///
408 /// Respects any simplified values known during the analysis of this callsite.
409 bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
410   SmallVector<Value *, 4> Operands;
411   Operands.push_back(GEP.getOperand(0));
  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
    if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
      Operands.push_back(SimpleOp);
    else
      Operands.push_back(*I);
417   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands);
418 }
419 
420 bool CallAnalyzer::visitAlloca(AllocaInst &I) {
421   // Check whether inlining will turn a dynamic alloca into a static
422   // alloca and handle that case.
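  // For example, 'alloca i32, i32 %n' where %n has been simplified to the
  // constant 100 is accounted for as a 400-byte static allocation.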
423   if (I.isArrayAllocation()) {
424     Constant *Size = SimplifiedValues.lookup(I.getArraySize());
425     if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
426       Type *Ty = I.getAllocatedType();
427       AllocatedSize = SaturatingMultiplyAdd(
428           AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize);
429       return Base::visitAlloca(I);
430     }
431   }
432 
433   // Accumulate the allocated size.
434   if (I.isStaticAlloca()) {
435     Type *Ty = I.getAllocatedType();
436     AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize);
437   }
438 
439   // We will happily inline static alloca instructions.
440   if (I.isStaticAlloca())
441     return Base::visitAlloca(I);
442 
443   // FIXME: This is overly conservative. Dynamic allocas are inefficient for
444   // a variety of reasons, and so we would like to not inline them into
445   // functions which don't currently have a dynamic alloca. This simply
446   // disables inlining altogether in the presence of a dynamic alloca.
447   HasDynamicAlloca = true;
448   return false;
449 }
450 
451 bool CallAnalyzer::visitPHI(PHINode &I) {
452   // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate its bonuses. The idea is to disable
454   // SROA if it *might* be used in an inappropriate manner.
455 
456   // Phi nodes are always zero-cost.
457   // FIXME: Pointer sizes may differ between different address spaces, so do we
458   // need to use correct address space in the call to getPointerSizeInBits here?
459   // Or could we skip the getPointerSizeInBits call completely? As far as I can
460   // see the ZeroOffset is used as a dummy value, so we can probably use any
461   // bit width for the ZeroOffset?
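  // The approach below walks the live incoming values: if they all agree on a
  // single constant, or on a single base pointer plus constant offset, the phi
  // itself can be mapped to that value. For example, a phi whose only live
  // incoming values are the same simplified constant folds to that constant.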
462   APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits(0));
463   bool CheckSROA = I.getType()->isPointerTy();
464 
465   // Track the constant or pointer with constant offset we've seen so far.
466   Constant *FirstC = nullptr;
467   std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
468   Value *FirstV = nullptr;
469 
470   for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
471     BasicBlock *Pred = I.getIncomingBlock(i);
472     // If the incoming block is dead, skip the incoming block.
473     if (DeadBlocks.count(Pred))
474       continue;
475     // If the parent block of phi is not the known successor of the incoming
476     // block, skip the incoming block.
477     BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
478     if (KnownSuccessor && KnownSuccessor != I.getParent())
479       continue;
480 
481     Value *V = I.getIncomingValue(i);
482     // If the incoming value is this phi itself, skip the incoming value.
483     if (&I == V)
484       continue;
485 
486     Constant *C = dyn_cast<Constant>(V);
487     if (!C)
488       C = SimplifiedValues.lookup(V);
489 
490     std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
491     if (!C && CheckSROA)
492       BaseAndOffset = ConstantOffsetPtrs.lookup(V);
493 
494     if (!C && !BaseAndOffset.first)
495       // The incoming value is neither a constant nor a pointer with constant
496       // offset, exit early.
497       return true;
498 
499     if (FirstC) {
500       if (FirstC == C)
501         // If we've seen a constant incoming value before and it is the same
502         // constant we see this time, continue checking the next incoming value.
503         continue;
504       // Otherwise early exit because we either see a different constant or saw
505       // a constant before but we have a pointer with constant offset this time.
506       return true;
507     }
508 
509     if (FirstV) {
510       // The same logic as above, but check pointer with constant offset here.
511       if (FirstBaseAndOffset == BaseAndOffset)
512         continue;
513       return true;
514     }
515 
516     if (C) {
517       // This is the 1st time we've seen a constant, record it.
518       FirstC = C;
519       continue;
520     }
521 
522     // The remaining case is that this is the 1st time we've seen a pointer with
523     // constant offset, record it.
524     FirstV = V;
525     FirstBaseAndOffset = BaseAndOffset;
526   }
527 
528   // Check if we can map phi to a constant.
529   if (FirstC) {
530     SimplifiedValues[&I] = FirstC;
531     return true;
532   }
533 
534   // Check if we can map phi to a pointer with constant offset.
535   if (FirstBaseAndOffset.first) {
536     ConstantOffsetPtrs[&I] = FirstBaseAndOffset;
537 
538     Value *SROAArg;
539     DenseMap<Value *, int>::iterator CostIt;
540     if (lookupSROAArgAndCost(FirstV, SROAArg, CostIt))
541       SROAArgValues[&I] = SROAArg;
542   }
543 
544   return true;
545 }
546 
547 /// Check we can fold GEPs of constant-offset call site argument pointers.
548 /// This requires target data and inbounds GEPs.
549 ///
550 /// \return true if the specified GEP can be folded.
551 bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
552   // Check if we have a base + offset for the pointer.
553   std::pair<Value *, APInt> BaseAndOffset =
554       ConstantOffsetPtrs.lookup(I.getPointerOperand());
555   if (!BaseAndOffset.first)
556     return false;
557 
558   // Check if the offset of this GEP is constant, and if so accumulate it
559   // into Offset.
560   if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
561     return false;
562 
563   // Add the result as a new mapping to Base + Offset.
564   ConstantOffsetPtrs[&I] = BaseAndOffset;
565 
566   return true;
567 }
568 
569 bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
570   Value *SROAArg;
571   DenseMap<Value *, int>::iterator CostIt;
572   bool SROACandidate =
573       lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt);
574 
575   // Lambda to check whether a GEP's indices are all constant.
576   auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
577     for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
578       if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
579         return false;
580     return true;
581   };
582 
583   if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
584     if (SROACandidate)
585       SROAArgValues[&I] = SROAArg;
586 
587     // Constant GEPs are modeled as free.
588     return true;
589   }
590 
591   // Variable GEPs will require math and will disable SROA.
592   if (SROACandidate)
593     disableSROA(CostIt);
594   return isGEPFree(I);
595 }
596 
597 /// Simplify \p I if its operands are constants and update SimplifiedValues.
598 /// \p Evaluate is a callable specific to instruction type that evaluates the
599 /// instruction when all the operands are constants.
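/// For example, visitBitCast calls this with a lambda that returns
/// ConstantExpr::getBitCast(COps[0], I.getType()).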
600 template <typename Callable>
601 bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
602   SmallVector<Constant *, 2> COps;
603   for (Value *Op : I.operands()) {
604     Constant *COp = dyn_cast<Constant>(Op);
605     if (!COp)
606       COp = SimplifiedValues.lookup(Op);
607     if (!COp)
608       return false;
609     COps.push_back(COp);
610   }
611   auto *C = Evaluate(COps);
612   if (!C)
613     return false;
614   SimplifiedValues[&I] = C;
615   return true;
616 }
617 
618 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
619   // Propagate constants through bitcasts.
620   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
621         return ConstantExpr::getBitCast(COps[0], I.getType());
622       }))
623     return true;
624 
625   // Track base/offsets through casts
626   std::pair<Value *, APInt> BaseAndOffset =
627       ConstantOffsetPtrs.lookup(I.getOperand(0));
628   // Casts don't change the offset, just wrap it up.
629   if (BaseAndOffset.first)
630     ConstantOffsetPtrs[&I] = BaseAndOffset;
631 
632   // Also look for SROA candidates here.
633   Value *SROAArg;
634   DenseMap<Value *, int>::iterator CostIt;
635   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
636     SROAArgValues[&I] = SROAArg;
637 
638   // Bitcasts are always zero cost.
639   return true;
640 }
641 
642 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
643   // Propagate constants through ptrtoint.
644   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
645         return ConstantExpr::getPtrToInt(COps[0], I.getType());
646       }))
647     return true;
648 
649   // Track base/offset pairs when converted to a plain integer provided the
650   // integer is large enough to represent the pointer.
651   unsigned IntegerSize = I.getType()->getScalarSizeInBits();
652   unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
653   if (IntegerSize >= DL.getPointerSizeInBits(AS)) {
654     std::pair<Value *, APInt> BaseAndOffset =
655         ConstantOffsetPtrs.lookup(I.getOperand(0));
656     if (BaseAndOffset.first)
657       ConstantOffsetPtrs[&I] = BaseAndOffset;
658   }
659 
660   // This is really weird. Technically, ptrtoint will disable SROA. However,
661   // unless that ptrtoint is *used* somewhere in the live basic blocks after
662   // inlining, it will be nuked, and SROA should proceed. All of the uses which
663   // would block SROA would also block SROA if applied directly to a pointer,
664   // and so we can just add the integer in here. The only places where SROA is
665   // preserved either cannot fire on an integer, or won't in-and-of themselves
666   // disable SROA (ext) w/o some later use that we would see and disable.
667   Value *SROAArg;
668   DenseMap<Value *, int>::iterator CostIt;
669   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
670     SROAArgValues[&I] = SROAArg;
671 
672   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
673 }
674 
675 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
  // Propagate constants through inttoptr.
677   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
678         return ConstantExpr::getIntToPtr(COps[0], I.getType());
679       }))
680     return true;
681 
682   // Track base/offset pairs when round-tripped through a pointer without
683   // modifications provided the integer is not too large.
684   Value *Op = I.getOperand(0);
685   unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
686   if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
687     std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
688     if (BaseAndOffset.first)
689       ConstantOffsetPtrs[&I] = BaseAndOffset;
690   }
691 
692   // "Propagate" SROA here in the same manner as we do for ptrtoint above.
693   Value *SROAArg;
694   DenseMap<Value *, int>::iterator CostIt;
695   if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
696     SROAArgValues[&I] = SROAArg;
697 
698   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
699 }
700 
701 bool CallAnalyzer::visitCastInst(CastInst &I) {
  // Propagate constants through casts.
703   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
704         return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
705       }))
706     return true;
707 
708   // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
709   disableSROA(I.getOperand(0));
710 
711   // If this is a floating-point cast, and the target says this operation
712   // is expensive, this may eventually become a library call. Treat the cost
713   // as such.
714   switch (I.getOpcode()) {
715   case Instruction::FPTrunc:
716   case Instruction::FPExt:
717   case Instruction::UIToFP:
718   case Instruction::SIToFP:
719   case Instruction::FPToUI:
720   case Instruction::FPToSI:
721     if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
722       Cost += InlineConstants::CallPenalty;
723     break;
724   default:
725     break;
726   }
727 
728   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
729 }
730 
731 bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
732   Value *Operand = I.getOperand(0);
733   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
734         return ConstantFoldInstOperands(&I, COps[0], DL);
735       }))
736     return true;
737 
738   // Disable any SROA on the argument to arbitrary unary operators.
739   disableSROA(Operand);
740 
741   return false;
742 }
743 
744 bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
745   return CandidateCall.paramHasAttr(A->getArgNo(), Attr);
746 }
747 
748 bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
749   // Does the *call site* have the NonNull attribute set on an argument?  We
750   // use the attribute on the call site to memoize any analysis done in the
751   // caller. This will also trip if the callee function has a non-null
752   // parameter attribute, but that's a less interesting case because hopefully
753   // the callee would already have been simplified based on that.
754   if (Argument *A = dyn_cast<Argument>(V))
755     if (paramHasAttr(A, Attribute::NonNull))
756       return true;
757 
758   // Is this an alloca in the caller?  This is distinct from the attribute case
759   // above because attributes aren't updated within the inliner itself and we
760   // always want to catch the alloca derived case.
761   if (isAllocaDerivedArg(V))
762     // We can actually predict the result of comparisons between an
763     // alloca-derived value and null. Note that this fires regardless of
764     // SROA firing.
765     return true;
766 
767   return false;
768 }
769 
770 bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {
771   // If the normal destination of the invoke or the parent block of the call
772   // site is unreachable-terminated, there is little point in inlining this
773   // unless there is literally zero cost.
774   // FIXME: Note that it is possible that an unreachable-terminated block has a
  // hot entry. For example, in the scenario below, inlining hot_call_X() may
  // be beneficial:
777   // main() {
778   //   hot_call_1();
779   //   ...
780   //   hot_call_N()
781   //   exit(0);
782   // }
783   // For now, we are not handling this corner case here as it is rare in real
784   // code. In future, we should elaborate this based on BPI and BFI in more
785   // general threshold adjusting heuristics in updateThreshold().
786   if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
787     if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
788       return false;
789   } else if (isa<UnreachableInst>(Call.getParent()->getTerminator()))
790     return false;
791 
792   return true;
793 }
794 
795 bool CallAnalyzer::isColdCallSite(CallBase &Call,
796                                   BlockFrequencyInfo *CallerBFI) {
797   // If global profile summary is available, then callsite's coldness is
798   // determined based on that.
799   if (PSI && PSI->hasProfileSummary())
800     return PSI->isColdCallSite(CallSite(&Call), CallerBFI);
801 
802   // Otherwise we need BFI to be available.
803   if (!CallerBFI)
804     return false;
805 
806   // Determine if the callsite is cold relative to caller's entry. We could
807   // potentially cache the computation of scaled entry frequency, but the added
808   // complexity is not worth it unless this scaling shows up high in the
809   // profiles.
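  // For example, with the default cold-callsite-rel-freq of 2, a callsite
  // executed less often than 2% of the caller's entry frequency is treated as
  // cold.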
810   const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
811   auto CallSiteBB = Call.getParent();
812   auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
813   auto CallerEntryFreq =
814       CallerBFI->getBlockFreq(&(Call.getCaller()->getEntryBlock()));
815   return CallSiteFreq < CallerEntryFreq * ColdProb;
816 }
817 
818 Optional<int>
819 CallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
820                                       BlockFrequencyInfo *CallerBFI) {
821 
822   // If global profile summary is available, then callsite's hotness is
823   // determined based on that.
824   if (PSI && PSI->hasProfileSummary() &&
825       PSI->isHotCallSite(CallSite(&Call), CallerBFI))
826     return Params.HotCallSiteThreshold;
827 
828   // Otherwise we need BFI to be available and to have a locally hot callsite
829   // threshold.
830   if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
831     return None;
832 
833   // Determine if the callsite is hot relative to caller's entry. We could
834   // potentially cache the computation of scaled entry frequency, but the added
835   // complexity is not worth it unless this scaling shows up high in the
836   // profiles.
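  // For example, with the default hot-callsite-rel-freq of 60, the callsite
  // must execute at least 60x as often as the caller's entry block to receive
  // the locally hot threshold (525 by default).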
837   auto CallSiteBB = Call.getParent();
838   auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
839   auto CallerEntryFreq = CallerBFI->getEntryFreq();
840   if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
841     return Params.LocallyHotCallSiteThreshold;
842 
843   // Otherwise treat it normally.
844   return None;
845 }
846 
847 void CallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
848   // If no size growth is allowed for this inlining, set Threshold to 0.
849   if (!allowSizeGrowth(Call)) {
850     Threshold = 0;
851     return;
852   }
853 
854   Function *Caller = Call.getCaller();
855 
856   // return min(A, B) if B is valid.
857   auto MinIfValid = [](int A, Optional<int> B) {
858     return B ? std::min(A, B.getValue()) : A;
859   };
860 
861   // return max(A, B) if B is valid.
862   auto MaxIfValid = [](int A, Optional<int> B) {
863     return B ? std::max(A, B.getValue()) : A;
864   };
865 
866   // Various bonus percentages. These are multiplied by Threshold to get the
867   // bonus values.
868   // SingleBBBonus: This bonus is applied if the callee has a single reachable
869   // basic block at the given callsite context. This is speculatively applied
870   // and withdrawn if more than one basic block is seen.
871   //
872   // Vector bonuses: We want to more aggressively inline vector-dense kernels
873   // and apply this bonus based on the percentage of vector instructions. A
874   // bonus is applied if the vector instructions exceed 50% and half that amount
  // is applied if it exceeds 10%. Note that these bonuses are somewhat
876   // arbitrary and evolved over time by accident as much as because they are
877   // principled bonuses.
878   // FIXME: It would be nice to base the bonus values on something more
879   // scientific.
880   //
  // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
882   // of the last call to a static function as inlining such functions is
883   // guaranteed to reduce code size.
884   //
885   // These bonus percentages may be set to 0 based on properties of the caller
886   // and the callsite.
887   int SingleBBBonusPercent = 50;
888   int VectorBonusPercent = 150;
889   int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
890 
891   // Lambda to set all the above bonus and bonus percentages to 0.
892   auto DisallowAllBonuses = [&]() {
893     SingleBBBonusPercent = 0;
894     VectorBonusPercent = 0;
895     LastCallToStaticBonus = 0;
896   };
897 
898   // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
899   // and reduce the threshold if the caller has the necessary attribute.
900   if (Caller->hasMinSize()) {
901     Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
902     // For minsize, we want to disable the single BB bonus and the vector
903     // bonuses, but not the last-call-to-static bonus. Inlining the last call to
904     // a static function will, at the minimum, eliminate the parameter setup and
905     // call/return instructions.
906     SingleBBBonusPercent = 0;
907     VectorBonusPercent = 0;
908   } else if (Caller->hasOptSize())
909     Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
910 
911   // Adjust the threshold based on inlinehint attribute and profile based
912   // hotness information if the caller does not have MinSize attribute.
913   if (!Caller->hasMinSize()) {
914     if (Callee.hasFnAttribute(Attribute::InlineHint))
915       Threshold = MaxIfValid(Threshold, Params.HintThreshold);
916 
    // FIXME: After switching to the new pass manager, simplify the logic below
918     // by checking only the callsite hotness/coldness as we will reliably
919     // have local profile information.
920     //
921     // Callsite hotness and coldness can be determined if sample profile is
922     // used (which adds hotness metadata to calls) or if caller's
923     // BlockFrequencyInfo is available.
924     BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
925     auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI);
926     if (!Caller->hasOptSize() && HotCallSiteThreshold) {
927       LLVM_DEBUG(dbgs() << "Hot callsite.\n");
928       // FIXME: This should update the threshold only if it exceeds the
929       // current threshold, but AutoFDO + ThinLTO currently relies on this
930       // behavior to prevent inlining of hot callsites during ThinLTO
931       // compile phase.
932       Threshold = HotCallSiteThreshold.getValue();
933     } else if (isColdCallSite(Call, CallerBFI)) {
934       LLVM_DEBUG(dbgs() << "Cold callsite.\n");
935       // Do not apply bonuses for a cold callsite including the
936       // LastCallToStatic bonus. While this bonus might result in code size
937       // reduction, it can cause the size of a non-cold caller to increase
938       // preventing it from being inlined.
939       DisallowAllBonuses();
940       Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
941     } else if (PSI) {
942       // Use callee's global profile information only if we have no way of
943       // determining this via callsite information.
944       if (PSI->isFunctionEntryHot(&Callee)) {
945         LLVM_DEBUG(dbgs() << "Hot callee.\n");
        // If callsite hotness cannot be determined, we may still know
947         // that the callee is hot and treat it as a weaker hint for threshold
948         // increase.
949         Threshold = MaxIfValid(Threshold, Params.HintThreshold);
950       } else if (PSI->isFunctionEntryCold(&Callee)) {
951         LLVM_DEBUG(dbgs() << "Cold callee.\n");
952         // Do not apply bonuses for a cold callee including the
953         // LastCallToStatic bonus. While this bonus might result in code size
954         // reduction, it can cause the size of a non-cold caller to increase
955         // preventing it from being inlined.
956         DisallowAllBonuses();
957         Threshold = MinIfValid(Threshold, Params.ColdThreshold);
958       }
959     }
960   }
961 
962   // Finally, take the target-specific inlining threshold multiplier into
963   // account.
964   Threshold *= TTI.getInliningThresholdMultiplier();
965 
966   SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
967   VectorBonus = Threshold * VectorBonusPercent / 100;
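  // For example, an unadjusted default threshold of 225 yields a single-BB
  // bonus of 112 and a vector bonus of 337.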
968 
969   bool OnlyOneCallAndLocalLinkage =
970       F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction();
971   // If there is only one call of the function, and it has internal linkage,
972   // the cost of inlining it drops dramatically. It may seem odd to update
973   // Cost in updateThreshold, but the bonus depends on the logic in this method.
974   if (OnlyOneCallAndLocalLinkage)
975     Cost -= LastCallToStaticBonus;
976 }
977 
978 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
979   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
980   // First try to handle simplified comparisons.
981   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
982         return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
983       }))
984     return true;
985 
986   if (I.getOpcode() == Instruction::FCmp)
987     return false;
988 
989   // Otherwise look for a comparison between constant offset pointers with
990   // a common base.
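  // For example, an icmp of two GEPs into the same caller-side alloca that was
  // passed as an argument reduces to a comparison of their constant offsets.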
991   Value *LHSBase, *RHSBase;
992   APInt LHSOffset, RHSOffset;
993   std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
994   if (LHSBase) {
995     std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
996     if (RHSBase && LHSBase == RHSBase) {
997       // We have common bases, fold the icmp to a constant based on the
998       // offsets.
999       Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
1000       Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
1001       if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
1002         SimplifiedValues[&I] = C;
1003         ++NumConstantPtrCmps;
1004         return true;
1005       }
1006     }
1007   }
1008 
1009   // If the comparison is an equality comparison with null, we can simplify it
  // if we know the value (argument) can't be null.
1011   if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
1012       isKnownNonNullInCallee(I.getOperand(0))) {
1013     bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
1014     SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
1015                                       : ConstantInt::getFalse(I.getType());
1016     return true;
1017   }
1018   // Finally check for SROA candidates in comparisons.
1019   Value *SROAArg;
1020   DenseMap<Value *, int>::iterator CostIt;
1021   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
1022     if (isa<ConstantPointerNull>(I.getOperand(1))) {
1023       accumulateSROACost(CostIt, InlineConstants::InstrCost);
1024       return true;
1025     }
1026 
1027     disableSROA(CostIt);
1028   }
1029 
1030   return false;
1031 }
1032 
1033 bool CallAnalyzer::visitSub(BinaryOperator &I) {
1034   // Try to handle a special case: we can fold computing the difference of two
1035   // constant-related pointers.
1036   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1037   Value *LHSBase, *RHSBase;
1038   APInt LHSOffset, RHSOffset;
1039   std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
1040   if (LHSBase) {
1041     std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
1042     if (RHSBase && LHSBase == RHSBase) {
1043       // We have common bases, fold the subtract to a constant based on the
1044       // offsets.
1045       Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
1046       Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
1047       if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
1048         SimplifiedValues[&I] = C;
1049         ++NumConstantPtrDiffs;
1050         return true;
1051       }
1052     }
1053   }
1054 
1055   // Otherwise, fall back to the generic logic for simplifying and handling
1056   // instructions.
1057   return Base::visitSub(I);
1058 }
1059 
1060 bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
1061   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1062   Constant *CLHS = dyn_cast<Constant>(LHS);
1063   if (!CLHS)
1064     CLHS = SimplifiedValues.lookup(LHS);
1065   Constant *CRHS = dyn_cast<Constant>(RHS);
1066   if (!CRHS)
1067     CRHS = SimplifiedValues.lookup(RHS);
1068 
1069   Value *SimpleV = nullptr;
1070   if (auto FI = dyn_cast<FPMathOperator>(&I))
1071     SimpleV = SimplifyFPBinOp(I.getOpcode(), CLHS ? CLHS : LHS,
1072                               CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL);
1073   else
1074     SimpleV =
1075         SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);
1076 
1077   if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
1078     SimplifiedValues[&I] = C;
1079 
1080   if (SimpleV)
1081     return true;
1082 
1083   // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
1084   disableSROA(LHS);
1085   disableSROA(RHS);
1086 
1087   // If the instruction is floating point, and the target says this operation
1088   // is expensive, this may eventually become a library call. Treat the cost
1089   // as such.
1090   if (I.getType()->isFloatingPointTy() &&
1091       TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
1092     Cost += InlineConstants::CallPenalty;
1093 
1094   return false;
1095 }
1096 
1097 bool CallAnalyzer::visitLoad(LoadInst &I) {
1098   Value *SROAArg;
1099   DenseMap<Value *, int>::iterator CostIt;
1100   if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
1101     if (I.isSimple()) {
1102       accumulateSROACost(CostIt, InlineConstants::InstrCost);
1103       return true;
1104     }
1105 
1106     disableSROA(CostIt);
1107   }
1108 
1109   // If the data is already loaded from this address and hasn't been clobbered
1110   // by any stores or calls, this load is likely to be redundant and can be
1111   // eliminated.
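  // For example, the second of two unordered loads from the same address with
  // no intervening store or writing call is modeled as free; the savings
  // accumulate in LoadEliminationCost so they can be added back to Cost if a
  // clobber appears later.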
1112   if (EnableLoadElimination &&
1113       !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
1114     LoadEliminationCost += InlineConstants::InstrCost;
1115     return true;
1116   }
1117 
1118   return false;
1119 }
1120 
1121 bool CallAnalyzer::visitStore(StoreInst &I) {
1122   Value *SROAArg;
1123   DenseMap<Value *, int>::iterator CostIt;
1124   if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
1125     if (I.isSimple()) {
1126       accumulateSROACost(CostIt, InlineConstants::InstrCost);
1127       return true;
1128     }
1129 
1130     disableSROA(CostIt);
1131   }
1132 
1133   // The store can potentially clobber loads and prevent repeated loads from
1134   // being eliminated.
1135   // FIXME:
  // 1. We can probably keep an initial set of eliminable loads subtracted
1137   // from the cost even when we finally see a store. We just need to disable
1138   // *further* accumulation of elimination savings.
1139   // 2. We should probably at some point thread MemorySSA for the callee into
1140   // this and then use that to actually compute *really* precise savings.
1141   disableLoadElimination();
1142   return false;
1143 }
1144 
1145 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
1146   // Constant folding for extract value is trivial.
1147   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1148         return ConstantExpr::getExtractValue(COps[0], I.getIndices());
1149       }))
1150     return true;
1151 
1152   // SROA can look through these but give them a cost.
1153   return false;
1154 }
1155 
1156 bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
1157   // Constant folding for insert value is trivial.
1158   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1159         return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
1160                                             /*InsertedValueOperand*/ COps[1],
1161                                             I.getIndices());
1162       }))
1163     return true;
1164 
1165   // SROA can look through these but give them a cost.
1166   return false;
1167 }
1168 
1169 /// Try to simplify a call site.
1170 ///
1171 /// Takes a concrete function and callsite and tries to actually simplify it by
1172 /// analyzing the arguments and call itself with instsimplify. Returns true if
1173 /// it has simplified the callsite to some other entity (a constant), making it
1174 /// free.
1175 bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
1176   // FIXME: Using the instsimplify logic directly for this is inefficient
1177   // because we have to continually rebuild the argument list even when no
1178   // simplifications can be performed. Until that is fixed with remapping
1179   // inside of instsimplify, directly constant fold calls here.
1180   if (!canConstantFoldCallTo(&Call, F))
1181     return false;
1182 
1183   // Try to re-map the arguments to constants.
1184   SmallVector<Constant *, 4> ConstantArgs;
1185   ConstantArgs.reserve(Call.arg_size());
1186   for (Value *I : Call.args()) {
1187     Constant *C = dyn_cast<Constant>(I);
1188     if (!C)
1189       C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(I));
1190     if (!C)
1191       return false; // This argument doesn't map to a constant.
1192 
1193     ConstantArgs.push_back(C);
1194   }
1195   if (Constant *C = ConstantFoldCall(&Call, F, ConstantArgs)) {
1196     SimplifiedValues[&Call] = C;
1197     return true;
1198   }
1199 
1200   return false;
1201 }
1202 
1203 bool CallAnalyzer::visitCallBase(CallBase &Call) {
1204   if (Call.hasFnAttr(Attribute::ReturnsTwice) &&
1205       !F.hasFnAttribute(Attribute::ReturnsTwice)) {
1206     // This aborts the entire analysis.
1207     ExposesReturnsTwice = true;
1208     return false;
1209   }
1210   if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
1211     ContainsNoDuplicateCall = true;
1212 
1213   if (Function *F = Call.getCalledFunction()) {
1214     // When we have a concrete function, first try to simplify it directly.
1215     if (simplifyCallSite(F, Call))
1216       return true;
1217 
1218     // Next check if it is an intrinsic we know about.
1219     // FIXME: Lift this into part of the InstVisitor.
1220     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
1221       switch (II->getIntrinsicID()) {
1222       default:
1223         if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
1224           disableLoadElimination();
1225         return Base::visitCallBase(Call);
1226 
1227       case Intrinsic::load_relative:
1228         // This is normally lowered to 4 LLVM instructions.
1229         Cost += 3 * InlineConstants::InstrCost;
1230         return false;
1231 
1232       case Intrinsic::memset:
1233       case Intrinsic::memcpy:
1234       case Intrinsic::memmove:
1235         disableLoadElimination();
1236         // SROA can usually chew through these intrinsics, but they aren't free.
1237         return false;
1238       case Intrinsic::icall_branch_funnel:
1239       case Intrinsic::localescape:
1240         HasUninlineableIntrinsic = true;
1241         return false;
1242       case Intrinsic::vastart:
1243         InitsVargArgs = true;
1244         return false;
1245       }
1246     }
1247 
1248     if (F == Call.getFunction()) {
1249       // This flag will fully abort the analysis, so don't bother with anything
1250       // else.
1251       IsRecursiveCall = true;
1252       return false;
1253     }
1254 
1255     if (TTI.isLoweredToCall(F)) {
1256       // We account for the average 1 instruction per call argument setup
1257       // here.
1258       Cost += Call.arg_size() * InlineConstants::InstrCost;
1259 
1260       // Everything other than inline ASM will also have a significant cost
1261       // merely from making the call.
1262       if (!isa<InlineAsm>(Call.getCalledValue()))
1263         Cost += InlineConstants::CallPenalty;
1264     }
1265 
1266     if (!Call.onlyReadsMemory())
1267       disableLoadElimination();
1268     return Base::visitCallBase(Call);
1269   }
1270 
1271   // Otherwise we're in a very special case -- an indirect function call. See
1272   // if we can be particularly clever about this.
1273   Value *Callee = Call.getCalledValue();
1274 
1275   // First, pay the price of the argument setup. We account for the average
1276   // 1 instruction per call argument setup here.
1277   Cost += Call.arg_size() * InlineConstants::InstrCost;
1278 
1279   // Next, check if this happens to be an indirect function call to a known
1280   // function in this inline context. If not, we've done all we can.
1281   Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
1282   if (!F) {
1283     if (!Call.onlyReadsMemory())
1284       disableLoadElimination();
1285     return Base::visitCallBase(Call);
1286   }
1287 
1288   // If we have a constant that we are calling as a function, we can peer
1289   // through it and see the function target. This happens not infrequently
1290   // during devirtualization and so we want to give it a hefty bonus for
1291   // inlining, but cap that bonus in the event that inlining wouldn't pan
1292   // out. Pretend to inline the function, with a custom threshold.
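  // For example, an indirect call through a function-pointer argument that
  // SimplifiedValues has resolved to a concrete callee is analyzed recursively
  // below, and any headroom between that analysis' threshold and cost is
  // credited back to this call site.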
1293   auto IndirectCallParams = Params;
1294   IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
1295   CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, Call,
1296                   IndirectCallParams);
1297   if (CA.analyzeCall(Call)) {
1298     // We were able to inline the indirect call! Subtract the cost from the
1299     // threshold to get the bonus we want to apply, but don't go below zero.
1300     Cost -= std::max(0, CA.getThreshold() - CA.getCost());
1301   }
1302 
1303   if (!F->onlyReadsMemory())
1304     disableLoadElimination();
1305   return Base::visitCallBase(Call);
1306 }
1307 
1308 bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
1309   // At least one return instruction will be free after inlining.
1310   bool Free = !HasReturn;
1311   HasReturn = true;
1312   return Free;
1313 }
1314 
1315 bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
1316   // We model unconditional branches as essentially free -- they really
1317   // shouldn't exist at all, but handling them makes the behavior of the
1318   // inliner more regular and predictable. Interestingly, conditional branches
1319   // which will fold away are also free.
1320   return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
1321          dyn_cast_or_null<ConstantInt>(
1322              SimplifiedValues.lookup(BI.getCondition()));
1323 }
1324 
1325 bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
1326   bool CheckSROA = SI.getType()->isPointerTy();
1327   Value *TrueVal = SI.getTrueValue();
1328   Value *FalseVal = SI.getFalseValue();
1329 
1330   Constant *TrueC = dyn_cast<Constant>(TrueVal);
1331   if (!TrueC)
1332     TrueC = SimplifiedValues.lookup(TrueVal);
1333   Constant *FalseC = dyn_cast<Constant>(FalseVal);
1334   if (!FalseC)
1335     FalseC = SimplifiedValues.lookup(FalseVal);
1336   Constant *CondC =
1337       dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));
1338 
1339   if (!CondC) {
1340     // Select C, X, X => X
1341     if (TrueC == FalseC && TrueC) {
1342       SimplifiedValues[&SI] = TrueC;
1343       return true;
1344     }
1345 
1346     if (!CheckSROA)
1347       return Base::visitSelectInst(SI);
1348 
1349     std::pair<Value *, APInt> TrueBaseAndOffset =
1350         ConstantOffsetPtrs.lookup(TrueVal);
1351     std::pair<Value *, APInt> FalseBaseAndOffset =
1352         ConstantOffsetPtrs.lookup(FalseVal);
1353     if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
1354       ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;
1355 
1356       Value *SROAArg;
1357       DenseMap<Value *, int>::iterator CostIt;
1358       if (lookupSROAArgAndCost(TrueVal, SROAArg, CostIt))
1359         SROAArgValues[&SI] = SROAArg;
1360       return true;
1361     }
1362 
1363     return Base::visitSelectInst(SI);
1364   }
1365 
1366   // Select condition is a constant.
1367   Value *SelectedV = CondC->isAllOnesValue()
1368                          ? TrueVal
1369                          : (CondC->isNullValue()) ? FalseVal : nullptr;
1370   if (!SelectedV) {
    // Condition is a vector constant that is not all 1s or all 0s. If all
    // operands are constants, ConstantExpr::getSelect() can handle cases such
    // as vector selects.
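    // E.g. (illustrative):
    //   select <2 x i1> <i1 true, i1 false>,
    //          <2 x i32> <i32 1, i32 2>, <2 x i32> <i32 3, i32 4>
    // folds lane-wise to <2 x i32> <i32 1, i32 4>.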
1374     if (TrueC && FalseC) {
1375       if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {
1376         SimplifiedValues[&SI] = C;
1377         return true;
1378       }
1379     }
1380     return Base::visitSelectInst(SI);
1381   }
1382 
1383   // Condition is either all 1s or all 0s. SI can be simplified.
1384   if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
1385     SimplifiedValues[&SI] = SelectedC;
1386     return true;
1387   }
1388 
1389   if (!CheckSROA)
1390     return true;
1391 
1392   std::pair<Value *, APInt> BaseAndOffset =
1393       ConstantOffsetPtrs.lookup(SelectedV);
1394   if (BaseAndOffset.first) {
1395     ConstantOffsetPtrs[&SI] = BaseAndOffset;
1396 
1397     Value *SROAArg;
1398     DenseMap<Value *, int>::iterator CostIt;
1399     if (lookupSROAArgAndCost(SelectedV, SROAArg, CostIt))
1400       SROAArgValues[&SI] = SROAArg;
1401   }
1402 
1403   return true;
1404 }
1405 
1406 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
  // We model switches whose condition folds to a constant as free; see the
  // comments on handling branches.
1409   if (isa<ConstantInt>(SI.getCondition()))
1410     return true;
1411   if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
1412     if (isa<ConstantInt>(V))
1413       return true;
1414 
1415   // Assume the most general case where the switch is lowered into
1416   // either a jump table, bit test, or a balanced binary tree consisting of
1417   // case clusters without merging adjacent clusters with the same
  // destination. We do not consider switches that are lowered with a mix of
  // jump table, bit test, and binary search tree. The cost of the switch is
  // proportional to the size of the tree or the range of the jump table.
1421   //
  // NB: In simplify-cfg we convert large switches that are only used to
  // initialize large phi nodes into lookup tables, so this shouldn't prevent
  // inlining those. It will still prevent inlining in cases where that
  // optimization does not (yet) fire.
1426 
  // Maximum valid cost increase in this function.
1428   int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;
1429 
1430   // Exit early for a large switch, assuming one case needs at least one
1431   // instruction.
  // FIXME: This is not true for a bit test, but ignore such cases for now to
  // save compile time.
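  // E.g. (illustrative), a switch with 200 cases yields a lower bound of
  // 200 * InstrCost on top of the current Cost, which alone can exceed
  // typical thresholds and lets us bail out without inspecting the cases.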
1434   int64_t CostLowerBound =
1435       std::min((int64_t)CostUpperBound,
1436                (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
1437 
1438   if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
1439     Cost = CostLowerBound;
1440     return false;
1441   }
1442 
1443   unsigned JumpTableSize = 0;
1444   unsigned NumCaseCluster =
1445       TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);
1446 
  // If suitable for a jump table, consider the cost of the table size and the
  // branch to the destination.
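  // The extra 4 * InstrCost below is a rough, heuristic allowance for the
  // fixed overhead of jump-table dispatch (range check plus indirect branch);
  // the exact lowering is target-dependent.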
1449   if (JumpTableSize) {
1450     int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
1451                      4 * InlineConstants::InstrCost;
1452 
1453     Cost = std::min((int64_t)CostUpperBound, JTCost + Cost);
1454     return false;
1455   }
1456 
  // Considering forming a binary search tree, we should find the number of
  // nodes, which is the same as the number of comparisons when lowered. For a
  // given number of clusters, n, we can define a recursive function, f(n), to
  // find the number of nodes in the tree. The recursion is:
  //   f(n) = 1 + f(n/2) + f(n - n/2), when n > 3,
  //   and f(n) = n, when n <= 3.
  // This will lead to a binary tree where each leaf is either f(2) or f(3)
  // when n > 3.  So, the number of comparisons from leaves should be n, while
  // the number of non-leaf nodes should be:
1466   //   2^(log2(n) - 1) - 1
1467   //   = 2^log2(n) * 2^-1 - 1
1468   //   = n / 2 - 1.
1469   // Considering comparisons from leaf and non-leaf nodes, we can estimate the
1470   // number of comparisons in a simple closed form :
1471   //   n + n / 2 - 1 = n * 3 / 2 - 1
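  // Worked example (illustrative): for n = 8 case clusters this estimates
  // 3 * 8 / 2 - 1 = 11 comparisons, charged below as a compare plus a
  // conditional branch each, i.e. 11 * 2 * InstrCost.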
1472   if (NumCaseCluster <= 3) {
1473     // Suppose a comparison includes one compare and one conditional branch.
1474     Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
1475     return false;
1476   }
1477 
1478   int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
1479   int64_t SwitchCost =
1480       ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
1481 
1482   Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost);
1483   return false;
1484 }
1485 
1486 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
1487   // We never want to inline functions that contain an indirectbr.  This is
1488   // incorrect because all the blockaddress's (in static global initializers
1489   // for example) would be referring to the original function, and this
1490   // indirect jump would jump from the inlined copy of the function into the
1491   // original function which is extremely undefined behavior.
1492   // FIXME: This logic isn't really right; we can safely inline functions with
1493   // indirectbr's as long as no other function or global references the
1494   // blockaddress of a block within the current function.
1495   HasIndirectBr = true;
1496   return false;
1497 }
1498 
1499 bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
1500   // FIXME: It's not clear that a single instruction is an accurate model for
1501   // the inline cost of a resume instruction.
1502   return false;
1503 }
1504 
1505 bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
1506   // FIXME: It's not clear that a single instruction is an accurate model for
1507   // the inline cost of a cleanupret instruction.
1508   return false;
1509 }
1510 
1511 bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
1512   // FIXME: It's not clear that a single instruction is an accurate model for
1513   // the inline cost of a catchret instruction.
1514   return false;
1515 }
1516 
1517 bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
  // FIXME: It might be reasonable to discount the cost of instructions leading
1519   // to unreachable as they have the lowest possible impact on both runtime and
1520   // code size.
1521   return true; // No actual code is needed for unreachable.
1522 }
1523 
1524 bool CallAnalyzer::visitInstruction(Instruction &I) {
1525   // Some instructions are free. All of the free intrinsics can also be
1526   // handled by SROA, etc.
1527   if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
1528     return true;
1529 
1530   // We found something we don't understand or can't handle. Mark any SROA-able
1531   // values in the operand list as no longer viable.
1532   for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
1533     disableSROA(*OI);
1534 
1535   return false;
1536 }
1537 
1538 /// Analyze a basic block for its contribution to the inline cost.
1539 ///
1540 /// This method walks the analyzer over every instruction in the given basic
1541 /// block and accounts for their cost during inlining at this callsite. It
/// aborts early if the threshold has been exceeded or an impossible-to-inline
1543 /// construct has been detected. It returns false if inlining is no longer
1544 /// viable, and true if inlining remains viable.
1545 InlineResult
1546 CallAnalyzer::analyzeBlock(BasicBlock *BB,
1547                            SmallPtrSetImpl<const Value *> &EphValues) {
1548   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
    // FIXME: Currently, the number of instructions in a function, regardless
    // of our ability to simplify them during inlining to constants or dead
    // code, is actually used by the vector bonus heuristic. As long as that's
    // true, we have to special case debug intrinsics here to prevent
    // differences in inlining due to debug symbols. Eventually, the number of
    // unsimplified instructions shouldn't factor into the cost computation,
    // but until then, hack around it here.
1556     if (isa<DbgInfoIntrinsic>(I))
1557       continue;
1558 
1559     // Skip ephemeral values.
1560     if (EphValues.count(&*I))
1561       continue;
1562 
1563     ++NumInstructions;
1564     if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
1565       ++NumVectorInstructions;
1566 
1567     // If the instruction simplified to a constant, there is no cost to this
1568     // instruction. Visit the instructions using our InstVisitor to account for
1569     // all of the per-instruction logic. The visit tree returns true if we
1570     // consumed the instruction in any way, and false if the instruction's base
1571     // cost should count against inlining.
1572     if (Base::visit(&*I))
1573       ++NumInstructionsSimplified;
1574     else
1575       Cost += InlineConstants::InstrCost;
1576 
1577     using namespace ore;
    // If visiting this instruction detected an uninlinable pattern, abort.
1579     InlineResult IR;
1580     if (IsRecursiveCall)
1581       IR = "recursive";
1582     else if (ExposesReturnsTwice)
1583       IR = "exposes returns twice";
1584     else if (HasDynamicAlloca)
1585       IR = "dynamic alloca";
1586     else if (HasIndirectBr)
1587       IR = "indirect branch";
1588     else if (HasUninlineableIntrinsic)
1589       IR = "uninlinable intrinsic";
1590     else if (InitsVargArgs)
1591       IR = "varargs";
1592     if (!IR) {
1593       if (ORE)
1594         ORE->emit([&]() {
1595           return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
1596                                           &CandidateCall)
1597                  << NV("Callee", &F) << " has uninlinable pattern ("
1598                  << NV("InlineResult", IR.message)
1599                  << ") and cost is not fully computed";
1600         });
1601       return IR;
1602     }
1603 
1604     // If the caller is a recursive function then we don't want to inline
1605     // functions which allocate a lot of stack space because it would increase
1606     // the caller stack usage dramatically.
1607     if (IsCallerRecursive &&
1608         AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
1609       InlineResult IR = "recursive and allocates too much stack space";
1610       if (ORE)
1611         ORE->emit([&]() {
1612           return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
1613                                           &CandidateCall)
1614                  << NV("Callee", &F) << " is " << NV("InlineResult", IR.message)
1615                  << ". Cost is not fully computed";
1616         });
1617       return IR;
1618     }
1619 
    // Check if we've passed the maximum possible threshold so we don't spin in
1621     // huge basic blocks that will never inline.
1622     if (Cost >= Threshold && !ComputeFullInlineCost)
1623       return false;
1624   }
1625 
1626   return true;
1627 }
1628 
1629 /// Compute the base pointer and cumulative constant offsets for V.
1630 ///
1631 /// This strips all constant offsets off of V, leaving it the base pointer, and
1632 /// accumulates the total constant offset applied in the returned constant. It
/// returns nullptr if V is not a pointer, and returns the constant '0' if
/// there are no constant offsets applied.
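///
/// For example (illustrative), given
///   %p = getelementptr inbounds i32, i32* %base, i64 4
/// this rewrites V to %base and returns the constant 16 (4 elements of 4
/// bytes each), assuming a typical data layout for i32.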
1635 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
1636   if (!V->getType()->isPointerTy())
1637     return nullptr;
1638 
1639   unsigned AS = V->getType()->getPointerAddressSpace();
1640   unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
1641   APInt Offset = APInt::getNullValue(IntPtrWidth);
1642 
1643   // Even though we don't look through PHI nodes, we could be called on an
1644   // instruction in an unreachable block, which may be on a cycle.
1645   SmallPtrSet<Value *, 4> Visited;
1646   Visited.insert(V);
1647   do {
1648     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
1649       if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
1650         return nullptr;
1651       V = GEP->getPointerOperand();
1652     } else if (Operator::getOpcode(V) == Instruction::BitCast) {
1653       V = cast<Operator>(V)->getOperand(0);
1654     } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
1655       if (GA->isInterposable())
1656         break;
1657       V = GA->getAliasee();
1658     } else {
1659       break;
1660     }
1661     assert(V->getType()->isPointerTy() && "Unexpected operand type!");
1662   } while (Visited.insert(V).second);
1663 
1664   Type *IntPtrTy = DL.getIntPtrType(V->getContext(), AS);
1665   return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
1666 }
1667 
1668 /// Find dead blocks due to deleted CFG edges during inlining.
1669 ///
1670 /// If we know the successor of the current block, \p CurrBB, has to be \p
1671 /// NextBB, the other successors of \p CurrBB are dead if these successors have
1672 /// no live incoming CFG edges.  If one block is found to be dead, we can
1673 /// continue growing the dead block list by checking the successors of the dead
1674 /// blocks to see if all their incoming edges are dead or not.
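///
/// Illustrative example: if the branch terminating \p CurrBB folds so that \p
/// NextBB is its only live successor, a sibling successor S becomes dead
/// provided every other predecessor of S is itself dead or is known to branch
/// elsewhere; S's successors are then re-examined the same way.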
1675 void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
1676   auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
1677     // A CFG edge is dead if the predecessor is dead or the predecessor has a
1678     // known successor which is not the one under exam.
1679     return (DeadBlocks.count(Pred) ||
1680             (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
1681   };
1682 
1683   auto IsNewlyDead = [&](BasicBlock *BB) {
1684     // If all the edges to a block are dead, the block is also dead.
1685     return (!DeadBlocks.count(BB) &&
1686             llvm::all_of(predecessors(BB),
1687                          [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
1688   };
1689 
1690   for (BasicBlock *Succ : successors(CurrBB)) {
1691     if (Succ == NextBB || !IsNewlyDead(Succ))
1692       continue;
1693     SmallVector<BasicBlock *, 4> NewDead;
1694     NewDead.push_back(Succ);
1695     while (!NewDead.empty()) {
1696       BasicBlock *Dead = NewDead.pop_back_val();
1697       if (DeadBlocks.insert(Dead))
1698         // Continue growing the dead block lists.
1699         for (BasicBlock *S : successors(Dead))
1700           if (IsNewlyDead(S))
1701             NewDead.push_back(S);
1702     }
1703   }
1704 }
1705 
1706 /// Analyze a call site for potential inlining.
1707 ///
1708 /// Returns true if inlining this call is viable, and false if it is not
1709 /// viable. It computes the cost and adjusts the threshold based on numerous
1710 /// factors and heuristics. If this method returns false but the computed cost
1711 /// is below the computed threshold, then inlining was forcibly disabled by
1712 /// some artifact of the routine.
1713 InlineResult CallAnalyzer::analyzeCall(CallBase &Call) {
1714   ++NumCallsAnalyzed;
1715 
1716   // Perform some tweaks to the cost and threshold based on the direct
1717   // callsite information.
1718 
1719   // We want to more aggressively inline vector-dense kernels, so up the
1720   // threshold, and we'll lower it if the % of vector instructions gets too
  // low. Note that these bonuses are somewhat arbitrary and evolved over time
1722   // by accident as much as because they are principled bonuses.
1723   //
1724   // FIXME: It would be nice to remove all such bonuses. At least it would be
1725   // nice to base the bonus values on something more scientific.
1726   assert(NumInstructions == 0);
1727   assert(NumVectorInstructions == 0);
1728 
1729   // Update the threshold based on callsite properties
1730   updateThreshold(Call, F);
1731 
1732   // While Threshold depends on commandline options that can take negative
1733   // values, we want to enforce the invariant that the computed threshold and
1734   // bonuses are non-negative.
1735   assert(Threshold >= 0);
1736   assert(SingleBBBonus >= 0);
1737   assert(VectorBonus >= 0);
1738 
1739   // Speculatively apply all possible bonuses to Threshold. If cost exceeds
1740   // this Threshold any time, and cost cannot decrease, we can stop processing
1741   // the rest of the function body.
1742   Threshold += (SingleBBBonus + VectorBonus);
1743 
1744   // Give out bonuses for the callsite, as the instructions setting them up
1745   // will be gone after inlining.
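  // For example (illustrative), a call with three non-byval arguments is
  // credited 3 * InstrCost for argument setup plus InstrCost + CallPenalty
  // for the call itself, since all of that disappears after inlining.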
1746   Cost -= getCallsiteCost(Call, DL);
1747 
1748   // If this function uses the coldcc calling convention, prefer not to inline
1749   // it.
1750   if (F.getCallingConv() == CallingConv::Cold)
1751     Cost += InlineConstants::ColdccPenalty;
1752 
1753   // Check if we're done. This can happen due to bonuses and penalties.
1754   if (Cost >= Threshold && !ComputeFullInlineCost)
1755     return "high cost";
1756 
1757   if (F.empty())
1758     return true;
1759 
1760   Function *Caller = Call.getFunction();
1761   // Check if the caller function is recursive itself.
1762   for (User *U : Caller->users()) {
1763     CallBase *Call = dyn_cast<CallBase>(U);
1764     if (Call && Call->getFunction() == Caller) {
1765       IsCallerRecursive = true;
1766       break;
1767     }
1768   }
1769 
1770   // Populate our simplified values by mapping from function arguments to call
1771   // arguments with known important simplifications.
1772   auto CAI = Call.arg_begin();
1773   for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
1774        FAI != FAE; ++FAI, ++CAI) {
1775     assert(CAI != Call.arg_end());
1776     if (Constant *C = dyn_cast<Constant>(CAI))
1777       SimplifiedValues[&*FAI] = C;
1778 
1779     Value *PtrArg = *CAI;
1780     if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
1781       ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());
1782 
1783       // We can SROA any pointer arguments derived from alloca instructions.
1784       if (isa<AllocaInst>(PtrArg)) {
1785         SROAArgValues[&*FAI] = PtrArg;
1786         SROAArgCosts[PtrArg] = 0;
1787       }
1788     }
1789   }
1790   NumConstantArgs = SimplifiedValues.size();
1791   NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
1792   NumAllocaArgs = SROAArgValues.size();
1793 
1794   // FIXME: If a caller has multiple calls to a callee, we end up recomputing
1795   // the ephemeral values multiple times (and they're completely determined by
1796   // the callee, so this is purely duplicate work).
1797   SmallPtrSet<const Value *, 32> EphValues;
1798   CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
1799 
1800   // The worklist of live basic blocks in the callee *after* inlining. We avoid
1801   // adding basic blocks of the callee which can be proven to be dead for this
1802   // particular call site in order to get more accurate cost estimates. This
1803   // requires a somewhat heavyweight iteration pattern: we need to walk the
1804   // basic blocks in a breadth-first order as we insert live successors. To
  // accomplish this, and because we expect to exit early after crossing our
  // threshold, we use a SetVector optimized for small sizes.
1807   typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
1808                     SmallPtrSet<BasicBlock *, 16>>
1809       BBSetVector;
1810   BBSetVector BBWorklist;
1811   BBWorklist.insert(&F.getEntryBlock());
1812   bool SingleBB = true;
1813   // Note that we *must not* cache the size, this loop grows the worklist.
1814   for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
1815     // Bail out the moment we cross the threshold. This means we'll under-count
1816     // the cost, but only when undercounting doesn't matter.
1817     if (Cost >= Threshold && !ComputeFullInlineCost)
1818       break;
1819 
1820     BasicBlock *BB = BBWorklist[Idx];
1821     if (BB->empty())
1822       continue;
1823 
1824     // Disallow inlining a blockaddress. A blockaddress only has defined
1825     // behavior for an indirect branch in the same function, and we do not
1826     // currently support inlining indirect branches. But, the inliner may not
1827     // see an indirect branch that ends up being dead code at a particular call
1828     // site. If the blockaddress escapes the function, e.g., via a global
1829     // variable, inlining may lead to an invalid cross-function reference.
1830     if (BB->hasAddressTaken())
1831       return "blockaddress";
1832 
1833     // Analyze the cost of this block. If we blow through the threshold, this
    // returns false, and we can bail out.
1835     InlineResult IR = analyzeBlock(BB, EphValues);
1836     if (!IR)
1837       return IR;
1838 
1839     Instruction *TI = BB->getTerminator();
1840 
    // Add in the live successors by first checking whether we have a
    // terminator that may be simplified based on the values simplified by
    // this call.
1843     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1844       if (BI->isConditional()) {
1845         Value *Cond = BI->getCondition();
1846         if (ConstantInt *SimpleCond =
1847                 dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1848           BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
1849           BBWorklist.insert(NextBB);
1850           KnownSuccessors[BB] = NextBB;
1851           findDeadBlocks(BB, NextBB);
1852           continue;
1853         }
1854       }
1855     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
1856       Value *Cond = SI->getCondition();
1857       if (ConstantInt *SimpleCond =
1858               dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1859         BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
1860         BBWorklist.insert(NextBB);
1861         KnownSuccessors[BB] = NextBB;
1862         findDeadBlocks(BB, NextBB);
1863         continue;
1864       }
1865     }
1866 
1867     // If we're unable to select a particular successor, just count all of
1868     // them.
1869     for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
1870          ++TIdx)
1871       BBWorklist.insert(TI->getSuccessor(TIdx));
1872 
    // If we had any successors at this point, then post-inlining is likely to
1874     // have them as well. Note that we assume any basic blocks which existed
1875     // due to branches or switches which folded above will also fold after
1876     // inlining.
1877     if (SingleBB && TI->getNumSuccessors() > 1) {
1878       // Take off the bonus we applied to the threshold.
1879       Threshold -= SingleBBBonus;
1880       SingleBB = false;
1881     }
1882   }
1883 
1884   bool OnlyOneCallAndLocalLinkage =
1885       F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction();
1886   // If this is a noduplicate call, we can still inline as long as
1887   // inlining this would cause the removal of the caller (so the instruction
1888   // is not actually duplicated, just moved).
1889   if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
1890     return "noduplicate";
1891 
  // Loops generally act a lot like calls: they act as barriers to code
  // motion, require a certain amount of setup, etc. So when optimizing for
  // size, we penalize any call sites whose callee contains loops. We do this
  // after all other costs here, so we will likely only be dealing with
  // relatively small functions (and hence DT and LI will hopefully be cheap).
1897   if (Caller->hasMinSize()) {
1898     DominatorTree DT(F);
1899     LoopInfo LI(DT);
1900     int NumLoops = 0;
1901     for (Loop *L : LI) {
1902       // Ignore loops that will not be executed
1903       if (DeadBlocks.count(L->getHeader()))
1904         continue;
1905       NumLoops++;
1906     }
1907     Cost += NumLoops * InlineConstants::CallPenalty;
1908   }
1909 
1910   // We applied the maximum possible vector bonus at the beginning. Now,
1911   // subtract the excess bonus, if any, from the Threshold before
1912   // comparing against Cost.
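  // E.g. (illustrative): for a callee with 100 analyzed instructions, the full
  // vector bonus is kept only if more than 50 of them are vector instructions,
  // half of it is kept above 10, and it is revoked entirely at 10 or fewer.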
1913   if (NumVectorInstructions <= NumInstructions / 10)
1914     Threshold -= VectorBonus;
1915   else if (NumVectorInstructions <= NumInstructions / 2)
1916     Threshold -= VectorBonus/2;
1917 
1918   return Cost < std::max(1, Threshold);
1919 }
1920 
1921 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1922 /// Dump stats about this call's analysis.
1923 LLVM_DUMP_METHOD void CallAnalyzer::dump() {
1924 #define DEBUG_PRINT_STAT(x) dbgs() << "      " #x ": " << x << "\n"
1925   DEBUG_PRINT_STAT(NumConstantArgs);
1926   DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
1927   DEBUG_PRINT_STAT(NumAllocaArgs);
1928   DEBUG_PRINT_STAT(NumConstantPtrCmps);
1929   DEBUG_PRINT_STAT(NumConstantPtrDiffs);
1930   DEBUG_PRINT_STAT(NumInstructionsSimplified);
1931   DEBUG_PRINT_STAT(NumInstructions);
1932   DEBUG_PRINT_STAT(SROACostSavings);
1933   DEBUG_PRINT_STAT(SROACostSavingsLost);
1934   DEBUG_PRINT_STAT(LoadEliminationCost);
1935   DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
1936   DEBUG_PRINT_STAT(Cost);
1937   DEBUG_PRINT_STAT(Threshold);
1938 #undef DEBUG_PRINT_STAT
1939 }
1940 #endif
1941 
1942 /// Test that there are no attribute conflicts between Caller and Callee
/// that prevent inlining.
1944 static bool functionsHaveCompatibleAttributes(Function *Caller,
1945                                               Function *Callee,
1946                                               TargetTransformInfo &TTI) {
1947   return TTI.areInlineCompatible(Caller, Callee) &&
1948          AttributeFuncs::areInlineCompatible(*Caller, *Callee);
1949 }
1950 
1951 int llvm::getCallsiteCost(CallBase &Call, const DataLayout &DL) {
1952   int Cost = 0;
1953   for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) {
1954     if (Call.isByValArgument(I)) {
1955       // We approximate the number of loads and stores needed by dividing the
1956       // size of the byval type by the target's pointer size.
1957       PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
1958       unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
1959       unsigned AS = PTy->getAddressSpace();
1960       unsigned PointerSize = DL.getPointerSizeInBits(AS);
1961       // Ceiling division.
1962       unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
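      // Worked example (illustrative): a 256-bit byval struct on a target
      // with 64-bit pointers gives NumStores = (256 + 63) / 64 = 4, charged
      // below as four load/store pairs (2 * 4 * InstrCost).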
1963 
1964       // If it generates more than 8 stores it is likely to be expanded as an
1965       // inline memcpy so we take that as an upper bound. Otherwise we assume
1966       // one load and one store per word copied.
1967       // FIXME: The maxStoresPerMemcpy setting from the target should be used
1968       // here instead of a magic number of 8, but it's not available via
1969       // DataLayout.
1970       NumStores = std::min(NumStores, 8U);
1971 
1972       Cost += 2 * NumStores * InlineConstants::InstrCost;
1973     } else {
1974       // For non-byval arguments subtract off one instruction per call
1975       // argument.
1976       Cost += InlineConstants::InstrCost;
1977     }
1978   }
1979   // The call instruction also disappears after inlining.
1980   Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty;
1981   return Cost;
1982 }
1983 
1984 InlineCost llvm::getInlineCost(
1985     CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
1986     std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
1987     Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
1988     ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
1989   return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
1990                        GetAssumptionCache, GetBFI, PSI, ORE);
1991 }
1992 
1993 InlineCost llvm::getInlineCost(
1994     CallBase &Call, Function *Callee, const InlineParams &Params,
1995     TargetTransformInfo &CalleeTTI,
1996     std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
1997     Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
1998     ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
1999 
2000   // Cannot inline indirect calls.
2001   if (!Callee)
2002     return llvm::InlineCost::getNever("indirect call");
2003 
  // Never inline calls with byval arguments that do not have the alloca
2005   // address space. Since byval arguments can be replaced with a copy to an
2006   // alloca, the inlined code would need to be adjusted to handle that the
2007   // argument is in the alloca address space (so it is a little bit complicated
2008   // to solve).
2009   unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
2010   for (unsigned I = 0, E = Call.arg_size(); I != E; ++I)
2011     if (Call.isByValArgument(I)) {
2012       PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
2013       if (PTy->getAddressSpace() != AllocaAS)
2014         return llvm::InlineCost::getNever("byval arguments without alloca"
2015                                           " address space");
2016     }
2017 
2018   // Calls to functions with always-inline attributes should be inlined
2019   // whenever possible.
2020   if (Call.hasFnAttr(Attribute::AlwaysInline)) {
2021     auto IsViable = isInlineViable(*Callee);
2022     if (IsViable)
2023       return llvm::InlineCost::getAlways("always inline attribute");
2024     return llvm::InlineCost::getNever(IsViable.message);
2025   }
2026 
2027   // Never inline functions with conflicting attributes (unless callee has
2028   // always-inline attribute).
2029   Function *Caller = Call.getCaller();
2030   if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI))
2031     return llvm::InlineCost::getNever("conflicting attributes");
2032 
2033   // Don't inline this call if the caller has the optnone attribute.
2034   if (Caller->hasOptNone())
2035     return llvm::InlineCost::getNever("optnone attribute");
2036 
  // Don't inline a function that treats the null pointer as valid into a
  // caller that does not have this attribute.
2039   if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
2040     return llvm::InlineCost::getNever("nullptr definitions incompatible");
2041 
2042   // Don't inline functions which can be interposed at link-time.
2043   if (Callee->isInterposable())
2044     return llvm::InlineCost::getNever("interposable");
2045 
2046   // Don't inline functions marked noinline.
2047   if (Callee->hasFnAttribute(Attribute::NoInline))
2048     return llvm::InlineCost::getNever("noinline function attribute");
2049 
2050   // Don't inline call sites marked noinline.
2051   if (Call.isNoInline())
2052     return llvm::InlineCost::getNever("noinline call site attribute");
2053 
2054   LLVM_DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
2055                           << "... (caller:" << Caller->getName() << ")\n");
2056 
2057   CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee,
2058                   Call, Params);
2059   InlineResult ShouldInline = CA.analyzeCall(Call);
2060 
2061   LLVM_DEBUG(CA.dump());
2062 
2063   // Check if there was a reason to force inlining or no inlining.
2064   if (!ShouldInline && CA.getCost() < CA.getThreshold())
2065     return InlineCost::getNever(ShouldInline.message);
2066   if (ShouldInline && CA.getCost() >= CA.getThreshold())
2067     return InlineCost::getAlways("empty function");
2068 
2069   return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
2070 }
2071 
2072 InlineResult llvm::isInlineViable(Function &F) {
2073   bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
2074   for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
2075     // Disallow inlining of functions which contain indirect branches or
2076     // blockaddresses.
2077     if (isa<IndirectBrInst>(BI->getTerminator()))
2078       return "contains indirect branches";
2079 
2080     if (BI->hasAddressTaken())
2081       return "uses block address";
2082 
2083     for (auto &II : *BI) {
2084       CallBase *Call = dyn_cast<CallBase>(&II);
2085       if (!Call)
2086         continue;
2087 
2088       // Disallow recursive calls.
2089       if (&F == Call->getCalledFunction())
2090         return "recursive call";
2091 
2092       // Disallow calls which expose returns-twice to a function not previously
2093       // attributed as such.
2094       if (!ReturnsTwice && isa<CallInst>(Call) &&
2095           cast<CallInst>(Call)->canReturnTwice())
2096         return "exposes returns-twice attribute";
2097 
2098       if (Call->getCalledFunction())
2099         switch (Call->getCalledFunction()->getIntrinsicID()) {
2100         default:
2101           break;
        // Disallow inlining of @llvm.icall.branch.funnel because the current
        // backend can't separate call targets from call arguments.
2104         case llvm::Intrinsic::icall_branch_funnel:
2105           return "disallowed inlining of @llvm.icall.branch.funnel";
2106         // Disallow inlining functions that call @llvm.localescape. Doing this
2107         // correctly would require major changes to the inliner.
2108         case llvm::Intrinsic::localescape:
2109           return "disallowed inlining of @llvm.localescape";
2110         // Disallow inlining of functions that initialize VarArgs with va_start.
2111         case llvm::Intrinsic::vastart:
2112           return "contains VarArgs initialized with va_start";
2113         }
2114     }
2115   }
2116 
2117   return true;
2118 }
2119 
2120 // APIs to create InlineParams based on command line flags and/or other
2121 // parameters.
2122 
2123 InlineParams llvm::getInlineParams(int Threshold) {
2124   InlineParams Params;
2125 
2126   // This field is the threshold to use for a callee by default. This is
2127   // derived from one or more of:
2128   //  * optimization or size-optimization levels,
2129   //  * a value passed to createFunctionInliningPass function, or
2130   //  * the -inline-threshold flag.
2131   //  If the -inline-threshold flag is explicitly specified, that is used
2132   //  irrespective of anything else.
2133   if (InlineThreshold.getNumOccurrences() > 0)
2134     Params.DefaultThreshold = InlineThreshold;
2135   else
2136     Params.DefaultThreshold = Threshold;
2137 
2138   // Set the HintThreshold knob from the -inlinehint-threshold.
2139   Params.HintThreshold = HintThreshold;
2140 
2141   // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
2142   Params.HotCallSiteThreshold = HotCallSiteThreshold;
2143 
2144   // If the -locally-hot-callsite-threshold is explicitly specified, use it to
2145   // populate LocallyHotCallSiteThreshold. Later, we populate
2146   // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
2147   // we know that optimization level is O3 (in the getInlineParams variant that
2148   // takes the opt and size levels).
2149   // FIXME: Remove this check (and make the assignment unconditional) after
2150   // addressing size regression issues at O2.
2151   if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
2152     Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
2153 
2154   // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold.
2155   Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
2156 
  // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
  // -inline-threshold commandline option is not explicitly given. If that
  // option is present, then its value applies even for callees with size and
  // minsize attributes.
2161   // If the -inline-threshold is not specified, set the ColdThreshold from the
2162   // -inlinecold-threshold even if it is not explicitly passed. If
2163   // -inline-threshold is specified, then -inlinecold-threshold needs to be
  // explicitly specified to set the ColdThreshold knob.
2165   if (InlineThreshold.getNumOccurrences() == 0) {
2166     Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
2167     Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
2168     Params.ColdThreshold = ColdThreshold;
2169   } else if (ColdThreshold.getNumOccurrences() > 0) {
2170     Params.ColdThreshold = ColdThreshold;
2171   }
2172   return Params;
2173 }
2174 
2175 InlineParams llvm::getInlineParams() {
2176   return getInlineParams(InlineThreshold);
2177 }
2178 
2179 // Compute the default threshold for inlining based on the opt level and the
2180 // size opt level.
2181 static int computeThresholdFromOptLevels(unsigned OptLevel,
2182                                          unsigned SizeOptLevel) {
2183   if (OptLevel > 2)
2184     return InlineConstants::OptAggressiveThreshold;
2185   if (SizeOptLevel == 1) // -Os
2186     return InlineConstants::OptSizeThreshold;
2187   if (SizeOptLevel == 2) // -Oz
2188     return InlineConstants::OptMinSizeThreshold;
2189   return InlineThreshold;
2190 }
2191 
2192 InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
2193   auto Params =
2194       getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
2195   // At O3, use the value of -locally-hot-callsite-threshold option to populate
2196   // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only
2197   // when it is specified explicitly.
2198   if (OptLevel > 2)
2199     Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
2200   return Params;
2201 }
2202