//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements inline cost analysis.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/InlineCost.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "inline-cost"

STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");

static cl::opt<int> InlineThreshold(
    "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
    cl::desc("Control the amount of inlining to perform (default = 225)"));

static cl::opt<int> HintThreshold(
    "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore,
    cl::desc("Threshold for inlining functions with inline hint"));

static cl::opt<int>
    ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
                          cl::init(45), cl::ZeroOrMore,
                          cl::desc("Threshold for inlining cold callsites"));

// We introduce this threshold to help performance of instrumentation based
// PGO before we actually hook up inliner with analysis passes such as BPI and
// BFI.
static cl::opt<int> ColdThreshold(
    "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore,
    cl::desc("Threshold for inlining functions with cold attribute"));

static cl::opt<int>
    HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
                         cl::ZeroOrMore,
                         cl::desc("Threshold for hot callsites "));

static cl::opt<int> LocallyHotCallSiteThreshold(
    "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore,
    cl::desc("Threshold for locally hot callsites "));

static cl::opt<int> ColdCallSiteRelFreq(
    "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
    cl::desc("Maximum block frequency, expressed as a percentage of caller's "
             "entry frequency, for a callsite to be cold in the absence of "
             "profile information."));

static cl::opt<int> HotCallSiteRelFreq(
    "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore,
    cl::desc("Minimum block frequency, expressed as a multiple of caller's "
             "entry frequency, for a callsite to be hot in the absence of "
             "profile information."));

static cl::opt<bool> OptComputeFullInlineCost(
    "inline-cost-full", cl::Hidden, cl::init(false), cl::ZeroOrMore,
    cl::desc("Compute the full inline cost of a call site even when the cost "
             "exceeds the threshold."));

namespace {

class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  typedef InstVisitor<CallAnalyzer, bool> Base;
  friend class InstVisitor<CallAnalyzer, bool>;

  /// The TargetTransformInfo available for this compilation.
  const TargetTransformInfo &TTI;

  /// Getter for the cache of @llvm.assume intrinsics.
  std::function<AssumptionCache &(Function &)> &GetAssumptionCache;

  /// Getter for BlockFrequencyInfo
  Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI;

  /// Profile summary information.
  ProfileSummaryInfo *PSI;

  /// The called function.
  Function &F;

  // Cache the DataLayout since we use it a lot.
  const DataLayout &DL;

  /// The OptimizationRemarkEmitter available for this compilation.
  OptimizationRemarkEmitter *ORE;

  /// The candidate callsite being analyzed. Please do not use this to do
  /// analysis in the caller function; we want the inline cost query to be
  /// easily cacheable. Instead, use the cover function paramHasAttr.
  CallBase &CandidateCall;

  /// Tunable parameters that control the analysis.
  const InlineParams &Params;

  int Threshold;
  int Cost;
  bool ComputeFullInlineCost;

  bool IsCallerRecursive;
  bool IsRecursiveCall;
  bool ExposesReturnsTwice;
  bool HasDynamicAlloca;
  bool ContainsNoDuplicateCall;
  bool HasReturn;
  bool HasIndirectBr;
  bool HasUninlineableIntrinsic;
  bool InitsVargArgs;

  /// Number of bytes allocated statically by the callee.
  uint64_t AllocatedSize;
  unsigned NumInstructions, NumVectorInstructions;
  int VectorBonus, TenPercentVectorBonus;
  // Bonus to be applied when the callee has only one reachable basic block.
  int SingleBBBonus;

  /// While we walk the potentially-inlined instructions, we build up and
  /// maintain a mapping of simplified values specific to this callsite. The
  /// idea is to propagate any special information we have about arguments to
  /// this call through the inlinable section of the function, and account for
  /// likely simplifications post-inlining.
  /// The most important aspect we track
  /// is CFG altering simplifications -- when we prove a basic block dead, that
  /// can cause dramatic shifts in the cost of inlining a function.
  DenseMap<Value *, Constant *> SimplifiedValues;

  /// Keep track of the values which map back (through function arguments) to
  /// allocas on the caller stack which could be simplified through SROA.
  DenseMap<Value *, Value *> SROAArgValues;

  /// The mapping of caller Alloca values to their accumulated cost savings. If
  /// we have to disable SROA for one of the allocas, this tells us how much
  /// cost must be added.
  DenseMap<Value *, int> SROAArgCosts;

  /// Keep track of values which map to a pointer base and constant offset.
  DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;

  /// Keep track of dead blocks due to the constant arguments.
  SetVector<BasicBlock *> DeadBlocks;

  /// The mapping of the blocks to their known unique successors due to the
  /// constant arguments.
  DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;

  /// Model the elimination of repeated loads that is expected to happen
  /// whenever we simplify away the stores that would otherwise cause them to be
  /// loads.
  bool EnableLoadElimination;
  SmallPtrSet<Value *, 16> LoadAddrSet;
  int LoadEliminationCost;

  // Custom simplification helper routines.
  bool isAllocaDerivedArg(Value *V);
  bool lookupSROAArgAndCost(Value *V, Value *&Arg,
                            DenseMap<Value *, int>::iterator &CostIt);
  void disableSROA(DenseMap<Value *, int>::iterator CostIt);
  void disableSROA(Value *V);
  void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
  void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                          int InstructionCost);
  void disableLoadElimination();
  bool isGEPFree(GetElementPtrInst &GEP);
  bool canFoldInboundsGEP(GetElementPtrInst &I);
  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
  bool simplifyCallSite(Function *F, CallBase &Call);
  template <typename Callable>
  bool simplifyInstruction(Instruction &I, Callable Evaluate);
  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);

  /// Return true if the given argument to the function being considered for
  /// inlining has the given attribute set either at the call site or the
  /// function declaration. Primarily used to inspect call site specific
  /// attributes since these can be more precise than the ones on the callee
  /// itself.
  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);

  /// Return true if the given value is known non null within the callee if
  /// inlined through this particular callsite.
  bool isKnownNonNullInCallee(Value *V);

  /// Update Threshold based on callsite properties such as callee
  /// attributes and callee hotness for PGO builds. The Callee is explicitly
  /// passed to support analyzing indirect calls whose target is inferred by
  /// analysis.
  void updateThreshold(CallBase &Call, Function &Callee);

  /// Return true if size growth is allowed when inlining the callee at \p Call.
  bool allowSizeGrowth(CallBase &Call);

  /// Return true if \p Call is a cold callsite.
  bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI);

  /// Return a higher threshold if \p Call is a hot callsite.
  Optional<int> getHotCallSiteThreshold(CallBase &Call,
                                        BlockFrequencyInfo *CallerBFI);

  // Custom analysis routines.
  InlineResult analyzeBlock(BasicBlock *BB,
                            SmallPtrSetImpl<const Value *> &EphValues);

  // Disable several entry points to the visitor so we don't accidentally use
  // them by declaring but not defining them here.
  void visit(Module *);
  void visit(Module &);
  void visit(Function *);
  void visit(Function &);
  void visit(BasicBlock *);
  void visit(BasicBlock &);

  // Provide base case for our instruction visit.
  bool visitInstruction(Instruction &I);

  // Our visit overrides.
  bool visitAlloca(AllocaInst &I);
  bool visitPHI(PHINode &I);
  bool visitGetElementPtr(GetElementPtrInst &I);
  bool visitBitCast(BitCastInst &I);
  bool visitPtrToInt(PtrToIntInst &I);
  bool visitIntToPtr(IntToPtrInst &I);
  bool visitCastInst(CastInst &I);
  bool visitUnaryInstruction(UnaryInstruction &I);
  bool visitCmpInst(CmpInst &I);
  bool visitSub(BinaryOperator &I);
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitLoad(LoadInst &I);
  bool visitStore(StoreInst &I);
  bool visitExtractValue(ExtractValueInst &I);
  bool visitInsertValue(InsertValueInst &I);
  bool visitCallBase(CallBase &Call);
  bool visitReturnInst(ReturnInst &RI);
  bool visitBranchInst(BranchInst &BI);
  bool visitSelectInst(SelectInst &SI);
  bool visitSwitchInst(SwitchInst &SI);
  bool visitIndirectBrInst(IndirectBrInst &IBI);
  bool visitResumeInst(ResumeInst &RI);
  bool visitCleanupReturnInst(CleanupReturnInst &RI);
  bool visitCatchReturnInst(CatchReturnInst &RI);
  bool visitUnreachableInst(UnreachableInst &I);

public:
  CallAnalyzer(const TargetTransformInfo &TTI,
               std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
               Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
               ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
               Function &Callee, CallBase &Call, const InlineParams &Params)
      : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
        PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
        CandidateCall(Call), Params(Params), Threshold(Params.DefaultThreshold),
        Cost(0), ComputeFullInlineCost(OptComputeFullInlineCost ||
                                       Params.ComputeFullInlineCost || ORE),
        IsCallerRecursive(false), IsRecursiveCall(false),
        ExposesReturnsTwice(false), HasDynamicAlloca(false),
        ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
        HasUninlineableIntrinsic(false), InitsVargArgs(false), AllocatedSize(0),
        NumInstructions(0), NumVectorInstructions(0), VectorBonus(0),
        SingleBBBonus(0), EnableLoadElimination(true), LoadEliminationCost(0),
        NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
        NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
        NumInstructionsSimplified(0), SROACostSavings(0),
        SROACostSavingsLost(0) {}

  InlineResult analyzeCall(CallBase &Call);

  int getThreshold() { return Threshold; }
  int getCost() { return Cost; }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs;
  unsigned NumConstantOffsetPtrArgs;
  unsigned NumAllocaArgs;
  unsigned NumConstantPtrCmps;
  unsigned NumConstantPtrDiffs;
  unsigned NumInstructionsSimplified;
  unsigned SROACostSavings;
  unsigned SROACostSavingsLost;

  void dump();
};

} // namespace

/// Test whether the given value is an Alloca-derived function argument.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  return SROAArgValues.count(V);
}

/// Lookup the SROA-candidate argument and cost iterator which V maps to.
/// Returns false if V does not map to a SROA-candidate.
bool CallAnalyzer::lookupSROAArgAndCost(
    Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
  if (SROAArgValues.empty() || SROAArgCosts.empty())
    return false;

  DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
  if (ArgIt == SROAArgValues.end())
    return false;

  Arg = ArgIt->second;
  CostIt = SROAArgCosts.find(Arg);
  return CostIt != SROAArgCosts.end();
}

/// Disable SROA for the candidate marked by this cost iterator.
///
/// This marks the candidate as no longer viable for SROA, and adds the cost
/// savings associated with it back into the inline cost measurement.
void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
  // If we're no longer able to perform SROA we need to undo its cost savings
  // and prevent subsequent analysis.
  Cost += CostIt->second;
  SROACostSavings -= CostIt->second;
  SROACostSavingsLost += CostIt->second;
  SROAArgCosts.erase(CostIt);
  disableLoadElimination();
}

/// If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(V, SROAArg, CostIt))
    disableSROA(CostIt);
}

/// Accumulate the given cost for a particular SROA candidate.
void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                                      int InstructionCost) {
  CostIt->second += InstructionCost;
  SROACostSavings += InstructionCost;
}

void CallAnalyzer::disableLoadElimination() {
  if (EnableLoadElimination) {
    Cost += LoadEliminationCost;
    LoadEliminationCost = 0;
    EnableLoadElimination = false;
  }
}

/// Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
  assert(IntPtrWidth == Offset.getBitWidth());

  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
        OpC = dyn_cast<ConstantInt>(SimpleOp);
    if (!OpC)
      return false;
    if (OpC->isZero())
      continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = DL.getStructLayout(STy);
      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
      continue;
    }

    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  }
  return true;
}

/// Use TTI to check whether a GEP is free.
///
/// Respects any simplified values known during the analysis of this callsite.
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
  SmallVector<Value *, 4> Operands;
  Operands.push_back(GEP.getOperand(0));
  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
    if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
      Operands.push_back(SimpleOp);
    else
      Operands.push_back(*I);
  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands);
}

bool CallAnalyzer::visitAlloca(AllocaInst &I) {
  // Check whether inlining will turn a dynamic alloca into a static
  // alloca and handle that case.
  if (I.isArrayAllocation()) {
    Constant *Size = SimplifiedValues.lookup(I.getArraySize());
    if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
      Type *Ty = I.getAllocatedType();
      AllocatedSize = SaturatingMultiplyAdd(
          AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize);
      return Base::visitAlloca(I);
    }
  }

  // Accumulate the allocated size.
  if (I.isStaticAlloca()) {
    Type *Ty = I.getAllocatedType();
    AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize);
  }

  // We will happily inline static alloca instructions.
  if (I.isStaticAlloca())
    return Base::visitAlloca(I);

  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
  // a variety of reasons, and so we would like to not inline them into
  // functions which don't currently have a dynamic alloca. This simply
  // disables inlining altogether in the presence of a dynamic alloca.
  HasDynamicAlloca = true;
  return false;
}

bool CallAnalyzer::visitPHI(PHINode &I) {
  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate its bonuses. The idea is to disable
  // SROA if it *might* be used in an inappropriate manner.

  // Phi nodes are always zero-cost.
  // FIXME: Pointer sizes may differ between different address spaces, so do we
  // need to use correct address space in the call to getPointerSizeInBits here?
  // Or could we skip the getPointerSizeInBits call completely? As far as I can
  // see the ZeroOffset is used as a dummy value, so we can probably use any
  // bit width for the ZeroOffset?
  APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits(0));
  bool CheckSROA = I.getType()->isPointerTy();

  // Track the constant or pointer with constant offset we've seen so far.
  Constant *FirstC = nullptr;
  std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
  Value *FirstV = nullptr;

  for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = I.getIncomingBlock(i);
    // If the incoming block is dead, skip the incoming block.
    if (DeadBlocks.count(Pred))
      continue;
    // If the parent block of phi is not the known successor of the incoming
    // block, skip the incoming block.
    BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
    if (KnownSuccessor && KnownSuccessor != I.getParent())
      continue;

    Value *V = I.getIncomingValue(i);
    // If the incoming value is this phi itself, skip the incoming value.
    if (&I == V)
      continue;

    Constant *C = dyn_cast<Constant>(V);
    if (!C)
      C = SimplifiedValues.lookup(V);

    std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
    if (!C && CheckSROA)
      BaseAndOffset = ConstantOffsetPtrs.lookup(V);

    if (!C && !BaseAndOffset.first)
      // The incoming value is neither a constant nor a pointer with constant
      // offset, exit early.
      return true;

    if (FirstC) {
      if (FirstC == C)
        // If we've seen a constant incoming value before and it is the same
        // constant we see this time, continue checking the next incoming value.
        continue;
      // Otherwise early exit because we either see a different constant or saw
      // a constant before but we have a pointer with constant offset this time.
      return true;
    }

    if (FirstV) {
      // The same logic as above, but check pointer with constant offset here.
      if (FirstBaseAndOffset == BaseAndOffset)
        continue;
      return true;
    }

    if (C) {
      // This is the 1st time we've seen a constant, record it.
      FirstC = C;
      continue;
    }

    // The remaining case is that this is the 1st time we've seen a pointer with
    // constant offset, record it.
    FirstV = V;
    FirstBaseAndOffset = BaseAndOffset;
  }

  // Check if we can map phi to a constant.
  if (FirstC) {
    SimplifiedValues[&I] = FirstC;
    return true;
  }

  // Check if we can map phi to a pointer with constant offset.
  if (FirstBaseAndOffset.first) {
    ConstantOffsetPtrs[&I] = FirstBaseAndOffset;

    Value *SROAArg;
    DenseMap<Value *, int>::iterator CostIt;
    if (lookupSROAArgAndCost(FirstV, SROAArg, CostIt))
      SROAArgValues[&I] = SROAArg;
  }

  return true;
}

/// Check we can fold GEPs of constant-offset call site argument pointers.
/// This requires target data and inbounds GEPs.
///
/// \return true if the specified GEP can be folded.
bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
  // Check if we have a base + offset for the pointer.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getPointerOperand());
  if (!BaseAndOffset.first)
    return false;

  // Check if the offset of this GEP is constant, and if so accumulate it
  // into Offset.
  if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
    return false;

  // Add the result as a new mapping to Base + Offset.
  ConstantOffsetPtrs[&I] = BaseAndOffset;

  return true;
}

bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  bool SROACandidate =
      lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt);

  // Lambda to check whether a GEP's indices are all constant.
  auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
    for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
      if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
        return false;
    return true;
  };

  if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
    if (SROACandidate)
      SROAArgValues[&I] = SROAArg;

    // Constant GEPs are modeled as free.
    return true;
  }

  // Variable GEPs will require math and will disable SROA.
  if (SROACandidate)
    disableSROA(CostIt);
  return isGEPFree(I);
}

/// Simplify \p I if its operands are constants and update SimplifiedValues.
/// \p Evaluate is a callable specific to instruction type that evaluates the
/// instruction when all the operands are constants.
template <typename Callable>
bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
  SmallVector<Constant *, 2> COps;
  for (Value *Op : I.operands()) {
    Constant *COp = dyn_cast<Constant>(Op);
    if (!COp)
      COp = SimplifiedValues.lookup(Op);
    if (!COp)
      return false;
    COps.push_back(COp);
  }
  auto *C = Evaluate(COps);
  if (!C)
    return false;
  SimplifiedValues[&I] = C;
  return true;
}

bool CallAnalyzer::visitBitCast(BitCastInst &I) {
  // Propagate constants through bitcasts.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getBitCast(COps[0], I.getType());
      }))
    return true;

  // Track base/offsets through casts
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getOperand(0));
  // Casts don't change the offset, just wrap it up.
  if (BaseAndOffset.first)
    ConstantOffsetPtrs[&I] = BaseAndOffset;

  // Also look for SROA candidates here.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  // Bitcasts are always zero cost.
  return true;
}

bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
  // Propagate constants through ptrtoint.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getPtrToInt(COps[0], I.getType());
      }))
    return true;

  // Track base/offset pairs when converted to a plain integer provided the
  // integer is large enough to represent the pointer.
  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
  unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
  if (IntegerSize >= DL.getPointerSizeInBits(AS)) {
    std::pair<Value *, APInt> BaseAndOffset =
        ConstantOffsetPtrs.lookup(I.getOperand(0));
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // This is really weird. Technically, ptrtoint will disable SROA. However,
  // unless that ptrtoint is *used* somewhere in the live basic blocks after
  // inlining, it will be nuked, and SROA should proceed. All of the uses which
  // would block SROA would also block SROA if applied directly to a pointer,
  // and so we can just add the integer in here. The only places where SROA is
  // preserved either cannot fire on an integer, or won't in-and-of themselves
  // disable SROA (ext) w/o some later use that we would see and disable.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
  // Propagate constants through inttoptr.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getIntToPtr(COps[0], I.getType());
      }))
    return true;

  // Track base/offset pairs when round-tripped through a pointer without
  // modifications provided the integer is not too large.
  Value *Op = I.getOperand(0);
  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
  if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitCastInst(CastInst &I) {
  // Propagate constants through casts.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
      }))
    return true;

  // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
  disableSROA(I.getOperand(0));

  // If this is a floating-point cast, and the target says this operation
  // is expensive, this may eventually become a library call. Treat the cost
  // as such.
  switch (I.getOpcode()) {
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
    if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
      Cost += InlineConstants::CallPenalty;
    break;
  default:
    break;
  }

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
  Value *Operand = I.getOperand(0);
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantFoldInstOperands(&I, COps[0], DL);
      }))
    return true;

  // Disable any SROA on the argument to arbitrary unary operators.
  disableSROA(Operand);

  return false;
}

bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
  return CandidateCall.paramHasAttr(A->getArgNo(), Attr);
}

bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
  // Does the *call site* have the NonNull attribute set on an argument? We
  // use the attribute on the call site to memoize any analysis done in the
  // caller. This will also trip if the callee function has a non-null
  // parameter attribute, but that's a less interesting case because hopefully
  // the callee would already have been simplified based on that.
  if (Argument *A = dyn_cast<Argument>(V))
    if (paramHasAttr(A, Attribute::NonNull))
      return true;

  // Is this an alloca in the caller? This is distinct from the attribute case
  // above because attributes aren't updated within the inliner itself and we
  // always want to catch the alloca derived case.
  if (isAllocaDerivedArg(V))
    // We can actually predict the result of comparisons between an
    // alloca-derived value and null. Note that this fires regardless of
    // SROA firing.
    return true;

  return false;
}

bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {
  // If the normal destination of the invoke or the parent block of the call
  // site is unreachable-terminated, there is little point in inlining this
  // unless there is literally zero cost.
  // FIXME: Note that it is possible that an unreachable-terminated block has a
  // hot entry. For example, in the scenario below, inlining hot_call_X() may be
  // beneficial:
  // main() {
  //   hot_call_1();
  //   ...
  //   hot_call_N()
  //   exit(0);
  // }
  // For now, we are not handling this corner case here as it is rare in real
  // code. In future, we should elaborate this based on BPI and BFI in more
  // general threshold adjusting heuristics in updateThreshold().
  if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
    if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
      return false;
  } else if (isa<UnreachableInst>(Call.getParent()->getTerminator()))
    return false;

  return true;
}

bool CallAnalyzer::isColdCallSite(CallBase &Call,
                                  BlockFrequencyInfo *CallerBFI) {
  // If global profile summary is available, then callsite's coldness is
  // determined based on that.
  if (PSI && PSI->hasProfileSummary())
    return PSI->isColdCallSite(CallSite(&Call), CallerBFI);

  // Otherwise we need BFI to be available.
  if (!CallerBFI)
    return false;

  // Determine if the callsite is cold relative to caller's entry. We could
  // potentially cache the computation of scaled entry frequency, but the added
  // complexity is not worth it unless this scaling shows up high in the
  // profiles.
  const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
  auto CallSiteBB = Call.getParent();
  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
  auto CallerEntryFreq =
      CallerBFI->getBlockFreq(&(Call.getCaller()->getEntryBlock()));
  return CallSiteFreq < CallerEntryFreq * ColdProb;
}

Optional<int>
CallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
                                      BlockFrequencyInfo *CallerBFI) {

  // If global profile summary is available, then callsite's hotness is
  // determined based on that.
  if (PSI && PSI->hasProfileSummary() &&
      PSI->isHotCallSite(CallSite(&Call), CallerBFI))
    return Params.HotCallSiteThreshold;

  // Otherwise we need BFI to be available and to have a locally hot callsite
  // threshold.
  if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
    return None;

  // Determine if the callsite is hot relative to caller's entry. We could
  // potentially cache the computation of scaled entry frequency, but the added
  // complexity is not worth it unless this scaling shows up high in the
  // profiles.
  auto CallSiteBB = Call.getParent();
  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
  auto CallerEntryFreq = CallerBFI->getEntryFreq();
  if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
    return Params.LocallyHotCallSiteThreshold;

  // Otherwise treat it normally.
  return None;
}

void CallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
  // If no size growth is allowed for this inlining, set Threshold to 0.
  if (!allowSizeGrowth(Call)) {
    Threshold = 0;
    return;
  }

  Function *Caller = Call.getCaller();

  // return min(A, B) if B is valid.
  auto MinIfValid = [](int A, Optional<int> B) {
    return B ?
               std::min(A, B.getValue()) : A;
  };

  // return max(A, B) if B is valid.
  auto MaxIfValid = [](int A, Optional<int> B) {
    return B ? std::max(A, B.getValue()) : A;
  };

  // Various bonus percentages. These are multiplied by Threshold to get the
  // bonus values.
  // SingleBBBonus: This bonus is applied if the callee has a single reachable
  // basic block at the given callsite context. This is speculatively applied
  // and withdrawn if more than one basic block is seen.
  //
  // Vector bonuses: We want to more aggressively inline vector-dense kernels
  // and apply this bonus based on the percentage of vector instructions. A
  // bonus is applied if the vector instructions exceed 50% and half that amount
  // is applied if it exceeds 10%. Note that these bonuses are somewhat
  // arbitrary and evolved over time by accident as much as because they are
  // principled bonuses.
  // FIXME: It would be nice to base the bonus values on something more
  // scientific.
  //
  // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
  // of the last call to a static function as inlining such functions is
  // guaranteed to reduce code size.
  //
  // These bonus percentages may be set to 0 based on properties of the caller
  // and the callsite.
  int SingleBBBonusPercent = 50;
  int VectorBonusPercent = 150;
  int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;

  // Lambda to set all the above bonus and bonus percentages to 0.
  auto DisallowAllBonuses = [&]() {
    SingleBBBonusPercent = 0;
    VectorBonusPercent = 0;
    LastCallToStaticBonus = 0;
  };

  // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
  // and reduce the threshold if the caller has the necessary attribute.
  if (Caller->hasMinSize()) {
    Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
    // For minsize, we want to disable the single BB bonus and the vector
    // bonuses, but not the last-call-to-static bonus. Inlining the last call to
    // a static function will, at the minimum, eliminate the parameter setup and
    // call/return instructions.
    SingleBBBonusPercent = 0;
    VectorBonusPercent = 0;
  } else if (Caller->hasOptSize())
    Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);

  // Adjust the threshold based on inlinehint attribute and profile based
  // hotness information if the caller does not have MinSize attribute.
  if (!Caller->hasMinSize()) {
    if (Callee.hasFnAttribute(Attribute::InlineHint))
      Threshold = MaxIfValid(Threshold, Params.HintThreshold);

    // FIXME: After switching to the new passmanager, simplify the logic below
    // by checking only the callsite hotness/coldness as we will reliably
    // have local profile information.
    //
    // Callsite hotness and coldness can be determined if sample profile is
    // used (which adds hotness metadata to calls) or if caller's
    // BlockFrequencyInfo is available.
    BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
    auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI);
    if (!Caller->hasOptSize() && HotCallSiteThreshold) {
      LLVM_DEBUG(dbgs() << "Hot callsite.\n");
      // FIXME: This should update the threshold only if it exceeds the
      // current threshold, but AutoFDO + ThinLTO currently relies on this
      // behavior to prevent inlining of hot callsites during ThinLTO
      // compile phase.
      Threshold = HotCallSiteThreshold.getValue();
    } else if (isColdCallSite(Call, CallerBFI)) {
      LLVM_DEBUG(dbgs() << "Cold callsite.\n");
      // Do not apply bonuses for a cold callsite including the
      // LastCallToStatic bonus. While this bonus might result in code size
      // reduction, it can cause the size of a non-cold caller to increase
      // preventing it from being inlined.
      DisallowAllBonuses();
      Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
    } else if (PSI) {
      // Use callee's global profile information only if we have no way of
      // determining this via callsite information.
      if (PSI->isFunctionEntryHot(&Callee)) {
        LLVM_DEBUG(dbgs() << "Hot callee.\n");
        // If callsite hotness can not be determined, we may still know
        // that the callee is hot and treat it as a weaker hint for threshold
        // increase.
        Threshold = MaxIfValid(Threshold, Params.HintThreshold);
      } else if (PSI->isFunctionEntryCold(&Callee)) {
        LLVM_DEBUG(dbgs() << "Cold callee.\n");
        // Do not apply bonuses for a cold callee including the
        // LastCallToStatic bonus. While this bonus might result in code size
        // reduction, it can cause the size of a non-cold caller to increase
        // preventing it from being inlined.
        DisallowAllBonuses();
        Threshold = MinIfValid(Threshold, Params.ColdThreshold);
      }
    }
  }

  // Finally, take the target-specific inlining threshold multiplier into
  // account.
  Threshold *= TTI.getInliningThresholdMultiplier();

  SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
  VectorBonus = Threshold * VectorBonusPercent / 100;

  bool OnlyOneCallAndLocalLinkage =
      F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction();
  // If there is only one call of the function, and it has internal linkage,
  // the cost of inlining it drops dramatically. It may seem odd to update
  // Cost in updateThreshold, but the bonus depends on the logic in this method.
  if (OnlyOneCallAndLocalLinkage)
    Cost -= LastCallToStaticBonus;
}

bool CallAnalyzer::visitCmpInst(CmpInst &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  // First try to handle simplified comparisons.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
      }))
    return true;

  if (I.getOpcode() == Instruction::FCmp)
    return false;

  // Otherwise look for a comparison between constant offset pointers with
  // a common base.
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the icmp to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrCmps;
        return true;
      }
    }
  }

  // If the comparison is an equality comparison with null, we can simplify it
  // if we know the value (argument) can't be null
  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
      isKnownNonNullInCallee(I.getOperand(0))) {
    bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
    SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
                                      : ConstantInt::getFalse(I.getType());
    return true;
  }
  // Finally check for SROA candidates in comparisons.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
    if (isa<ConstantPointerNull>(I.getOperand(1))) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}

bool CallAnalyzer::visitSub(BinaryOperator &I) {
  // Try to handle a special case: we can fold computing the difference of two
  // constant-related pointers.
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the subtract to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrDiffs;
        return true;
      }
    }
  }

  // Otherwise, fall back to the generic logic for simplifying and handling
  // instructions.
  return Base::visitSub(I);
}

bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Constant *CLHS = dyn_cast<Constant>(LHS);
  if (!CLHS)
    CLHS = SimplifiedValues.lookup(LHS);
  Constant *CRHS = dyn_cast<Constant>(RHS);
  if (!CRHS)
    CRHS = SimplifiedValues.lookup(RHS);

  Value *SimpleV = nullptr;
  if (auto FI = dyn_cast<FPMathOperator>(&I))
    SimpleV = SimplifyFPBinOp(I.getOpcode(), CLHS ? CLHS : LHS,
                              CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL);
  else
    SimpleV =
        SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);

  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
    SimplifiedValues[&I] = C;

  if (SimpleV)
    return true;

  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
  disableSROA(LHS);
  disableSROA(RHS);

  // If the instruction is floating point, and the target says this operation
  // is expensive, this may eventually become a library call. Treat the cost
  // as such.
  if (I.getType()->isFloatingPointTy() &&
      TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
    Cost += InlineConstants::CallPenalty;

  return false;
}

bool CallAnalyzer::visitLoad(LoadInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  // If the data is already loaded from this address and hasn't been clobbered
  // by any stores or calls, this load is likely to be redundant and can be
  // eliminated.
  if (EnableLoadElimination &&
      !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
    LoadEliminationCost += InlineConstants::InstrCost;
    return true;
  }

  return false;
}

bool CallAnalyzer::visitStore(StoreInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  // The store can potentially clobber loads and prevent repeated loads from
  // being eliminated.
  // FIXME:
  // 1. We can probably keep an initial set of eliminatable loads subtracted
  // from the cost even when we finally see a store. We just need to disable
  // *further* accumulation of elimination savings.
  // 2. We should probably at some point thread MemorySSA for the callee into
  // this and then use that to actually compute *really* precise savings.
  disableLoadElimination();
  return false;
}

bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
  // Constant folding for extract value is trivial.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getExtractValue(COps[0], I.getIndices());
      }))
    return true;

  // SROA can look through these but give them a cost.
  return false;
}

bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
  // Constant folding for insert value is trivial.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
                                            /*InsertedValueOperand*/ COps[1],
                                            I.getIndices());
      }))
    return true;

  // SROA can look through these but give them a cost.
  return false;
}

/// Try to simplify a call site.
///
/// Takes a concrete function and callsite and tries to actually simplify it by
/// analyzing the arguments and call itself with instsimplify. Returns true if
/// it has simplified the callsite to some other entity (a constant), making it
/// free.
bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
  // FIXME: Using the instsimplify logic directly for this is inefficient
  // because we have to continually rebuild the argument list even when no
  // simplifications can be performed. Until that is fixed with remapping
  // inside of instsimplify, directly constant fold calls here.
  if (!canConstantFoldCallTo(&Call, F))
    return false;

  // Try to re-map the arguments to constants.
  SmallVector<Constant *, 4> ConstantArgs;
  ConstantArgs.reserve(Call.arg_size());
  for (Value *I : Call.args()) {
    Constant *C = dyn_cast<Constant>(I);
    if (!C)
      C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(I));
    if (!C)
      return false; // This argument doesn't map to a constant.

    ConstantArgs.push_back(C);
  }
  if (Constant *C = ConstantFoldCall(&Call, F, ConstantArgs)) {
    SimplifiedValues[&Call] = C;
    return true;
  }

  return false;
}

bool CallAnalyzer::visitCallBase(CallBase &Call) {
  if (Call.hasFnAttr(Attribute::ReturnsTwice) &&
      !F.hasFnAttribute(Attribute::ReturnsTwice)) {
    // This aborts the entire analysis.
    ExposesReturnsTwice = true;
    return false;
  }
  if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
    ContainsNoDuplicateCall = true;

  if (Function *F = Call.getCalledFunction()) {
    // When we have a concrete function, first try to simplify it directly.
    if (simplifyCallSite(F, Call))
      return true;

    // Next check if it is an intrinsic we know about.
    // FIXME: Lift this into part of the InstVisitor.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
      switch (II->getIntrinsicID()) {
      default:
        if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
          disableLoadElimination();
        return Base::visitCallBase(Call);

      case Intrinsic::load_relative:
        // This is normally lowered to 4 LLVM instructions.
        Cost += 3 * InlineConstants::InstrCost;
        return false;

      case Intrinsic::memset:
      case Intrinsic::memcpy:
      case Intrinsic::memmove:
        disableLoadElimination();
        // SROA can usually chew through these intrinsics, but they aren't free.
        return false;
      case Intrinsic::icall_branch_funnel:
      case Intrinsic::localescape:
        HasUninlineableIntrinsic = true;
        return false;
      case Intrinsic::vastart:
        InitsVargArgs = true;
        return false;
      }
    }

    if (F == Call.getFunction()) {
      // This flag will fully abort the analysis, so don't bother with anything
      // else.
      IsRecursiveCall = true;
      return false;
    }

    if (TTI.isLoweredToCall(F)) {
      // We account for the average 1 instruction per call argument setup
      // here.
      Cost += Call.arg_size() * InlineConstants::InstrCost;

      // Everything other than inline ASM will also have a significant cost
      // merely from making the call.
      if (!isa<InlineAsm>(Call.getCalledValue()))
        Cost += InlineConstants::CallPenalty;
    }

    if (!Call.onlyReadsMemory())
      disableLoadElimination();
    return Base::visitCallBase(Call);
  }

  // Otherwise we're in a very special case -- an indirect function call. See
  // if we can be particularly clever about this.
  Value *Callee = Call.getCalledValue();

  // First, pay the price of the argument setup. We account for the average
  // 1 instruction per call argument setup here.
  Cost += Call.arg_size() * InlineConstants::InstrCost;

  // Next, check if this happens to be an indirect function call to a known
  // function in this inline context. If not, we've done all we can.
  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
  if (!F) {
    if (!Call.onlyReadsMemory())
      disableLoadElimination();
    return Base::visitCallBase(Call);
  }

  // If we have a constant that we are calling as a function, we can peer
  // through it and see the function target. This happens not infrequently
  // during devirtualization and so we want to give it a hefty bonus for
  // inlining, but cap that bonus in the event that inlining wouldn't pan
  // out. Pretend to inline the function, with a custom threshold.
  auto IndirectCallParams = Params;
  IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
  CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, Call,
                  IndirectCallParams);
  if (CA.analyzeCall(Call)) {
    // We were able to inline the indirect call! Subtract the cost from the
    // threshold to get the bonus we want to apply, but don't go below zero.
    Cost -= std::max(0, CA.getThreshold() - CA.getCost());
  }

  if (!F->onlyReadsMemory())
    disableLoadElimination();
  return Base::visitCallBase(Call);
}

bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
  // At least one return instruction will be free after inlining.
  bool Free = !HasReturn;
  HasReturn = true;
  return Free;
}

bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
  // We model unconditional branches as essentially free -- they really
  // shouldn't exist at all, but handling them makes the behavior of the
  // inliner more regular and predictable. Interestingly, conditional branches
  // which will fold away are also free.
  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
         dyn_cast_or_null<ConstantInt>(
             SimplifiedValues.lookup(BI.getCondition()));
}

bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
  bool CheckSROA = SI.getType()->isPointerTy();
  Value *TrueVal = SI.getTrueValue();
  Value *FalseVal = SI.getFalseValue();

  Constant *TrueC = dyn_cast<Constant>(TrueVal);
  if (!TrueC)
    TrueC = SimplifiedValues.lookup(TrueVal);
  Constant *FalseC = dyn_cast<Constant>(FalseVal);
  if (!FalseC)
    FalseC = SimplifiedValues.lookup(FalseVal);
  Constant *CondC =
      dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));

  if (!CondC) {
    // Select C, X, X => X
    if (TrueC == FalseC && TrueC) {
      SimplifiedValues[&SI] = TrueC;
      return true;
    }

    if (!CheckSROA)
      return Base::visitSelectInst(SI);

    std::pair<Value *, APInt> TrueBaseAndOffset =
        ConstantOffsetPtrs.lookup(TrueVal);
    std::pair<Value *, APInt> FalseBaseAndOffset =
        ConstantOffsetPtrs.lookup(FalseVal);
    if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
      ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;

      Value *SROAArg;
      DenseMap<Value *, int>::iterator CostIt;
      if (lookupSROAArgAndCost(TrueVal, SROAArg, CostIt))
        SROAArgValues[&SI] = SROAArg;
      return true;
    }

    return Base::visitSelectInst(SI);
  }

  // Select condition is a constant.
  Value *SelectedV = CondC->isAllOnesValue()
                         ? TrueVal
                         : (CondC->isNullValue()) ? FalseVal : nullptr;
  if (!SelectedV) {
    // Condition is a vector constant that is not all 1s or all 0s.
    // If all operands are constants, ConstantExpr::getSelect() can handle the
    // cases such as select vectors.
    if (TrueC && FalseC) {
      if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {
        SimplifiedValues[&SI] = C;
        return true;
      }
    }
    return Base::visitSelectInst(SI);
  }

  // Condition is either all 1s or all 0s. SI can be simplified.
  if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
    SimplifiedValues[&SI] = SelectedC;
    return true;
  }

  if (!CheckSROA)
    return true;

  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(SelectedV);
  if (BaseAndOffset.first) {
    ConstantOffsetPtrs[&SI] = BaseAndOffset;

    Value *SROAArg;
    DenseMap<Value *, int>::iterator CostIt;
    if (lookupSROAArgAndCost(SelectedV, SROAArg, CostIt))
      SROAArgValues[&SI] = SROAArg;
  }

  return true;
}

bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
  // We model unconditional switches as free, see the comments on handling
  // branches.
  if (isa<ConstantInt>(SI.getCondition()))
    return true;
  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
    if (isa<ConstantInt>(V))
      return true;

  // Assume the most general case where the switch is lowered into
  // either a jump table, bit test, or a balanced binary tree consisting of
  // case clusters without merging adjacent clusters with the same
  // destination. We do not consider the switches that are lowered with a mix
  // of jump table/bit test/binary search tree. The cost of the switch is
  // proportional to the size of the tree or the size of jump table range.
  //
  // NB: We convert large switches which are just used to initialize large phi
  // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
  // inlining those. It will prevent inlining in cases where the optimization
  // does not (yet) fire.

  // Maximum valid cost increased in this function.
  int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;

  // Exit early for a large switch, assuming one case needs at least one
  // instruction.
  // FIXME: This is not true for a bit test, but ignore such case for now to
  // save compile-time.
  int64_t CostLowerBound =
      std::min((int64_t)CostUpperBound,
               (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);

  if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
    Cost = CostLowerBound;
    return false;
  }

  unsigned JumpTableSize = 0;
  unsigned NumCaseCluster =
      TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);

  // If suitable for a jump table, consider the cost for the table size and
  // branch to destination.
  if (JumpTableSize) {
    int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
                     4 * InlineConstants::InstrCost;

    Cost = std::min((int64_t)CostUpperBound, JTCost + Cost);
    return false;
  }

  // Considering forming a binary search, we should find the number of nodes
  // which is same as the number of comparisons when lowered. For a given
  // number of clusters, n, we can define a recursive function, f(n), to find
  // the number of nodes in the tree. The recursion is:
  //   f(n) = 1 + f(n/2) + f(n - n/2), when n > 3,
  //   and f(n) = n, when n <= 3.
  // This will lead to a binary tree where the leaf should be either f(2) or
  // f(3) when n > 3. So, the number of comparisons from leaves should be n,
  // while the number of non-leaf nodes should be:
  //   2^(log2(n) - 1) - 1
  //   = 2^log2(n) * 2^-1 - 1
  //   = n / 2 - 1.
  // Considering comparisons from leaf and non-leaf nodes, we can estimate the
  // number of comparisons in a simple closed form:
  //   n + n / 2 - 1 = n * 3 / 2 - 1
  if (NumCaseCluster <= 3) {
    // Suppose a comparison includes one compare and one conditional branch.
    Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
    return false;
  }

  int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
  int64_t SwitchCost =
      ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;

  Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost);
  return false;
}

bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
  // We never want to inline functions that contain an indirectbr. This is
  // incorrect because all the blockaddress's (in static global initializers
  // for example) would be referring to the original function, and this
  // indirect jump would jump from the inlined copy of the function into the
  // original function which is extremely undefined behavior.
  // FIXME: This logic isn't really right; we can safely inline functions with
  // indirectbr's as long as no other function or global references the
  // blockaddress of a block within the current function.
  HasIndirectBr = true;
  return false;
}

bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a resume instruction.
  return false;
}

bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a cleanupret instruction.
  return false;
}

bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a catchret instruction.
  return false;
}

bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
  // FIXME: It might be reasonable to discount the cost of instructions leading
  // to unreachable as they have the lowest possible impact on both runtime and
  // code size.
  return true; // No actual code is needed for unreachable.
}

bool CallAnalyzer::visitInstruction(Instruction &I) {
  // Some instructions are free. All of the free intrinsics can also be
  // handled by SROA, etc.
  if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
    return true;

  // We found something we don't understand or can't handle. Mark any SROA-able
  // values in the operand list as no longer viable.
  for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
    disableSROA(*OI);

  return false;
}

/// Analyze a basic block for its contribution to the inline cost.
///
/// This method walks the analyzer over every instruction in the given basic
/// block and accounts for their cost during inlining at this callsite.
1537 
1538 /// Analyze a basic block for its contribution to the inline cost.
1539 ///
1540 /// This method walks the analyzer over every instruction in the given basic
1541 /// block and accounts for their cost during inlining at this callsite. It
1542 /// aborts early if the threshold has been exceeded or an impossible-to-inline
1543 /// construct has been detected. It returns false if inlining is no longer
1544 /// viable, and true if inlining remains viable.
1545 InlineResult
1546 CallAnalyzer::analyzeBlock(BasicBlock *BB,
1547                            SmallPtrSetImpl<const Value *> &EphValues) {
1548   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
1549     // FIXME: Currently, the number of instructions in a function, regardless
1550     // of our ability to simplify them during inlining to constants or dead
1551     // code, is actually used by the vector bonus heuristic. As long as that's
1552     // true, we have to special case debug intrinsics here to prevent
1553     // differences in inlining due to debug symbols. Eventually, the number of
1554     // unsimplified instructions shouldn't factor into the cost computation,
1555     // but until then, hack around it here.
1556     if (isa<DbgInfoIntrinsic>(I))
1557       continue;
1558 
1559     // Skip ephemeral values.
1560     if (EphValues.count(&*I))
1561       continue;
1562 
1563     ++NumInstructions;
1564     if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
1565       ++NumVectorInstructions;
1566 
1567     // If the instruction simplified to a constant, there is no cost to this
1568     // instruction. Visit the instructions using our InstVisitor to account for
1569     // all of the per-instruction logic. The visit tree returns true if we
1570     // consumed the instruction in any way, and false if the instruction's base
1571     // cost should count against inlining.
1572     if (Base::visit(&*I))
1573       ++NumInstructionsSimplified;
1574     else
1575       Cost += InlineConstants::InstrCost;
1576 
1577     using namespace ore;
1578     // If visiting this instruction detected an uninlinable pattern, abort.
1579     InlineResult IR;
1580     if (IsRecursiveCall)
1581       IR = "recursive";
1582     else if (ExposesReturnsTwice)
1583       IR = "exposes returns twice";
1584     else if (HasDynamicAlloca)
1585       IR = "dynamic alloca";
1586     else if (HasIndirectBr)
1587       IR = "indirect branch";
1588     else if (HasUninlineableIntrinsic)
1589       IR = "uninlinable intrinsic";
1590     else if (InitsVargArgs)
1591       IR = "varargs";
1592     if (!IR) {
1593       if (ORE)
1594         ORE->emit([&]() {
1595           return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
1596                                           &CandidateCall)
1597                  << NV("Callee", &F) << " has uninlinable pattern ("
1598                  << NV("InlineResult", IR.message)
1599                  << ") and cost is not fully computed";
1600         });
1601       return IR;
1602     }
1603 
1604     // If the caller is a recursive function then we don't want to inline
1605     // functions which allocate a lot of stack space because it would increase
1606     // the caller stack usage dramatically.
1607     if (IsCallerRecursive &&
1608         AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
1609       InlineResult IR = "recursive and allocates too much stack space";
1610       if (ORE)
1611         ORE->emit([&]() {
1612           return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
1613                                           &CandidateCall)
1614                  << NV("Callee", &F) << " is " << NV("InlineResult", IR.message)
1615                  << ". Cost is not fully computed";
1616         });
1617       return IR;
1618     }
1619 
1620     // Check if we've passed the maximum possible threshold so we don't spin in
1621     // huge basic blocks that will never inline.
1622     if (Cost >= Threshold && !ComputeFullInlineCost)
1623       return false;
1624   }
1625 
1626   return true;
1627 }
1628 
1629 /// Compute the base pointer and cumulative constant offsets for V.
1630 /// 1631 /// This strips all constant offsets off of V, leaving it the base pointer, and 1632 /// accumulates the total constant offset applied in the returned constant. It 1633 /// returns 0 if V is not a pointer, and returns the constant '0' if there are 1634 /// no constant offsets applied. 1635 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { 1636 if (!V->getType()->isPointerTy()) 1637 return nullptr; 1638 1639 unsigned AS = V->getType()->getPointerAddressSpace(); 1640 unsigned IntPtrWidth = DL.getIndexSizeInBits(AS); 1641 APInt Offset = APInt::getNullValue(IntPtrWidth); 1642 1643 // Even though we don't look through PHI nodes, we could be called on an 1644 // instruction in an unreachable block, which may be on a cycle. 1645 SmallPtrSet<Value *, 4> Visited; 1646 Visited.insert(V); 1647 do { 1648 if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { 1649 if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset)) 1650 return nullptr; 1651 V = GEP->getPointerOperand(); 1652 } else if (Operator::getOpcode(V) == Instruction::BitCast) { 1653 V = cast<Operator>(V)->getOperand(0); 1654 } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { 1655 if (GA->isInterposable()) 1656 break; 1657 V = GA->getAliasee(); 1658 } else { 1659 break; 1660 } 1661 assert(V->getType()->isPointerTy() && "Unexpected operand type!"); 1662 } while (Visited.insert(V).second); 1663 1664 Type *IntPtrTy = DL.getIntPtrType(V->getContext(), AS); 1665 return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset)); 1666 } 1667 1668 /// Find dead blocks due to deleted CFG edges during inlining. 1669 /// 1670 /// If we know the successor of the current block, \p CurrBB, has to be \p 1671 /// NextBB, the other successors of \p CurrBB are dead if these successors have 1672 /// no live incoming CFG edges. If one block is found to be dead, we can 1673 /// continue growing the dead block list by checking the successors of the dead 1674 /// blocks to see if all their incoming edges are dead or not. 1675 void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) { 1676 auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) { 1677 // A CFG edge is dead if the predecessor is dead or the predecessor has a 1678 // known successor which is not the one under exam. 1679 return (DeadBlocks.count(Pred) || 1680 (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ)); 1681 }; 1682 1683 auto IsNewlyDead = [&](BasicBlock *BB) { 1684 // If all the edges to a block are dead, the block is also dead. 1685 return (!DeadBlocks.count(BB) && 1686 llvm::all_of(predecessors(BB), 1687 [&](BasicBlock *P) { return IsEdgeDead(P, BB); })); 1688 }; 1689 1690 for (BasicBlock *Succ : successors(CurrBB)) { 1691 if (Succ == NextBB || !IsNewlyDead(Succ)) 1692 continue; 1693 SmallVector<BasicBlock *, 4> NewDead; 1694 NewDead.push_back(Succ); 1695 while (!NewDead.empty()) { 1696 BasicBlock *Dead = NewDead.pop_back_val(); 1697 if (DeadBlocks.insert(Dead)) 1698 // Continue growing the dead block lists. 1699 for (BasicBlock *S : successors(Dead)) 1700 if (IsNewlyDead(S)) 1701 NewDead.push_back(S); 1702 } 1703 } 1704 } 1705 1706 /// Analyze a call site for potential inlining. 1707 /// 1708 /// Returns true if inlining this call is viable, and false if it is not 1709 /// viable. It computes the cost and adjusts the threshold based on numerous 1710 /// factors and heuristics. 
If this method returns false but the computed cost
1711 /// is below the computed threshold, then inlining was forcibly disabled by
1712 /// some artifact of the routine.
1713 InlineResult CallAnalyzer::analyzeCall(CallBase &Call) {
1714   ++NumCallsAnalyzed;
1715 
1716   // Perform some tweaks to the cost and threshold based on the direct
1717   // callsite information.
1718 
1719   // We want to more aggressively inline vector-dense kernels, so up the
1720   // threshold, and we'll lower it if the % of vector instructions gets too
1721   // low. Note that these bonuses are somewhat arbitrary and evolved over time
1722   // by accident as much as because they are principled bonuses.
1723   //
1724   // FIXME: It would be nice to remove all such bonuses. At least it would be
1725   // nice to base the bonus values on something more scientific.
1726   assert(NumInstructions == 0);
1727   assert(NumVectorInstructions == 0);
1728 
1729   // Update the threshold based on callsite properties.
1730   updateThreshold(Call, F);
1731 
1732   // While Threshold depends on commandline options that can take negative
1733   // values, we want to enforce the invariant that the computed threshold and
1734   // bonuses are non-negative.
1735   assert(Threshold >= 0);
1736   assert(SingleBBBonus >= 0);
1737   assert(VectorBonus >= 0);
1738 
1739   // Speculatively apply all possible bonuses to Threshold. If cost exceeds
1740   // this Threshold at any time, and cost cannot decrease, we can stop
1741   // processing the rest of the function body.
1742   Threshold += (SingleBBBonus + VectorBonus);
1743 
1744   // Give out bonuses for the callsite, as the instructions setting them up
1745   // will be gone after inlining.
1746   Cost -= getCallsiteCost(Call, DL);
1747 
1748   // If this function uses the coldcc calling convention, prefer not to inline
1749   // it.
1750   if (F.getCallingConv() == CallingConv::Cold)
1751     Cost += InlineConstants::ColdccPenalty;
1752 
1753   // Check if we're done. This can happen due to bonuses and penalties.
1754   if (Cost >= Threshold && !ComputeFullInlineCost)
1755     return "high cost";
1756 
1757   if (F.empty())
1758     return true;
1759 
1760   Function *Caller = Call.getFunction();
1761   // Check if the caller function is recursive itself.
1762   for (User *U : Caller->users()) {
1763     CallBase *Call = dyn_cast<CallBase>(U);
1764     if (Call && Call->getFunction() == Caller) {
1765       IsCallerRecursive = true;
1766       break;
1767     }
1768   }
1769 
1770   // Populate our simplified values by mapping from function arguments to call
1771   // arguments with known important simplifications.
1772   auto CAI = Call.arg_begin();
1773   for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
1774        FAI != FAE; ++FAI, ++CAI) {
1775     assert(CAI != Call.arg_end());
1776     if (Constant *C = dyn_cast<Constant>(CAI))
1777       SimplifiedValues[&*FAI] = C;
1778 
1779     Value *PtrArg = *CAI;
1780     if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
1781       ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());
1782 
1783       // We can SROA any pointer arguments derived from alloca instructions.
1784 if (isa<AllocaInst>(PtrArg)) { 1785 SROAArgValues[&*FAI] = PtrArg; 1786 SROAArgCosts[PtrArg] = 0; 1787 } 1788 } 1789 } 1790 NumConstantArgs = SimplifiedValues.size(); 1791 NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size(); 1792 NumAllocaArgs = SROAArgValues.size(); 1793 1794 // FIXME: If a caller has multiple calls to a callee, we end up recomputing 1795 // the ephemeral values multiple times (and they're completely determined by 1796 // the callee, so this is purely duplicate work). 1797 SmallPtrSet<const Value *, 32> EphValues; 1798 CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues); 1799 1800 // The worklist of live basic blocks in the callee *after* inlining. We avoid 1801 // adding basic blocks of the callee which can be proven to be dead for this 1802 // particular call site in order to get more accurate cost estimates. This 1803 // requires a somewhat heavyweight iteration pattern: we need to walk the 1804 // basic blocks in a breadth-first order as we insert live successors. To 1805 // accomplish this, prioritizing for small iterations because we exit after 1806 // crossing our threshold, we use a small-size optimized SetVector. 1807 typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>, 1808 SmallPtrSet<BasicBlock *, 16>> 1809 BBSetVector; 1810 BBSetVector BBWorklist; 1811 BBWorklist.insert(&F.getEntryBlock()); 1812 bool SingleBB = true; 1813 // Note that we *must not* cache the size, this loop grows the worklist. 1814 for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { 1815 // Bail out the moment we cross the threshold. This means we'll under-count 1816 // the cost, but only when undercounting doesn't matter. 1817 if (Cost >= Threshold && !ComputeFullInlineCost) 1818 break; 1819 1820 BasicBlock *BB = BBWorklist[Idx]; 1821 if (BB->empty()) 1822 continue; 1823 1824 // Disallow inlining a blockaddress. A blockaddress only has defined 1825 // behavior for an indirect branch in the same function, and we do not 1826 // currently support inlining indirect branches. But, the inliner may not 1827 // see an indirect branch that ends up being dead code at a particular call 1828 // site. If the blockaddress escapes the function, e.g., via a global 1829 // variable, inlining may lead to an invalid cross-function reference. 1830 if (BB->hasAddressTaken()) 1831 return "blockaddress"; 1832 1833 // Analyze the cost of this block. If we blow through the threshold, this 1834 // returns false, and we can bail on out. 1835 InlineResult IR = analyzeBlock(BB, EphValues); 1836 if (!IR) 1837 return IR; 1838 1839 Instruction *TI = BB->getTerminator(); 1840 1841 // Add in the live successors by first checking whether we have terminator 1842 // that may be simplified based on the values simplified by this call. 1843 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { 1844 if (BI->isConditional()) { 1845 Value *Cond = BI->getCondition(); 1846 if (ConstantInt *SimpleCond = 1847 dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) { 1848 BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 
1 : 0);
1849           BBWorklist.insert(NextBB);
1850           KnownSuccessors[BB] = NextBB;
1851           findDeadBlocks(BB, NextBB);
1852           continue;
1853         }
1854       }
1855     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
1856       Value *Cond = SI->getCondition();
1857       if (ConstantInt *SimpleCond =
1858               dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1859         BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
1860         BBWorklist.insert(NextBB);
1861         KnownSuccessors[BB] = NextBB;
1862         findDeadBlocks(BB, NextBB);
1863         continue;
1864       }
1865     }
1866 
1867     // If we're unable to select a particular successor, just count all of
1868     // them.
1869     for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
1870          ++TIdx)
1871       BBWorklist.insert(TI->getSuccessor(TIdx));
1872 
1873     // If we had any successors at this point, then post-inlining is likely to
1874     // have them as well. Note that we assume any basic blocks which existed
1875     // due to branches or switches which folded above will also fold after
1876     // inlining.
1877     if (SingleBB && TI->getNumSuccessors() > 1) {
1878       // Take off the bonus we applied to the threshold.
1879       Threshold -= SingleBBBonus;
1880       SingleBB = false;
1881     }
1882   }
1883 
1884   bool OnlyOneCallAndLocalLinkage =
1885       F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction();
1886   // If this is a noduplicate call, we can still inline as long as
1887   // inlining this would cause the removal of the inlined function (so the
1888   // noduplicate call is not actually duplicated, just moved).
1889   if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
1890     return "noduplicate";
1891 
1892   // Loops generally act a lot like calls in that they act like barriers to
1893   // movement, require a certain amount of setup, etc. So when optimizing for
1894   // size, we penalize call sites whose callee contains loops. We do this after
1895   // all other costs here, so we will likely only be dealing with relatively
1896   // small functions (and hence DT and LI will hopefully be cheap).
1897   if (Caller->hasMinSize()) {
1898     DominatorTree DT(F);
1899     LoopInfo LI(DT);
1900     int NumLoops = 0;
1901     for (Loop *L : LI) {
1902       // Ignore loops that will not be executed
1903       if (DeadBlocks.count(L->getHeader()))
1904         continue;
1905       NumLoops++;
1906     }
1907     Cost += NumLoops * InlineConstants::CallPenalty;
1908   }
1909 
1910   // We applied the maximum possible vector bonus at the beginning. Now,
1911   // subtract the excess bonus, if any, from the Threshold before
1912   // comparing against Cost.
1913   if (NumVectorInstructions <= NumInstructions / 10)
1914     Threshold -= VectorBonus;
1915   else if (NumVectorInstructions <= NumInstructions / 2)
1916     Threshold -= VectorBonus / 2;
1917 
1918   return Cost < std::max(1, Threshold);
1919 }
1920 
1921 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1922 /// Dump stats about this call's analysis.
1923 LLVM_DUMP_METHOD void CallAnalyzer::dump() { 1924 #define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n" 1925 DEBUG_PRINT_STAT(NumConstantArgs); 1926 DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); 1927 DEBUG_PRINT_STAT(NumAllocaArgs); 1928 DEBUG_PRINT_STAT(NumConstantPtrCmps); 1929 DEBUG_PRINT_STAT(NumConstantPtrDiffs); 1930 DEBUG_PRINT_STAT(NumInstructionsSimplified); 1931 DEBUG_PRINT_STAT(NumInstructions); 1932 DEBUG_PRINT_STAT(SROACostSavings); 1933 DEBUG_PRINT_STAT(SROACostSavingsLost); 1934 DEBUG_PRINT_STAT(LoadEliminationCost); 1935 DEBUG_PRINT_STAT(ContainsNoDuplicateCall); 1936 DEBUG_PRINT_STAT(Cost); 1937 DEBUG_PRINT_STAT(Threshold); 1938 #undef DEBUG_PRINT_STAT 1939 } 1940 #endif 1941 1942 /// Test that there are no attribute conflicts between Caller and Callee 1943 /// that prevent inlining. 1944 static bool functionsHaveCompatibleAttributes(Function *Caller, 1945 Function *Callee, 1946 TargetTransformInfo &TTI) { 1947 return TTI.areInlineCompatible(Caller, Callee) && 1948 AttributeFuncs::areInlineCompatible(*Caller, *Callee); 1949 } 1950 1951 int llvm::getCallsiteCost(CallBase &Call, const DataLayout &DL) { 1952 int Cost = 0; 1953 for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) { 1954 if (Call.isByValArgument(I)) { 1955 // We approximate the number of loads and stores needed by dividing the 1956 // size of the byval type by the target's pointer size. 1957 PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType()); 1958 unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType()); 1959 unsigned AS = PTy->getAddressSpace(); 1960 unsigned PointerSize = DL.getPointerSizeInBits(AS); 1961 // Ceiling division. 1962 unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; 1963 1964 // If it generates more than 8 stores it is likely to be expanded as an 1965 // inline memcpy so we take that as an upper bound. Otherwise we assume 1966 // one load and one store per word copied. 1967 // FIXME: The maxStoresPerMemcpy setting from the target should be used 1968 // here instead of a magic number of 8, but it's not available via 1969 // DataLayout. 1970 NumStores = std::min(NumStores, 8U); 1971 1972 Cost += 2 * NumStores * InlineConstants::InstrCost; 1973 } else { 1974 // For non-byval arguments subtract off one instruction per call 1975 // argument. 1976 Cost += InlineConstants::InstrCost; 1977 } 1978 } 1979 // The call instruction also disappears after inlining. 1980 Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty; 1981 return Cost; 1982 } 1983 1984 InlineCost llvm::getInlineCost( 1985 CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, 1986 std::function<AssumptionCache &(Function &)> &GetAssumptionCache, 1987 Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, 1988 ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { 1989 return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI, 1990 GetAssumptionCache, GetBFI, PSI, ORE); 1991 } 1992 1993 InlineCost llvm::getInlineCost( 1994 CallBase &Call, Function *Callee, const InlineParams &Params, 1995 TargetTransformInfo &CalleeTTI, 1996 std::function<AssumptionCache &(Function &)> &GetAssumptionCache, 1997 Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, 1998 ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { 1999 2000 // Cannot inline indirect calls. 
2001   if (!Callee)
2002     return llvm::InlineCost::getNever("indirect call");
2003 
2004   // Never inline calls with byval arguments that do not have the alloca
2005   // address space. Since byval arguments can be replaced with a copy to an
2006   // alloca, the inlined code would need to be adjusted to handle the fact that
2007   // the argument is in the alloca address space (so it is a little bit
2008   // complicated to solve).
2009   unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
2010   for (unsigned I = 0, E = Call.arg_size(); I != E; ++I)
2011     if (Call.isByValArgument(I)) {
2012       PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
2013       if (PTy->getAddressSpace() != AllocaAS)
2014         return llvm::InlineCost::getNever("byval arguments without alloca"
2015                                           " address space");
2016     }
2017 
2018   // Calls to functions with always-inline attributes should be inlined
2019   // whenever possible.
2020   if (Call.hasFnAttr(Attribute::AlwaysInline)) {
2021     auto IsViable = isInlineViable(*Callee);
2022     if (IsViable)
2023       return llvm::InlineCost::getAlways("always inline attribute");
2024     return llvm::InlineCost::getNever(IsViable.message);
2025   }
2026 
2027   // Never inline functions with conflicting attributes (unless the callee
2028   // has the always-inline attribute).
2029   Function *Caller = Call.getCaller();
2030   if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI))
2031     return llvm::InlineCost::getNever("conflicting attributes");
2032 
2033   // Don't inline this call if the caller has the optnone attribute.
2034   if (Caller->hasOptNone())
2035     return llvm::InlineCost::getNever("optnone attribute");
2036 
2037   // Don't inline a function that treats null pointer as valid into a caller
2038   // that does not have this attribute.
2039   if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
2040     return llvm::InlineCost::getNever("nullptr definitions incompatible");
2041 
2042   // Don't inline functions which can be interposed at link-time.
2043   if (Callee->isInterposable())
2044     return llvm::InlineCost::getNever("interposable");
2045 
2046   // Don't inline functions marked noinline.
2047   if (Callee->hasFnAttribute(Attribute::NoInline))
2048     return llvm::InlineCost::getNever("noinline function attribute");
2049 
2050   // Don't inline call sites marked noinline.
2051   if (Call.isNoInline())
2052     return llvm::InlineCost::getNever("noinline call site attribute");
2053 
2054   LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
2055                           << "... (caller:" << Caller->getName() << ")\n");
2056 
2057   CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee,
2058                   Call, Params);
2059   InlineResult ShouldInline = CA.analyzeCall(Call);
2060 
2061   LLVM_DEBUG(CA.dump());
2062 
2063   // Check if there was a reason to force inlining or no inlining.
2064   if (!ShouldInline && CA.getCost() < CA.getThreshold())
2065     return InlineCost::getNever(ShouldInline.message);
2066   if (ShouldInline && CA.getCost() >= CA.getThreshold())
2067     return InlineCost::getAlways("empty function");
2068 
2069   return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
2070 }
2071 
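// Rough usage sketch of the entry point above (illustrative only; the real
// wiring lives in the inliner passes, and ACT, CalleeTTI, PSI and ORE are
// placeholders for whatever analyses the caller already holds):
//
//   std::function<AssumptionCache &(Function &)> GetAC =
//       [&](Function &F) -> AssumptionCache & {
//     return ACT->getAssumptionCache(F);
//   };
//   InlineCost IC = llvm::getInlineCost(Call, llvm::getInlineParams(),
//                                       CalleeTTI, GetAC, /*GetBFI=*/None,
//                                       PSI, ORE);
//   if (IC.isAlways() ||
//       (!IC.isNever() && IC.getCost() < IC.getThreshold()))
//     ... treat the call site as profitable to inline ...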
2072 InlineResult llvm::isInlineViable(Function &F) {
2073   bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
2074   for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
2075     // Disallow inlining of functions which contain indirect branches or
2076     // blockaddresses.
2077     if (isa<IndirectBrInst>(BI->getTerminator()))
2078       return "contains indirect branches";
2079 
2080     if (BI->hasAddressTaken())
2081       return "uses block address";
2082 
2083     for (auto &II : *BI) {
2084       CallBase *Call = dyn_cast<CallBase>(&II);
2085       if (!Call)
2086         continue;
2087 
2088       // Disallow recursive calls.
2089       if (&F == Call->getCalledFunction())
2090         return "recursive call";
2091 
2092       // Disallow calls which expose returns-twice to a function not previously
2093       // attributed as such.
2094       if (!ReturnsTwice && isa<CallInst>(Call) &&
2095           cast<CallInst>(Call)->canReturnTwice())
2096         return "exposes returns-twice attribute";
2097 
2098       if (Call->getCalledFunction())
2099         switch (Call->getCalledFunction()->getIntrinsicID()) {
2100         default:
2101           break;
2102         // Disallow inlining of @llvm.icall.branch.funnel because the current
2103         // backend can't separate call targets from call arguments.
2104         case llvm::Intrinsic::icall_branch_funnel:
2105           return "disallowed inlining of @llvm.icall.branch.funnel";
2106         // Disallow inlining functions that call @llvm.localescape. Doing this
2107         // correctly would require major changes to the inliner.
2108         case llvm::Intrinsic::localescape:
2109           return "disallowed inlining of @llvm.localescape";
2110         // Disallow inlining of functions that initialize VarArgs with va_start.
2111         case llvm::Intrinsic::vastart:
2112           return "contains VarArgs initialized with va_start";
2113         }
2114     }
2115   }
2116 
2117   return true;
2118 }
2119 
2120 // APIs to create InlineParams based on command line flags and/or other
2121 // parameters.
2122 
2123 InlineParams llvm::getInlineParams(int Threshold) {
2124   InlineParams Params;
2125 
2126   // This field is the threshold to use for a callee by default. This is
2127   // derived from one or more of:
2128   //  * optimization or size-optimization levels,
2129   //  * a value passed to the createFunctionInliningPass function, or
2130   //  * the -inline-threshold flag.
2131   // If the -inline-threshold flag is explicitly specified, that is used
2132   // irrespective of anything else.
2133   if (InlineThreshold.getNumOccurrences() > 0)
2134     Params.DefaultThreshold = InlineThreshold;
2135   else
2136     Params.DefaultThreshold = Threshold;
2137 
2138   // Set the HintThreshold knob from the -inlinehint-threshold.
2139   Params.HintThreshold = HintThreshold;
2140 
2141   // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
2142   Params.HotCallSiteThreshold = HotCallSiteThreshold;
2143 
2144   // If the -locally-hot-callsite-threshold is explicitly specified, use it to
2145   // populate LocallyHotCallSiteThreshold. Later, we populate
2146   // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
2147   // we know that the optimization level is O3 (in the getInlineParams variant
2148   // that takes the opt and size levels).
2149   // FIXME: Remove this check (and make the assignment unconditional) after
2150   // addressing size regression issues at O2.
2151   if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
2152     Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
2153 
2154   // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold.
2155   Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
2156 
2157   // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
2158   // -inline-threshold commandline option is not explicitly given. If that
2159   // option is present, then its value applies even for callees with size and
2160   // minsize attributes.
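  // For example (illustrative): if -inline-threshold=500 is given explicitly,
  // DefaultThreshold is 500 and the two size thresholds below remain unset, so
  // 500 governs even optsize/minsize callees; without -inline-threshold they
  // fall back to the InlineConstants defaults.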
2161 // If the -inline-threshold is not specified, set the ColdThreshold from the 2162 // -inlinecold-threshold even if it is not explicitly passed. If 2163 // -inline-threshold is specified, then -inlinecold-threshold needs to be 2164 // explicitly specified to set the ColdThreshold knob 2165 if (InlineThreshold.getNumOccurrences() == 0) { 2166 Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold; 2167 Params.OptSizeThreshold = InlineConstants::OptSizeThreshold; 2168 Params.ColdThreshold = ColdThreshold; 2169 } else if (ColdThreshold.getNumOccurrences() > 0) { 2170 Params.ColdThreshold = ColdThreshold; 2171 } 2172 return Params; 2173 } 2174 2175 InlineParams llvm::getInlineParams() { 2176 return getInlineParams(InlineThreshold); 2177 } 2178 2179 // Compute the default threshold for inlining based on the opt level and the 2180 // size opt level. 2181 static int computeThresholdFromOptLevels(unsigned OptLevel, 2182 unsigned SizeOptLevel) { 2183 if (OptLevel > 2) 2184 return InlineConstants::OptAggressiveThreshold; 2185 if (SizeOptLevel == 1) // -Os 2186 return InlineConstants::OptSizeThreshold; 2187 if (SizeOptLevel == 2) // -Oz 2188 return InlineConstants::OptMinSizeThreshold; 2189 return InlineThreshold; 2190 } 2191 2192 InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) { 2193 auto Params = 2194 getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel)); 2195 // At O3, use the value of -locally-hot-callsite-threshold option to populate 2196 // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only 2197 // when it is specified explicitly. 2198 if (OptLevel > 2) 2199 Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold; 2200 return Params; 2201 } 2202
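
// Illustrative summary of the opt-level mapping above (defaults, not
// guarantees): at -O3 the default threshold becomes OptAggressiveThreshold and
// the -locally-hot-callsite-threshold value is applied; at -Os it becomes
// OptSizeThreshold; at -Oz it becomes OptMinSizeThreshold; otherwise the
// -inline-threshold default is used.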