//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements inline cost analysis.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/InlineCost.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "inline-cost"

STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");

static cl::opt<int> InlineThreshold(
    "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
    cl::desc("Control the amount of inlining to perform (default = 225)"));

static cl::opt<int> HintThreshold(
    "inlinehint-threshold", cl::Hidden, cl::init(325),
    cl::desc("Threshold for inlining functions with inline hint"));

static cl::opt<int>
    ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
                          cl::init(45),
                          cl::desc("Threshold for inlining cold callsites"));
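
// Note: the thresholds above and below are ordinary cl::opt flags, so they can
// be overridden from the command line, e.g. something like
// `opt -inline-threshold=500` or `clang -mllvm -inline-threshold=500`
// (illustrative invocations; the exact driver spelling may differ).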
// We introduce this threshold to help performance of instrumentation based
// PGO before we actually hook up the inliner with analysis passes such as
// BPI and BFI.
static cl::opt<int> ColdThreshold(
    "inlinecold-threshold", cl::Hidden, cl::init(45),
    cl::desc("Threshold for inlining functions with cold attribute"));

static cl::opt<int>
    HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
                         cl::ZeroOrMore,
                         cl::desc("Threshold for hot callsites"));

static cl::opt<int> LocallyHotCallSiteThreshold(
    "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore,
    cl::desc("Threshold for locally hot callsites"));

static cl::opt<int> ColdCallSiteRelFreq(
    "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
    cl::desc("Maximum block frequency, expressed as a percentage of caller's "
             "entry frequency, for a callsite to be cold in the absence of "
             "profile information."));

static cl::opt<int> HotCallSiteRelFreq(
    "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore,
    cl::desc("Minimum block frequency, expressed as a multiple of caller's "
             "entry frequency, for a callsite to be hot in the absence of "
             "profile information."));

static cl::opt<bool> OptComputeFullInlineCost(
    "inline-cost-full", cl::Hidden, cl::init(false),
    cl::desc("Compute the full inline cost of a call site even when the cost "
             "exceeds the threshold."));

namespace {

class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  typedef InstVisitor<CallAnalyzer, bool> Base;
  friend class InstVisitor<CallAnalyzer, bool>;

  /// The TargetTransformInfo available for this compilation.
  const TargetTransformInfo &TTI;

  /// Getter for the cache of @llvm.assume intrinsics.
  std::function<AssumptionCache &(Function &)> &GetAssumptionCache;

  /// Getter for BlockFrequencyInfo.
  Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI;

  /// Profile summary information.
  ProfileSummaryInfo *PSI;

  /// The called function.
  Function &F;

  // Cache the DataLayout since we use it a lot.
  const DataLayout &DL;

  /// The OptimizationRemarkEmitter available for this compilation.
  OptimizationRemarkEmitter *ORE;

  /// The candidate callsite being analyzed. Please do not use this to do
  /// analysis in the caller function; we want the inline cost query to be
  /// easily cacheable. Instead, use the cover function paramHasAttr.
  CallSite CandidateCS;

  /// Tunable parameters that control the analysis.
  const InlineParams &Params;

  int Threshold;
  int Cost;
  bool ComputeFullInlineCost;

  bool IsCallerRecursive;
  bool IsRecursiveCall;
  bool ExposesReturnsTwice;
  bool HasDynamicAlloca;
  bool ContainsNoDuplicateCall;
  bool HasReturn;
  bool HasIndirectBr;
  bool HasUninlineableIntrinsic;
  bool InitsVargArgs;

  /// Number of bytes allocated statically by the callee.
  uint64_t AllocatedSize;
  unsigned NumInstructions, NumVectorInstructions;
  int VectorBonus, TenPercentVectorBonus;
  // Bonus to be applied when the callee has only one reachable basic block.
  int SingleBBBonus;

  /// While we walk the potentially-inlined instructions, we build up and
  /// maintain a mapping of simplified values specific to this callsite. The
  /// idea is to propagate any special information we have about arguments to
  /// this call through the inlinable section of the function, and account for
  /// likely simplifications post-inlining.
  /// The most important aspect we track is CFG-altering simplifications --
  /// when we prove a basic block dead, that can cause dramatic shifts in the
  /// cost of inlining a function.
  DenseMap<Value *, Constant *> SimplifiedValues;

  /// Keep track of the values which map back (through function arguments) to
  /// allocas on the caller stack which could be simplified through SROA.
  DenseMap<Value *, Value *> SROAArgValues;

  /// The mapping of caller Alloca values to their accumulated cost savings. If
  /// we have to disable SROA for one of the allocas, this tells us how much
  /// cost must be added.
  DenseMap<Value *, int> SROAArgCosts;

  /// Keep track of values which map to a pointer base and constant offset.
  DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;

  /// Keep track of dead blocks due to the constant arguments.
  SetVector<BasicBlock *> DeadBlocks;

  /// The mapping of the blocks to their known unique successors due to the
  /// constant arguments.
  DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;

  /// Model the elimination of repeated loads that is expected to happen
  /// whenever we simplify away the stores that would otherwise cause them to
  /// be loads.
  bool EnableLoadElimination;
  SmallPtrSet<Value *, 16> LoadAddrSet;
  int LoadEliminationCost;

  // Custom simplification helper routines.
  bool isAllocaDerivedArg(Value *V);
  bool lookupSROAArgAndCost(Value *V, Value *&Arg,
                            DenseMap<Value *, int>::iterator &CostIt);
  void disableSROA(DenseMap<Value *, int>::iterator CostIt);
  void disableSROA(Value *V);
  void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
  void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                          int InstructionCost);
  void disableLoadElimination();
  bool isGEPFree(GetElementPtrInst &GEP);
  bool canFoldInboundsGEP(GetElementPtrInst &I);
  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
  bool simplifyCallSite(Function *F, CallSite CS);
  template <typename Callable>
  bool simplifyInstruction(Instruction &I, Callable Evaluate);
  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);

  /// Return true if the given argument to the function being considered for
  /// inlining has the given attribute set either at the call site or the
  /// function declaration. Primarily used to inspect call site specific
  /// attributes since these can be more precise than the ones on the callee
  /// itself.
  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);

  /// Return true if the given value is known non null within the callee if
  /// inlined through this particular callsite.
  bool isKnownNonNullInCallee(Value *V);

  /// Update Threshold based on callsite properties such as callee
  /// attributes and callee hotness for PGO builds. The Callee is explicitly
  /// passed to support analyzing indirect calls whose target is inferred by
  /// analysis.
  void updateThreshold(CallSite CS, Function &Callee);

  /// Return true if size growth is allowed when inlining the callee at CS.
  bool allowSizeGrowth(CallSite CS);

  /// Return true if \p CS is a cold callsite.
  bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);

  /// Return a higher threshold if \p CS is a hot callsite.
  Optional<int> getHotCallSiteThreshold(CallSite CS,
                                        BlockFrequencyInfo *CallerBFI);

  // Custom analysis routines.
  InlineResult analyzeBlock(BasicBlock *BB,
                            SmallPtrSetImpl<const Value *> &EphValues);

  // Disable several entry points to the visitor so we don't accidentally use
  // them by declaring but not defining them here.
  void visit(Module *);
  void visit(Module &);
  void visit(Function *);
  void visit(Function &);
  void visit(BasicBlock *);
  void visit(BasicBlock &);

  // Provide base case for our instruction visit.
  bool visitInstruction(Instruction &I);

  // Our visit overrides.
  bool visitAlloca(AllocaInst &I);
  bool visitPHI(PHINode &I);
  bool visitGetElementPtr(GetElementPtrInst &I);
  bool visitBitCast(BitCastInst &I);
  bool visitPtrToInt(PtrToIntInst &I);
  bool visitIntToPtr(IntToPtrInst &I);
  bool visitCastInst(CastInst &I);
  bool visitUnaryInstruction(UnaryInstruction &I);
  bool visitCmpInst(CmpInst &I);
  bool visitSub(BinaryOperator &I);
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitLoad(LoadInst &I);
  bool visitStore(StoreInst &I);
  bool visitExtractValue(ExtractValueInst &I);
  bool visitInsertValue(InsertValueInst &I);
  bool visitCallSite(CallSite CS);
  bool visitReturnInst(ReturnInst &RI);
  bool visitBranchInst(BranchInst &BI);
  bool visitSelectInst(SelectInst &SI);
  bool visitSwitchInst(SwitchInst &SI);
  bool visitIndirectBrInst(IndirectBrInst &IBI);
  bool visitResumeInst(ResumeInst &RI);
  bool visitCleanupReturnInst(CleanupReturnInst &RI);
  bool visitCatchReturnInst(CatchReturnInst &RI);
  bool visitUnreachableInst(UnreachableInst &I);

public:
  CallAnalyzer(const TargetTransformInfo &TTI,
               std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
               Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
               ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
               Function &Callee, CallSite CSArg, const InlineParams &Params)
      : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
        PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
        CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
        Cost(0), ComputeFullInlineCost(OptComputeFullInlineCost ||
                                       Params.ComputeFullInlineCost || ORE),
        IsCallerRecursive(false), IsRecursiveCall(false),
        ExposesReturnsTwice(false), HasDynamicAlloca(false),
        ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
        HasUninlineableIntrinsic(false), InitsVargArgs(false), AllocatedSize(0),
        NumInstructions(0), NumVectorInstructions(0), VectorBonus(0),
        SingleBBBonus(0), EnableLoadElimination(true), LoadEliminationCost(0),
        NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
        NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
        NumInstructionsSimplified(0), SROACostSavings(0),
        SROACostSavingsLost(0) {}

  InlineResult analyzeCall(CallSite CS);

  int getThreshold() { return Threshold; }
  int getCost() { return Cost; }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs;
  unsigned NumConstantOffsetPtrArgs;
  unsigned NumAllocaArgs;
  unsigned NumConstantPtrCmps;
  unsigned NumConstantPtrDiffs;
  unsigned NumInstructionsSimplified;
  unsigned SROACostSavings;
  unsigned SROACostSavingsLost;

  void dump();
};

} // namespace

/// Test whether the given value is an Alloca-derived function argument.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  return SROAArgValues.count(V);
}

/// Lookup the SROA-candidate argument and cost iterator which V maps to.
/// Returns false if V does not map to a SROA-candidate.
bool CallAnalyzer::lookupSROAArgAndCost(
    Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
  if (SROAArgValues.empty() || SROAArgCosts.empty())
    return false;

  DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
  if (ArgIt == SROAArgValues.end())
    return false;

  Arg = ArgIt->second;
  CostIt = SROAArgCosts.find(Arg);
  return CostIt != SROAArgCosts.end();
}

/// Disable SROA for the candidate marked by this cost iterator.
///
/// This marks the candidate as no longer viable for SROA, and adds the cost
/// savings associated with it back into the inline cost measurement.
void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
  // If we're no longer able to perform SROA we need to undo its cost savings
  // and prevent subsequent analysis.
  Cost += CostIt->second;
  SROACostSavings -= CostIt->second;
  SROACostSavingsLost += CostIt->second;
  SROAArgCosts.erase(CostIt);
  disableLoadElimination();
}

/// If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(V, SROAArg, CostIt))
    disableSROA(CostIt);
}

/// Accumulate the given cost for a particular SROA candidate.
void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                                      int InstructionCost) {
  CostIt->second += InstructionCost;
  SROACostSavings += InstructionCost;
}

void CallAnalyzer::disableLoadElimination() {
  if (EnableLoadElimination) {
    Cost += LoadEliminationCost;
    LoadEliminationCost = 0;
    EnableLoadElimination = false;
  }
}

/// Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
  assert(IntPtrWidth == Offset.getBitWidth());

  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
        OpC = dyn_cast<ConstantInt>(SimpleOp);
    if (!OpC)
      return false;
    if (OpC->isZero())
      continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = DL.getStructLayout(STy);
      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
      continue;
    }

    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  }
  return true;
}
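
// Illustrative example (assuming a typical 64-bit data layout): for
//   getelementptr inbounds {i32, i64}, {i32, i64}* %p, i64 1, i32 1
// the struct is 16 bytes with the i64 field at offset 8, so the accumulated
// constant offset is 1 * 16 + 8 = 24 bytes.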
/// Use TTI to check whether a GEP is free.
///
/// Respects any simplified values known during the analysis of this callsite.
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
  SmallVector<Value *, 4> Operands;
  Operands.push_back(GEP.getOperand(0));
  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
    if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
      Operands.push_back(SimpleOp);
    else
      Operands.push_back(*I);
  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands);
}

bool CallAnalyzer::visitAlloca(AllocaInst &I) {
  // Check whether inlining will turn a dynamic alloca into a static
  // alloca and handle that case.
  if (I.isArrayAllocation()) {
    Constant *Size = SimplifiedValues.lookup(I.getArraySize());
    if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
      Type *Ty = I.getAllocatedType();
      AllocatedSize = SaturatingMultiplyAdd(
          AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize);
      return Base::visitAlloca(I);
    }
  }

  // Accumulate the allocated size.
  if (I.isStaticAlloca()) {
    Type *Ty = I.getAllocatedType();
    AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize);
  }

  // We will happily inline static alloca instructions.
  if (I.isStaticAlloca())
    return Base::visitAlloca(I);

  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
  // a variety of reasons, and so we would like to not inline them into
  // functions which don't currently have a dynamic alloca. This simply
  // disables inlining altogether in the presence of a dynamic alloca.
  HasDynamicAlloca = true;
  return false;
}
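
// Illustration: a static `alloca [64 x i32]` adds 256 bytes to AllocatedSize.
// If the caller is itself recursive, analyzeBlock() below refuses to inline
// once this running total exceeds
// InlineConstants::TotalAllocaSizeRecursiveCaller.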
bool CallAnalyzer::visitPHI(PHINode &I) {
  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate its bonuses. The idea is to disable
  // SROA if it *might* be used in an inappropriate manner.

  // Phi nodes are always zero-cost.
  // FIXME: Pointer sizes may differ between different address spaces, so do we
  // need to use the correct address space in the call to getPointerSizeInBits
  // here? Or could we skip the getPointerSizeInBits call completely? As far as
  // I can see the ZeroOffset is used as a dummy value, so we can probably use
  // any bit width for the ZeroOffset?
  APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits(0));
  bool CheckSROA = I.getType()->isPointerTy();

  // Track the constant or pointer with constant offset we've seen so far.
  Constant *FirstC = nullptr;
  std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
  Value *FirstV = nullptr;

  for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = I.getIncomingBlock(i);
    // If the incoming block is dead, skip the incoming block.
    if (DeadBlocks.count(Pred))
      continue;
    // If the parent block of phi is not the known successor of the incoming
    // block, skip the incoming block.
    BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
    if (KnownSuccessor && KnownSuccessor != I.getParent())
      continue;

    Value *V = I.getIncomingValue(i);
    // If the incoming value is this phi itself, skip the incoming value.
    if (&I == V)
      continue;

    Constant *C = dyn_cast<Constant>(V);
    if (!C)
      C = SimplifiedValues.lookup(V);

    std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
    if (!C && CheckSROA)
      BaseAndOffset = ConstantOffsetPtrs.lookup(V);

    if (!C && !BaseAndOffset.first)
      // The incoming value is neither a constant nor a pointer with constant
      // offset, exit early.
      return true;

    if (FirstC) {
      if (FirstC == C)
        // If we've seen a constant incoming value before and it is the same
        // constant we see this time, continue checking the next incoming
        // value.
        continue;
      // Otherwise exit early because we either see a different constant or we
      // saw a constant before but have a pointer with constant offset this
      // time.
      return true;
    }

    if (FirstV) {
      // The same logic as above, but check pointer with constant offset here.
      if (FirstBaseAndOffset == BaseAndOffset)
        continue;
      return true;
    }

    if (C) {
      // This is the 1st time we've seen a constant, record it.
      FirstC = C;
      continue;
    }

    // The remaining case is that this is the 1st time we've seen a pointer
    // with constant offset, record it.
    FirstV = V;
    FirstBaseAndOffset = BaseAndOffset;
  }

  // Check if we can map phi to a constant.
  if (FirstC) {
    SimplifiedValues[&I] = FirstC;
    return true;
  }

  // Check if we can map phi to a pointer with constant offset.
  if (FirstBaseAndOffset.first) {
    ConstantOffsetPtrs[&I] = FirstBaseAndOffset;

    Value *SROAArg;
    DenseMap<Value *, int>::iterator CostIt;
    if (lookupSROAArgAndCost(FirstV, SROAArg, CostIt))
      SROAArgValues[&I] = SROAArg;
  }

  return true;
}

/// Check we can fold GEPs of constant-offset call site argument pointers.
/// This requires target data and inbounds GEPs.
///
/// \return true if the specified GEP can be folded.
bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
  // Check if we have a base + offset for the pointer.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getPointerOperand());
  if (!BaseAndOffset.first)
    return false;

  // Check if the offset of this GEP is constant, and if so accumulate it
  // into Offset.
  if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
    return false;

  // Add the result as a new mapping to Base + Offset.
  ConstantOffsetPtrs[&I] = BaseAndOffset;

  return true;
}

bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  bool SROACandidate =
      lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt);

  // Lambda to check whether a GEP's indices are all constant.
  auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
    for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
      if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
        return false;
    return true;
  };

  if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
    if (SROACandidate)
      SROAArgValues[&I] = SROAArg;

    // Constant GEPs are modeled as free.
    return true;
  }

  // Variable GEPs will require math and will disable SROA.
  if (SROACandidate)
    disableSROA(CostIt);
  return isGEPFree(I);
}
/// Simplify \p I if its operands are constants and update SimplifiedValues.
/// \p Evaluate is a callable specific to instruction type that evaluates the
/// instruction when all the operands are constants.
template <typename Callable>
bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
  SmallVector<Constant *, 2> COps;
  for (Value *Op : I.operands()) {
    Constant *COp = dyn_cast<Constant>(Op);
    if (!COp)
      COp = SimplifiedValues.lookup(Op);
    if (!COp)
      return false;
    COps.push_back(COp);
  }
  auto *C = Evaluate(COps);
  if (!C)
    return false;
  SimplifiedValues[&I] = C;
  return true;
}

bool CallAnalyzer::visitBitCast(BitCastInst &I) {
  // Propagate constants through bitcasts.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getBitCast(COps[0], I.getType());
      }))
    return true;

  // Track base/offsets through casts.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getOperand(0));
  // Casts don't change the offset, just wrap it up.
  if (BaseAndOffset.first)
    ConstantOffsetPtrs[&I] = BaseAndOffset;

  // Also look for SROA candidates here.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  // Bitcasts are always zero cost.
  return true;
}

bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
  // Propagate constants through ptrtoint.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getPtrToInt(COps[0], I.getType());
      }))
    return true;

  // Track base/offset pairs when converted to a plain integer provided the
  // integer is large enough to represent the pointer.
  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
  unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
  if (IntegerSize >= DL.getPointerSizeInBits(AS)) {
    std::pair<Value *, APInt> BaseAndOffset =
        ConstantOffsetPtrs.lookup(I.getOperand(0));
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // This is really weird. Technically, ptrtoint will disable SROA. However,
  // unless that ptrtoint is *used* somewhere in the live basic blocks after
  // inlining, it will be nuked, and SROA should proceed. All of the uses which
  // would block SROA would also block SROA if applied directly to a pointer,
  // and so we can just add the integer in here. The only places where SROA is
  // preserved either cannot fire on an integer, or won't in-and-of themselves
  // disable SROA (ext) w/o some later use that we would see and disable.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}
bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
  // Propagate constants through inttoptr.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getIntToPtr(COps[0], I.getType());
      }))
    return true;

  // Track base/offset pairs when round-tripped through a pointer without
  // modifications provided the integer is not too large.
  Value *Op = I.getOperand(0);
  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
  if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitCastInst(CastInst &I) {
  // Propagate constants through casts.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
      }))
    return true;

  // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
  disableSROA(I.getOperand(0));

  // If this is a floating-point cast, and the target says this operation
  // is expensive, this may eventually become a library call. Treat the cost
  // as such.
  switch (I.getOpcode()) {
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
    if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
      Cost += InlineConstants::CallPenalty;
    break;
  default:
    break;
  }

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
  Value *Operand = I.getOperand(0);
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantFoldInstOperands(&I, COps[0], DL);
      }))
    return true;

  // Disable any SROA on the argument to arbitrary unary operators.
  disableSROA(Operand);

  return false;
}

bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
  return CandidateCS.paramHasAttr(A->getArgNo(), Attr);
}

bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
  // Does the *call site* have the NonNull attribute set on an argument? We
  // use the attribute on the call site to memoize any analysis done in the
  // caller. This will also trip if the callee function has a non-null
  // parameter attribute, but that's a less interesting case because hopefully
  // the callee would already have been simplified based on that.
  if (Argument *A = dyn_cast<Argument>(V))
    if (paramHasAttr(A, Attribute::NonNull))
      return true;

  // Is this an alloca in the caller? This is distinct from the attribute case
  // above because attributes aren't updated within the inliner itself and we
  // always want to catch the alloca-derived case.
  if (isAllocaDerivedArg(V))
    // We can actually predict the result of comparisons between an
    // alloca-derived value and null. Note that this fires regardless of
    // SROA firing.
    return true;

  return false;
}
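
// Illustration: if the caller passes the address of one of its own allocas
// (or an argument marked nonnull at this call site), an equality comparison
// against null inside the callee can be folded to a constant in visitCmpInst()
// below, which in turn can let branch folding mark whole error-handling blocks
// dead for the purposes of this cost analysis.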
bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
  // If the normal destination of the invoke or the parent block of the call
  // site is unreachable-terminated, there is little point in inlining this
  // unless there is literally zero cost.
  // FIXME: Note that it is possible that an unreachable-terminated block has a
  // hot entry. For example, in the scenario below, inlining hot_call_X() may
  // be beneficial:
  // main() {
  //   hot_call_1();
  //   ...
  //   hot_call_N()
  //   exit(0);
  // }
  // For now, we are not handling this corner case here as it is rare in real
  // code. In the future, we should elaborate on this based on BPI and BFI in
  // more general threshold adjusting heuristics in updateThreshold().
  Instruction *Instr = CS.getInstruction();
  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
    if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
      return false;
  } else if (isa<UnreachableInst>(Instr->getParent()->getTerminator()))
    return false;

  return true;
}

bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
  // If a global profile summary is available, then the callsite's coldness is
  // determined based on that.
  if (PSI && PSI->hasProfileSummary())
    return PSI->isColdCallSite(CS, CallerBFI);

  // Otherwise we need BFI to be available.
  if (!CallerBFI)
    return false;

  // Determine if the callsite is cold relative to caller's entry. We could
  // potentially cache the computation of scaled entry frequency, but the added
  // complexity is not worth it unless this scaling shows up high in the
  // profiles.
  const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
  auto CallSiteBB = CS.getInstruction()->getParent();
  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
  auto CallerEntryFreq =
      CallerBFI->getBlockFreq(&(CS.getCaller()->getEntryBlock()));
  return CallSiteFreq < CallerEntryFreq * ColdProb;
}

Optional<int>
CallAnalyzer::getHotCallSiteThreshold(CallSite CS,
                                      BlockFrequencyInfo *CallerBFI) {

  // If a global profile summary is available, then the callsite's hotness is
  // determined based on that.
  if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(CS, CallerBFI))
    return Params.HotCallSiteThreshold;

  // Otherwise we need BFI to be available and to have a locally hot callsite
  // threshold.
  if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
    return None;

  // Determine if the callsite is hot relative to caller's entry. We could
  // potentially cache the computation of scaled entry frequency, but the added
  // complexity is not worth it unless this scaling shows up high in the
  // profiles.
  auto CallSiteBB = CS.getInstruction()->getParent();
  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
  auto CallerEntryFreq = CallerBFI->getEntryFreq();
  if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
    return Params.LocallyHotCallSiteThreshold;

  // Otherwise treat it normally.
  return None;
}
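
// Note on the relative-frequency fallbacks above: with the default
// cold-callsite-rel-freq of 2, a callsite counts as cold when its block
// frequency is below 2% of the caller's entry frequency; with the default
// hot-callsite-rel-freq of 60, its block frequency must be at least 60x the
// entry frequency (and LocallyHotCallSiteThreshold must be set) for it to be
// treated as locally hot.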
void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
  // If no size growth is allowed for this inlining, set Threshold to 0.
  if (!allowSizeGrowth(CS)) {
    Threshold = 0;
    return;
  }

  Function *Caller = CS.getCaller();

  // return min(A, B) if B is valid.
  auto MinIfValid = [](int A, Optional<int> B) {
    return B ? std::min(A, B.getValue()) : A;
  };

  // return max(A, B) if B is valid.
  auto MaxIfValid = [](int A, Optional<int> B) {
    return B ? std::max(A, B.getValue()) : A;
  };

  // Various bonus percentages. These are multiplied by Threshold to get the
  // bonus values.
  // SingleBBBonus: This bonus is applied if the callee has a single reachable
  // basic block at the given callsite context. This is speculatively applied
  // and withdrawn if more than one basic block is seen.
  //
  // Vector bonuses: We want to more aggressively inline vector-dense kernels
  // and apply this bonus based on the percentage of vector instructions. A
  // bonus is applied if the vector instructions exceed 50%, and half that
  // amount is applied if they exceed 10%. Note that these bonuses are somewhat
  // arbitrary and evolved over time by accident as much as because they are
  // principled bonuses.
  // FIXME: It would be nice to base the bonus values on something more
  // scientific.
  //
  // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
  // of the last call to a static function, as inlining such functions is
  // guaranteed to reduce code size.
  //
  // These bonus percentages may be set to 0 based on properties of the caller
  // and the callsite.
  int SingleBBBonusPercent = 50;
  int VectorBonusPercent = 150;
  int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;

  // Lambda to set all the above bonuses and bonus percentages to 0.
  auto DisallowAllBonuses = [&]() {
    SingleBBBonusPercent = 0;
    VectorBonusPercent = 0;
    LastCallToStaticBonus = 0;
  };

  // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
  // and reduce the threshold if the caller has the necessary attribute.
  if (Caller->optForMinSize()) {
    Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
    // For minsize, we want to disable the single BB bonus and the vector
    // bonuses, but not the last-call-to-static bonus. Inlining the last call
    // to a static function will, at the minimum, eliminate the parameter setup
    // and call/return instructions.
    SingleBBBonusPercent = 0;
    VectorBonusPercent = 0;
  } else if (Caller->optForSize())
    Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);

  // Adjust the threshold based on the inlinehint attribute and profile-based
  // hotness information if the caller does not have the MinSize attribute.
  if (!Caller->optForMinSize()) {
    if (Callee.hasFnAttribute(Attribute::InlineHint))
      Threshold = MaxIfValid(Threshold, Params.HintThreshold);

    // FIXME: After switching to the new passmanager, simplify the logic below
    // by checking only the callsite hotness/coldness as we will reliably
    // have local profile information.
    //
    // Callsite hotness and coldness can be determined if sample profile is
    // used (which adds hotness metadata to calls) or if caller's
    // BlockFrequencyInfo is available.
    BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
    auto HotCallSiteThreshold = getHotCallSiteThreshold(CS, CallerBFI);
    if (!Caller->optForSize() && HotCallSiteThreshold) {
      LLVM_DEBUG(dbgs() << "Hot callsite.\n");
      // FIXME: This should update the threshold only if it exceeds the
      // current threshold, but AutoFDO + ThinLTO currently relies on this
      // behavior to prevent inlining of hot callsites during ThinLTO
      // compile phase.
      Threshold = HotCallSiteThreshold.getValue();
    } else if (isColdCallSite(CS, CallerBFI)) {
      LLVM_DEBUG(dbgs() << "Cold callsite.\n");
      // Do not apply bonuses for a cold callsite, including the
      // LastCallToStatic bonus. While this bonus might result in code size
      // reduction, it can cause the size of a non-cold caller to increase,
      // preventing it from being inlined.
      DisallowAllBonuses();
      Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
    } else if (PSI) {
      // Use the callee's global profile information only if we have no way of
      // determining this via callsite information.
      if (PSI->isFunctionEntryHot(&Callee)) {
        LLVM_DEBUG(dbgs() << "Hot callee.\n");
        // If callsite hotness cannot be determined, we may still know
        // that the callee is hot and treat it as a weaker hint for threshold
        // increase.
        Threshold = MaxIfValid(Threshold, Params.HintThreshold);
      } else if (PSI->isFunctionEntryCold(&Callee)) {
        LLVM_DEBUG(dbgs() << "Cold callee.\n");
        // Do not apply bonuses for a cold callee, including the
        // LastCallToStatic bonus. While this bonus might result in code size
        // reduction, it can cause the size of a non-cold caller to increase,
        // preventing it from being inlined.
        DisallowAllBonuses();
        Threshold = MinIfValid(Threshold, Params.ColdThreshold);
      }
    }
  }

  // Finally, take the target-specific inlining threshold multiplier into
  // account.
  Threshold *= TTI.getInliningThresholdMultiplier();

  SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
  VectorBonus = Threshold * VectorBonusPercent / 100;

  bool OnlyOneCallAndLocalLinkage =
      F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
  // If there is only one call of the function, and it has internal linkage,
  // the cost of inlining it drops dramatically. It may seem odd to update
  // Cost in updateThreshold, but the bonus depends on the logic in this
  // method.
  if (OnlyOneCallAndLocalLinkage)
    Cost -= LastCallToStaticBonus;
}
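
// Illustration of the bonus arithmetic in updateThreshold() above: assuming
// the base threshold of 225, no callsite-specific adjustments, and a
// threshold multiplier of 1, SingleBBBonus = 225 * 50 / 100 = 112 and
// VectorBonus = 225 * 150 / 100 = 337 in integer arithmetic.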
bool CallAnalyzer::visitCmpInst(CmpInst &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  // First try to handle simplified comparisons.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
      }))
    return true;

  if (I.getOpcode() == Instruction::FCmp)
    return false;

  // Otherwise look for a comparison between constant offset pointers with
  // a common base.
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the icmp to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrCmps;
        return true;
      }
    }
  }

  // If the comparison is an equality comparison with null, we can simplify it
  // if we know the value (argument) can't be null.
  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
      isKnownNonNullInCallee(I.getOperand(0))) {
    bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
    SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
                                      : ConstantInt::getFalse(I.getType());
    return true;
  }
  // Finally check for SROA candidates in comparisons.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
    if (isa<ConstantPointerNull>(I.getOperand(1))) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}

bool CallAnalyzer::visitSub(BinaryOperator &I) {
  // Try to handle a special case: we can fold computing the difference of two
  // constant-related pointers.
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the subtract to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrDiffs;
        return true;
      }
    }
  }

  // Otherwise, fall back to the generic logic for simplifying and handling
  // instructions.
  return Base::visitSub(I);
}

bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Constant *CLHS = dyn_cast<Constant>(LHS);
  if (!CLHS)
    CLHS = SimplifiedValues.lookup(LHS);
  Constant *CRHS = dyn_cast<Constant>(RHS);
  if (!CRHS)
    CRHS = SimplifiedValues.lookup(RHS);

  Value *SimpleV = nullptr;
  if (auto FI = dyn_cast<FPMathOperator>(&I))
    SimpleV = SimplifyFPBinOp(I.getOpcode(), CLHS ? CLHS : LHS,
                              CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL);
  else
    SimpleV =
        SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);

  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
    SimplifiedValues[&I] = C;

  if (SimpleV)
    return true;

  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
  disableSROA(LHS);
  disableSROA(RHS);

  // If the instruction is floating point, and the target says this operation
  // is expensive, this may eventually become a library call. Treat the cost
  // as such.
  if (I.getType()->isFloatingPointTy() &&
      TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
    Cost += InlineConstants::CallPenalty;

  return false;
}
bool CallAnalyzer::visitLoad(LoadInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  // If the data is already loaded from this address and hasn't been clobbered
  // by any stores or calls, this load is likely to be redundant and can be
  // eliminated.
  if (EnableLoadElimination &&
      !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
    LoadEliminationCost += InlineConstants::InstrCost;
    return true;
  }

  return false;
}

bool CallAnalyzer::visitStore(StoreInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  // The store can potentially clobber loads and prevent repeated loads from
  // being eliminated.
  // FIXME:
  // 1. We can probably keep an initial set of eliminatable loads subtracted
  // from the cost even when we finally see a store. We just need to disable
  // *further* accumulation of elimination savings.
  // 2. We should probably at some point thread MemorySSA for the callee into
  // this and then use that to actually compute *really* precise savings.
  disableLoadElimination();
  return false;
}

bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
  // Constant folding for extract value is trivial.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getExtractValue(COps[0], I.getIndices());
      }))
    return true;

  // SROA can look through these but give them a cost.
  return false;
}

bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
  // Constant folding for insert value is trivial.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
                                            /*InsertedValueOperand*/ COps[1],
                                            I.getIndices());
      }))
    return true;

  // SROA can look through these but give them a cost.
  return false;
}

/// Try to simplify a call site.
///
/// Takes a concrete function and callsite and tries to actually simplify it by
/// analyzing the arguments and call itself with instsimplify. Returns true if
/// it has simplified the callsite to some other entity (a constant), making it
/// free.
bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
  // FIXME: Using the instsimplify logic directly for this is inefficient
  // because we have to continually rebuild the argument list even when no
  // simplifications can be performed. Until that is fixed with remapping
  // inside of instsimplify, directly constant fold calls here.
  if (!canConstantFoldCallTo(CS, F))
    return false;

  // Try to re-map the arguments to constants.
  SmallVector<Constant *, 4> ConstantArgs;
  ConstantArgs.reserve(CS.arg_size());
  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E;
       ++I) {
    Constant *C = dyn_cast<Constant>(*I);
    if (!C)
      C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
    if (!C)
      return false; // This argument doesn't map to a constant.

    ConstantArgs.push_back(C);
  }
  if (Constant *C = ConstantFoldCall(CS, F, ConstantArgs)) {
    SimplifiedValues[CS.getInstruction()] = C;
    return true;
  }

  return false;
}

bool CallAnalyzer::visitCallSite(CallSite CS) {
  if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
      !F.hasFnAttribute(Attribute::ReturnsTwice)) {
    // This aborts the entire analysis.
    ExposesReturnsTwice = true;
    return false;
  }
  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->cannotDuplicate())
    ContainsNoDuplicateCall = true;

  if (Function *F = CS.getCalledFunction()) {
    // When we have a concrete function, first try to simplify it directly.
    if (simplifyCallSite(F, CS))
      return true;

    // Next check if it is an intrinsic we know about.
    // FIXME: Lift this into part of the InstVisitor.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
      switch (II->getIntrinsicID()) {
      default:
        if (!CS.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
          disableLoadElimination();
        return Base::visitCallSite(CS);

      case Intrinsic::load_relative:
        // This is normally lowered to 4 LLVM instructions.
        Cost += 3 * InlineConstants::InstrCost;
        return false;

      case Intrinsic::memset:
      case Intrinsic::memcpy:
      case Intrinsic::memmove:
        disableLoadElimination();
        // SROA can usually chew through these intrinsics, but they aren't
        // free.
        return false;
      case Intrinsic::icall_branch_funnel:
      case Intrinsic::localescape:
        HasUninlineableIntrinsic = true;
        return false;
      case Intrinsic::vastart:
        InitsVargArgs = true;
        return false;
      }
    }

    if (F == CS.getInstruction()->getFunction()) {
      // This flag will fully abort the analysis, so don't bother with anything
      // else.
      IsRecursiveCall = true;
      return false;
    }

    if (TTI.isLoweredToCall(F)) {
      // We account for the average 1 instruction per call argument setup
      // here.
      Cost += CS.arg_size() * InlineConstants::InstrCost;

      // Everything other than inline ASM will also have a significant cost
      // merely from making the call.
      if (!isa<InlineAsm>(CS.getCalledValue()))
        Cost += InlineConstants::CallPenalty;
    }

    if (!CS.onlyReadsMemory())
      disableLoadElimination();
    return Base::visitCallSite(CS);
  }

  // Otherwise we're in a very special case -- an indirect function call. See
  // if we can be particularly clever about this.
  Value *Callee = CS.getCalledValue();

  // First, pay the price of the argument setup. We account for the average
  // 1 instruction per call argument setup here.
  Cost += CS.arg_size() * InlineConstants::InstrCost;

  // Next, check if this happens to be an indirect function call to a known
  // function in this inline context. If not, we've done all we can.
  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
  if (!F) {
    if (!CS.onlyReadsMemory())
      disableLoadElimination();
    return Base::visitCallSite(CS);
  }

  // If we have a constant that we are calling as a function, we can peer
  // through it and see the function target. This happens not infrequently
  // during devirtualization and so we want to give it a hefty bonus for
  // inlining, but cap that bonus in the event that inlining wouldn't pan
  // out. Pretend to inline the function, with a custom threshold.
  auto IndirectCallParams = Params;
  IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
  CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, CS,
                  IndirectCallParams);
  if (CA.analyzeCall(CS)) {
    // We were able to inline the indirect call! Subtract the cost from the
    // threshold to get the bonus we want to apply, but don't go below zero.
    Cost -= std::max(0, CA.getThreshold() - CA.getCost());
  }

  if (!F->onlyReadsMemory())
    disableLoadElimination();
  return Base::visitCallSite(CS);
}

bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
  // At least one return instruction will be free after inlining.
  bool Free = !HasReturn;
  HasReturn = true;
  return Free;
}

bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
  // We model unconditional branches as essentially free -- they really
  // shouldn't exist at all, but handling them makes the behavior of the
  // inliner more regular and predictable. Interestingly, conditional branches
  // which will fold away are also free.
  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
         dyn_cast_or_null<ConstantInt>(
             SimplifiedValues.lookup(BI.getCondition()));
}

bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
  bool CheckSROA = SI.getType()->isPointerTy();
  Value *TrueVal = SI.getTrueValue();
  Value *FalseVal = SI.getFalseValue();

  Constant *TrueC = dyn_cast<Constant>(TrueVal);
  if (!TrueC)
    TrueC = SimplifiedValues.lookup(TrueVal);
  Constant *FalseC = dyn_cast<Constant>(FalseVal);
  if (!FalseC)
    FalseC = SimplifiedValues.lookup(FalseVal);
  Constant *CondC =
      dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));

  if (!CondC) {
    // Select C, X, X => X
    if (TrueC == FalseC && TrueC) {
      SimplifiedValues[&SI] = TrueC;
      return true;
    }

    if (!CheckSROA)
      return Base::visitSelectInst(SI);

    std::pair<Value *, APInt> TrueBaseAndOffset =
        ConstantOffsetPtrs.lookup(TrueVal);
    std::pair<Value *, APInt> FalseBaseAndOffset =
        ConstantOffsetPtrs.lookup(FalseVal);
    if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
      ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;

      Value *SROAArg;
      DenseMap<Value *, int>::iterator CostIt;
      if (lookupSROAArgAndCost(TrueVal, SROAArg, CostIt))
        SROAArgValues[&SI] = SROAArg;
      return true;
    }

    return Base::visitSelectInst(SI);
  }

  // Select condition is a constant.
  Value *SelectedV = CondC->isAllOnesValue()
                         ? TrueVal
                         : (CondC->isNullValue()) ? FalseVal : nullptr;
  if (!SelectedV) {
    // Condition is a vector constant that is not all 1s or all 0s. If all
    // operands are constants, ConstantExpr::getSelect() can handle such cases,
    // e.g. vector selects.
    if (TrueC && FalseC) {
      if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {
        SimplifiedValues[&SI] = C;
        return true;
      }
    }
    return Base::visitSelectInst(SI);
  }

  // Condition is either all 1s or all 0s. SI can be simplified.
  if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
    SimplifiedValues[&SI] = SelectedC;
    return true;
  }

  if (!CheckSROA)
    return true;

  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(SelectedV);
  if (BaseAndOffset.first) {
    ConstantOffsetPtrs[&SI] = BaseAndOffset;

    Value *SROAArg;
    DenseMap<Value *, int>::iterator CostIt;
    if (lookupSROAArgAndCost(SelectedV, SROAArg, CostIt))
      SROAArgValues[&SI] = SROAArg;
  }

  return true;
}

bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
  // We model unconditional switches as free, see the comments on handling
  // branches.
  if (isa<ConstantInt>(SI.getCondition()))
    return true;
  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
    if (isa<ConstantInt>(V))
      return true;

  // Assume the most general case where the switch is lowered into
  // either a jump table, bit test, or a balanced binary tree consisting of
  // case clusters without merging adjacent clusters with the same
  // destination. We do not consider the switches that are lowered with a mix
  // of jump table/bit test/binary search tree. The cost of the switch is
  // proportional to the size of the tree or the size of the jump table range.
  //
  // NB: We convert large switches which are just used to initialize large phi
  // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
  // inlining those. It will prevent inlining in cases where the optimization
  // does not (yet) fire.

  // Maximum valid cost increase in this function.
  int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;

  // Exit early for a large switch, assuming one case needs at least one
  // instruction.
  // FIXME: This is not true for a bit test, but ignore such cases for now to
  // save compile time.
  int64_t CostLowerBound =
      std::min((int64_t)CostUpperBound,
               (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);

  if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
    Cost = CostLowerBound;
    return false;
  }

  unsigned JumpTableSize = 0;
  unsigned NumCaseCluster =
      TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);

  // If suitable for a jump table, consider the cost for the table size and
  // branch to destination.
  if (JumpTableSize) {
    int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
                     4 * InlineConstants::InstrCost;

    Cost = std::min((int64_t)CostUpperBound, JTCost + Cost);
    return false;
  }

  // Considering forming a binary search tree, we should find the number of
  // nodes, which is the same as the number of comparisons when lowered. For a
  // given number of clusters, n, we can define a recursive function, f(n), to
  // find the number of nodes in the tree. The recursion is:
  //    f(n) = 1 + f(n/2) + f(n - n/2), when n > 3,
  //    and f(n) = n, when n <= 3.
  // This will lead to a binary tree where the leaf should be either f(2) or
  // f(3) when n > 3. So, the number of comparisons from leaves should be n,
  // while the number of non-leaf nodes should be:
  //    2^(log2(n) - 1) - 1
  //      = 2^log2(n) * 2^-1 - 1
  //      = n / 2 - 1.
  // Considering comparisons from leaf and non-leaf nodes, we can estimate the
  // number of comparisons in a simple closed form:
  //    n + n / 2 - 1 = n * 3 / 2 - 1
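  //
  // Illustration: for NumCaseCluster == 8 (and no jump table), the estimate
  // below is 3 * 8 / 2 - 1 = 11 compare-and-branch pairs, i.e. 22 *
  // InlineConstants::InstrCost added to Cost; this is a rough model of the
  // lowered binary search, not an exact lowering.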

  // Considering forming a binary search tree, we should find the number of
  // nodes, which is the same as the number of comparisons when lowered. For a
  // given number of clusters, n, we can define a recursive function, f(n), to
  // find the number of nodes in the tree. The recursion is:
  //   f(n) = 1 + f(n / 2) + f(n - n / 2), when n > 3,
  //   f(n) = n, when n <= 3.
  // This leads to a binary tree where the leaves are either f(2) or f(3) when
  // n > 3. So, the number of comparisons from leaves is n, while the number
  // of non-leaf nodes is:
  //   2^(log2(n) - 1) - 1
  //   = 2^log2(n) * 2^-1 - 1
  //   = n / 2 - 1.
  // Considering comparisons from leaf and non-leaf nodes, we can estimate the
  // number of comparisons in a simple closed form:
  //   n + n / 2 - 1 = n * 3 / 2 - 1
  // For example, n = 8 clusters gives an estimate of 8 * 3 / 2 - 1 = 11
  // comparisons.
  if (NumCaseCluster <= 3) {
    // Suppose a comparison includes one compare and one conditional branch.
    Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
    return false;
  }

  int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
  int64_t SwitchCost =
      ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;

  Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost);
  return false;
}

bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
  // We never want to inline functions that contain an indirectbr. This is
  // incorrect because all the blockaddresses (in static global initializers
  // for example) would be referring to the original function, and this
  // indirect jump would jump from the inlined copy of the function into the
  // original function, which is extremely undefined behavior.
  // FIXME: This logic isn't really right; we can safely inline functions with
  // indirectbr's as long as no other function or global references the
  // blockaddress of a block within the current function.
  HasIndirectBr = true;
  return false;
}

bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a resume instruction.
  return false;
}

bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a cleanupret instruction.
  return false;
}

bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a catchret instruction.
  return false;
}

bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
  // FIXME: It might be reasonable to discount the cost of instructions
  // leading to unreachable, as they have the lowest possible impact on both
  // runtime and code size.
  return true; // No actual code is needed for unreachable.
}

bool CallAnalyzer::visitInstruction(Instruction &I) {
  // Some instructions are free. All of the free intrinsics can also be
  // handled by SROA, etc.
  if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
    return true;

  // We found something we don't understand or can't handle. Mark any SROA-able
  // values in the operand list as no longer viable.
  for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
    disableSROA(*OI);

  return false;
}

/// Analyze a basic block for its contribution to the inline cost.
///
/// This method walks the analyzer over every instruction in the given basic
/// block and accounts for their cost during inlining at this callsite. It
/// aborts early if the threshold has been exceeded or an impossible-to-inline
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
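///
/// Note (illustrative): a block whose instructions are all simplified or free
/// contributes nothing to Cost; only instructions the visitor cannot consume
/// are charged InlineConstants::InstrCost.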
InlineResult
CallAnalyzer::analyzeBlock(BasicBlock *BB,
                           SmallPtrSetImpl<const Value *> &EphValues) {
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
    // FIXME: Currently the number of instructions in a function, regardless of
    // our ability to simplify them to constants or dead code during inlining,
    // is used by the vector bonus heuristic. As long as that's true, we have
    // to special-case debug intrinsics here to prevent differences in inlining
    // due to debug symbols. Eventually, the number of unsimplified
    // instructions shouldn't factor into the cost computation, but until then,
    // hack around it here.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip ephemeral values.
    if (EphValues.count(&*I))
      continue;

    ++NumInstructions;
    if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
      ++NumVectorInstructions;

    // If the instruction simplified to a constant, there is no cost to this
    // instruction. Visit the instructions using our InstVisitor to account for
    // all of the per-instruction logic. The visit tree returns true if we
    // consumed the instruction in any way, and false if the instruction's base
    // cost should count against inlining.
    if (Base::visit(&*I))
      ++NumInstructionsSimplified;
    else
      Cost += InlineConstants::InstrCost;

    using namespace ore;
    // If visiting this instruction detected an uninlinable pattern, abort.
    InlineResult IR;
    if (IsRecursiveCall)
      IR = "recursive";
    else if (ExposesReturnsTwice)
      IR = "exposes returns twice";
    else if (HasDynamicAlloca)
      IR = "dynamic alloca";
    else if (HasIndirectBr)
      IR = "indirect branch";
    else if (HasUninlineableIntrinsic)
      IR = "uninlinable intrinsic";
    else if (InitsVargArgs)
      IR = "varargs";
    if (!IR) {
      if (ORE)
        ORE->emit([&]() {
          return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
                                          CandidateCS.getInstruction())
                 << NV("Callee", &F) << " has uninlinable pattern ("
                 << NV("InlineResult", IR.message)
                 << ") and cost is not fully computed";
        });
      return IR;
    }

    // If the caller is a recursive function then we don't want to inline
    // functions which allocate a lot of stack space because it would increase
    // the caller stack usage dramatically.
    if (IsCallerRecursive &&
        AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
      InlineResult IR = "recursive and allocates too much stack space";
      if (ORE)
        ORE->emit([&]() {
          return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
                                          CandidateCS.getInstruction())
                 << NV("Callee", &F) << " is " << NV("InlineResult", IR.message)
                 << ". Cost is not fully computed";
        });
      return IR;
    }

    // Check if we've passed the maximum possible threshold so we don't spin in
    // huge basic blocks that will never inline.
    if (Cost >= Threshold && !ComputeFullInlineCost)
      return false;
  }

  return true;
}

/// Compute the base pointer and cumulative constant offsets for V.
///
/// This strips all constant offsets off of V, leaving it the base pointer, and
/// accumulates the total constant offset applied in the returned constant. It
/// returns nullptr if V is not a pointer, and returns the constant '0' if
/// there are no constant offsets applied.
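///
/// For example (illustrative, hypothetical names): given an alloca-rooted
/// chain in the caller such as
///   %a = alloca [4 x i32]
///   %p = getelementptr inbounds [4 x i32], [4 x i32]* %a, i32 0, i32 2
/// passing %p rewrites V to %a and returns a constant holding the byte
/// offset 8 (two i32 elements).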
ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
  if (!V->getType()->isPointerTy())
    return nullptr;

  unsigned AS = V->getType()->getPointerAddressSpace();
  unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
  APInt Offset = APInt::getNullValue(IntPtrWidth);

  // Even though we don't look through PHI nodes, we could be called on an
  // instruction in an unreachable block, which may be on a cycle.
  SmallPtrSet<Value *, 4> Visited;
  Visited.insert(V);
  do {
    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
      if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
        return nullptr;
      V = GEP->getPointerOperand();
    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
      V = cast<Operator>(V)->getOperand(0);
    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
      if (GA->isInterposable())
        break;
      V = GA->getAliasee();
    } else {
      break;
    }
    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
  } while (Visited.insert(V).second);

  Type *IntPtrTy = DL.getIntPtrType(V->getContext(), AS);
  return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
}

/// Find dead blocks due to deleted CFG edges during inlining.
///
/// If we know the successor of the current block, \p CurrBB, has to be \p
/// NextBB, the other successors of \p CurrBB are dead if these successors have
/// no live incoming CFG edges. If one block is found to be dead, we can
/// continue growing the dead block list by checking the successors of the dead
/// blocks to see if all their incoming edges are dead or not.
void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
  auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
    // A CFG edge is dead if the predecessor is dead or the predecessor has a
    // known successor which is not the one under examination.
    return (DeadBlocks.count(Pred) ||
            (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
  };

  auto IsNewlyDead = [&](BasicBlock *BB) {
    // If all the edges to a block are dead, the block is also dead.
    return (!DeadBlocks.count(BB) &&
            llvm::all_of(predecessors(BB),
                         [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
  };

  for (BasicBlock *Succ : successors(CurrBB)) {
    if (Succ == NextBB || !IsNewlyDead(Succ))
      continue;
    SmallVector<BasicBlock *, 4> NewDead;
    NewDead.push_back(Succ);
    while (!NewDead.empty()) {
      BasicBlock *Dead = NewDead.pop_back_val();
      if (DeadBlocks.insert(Dead))
        // Continue growing the dead block list.
        for (BasicBlock *S : successors(Dead))
          if (IsNewlyDead(S))
            NewDead.push_back(S);
    }
  }
}
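
// Illustrative example (hypothetical block names): if analyzeCall below
// proves that the conditional branch terminating %entry always takes %then,
// it records KnownSuccessors[%entry] = %then and calls this function; %else
// is then marked dead unless it has another live predecessor, and any blocks
// reachable only through %else are added to DeadBlocks as well.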

/// Analyze a call site for potential inlining.
///
/// Returns true if inlining this call is viable, and false if it is not
/// viable. It computes the cost and adjusts the threshold based on numerous
/// factors and heuristics. If this method returns false but the computed cost
/// is below the computed threshold, then inlining was forcibly disabled by
/// some artifact of the routine.
InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
  ++NumCallsAnalyzed;

  // Perform some tweaks to the cost and threshold based on the direct
  // callsite information.

  // We want to more aggressively inline vector-dense kernels, so up the
  // threshold, and we'll lower it if the % of vector instructions gets too
  // low. Note that these bonuses are somewhat arbitrary and evolved over time
  // by accident as much as because they are principled bonuses.
  //
  // FIXME: It would be nice to remove all such bonuses. At least it would be
  // nice to base the bonus values on something more scientific.
  assert(NumInstructions == 0);
  assert(NumVectorInstructions == 0);

  // Update the threshold based on callsite properties.
  updateThreshold(CS, F);

  // While Threshold depends on commandline options that can take negative
  // values, we want to enforce the invariant that the computed threshold and
  // bonuses are non-negative.
  assert(Threshold >= 0);
  assert(SingleBBBonus >= 0);
  assert(VectorBonus >= 0);

  // Speculatively apply all possible bonuses to Threshold. If cost exceeds
  // this Threshold at any point, and cost cannot decrease, we can stop
  // processing the rest of the function body.
  Threshold += (SingleBBBonus + VectorBonus);

  // Give out bonuses for the callsite, as the instructions setting them up
  // will be gone after inlining.
  Cost -= getCallsiteCost(CS, DL);

  // If this function uses the coldcc calling convention, prefer not to inline
  // it.
  if (F.getCallingConv() == CallingConv::Cold)
    Cost += InlineConstants::ColdccPenalty;

  // Check if we're done. This can happen due to bonuses and penalties.
  if (Cost >= Threshold && !ComputeFullInlineCost)
    return "high cost";

  if (F.empty())
    return true;

  Function *Caller = CS.getInstruction()->getFunction();
  // Check if the caller function is recursive itself.
  for (User *U : Caller->users()) {
    CallSite Site(U);
    if (!Site)
      continue;
    Instruction *I = Site.getInstruction();
    if (I->getFunction() == Caller) {
      IsCallerRecursive = true;
      break;
    }
  }

  // Populate our simplified values by mapping from function arguments to call
  // arguments with known important simplifications.
  CallSite::arg_iterator CAI = CS.arg_begin();
  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
       FAI != FAE; ++FAI, ++CAI) {
    assert(CAI != CS.arg_end());
    if (Constant *C = dyn_cast<Constant>(CAI))
      SimplifiedValues[&*FAI] = C;

    Value *PtrArg = *CAI;
    if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
      ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());

      // We can SROA any pointer arguments derived from alloca instructions.
      if (isa<AllocaInst>(PtrArg)) {
        SROAArgValues[&*FAI] = PtrArg;
        SROAArgCosts[PtrArg] = 0;
      }
    }
  }
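  // At this point, for an illustrative call such as callee(i32 42, i32* %buf)
  // where %buf is an alloca in the caller (hypothetical names), SimplifiedValues
  // maps the first formal argument to the constant 42 and SROAArgValues records
  // the second, so later visits can fold branches on the constant and credit
  // SROA savings for accesses through the pointer.
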
  NumConstantArgs = SimplifiedValues.size();
  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
  NumAllocaArgs = SROAArgValues.size();

  // FIXME: If a caller has multiple calls to a callee, we end up recomputing
  // the ephemeral values multiple times (and they're completely determined by
  // the callee, so this is purely duplicate work).
  SmallPtrSet<const Value *, 32> EphValues;
  CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);

  // The worklist of live basic blocks in the callee *after* inlining. We avoid
  // adding basic blocks of the callee which can be proven to be dead for this
  // particular call site in order to get more accurate cost estimates. This
  // requires a somewhat heavyweight iteration pattern: we need to walk the
  // basic blocks in a breadth-first order as we insert live successors. To
  // accomplish this, and because we expect to exit early once we cross the
  // threshold, we use a small-size-optimized SetVector.
  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
                    SmallPtrSet<BasicBlock *, 16>>
      BBSetVector;
  BBSetVector BBWorklist;
  BBWorklist.insert(&F.getEntryBlock());
  bool SingleBB = true;
  // Note that we *must not* cache the size, this loop grows the worklist.
  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
    // Bail out the moment we cross the threshold. This means we'll under-count
    // the cost, but only when undercounting doesn't matter.
    if (Cost >= Threshold && !ComputeFullInlineCost)
      break;

    BasicBlock *BB = BBWorklist[Idx];
    if (BB->empty())
      continue;

    // Disallow inlining a blockaddress. A blockaddress only has defined
    // behavior for an indirect branch in the same function, and we do not
    // currently support inlining indirect branches. But, the inliner may not
    // see an indirect branch that ends up being dead code at a particular call
    // site. If the blockaddress escapes the function, e.g., via a global
    // variable, inlining may lead to an invalid cross-function reference.
    if (BB->hasAddressTaken())
      return "blockaddress";

    // Analyze the cost of this block. If we blow through the threshold, this
    // returns false, and we can bail out.
    InlineResult IR = analyzeBlock(BB, EphValues);
    if (!IR)
      return IR;

    Instruction *TI = BB->getTerminator();

    // Add in the live successors by first checking whether we have a
    // terminator that may be simplified based on the values simplified by
    // this call.
    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (BI->isConditional()) {
        Value *Cond = BI->getCondition();
        if (ConstantInt *SimpleCond =
                dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
          BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
          BBWorklist.insert(NextBB);
          KnownSuccessors[BB] = NextBB;
          findDeadBlocks(BB, NextBB);
          continue;
        }
      }
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
      Value *Cond = SI->getCondition();
      if (ConstantInt *SimpleCond =
              dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
        BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
        BBWorklist.insert(NextBB);
        KnownSuccessors[BB] = NextBB;
        findDeadBlocks(BB, NextBB);
        continue;
      }
    }

    // If we're unable to select a particular successor, just count all of
    // them.
    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
         ++TIdx)
      BBWorklist.insert(TI->getSuccessor(TIdx));

    // If we had any successors at this point, then post-inlining is likely to
    // have them as well. Note that we assume any basic blocks which existed
    // due to branches or switches which folded above will also fold after
    // inlining.
    if (SingleBB && TI->getNumSuccessors() > 1) {
      // Take off the bonus we applied to the threshold.
      Threshold -= SingleBBBonus;
      SingleBB = false;
    }
  }

  bool OnlyOneCallAndLocalLinkage =
      F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
  // If this is a noduplicate call, we can still inline as long as
  // inlining this would cause the removal of the caller (so the instruction
  // is not actually duplicated, just moved).
  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
    return "noduplicate";

  // Loops generally act a lot like calls in that they act like barriers to
  // movement, require a certain amount of setup, etc. So when optimizing for
  // size, we penalize any call sites that perform loops. We do this after all
  // other costs here, so we will likely only be dealing with relatively small
  // functions (and hence DT and LI will hopefully be cheap).
  if (Caller->optForMinSize()) {
    DominatorTree DT(F);
    LoopInfo LI(DT);
    int NumLoops = 0;
    for (Loop *L : LI) {
      // Ignore loops that will not be executed.
      if (DeadBlocks.count(L->getHeader()))
        continue;
      NumLoops++;
    }
    Cost += NumLoops * InlineConstants::CallPenalty;
  }

  // We applied the maximum possible vector bonus at the beginning. Now,
  // subtract the excess bonus, if any, from the Threshold before
  // comparing against Cost.
  if (NumVectorInstructions <= NumInstructions / 10)
    Threshold -= VectorBonus;
  else if (NumVectorInstructions <= NumInstructions / 2)
    Threshold -= VectorBonus / 2;

  return Cost < std::max(1, Threshold);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Dump stats about this call's analysis.
LLVM_DUMP_METHOD void CallAnalyzer::dump() {
#define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
  DEBUG_PRINT_STAT(NumConstantArgs);
  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
  DEBUG_PRINT_STAT(NumAllocaArgs);
  DEBUG_PRINT_STAT(NumConstantPtrCmps);
  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
  DEBUG_PRINT_STAT(NumInstructionsSimplified);
  DEBUG_PRINT_STAT(NumInstructions);
  DEBUG_PRINT_STAT(SROACostSavings);
  DEBUG_PRINT_STAT(SROACostSavingsLost);
  DEBUG_PRINT_STAT(LoadEliminationCost);
  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
  DEBUG_PRINT_STAT(Cost);
  DEBUG_PRINT_STAT(Threshold);
#undef DEBUG_PRINT_STAT
}
#endif

/// Test that there are no attribute conflicts between Caller and Callee
/// that prevent inlining.
static bool functionsHaveCompatibleAttributes(Function *Caller,
                                              Function *Callee,
                                              TargetTransformInfo &TTI) {
  return TTI.areInlineCompatible(Caller, Callee) &&
         AttributeFuncs::areInlineCompatible(*Caller, *Callee);
}

int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) {
  int Cost = 0;
  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
    if (CS.isByValArgument(I)) {
      // We approximate the number of loads and stores needed by dividing the
      // size of the byval type by the target's pointer size.
      PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
      unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
      unsigned AS = PTy->getAddressSpace();
      unsigned PointerSize = DL.getPointerSizeInBits(AS);
      // Ceiling division.
      unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;

      // If it generates more than 8 stores it is likely to be expanded as an
      // inline memcpy so we take that as an upper bound. Otherwise we assume
      // one load and one store per word copied.
      // For example, a 256-bit byval type with 64-bit pointers copies 4
      // words, costing 2 * 4 * InlineConstants::InstrCost.
      // FIXME: The maxStoresPerMemcpy setting from the target should be used
      // here instead of a magic number of 8, but it's not available via
      // DataLayout.
      NumStores = std::min(NumStores, 8U);

      Cost += 2 * NumStores * InlineConstants::InstrCost;
    } else {
      // For non-byval arguments subtract off one instruction per call
      // argument.
      Cost += InlineConstants::InstrCost;
    }
  }
  // The call instruction also disappears after inlining.
  Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty;
  return Cost;
}

InlineCost llvm::getInlineCost(
    CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
    std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
    Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
    ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
  return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
                       GetAssumptionCache, GetBFI, PSI, ORE);
}

InlineCost llvm::getInlineCost(
    CallSite CS, Function *Callee, const InlineParams &Params,
    TargetTransformInfo &CalleeTTI,
    std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
    Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
    ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {

  // Cannot inline indirect calls.
  if (!Callee)
    return llvm::InlineCost::getNever("indirect call");

  // Never inline calls with byval arguments that do not have the alloca
  // address space. Since byval arguments can be replaced with a copy to an
  // alloca, the inlined code would need to be adjusted to handle that the
  // argument is now in the alloca address space (so it is a little bit
  // complicated to solve).
  unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I)
    if (CS.isByValArgument(I)) {
      PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
      if (PTy->getAddressSpace() != AllocaAS)
        return llvm::InlineCost::getNever("byval arguments without alloca"
                                          " address space");
    }

  // Calls to functions with always-inline attributes should be inlined
  // whenever possible.
  if (CS.hasFnAttr(Attribute::AlwaysInline)) {
    if (isInlineViable(*Callee))
      return llvm::InlineCost::getAlways("always inline attribute");
    return llvm::InlineCost::getNever("inapplicable always inline attribute");
  }

  // Never inline functions with conflicting attributes (unless callee has
  // always-inline attribute).
  Function *Caller = CS.getCaller();
  if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI))
    return llvm::InlineCost::getNever("conflicting attributes");

  // Don't inline this call if the caller has the optnone attribute.
  if (Caller->hasFnAttribute(Attribute::OptimizeNone))
    return llvm::InlineCost::getNever("optnone attribute");

  // Don't inline a function that treats null pointer as valid into a caller
  // that does not have this attribute.
  if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
    return llvm::InlineCost::getNever("nullptr definitions incompatible");

  // Don't inline functions which can be interposed at link-time.
  if (Callee->isInterposable())
    return llvm::InlineCost::getNever("interposable");

  // Don't inline functions marked noinline.
  if (Callee->hasFnAttribute(Attribute::NoInline))
    return llvm::InlineCost::getNever("noinline function attribute");

  // Don't inline call sites marked noinline.
  if (CS.isNoInline())
    return llvm::InlineCost::getNever("noinline call site attribute");

  LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
                          << "... (caller:" << Caller->getName() << ")\n");

  CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS,
                  Params);
  InlineResult ShouldInline = CA.analyzeCall(CS);

  LLVM_DEBUG(CA.dump());

  // Check if there was a reason to force inlining or no inlining.
  if (!ShouldInline && CA.getCost() < CA.getThreshold())
    return InlineCost::getNever(ShouldInline.message);
  if (ShouldInline && CA.getCost() >= CA.getThreshold())
    return InlineCost::getAlways("empty function");

  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
}

bool llvm::isInlineViable(Function &F) {
  bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
    // Disallow inlining of functions which contain indirect branches or
    // blockaddresses.
    if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
      return false;

    for (auto &II : *BI) {
      CallSite CS(&II);
      if (!CS)
        continue;

      // Disallow recursive calls.
      if (&F == CS.getCalledFunction())
        return false;

      // Disallow calls which expose returns-twice to a function not previously
      // attributed as such.
      if (!ReturnsTwice && CS.isCall() &&
          cast<CallInst>(CS.getInstruction())->canReturnTwice())
        return false;

      if (CS.getCalledFunction())
        switch (CS.getCalledFunction()->getIntrinsicID()) {
        default:
          break;
        // Disallow inlining of @llvm.icall.branch.funnel because the current
        // backend can't separate call targets from call arguments.
        case llvm::Intrinsic::icall_branch_funnel:
        // Disallow inlining functions that call @llvm.localescape. Doing this
        // correctly would require major changes to the inliner.
        case llvm::Intrinsic::localescape:
        // Disallow inlining of functions that initialize VarArgs with
        // va_start.
        case llvm::Intrinsic::vastart:
          return false;
        }
    }
  }

  return true;
}

// APIs to create InlineParams based on command line flags and/or other
// parameters.

InlineParams llvm::getInlineParams(int Threshold) {
  InlineParams Params;

  // This field is the threshold to use for a callee by default. This is
  // derived from one or more of:
  //  * optimization or size-optimization levels,
  //  * a value passed to the createFunctionInliningPass function, or
  //  * the -inline-threshold flag.
  // If the -inline-threshold flag is explicitly specified, that is used
  // irrespective of anything else.
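  // For example (illustrative): running a tool that consumes these flags with
  // -inline-threshold=500 makes 500 the DefaultThreshold, overriding the
  // Threshold argument computed from the optimization levels.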
  if (InlineThreshold.getNumOccurrences() > 0)
    Params.DefaultThreshold = InlineThreshold;
  else
    Params.DefaultThreshold = Threshold;

  // Set the HintThreshold knob from the -inlinehint-threshold.
  Params.HintThreshold = HintThreshold;

  // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
  Params.HotCallSiteThreshold = HotCallSiteThreshold;

  // If the -locally-hot-callsite-threshold is explicitly specified, use it to
  // populate LocallyHotCallSiteThreshold. Later, we populate
  // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
  // we know that optimization level is O3 (in the getInlineParams variant that
  // takes the opt and size levels).
  // FIXME: Remove this check (and make the assignment unconditional) after
  // addressing size regression issues at O2.
  if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
    Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;

  // Set the ColdCallSiteThreshold knob from the
  // -inline-cold-callsite-threshold.
  Params.ColdCallSiteThreshold = ColdCallSiteThreshold;

  // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
  // -inline-threshold command line option is not explicitly given. If that
  // option is present, then its value applies even for callees with size and
  // minsize attributes.
  // If -inline-threshold is not specified, also set the ColdThreshold from
  // -inlinecold-threshold even if it is not explicitly passed. If
  // -inline-threshold is specified, then -inlinecold-threshold needs to be
  // explicitly specified to set the ColdThreshold knob.
  if (InlineThreshold.getNumOccurrences() == 0) {
    Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
    Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
    Params.ColdThreshold = ColdThreshold;
  } else if (ColdThreshold.getNumOccurrences() > 0) {
    Params.ColdThreshold = ColdThreshold;
  }
  return Params;
}

InlineParams llvm::getInlineParams() {
  return getInlineParams(InlineThreshold);
}

// Compute the default threshold for inlining based on the opt level and the
// size opt level.
static int computeThresholdFromOptLevels(unsigned OptLevel,
                                         unsigned SizeOptLevel) {
  if (OptLevel > 2)
    return InlineConstants::OptAggressiveThreshold;
  if (SizeOptLevel == 1) // -Os
    return InlineConstants::OptSizeThreshold;
  if (SizeOptLevel == 2) // -Oz
    return InlineConstants::OptMinSizeThreshold;
  return InlineThreshold;
}

InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
  auto Params =
      getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
  // At O3, use the value of the -locally-hot-callsite-threshold option to
  // populate Params.LocallyHotCallSiteThreshold. Below O3, this flag has an
  // effect only when it is specified explicitly.
  if (OptLevel > 2)
    Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
  return Params;
}
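
// Illustrative usage sketch (assumptions: the caller already has a CallSite
// CS, the callee's TargetTransformInfo, an AssumptionCache getter, and
// optional BFI/PSI/ORE handles; InlineFunction comes from
// llvm/Transforms/Utils/Cloning.h):
//
//   InlineParams Params = getInlineParams(OptLevel, SizeOptLevel);
//   InlineCost IC = getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache,
//                                 GetBFI, PSI, &ORE);
//   InlineFunctionInfo IFI;
//   if (IC) // True when the cost is below the adjusted threshold or "always".
//     InlineFunction(CS, IFI);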