1 //===- FunctionSpecialization.cpp - Function Specialization ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This specialises functions with constant parameters. Constant parameters 10 // like function pointers and constant globals are propagated to the callee by 11 // specializing the function. The main benefit of this pass at the moment is 12 // that indirect calls are transformed into direct calls, which provides inline 13 // opportunities that the inliner would not have been able to achieve. That's 14 // why function specialisation is run before the inliner in the optimisation 15 // pipeline; that is by design. Otherwise, we would only benefit from constant 16 // passing, which is a valid use-case too, but hasn't been explored much in 17 // terms of performance uplifts, cost-model and compile-time impact. 18 // 19 // Current limitations: 20 // - It does not yet handle integer ranges. We do support "literal constants", 21 // but that's off by default under an option. 22 // - Only 1 argument per function is specialised, 23 // - The cost-model could be further looked into (it mainly focuses on inlining 24 // benefits), 25 // - We are not yet caching analysis results, but profiling and checking where 26 // extra compile time is spent didn't suggest this to be a problem. 27 // 28 // Ideas: 29 // - With a function specialization attribute for arguments, we could have 30 // a direct way to steer function specialization, avoiding the cost-model, 31 // and thus control compile-times / code-size. 32 // 33 // Todos: 34 // - Specializing recursive functions relies on running the transformation a 35 // number of times, which is controlled by option 36 // `func-specialization-max-iters`. Thus, increasing this value and the 37 // number of iterations, will linearly increase the number of times recursive 38 // functions get specialized, see also the discussion in 39 // https://reviews.llvm.org/D106426 for details. Perhaps there is a 40 // compile-time friendlier way to control/limit the number of specialisations 41 // for recursive functions. 42 // - Don't transform the function if function specialization does not trigger; 43 // the SCCPSolver may make IR changes. 44 // 45 // References: 46 // - 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable 47 // it by default?”, https://www.youtube.com/watch?v=zJiCjeXgV5Q 48 // 49 //===----------------------------------------------------------------------===// 50 51 #include "llvm/ADT/Statistic.h" 52 #include "llvm/Analysis/AssumptionCache.h" 53 #include "llvm/Analysis/CodeMetrics.h" 54 #include "llvm/Analysis/DomTreeUpdater.h" 55 #include "llvm/Analysis/InlineCost.h" 56 #include "llvm/Analysis/LoopInfo.h" 57 #include "llvm/Analysis/TargetLibraryInfo.h" 58 #include "llvm/Analysis/TargetTransformInfo.h" 59 #include "llvm/Analysis/ValueLattice.h" 60 #include "llvm/Analysis/ValueLatticeUtils.h" 61 #include "llvm/IR/IntrinsicInst.h" 62 #include "llvm/Transforms/Scalar/SCCP.h" 63 #include "llvm/Transforms/Utils/Cloning.h" 64 #include "llvm/Transforms/Utils/SCCPSolver.h" 65 #include "llvm/Transforms/Utils/SizeOpts.h" 66 #include <cmath> 67 68 using namespace llvm; 69 70 #define DEBUG_TYPE "function-specialization" 71 72 STATISTIC(NumFuncSpecialized, "Number of functions specialized"); 73 74 static cl::opt<bool> ForceFunctionSpecialization( 75 "force-function-specialization", cl::init(false), cl::Hidden, 76 cl::desc("Force function specialization for every call site with a " 77 "constant argument")); 78 79 static cl::opt<unsigned> FuncSpecializationMaxIters( 80 "func-specialization-max-iters", cl::Hidden, 81 cl::desc("The maximum number of iterations function specialization is run"), 82 cl::init(1)); 83 84 static cl::opt<unsigned> MaxClonesThreshold( 85 "func-specialization-max-clones", cl::Hidden, 86 cl::desc("The maximum number of clones allowed for a single function " 87 "specialization"), 88 cl::init(3)); 89 90 static cl::opt<unsigned> SmallFunctionThreshold( 91 "func-specialization-size-threshold", cl::Hidden, 92 cl::desc("Don't specialize functions that have less than this theshold " 93 "number of instructions"), 94 cl::init(100)); 95 96 static cl::opt<unsigned> 97 AvgLoopIterationCount("func-specialization-avg-iters-cost", cl::Hidden, 98 cl::desc("Average loop iteration count cost"), 99 cl::init(10)); 100 101 static cl::opt<bool> SpecializeOnAddresses( 102 "func-specialization-on-address", cl::init(false), cl::Hidden, 103 cl::desc("Enable function specialization on the address of global values")); 104 105 // TODO: This needs checking to see the impact on compile-times, which is why 106 // this is off by default for now. 107 static cl::opt<bool> EnableSpecializationForLiteralConstant( 108 "function-specialization-for-literal-constant", cl::init(false), cl::Hidden, 109 cl::desc("Enable specialization of functions that take a literal constant " 110 "as an argument.")); 111 112 namespace { 113 // Bookkeeping struct to pass data from the analysis and profitability phase 114 // to the actual transform helper functions. 115 struct SpecializationInfo { 116 ArgInfo Arg; // Stores the {formal,actual} argument pair. 117 InstructionCost Gain; // Profitability: Gain = Bonus - Cost. 118 119 SpecializationInfo(Argument *A, Constant *C, InstructionCost G) 120 : Arg(A, C), Gain(G){}; 121 }; 122 } // Anonymous namespace 123 124 using FuncList = SmallVectorImpl<Function *>; 125 using ConstList = SmallVector<Constant *>; 126 using SpecializationList = SmallVector<SpecializationInfo>; 127 128 // Helper to check if \p LV is either a constant or a constant 129 // range with a single element. This should cover exactly the same cases as the 130 // old ValueLatticeElement::isConstant() and is intended to be used in the 131 // transition to ValueLatticeElement. 132 static bool isConstant(const ValueLatticeElement &LV) { 133 return LV.isConstant() || 134 (LV.isConstantRange() && LV.getConstantRange().isSingleElement()); 135 } 136 137 // Helper to check if \p LV is either overdefined or a constant int. 138 static bool isOverdefined(const ValueLatticeElement &LV) { 139 return !LV.isUnknownOrUndef() && !isConstant(LV); 140 } 141 142 static Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call) { 143 Value *StoreValue = nullptr; 144 for (auto *User : Alloca->users()) { 145 // We can't use llvm::isAllocaPromotable() as that would fail because of 146 // the usage in the CallInst, which is what we check here. 147 if (User == Call) 148 continue; 149 if (auto *Bitcast = dyn_cast<BitCastInst>(User)) { 150 if (!Bitcast->hasOneUse() || *Bitcast->user_begin() != Call) 151 return nullptr; 152 continue; 153 } 154 155 if (auto *Store = dyn_cast<StoreInst>(User)) { 156 // This is a duplicate store, bail out. 157 if (StoreValue || Store->isVolatile()) 158 return nullptr; 159 StoreValue = Store->getValueOperand(); 160 continue; 161 } 162 // Bail if there is any other unknown usage. 163 return nullptr; 164 } 165 return dyn_cast_or_null<Constant>(StoreValue); 166 } 167 168 // A constant stack value is an AllocaInst that has a single constant 169 // value stored to it. Return this constant if such an alloca stack value 170 // is a function argument. 171 static Constant *getConstantStackValue(CallInst *Call, Value *Val, 172 SCCPSolver &Solver) { 173 if (!Val) 174 return nullptr; 175 Val = Val->stripPointerCasts(); 176 if (auto *ConstVal = dyn_cast<ConstantInt>(Val)) 177 return ConstVal; 178 auto *Alloca = dyn_cast<AllocaInst>(Val); 179 if (!Alloca || !Alloca->getAllocatedType()->isIntegerTy()) 180 return nullptr; 181 return getPromotableAlloca(Alloca, Call); 182 } 183 184 // To support specializing recursive functions, it is important to propagate 185 // constant arguments because after a first iteration of specialisation, a 186 // reduced example may look like this: 187 // 188 // define internal void @RecursiveFn(i32* arg1) { 189 // %temp = alloca i32, align 4 190 // store i32 2 i32* %temp, align 4 191 // call void @RecursiveFn.1(i32* nonnull %temp) 192 // ret void 193 // } 194 // 195 // Before a next iteration, we need to propagate the constant like so 196 // which allows further specialization in next iterations. 197 // 198 // @funcspec.arg = internal constant i32 2 199 // 200 // define internal void @someFunc(i32* arg1) { 201 // call void @otherFunc(i32* nonnull @funcspec.arg) 202 // ret void 203 // } 204 // 205 static void constantArgPropagation(FuncList &WorkList, 206 Module &M, SCCPSolver &Solver) { 207 // Iterate over the argument tracked functions see if there 208 // are any new constant values for the call instruction via 209 // stack variables. 210 for (auto *F : WorkList) { 211 // TODO: Generalize for any read only arguments. 212 if (F->arg_size() != 1) 213 continue; 214 215 auto &Arg = *F->arg_begin(); 216 if (!Arg.onlyReadsMemory() || !Arg.getType()->isPointerTy()) 217 continue; 218 219 for (auto *User : F->users()) { 220 auto *Call = dyn_cast<CallInst>(User); 221 if (!Call) 222 break; 223 auto *ArgOp = Call->getArgOperand(0); 224 auto *ArgOpType = ArgOp->getType(); 225 auto *ConstVal = getConstantStackValue(Call, ArgOp, Solver); 226 if (!ConstVal) 227 break; 228 229 Value *GV = new GlobalVariable(M, ConstVal->getType(), true, 230 GlobalValue::InternalLinkage, ConstVal, 231 "funcspec.arg"); 232 233 if (ArgOpType != ConstVal->getType()) 234 GV = ConstantExpr::getBitCast(cast<Constant>(GV), ArgOp->getType()); 235 236 Call->setArgOperand(0, GV); 237 238 // Add the changed CallInst to Solver Worklist 239 Solver.visitCall(*Call); 240 } 241 } 242 } 243 244 // ssa_copy intrinsics are introduced by the SCCP solver. These intrinsics 245 // interfere with the constantArgPropagation optimization. 246 static void removeSSACopy(Function &F) { 247 for (BasicBlock &BB : F) { 248 for (Instruction &Inst : llvm::make_early_inc_range(BB)) { 249 auto *II = dyn_cast<IntrinsicInst>(&Inst); 250 if (!II) 251 continue; 252 if (II->getIntrinsicID() != Intrinsic::ssa_copy) 253 continue; 254 Inst.replaceAllUsesWith(II->getOperand(0)); 255 Inst.eraseFromParent(); 256 } 257 } 258 } 259 260 static void removeSSACopy(Module &M) { 261 for (Function &F : M) 262 removeSSACopy(F); 263 } 264 265 namespace { 266 class FunctionSpecializer { 267 268 /// The IPSCCP Solver. 269 SCCPSolver &Solver; 270 271 /// Analyses used to help determine if a function should be specialized. 272 std::function<AssumptionCache &(Function &)> GetAC; 273 std::function<TargetTransformInfo &(Function &)> GetTTI; 274 std::function<TargetLibraryInfo &(Function &)> GetTLI; 275 276 SmallPtrSet<Function *, 4> SpecializedFuncs; 277 SmallPtrSet<Function *, 4> FullySpecialized; 278 SmallVector<Instruction *> ReplacedWithConstant; 279 280 public: 281 FunctionSpecializer(SCCPSolver &Solver, 282 std::function<AssumptionCache &(Function &)> GetAC, 283 std::function<TargetTransformInfo &(Function &)> GetTTI, 284 std::function<TargetLibraryInfo &(Function &)> GetTLI) 285 : Solver(Solver), GetAC(GetAC), GetTTI(GetTTI), GetTLI(GetTLI) {} 286 287 ~FunctionSpecializer() { 288 // Eliminate dead code. 289 removeDeadInstructions(); 290 removeDeadFunctions(); 291 } 292 293 /// Attempt to specialize functions in the module to enable constant 294 /// propagation across function boundaries. 295 /// 296 /// \returns true if at least one function is specialized. 297 bool specializeFunctions(FuncList &Candidates, FuncList &WorkList) { 298 bool Changed = false; 299 for (auto *F : Candidates) { 300 if (!isCandidateFunction(F)) 301 continue; 302 303 auto Cost = getSpecializationCost(F); 304 if (!Cost.isValid()) { 305 LLVM_DEBUG( 306 dbgs() << "FnSpecialization: Invalid specialisation cost.\n"); 307 continue; 308 } 309 310 LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for " 311 << F->getName() << " is " << Cost << "\n"); 312 313 SpecializationList Specializations; 314 calculateGains(F, Cost, Specializations); 315 if (Specializations.empty()) { 316 LLVM_DEBUG(dbgs() << "FnSpecialization: no possible constants found\n"); 317 continue; 318 } 319 320 for (SpecializationInfo &S : Specializations) { 321 specializeFunction(F, S, WorkList); 322 Changed = true; 323 } 324 } 325 326 updateSpecializedFuncs(Candidates, WorkList); 327 NumFuncSpecialized += NbFunctionsSpecialized; 328 return Changed; 329 } 330 331 void removeDeadInstructions() { 332 for (auto *I : ReplacedWithConstant) { 333 LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead instruction " 334 << *I << "\n"); 335 I->eraseFromParent(); 336 } 337 ReplacedWithConstant.clear(); 338 } 339 340 void removeDeadFunctions() { 341 for (auto *F : FullySpecialized) { 342 LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead function " 343 << F->getName() << "\n"); 344 F->eraseFromParent(); 345 } 346 FullySpecialized.clear(); 347 } 348 349 bool tryToReplaceWithConstant(Value *V) { 350 if (!V->getType()->isSingleValueType() || isa<CallBase>(V) || 351 V->user_empty()) 352 return false; 353 354 const ValueLatticeElement &IV = Solver.getLatticeValueFor(V); 355 if (isOverdefined(IV)) 356 return false; 357 auto *Const = 358 isConstant(IV) ? Solver.getConstant(IV) : UndefValue::get(V->getType()); 359 360 LLVM_DEBUG(dbgs() << "FnSpecialization: Replacing " << *V 361 << "\nFnSpecialization: with " << *Const << "\n"); 362 363 // Record uses of V to avoid visiting irrelevant uses of const later. 364 SmallVector<Instruction *> UseInsts; 365 for (auto *U : V->users()) 366 if (auto *I = dyn_cast<Instruction>(U)) 367 if (Solver.isBlockExecutable(I->getParent())) 368 UseInsts.push_back(I); 369 370 V->replaceAllUsesWith(Const); 371 372 for (auto *I : UseInsts) 373 Solver.visit(I); 374 375 // Remove the instruction from Block and Solver. 376 if (auto *I = dyn_cast<Instruction>(V)) { 377 if (I->isSafeToRemove()) { 378 ReplacedWithConstant.push_back(I); 379 Solver.removeLatticeValueFor(I); 380 } 381 } 382 return true; 383 } 384 385 private: 386 // The number of functions specialised, used for collecting statistics and 387 // also in the cost model. 388 unsigned NbFunctionsSpecialized = 0; 389 390 /// Clone the function \p F and remove the ssa_copy intrinsics added by 391 /// the SCCPSolver in the cloned version. 392 Function *cloneCandidateFunction(Function *F, ValueToValueMapTy &Mappings) { 393 Function *Clone = CloneFunction(F, Mappings); 394 removeSSACopy(*Clone); 395 return Clone; 396 } 397 398 /// This function decides whether it's worthwhile to specialize function \p F 399 /// based on the known constant values its arguments can take on, i.e. it 400 /// calculates a gain and returns a list of actual arguments that are deemed 401 /// profitable to specialize. Specialization is performed on the first 402 /// interesting argument. Specializations based on additional arguments will 403 /// be evaluated on following iterations of the main IPSCCP solve loop. 404 void calculateGains(Function *F, InstructionCost Cost, 405 SpecializationList &WorkList) { 406 // Determine if we should specialize the function based on the values the 407 // argument can take on. If specialization is not profitable, we continue 408 // on to the next argument. 409 for (Argument &FormalArg : F->args()) { 410 // Determine if this argument is interesting. If we know the argument can 411 // take on any constant values, they are collected in Constants. 412 ConstList ActualArgs; 413 if (!isArgumentInteresting(&FormalArg, ActualArgs)) { 414 LLVM_DEBUG(dbgs() << "FnSpecialization: Argument " 415 << FormalArg.getNameOrAsOperand() 416 << " is not interesting\n"); 417 continue; 418 } 419 420 for (auto *ActualArg : ActualArgs) { 421 InstructionCost Gain = 422 ForceFunctionSpecialization 423 ? 1 424 : getSpecializationBonus(&FormalArg, ActualArg) - Cost; 425 426 if (Gain <= 0) 427 continue; 428 WorkList.push_back({&FormalArg, ActualArg, Gain}); 429 } 430 431 if (WorkList.empty()) 432 continue; 433 434 // Sort the candidates in descending order. 435 llvm::stable_sort(WorkList, [](const SpecializationInfo &L, 436 const SpecializationInfo &R) { 437 return L.Gain > R.Gain; 438 }); 439 440 // Truncate the worklist to 'MaxClonesThreshold' candidates if 441 // necessary. 442 if (WorkList.size() > MaxClonesThreshold) { 443 LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed " 444 << "the maximum number of clones threshold.\n" 445 << "FnSpecialization: Truncating worklist to " 446 << MaxClonesThreshold << " candidates.\n"); 447 WorkList.erase(WorkList.begin() + MaxClonesThreshold, WorkList.end()); 448 } 449 450 LLVM_DEBUG(dbgs() << "FnSpecialization: Specializations for function " 451 << F->getName() << "\n"; 452 for (SpecializationInfo &S : WorkList) { 453 dbgs() << "FnSpecialization: FormalArg = " 454 << S.Arg.Formal->getNameOrAsOperand() 455 << ", ActualArg = " 456 << S.Arg.Actual->getNameOrAsOperand() 457 << ", Gain = " << S.Gain << "\n"; 458 }); 459 460 // FIXME: Only one argument per function. 461 break; 462 } 463 } 464 465 bool isCandidateFunction(Function *F) { 466 // Do not specialize the cloned function again. 467 if (SpecializedFuncs.contains(F)) 468 return false; 469 470 // If we're optimizing the function for size, we shouldn't specialize it. 471 if (F->hasOptSize() || 472 shouldOptimizeForSize(F, nullptr, nullptr, PGSOQueryType::IRPass)) 473 return false; 474 475 // Exit if the function is not executable. There's no point in specializing 476 // a dead function. 477 if (!Solver.isBlockExecutable(&F->getEntryBlock())) 478 return false; 479 480 // It wastes time to specialize a function which would get inlined finally. 481 if (F->hasFnAttribute(Attribute::AlwaysInline)) 482 return false; 483 484 LLVM_DEBUG(dbgs() << "FnSpecialization: Try function: " << F->getName() 485 << "\n"); 486 return true; 487 } 488 489 void specializeFunction(Function *F, SpecializationInfo &S, 490 FuncList &WorkList) { 491 ValueToValueMapTy Mappings; 492 Function *Clone = cloneCandidateFunction(F, Mappings); 493 494 // Rewrite calls to the function so that they call the clone instead. 495 rewriteCallSites(Clone, S.Arg, Mappings); 496 497 // Initialize the lattice state of the arguments of the function clone, 498 // marking the argument on which we specialized the function constant 499 // with the given value. 500 Solver.markArgInFuncSpecialization(Clone, S.Arg); 501 502 // Mark all the specialized functions 503 WorkList.push_back(Clone); 504 NbFunctionsSpecialized++; 505 506 // If the function has been completely specialized, the original function 507 // is no longer needed. Mark it unreachable. 508 if (F->getNumUses() == 0 || all_of(F->users(), [F](User *U) { 509 if (auto *CS = dyn_cast<CallBase>(U)) 510 return CS->getFunction() == F; 511 return false; 512 })) { 513 Solver.markFunctionUnreachable(F); 514 FullySpecialized.insert(F); 515 } 516 } 517 518 /// Compute and return the cost of specializing function \p F. 519 InstructionCost getSpecializationCost(Function *F) { 520 // Compute the code metrics for the function. 521 SmallPtrSet<const Value *, 32> EphValues; 522 CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues); 523 CodeMetrics Metrics; 524 for (BasicBlock &BB : *F) 525 Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues); 526 527 // If the code metrics reveal that we shouldn't duplicate the function, we 528 // shouldn't specialize it. Set the specialization cost to Invalid. 529 // Or if the lines of codes implies that this function is easy to get 530 // inlined so that we shouldn't specialize it. 531 if (Metrics.notDuplicatable || 532 (!ForceFunctionSpecialization && 533 Metrics.NumInsts < SmallFunctionThreshold)) { 534 InstructionCost C{}; 535 C.setInvalid(); 536 return C; 537 } 538 539 // Otherwise, set the specialization cost to be the cost of all the 540 // instructions in the function and penalty for specializing more functions. 541 unsigned Penalty = NbFunctionsSpecialized + 1; 542 return Metrics.NumInsts * InlineConstants::InstrCost * Penalty; 543 } 544 545 InstructionCost getUserBonus(User *U, llvm::TargetTransformInfo &TTI, 546 LoopInfo &LI) { 547 auto *I = dyn_cast_or_null<Instruction>(U); 548 // If not an instruction we do not know how to evaluate. 549 // Keep minimum possible cost for now so that it doesnt affect 550 // specialization. 551 if (!I) 552 return std::numeric_limits<unsigned>::min(); 553 554 auto Cost = TTI.getUserCost(U, TargetTransformInfo::TCK_SizeAndLatency); 555 556 // Traverse recursively if there are more uses. 557 // TODO: Any other instructions to be added here? 558 if (I->mayReadFromMemory() || I->isCast()) 559 for (auto *User : I->users()) 560 Cost += getUserBonus(User, TTI, LI); 561 562 // Increase the cost if it is inside the loop. 563 auto LoopDepth = LI.getLoopDepth(I->getParent()); 564 Cost *= std::pow((double)AvgLoopIterationCount, LoopDepth); 565 return Cost; 566 } 567 568 /// Compute a bonus for replacing argument \p A with constant \p C. 569 InstructionCost getSpecializationBonus(Argument *A, Constant *C) { 570 Function *F = A->getParent(); 571 DominatorTree DT(*F); 572 LoopInfo LI(DT); 573 auto &TTI = (GetTTI)(*F); 574 LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: " 575 << C->getNameOrAsOperand() << "\n"); 576 577 InstructionCost TotalCost = 0; 578 for (auto *U : A->users()) { 579 TotalCost += getUserBonus(U, TTI, LI); 580 LLVM_DEBUG(dbgs() << "FnSpecialization: User cost "; 581 TotalCost.print(dbgs()); dbgs() << " for: " << *U << "\n"); 582 } 583 584 // The below heuristic is only concerned with exposing inlining 585 // opportunities via indirect call promotion. If the argument is not a 586 // (potentially casted) function pointer, give up. 587 Function *CalledFunction = dyn_cast<Function>(C->stripPointerCasts()); 588 if (!CalledFunction) 589 return TotalCost; 590 591 // Get TTI for the called function (used for the inline cost). 592 auto &CalleeTTI = (GetTTI)(*CalledFunction); 593 594 // Look at all the call sites whose called value is the argument. 595 // Specializing the function on the argument would allow these indirect 596 // calls to be promoted to direct calls. If the indirect call promotion 597 // would likely enable the called function to be inlined, specializing is a 598 // good idea. 599 int Bonus = 0; 600 for (User *U : A->users()) { 601 if (!isa<CallInst>(U) && !isa<InvokeInst>(U)) 602 continue; 603 auto *CS = cast<CallBase>(U); 604 if (CS->getCalledOperand() != A) 605 continue; 606 607 // Get the cost of inlining the called function at this call site. Note 608 // that this is only an estimate. The called function may eventually 609 // change in a way that leads to it not being inlined here, even though 610 // inlining looks profitable now. For example, one of its called 611 // functions may be inlined into it, making the called function too large 612 // to be inlined into this call site. 613 // 614 // We apply a boost for performing indirect call promotion by increasing 615 // the default threshold by the threshold for indirect calls. 616 auto Params = getInlineParams(); 617 Params.DefaultThreshold += InlineConstants::IndirectCallThreshold; 618 InlineCost IC = 619 getInlineCost(*CS, CalledFunction, Params, CalleeTTI, GetAC, GetTLI); 620 621 // We clamp the bonus for this call to be between zero and the default 622 // threshold. 623 if (IC.isAlways()) 624 Bonus += Params.DefaultThreshold; 625 else if (IC.isVariable() && IC.getCostDelta() > 0) 626 Bonus += IC.getCostDelta(); 627 628 LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << Bonus 629 << " for user " << *U << "\n"); 630 } 631 632 return TotalCost + Bonus; 633 } 634 635 /// Determine if we should specialize a function based on the incoming values 636 /// of the given argument. 637 /// 638 /// This function implements the goal-directed heuristic. It determines if 639 /// specializing the function based on the incoming values of argument \p A 640 /// would result in any significant optimization opportunities. If 641 /// optimization opportunities exist, the constant values of \p A on which to 642 /// specialize the function are collected in \p Constants. 643 /// 644 /// \returns true if the function should be specialized on the given 645 /// argument. 646 bool isArgumentInteresting(Argument *A, ConstList &Constants) { 647 // For now, don't attempt to specialize functions based on the values of 648 // composite types. 649 if (!A->getType()->isSingleValueType() || A->user_empty()) 650 return false; 651 652 // If the argument isn't overdefined, there's nothing to do. It should 653 // already be constant. 654 if (!Solver.getLatticeValueFor(A).isOverdefined()) { 655 LLVM_DEBUG(dbgs() << "FnSpecialization: Nothing to do, argument " 656 << A->getNameOrAsOperand() 657 << " is already constant?\n"); 658 return false; 659 } 660 661 // Collect the constant values that the argument can take on. If the 662 // argument can't take on any constant values, we aren't going to 663 // specialize the function. While it's possible to specialize the function 664 // based on non-constant arguments, there's likely not much benefit to 665 // constant propagation in doing so. 666 // 667 // TODO 1: currently it won't specialize if there are over the threshold of 668 // calls using the same argument, e.g foo(a) x 4 and foo(b) x 1, but it 669 // might be beneficial to take the occurrences into account in the cost 670 // model, so we would need to find the unique constants. 671 // 672 // TODO 2: this currently does not support constants, i.e. integer ranges. 673 // 674 getPossibleConstants(A, Constants); 675 676 if (Constants.empty()) 677 return false; 678 679 LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument " 680 << A->getNameOrAsOperand() << "\n"); 681 return true; 682 } 683 684 /// Collect in \p Constants all the constant values that argument \p A can 685 /// take on. 686 void getPossibleConstants(Argument *A, ConstList &Constants) { 687 Function *F = A->getParent(); 688 689 // Iterate over all the call sites of the argument's parent function. 690 for (User *U : F->users()) { 691 if (!isa<CallInst>(U) && !isa<InvokeInst>(U)) 692 continue; 693 auto &CS = *cast<CallBase>(U); 694 // If the call site has attribute minsize set, that callsite won't be 695 // specialized. 696 if (CS.hasFnAttr(Attribute::MinSize)) 697 continue; 698 699 // If the parent of the call site will never be executed, we don't need 700 // to worry about the passed value. 701 if (!Solver.isBlockExecutable(CS.getParent())) 702 continue; 703 704 auto *V = CS.getArgOperand(A->getArgNo()); 705 if (isa<PoisonValue>(V)) 706 return; 707 708 // For now, constant expressions are fine but only if they are function 709 // calls. 710 if (auto *CE = dyn_cast<ConstantExpr>(V)) 711 if (!isa<Function>(CE->getOperand(0))) 712 return; 713 714 // TrackValueOfGlobalVariable only tracks scalar global variables. 715 if (auto *GV = dyn_cast<GlobalVariable>(V)) { 716 // Check if we want to specialize on the address of non-constant 717 // global values. 718 if (!GV->isConstant()) 719 if (!SpecializeOnAddresses) 720 return; 721 722 if (!GV->getValueType()->isSingleValueType()) 723 return; 724 } 725 726 if (isa<Constant>(V) && (Solver.getLatticeValueFor(V).isConstant() || 727 EnableSpecializationForLiteralConstant)) 728 Constants.push_back(cast<Constant>(V)); 729 } 730 } 731 732 /// Rewrite calls to function \p F to call function \p Clone instead. 733 /// 734 /// This function modifies calls to function \p F as long as the actual 735 /// argument matches the one in \p Arg. Note that for recursive calls we 736 /// need to compare against the cloned formal argument. 737 /// 738 /// Callsites that have been marked with the MinSize function attribute won't 739 /// be specialized and rewritten. 740 void rewriteCallSites(Function *Clone, const ArgInfo &Arg, 741 ValueToValueMapTy &Mappings) { 742 Function *F = Arg.Formal->getParent(); 743 unsigned ArgNo = Arg.Formal->getArgNo(); 744 SmallVector<CallBase *, 4> CallSitesToRewrite; 745 for (auto *U : F->users()) { 746 if (!isa<CallInst>(U) && !isa<InvokeInst>(U)) 747 continue; 748 auto &CS = *cast<CallBase>(U); 749 if (!CS.getCalledFunction() || CS.getCalledFunction() != F) 750 continue; 751 CallSitesToRewrite.push_back(&CS); 752 } 753 754 LLVM_DEBUG(dbgs() << "FnSpecialization: Replacing call sites of " 755 << F->getName() << " with " 756 << Clone->getName() << "\n"); 757 758 for (auto *CS : CallSitesToRewrite) { 759 LLVM_DEBUG(dbgs() << "FnSpecialization: " 760 << CS->getFunction()->getName() << " ->" 761 << *CS << "\n"); 762 if (/* recursive call */ 763 (CS->getFunction() == Clone && 764 CS->getArgOperand(ArgNo) == Mappings[Arg.Formal]) || 765 /* normal call */ 766 CS->getArgOperand(ArgNo) == Arg.Actual) { 767 CS->setCalledFunction(Clone); 768 Solver.markOverdefined(CS); 769 } 770 } 771 } 772 773 void updateSpecializedFuncs(FuncList &Candidates, FuncList &WorkList) { 774 for (auto *F : WorkList) { 775 SpecializedFuncs.insert(F); 776 777 // Initialize the state of the newly created functions, marking them 778 // argument-tracked and executable. 779 if (F->hasExactDefinition() && !F->hasFnAttribute(Attribute::Naked)) 780 Solver.addTrackedFunction(F); 781 782 Solver.addArgumentTrackedFunction(F); 783 Candidates.push_back(F); 784 Solver.markBlockExecutable(&F->front()); 785 786 // Replace the function arguments for the specialized functions. 787 for (Argument &Arg : F->args()) 788 if (!Arg.use_empty() && tryToReplaceWithConstant(&Arg)) 789 LLVM_DEBUG(dbgs() << "FnSpecialization: Replaced constant argument: " 790 << Arg.getNameOrAsOperand() << "\n"); 791 } 792 } 793 }; 794 } // namespace 795 796 bool llvm::runFunctionSpecialization( 797 Module &M, const DataLayout &DL, 798 std::function<TargetLibraryInfo &(Function &)> GetTLI, 799 std::function<TargetTransformInfo &(Function &)> GetTTI, 800 std::function<AssumptionCache &(Function &)> GetAC, 801 function_ref<AnalysisResultsForFn(Function &)> GetAnalysis) { 802 SCCPSolver Solver(DL, GetTLI, M.getContext()); 803 FunctionSpecializer FS(Solver, GetAC, GetTTI, GetTLI); 804 bool Changed = false; 805 806 // Loop over all functions, marking arguments to those with their addresses 807 // taken or that are external as overdefined. 808 for (Function &F : M) { 809 if (F.isDeclaration()) 810 continue; 811 if (F.hasFnAttribute(Attribute::NoDuplicate)) 812 continue; 813 814 LLVM_DEBUG(dbgs() << "\nFnSpecialization: Analysing decl: " << F.getName() 815 << "\n"); 816 Solver.addAnalysis(F, GetAnalysis(F)); 817 818 // Determine if we can track the function's arguments. If so, add the 819 // function to the solver's set of argument-tracked functions. 820 if (canTrackArgumentsInterprocedurally(&F)) { 821 LLVM_DEBUG(dbgs() << "FnSpecialization: Can track arguments\n"); 822 Solver.addArgumentTrackedFunction(&F); 823 continue; 824 } else { 825 LLVM_DEBUG(dbgs() << "FnSpecialization: Can't track arguments!\n" 826 << "FnSpecialization: Doesn't have local linkage, or " 827 << "has its address taken\n"); 828 } 829 830 // Assume the function is called. 831 Solver.markBlockExecutable(&F.front()); 832 833 // Assume nothing about the incoming arguments. 834 for (Argument &AI : F.args()) 835 Solver.markOverdefined(&AI); 836 } 837 838 // Determine if we can track any of the module's global variables. If so, add 839 // the global variables we can track to the solver's set of tracked global 840 // variables. 841 for (GlobalVariable &G : M.globals()) { 842 G.removeDeadConstantUsers(); 843 if (canTrackGlobalVariableInterprocedurally(&G)) 844 Solver.trackValueOfGlobalVariable(&G); 845 } 846 847 auto &TrackedFuncs = Solver.getArgumentTrackedFunctions(); 848 SmallVector<Function *, 16> FuncDecls(TrackedFuncs.begin(), 849 TrackedFuncs.end()); 850 851 // No tracked functions, so nothing to do: don't run the solver and remove 852 // the ssa_copy intrinsics that may have been introduced. 853 if (TrackedFuncs.empty()) { 854 removeSSACopy(M); 855 return false; 856 } 857 858 // Solve for constants. 859 auto RunSCCPSolver = [&](auto &WorkList) { 860 bool ResolvedUndefs = true; 861 862 while (ResolvedUndefs) { 863 // Not running the solver unnecessary is checked in regression test 864 // nothing-to-do.ll, so if this debug message is changed, this regression 865 // test needs updating too. 866 LLVM_DEBUG(dbgs() << "FnSpecialization: Running solver\n"); 867 868 Solver.solve(); 869 LLVM_DEBUG(dbgs() << "FnSpecialization: Resolving undefs\n"); 870 ResolvedUndefs = false; 871 for (Function *F : WorkList) 872 if (Solver.resolvedUndefsIn(*F)) 873 ResolvedUndefs = true; 874 } 875 876 for (auto *F : WorkList) { 877 for (BasicBlock &BB : *F) { 878 if (!Solver.isBlockExecutable(&BB)) 879 continue; 880 // FIXME: The solver may make changes to the function here, so set 881 // Changed, even if later function specialization does not trigger. 882 for (auto &I : make_early_inc_range(BB)) 883 Changed |= FS.tryToReplaceWithConstant(&I); 884 } 885 } 886 }; 887 888 #ifndef NDEBUG 889 LLVM_DEBUG(dbgs() << "FnSpecialization: Worklist fn decls:\n"); 890 for (auto *F : FuncDecls) 891 LLVM_DEBUG(dbgs() << "FnSpecialization: *) " << F->getName() << "\n"); 892 #endif 893 894 // Initially resolve the constants in all the argument tracked functions. 895 RunSCCPSolver(FuncDecls); 896 897 SmallVector<Function *, 2> WorkList; 898 unsigned I = 0; 899 while (FuncSpecializationMaxIters != I++ && 900 FS.specializeFunctions(FuncDecls, WorkList)) { 901 LLVM_DEBUG(dbgs() << "FnSpecialization: Finished iteration " << I << "\n"); 902 903 // Run the solver for the specialized functions. 904 RunSCCPSolver(WorkList); 905 906 // Replace some unresolved constant arguments. 907 constantArgPropagation(FuncDecls, M, Solver); 908 909 WorkList.clear(); 910 Changed = true; 911 } 912 913 LLVM_DEBUG(dbgs() << "FnSpecialization: Number of specializations = " 914 << NumFuncSpecialized <<"\n"); 915 916 // Remove any ssa_copy intrinsics that may have been introduced. 917 removeSSACopy(M); 918 return Changed; 919 } 920