1 //===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the transformation that promotes indirect calls to 10 // conditional direct calls when the indirect-call value profile metadata is 11 // available. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/Statistic.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/Analysis/IndirectCallPromotionAnalysis.h" 19 #include "llvm/Analysis/IndirectCallVisitor.h" 20 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 21 #include "llvm/Analysis/ProfileSummaryInfo.h" 22 #include "llvm/IR/DiagnosticInfo.h" 23 #include "llvm/IR/Function.h" 24 #include "llvm/IR/InstrTypes.h" 25 #include "llvm/IR/Instructions.h" 26 #include "llvm/IR/LLVMContext.h" 27 #include "llvm/IR/MDBuilder.h" 28 #include "llvm/IR/PassManager.h" 29 #include "llvm/IR/Value.h" 30 #include "llvm/InitializePasses.h" 31 #include "llvm/Pass.h" 32 #include "llvm/ProfileData/InstrProf.h" 33 #include "llvm/Support/Casting.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/Debug.h" 36 #include "llvm/Support/Error.h" 37 #include "llvm/Support/raw_ostream.h" 38 #include "llvm/Transforms/Instrumentation.h" 39 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 40 #include "llvm/Transforms/Utils/CallPromotionUtils.h" 41 #include <cassert> 42 #include <cstdint> 43 #include <memory> 44 #include <string> 45 #include <utility> 46 #include <vector> 47 48 using namespace llvm; 49 50 #define DEBUG_TYPE "pgo-icall-prom" 51 52 STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions."); 53 STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites."); 54 55 // Command line option to disable indirect-call promotion with the default as 56 // false. This is for debug purpose. 57 static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden, 58 cl::desc("Disable indirect call promotion")); 59 60 // Set the cutoff value for the promotion. If the value is other than 0, we 61 // stop the transformation once the total number of promotions equals the cutoff 62 // value. 63 // For debug use only. 64 static cl::opt<unsigned> 65 ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::ZeroOrMore, 66 cl::desc("Max number of promotions for this compilation")); 67 68 // If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped. 69 // For debug use only. 70 static cl::opt<unsigned> 71 ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::ZeroOrMore, 72 cl::desc("Skip Callsite up to this number for this compilation")); 73 74 // Set if the pass is called in LTO optimization. The difference for LTO mode 75 // is the pass won't prefix the source module name to the internal linkage 76 // symbols. 77 static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden, 78 cl::desc("Run indirect-call promotion in LTO " 79 "mode")); 80 81 // Set if the pass is called in SamplePGO mode. The difference for SamplePGO 82 // mode is it will add prof metadatato the created direct call. 83 static cl::opt<bool> 84 ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden, 85 cl::desc("Run indirect-call promotion in SamplePGO mode")); 86 87 // If the option is set to true, only call instructions will be considered for 88 // transformation -- invoke instructions will be ignored. 89 static cl::opt<bool> 90 ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden, 91 cl::desc("Run indirect-call promotion for call instructions " 92 "only")); 93 94 // If the option is set to true, only invoke instructions will be considered for 95 // transformation -- call instructions will be ignored. 96 static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false), 97 cl::Hidden, 98 cl::desc("Run indirect-call promotion for " 99 "invoke instruction only")); 100 101 // Dump the function level IR if the transformation happened in this 102 // function. For debug use only. 103 static cl::opt<bool> 104 ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, 105 cl::desc("Dump IR after transformation happens")); 106 107 namespace { 108 109 // The class for main data structure to promote indirect calls to conditional 110 // direct calls. 111 class ICallPromotionFunc { 112 private: 113 Function &F; 114 Module *M; 115 116 // Symtab that maps indirect call profile values to function names and 117 // defines. 118 InstrProfSymtab *Symtab; 119 120 bool SamplePGO; 121 122 OptimizationRemarkEmitter &ORE; 123 124 // A struct that records the direct target and it's call count. 125 struct PromotionCandidate { 126 Function *TargetFunction; 127 uint64_t Count; 128 129 PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {} 130 }; 131 132 // Check if the indirect-call call site should be promoted. Return the number 133 // of promotions. Inst is the candidate indirect call, ValueDataRef 134 // contains the array of value profile data for profiled targets, 135 // TotalCount is the total profiled count of call executions, and 136 // NumCandidates is the number of candidate entries in ValueDataRef. 137 std::vector<PromotionCandidate> getPromotionCandidatesForCallSite( 138 const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef, 139 uint64_t TotalCount, uint32_t NumCandidates); 140 141 // Promote a list of targets for one indirect-call callsite. Return 142 // the number of promotions. 143 uint32_t tryToPromote(CallBase &CB, 144 const std::vector<PromotionCandidate> &Candidates, 145 uint64_t &TotalCount); 146 147 public: 148 ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab, 149 bool SamplePGO, OptimizationRemarkEmitter &ORE) 150 : F(Func), M(Modu), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {} 151 ICallPromotionFunc(const ICallPromotionFunc &) = delete; 152 ICallPromotionFunc &operator=(const ICallPromotionFunc &) = delete; 153 154 bool processFunction(ProfileSummaryInfo *PSI); 155 }; 156 157 } // end anonymous namespace 158 159 // Indirect-call promotion heuristic. The direct targets are sorted based on 160 // the count. Stop at the first target that is not promoted. 161 std::vector<ICallPromotionFunc::PromotionCandidate> 162 ICallPromotionFunc::getPromotionCandidatesForCallSite( 163 const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef, 164 uint64_t TotalCount, uint32_t NumCandidates) { 165 std::vector<PromotionCandidate> Ret; 166 167 LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB 168 << " Num_targets: " << ValueDataRef.size() 169 << " Num_candidates: " << NumCandidates << "\n"); 170 NumOfPGOICallsites++; 171 if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) { 172 LLVM_DEBUG(dbgs() << " Skip: User options.\n"); 173 return Ret; 174 } 175 176 for (uint32_t I = 0; I < NumCandidates; I++) { 177 uint64_t Count = ValueDataRef[I].Count; 178 assert(Count <= TotalCount); 179 (void)TotalCount; 180 uint64_t Target = ValueDataRef[I].Value; 181 LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count 182 << " Target_func: " << Target << "\n"); 183 184 if (ICPInvokeOnly && isa<CallInst>(CB)) { 185 LLVM_DEBUG(dbgs() << " Not promote: User options.\n"); 186 ORE.emit([&]() { 187 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB) 188 << " Not promote: User options"; 189 }); 190 break; 191 } 192 if (ICPCallOnly && isa<InvokeInst>(CB)) { 193 LLVM_DEBUG(dbgs() << " Not promote: User option.\n"); 194 ORE.emit([&]() { 195 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB) 196 << " Not promote: User options"; 197 }); 198 break; 199 } 200 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) { 201 LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n"); 202 ORE.emit([&]() { 203 return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", &CB) 204 << " Not promote: Cutoff reached"; 205 }); 206 break; 207 } 208 209 // Don't promote if the symbol is not defined in the module. This avoids 210 // creating a reference to a symbol that doesn't exist in the module 211 // This can happen when we compile with a sample profile collected from 212 // one binary but used for another, which may have profiled targets that 213 // aren't used in the new binary. We might have a declaration initially in 214 // the case where the symbol is globally dead in the binary and removed by 215 // ThinLTO. 216 Function *TargetFunction = Symtab->getFunction(Target); 217 if (TargetFunction == nullptr || TargetFunction->isDeclaration()) { 218 LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n"); 219 ORE.emit([&]() { 220 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB) 221 << "Cannot promote indirect call: target with md5sum " 222 << ore::NV("target md5sum", Target) << " not found"; 223 }); 224 break; 225 } 226 227 const char *Reason = nullptr; 228 if (!isLegalToPromote(CB, TargetFunction, &Reason)) { 229 using namespace ore; 230 231 ORE.emit([&]() { 232 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB) 233 << "Cannot promote indirect call to " 234 << NV("TargetFunction", TargetFunction) << " with count of " 235 << NV("Count", Count) << ": " << Reason; 236 }); 237 break; 238 } 239 240 Ret.push_back(PromotionCandidate(TargetFunction, Count)); 241 TotalCount -= Count; 242 } 243 return Ret; 244 } 245 246 CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee, 247 uint64_t Count, uint64_t TotalCount, 248 bool AttachProfToDirectCall, 249 OptimizationRemarkEmitter *ORE) { 250 251 uint64_t ElseCount = TotalCount - Count; 252 uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount); 253 uint64_t Scale = calculateCountScale(MaxCount); 254 MDBuilder MDB(CB.getContext()); 255 MDNode *BranchWeights = MDB.createBranchWeights( 256 scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale)); 257 258 CallBase &NewInst = 259 promoteCallWithIfThenElse(CB, DirectCallee, BranchWeights); 260 261 if (AttachProfToDirectCall) { 262 MDBuilder MDB(NewInst.getContext()); 263 NewInst.setMetadata( 264 LLVMContext::MD_prof, 265 MDB.createBranchWeights({static_cast<uint32_t>(Count)})); 266 } 267 268 using namespace ore; 269 270 if (ORE) 271 ORE->emit([&]() { 272 return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB) 273 << "Promote indirect call to " << NV("DirectCallee", DirectCallee) 274 << " with count " << NV("Count", Count) << " out of " 275 << NV("TotalCount", TotalCount); 276 }); 277 return NewInst; 278 } 279 280 // Promote indirect-call to conditional direct-call for one callsite. 281 uint32_t ICallPromotionFunc::tryToPromote( 282 CallBase &CB, const std::vector<PromotionCandidate> &Candidates, 283 uint64_t &TotalCount) { 284 uint32_t NumPromoted = 0; 285 286 for (auto &C : Candidates) { 287 uint64_t Count = C.Count; 288 pgo::promoteIndirectCall(CB, C.TargetFunction, Count, TotalCount, SamplePGO, 289 &ORE); 290 assert(TotalCount >= Count); 291 TotalCount -= Count; 292 NumOfPGOICallPromotion++; 293 NumPromoted++; 294 } 295 return NumPromoted; 296 } 297 298 // Traverse all the indirect-call callsite and get the value profile 299 // annotation to perform indirect-call promotion. 300 bool ICallPromotionFunc::processFunction(ProfileSummaryInfo *PSI) { 301 bool Changed = false; 302 ICallPromotionAnalysis ICallAnalysis; 303 for (auto *CB : findIndirectCalls(F)) { 304 uint32_t NumVals, NumCandidates; 305 uint64_t TotalCount; 306 auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction( 307 CB, NumVals, TotalCount, NumCandidates); 308 if (!NumCandidates || 309 (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount))) 310 continue; 311 auto PromotionCandidates = getPromotionCandidatesForCallSite( 312 *CB, ICallProfDataRef, TotalCount, NumCandidates); 313 uint32_t NumPromoted = tryToPromote(*CB, PromotionCandidates, TotalCount); 314 if (NumPromoted == 0) 315 continue; 316 317 Changed = true; 318 // Adjust the MD.prof metadata. First delete the old one. 319 CB->setMetadata(LLVMContext::MD_prof, nullptr); 320 // If all promoted, we don't need the MD.prof metadata. 321 if (TotalCount == 0 || NumPromoted == NumVals) 322 continue; 323 // Otherwise we need update with the un-promoted records back. 324 annotateValueSite(*M, *CB, ICallProfDataRef.slice(NumPromoted), TotalCount, 325 IPVK_IndirectCallTarget, NumCandidates); 326 } 327 return Changed; 328 } 329 330 // A wrapper function that does the actual work. 331 static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, 332 bool InLTO, bool SamplePGO, 333 ModuleAnalysisManager *AM = nullptr) { 334 if (DisableICP) 335 return false; 336 InstrProfSymtab Symtab; 337 if (Error E = Symtab.create(M, InLTO)) { 338 std::string SymtabFailure = toString(std::move(E)); 339 M.getContext().emitError("Failed to create symtab: " + SymtabFailure); 340 return false; 341 } 342 bool Changed = false; 343 for (auto &F : M) { 344 if (F.isDeclaration() || F.hasOptNone()) 345 continue; 346 347 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE; 348 OptimizationRemarkEmitter *ORE; 349 if (AM) { 350 auto &FAM = 351 AM->getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 352 ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 353 } else { 354 OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F); 355 ORE = OwnedORE.get(); 356 } 357 358 ICallPromotionFunc ICallPromotion(F, &M, &Symtab, SamplePGO, *ORE); 359 bool FuncChanged = ICallPromotion.processFunction(PSI); 360 if (ICPDUMPAFTER && FuncChanged) { 361 LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs())); 362 LLVM_DEBUG(dbgs() << "\n"); 363 } 364 Changed |= FuncChanged; 365 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) { 366 LLVM_DEBUG(dbgs() << " Stop: Cutoff reached.\n"); 367 break; 368 } 369 } 370 return Changed; 371 } 372 373 PreservedAnalyses PGOIndirectCallPromotion::run(Module &M, 374 ModuleAnalysisManager &AM) { 375 ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); 376 377 if (!promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode, 378 SamplePGO | ICPSamplePGOMode, &AM)) 379 return PreservedAnalyses::all(); 380 381 return PreservedAnalyses::none(); 382 } 383