1 //===-- PGOInstrumentation.cpp - MST-based PGO Instrumentation ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements PGO instrumentation using a minimum spanning tree based 11 // on the following paper: 12 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 13 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 14 // Issue 3, pp 313-322 15 // The idea of the algorithm based on the fact that for each node (except for 16 // the entry and exit), the sum of incoming edge counts equals the sum of 17 // outgoing edge counts. The count of edge on spanning tree can be derived from 18 // those edges not on the spanning tree. Knuth proves this method instruments 19 // the minimum number of edges. 20 // 21 // The minimal spanning tree here is actually a maximum weight tree -- on-tree 22 // edges have higher frequencies (more likely to execute). The idea is to 23 // instrument those less frequently executed edges to reduce the runtime 24 // overhead of instrumented binaries. 25 // 26 // This file contains two passes: 27 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 28 // count profile, and generates the instrumentation for indirect call 29 // profiling. 30 // (2) Pass PGOInstrumentationUse which reads the edge count profile and 31 // annotates the branch weights. It also reads the indirect call value 32 // profiling records and annotate the indirect call instructions. 33 // 34 // To get the precise counter information, These two passes need to invoke at 35 // the same compilation point (so they see the same IR). For pass 36 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For 37 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 38 // the profile is opened in module level and passed to each PGOUseFunc instance. 39 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 40 // in class FuncPGOInstrumentation. 41 // 42 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 43 // BBInfo contains auxiliary information for each BB. These two classes are used 44 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 45 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 46 // used in populating profile counters. 47 // The MST implementation is in Class CFGMST (CFGMST.h). 48 // 49 //===----------------------------------------------------------------------===// 50 51 #include "llvm/Transforms/PGOInstrumentation.h" 52 #include "CFGMST.h" 53 #include "llvm/ADT/STLExtras.h" 54 #include "llvm/ADT/SmallVector.h" 55 #include "llvm/ADT/Statistic.h" 56 #include "llvm/ADT/Triple.h" 57 #include "llvm/Analysis/BlockFrequencyInfo.h" 58 #include "llvm/Analysis/BranchProbabilityInfo.h" 59 #include "llvm/Analysis/CFG.h" 60 #include "llvm/Analysis/IndirectCallSiteVisitor.h" 61 #include "llvm/IR/CallSite.h" 62 #include "llvm/IR/DiagnosticInfo.h" 63 #include "llvm/IR/GlobalValue.h" 64 #include "llvm/IR/IRBuilder.h" 65 #include "llvm/IR/InstIterator.h" 66 #include "llvm/IR/Instructions.h" 67 #include "llvm/IR/IntrinsicInst.h" 68 #include "llvm/IR/MDBuilder.h" 69 #include "llvm/IR/Module.h" 70 #include "llvm/Pass.h" 71 #include "llvm/ProfileData/InstrProfReader.h" 72 #include "llvm/ProfileData/ProfileCommon.h" 73 #include "llvm/Support/BranchProbability.h" 74 #include "llvm/Support/Debug.h" 75 #include "llvm/Support/JamCRC.h" 76 #include "llvm/Transforms/Instrumentation.h" 77 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 78 #include <algorithm> 79 #include <string> 80 #include <unordered_map> 81 #include <utility> 82 #include <vector> 83 84 using namespace llvm; 85 86 #define DEBUG_TYPE "pgo-instrumentation" 87 88 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 89 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 90 STATISTIC(NumOfPGOEdge, "Number of edges."); 91 STATISTIC(NumOfPGOBB, "Number of basic-blocks."); 92 STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 93 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 94 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 95 STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 96 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 97 98 // Command line option to specify the file to read profile from. This is 99 // mainly used for testing. 100 static cl::opt<std::string> 101 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, 102 cl::value_desc("filename"), 103 cl::desc("Specify the path of profile data file. This is" 104 "mainly for test purpose.")); 105 106 // Command line option to disable value profiling. The default is false: 107 // i.e. value profiling is enabled by default. This is for debug purpose. 108 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 109 cl::Hidden, 110 cl::desc("Disable Value Profiling")); 111 112 // Command line option to set the maximum number of VP annotations to write to 113 // the metadata for a single indirect call callsite. 114 static cl::opt<unsigned> MaxNumAnnotations( 115 "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore, 116 cl::desc("Max number of annotations for a single indirect " 117 "call callsite")); 118 119 // Command line option to control appending FunctionHash to the name of a COMDAT 120 // function. This is to avoid the hash mismatch caused by the preinliner. 121 static cl::opt<bool> DoComdatRenaming( 122 "do-comdat-renaming", cl::init(false), cl::Hidden, 123 cl::desc("Append function hash to the name of COMDAT function to avoid " 124 "function hash mismatch due to the preinliner")); 125 126 // Command line option to enable/disable the warning about missing profile 127 // information. 128 static cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", 129 cl::init(false), 130 cl::Hidden); 131 132 // Command line option to enable/disable the warning about a hash mismatch in 133 // the profile data. 134 static cl::opt<bool> NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), 135 cl::Hidden); 136 137 // Command line option to enable/disable the warning about a hash mismatch in 138 // the profile data for Comdat functions, which often turns out to be false 139 // positive due to the pre-instrumentation inline. 140 static cl::opt<bool> NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", 141 cl::init(true), cl::Hidden); 142 143 // Command line option to enable/disable select instruction instrumentation. 144 static cl::opt<bool> PGOInstrSelect("pgo-instr-select", cl::init(true), 145 cl::Hidden); 146 namespace { 147 148 /// The select instruction visitor plays three roles specified 149 /// by the mode. In \c VM_counting mode, it simply counts the number of 150 /// select instructions. In \c VM_instrument mode, it inserts code to count 151 /// the number times TrueValue of select is taken. In \c VM_annotate mode, 152 /// it reads the profile data and annotate the select instruction with metadata. 153 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 154 class PGOUseFunc; 155 156 /// Instruction Visitor class to visit select instructions. 157 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 158 Function &F; 159 unsigned NSIs = 0; // Number of select instructions instrumented. 160 VisitMode Mode = VM_counting; // Visiting mode. 161 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 162 unsigned TotalNumCtrs = 0; // Total number of counters 163 GlobalVariable *FuncNameVar = nullptr; 164 uint64_t FuncHash = 0; 165 PGOUseFunc *UseFunc = nullptr; 166 167 SelectInstVisitor(Function &Func) : F(Func) {} 168 169 void countSelects(Function &Func) { 170 Mode = VM_counting; 171 visit(Func); 172 } 173 // Visit the IR stream and instrument all select instructions. \p 174 // Ind is a pointer to the counter index variable; \p TotalNC 175 // is the total number of counters; \p FNV is the pointer to the 176 // PGO function name var; \p FHash is the function hash. 177 void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC, 178 GlobalVariable *FNV, uint64_t FHash) { 179 Mode = VM_instrument; 180 CurCtrIdx = Ind; 181 TotalNumCtrs = TotalNC; 182 FuncHash = FHash; 183 FuncNameVar = FNV; 184 visit(Func); 185 } 186 187 // Visit the IR stream and annotate all select instructions. 188 void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) { 189 Mode = VM_annotate; 190 UseFunc = UF; 191 CurCtrIdx = Ind; 192 visit(Func); 193 } 194 195 void instrumentOneSelectInst(SelectInst &SI); 196 void annotateOneSelectInst(SelectInst &SI); 197 // Visit \p SI instruction and perform tasks according to visit mode. 198 void visitSelectInst(SelectInst &SI); 199 unsigned getNumOfSelectInsts() const { return NSIs; } 200 }; 201 202 class PGOInstrumentationGenLegacyPass : public ModulePass { 203 public: 204 static char ID; 205 206 PGOInstrumentationGenLegacyPass() : ModulePass(ID) { 207 initializePGOInstrumentationGenLegacyPassPass( 208 *PassRegistry::getPassRegistry()); 209 } 210 211 StringRef getPassName() const override { return "PGOInstrumentationGenPass"; } 212 213 private: 214 bool runOnModule(Module &M) override; 215 216 void getAnalysisUsage(AnalysisUsage &AU) const override { 217 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 218 } 219 }; 220 221 class PGOInstrumentationUseLegacyPass : public ModulePass { 222 public: 223 static char ID; 224 225 // Provide the profile filename as the parameter. 226 PGOInstrumentationUseLegacyPass(std::string Filename = "") 227 : ModulePass(ID), ProfileFileName(std::move(Filename)) { 228 if (!PGOTestProfileFile.empty()) 229 ProfileFileName = PGOTestProfileFile; 230 initializePGOInstrumentationUseLegacyPassPass( 231 *PassRegistry::getPassRegistry()); 232 } 233 234 StringRef getPassName() const override { return "PGOInstrumentationUsePass"; } 235 236 private: 237 std::string ProfileFileName; 238 239 bool runOnModule(Module &M) override; 240 void getAnalysisUsage(AnalysisUsage &AU) const override { 241 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 242 } 243 }; 244 245 } // end anonymous namespace 246 247 char PGOInstrumentationGenLegacyPass::ID = 0; 248 INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 249 "PGO instrumentation.", false, false) 250 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 251 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 252 INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 253 "PGO instrumentation.", false, false) 254 255 ModulePass *llvm::createPGOInstrumentationGenLegacyPass() { 256 return new PGOInstrumentationGenLegacyPass(); 257 } 258 259 char PGOInstrumentationUseLegacyPass::ID = 0; 260 INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 261 "Read PGO instrumentation profile.", false, false) 262 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 263 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 264 INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 265 "Read PGO instrumentation profile.", false, false) 266 267 ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename) { 268 return new PGOInstrumentationUseLegacyPass(Filename.str()); 269 } 270 271 namespace { 272 /// \brief An MST based instrumentation for PGO 273 /// 274 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO 275 /// in the function level. 276 struct PGOEdge { 277 // This class implements the CFG edges. Note the CFG can be a multi-graph. 278 // So there might be multiple edges with same SrcBB and DestBB. 279 const BasicBlock *SrcBB; 280 const BasicBlock *DestBB; 281 uint64_t Weight; 282 bool InMST; 283 bool Removed; 284 bool IsCritical; 285 PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1) 286 : SrcBB(Src), DestBB(Dest), Weight(W), InMST(false), Removed(false), 287 IsCritical(false) {} 288 // Return the information string of an edge. 289 const std::string infoString() const { 290 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 291 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str(); 292 } 293 }; 294 295 // This class stores the auxiliary information for each BB. 296 struct BBInfo { 297 BBInfo *Group; 298 uint32_t Index; 299 uint32_t Rank; 300 301 BBInfo(unsigned IX) : Group(this), Index(IX), Rank(0) {} 302 303 // Return the information string of this object. 304 const std::string infoString() const { 305 return (Twine("Index=") + Twine(Index)).str(); 306 } 307 }; 308 309 // This class implements the CFG edges. Note the CFG can be a multi-graph. 310 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 311 private: 312 Function &F; 313 void computeCFGHash(); 314 void renameComdatFunction(); 315 // A map that stores the Comdat group in function F. 316 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 317 318 public: 319 std::vector<Instruction *> IndirectCallSites; 320 SelectInstVisitor SIVisitor; 321 std::string FuncName; 322 GlobalVariable *FuncNameVar; 323 // CFG hash value for this function. 324 uint64_t FunctionHash; 325 326 // The Minimum Spanning Tree of function CFG. 327 CFGMST<Edge, BBInfo> MST; 328 329 // Give an edge, find the BB that will be instrumented. 330 // Return nullptr if there is no BB to be instrumented. 331 BasicBlock *getInstrBB(Edge *E); 332 333 // Return the auxiliary BB information. 334 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 335 336 // Return the auxiliary BB information if available. 337 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 338 339 // Dump edges and BB information. 340 void dumpInfo(std::string Str = "") const { 341 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " + 342 Twine(FunctionHash) + "\t" + Str); 343 } 344 345 FuncPGOInstrumentation( 346 Function &Func, 347 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 348 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 349 BlockFrequencyInfo *BFI = nullptr) 350 : F(Func), ComdatMembers(ComdatMembers), SIVisitor(Func), FunctionHash(0), 351 MST(F, BPI, BFI) { 352 353 // This should be done before CFG hash computation. 354 SIVisitor.countSelects(Func); 355 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 356 IndirectCallSites = findIndirectCallSites(Func); 357 358 FuncName = getPGOFuncName(F); 359 computeCFGHash(); 360 if (ComdatMembers.size()) 361 renameComdatFunction(); 362 DEBUG(dumpInfo("after CFGMST")); 363 364 NumOfPGOBB += MST.BBInfos.size(); 365 for (auto &E : MST.AllEdges) { 366 if (E->Removed) 367 continue; 368 NumOfPGOEdge++; 369 if (!E->InMST) 370 NumOfPGOInstrument++; 371 } 372 373 if (CreateGlobalVar) 374 FuncNameVar = createPGOFuncNameVar(F, FuncName); 375 } 376 377 // Return the number of profile counters needed for the function. 378 unsigned getNumCounters() { 379 unsigned NumCounters = 0; 380 for (auto &E : this->MST.AllEdges) { 381 if (!E->InMST && !E->Removed) 382 NumCounters++; 383 } 384 return NumCounters + SIVisitor.getNumOfSelectInsts(); 385 } 386 }; 387 388 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 389 // value of each BB in the CFG. The higher 32 bits record the number of edges. 390 template <class Edge, class BBInfo> 391 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 392 std::vector<char> Indexes; 393 JamCRC JC; 394 for (auto &BB : F) { 395 const TerminatorInst *TI = BB.getTerminator(); 396 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 397 BasicBlock *Succ = TI->getSuccessor(I); 398 auto BI = findBBInfo(Succ); 399 if (BI == nullptr) 400 continue; 401 uint32_t Index = BI->Index; 402 for (int J = 0; J < 4; J++) 403 Indexes.push_back((char)(Index >> (J * 8))); 404 } 405 } 406 JC.update(Indexes); 407 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | 408 (uint64_t)IndirectCallSites.size() << 48 | 409 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); 410 } 411 412 // Check if we can safely rename this Comdat function. 413 static bool canRenameComdat( 414 Function &F, 415 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 416 if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 417 return false; 418 419 // FIXME: Current only handle those Comdat groups that only containing one 420 // function and function aliases. 421 // (1) For a Comdat group containing multiple functions, we need to have a 422 // unique postfix based on the hashes for each function. There is a 423 // non-trivial code refactoring to do this efficiently. 424 // (2) Variables can not be renamed, so we can not rename Comdat function in a 425 // group including global vars. 426 Comdat *C = F.getComdat(); 427 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 428 if (dyn_cast<GlobalAlias>(CM.second)) 429 continue; 430 Function *FM = dyn_cast<Function>(CM.second); 431 if (FM != &F) 432 return false; 433 } 434 return true; 435 } 436 437 // Append the CFGHash to the Comdat function name. 438 template <class Edge, class BBInfo> 439 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 440 if (!canRenameComdat(F, ComdatMembers)) 441 return; 442 std::string OrigName = F.getName().str(); 443 std::string NewFuncName = 444 Twine(F.getName() + "." + Twine(FunctionHash)).str(); 445 F.setName(Twine(NewFuncName)); 446 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 447 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 448 Comdat *NewComdat; 449 Module *M = F.getParent(); 450 // For AvailableExternallyLinkage functions, change the linkage to 451 // LinkOnceODR and put them into comdat. This is because after renaming, there 452 // is no backup external copy available for the function. 453 if (!F.hasComdat()) { 454 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 455 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 456 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 457 F.setComdat(NewComdat); 458 return; 459 } 460 461 // This function belongs to a single function Comdat group. 462 Comdat *OrigComdat = F.getComdat(); 463 std::string NewComdatName = 464 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 465 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 466 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 467 468 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 469 if (GlobalAlias *GA = dyn_cast<GlobalAlias>(CM.second)) { 470 // For aliases, change the name directly. 471 assert(dyn_cast<Function>(GA->getAliasee()->stripPointerCasts()) == &F); 472 std::string OrigGAName = GA->getName().str(); 473 GA->setName(Twine(GA->getName() + "." + Twine(FunctionHash))); 474 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigGAName, GA); 475 continue; 476 } 477 // Must be a function. 478 Function *CF = dyn_cast<Function>(CM.second); 479 assert(CF); 480 CF->setComdat(NewComdat); 481 } 482 } 483 484 // Given a CFG E to be instrumented, find which BB to place the instrumented 485 // code. The function will split the critical edge if necessary. 486 template <class Edge, class BBInfo> 487 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 488 if (E->InMST || E->Removed) 489 return nullptr; 490 491 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 492 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 493 // For a fake edge, instrument the real BB. 494 if (SrcBB == nullptr) 495 return DestBB; 496 if (DestBB == nullptr) 497 return SrcBB; 498 499 // Instrument the SrcBB if it has a single successor, 500 // otherwise, the DestBB if this is not a critical edge. 501 TerminatorInst *TI = SrcBB->getTerminator(); 502 if (TI->getNumSuccessors() <= 1) 503 return SrcBB; 504 if (!E->IsCritical) 505 return DestBB; 506 507 // For a critical edge, we have to split. Instrument the newly 508 // created BB. 509 NumOfPGOSplit++; 510 DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index << " --> " 511 << getBBInfo(DestBB).Index << "\n"); 512 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 513 BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum); 514 assert(InstrBB && "Critical edge is not split"); 515 516 E->Removed = true; 517 return InstrBB; 518 } 519 520 // Visit all edge and instrument the edges not in MST, and do value profiling. 521 // Critical edges will be split. 522 static void instrumentOneFunc( 523 Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, 524 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 525 FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI, 526 BFI); 527 unsigned NumCounters = FuncInfo.getNumCounters(); 528 529 uint32_t I = 0; 530 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); 531 for (auto &E : FuncInfo.MST.AllEdges) { 532 BasicBlock *InstrBB = FuncInfo.getInstrBB(E.get()); 533 if (!InstrBB) 534 continue; 535 536 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 537 assert(Builder.GetInsertPoint() != InstrBB->end() && 538 "Cannot get the Instrumentation point"); 539 Builder.CreateCall( 540 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), 541 {llvm::ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 542 Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters), 543 Builder.getInt32(I++)}); 544 } 545 546 // Now instrument select instructions: 547 FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar, 548 FuncInfo.FunctionHash); 549 assert(I == NumCounters); 550 551 if (DisableValueProfiling) 552 return; 553 554 unsigned NumIndirectCallSites = 0; 555 for (auto &I : FuncInfo.IndirectCallSites) { 556 CallSite CS(I); 557 Value *Callee = CS.getCalledValue(); 558 DEBUG(dbgs() << "Instrument one indirect call: CallSite Index = " 559 << NumIndirectCallSites << "\n"); 560 IRBuilder<> Builder(I); 561 assert(Builder.GetInsertPoint() != I->getParent()->end() && 562 "Cannot get the Instrumentation point"); 563 Builder.CreateCall( 564 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 565 {llvm::ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 566 Builder.getInt64(FuncInfo.FunctionHash), 567 Builder.CreatePtrToInt(Callee, Builder.getInt64Ty()), 568 Builder.getInt32(llvm::InstrProfValueKind::IPVK_IndirectCallTarget), 569 Builder.getInt32(NumIndirectCallSites++)}); 570 } 571 NumOfPGOICall += NumIndirectCallSites; 572 } 573 574 // This class represents a CFG edge in profile use compilation. 575 struct PGOUseEdge : public PGOEdge { 576 bool CountValid; 577 uint64_t CountValue; 578 PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1) 579 : PGOEdge(Src, Dest, W), CountValid(false), CountValue(0) {} 580 581 // Set edge count value 582 void setEdgeCount(uint64_t Value) { 583 CountValue = Value; 584 CountValid = true; 585 } 586 587 // Return the information string for this object. 588 const std::string infoString() const { 589 if (!CountValid) 590 return PGOEdge::infoString(); 591 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) 592 .str(); 593 } 594 }; 595 596 typedef SmallVector<PGOUseEdge *, 2> DirectEdges; 597 598 // This class stores the auxiliary information for each BB. 599 struct UseBBInfo : public BBInfo { 600 uint64_t CountValue; 601 bool CountValid; 602 int32_t UnknownCountInEdge; 603 int32_t UnknownCountOutEdge; 604 DirectEdges InEdges; 605 DirectEdges OutEdges; 606 UseBBInfo(unsigned IX) 607 : BBInfo(IX), CountValue(0), CountValid(false), UnknownCountInEdge(0), 608 UnknownCountOutEdge(0) {} 609 UseBBInfo(unsigned IX, uint64_t C) 610 : BBInfo(IX), CountValue(C), CountValid(true), UnknownCountInEdge(0), 611 UnknownCountOutEdge(0) {} 612 613 // Set the profile count value for this BB. 614 void setBBInfoCount(uint64_t Value) { 615 CountValue = Value; 616 CountValid = true; 617 } 618 619 // Return the information string of this object. 620 const std::string infoString() const { 621 if (!CountValid) 622 return BBInfo::infoString(); 623 return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str(); 624 } 625 }; 626 627 // Sum up the count values for all the edges. 628 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 629 uint64_t Total = 0; 630 for (auto &E : Edges) { 631 if (E->Removed) 632 continue; 633 Total += E->CountValue; 634 } 635 return Total; 636 } 637 638 class PGOUseFunc { 639 public: 640 PGOUseFunc(Function &Func, Module *Modu, 641 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 642 BranchProbabilityInfo *BPI = nullptr, 643 BlockFrequencyInfo *BFI = nullptr) 644 : F(Func), M(Modu), FuncInfo(Func, ComdatMembers, false, BPI, BFI), 645 CountPosition(0), ProfileCountSize(0), FreqAttr(FFA_Normal) {} 646 647 // Read counts for the instrumented BB from profile. 648 bool readCounters(IndexedInstrProfReader *PGOReader); 649 650 // Populate the counts for all BBs. 651 void populateCounters(); 652 653 // Set the branch weights based on the count values. 654 void setBranchWeights(); 655 656 // Annotate the indirect call sites. 657 void annotateIndirectCallSites(); 658 659 // The hotness of the function from the profile count. 660 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 661 662 // Return the function hotness from the profile. 663 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 664 665 // Return the function hash. 666 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 667 // Return the profile record for this function; 668 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 669 670 // Return the auxiliary BB information. 671 UseBBInfo &getBBInfo(const BasicBlock *BB) const { 672 return FuncInfo.getBBInfo(BB); 673 } 674 675 // Return the auxiliary BB information if available. 676 UseBBInfo *findBBInfo(const BasicBlock *BB) const { 677 return FuncInfo.findBBInfo(BB); 678 } 679 680 private: 681 Function &F; 682 Module *M; 683 // This member stores the shared information with class PGOGenFunc. 684 FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo; 685 686 // The maximum count value in the profile. This is only used in PGO use 687 // compilation. 688 uint64_t ProgramMaxCount; 689 690 // Position of counter that remains to be read. 691 uint32_t CountPosition; 692 693 // Total size of the profile count for this function. 694 uint32_t ProfileCountSize; 695 696 // ProfileRecord for this function. 697 InstrProfRecord ProfileRecord; 698 699 // Function hotness info derived from profile. 700 FuncFreqAttr FreqAttr; 701 702 // Find the Instrumented BB and set the value. 703 void setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 704 705 // Set the edge counter value for the unknown edge -- there should be only 706 // one unknown edge. 707 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 708 709 // Return FuncName string; 710 const std::string getFuncName() const { return FuncInfo.FuncName; } 711 712 // Set the hot/cold inline hints based on the count values. 713 // FIXME: This function should be removed once the functionality in 714 // the inliner is implemented. 715 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 716 if (ProgramMaxCount == 0) 717 return; 718 // Threshold of the hot functions. 719 const BranchProbability HotFunctionThreshold(1, 100); 720 // Threshold of the cold functions. 721 const BranchProbability ColdFunctionThreshold(2, 10000); 722 if (EntryCount >= HotFunctionThreshold.scale(ProgramMaxCount)) 723 FreqAttr = FFA_Hot; 724 else if (MaxCount <= ColdFunctionThreshold.scale(ProgramMaxCount)) 725 FreqAttr = FFA_Cold; 726 } 727 }; 728 729 // Visit all the edges and assign the count value for the instrumented 730 // edges and the BB. 731 void PGOUseFunc::setInstrumentedCounts( 732 const std::vector<uint64_t> &CountFromProfile) { 733 734 assert(FuncInfo.getNumCounters() == CountFromProfile.size()); 735 // Use a worklist as we will update the vector during the iteration. 736 std::vector<PGOUseEdge *> WorkList; 737 for (auto &E : FuncInfo.MST.AllEdges) 738 WorkList.push_back(E.get()); 739 740 uint32_t I = 0; 741 for (auto &E : WorkList) { 742 BasicBlock *InstrBB = FuncInfo.getInstrBB(E); 743 if (!InstrBB) 744 continue; 745 uint64_t CountValue = CountFromProfile[I++]; 746 if (!E->Removed) { 747 getBBInfo(InstrBB).setBBInfoCount(CountValue); 748 E->setEdgeCount(CountValue); 749 continue; 750 } 751 752 // Need to add two new edges. 753 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 754 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 755 // Add new edge of SrcBB->InstrBB. 756 PGOUseEdge &NewEdge = FuncInfo.MST.addEdge(SrcBB, InstrBB, 0); 757 NewEdge.setEdgeCount(CountValue); 758 // Add new edge of InstrBB->DestBB. 759 PGOUseEdge &NewEdge1 = FuncInfo.MST.addEdge(InstrBB, DestBB, 0); 760 NewEdge1.setEdgeCount(CountValue); 761 NewEdge1.InMST = true; 762 getBBInfo(InstrBB).setBBInfoCount(CountValue); 763 } 764 ProfileCountSize = CountFromProfile.size(); 765 CountPosition = I; 766 } 767 768 // Set the count value for the unknown edge. There should be one and only one 769 // unknown edge in Edges vector. 770 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 771 for (auto &E : Edges) { 772 if (E->CountValid) 773 continue; 774 E->setEdgeCount(Value); 775 776 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 777 getBBInfo(E->DestBB).UnknownCountInEdge--; 778 return; 779 } 780 llvm_unreachable("Cannot find the unknown count edge"); 781 } 782 783 // Read the profile from ProfileFileName and assign the value to the 784 // instrumented BB and the edges. This function also updates ProgramMaxCount. 785 // Return true if the profile are successfully read, and false on errors. 786 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader) { 787 auto &Ctx = M->getContext(); 788 Expected<InstrProfRecord> Result = 789 PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); 790 if (Error E = Result.takeError()) { 791 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 792 auto Err = IPE.get(); 793 bool SkipWarning = false; 794 if (Err == instrprof_error::unknown_function) { 795 NumOfPGOMissing++; 796 SkipWarning = !PGOWarnMissing; 797 } else if (Err == instrprof_error::hash_mismatch || 798 Err == instrprof_error::malformed) { 799 NumOfPGOMismatch++; 800 SkipWarning = 801 NoPGOWarnMismatch || 802 (NoPGOWarnMismatchComdat && 803 (F.hasComdat() || 804 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 805 } 806 807 if (SkipWarning) 808 return; 809 810 std::string Msg = IPE.message() + std::string(" ") + F.getName().str(); 811 Ctx.diagnose( 812 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 813 }); 814 return false; 815 } 816 ProfileRecord = std::move(Result.get()); 817 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 818 819 NumOfPGOFunc++; 820 DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 821 uint64_t ValueSum = 0; 822 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 823 DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 824 ValueSum += CountFromProfile[I]; 825 } 826 827 DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 828 829 getBBInfo(nullptr).UnknownCountOutEdge = 2; 830 getBBInfo(nullptr).UnknownCountInEdge = 2; 831 832 setInstrumentedCounts(CountFromProfile); 833 ProgramMaxCount = PGOReader->getMaximumFunctionCount(); 834 return true; 835 } 836 837 // Populate the counters from instrumented BBs to all BBs. 838 // In the end of this operation, all BBs should have a valid count value. 839 void PGOUseFunc::populateCounters() { 840 // First set up Count variable for all BBs. 841 for (auto &E : FuncInfo.MST.AllEdges) { 842 if (E->Removed) 843 continue; 844 845 const BasicBlock *SrcBB = E->SrcBB; 846 const BasicBlock *DestBB = E->DestBB; 847 UseBBInfo &SrcInfo = getBBInfo(SrcBB); 848 UseBBInfo &DestInfo = getBBInfo(DestBB); 849 SrcInfo.OutEdges.push_back(E.get()); 850 DestInfo.InEdges.push_back(E.get()); 851 SrcInfo.UnknownCountOutEdge++; 852 DestInfo.UnknownCountInEdge++; 853 854 if (!E->CountValid) 855 continue; 856 DestInfo.UnknownCountInEdge--; 857 SrcInfo.UnknownCountOutEdge--; 858 } 859 860 bool Changes = true; 861 unsigned NumPasses = 0; 862 while (Changes) { 863 NumPasses++; 864 Changes = false; 865 866 // For efficient traversal, it's better to start from the end as most 867 // of the instrumented edges are at the end. 868 for (auto &BB : reverse(F)) { 869 UseBBInfo *Count = findBBInfo(&BB); 870 if (Count == nullptr) 871 continue; 872 if (!Count->CountValid) { 873 if (Count->UnknownCountOutEdge == 0) { 874 Count->CountValue = sumEdgeCount(Count->OutEdges); 875 Count->CountValid = true; 876 Changes = true; 877 } else if (Count->UnknownCountInEdge == 0) { 878 Count->CountValue = sumEdgeCount(Count->InEdges); 879 Count->CountValid = true; 880 Changes = true; 881 } 882 } 883 if (Count->CountValid) { 884 if (Count->UnknownCountOutEdge == 1) { 885 uint64_t Total = 0; 886 uint64_t OutSum = sumEdgeCount(Count->OutEdges); 887 // If the one of the successor block can early terminate (no-return), 888 // we can end up with situation where out edge sum count is larger as 889 // the source BB's count is collected by a post-dominated block. 890 if (Count->CountValue > OutSum) 891 Total = Count->CountValue - OutSum; 892 setEdgeCount(Count->OutEdges, Total); 893 Changes = true; 894 } 895 if (Count->UnknownCountInEdge == 1) { 896 uint64_t Total = 0; 897 uint64_t InSum = sumEdgeCount(Count->InEdges); 898 if (Count->CountValue > InSum) 899 Total = Count->CountValue - InSum; 900 setEdgeCount(Count->InEdges, Total); 901 Changes = true; 902 } 903 } 904 } 905 } 906 907 DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 908 #ifndef NDEBUG 909 // Assert every BB has a valid counter. 910 for (auto &BB : F) { 911 auto BI = findBBInfo(&BB); 912 if (BI == nullptr) 913 continue; 914 assert(BI->CountValid && "BB count is not valid"); 915 } 916 #endif 917 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 918 F.setEntryCount(FuncEntryCount); 919 uint64_t FuncMaxCount = FuncEntryCount; 920 for (auto &BB : F) { 921 auto BI = findBBInfo(&BB); 922 if (BI == nullptr) 923 continue; 924 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); 925 } 926 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 927 928 // Now annotate select instructions 929 FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition); 930 assert(CountPosition == ProfileCountSize); 931 932 DEBUG(FuncInfo.dumpInfo("after reading profile.")); 933 } 934 935 static void setProfMetadata(Module *M, Instruction *TI, 936 ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) { 937 MDBuilder MDB(M->getContext()); 938 assert(MaxCount > 0 && "Bad max count"); 939 uint64_t Scale = calculateCountScale(MaxCount); 940 SmallVector<unsigned, 4> Weights; 941 for (const auto &ECI : EdgeCounts) 942 Weights.push_back(scaleBranchCount(ECI, Scale)); 943 944 DEBUG(dbgs() << "Weight is: "; 945 for (const auto &W : Weights) { dbgs() << W << " "; } 946 dbgs() << "\n";); 947 TI->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 948 } 949 950 // Assign the scaled count values to the BB with multiple out edges. 951 void PGOUseFunc::setBranchWeights() { 952 // Generate MD_prof metadata for every branch instruction. 953 DEBUG(dbgs() << "\nSetting branch weights.\n"); 954 for (auto &BB : F) { 955 TerminatorInst *TI = BB.getTerminator(); 956 if (TI->getNumSuccessors() < 2) 957 continue; 958 if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI)) 959 continue; 960 if (getBBInfo(&BB).CountValue == 0) 961 continue; 962 963 // We have a non-zero Branch BB. 964 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 965 unsigned Size = BBCountInfo.OutEdges.size(); 966 SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 967 uint64_t MaxCount = 0; 968 for (unsigned s = 0; s < Size; s++) { 969 const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 970 const BasicBlock *SrcBB = E->SrcBB; 971 const BasicBlock *DestBB = E->DestBB; 972 if (DestBB == nullptr) 973 continue; 974 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 975 uint64_t EdgeCount = E->CountValue; 976 if (EdgeCount > MaxCount) 977 MaxCount = EdgeCount; 978 EdgeCounts[SuccNum] = EdgeCount; 979 } 980 setProfMetadata(M, TI, EdgeCounts, MaxCount); 981 } 982 } 983 984 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 985 Module *M = F.getParent(); 986 IRBuilder<> Builder(&SI); 987 Type *Int64Ty = Builder.getInt64Ty(); 988 Type *I8PtrTy = Builder.getInt8PtrTy(); 989 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 990 Builder.CreateCall( 991 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 992 {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 993 Builder.getInt64(FuncHash), 994 Builder.getInt32(TotalNumCtrs), Builder.getInt32(*CurCtrIdx), Step}); 995 ++(*CurCtrIdx); 996 } 997 998 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 999 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1000 assert(*CurCtrIdx < CountFromProfile.size() && 1001 "Out of bound access of counters"); 1002 uint64_t SCounts[2]; 1003 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1004 ++(*CurCtrIdx); 1005 uint64_t TotalCount = 0; 1006 auto BI = UseFunc->findBBInfo(SI.getParent()); 1007 if (BI != nullptr) 1008 TotalCount = BI->CountValue; 1009 // False Count 1010 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1011 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1012 if (MaxCount) 1013 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1014 } 1015 1016 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1017 if (!PGOInstrSelect) 1018 return; 1019 // FIXME: do not handle this yet. 1020 if (SI.getCondition()->getType()->isVectorTy()) 1021 return; 1022 1023 NSIs++; 1024 switch (Mode) { 1025 case VM_counting: 1026 return; 1027 case VM_instrument: 1028 instrumentOneSelectInst(SI); 1029 return; 1030 case VM_annotate: 1031 annotateOneSelectInst(SI); 1032 return; 1033 } 1034 1035 llvm_unreachable("Unknown visiting mode"); 1036 } 1037 1038 // Traverse all the indirect callsites and annotate the instructions. 1039 void PGOUseFunc::annotateIndirectCallSites() { 1040 if (DisableValueProfiling) 1041 return; 1042 1043 // Create the PGOFuncName meta data. 1044 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1045 1046 unsigned IndirectCallSiteIndex = 0; 1047 auto &IndirectCallSites = FuncInfo.IndirectCallSites; 1048 unsigned NumValueSites = 1049 ProfileRecord.getNumValueSites(IPVK_IndirectCallTarget); 1050 if (NumValueSites != IndirectCallSites.size()) { 1051 std::string Msg = 1052 std::string("Inconsistent number of indirect call sites: ") + 1053 F.getName().str(); 1054 auto &Ctx = M->getContext(); 1055 Ctx.diagnose( 1056 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1057 return; 1058 } 1059 1060 for (auto &I : IndirectCallSites) { 1061 DEBUG(dbgs() << "Read one indirect call instrumentation: Index=" 1062 << IndirectCallSiteIndex << " out of " << NumValueSites 1063 << "\n"); 1064 annotateValueSite(*M, *I, ProfileRecord, IPVK_IndirectCallTarget, 1065 IndirectCallSiteIndex, MaxNumAnnotations); 1066 IndirectCallSiteIndex++; 1067 } 1068 } 1069 } // end anonymous namespace 1070 1071 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime 1072 // aware this is an ir_level profile so it can set the version flag. 1073 static void createIRLevelProfileFlagVariable(Module &M) { 1074 Type *IntTy64 = Type::getInt64Ty(M.getContext()); 1075 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); 1076 auto IRLevelVersionVariable = new GlobalVariable( 1077 M, IntTy64, true, GlobalVariable::ExternalLinkage, 1078 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), 1079 INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); 1080 IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility); 1081 Triple TT(M.getTargetTriple()); 1082 if (!TT.supportsCOMDAT()) 1083 IRLevelVersionVariable->setLinkage(GlobalValue::WeakAnyLinkage); 1084 else 1085 IRLevelVersionVariable->setComdat(M.getOrInsertComdat( 1086 StringRef(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)))); 1087 } 1088 1089 // Collect the set of members for each Comdat in module M and store 1090 // in ComdatMembers. 1091 static void collectComdatMembers( 1092 Module &M, 1093 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1094 if (!DoComdatRenaming) 1095 return; 1096 for (Function &F : M) 1097 if (Comdat *C = F.getComdat()) 1098 ComdatMembers.insert(std::make_pair(C, &F)); 1099 for (GlobalVariable &GV : M.globals()) 1100 if (Comdat *C = GV.getComdat()) 1101 ComdatMembers.insert(std::make_pair(C, &GV)); 1102 for (GlobalAlias &GA : M.aliases()) 1103 if (Comdat *C = GA.getComdat()) 1104 ComdatMembers.insert(std::make_pair(C, &GA)); 1105 } 1106 1107 static bool InstrumentAllFunctions( 1108 Module &M, function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1109 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) { 1110 createIRLevelProfileFlagVariable(M); 1111 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1112 collectComdatMembers(M, ComdatMembers); 1113 1114 for (auto &F : M) { 1115 if (F.isDeclaration()) 1116 continue; 1117 auto *BPI = LookupBPI(F); 1118 auto *BFI = LookupBFI(F); 1119 instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers); 1120 } 1121 return true; 1122 } 1123 1124 bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) { 1125 if (skipModule(M)) 1126 return false; 1127 1128 auto LookupBPI = [this](Function &F) { 1129 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1130 }; 1131 auto LookupBFI = [this](Function &F) { 1132 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1133 }; 1134 return InstrumentAllFunctions(M, LookupBPI, LookupBFI); 1135 } 1136 1137 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1138 ModuleAnalysisManager &AM) { 1139 1140 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1141 auto LookupBPI = [&FAM](Function &F) { 1142 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1143 }; 1144 1145 auto LookupBFI = [&FAM](Function &F) { 1146 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1147 }; 1148 1149 if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI)) 1150 return PreservedAnalyses::all(); 1151 1152 return PreservedAnalyses::none(); 1153 } 1154 1155 static bool annotateAllFunctions( 1156 Module &M, StringRef ProfileFileName, 1157 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1158 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) { 1159 DEBUG(dbgs() << "Read in profile counters: "); 1160 auto &Ctx = M.getContext(); 1161 // Read the counter array from file. 1162 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName); 1163 if (Error E = ReaderOrErr.takeError()) { 1164 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 1165 Ctx.diagnose( 1166 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 1167 }); 1168 return false; 1169 } 1170 1171 std::unique_ptr<IndexedInstrProfReader> PGOReader = 1172 std::move(ReaderOrErr.get()); 1173 if (!PGOReader) { 1174 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 1175 StringRef("Cannot get PGOReader"))); 1176 return false; 1177 } 1178 // TODO: might need to change the warning once the clang option is finalized. 1179 if (!PGOReader->isIRLevelProfile()) { 1180 Ctx.diagnose(DiagnosticInfoPGOProfile( 1181 ProfileFileName.data(), "Not an IR level instrumentation profile")); 1182 return false; 1183 } 1184 1185 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1186 collectComdatMembers(M, ComdatMembers); 1187 std::vector<Function *> HotFunctions; 1188 std::vector<Function *> ColdFunctions; 1189 for (auto &F : M) { 1190 if (F.isDeclaration()) 1191 continue; 1192 auto *BPI = LookupBPI(F); 1193 auto *BFI = LookupBFI(F); 1194 PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI); 1195 if (!Func.readCounters(PGOReader.get())) 1196 continue; 1197 Func.populateCounters(); 1198 Func.setBranchWeights(); 1199 Func.annotateIndirectCallSites(); 1200 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 1201 if (FreqAttr == PGOUseFunc::FFA_Cold) 1202 ColdFunctions.push_back(&F); 1203 else if (FreqAttr == PGOUseFunc::FFA_Hot) 1204 HotFunctions.push_back(&F); 1205 } 1206 M.setProfileSummary(PGOReader->getSummary().getMD(M.getContext())); 1207 // Set function hotness attribute from the profile. 1208 // We have to apply these attributes at the end because their presence 1209 // can affect the BranchProbabilityInfo of any callers, resulting in an 1210 // inconsistent MST between prof-gen and prof-use. 1211 for (auto &F : HotFunctions) { 1212 F->addFnAttr(llvm::Attribute::InlineHint); 1213 DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 1214 << "\n"); 1215 } 1216 for (auto &F : ColdFunctions) { 1217 F->addFnAttr(llvm::Attribute::Cold); 1218 DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() << "\n"); 1219 } 1220 return true; 1221 } 1222 1223 PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename) 1224 : ProfileFileName(std::move(Filename)) { 1225 if (!PGOTestProfileFile.empty()) 1226 ProfileFileName = PGOTestProfileFile; 1227 } 1228 1229 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 1230 ModuleAnalysisManager &AM) { 1231 1232 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1233 auto LookupBPI = [&FAM](Function &F) { 1234 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1235 }; 1236 1237 auto LookupBFI = [&FAM](Function &F) { 1238 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1239 }; 1240 1241 if (!annotateAllFunctions(M, ProfileFileName, LookupBPI, LookupBFI)) 1242 return PreservedAnalyses::all(); 1243 1244 return PreservedAnalyses::none(); 1245 } 1246 1247 bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { 1248 if (skipModule(M)) 1249 return false; 1250 1251 auto LookupBPI = [this](Function &F) { 1252 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1253 }; 1254 auto LookupBFI = [this](Function &F) { 1255 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1256 }; 1257 1258 return annotateAllFunctions(M, ProfileFileName, LookupBPI, LookupBFI); 1259 } 1260