1 //===-- PGOInstrumentation.cpp - MST-based PGO Instrumentation ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements PGO instrumentation using a minimum spanning tree based 11 // on the following paper: 12 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 13 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 14 // Issue 3, pp 313-322 15 // The idea of the algorithm based on the fact that for each node (except for 16 // the entry and exit), the sum of incoming edge counts equals the sum of 17 // outgoing edge counts. The count of edge on spanning tree can be derived from 18 // those edges not on the spanning tree. Knuth proves this method instruments 19 // the minimum number of edges. 20 // 21 // The minimal spanning tree here is actually a maximum weight tree -- on-tree 22 // edges have higher frequencies (more likely to execute). The idea is to 23 // instrument those less frequently executed edges to reduce the runtime 24 // overhead of instrumented binaries. 25 // 26 // This file contains two passes: 27 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 28 // count profile, and generates the instrumentation for indirect call 29 // profiling. 30 // (2) Pass PGOInstrumentationUse which reads the edge count profile and 31 // annotates the branch weights. It also reads the indirect call value 32 // profiling records and annotate the indirect call instructions. 33 // 34 // To get the precise counter information, These two passes need to invoke at 35 // the same compilation point (so they see the same IR). For pass 36 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For 37 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 38 // the profile is opened in module level and passed to each PGOUseFunc instance. 39 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 40 // in class FuncPGOInstrumentation. 41 // 42 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 43 // BBInfo contains auxiliary information for each BB. These two classes are used 44 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 45 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 46 // used in populating profile counters. 47 // The MST implementation is in Class CFGMST (CFGMST.h). 48 // 49 //===----------------------------------------------------------------------===// 50 51 #include "llvm/Transforms/PGOInstrumentation.h" 52 #include "CFGMST.h" 53 #include "llvm/ADT/STLExtras.h" 54 #include "llvm/ADT/SmallVector.h" 55 #include "llvm/ADT/Statistic.h" 56 #include "llvm/ADT/Triple.h" 57 #include "llvm/Analysis/BlockFrequencyInfo.h" 58 #include "llvm/Analysis/BranchProbabilityInfo.h" 59 #include "llvm/Analysis/CFG.h" 60 #include "llvm/Analysis/IndirectCallSiteVisitor.h" 61 #include "llvm/IR/CallSite.h" 62 #include "llvm/IR/DiagnosticInfo.h" 63 #include "llvm/IR/GlobalValue.h" 64 #include "llvm/IR/IRBuilder.h" 65 #include "llvm/IR/InstIterator.h" 66 #include "llvm/IR/Instructions.h" 67 #include "llvm/IR/IntrinsicInst.h" 68 #include "llvm/IR/MDBuilder.h" 69 #include "llvm/IR/Module.h" 70 #include "llvm/Pass.h" 71 #include "llvm/ProfileData/InstrProfReader.h" 72 #include "llvm/ProfileData/ProfileCommon.h" 73 #include "llvm/Support/BranchProbability.h" 74 #include "llvm/Support/Debug.h" 75 #include "llvm/Support/JamCRC.h" 76 #include "llvm/Transforms/Instrumentation.h" 77 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 78 #include <algorithm> 79 #include <string> 80 #include <unordered_map> 81 #include <utility> 82 #include <vector> 83 84 using namespace llvm; 85 86 #define DEBUG_TYPE "pgo-instrumentation" 87 88 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 89 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 90 STATISTIC(NumOfPGOEdge, "Number of edges."); 91 STATISTIC(NumOfPGOBB, "Number of basic-blocks."); 92 STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 93 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 94 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 95 STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 96 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 97 98 // Command line option to specify the file to read profile from. This is 99 // mainly used for testing. 100 static cl::opt<std::string> 101 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, 102 cl::value_desc("filename"), 103 cl::desc("Specify the path of profile data file. This is" 104 "mainly for test purpose.")); 105 106 // Command line option to disable value profiling. The default is false: 107 // i.e. value profiling is enabled by default. This is for debug purpose. 108 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 109 cl::Hidden, 110 cl::desc("Disable Value Profiling")); 111 112 // Command line option to set the maximum number of VP annotations to write to 113 // the metadata for a single indirect call callsite. 114 static cl::opt<unsigned> MaxNumAnnotations( 115 "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore, 116 cl::desc("Max number of annotations for a single indirect " 117 "call callsite")); 118 119 // Command line option to control appending FunctionHash to the name of a COMDAT 120 // function. This is to avoid the hash mismatch caused by the preinliner. 121 static cl::opt<bool> DoComdatRenaming( 122 "do-comdat-renaming", cl::init(true), cl::Hidden, 123 cl::desc("Append function hash to the name of COMDAT function to avoid " 124 "function hash mismatch due to the preinliner")); 125 126 // Command line option to enable/disable the warning about missing profile 127 // information. 128 static cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", 129 cl::init(false), 130 cl::Hidden); 131 132 // Command line option to enable/disable the warning about a hash mismatch in 133 // the profile data. 134 static cl::opt<bool> NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), 135 cl::Hidden); 136 137 // Command line option to enable/disable select instruction instrumentation. 138 static cl::opt<bool> PGOInstrSelect("pgo-instr-select", cl::init(true), 139 cl::Hidden); 140 namespace { 141 142 /// The select instruction visitor plays three roles specified 143 /// by the mode. In \c VM_counting mode, it simply counts the number of 144 /// select instructions. In \c VM_instrument mode, it inserts code to count 145 /// the number times TrueValue of select is taken. In \c VM_annotate mode, 146 /// it reads the profile data and annotate the select instruction with metadata. 147 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 148 class PGOUseFunc; 149 150 /// Instruction Visitor class to visit select instructions. 151 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 152 Function &F; 153 unsigned NSIs = 0; // Number of select instructions instrumented. 154 VisitMode Mode = VM_counting; // Visiting mode. 155 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 156 unsigned TotalNumCtrs = 0; // Total number of counters 157 GlobalVariable *FuncNameVar = nullptr; 158 uint64_t FuncHash = 0; 159 PGOUseFunc *UseFunc = nullptr; 160 161 SelectInstVisitor(Function &Func) : F(Func) {} 162 163 void countSelects(Function &Func) { 164 Mode = VM_counting; 165 visit(Func); 166 } 167 // Visit the IR stream and instrument all select instructions. \p 168 // Ind is a pointer to the counter index variable; \p TotalNC 169 // is the total number of counters; \p FNV is the pointer to the 170 // PGO function name var; \p FHash is the function hash. 171 void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC, 172 GlobalVariable *FNV, uint64_t FHash) { 173 Mode = VM_instrument; 174 CurCtrIdx = Ind; 175 TotalNumCtrs = TotalNC; 176 FuncHash = FHash; 177 FuncNameVar = FNV; 178 visit(Func); 179 } 180 181 // Visit the IR stream and annotate all select instructions. 182 void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) { 183 Mode = VM_annotate; 184 UseFunc = UF; 185 CurCtrIdx = Ind; 186 visit(Func); 187 } 188 189 void instrumentOneSelectInst(SelectInst &SI); 190 void annotateOneSelectInst(SelectInst &SI); 191 // Visit \p SI instruction and perform tasks according to visit mode. 192 void visitSelectInst(SelectInst &SI); 193 unsigned getNumOfSelectInsts() const { return NSIs; } 194 }; 195 196 class PGOInstrumentationGenLegacyPass : public ModulePass { 197 public: 198 static char ID; 199 200 PGOInstrumentationGenLegacyPass() : ModulePass(ID) { 201 initializePGOInstrumentationGenLegacyPassPass( 202 *PassRegistry::getPassRegistry()); 203 } 204 205 StringRef getPassName() const override { return "PGOInstrumentationGenPass"; } 206 207 private: 208 bool runOnModule(Module &M) override; 209 210 void getAnalysisUsage(AnalysisUsage &AU) const override { 211 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 212 } 213 }; 214 215 class PGOInstrumentationUseLegacyPass : public ModulePass { 216 public: 217 static char ID; 218 219 // Provide the profile filename as the parameter. 220 PGOInstrumentationUseLegacyPass(std::string Filename = "") 221 : ModulePass(ID), ProfileFileName(std::move(Filename)) { 222 if (!PGOTestProfileFile.empty()) 223 ProfileFileName = PGOTestProfileFile; 224 initializePGOInstrumentationUseLegacyPassPass( 225 *PassRegistry::getPassRegistry()); 226 } 227 228 StringRef getPassName() const override { return "PGOInstrumentationUsePass"; } 229 230 private: 231 std::string ProfileFileName; 232 233 bool runOnModule(Module &M) override; 234 void getAnalysisUsage(AnalysisUsage &AU) const override { 235 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 236 } 237 }; 238 239 } // end anonymous namespace 240 241 char PGOInstrumentationGenLegacyPass::ID = 0; 242 INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 243 "PGO instrumentation.", false, false) 244 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 245 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 246 INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 247 "PGO instrumentation.", false, false) 248 249 ModulePass *llvm::createPGOInstrumentationGenLegacyPass() { 250 return new PGOInstrumentationGenLegacyPass(); 251 } 252 253 char PGOInstrumentationUseLegacyPass::ID = 0; 254 INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 255 "Read PGO instrumentation profile.", false, false) 256 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 257 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 258 INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 259 "Read PGO instrumentation profile.", false, false) 260 261 ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename) { 262 return new PGOInstrumentationUseLegacyPass(Filename.str()); 263 } 264 265 namespace { 266 /// \brief An MST based instrumentation for PGO 267 /// 268 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO 269 /// in the function level. 270 struct PGOEdge { 271 // This class implements the CFG edges. Note the CFG can be a multi-graph. 272 // So there might be multiple edges with same SrcBB and DestBB. 273 const BasicBlock *SrcBB; 274 const BasicBlock *DestBB; 275 uint64_t Weight; 276 bool InMST; 277 bool Removed; 278 bool IsCritical; 279 PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1) 280 : SrcBB(Src), DestBB(Dest), Weight(W), InMST(false), Removed(false), 281 IsCritical(false) {} 282 // Return the information string of an edge. 283 const std::string infoString() const { 284 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 285 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str(); 286 } 287 }; 288 289 // This class stores the auxiliary information for each BB. 290 struct BBInfo { 291 BBInfo *Group; 292 uint32_t Index; 293 uint32_t Rank; 294 295 BBInfo(unsigned IX) : Group(this), Index(IX), Rank(0) {} 296 297 // Return the information string of this object. 298 const std::string infoString() const { 299 return (Twine("Index=") + Twine(Index)).str(); 300 } 301 }; 302 303 // This class implements the CFG edges. Note the CFG can be a multi-graph. 304 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 305 private: 306 Function &F; 307 void computeCFGHash(); 308 void renameComdatFunction(); 309 // A map that stores the Comdat group in function F. 310 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 311 312 public: 313 std::vector<Instruction *> IndirectCallSites; 314 SelectInstVisitor SIVisitor; 315 std::string FuncName; 316 GlobalVariable *FuncNameVar; 317 // CFG hash value for this function. 318 uint64_t FunctionHash; 319 320 // The Minimum Spanning Tree of function CFG. 321 CFGMST<Edge, BBInfo> MST; 322 323 // Give an edge, find the BB that will be instrumented. 324 // Return nullptr if there is no BB to be instrumented. 325 BasicBlock *getInstrBB(Edge *E); 326 327 // Return the auxiliary BB information. 328 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 329 330 // Dump edges and BB information. 331 void dumpInfo(std::string Str = "") const { 332 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " + 333 Twine(FunctionHash) + "\t" + Str); 334 } 335 336 FuncPGOInstrumentation( 337 Function &Func, 338 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 339 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 340 BlockFrequencyInfo *BFI = nullptr) 341 : F(Func), ComdatMembers(ComdatMembers), SIVisitor(Func), FunctionHash(0), 342 MST(F, BPI, BFI) { 343 344 // This should be done before CFG hash computation. 345 SIVisitor.countSelects(Func); 346 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 347 IndirectCallSites = findIndirectCallSites(Func); 348 349 FuncName = getPGOFuncName(F); 350 computeCFGHash(); 351 if (ComdatMembers.size()) 352 renameComdatFunction(); 353 DEBUG(dumpInfo("after CFGMST")); 354 355 NumOfPGOBB += MST.BBInfos.size(); 356 for (auto &E : MST.AllEdges) { 357 if (E->Removed) 358 continue; 359 NumOfPGOEdge++; 360 if (!E->InMST) 361 NumOfPGOInstrument++; 362 } 363 364 if (CreateGlobalVar) 365 FuncNameVar = createPGOFuncNameVar(F, FuncName); 366 } 367 368 // Return the number of profile counters needed for the function. 369 unsigned getNumCounters() { 370 unsigned NumCounters = 0; 371 for (auto &E : this->MST.AllEdges) { 372 if (!E->InMST && !E->Removed) 373 NumCounters++; 374 } 375 return NumCounters + SIVisitor.getNumOfSelectInsts(); 376 } 377 }; 378 379 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 380 // value of each BB in the CFG. The higher 32 bits record the number of edges. 381 template <class Edge, class BBInfo> 382 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 383 std::vector<char> Indexes; 384 JamCRC JC; 385 for (auto &BB : F) { 386 const TerminatorInst *TI = BB.getTerminator(); 387 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 388 BasicBlock *Succ = TI->getSuccessor(I); 389 uint32_t Index = getBBInfo(Succ).Index; 390 for (int J = 0; J < 4; J++) 391 Indexes.push_back((char)(Index >> (J * 8))); 392 } 393 } 394 JC.update(Indexes); 395 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | 396 (uint64_t)IndirectCallSites.size() << 48 | 397 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); 398 } 399 400 // Check if we can safely rename this Comdat function. 401 static bool canRenameComdat( 402 Function &F, 403 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 404 if (F.getName().empty()) 405 return false; 406 if (!needsComdatForCounter(F, *(F.getParent()))) 407 return false; 408 // Only safe to do if this function may be discarded if it is not used 409 // in the compilation unit. 410 if (!GlobalValue::isDiscardableIfUnused(F.getLinkage())) 411 return false; 412 413 // For AvailableExternallyLinkage functions. 414 if (!F.hasComdat()) { 415 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 416 return true; 417 } 418 419 // FIXME: Current only handle those Comdat groups that only containing one 420 // function and function aliases. 421 // (1) For a Comdat group containing multiple functions, we need to have a 422 // unique postfix based on the hashes for each function. There is a 423 // non-trivial code refactoring to do this efficiently. 424 // (2) Variables can not be renamed, so we can not rename Comdat function in a 425 // group including global vars. 426 Comdat *C = F.getComdat(); 427 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 428 if (dyn_cast<GlobalAlias>(CM.second)) 429 continue; 430 Function *FM = dyn_cast<Function>(CM.second); 431 if (FM != &F) 432 return false; 433 } 434 return true; 435 } 436 437 // Append the CFGHash to the Comdat function name. 438 template <class Edge, class BBInfo> 439 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 440 if (!canRenameComdat(F, ComdatMembers)) 441 return; 442 std::string OrigName = F.getName().str(); 443 std::string NewFuncName = 444 Twine(F.getName() + "." + Twine(FunctionHash)).str(); 445 F.setName(Twine(NewFuncName)); 446 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 447 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 448 Comdat *NewComdat; 449 Module *M = F.getParent(); 450 // For AvailableExternallyLinkage functions, change the linkage to 451 // LinkOnceODR and put them into comdat. This is because after renaming, there 452 // is no backup external copy available for the function. 453 if (!F.hasComdat()) { 454 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 455 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 456 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 457 F.setComdat(NewComdat); 458 return; 459 } 460 461 // This function belongs to a single function Comdat group. 462 Comdat *OrigComdat = F.getComdat(); 463 std::string NewComdatName = 464 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 465 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 466 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 467 468 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 469 if (GlobalAlias *GA = dyn_cast<GlobalAlias>(CM.second)) { 470 // For aliases, change the name directly. 471 assert(dyn_cast<Function>(GA->getAliasee()->stripPointerCasts()) == &F); 472 std::string OrigGAName = GA->getName().str(); 473 GA->setName(Twine(GA->getName() + "." + Twine(FunctionHash))); 474 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigGAName, GA); 475 continue; 476 } 477 // Must be a function. 478 Function *CF = dyn_cast<Function>(CM.second); 479 assert(CF); 480 CF->setComdat(NewComdat); 481 } 482 } 483 484 // Given a CFG E to be instrumented, find which BB to place the instrumented 485 // code. The function will split the critical edge if necessary. 486 template <class Edge, class BBInfo> 487 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 488 if (E->InMST || E->Removed) 489 return nullptr; 490 491 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 492 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 493 // For a fake edge, instrument the real BB. 494 if (SrcBB == nullptr) 495 return DestBB; 496 if (DestBB == nullptr) 497 return SrcBB; 498 499 // Instrument the SrcBB if it has a single successor, 500 // otherwise, the DestBB if this is not a critical edge. 501 TerminatorInst *TI = SrcBB->getTerminator(); 502 if (TI->getNumSuccessors() <= 1) 503 return SrcBB; 504 if (!E->IsCritical) 505 return DestBB; 506 507 // For a critical edge, we have to split. Instrument the newly 508 // created BB. 509 NumOfPGOSplit++; 510 DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index << " --> " 511 << getBBInfo(DestBB).Index << "\n"); 512 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 513 BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum); 514 assert(InstrBB && "Critical edge is not split"); 515 516 E->Removed = true; 517 return InstrBB; 518 } 519 520 // Visit all edge and instrument the edges not in MST, and do value profiling. 521 // Critical edges will be split. 522 static void instrumentOneFunc( 523 Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, 524 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 525 FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI, 526 BFI); 527 unsigned NumCounters = FuncInfo.getNumCounters(); 528 529 uint32_t I = 0; 530 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); 531 for (auto &E : FuncInfo.MST.AllEdges) { 532 BasicBlock *InstrBB = FuncInfo.getInstrBB(E.get()); 533 if (!InstrBB) 534 continue; 535 536 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 537 assert(Builder.GetInsertPoint() != InstrBB->end() && 538 "Cannot get the Instrumentation point"); 539 Builder.CreateCall( 540 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), 541 {llvm::ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 542 Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters), 543 Builder.getInt32(I++)}); 544 } 545 546 // Now instrument select instructions: 547 FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar, 548 FuncInfo.FunctionHash); 549 assert(I == NumCounters); 550 551 if (DisableValueProfiling) 552 return; 553 554 unsigned NumIndirectCallSites = 0; 555 for (auto &I : FuncInfo.IndirectCallSites) { 556 CallSite CS(I); 557 Value *Callee = CS.getCalledValue(); 558 DEBUG(dbgs() << "Instrument one indirect call: CallSite Index = " 559 << NumIndirectCallSites << "\n"); 560 IRBuilder<> Builder(I); 561 assert(Builder.GetInsertPoint() != I->getParent()->end() && 562 "Cannot get the Instrumentation point"); 563 Builder.CreateCall( 564 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 565 {llvm::ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 566 Builder.getInt64(FuncInfo.FunctionHash), 567 Builder.CreatePtrToInt(Callee, Builder.getInt64Ty()), 568 Builder.getInt32(llvm::InstrProfValueKind::IPVK_IndirectCallTarget), 569 Builder.getInt32(NumIndirectCallSites++)}); 570 } 571 NumOfPGOICall += NumIndirectCallSites; 572 } 573 574 // This class represents a CFG edge in profile use compilation. 575 struct PGOUseEdge : public PGOEdge { 576 bool CountValid; 577 uint64_t CountValue; 578 PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1) 579 : PGOEdge(Src, Dest, W), CountValid(false), CountValue(0) {} 580 581 // Set edge count value 582 void setEdgeCount(uint64_t Value) { 583 CountValue = Value; 584 CountValid = true; 585 } 586 587 // Return the information string for this object. 588 const std::string infoString() const { 589 if (!CountValid) 590 return PGOEdge::infoString(); 591 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) 592 .str(); 593 } 594 }; 595 596 typedef SmallVector<PGOUseEdge *, 2> DirectEdges; 597 598 // This class stores the auxiliary information for each BB. 599 struct UseBBInfo : public BBInfo { 600 uint64_t CountValue; 601 bool CountValid; 602 int32_t UnknownCountInEdge; 603 int32_t UnknownCountOutEdge; 604 DirectEdges InEdges; 605 DirectEdges OutEdges; 606 UseBBInfo(unsigned IX) 607 : BBInfo(IX), CountValue(0), CountValid(false), UnknownCountInEdge(0), 608 UnknownCountOutEdge(0) {} 609 UseBBInfo(unsigned IX, uint64_t C) 610 : BBInfo(IX), CountValue(C), CountValid(true), UnknownCountInEdge(0), 611 UnknownCountOutEdge(0) {} 612 613 // Set the profile count value for this BB. 614 void setBBInfoCount(uint64_t Value) { 615 CountValue = Value; 616 CountValid = true; 617 } 618 619 // Return the information string of this object. 620 const std::string infoString() const { 621 if (!CountValid) 622 return BBInfo::infoString(); 623 return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str(); 624 } 625 }; 626 627 // Sum up the count values for all the edges. 628 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 629 uint64_t Total = 0; 630 for (auto &E : Edges) { 631 if (E->Removed) 632 continue; 633 Total += E->CountValue; 634 } 635 return Total; 636 } 637 638 class PGOUseFunc { 639 public: 640 PGOUseFunc(Function &Func, Module *Modu, 641 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 642 BranchProbabilityInfo *BPI = nullptr, 643 BlockFrequencyInfo *BFI = nullptr) 644 : F(Func), M(Modu), FuncInfo(Func, ComdatMembers, false, BPI, BFI), 645 FreqAttr(FFA_Normal) {} 646 647 // Read counts for the instrumented BB from profile. 648 bool readCounters(IndexedInstrProfReader *PGOReader); 649 650 // Populate the counts for all BBs. 651 void populateCounters(); 652 653 // Set the branch weights based on the count values. 654 void setBranchWeights(); 655 656 // Annotate the indirect call sites. 657 void annotateIndirectCallSites(); 658 659 // The hotness of the function from the profile count. 660 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 661 662 // Return the function hotness from the profile. 663 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 664 665 // Return the function hash. 666 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 667 // Return the profile record for this function; 668 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 669 670 // Return the auxiliary BB information. 671 UseBBInfo &getBBInfo(const BasicBlock *BB) const { 672 return FuncInfo.getBBInfo(BB); 673 } 674 675 private: 676 Function &F; 677 Module *M; 678 // This member stores the shared information with class PGOGenFunc. 679 FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo; 680 681 // The maximum count value in the profile. This is only used in PGO use 682 // compilation. 683 uint64_t ProgramMaxCount; 684 685 // ProfileRecord for this function. 686 InstrProfRecord ProfileRecord; 687 688 // Function hotness info derived from profile. 689 FuncFreqAttr FreqAttr; 690 691 // Find the Instrumented BB and set the value. 692 void setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 693 694 // Set the edge counter value for the unknown edge -- there should be only 695 // one unknown edge. 696 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 697 698 // Return FuncName string; 699 const std::string getFuncName() const { return FuncInfo.FuncName; } 700 701 // Set the hot/cold inline hints based on the count values. 702 // FIXME: This function should be removed once the functionality in 703 // the inliner is implemented. 704 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 705 if (ProgramMaxCount == 0) 706 return; 707 // Threshold of the hot functions. 708 const BranchProbability HotFunctionThreshold(1, 100); 709 // Threshold of the cold functions. 710 const BranchProbability ColdFunctionThreshold(2, 10000); 711 if (EntryCount >= HotFunctionThreshold.scale(ProgramMaxCount)) 712 FreqAttr = FFA_Hot; 713 else if (MaxCount <= ColdFunctionThreshold.scale(ProgramMaxCount)) 714 FreqAttr = FFA_Cold; 715 } 716 }; 717 718 // Visit all the edges and assign the count value for the instrumented 719 // edges and the BB. 720 void PGOUseFunc::setInstrumentedCounts( 721 const std::vector<uint64_t> &CountFromProfile) { 722 723 assert(FuncInfo.getNumCounters() == CountFromProfile.size()); 724 // Use a worklist as we will update the vector during the iteration. 725 std::vector<PGOUseEdge *> WorkList; 726 for (auto &E : FuncInfo.MST.AllEdges) 727 WorkList.push_back(E.get()); 728 729 uint32_t I = 0; 730 for (auto &E : WorkList) { 731 BasicBlock *InstrBB = FuncInfo.getInstrBB(E); 732 if (!InstrBB) 733 continue; 734 uint64_t CountValue = CountFromProfile[I++]; 735 if (!E->Removed) { 736 getBBInfo(InstrBB).setBBInfoCount(CountValue); 737 E->setEdgeCount(CountValue); 738 continue; 739 } 740 741 // Need to add two new edges. 742 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 743 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 744 // Add new edge of SrcBB->InstrBB. 745 PGOUseEdge &NewEdge = FuncInfo.MST.addEdge(SrcBB, InstrBB, 0); 746 NewEdge.setEdgeCount(CountValue); 747 // Add new edge of InstrBB->DestBB. 748 PGOUseEdge &NewEdge1 = FuncInfo.MST.addEdge(InstrBB, DestBB, 0); 749 NewEdge1.setEdgeCount(CountValue); 750 NewEdge1.InMST = true; 751 getBBInfo(InstrBB).setBBInfoCount(CountValue); 752 } 753 // Now annotate select instructions 754 FuncInfo.SIVisitor.annotateSelects(F, this, &I); 755 assert(I == CountFromProfile.size()); 756 } 757 758 // Set the count value for the unknown edge. There should be one and only one 759 // unknown edge in Edges vector. 760 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 761 for (auto &E : Edges) { 762 if (E->CountValid) 763 continue; 764 E->setEdgeCount(Value); 765 766 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 767 getBBInfo(E->DestBB).UnknownCountInEdge--; 768 return; 769 } 770 llvm_unreachable("Cannot find the unknown count edge"); 771 } 772 773 // Read the profile from ProfileFileName and assign the value to the 774 // instrumented BB and the edges. This function also updates ProgramMaxCount. 775 // Return true if the profile are successfully read, and false on errors. 776 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader) { 777 auto &Ctx = M->getContext(); 778 Expected<InstrProfRecord> Result = 779 PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); 780 if (Error E = Result.takeError()) { 781 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 782 auto Err = IPE.get(); 783 bool SkipWarning = false; 784 if (Err == instrprof_error::unknown_function) { 785 NumOfPGOMissing++; 786 SkipWarning = !PGOWarnMissing; 787 } else if (Err == instrprof_error::hash_mismatch || 788 Err == instrprof_error::malformed) { 789 NumOfPGOMismatch++; 790 SkipWarning = NoPGOWarnMismatch; 791 } 792 793 if (SkipWarning) 794 return; 795 796 std::string Msg = IPE.message() + std::string(" ") + F.getName().str(); 797 Ctx.diagnose( 798 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 799 }); 800 return false; 801 } 802 ProfileRecord = std::move(Result.get()); 803 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 804 805 NumOfPGOFunc++; 806 DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 807 uint64_t ValueSum = 0; 808 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 809 DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 810 ValueSum += CountFromProfile[I]; 811 } 812 813 DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 814 815 getBBInfo(nullptr).UnknownCountOutEdge = 2; 816 getBBInfo(nullptr).UnknownCountInEdge = 2; 817 818 setInstrumentedCounts(CountFromProfile); 819 ProgramMaxCount = PGOReader->getMaximumFunctionCount(); 820 return true; 821 } 822 823 // Populate the counters from instrumented BBs to all BBs. 824 // In the end of this operation, all BBs should have a valid count value. 825 void PGOUseFunc::populateCounters() { 826 // First set up Count variable for all BBs. 827 for (auto &E : FuncInfo.MST.AllEdges) { 828 if (E->Removed) 829 continue; 830 831 const BasicBlock *SrcBB = E->SrcBB; 832 const BasicBlock *DestBB = E->DestBB; 833 UseBBInfo &SrcInfo = getBBInfo(SrcBB); 834 UseBBInfo &DestInfo = getBBInfo(DestBB); 835 SrcInfo.OutEdges.push_back(E.get()); 836 DestInfo.InEdges.push_back(E.get()); 837 SrcInfo.UnknownCountOutEdge++; 838 DestInfo.UnknownCountInEdge++; 839 840 if (!E->CountValid) 841 continue; 842 DestInfo.UnknownCountInEdge--; 843 SrcInfo.UnknownCountOutEdge--; 844 } 845 846 bool Changes = true; 847 unsigned NumPasses = 0; 848 while (Changes) { 849 NumPasses++; 850 Changes = false; 851 852 // For efficient traversal, it's better to start from the end as most 853 // of the instrumented edges are at the end. 854 for (auto &BB : reverse(F)) { 855 UseBBInfo &Count = getBBInfo(&BB); 856 if (!Count.CountValid) { 857 if (Count.UnknownCountOutEdge == 0) { 858 Count.CountValue = sumEdgeCount(Count.OutEdges); 859 Count.CountValid = true; 860 Changes = true; 861 } else if (Count.UnknownCountInEdge == 0) { 862 Count.CountValue = sumEdgeCount(Count.InEdges); 863 Count.CountValid = true; 864 Changes = true; 865 } 866 } 867 if (Count.CountValid) { 868 if (Count.UnknownCountOutEdge == 1) { 869 uint64_t Total = Count.CountValue - sumEdgeCount(Count.OutEdges); 870 setEdgeCount(Count.OutEdges, Total); 871 Changes = true; 872 } 873 if (Count.UnknownCountInEdge == 1) { 874 uint64_t Total = Count.CountValue - sumEdgeCount(Count.InEdges); 875 setEdgeCount(Count.InEdges, Total); 876 Changes = true; 877 } 878 } 879 } 880 } 881 882 DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 883 #ifndef NDEBUG 884 // Assert every BB has a valid counter. 885 for (auto &BB : F) 886 assert(getBBInfo(&BB).CountValid && "BB count is not valid"); 887 #endif 888 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 889 F.setEntryCount(FuncEntryCount); 890 uint64_t FuncMaxCount = FuncEntryCount; 891 for (auto &BB : F) 892 FuncMaxCount = std::max(FuncMaxCount, getBBInfo(&BB).CountValue); 893 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 894 895 DEBUG(FuncInfo.dumpInfo("after reading profile.")); 896 } 897 898 static void setProfMetadata(Module *M, Instruction *TI, 899 ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) { 900 MDBuilder MDB(M->getContext()); 901 assert(MaxCount > 0 && "Bad max count"); 902 uint64_t Scale = calculateCountScale(MaxCount); 903 SmallVector<unsigned, 4> Weights; 904 for (const auto &ECI : EdgeCounts) 905 Weights.push_back(scaleBranchCount(ECI, Scale)); 906 907 DEBUG(dbgs() << "Weight is: "; 908 for (const auto &W : Weights) { dbgs() << W << " "; } 909 dbgs() << "\n";); 910 TI->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 911 } 912 913 // Assign the scaled count values to the BB with multiple out edges. 914 void PGOUseFunc::setBranchWeights() { 915 // Generate MD_prof metadata for every branch instruction. 916 DEBUG(dbgs() << "\nSetting branch weights.\n"); 917 for (auto &BB : F) { 918 TerminatorInst *TI = BB.getTerminator(); 919 if (TI->getNumSuccessors() < 2) 920 continue; 921 if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI)) 922 continue; 923 if (getBBInfo(&BB).CountValue == 0) 924 continue; 925 926 // We have a non-zero Branch BB. 927 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 928 unsigned Size = BBCountInfo.OutEdges.size(); 929 SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 930 uint64_t MaxCount = 0; 931 for (unsigned s = 0; s < Size; s++) { 932 const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 933 const BasicBlock *SrcBB = E->SrcBB; 934 const BasicBlock *DestBB = E->DestBB; 935 if (DestBB == nullptr) 936 continue; 937 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 938 uint64_t EdgeCount = E->CountValue; 939 if (EdgeCount > MaxCount) 940 MaxCount = EdgeCount; 941 EdgeCounts[SuccNum] = EdgeCount; 942 } 943 setProfMetadata(M, TI, EdgeCounts, MaxCount); 944 } 945 } 946 947 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 948 Module *M = F.getParent(); 949 IRBuilder<> Builder(&SI); 950 Type *Int64Ty = Builder.getInt64Ty(); 951 Type *I8PtrTy = Builder.getInt8PtrTy(); 952 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 953 Builder.CreateCall( 954 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 955 {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 956 Builder.getInt64(FuncHash), 957 Builder.getInt32(TotalNumCtrs), Builder.getInt32(*CurCtrIdx), Step}); 958 ++(*CurCtrIdx); 959 } 960 961 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 962 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 963 assert(*CurCtrIdx < CountFromProfile.size() && 964 "Out of bound access of counters"); 965 uint64_t SCounts[2]; 966 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 967 ++(*CurCtrIdx); 968 uint64_t TotalCount = UseFunc->getBBInfo(SI.getParent()).CountValue; 969 // False Count 970 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 971 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 972 if (MaxCount) 973 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 974 } 975 976 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 977 if (!PGOInstrSelect) 978 return; 979 // FIXME: do not handle this yet. 980 if (SI.getCondition()->getType()->isVectorTy()) 981 return; 982 983 NSIs++; 984 switch (Mode) { 985 case VM_counting: 986 return; 987 case VM_instrument: 988 instrumentOneSelectInst(SI); 989 return; 990 case VM_annotate: 991 annotateOneSelectInst(SI); 992 return; 993 } 994 995 llvm_unreachable("Unknown visiting mode"); 996 } 997 998 // Traverse all the indirect callsites and annotate the instructions. 999 void PGOUseFunc::annotateIndirectCallSites() { 1000 if (DisableValueProfiling) 1001 return; 1002 1003 // Create the PGOFuncName meta data. 1004 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1005 1006 unsigned IndirectCallSiteIndex = 0; 1007 auto &IndirectCallSites = FuncInfo.IndirectCallSites; 1008 unsigned NumValueSites = 1009 ProfileRecord.getNumValueSites(IPVK_IndirectCallTarget); 1010 if (NumValueSites != IndirectCallSites.size()) { 1011 std::string Msg = 1012 std::string("Inconsistent number of indirect call sites: ") + 1013 F.getName().str(); 1014 auto &Ctx = M->getContext(); 1015 Ctx.diagnose( 1016 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1017 return; 1018 } 1019 1020 for (auto &I : IndirectCallSites) { 1021 DEBUG(dbgs() << "Read one indirect call instrumentation: Index=" 1022 << IndirectCallSiteIndex << " out of " << NumValueSites 1023 << "\n"); 1024 annotateValueSite(*M, *I, ProfileRecord, IPVK_IndirectCallTarget, 1025 IndirectCallSiteIndex, MaxNumAnnotations); 1026 IndirectCallSiteIndex++; 1027 } 1028 } 1029 } // end anonymous namespace 1030 1031 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime 1032 // aware this is an ir_level profile so it can set the version flag. 1033 static void createIRLevelProfileFlagVariable(Module &M) { 1034 Type *IntTy64 = Type::getInt64Ty(M.getContext()); 1035 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); 1036 auto IRLevelVersionVariable = new GlobalVariable( 1037 M, IntTy64, true, GlobalVariable::ExternalLinkage, 1038 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), 1039 INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); 1040 IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility); 1041 Triple TT(M.getTargetTriple()); 1042 if (!TT.supportsCOMDAT()) 1043 IRLevelVersionVariable->setLinkage(GlobalValue::WeakAnyLinkage); 1044 else 1045 IRLevelVersionVariable->setComdat(M.getOrInsertComdat( 1046 StringRef(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)))); 1047 } 1048 1049 // Collect the set of members for each Comdat in module M and store 1050 // in ComdatMembers. 1051 static void collectComdatMembers( 1052 Module &M, 1053 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1054 if (!DoComdatRenaming) 1055 return; 1056 for (Function &F : M) 1057 if (Comdat *C = F.getComdat()) 1058 ComdatMembers.insert(std::make_pair(C, &F)); 1059 for (GlobalVariable &GV : M.globals()) 1060 if (Comdat *C = GV.getComdat()) 1061 ComdatMembers.insert(std::make_pair(C, &GV)); 1062 for (GlobalAlias &GA : M.aliases()) 1063 if (Comdat *C = GA.getComdat()) 1064 ComdatMembers.insert(std::make_pair(C, &GA)); 1065 } 1066 1067 static bool InstrumentAllFunctions( 1068 Module &M, function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1069 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) { 1070 createIRLevelProfileFlagVariable(M); 1071 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1072 collectComdatMembers(M, ComdatMembers); 1073 1074 for (auto &F : M) { 1075 if (F.isDeclaration()) 1076 continue; 1077 auto *BPI = LookupBPI(F); 1078 auto *BFI = LookupBFI(F); 1079 instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers); 1080 } 1081 return true; 1082 } 1083 1084 bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) { 1085 if (skipModule(M)) 1086 return false; 1087 1088 auto LookupBPI = [this](Function &F) { 1089 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1090 }; 1091 auto LookupBFI = [this](Function &F) { 1092 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1093 }; 1094 return InstrumentAllFunctions(M, LookupBPI, LookupBFI); 1095 } 1096 1097 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1098 ModuleAnalysisManager &AM) { 1099 1100 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1101 auto LookupBPI = [&FAM](Function &F) { 1102 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1103 }; 1104 1105 auto LookupBFI = [&FAM](Function &F) { 1106 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1107 }; 1108 1109 if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI)) 1110 return PreservedAnalyses::all(); 1111 1112 return PreservedAnalyses::none(); 1113 } 1114 1115 static bool annotateAllFunctions( 1116 Module &M, StringRef ProfileFileName, 1117 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1118 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) { 1119 DEBUG(dbgs() << "Read in profile counters: "); 1120 auto &Ctx = M.getContext(); 1121 // Read the counter array from file. 1122 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName); 1123 if (Error E = ReaderOrErr.takeError()) { 1124 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 1125 Ctx.diagnose( 1126 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 1127 }); 1128 return false; 1129 } 1130 1131 std::unique_ptr<IndexedInstrProfReader> PGOReader = 1132 std::move(ReaderOrErr.get()); 1133 if (!PGOReader) { 1134 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 1135 StringRef("Cannot get PGOReader"))); 1136 return false; 1137 } 1138 // TODO: might need to change the warning once the clang option is finalized. 1139 if (!PGOReader->isIRLevelProfile()) { 1140 Ctx.diagnose(DiagnosticInfoPGOProfile( 1141 ProfileFileName.data(), "Not an IR level instrumentation profile")); 1142 return false; 1143 } 1144 1145 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1146 collectComdatMembers(M, ComdatMembers); 1147 std::vector<Function *> HotFunctions; 1148 std::vector<Function *> ColdFunctions; 1149 for (auto &F : M) { 1150 if (F.isDeclaration()) 1151 continue; 1152 auto *BPI = LookupBPI(F); 1153 auto *BFI = LookupBFI(F); 1154 PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI); 1155 if (!Func.readCounters(PGOReader.get())) 1156 continue; 1157 Func.populateCounters(); 1158 Func.setBranchWeights(); 1159 Func.annotateIndirectCallSites(); 1160 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 1161 if (FreqAttr == PGOUseFunc::FFA_Cold) 1162 ColdFunctions.push_back(&F); 1163 else if (FreqAttr == PGOUseFunc::FFA_Hot) 1164 HotFunctions.push_back(&F); 1165 } 1166 M.setProfileSummary(PGOReader->getSummary().getMD(M.getContext())); 1167 // Set function hotness attribute from the profile. 1168 // We have to apply these attributes at the end because their presence 1169 // can affect the BranchProbabilityInfo of any callers, resulting in an 1170 // inconsistent MST between prof-gen and prof-use. 1171 for (auto &F : HotFunctions) { 1172 F->addFnAttr(llvm::Attribute::InlineHint); 1173 DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 1174 << "\n"); 1175 } 1176 for (auto &F : ColdFunctions) { 1177 F->addFnAttr(llvm::Attribute::Cold); 1178 DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() << "\n"); 1179 } 1180 return true; 1181 } 1182 1183 PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename) 1184 : ProfileFileName(std::move(Filename)) { 1185 if (!PGOTestProfileFile.empty()) 1186 ProfileFileName = PGOTestProfileFile; 1187 } 1188 1189 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 1190 ModuleAnalysisManager &AM) { 1191 1192 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1193 auto LookupBPI = [&FAM](Function &F) { 1194 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1195 }; 1196 1197 auto LookupBFI = [&FAM](Function &F) { 1198 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1199 }; 1200 1201 if (!annotateAllFunctions(M, ProfileFileName, LookupBPI, LookupBFI)) 1202 return PreservedAnalyses::all(); 1203 1204 return PreservedAnalyses::none(); 1205 } 1206 1207 bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { 1208 if (skipModule(M)) 1209 return false; 1210 1211 auto LookupBPI = [this](Function &F) { 1212 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1213 }; 1214 auto LookupBFI = [this](Function &F) { 1215 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1216 }; 1217 1218 return annotateAllFunctions(M, ProfileFileName, LookupBPI, LookupBFI); 1219 } 1220