1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements PGO instrumentation using a minimum spanning tree based 11 // on the following paper: 12 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 13 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 14 // Issue 3, pp 313-322 15 // The idea of the algorithm based on the fact that for each node (except for 16 // the entry and exit), the sum of incoming edge counts equals the sum of 17 // outgoing edge counts. The count of edge on spanning tree can be derived from 18 // those edges not on the spanning tree. Knuth proves this method instruments 19 // the minimum number of edges. 20 // 21 // The minimal spanning tree here is actually a maximum weight tree -- on-tree 22 // edges have higher frequencies (more likely to execute). The idea is to 23 // instrument those less frequently executed edges to reduce the runtime 24 // overhead of instrumented binaries. 25 // 26 // This file contains two passes: 27 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 28 // count profile, and generates the instrumentation for indirect call 29 // profiling. 30 // (2) Pass PGOInstrumentationUse which reads the edge count profile and 31 // annotates the branch weights. It also reads the indirect call value 32 // profiling records and annotate the indirect call instructions. 33 // 34 // To get the precise counter information, These two passes need to invoke at 35 // the same compilation point (so they see the same IR). For pass 36 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For 37 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 38 // the profile is opened in module level and passed to each PGOUseFunc instance. 39 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 40 // in class FuncPGOInstrumentation. 41 // 42 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 43 // BBInfo contains auxiliary information for each BB. These two classes are used 44 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 45 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 46 // used in populating profile counters. 47 // The MST implementation is in Class CFGMST (CFGMST.h). 48 // 49 //===----------------------------------------------------------------------===// 50 51 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 52 #include "CFGMST.h" 53 #include "llvm/ADT/APInt.h" 54 #include "llvm/ADT/ArrayRef.h" 55 #include "llvm/ADT/STLExtras.h" 56 #include "llvm/ADT/SmallVector.h" 57 #include "llvm/ADT/Statistic.h" 58 #include "llvm/ADT/StringRef.h" 59 #include "llvm/ADT/Triple.h" 60 #include "llvm/ADT/Twine.h" 61 #include "llvm/ADT/iterator.h" 62 #include "llvm/ADT/iterator_range.h" 63 #include "llvm/Analysis/BlockFrequencyInfo.h" 64 #include "llvm/Analysis/BranchProbabilityInfo.h" 65 #include "llvm/Analysis/CFG.h" 66 #include "llvm/Analysis/IndirectCallSiteVisitor.h" 67 #include "llvm/Analysis/LoopInfo.h" 68 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 69 #include "llvm/IR/Attributes.h" 70 #include "llvm/IR/BasicBlock.h" 71 #include "llvm/IR/CFG.h" 72 #include "llvm/IR/CallSite.h" 73 #include "llvm/IR/Comdat.h" 74 #include "llvm/IR/Constant.h" 75 #include "llvm/IR/Constants.h" 76 #include "llvm/IR/DiagnosticInfo.h" 77 #include "llvm/IR/Dominators.h" 78 #include "llvm/IR/Function.h" 79 #include "llvm/IR/GlobalAlias.h" 80 #include "llvm/IR/GlobalValue.h" 81 #include "llvm/IR/GlobalVariable.h" 82 #include "llvm/IR/IRBuilder.h" 83 #include "llvm/IR/InstVisitor.h" 84 #include "llvm/IR/InstrTypes.h" 85 #include "llvm/IR/Instruction.h" 86 #include "llvm/IR/Instructions.h" 87 #include "llvm/IR/IntrinsicInst.h" 88 #include "llvm/IR/Intrinsics.h" 89 #include "llvm/IR/LLVMContext.h" 90 #include "llvm/IR/MDBuilder.h" 91 #include "llvm/IR/Module.h" 92 #include "llvm/IR/PassManager.h" 93 #include "llvm/IR/ProfileSummary.h" 94 #include "llvm/IR/Type.h" 95 #include "llvm/IR/Value.h" 96 #include "llvm/Pass.h" 97 #include "llvm/ProfileData/InstrProf.h" 98 #include "llvm/ProfileData/InstrProfReader.h" 99 #include "llvm/Support/BranchProbability.h" 100 #include "llvm/Support/Casting.h" 101 #include "llvm/Support/CommandLine.h" 102 #include "llvm/Support/DOTGraphTraits.h" 103 #include "llvm/Support/Debug.h" 104 #include "llvm/Support/Error.h" 105 #include "llvm/Support/ErrorHandling.h" 106 #include "llvm/Support/GraphWriter.h" 107 #include "llvm/Support/JamCRC.h" 108 #include "llvm/Support/raw_ostream.h" 109 #include "llvm/Transforms/Instrumentation.h" 110 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 111 #include <algorithm> 112 #include <cassert> 113 #include <cstdint> 114 #include <memory> 115 #include <numeric> 116 #include <string> 117 #include <unordered_map> 118 #include <utility> 119 #include <vector> 120 121 using namespace llvm; 122 using ProfileCount = Function::ProfileCount; 123 124 #define DEBUG_TYPE "pgo-instrumentation" 125 126 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 127 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 128 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); 129 STATISTIC(NumOfPGOEdge, "Number of edges."); 130 STATISTIC(NumOfPGOBB, "Number of basic-blocks."); 131 STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 132 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 133 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 134 STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 135 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 136 137 // Command line option to specify the file to read profile from. This is 138 // mainly used for testing. 139 static cl::opt<std::string> 140 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, 141 cl::value_desc("filename"), 142 cl::desc("Specify the path of profile data file. This is" 143 "mainly for test purpose.")); 144 static cl::opt<std::string> PGOTestProfileRemappingFile( 145 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, 146 cl::value_desc("filename"), 147 cl::desc("Specify the path of profile remapping file. This is mainly for " 148 "test purpose.")); 149 150 // Command line option to disable value profiling. The default is false: 151 // i.e. value profiling is enabled by default. This is for debug purpose. 152 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 153 cl::Hidden, 154 cl::desc("Disable Value Profiling")); 155 156 // Command line option to set the maximum number of VP annotations to write to 157 // the metadata for a single indirect call callsite. 158 static cl::opt<unsigned> MaxNumAnnotations( 159 "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore, 160 cl::desc("Max number of annotations for a single indirect " 161 "call callsite")); 162 163 // Command line option to set the maximum number of value annotations 164 // to write to the metadata for a single memop intrinsic. 165 static cl::opt<unsigned> MaxNumMemOPAnnotations( 166 "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore, 167 cl::desc("Max number of preicise value annotations for a single memop" 168 "intrinsic")); 169 170 // Command line option to control appending FunctionHash to the name of a COMDAT 171 // function. This is to avoid the hash mismatch caused by the preinliner. 172 static cl::opt<bool> DoComdatRenaming( 173 "do-comdat-renaming", cl::init(false), cl::Hidden, 174 cl::desc("Append function hash to the name of COMDAT function to avoid " 175 "function hash mismatch due to the preinliner")); 176 177 // Command line option to enable/disable the warning about missing profile 178 // information. 179 static cl::opt<bool> 180 PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, 181 cl::desc("Use this option to turn on/off " 182 "warnings about missing profile data for " 183 "functions.")); 184 185 // Command line option to enable/disable the warning about a hash mismatch in 186 // the profile data. 187 static cl::opt<bool> 188 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 189 cl::desc("Use this option to turn off/on " 190 "warnings about profile cfg mismatch.")); 191 192 // Command line option to enable/disable the warning about a hash mismatch in 193 // the profile data for Comdat functions, which often turns out to be false 194 // positive due to the pre-instrumentation inline. 195 static cl::opt<bool> 196 NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), 197 cl::Hidden, 198 cl::desc("The option is used to turn on/off " 199 "warnings about hash mismatch for comdat " 200 "functions.")); 201 202 // Command line option to enable/disable select instruction instrumentation. 203 static cl::opt<bool> 204 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 205 cl::desc("Use this option to turn on/off SELECT " 206 "instruction instrumentation. ")); 207 208 // Command line option to turn on CFG dot or text dump of raw profile counts 209 static cl::opt<PGOViewCountsType> PGOViewRawCounts( 210 "pgo-view-raw-counts", cl::Hidden, 211 cl::desc("A boolean option to show CFG dag or text " 212 "with raw profile counts from " 213 "profile data. See also option " 214 "-pgo-view-counts. To limit graph " 215 "display to only one function, use " 216 "filtering option -view-bfi-func-name."), 217 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), 218 clEnumValN(PGOVCT_Graph, "graph", "show a graph."), 219 clEnumValN(PGOVCT_Text, "text", "show in text."))); 220 221 // Command line option to enable/disable memop intrinsic call.size profiling. 222 static cl::opt<bool> 223 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 224 cl::desc("Use this option to turn on/off " 225 "memory intrinsic size profiling.")); 226 227 // Emit branch probability as optimization remarks. 228 static cl::opt<bool> 229 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 230 cl::desc("When this option is on, the annotated " 231 "branch probability will be emitted as " 232 "optimization remarks: -{Rpass|" 233 "pass-remarks}=pgo-instrumentation")); 234 235 // Command line option to turn on CFG dot dump after profile annotation. 236 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts 237 extern cl::opt<PGOViewCountsType> PGOViewCounts; 238 239 // Command line option to specify the name of the function for CFG dump 240 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= 241 extern cl::opt<std::string> ViewBlockFreqFuncName; 242 243 // Return a string describing the branch condition that can be 244 // used in static branch probability heuristics: 245 static std::string getBranchCondString(Instruction *TI) { 246 BranchInst *BI = dyn_cast<BranchInst>(TI); 247 if (!BI || !BI->isConditional()) 248 return std::string(); 249 250 Value *Cond = BI->getCondition(); 251 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 252 if (!CI) 253 return std::string(); 254 255 std::string result; 256 raw_string_ostream OS(result); 257 OS << CmpInst::getPredicateName(CI->getPredicate()) << "_"; 258 CI->getOperand(0)->getType()->print(OS, true); 259 260 Value *RHS = CI->getOperand(1); 261 ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 262 if (CV) { 263 if (CV->isZero()) 264 OS << "_Zero"; 265 else if (CV->isOne()) 266 OS << "_One"; 267 else if (CV->isMinusOne()) 268 OS << "_MinusOne"; 269 else 270 OS << "_Const"; 271 } 272 OS.flush(); 273 return result; 274 } 275 276 namespace { 277 278 /// The select instruction visitor plays three roles specified 279 /// by the mode. In \c VM_counting mode, it simply counts the number of 280 /// select instructions. In \c VM_instrument mode, it inserts code to count 281 /// the number times TrueValue of select is taken. In \c VM_annotate mode, 282 /// it reads the profile data and annotate the select instruction with metadata. 283 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 284 class PGOUseFunc; 285 286 /// Instruction Visitor class to visit select instructions. 287 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 288 Function &F; 289 unsigned NSIs = 0; // Number of select instructions instrumented. 290 VisitMode Mode = VM_counting; // Visiting mode. 291 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 292 unsigned TotalNumCtrs = 0; // Total number of counters 293 GlobalVariable *FuncNameVar = nullptr; 294 uint64_t FuncHash = 0; 295 PGOUseFunc *UseFunc = nullptr; 296 297 SelectInstVisitor(Function &Func) : F(Func) {} 298 299 void countSelects(Function &Func) { 300 NSIs = 0; 301 Mode = VM_counting; 302 visit(Func); 303 } 304 305 // Visit the IR stream and instrument all select instructions. \p 306 // Ind is a pointer to the counter index variable; \p TotalNC 307 // is the total number of counters; \p FNV is the pointer to the 308 // PGO function name var; \p FHash is the function hash. 309 void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC, 310 GlobalVariable *FNV, uint64_t FHash) { 311 Mode = VM_instrument; 312 CurCtrIdx = Ind; 313 TotalNumCtrs = TotalNC; 314 FuncHash = FHash; 315 FuncNameVar = FNV; 316 visit(Func); 317 } 318 319 // Visit the IR stream and annotate all select instructions. 320 void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) { 321 Mode = VM_annotate; 322 UseFunc = UF; 323 CurCtrIdx = Ind; 324 visit(Func); 325 } 326 327 void instrumentOneSelectInst(SelectInst &SI); 328 void annotateOneSelectInst(SelectInst &SI); 329 330 // Visit \p SI instruction and perform tasks according to visit mode. 331 void visitSelectInst(SelectInst &SI); 332 333 // Return the number of select instructions. This needs be called after 334 // countSelects(). 335 unsigned getNumOfSelectInsts() const { return NSIs; } 336 }; 337 338 /// Instruction Visitor class to visit memory intrinsic calls. 339 struct MemIntrinsicVisitor : public InstVisitor<MemIntrinsicVisitor> { 340 Function &F; 341 unsigned NMemIs = 0; // Number of memIntrinsics instrumented. 342 VisitMode Mode = VM_counting; // Visiting mode. 343 unsigned CurCtrId = 0; // Current counter index. 344 unsigned TotalNumCtrs = 0; // Total number of counters 345 GlobalVariable *FuncNameVar = nullptr; 346 uint64_t FuncHash = 0; 347 PGOUseFunc *UseFunc = nullptr; 348 std::vector<Instruction *> Candidates; 349 350 MemIntrinsicVisitor(Function &Func) : F(Func) {} 351 352 void countMemIntrinsics(Function &Func) { 353 NMemIs = 0; 354 Mode = VM_counting; 355 visit(Func); 356 } 357 358 void instrumentMemIntrinsics(Function &Func, unsigned TotalNC, 359 GlobalVariable *FNV, uint64_t FHash) { 360 Mode = VM_instrument; 361 TotalNumCtrs = TotalNC; 362 FuncHash = FHash; 363 FuncNameVar = FNV; 364 visit(Func); 365 } 366 367 std::vector<Instruction *> findMemIntrinsics(Function &Func) { 368 Candidates.clear(); 369 Mode = VM_annotate; 370 visit(Func); 371 return Candidates; 372 } 373 374 // Visit the IR stream and annotate all mem intrinsic call instructions. 375 void instrumentOneMemIntrinsic(MemIntrinsic &MI); 376 377 // Visit \p MI instruction and perform tasks according to visit mode. 378 void visitMemIntrinsic(MemIntrinsic &SI); 379 380 unsigned getNumOfMemIntrinsics() const { return NMemIs; } 381 }; 382 383 class PGOInstrumentationGenLegacyPass : public ModulePass { 384 public: 385 static char ID; 386 387 PGOInstrumentationGenLegacyPass() : ModulePass(ID) { 388 initializePGOInstrumentationGenLegacyPassPass( 389 *PassRegistry::getPassRegistry()); 390 } 391 392 StringRef getPassName() const override { return "PGOInstrumentationGenPass"; } 393 394 private: 395 bool runOnModule(Module &M) override; 396 397 void getAnalysisUsage(AnalysisUsage &AU) const override { 398 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 399 } 400 }; 401 402 class PGOInstrumentationUseLegacyPass : public ModulePass { 403 public: 404 static char ID; 405 406 // Provide the profile filename as the parameter. 407 PGOInstrumentationUseLegacyPass(std::string Filename = "") 408 : ModulePass(ID), ProfileFileName(std::move(Filename)) { 409 if (!PGOTestProfileFile.empty()) 410 ProfileFileName = PGOTestProfileFile; 411 initializePGOInstrumentationUseLegacyPassPass( 412 *PassRegistry::getPassRegistry()); 413 } 414 415 StringRef getPassName() const override { return "PGOInstrumentationUsePass"; } 416 417 private: 418 std::string ProfileFileName; 419 420 bool runOnModule(Module &M) override; 421 422 void getAnalysisUsage(AnalysisUsage &AU) const override { 423 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 424 } 425 }; 426 427 } // end anonymous namespace 428 429 char PGOInstrumentationGenLegacyPass::ID = 0; 430 431 INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 432 "PGO instrumentation.", false, false) 433 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 434 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 435 INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 436 "PGO instrumentation.", false, false) 437 438 ModulePass *llvm::createPGOInstrumentationGenLegacyPass() { 439 return new PGOInstrumentationGenLegacyPass(); 440 } 441 442 char PGOInstrumentationUseLegacyPass::ID = 0; 443 444 INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 445 "Read PGO instrumentation profile.", false, false) 446 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 447 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 448 INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 449 "Read PGO instrumentation profile.", false, false) 450 451 ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename) { 452 return new PGOInstrumentationUseLegacyPass(Filename.str()); 453 } 454 455 namespace { 456 457 /// An MST based instrumentation for PGO 458 /// 459 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO 460 /// in the function level. 461 struct PGOEdge { 462 // This class implements the CFG edges. Note the CFG can be a multi-graph. 463 // So there might be multiple edges with same SrcBB and DestBB. 464 const BasicBlock *SrcBB; 465 const BasicBlock *DestBB; 466 uint64_t Weight; 467 bool InMST = false; 468 bool Removed = false; 469 bool IsCritical = false; 470 471 PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 472 : SrcBB(Src), DestBB(Dest), Weight(W) {} 473 474 // Return the information string of an edge. 475 const std::string infoString() const { 476 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 477 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str(); 478 } 479 }; 480 481 // This class stores the auxiliary information for each BB. 482 struct BBInfo { 483 BBInfo *Group; 484 uint32_t Index; 485 uint32_t Rank = 0; 486 487 BBInfo(unsigned IX) : Group(this), Index(IX) {} 488 489 // Return the information string of this object. 490 const std::string infoString() const { 491 return (Twine("Index=") + Twine(Index)).str(); 492 } 493 }; 494 495 // This class implements the CFG edges. Note the CFG can be a multi-graph. 496 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 497 private: 498 Function &F; 499 500 // A map that stores the Comdat group in function F. 501 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 502 503 void computeCFGHash(); 504 void renameComdatFunction(); 505 506 public: 507 std::vector<std::vector<Instruction *>> ValueSites; 508 SelectInstVisitor SIVisitor; 509 MemIntrinsicVisitor MIVisitor; 510 std::string FuncName; 511 GlobalVariable *FuncNameVar; 512 513 // CFG hash value for this function. 514 uint64_t FunctionHash = 0; 515 516 // The Minimum Spanning Tree of function CFG. 517 CFGMST<Edge, BBInfo> MST; 518 519 // Give an edge, find the BB that will be instrumented. 520 // Return nullptr if there is no BB to be instrumented. 521 BasicBlock *getInstrBB(Edge *E); 522 523 // Return the auxiliary BB information. 524 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 525 526 // Return the auxiliary BB information if available. 527 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 528 529 // Dump edges and BB information. 530 void dumpInfo(std::string Str = "") const { 531 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " + 532 Twine(FunctionHash) + "\t" + Str); 533 } 534 535 FuncPGOInstrumentation( 536 Function &Func, 537 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 538 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 539 BlockFrequencyInfo *BFI = nullptr) 540 : F(Func), ComdatMembers(ComdatMembers), ValueSites(IPVK_Last + 1), 541 SIVisitor(Func), MIVisitor(Func), MST(F, BPI, BFI) { 542 // This should be done before CFG hash computation. 543 SIVisitor.countSelects(Func); 544 MIVisitor.countMemIntrinsics(Func); 545 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 546 NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics(); 547 ValueSites[IPVK_IndirectCallTarget] = findIndirectCallSites(Func); 548 ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func); 549 550 FuncName = getPGOFuncName(F); 551 computeCFGHash(); 552 if (!ComdatMembers.empty()) 553 renameComdatFunction(); 554 LLVM_DEBUG(dumpInfo("after CFGMST")); 555 556 NumOfPGOBB += MST.BBInfos.size(); 557 for (auto &E : MST.AllEdges) { 558 if (E->Removed) 559 continue; 560 NumOfPGOEdge++; 561 if (!E->InMST) 562 NumOfPGOInstrument++; 563 } 564 565 if (CreateGlobalVar) 566 FuncNameVar = createPGOFuncNameVar(F, FuncName); 567 } 568 569 // Return the number of profile counters needed for the function. 570 unsigned getNumCounters() { 571 unsigned NumCounters = 0; 572 for (auto &E : this->MST.AllEdges) { 573 if (!E->InMST && !E->Removed) 574 NumCounters++; 575 } 576 return NumCounters + SIVisitor.getNumOfSelectInsts(); 577 } 578 }; 579 580 } // end anonymous namespace 581 582 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 583 // value of each BB in the CFG. The higher 32 bits record the number of edges. 584 template <class Edge, class BBInfo> 585 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 586 std::vector<char> Indexes; 587 JamCRC JC; 588 for (auto &BB : F) { 589 const Instruction *TI = BB.getTerminator(); 590 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 591 BasicBlock *Succ = TI->getSuccessor(I); 592 auto BI = findBBInfo(Succ); 593 if (BI == nullptr) 594 continue; 595 uint32_t Index = BI->Index; 596 for (int J = 0; J < 4; J++) 597 Indexes.push_back((char)(Index >> (J * 8))); 598 } 599 } 600 JC.update(Indexes); 601 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | 602 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | 603 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); 604 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" 605 << " CRC = " << JC.getCRC() 606 << ", Selects = " << SIVisitor.getNumOfSelectInsts() 607 << ", Edges = " << MST.AllEdges.size() << ", ICSites = " 608 << ValueSites[IPVK_IndirectCallTarget].size() 609 << ", Hash = " << FunctionHash << "\n";); 610 } 611 612 // Check if we can safely rename this Comdat function. 613 static bool canRenameComdat( 614 Function &F, 615 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 616 if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 617 return false; 618 619 // FIXME: Current only handle those Comdat groups that only containing one 620 // function and function aliases. 621 // (1) For a Comdat group containing multiple functions, we need to have a 622 // unique postfix based on the hashes for each function. There is a 623 // non-trivial code refactoring to do this efficiently. 624 // (2) Variables can not be renamed, so we can not rename Comdat function in a 625 // group including global vars. 626 Comdat *C = F.getComdat(); 627 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 628 if (dyn_cast<GlobalAlias>(CM.second)) 629 continue; 630 Function *FM = dyn_cast<Function>(CM.second); 631 if (FM != &F) 632 return false; 633 } 634 return true; 635 } 636 637 // Append the CFGHash to the Comdat function name. 638 template <class Edge, class BBInfo> 639 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 640 if (!canRenameComdat(F, ComdatMembers)) 641 return; 642 std::string OrigName = F.getName().str(); 643 std::string NewFuncName = 644 Twine(F.getName() + "." + Twine(FunctionHash)).str(); 645 F.setName(Twine(NewFuncName)); 646 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 647 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 648 Comdat *NewComdat; 649 Module *M = F.getParent(); 650 // For AvailableExternallyLinkage functions, change the linkage to 651 // LinkOnceODR and put them into comdat. This is because after renaming, there 652 // is no backup external copy available for the function. 653 if (!F.hasComdat()) { 654 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 655 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 656 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 657 F.setComdat(NewComdat); 658 return; 659 } 660 661 // This function belongs to a single function Comdat group. 662 Comdat *OrigComdat = F.getComdat(); 663 std::string NewComdatName = 664 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 665 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 666 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 667 668 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 669 if (GlobalAlias *GA = dyn_cast<GlobalAlias>(CM.second)) { 670 // For aliases, change the name directly. 671 assert(dyn_cast<Function>(GA->getAliasee()->stripPointerCasts()) == &F); 672 std::string OrigGAName = GA->getName().str(); 673 GA->setName(Twine(GA->getName() + "." + Twine(FunctionHash))); 674 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigGAName, GA); 675 continue; 676 } 677 // Must be a function. 678 Function *CF = dyn_cast<Function>(CM.second); 679 assert(CF); 680 CF->setComdat(NewComdat); 681 } 682 } 683 684 // Given a CFG E to be instrumented, find which BB to place the instrumented 685 // code. The function will split the critical edge if necessary. 686 template <class Edge, class BBInfo> 687 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 688 if (E->InMST || E->Removed) 689 return nullptr; 690 691 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 692 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 693 // For a fake edge, instrument the real BB. 694 if (SrcBB == nullptr) 695 return DestBB; 696 if (DestBB == nullptr) 697 return SrcBB; 698 699 // Instrument the SrcBB if it has a single successor, 700 // otherwise, the DestBB if this is not a critical edge. 701 Instruction *TI = SrcBB->getTerminator(); 702 if (TI->getNumSuccessors() <= 1) 703 return SrcBB; 704 if (!E->IsCritical) 705 return DestBB; 706 707 // For a critical edge, we have to split. Instrument the newly 708 // created BB. 709 NumOfPGOSplit++; 710 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index 711 << " --> " << getBBInfo(DestBB).Index << "\n"); 712 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 713 BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum); 714 assert(InstrBB && "Critical edge is not split"); 715 716 E->Removed = true; 717 return InstrBB; 718 } 719 720 // Visit all edge and instrument the edges not in MST, and do value profiling. 721 // Critical edges will be split. 722 static void instrumentOneFunc( 723 Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, 724 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 725 // Split indirectbr critical edges here before computing the MST rather than 726 // later in getInstrBB() to avoid invalidating it. 727 SplitIndirectBrCriticalEdges(F, BPI, BFI); 728 FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI, 729 BFI); 730 unsigned NumCounters = FuncInfo.getNumCounters(); 731 732 uint32_t I = 0; 733 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); 734 for (auto &E : FuncInfo.MST.AllEdges) { 735 BasicBlock *InstrBB = FuncInfo.getInstrBB(E.get()); 736 if (!InstrBB) 737 continue; 738 739 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 740 assert(Builder.GetInsertPoint() != InstrBB->end() && 741 "Cannot get the Instrumentation point"); 742 Builder.CreateCall( 743 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), 744 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 745 Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters), 746 Builder.getInt32(I++)}); 747 } 748 749 // Now instrument select instructions: 750 FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar, 751 FuncInfo.FunctionHash); 752 assert(I == NumCounters); 753 754 if (DisableValueProfiling) 755 return; 756 757 unsigned NumIndirectCallSites = 0; 758 for (auto &I : FuncInfo.ValueSites[IPVK_IndirectCallTarget]) { 759 CallSite CS(I); 760 Value *Callee = CS.getCalledValue(); 761 LLVM_DEBUG(dbgs() << "Instrument one indirect call: CallSite Index = " 762 << NumIndirectCallSites << "\n"); 763 IRBuilder<> Builder(I); 764 assert(Builder.GetInsertPoint() != I->getParent()->end() && 765 "Cannot get the Instrumentation point"); 766 Builder.CreateCall( 767 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 768 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 769 Builder.getInt64(FuncInfo.FunctionHash), 770 Builder.CreatePtrToInt(Callee, Builder.getInt64Ty()), 771 Builder.getInt32(IPVK_IndirectCallTarget), 772 Builder.getInt32(NumIndirectCallSites++)}); 773 } 774 NumOfPGOICall += NumIndirectCallSites; 775 776 // Now instrument memop intrinsic calls. 777 FuncInfo.MIVisitor.instrumentMemIntrinsics( 778 F, NumCounters, FuncInfo.FuncNameVar, FuncInfo.FunctionHash); 779 } 780 781 namespace { 782 783 // This class represents a CFG edge in profile use compilation. 784 struct PGOUseEdge : public PGOEdge { 785 bool CountValid = false; 786 uint64_t CountValue = 0; 787 788 PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 789 : PGOEdge(Src, Dest, W) {} 790 791 // Set edge count value 792 void setEdgeCount(uint64_t Value) { 793 CountValue = Value; 794 CountValid = true; 795 } 796 797 // Return the information string for this object. 798 const std::string infoString() const { 799 if (!CountValid) 800 return PGOEdge::infoString(); 801 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) 802 .str(); 803 } 804 }; 805 806 using DirectEdges = SmallVector<PGOUseEdge *, 2>; 807 808 // This class stores the auxiliary information for each BB. 809 struct UseBBInfo : public BBInfo { 810 uint64_t CountValue = 0; 811 bool CountValid; 812 int32_t UnknownCountInEdge = 0; 813 int32_t UnknownCountOutEdge = 0; 814 DirectEdges InEdges; 815 DirectEdges OutEdges; 816 817 UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {} 818 819 UseBBInfo(unsigned IX, uint64_t C) 820 : BBInfo(IX), CountValue(C), CountValid(true) {} 821 822 // Set the profile count value for this BB. 823 void setBBInfoCount(uint64_t Value) { 824 CountValue = Value; 825 CountValid = true; 826 } 827 828 // Return the information string of this object. 829 const std::string infoString() const { 830 if (!CountValid) 831 return BBInfo::infoString(); 832 return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str(); 833 } 834 }; 835 836 } // end anonymous namespace 837 838 // Sum up the count values for all the edges. 839 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 840 uint64_t Total = 0; 841 for (auto &E : Edges) { 842 if (E->Removed) 843 continue; 844 Total += E->CountValue; 845 } 846 return Total; 847 } 848 849 namespace { 850 851 class PGOUseFunc { 852 public: 853 PGOUseFunc(Function &Func, Module *Modu, 854 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 855 BranchProbabilityInfo *BPI = nullptr, 856 BlockFrequencyInfo *BFIin = nullptr) 857 : F(Func), M(Modu), BFI(BFIin), 858 FuncInfo(Func, ComdatMembers, false, BPI, BFIin), 859 FreqAttr(FFA_Normal) {} 860 861 // Read counts for the instrumented BB from profile. 862 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros); 863 864 // Populate the counts for all BBs. 865 void populateCounters(); 866 867 // Set the branch weights based on the count values. 868 void setBranchWeights(); 869 870 // Annotate the value profile call sites for all value kind. 871 void annotateValueSites(); 872 873 // Annotate the value profile call sites for one value kind. 874 void annotateValueSites(uint32_t Kind); 875 876 // Annotate the irreducible loop header weights. 877 void annotateIrrLoopHeaderWeights(); 878 879 // The hotness of the function from the profile count. 880 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 881 882 // Return the function hotness from the profile. 883 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 884 885 // Return the function hash. 886 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 887 888 // Return the profile record for this function; 889 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 890 891 // Return the auxiliary BB information. 892 UseBBInfo &getBBInfo(const BasicBlock *BB) const { 893 return FuncInfo.getBBInfo(BB); 894 } 895 896 // Return the auxiliary BB information if available. 897 UseBBInfo *findBBInfo(const BasicBlock *BB) const { 898 return FuncInfo.findBBInfo(BB); 899 } 900 901 Function &getFunc() const { return F; } 902 903 void dumpInfo(std::string Str = "") const { 904 FuncInfo.dumpInfo(Str); 905 } 906 907 uint64_t getProgramMaxCount() const { return ProgramMaxCount; } 908 private: 909 Function &F; 910 Module *M; 911 BlockFrequencyInfo *BFI; 912 913 // This member stores the shared information with class PGOGenFunc. 914 FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo; 915 916 // The maximum count value in the profile. This is only used in PGO use 917 // compilation. 918 uint64_t ProgramMaxCount; 919 920 // Position of counter that remains to be read. 921 uint32_t CountPosition = 0; 922 923 // Total size of the profile count for this function. 924 uint32_t ProfileCountSize = 0; 925 926 // ProfileRecord for this function. 927 InstrProfRecord ProfileRecord; 928 929 // Function hotness info derived from profile. 930 FuncFreqAttr FreqAttr; 931 932 // Find the Instrumented BB and set the value. 933 void setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 934 935 // Set the edge counter value for the unknown edge -- there should be only 936 // one unknown edge. 937 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 938 939 // Return FuncName string; 940 const std::string getFuncName() const { return FuncInfo.FuncName; } 941 942 // Set the hot/cold inline hints based on the count values. 943 // FIXME: This function should be removed once the functionality in 944 // the inliner is implemented. 945 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 946 if (ProgramMaxCount == 0) 947 return; 948 // Threshold of the hot functions. 949 const BranchProbability HotFunctionThreshold(1, 100); 950 // Threshold of the cold functions. 951 const BranchProbability ColdFunctionThreshold(2, 10000); 952 if (EntryCount >= HotFunctionThreshold.scale(ProgramMaxCount)) 953 FreqAttr = FFA_Hot; 954 else if (MaxCount <= ColdFunctionThreshold.scale(ProgramMaxCount)) 955 FreqAttr = FFA_Cold; 956 } 957 }; 958 959 } // end anonymous namespace 960 961 // Visit all the edges and assign the count value for the instrumented 962 // edges and the BB. 963 void PGOUseFunc::setInstrumentedCounts( 964 const std::vector<uint64_t> &CountFromProfile) { 965 assert(FuncInfo.getNumCounters() == CountFromProfile.size()); 966 // Use a worklist as we will update the vector during the iteration. 967 std::vector<PGOUseEdge *> WorkList; 968 for (auto &E : FuncInfo.MST.AllEdges) 969 WorkList.push_back(E.get()); 970 971 uint32_t I = 0; 972 for (auto &E : WorkList) { 973 BasicBlock *InstrBB = FuncInfo.getInstrBB(E); 974 if (!InstrBB) 975 continue; 976 uint64_t CountValue = CountFromProfile[I++]; 977 if (!E->Removed) { 978 getBBInfo(InstrBB).setBBInfoCount(CountValue); 979 E->setEdgeCount(CountValue); 980 continue; 981 } 982 983 // Need to add two new edges. 984 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 985 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 986 // Add new edge of SrcBB->InstrBB. 987 PGOUseEdge &NewEdge = FuncInfo.MST.addEdge(SrcBB, InstrBB, 0); 988 NewEdge.setEdgeCount(CountValue); 989 // Add new edge of InstrBB->DestBB. 990 PGOUseEdge &NewEdge1 = FuncInfo.MST.addEdge(InstrBB, DestBB, 0); 991 NewEdge1.setEdgeCount(CountValue); 992 NewEdge1.InMST = true; 993 getBBInfo(InstrBB).setBBInfoCount(CountValue); 994 } 995 ProfileCountSize = CountFromProfile.size(); 996 CountPosition = I; 997 } 998 999 // Set the count value for the unknown edge. There should be one and only one 1000 // unknown edge in Edges vector. 1001 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 1002 for (auto &E : Edges) { 1003 if (E->CountValid) 1004 continue; 1005 E->setEdgeCount(Value); 1006 1007 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1008 getBBInfo(E->DestBB).UnknownCountInEdge--; 1009 return; 1010 } 1011 llvm_unreachable("Cannot find the unknown count edge"); 1012 } 1013 1014 // Read the profile from ProfileFileName and assign the value to the 1015 // instrumented BB and the edges. This function also updates ProgramMaxCount. 1016 // Return true if the profile are successfully read, and false on errors. 1017 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros) { 1018 auto &Ctx = M->getContext(); 1019 Expected<InstrProfRecord> Result = 1020 PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); 1021 if (Error E = Result.takeError()) { 1022 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 1023 auto Err = IPE.get(); 1024 bool SkipWarning = false; 1025 if (Err == instrprof_error::unknown_function) { 1026 NumOfPGOMissing++; 1027 SkipWarning = !PGOWarnMissing; 1028 } else if (Err == instrprof_error::hash_mismatch || 1029 Err == instrprof_error::malformed) { 1030 NumOfPGOMismatch++; 1031 SkipWarning = 1032 NoPGOWarnMismatch || 1033 (NoPGOWarnMismatchComdat && 1034 (F.hasComdat() || 1035 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 1036 } 1037 1038 if (SkipWarning) 1039 return; 1040 1041 std::string Msg = IPE.message() + std::string(" ") + F.getName().str(); 1042 Ctx.diagnose( 1043 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1044 }); 1045 return false; 1046 } 1047 ProfileRecord = std::move(Result.get()); 1048 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 1049 1050 NumOfPGOFunc++; 1051 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 1052 uint64_t ValueSum = 0; 1053 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 1054 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 1055 ValueSum += CountFromProfile[I]; 1056 } 1057 AllZeros = (ValueSum == 0); 1058 1059 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 1060 1061 getBBInfo(nullptr).UnknownCountOutEdge = 2; 1062 getBBInfo(nullptr).UnknownCountInEdge = 2; 1063 1064 setInstrumentedCounts(CountFromProfile); 1065 ProgramMaxCount = PGOReader->getMaximumFunctionCount(); 1066 return true; 1067 } 1068 1069 // Populate the counters from instrumented BBs to all BBs. 1070 // In the end of this operation, all BBs should have a valid count value. 1071 void PGOUseFunc::populateCounters() { 1072 // First set up Count variable for all BBs. 1073 for (auto &E : FuncInfo.MST.AllEdges) { 1074 if (E->Removed) 1075 continue; 1076 1077 const BasicBlock *SrcBB = E->SrcBB; 1078 const BasicBlock *DestBB = E->DestBB; 1079 UseBBInfo &SrcInfo = getBBInfo(SrcBB); 1080 UseBBInfo &DestInfo = getBBInfo(DestBB); 1081 SrcInfo.OutEdges.push_back(E.get()); 1082 DestInfo.InEdges.push_back(E.get()); 1083 SrcInfo.UnknownCountOutEdge++; 1084 DestInfo.UnknownCountInEdge++; 1085 1086 if (!E->CountValid) 1087 continue; 1088 DestInfo.UnknownCountInEdge--; 1089 SrcInfo.UnknownCountOutEdge--; 1090 } 1091 1092 bool Changes = true; 1093 unsigned NumPasses = 0; 1094 while (Changes) { 1095 NumPasses++; 1096 Changes = false; 1097 1098 // For efficient traversal, it's better to start from the end as most 1099 // of the instrumented edges are at the end. 1100 for (auto &BB : reverse(F)) { 1101 UseBBInfo *Count = findBBInfo(&BB); 1102 if (Count == nullptr) 1103 continue; 1104 if (!Count->CountValid) { 1105 if (Count->UnknownCountOutEdge == 0) { 1106 Count->CountValue = sumEdgeCount(Count->OutEdges); 1107 Count->CountValid = true; 1108 Changes = true; 1109 } else if (Count->UnknownCountInEdge == 0) { 1110 Count->CountValue = sumEdgeCount(Count->InEdges); 1111 Count->CountValid = true; 1112 Changes = true; 1113 } 1114 } 1115 if (Count->CountValid) { 1116 if (Count->UnknownCountOutEdge == 1) { 1117 uint64_t Total = 0; 1118 uint64_t OutSum = sumEdgeCount(Count->OutEdges); 1119 // If the one of the successor block can early terminate (no-return), 1120 // we can end up with situation where out edge sum count is larger as 1121 // the source BB's count is collected by a post-dominated block. 1122 if (Count->CountValue > OutSum) 1123 Total = Count->CountValue - OutSum; 1124 setEdgeCount(Count->OutEdges, Total); 1125 Changes = true; 1126 } 1127 if (Count->UnknownCountInEdge == 1) { 1128 uint64_t Total = 0; 1129 uint64_t InSum = sumEdgeCount(Count->InEdges); 1130 if (Count->CountValue > InSum) 1131 Total = Count->CountValue - InSum; 1132 setEdgeCount(Count->InEdges, Total); 1133 Changes = true; 1134 } 1135 } 1136 } 1137 } 1138 1139 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 1140 #ifndef NDEBUG 1141 // Assert every BB has a valid counter. 1142 for (auto &BB : F) { 1143 auto BI = findBBInfo(&BB); 1144 if (BI == nullptr) 1145 continue; 1146 assert(BI->CountValid && "BB count is not valid"); 1147 } 1148 #endif 1149 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 1150 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); 1151 uint64_t FuncMaxCount = FuncEntryCount; 1152 for (auto &BB : F) { 1153 auto BI = findBBInfo(&BB); 1154 if (BI == nullptr) 1155 continue; 1156 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); 1157 } 1158 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 1159 1160 // Now annotate select instructions 1161 FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition); 1162 assert(CountPosition == ProfileCountSize); 1163 1164 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); 1165 } 1166 1167 // Assign the scaled count values to the BB with multiple out edges. 1168 void PGOUseFunc::setBranchWeights() { 1169 // Generate MD_prof metadata for every branch instruction. 1170 LLVM_DEBUG(dbgs() << "\nSetting branch weights.\n"); 1171 for (auto &BB : F) { 1172 Instruction *TI = BB.getTerminator(); 1173 if (TI->getNumSuccessors() < 2) 1174 continue; 1175 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || 1176 isa<IndirectBrInst>(TI))) 1177 continue; 1178 if (getBBInfo(&BB).CountValue == 0) 1179 continue; 1180 1181 // We have a non-zero Branch BB. 1182 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1183 unsigned Size = BBCountInfo.OutEdges.size(); 1184 SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 1185 uint64_t MaxCount = 0; 1186 for (unsigned s = 0; s < Size; s++) { 1187 const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 1188 const BasicBlock *SrcBB = E->SrcBB; 1189 const BasicBlock *DestBB = E->DestBB; 1190 if (DestBB == nullptr) 1191 continue; 1192 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 1193 uint64_t EdgeCount = E->CountValue; 1194 if (EdgeCount > MaxCount) 1195 MaxCount = EdgeCount; 1196 EdgeCounts[SuccNum] = EdgeCount; 1197 } 1198 setProfMetadata(M, TI, EdgeCounts, MaxCount); 1199 } 1200 } 1201 1202 static bool isIndirectBrTarget(BasicBlock *BB) { 1203 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { 1204 if (isa<IndirectBrInst>((*PI)->getTerminator())) 1205 return true; 1206 } 1207 return false; 1208 } 1209 1210 void PGOUseFunc::annotateIrrLoopHeaderWeights() { 1211 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); 1212 // Find irr loop headers 1213 for (auto &BB : F) { 1214 // As a heuristic also annotate indrectbr targets as they have a high chance 1215 // to become an irreducible loop header after the indirectbr tail 1216 // duplication. 1217 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { 1218 Instruction *TI = BB.getTerminator(); 1219 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1220 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); 1221 } 1222 } 1223 } 1224 1225 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 1226 Module *M = F.getParent(); 1227 IRBuilder<> Builder(&SI); 1228 Type *Int64Ty = Builder.getInt64Ty(); 1229 Type *I8PtrTy = Builder.getInt8PtrTy(); 1230 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 1231 Builder.CreateCall( 1232 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 1233 {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1234 Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), 1235 Builder.getInt32(*CurCtrIdx), Step}); 1236 ++(*CurCtrIdx); 1237 } 1238 1239 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 1240 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1241 assert(*CurCtrIdx < CountFromProfile.size() && 1242 "Out of bound access of counters"); 1243 uint64_t SCounts[2]; 1244 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1245 ++(*CurCtrIdx); 1246 uint64_t TotalCount = 0; 1247 auto BI = UseFunc->findBBInfo(SI.getParent()); 1248 if (BI != nullptr) 1249 TotalCount = BI->CountValue; 1250 // False Count 1251 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1252 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1253 if (MaxCount) 1254 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1255 } 1256 1257 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1258 if (!PGOInstrSelect) 1259 return; 1260 // FIXME: do not handle this yet. 1261 if (SI.getCondition()->getType()->isVectorTy()) 1262 return; 1263 1264 switch (Mode) { 1265 case VM_counting: 1266 NSIs++; 1267 return; 1268 case VM_instrument: 1269 instrumentOneSelectInst(SI); 1270 return; 1271 case VM_annotate: 1272 annotateOneSelectInst(SI); 1273 return; 1274 } 1275 1276 llvm_unreachable("Unknown visiting mode"); 1277 } 1278 1279 void MemIntrinsicVisitor::instrumentOneMemIntrinsic(MemIntrinsic &MI) { 1280 Module *M = F.getParent(); 1281 IRBuilder<> Builder(&MI); 1282 Type *Int64Ty = Builder.getInt64Ty(); 1283 Type *I8PtrTy = Builder.getInt8PtrTy(); 1284 Value *Length = MI.getLength(); 1285 assert(!dyn_cast<ConstantInt>(Length)); 1286 Builder.CreateCall( 1287 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 1288 {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1289 Builder.getInt64(FuncHash), Builder.CreateZExtOrTrunc(Length, Int64Ty), 1290 Builder.getInt32(IPVK_MemOPSize), Builder.getInt32(CurCtrId)}); 1291 ++CurCtrId; 1292 } 1293 1294 void MemIntrinsicVisitor::visitMemIntrinsic(MemIntrinsic &MI) { 1295 if (!PGOInstrMemOP) 1296 return; 1297 Value *Length = MI.getLength(); 1298 // Not instrument constant length calls. 1299 if (dyn_cast<ConstantInt>(Length)) 1300 return; 1301 1302 switch (Mode) { 1303 case VM_counting: 1304 NMemIs++; 1305 return; 1306 case VM_instrument: 1307 instrumentOneMemIntrinsic(MI); 1308 return; 1309 case VM_annotate: 1310 Candidates.push_back(&MI); 1311 return; 1312 } 1313 llvm_unreachable("Unknown visiting mode"); 1314 } 1315 1316 // Traverse all valuesites and annotate the instructions for all value kind. 1317 void PGOUseFunc::annotateValueSites() { 1318 if (DisableValueProfiling) 1319 return; 1320 1321 // Create the PGOFuncName meta data. 1322 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1323 1324 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1325 annotateValueSites(Kind); 1326 } 1327 1328 // Annotate the instructions for a specific value kind. 1329 void PGOUseFunc::annotateValueSites(uint32_t Kind) { 1330 unsigned ValueSiteIndex = 0; 1331 auto &ValueSites = FuncInfo.ValueSites[Kind]; 1332 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 1333 if (NumValueSites != ValueSites.size()) { 1334 auto &Ctx = M->getContext(); 1335 Ctx.diagnose(DiagnosticInfoPGOProfile( 1336 M->getName().data(), 1337 Twine("Inconsistent number of value sites for kind = ") + Twine(Kind) + 1338 " in " + F.getName().str(), 1339 DS_Warning)); 1340 return; 1341 } 1342 1343 for (auto &I : ValueSites) { 1344 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 1345 << "): Index = " << ValueSiteIndex << " out of " 1346 << NumValueSites << "\n"); 1347 annotateValueSite(*M, *I, ProfileRecord, 1348 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 1349 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations 1350 : MaxNumAnnotations); 1351 ValueSiteIndex++; 1352 } 1353 } 1354 1355 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime 1356 // aware this is an ir_level profile so it can set the version flag. 1357 static void createIRLevelProfileFlagVariable(Module &M) { 1358 Type *IntTy64 = Type::getInt64Ty(M.getContext()); 1359 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); 1360 auto IRLevelVersionVariable = new GlobalVariable( 1361 M, IntTy64, true, GlobalVariable::ExternalLinkage, 1362 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), 1363 INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); 1364 IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility); 1365 Triple TT(M.getTargetTriple()); 1366 if (!TT.supportsCOMDAT()) 1367 IRLevelVersionVariable->setLinkage(GlobalValue::WeakAnyLinkage); 1368 else 1369 IRLevelVersionVariable->setComdat(M.getOrInsertComdat( 1370 StringRef(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)))); 1371 } 1372 1373 // Collect the set of members for each Comdat in module M and store 1374 // in ComdatMembers. 1375 static void collectComdatMembers( 1376 Module &M, 1377 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1378 if (!DoComdatRenaming) 1379 return; 1380 for (Function &F : M) 1381 if (Comdat *C = F.getComdat()) 1382 ComdatMembers.insert(std::make_pair(C, &F)); 1383 for (GlobalVariable &GV : M.globals()) 1384 if (Comdat *C = GV.getComdat()) 1385 ComdatMembers.insert(std::make_pair(C, &GV)); 1386 for (GlobalAlias &GA : M.aliases()) 1387 if (Comdat *C = GA.getComdat()) 1388 ComdatMembers.insert(std::make_pair(C, &GA)); 1389 } 1390 1391 static bool InstrumentAllFunctions( 1392 Module &M, function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1393 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) { 1394 createIRLevelProfileFlagVariable(M); 1395 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1396 collectComdatMembers(M, ComdatMembers); 1397 1398 for (auto &F : M) { 1399 if (F.isDeclaration()) 1400 continue; 1401 auto *BPI = LookupBPI(F); 1402 auto *BFI = LookupBFI(F); 1403 instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers); 1404 } 1405 return true; 1406 } 1407 1408 bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) { 1409 if (skipModule(M)) 1410 return false; 1411 1412 auto LookupBPI = [this](Function &F) { 1413 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1414 }; 1415 auto LookupBFI = [this](Function &F) { 1416 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1417 }; 1418 return InstrumentAllFunctions(M, LookupBPI, LookupBFI); 1419 } 1420 1421 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1422 ModuleAnalysisManager &AM) { 1423 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1424 auto LookupBPI = [&FAM](Function &F) { 1425 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1426 }; 1427 1428 auto LookupBFI = [&FAM](Function &F) { 1429 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1430 }; 1431 1432 if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI)) 1433 return PreservedAnalyses::all(); 1434 1435 return PreservedAnalyses::none(); 1436 } 1437 1438 static bool annotateAllFunctions( 1439 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, 1440 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1441 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) { 1442 LLVM_DEBUG(dbgs() << "Read in profile counters: "); 1443 auto &Ctx = M.getContext(); 1444 // Read the counter array from file. 1445 auto ReaderOrErr = 1446 IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName); 1447 if (Error E = ReaderOrErr.takeError()) { 1448 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 1449 Ctx.diagnose( 1450 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 1451 }); 1452 return false; 1453 } 1454 1455 std::unique_ptr<IndexedInstrProfReader> PGOReader = 1456 std::move(ReaderOrErr.get()); 1457 if (!PGOReader) { 1458 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 1459 StringRef("Cannot get PGOReader"))); 1460 return false; 1461 } 1462 // TODO: might need to change the warning once the clang option is finalized. 1463 if (!PGOReader->isIRLevelProfile()) { 1464 Ctx.diagnose(DiagnosticInfoPGOProfile( 1465 ProfileFileName.data(), "Not an IR level instrumentation profile")); 1466 return false; 1467 } 1468 1469 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1470 collectComdatMembers(M, ComdatMembers); 1471 std::vector<Function *> HotFunctions; 1472 std::vector<Function *> ColdFunctions; 1473 for (auto &F : M) { 1474 if (F.isDeclaration()) 1475 continue; 1476 auto *BPI = LookupBPI(F); 1477 auto *BFI = LookupBFI(F); 1478 // Split indirectbr critical edges here before computing the MST rather than 1479 // later in getInstrBB() to avoid invalidating it. 1480 SplitIndirectBrCriticalEdges(F, BPI, BFI); 1481 PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI); 1482 bool AllZeros = false; 1483 if (!Func.readCounters(PGOReader.get(), AllZeros)) 1484 continue; 1485 if (AllZeros) { 1486 F.setEntryCount(ProfileCount(0, Function::PCT_Real)); 1487 if (Func.getProgramMaxCount() != 0) 1488 ColdFunctions.push_back(&F); 1489 continue; 1490 } 1491 Func.populateCounters(); 1492 Func.setBranchWeights(); 1493 Func.annotateValueSites(); 1494 Func.annotateIrrLoopHeaderWeights(); 1495 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 1496 if (FreqAttr == PGOUseFunc::FFA_Cold) 1497 ColdFunctions.push_back(&F); 1498 else if (FreqAttr == PGOUseFunc::FFA_Hot) 1499 HotFunctions.push_back(&F); 1500 if (PGOViewCounts != PGOVCT_None && 1501 (ViewBlockFreqFuncName.empty() || 1502 F.getName().equals(ViewBlockFreqFuncName))) { 1503 LoopInfo LI{DominatorTree(F)}; 1504 std::unique_ptr<BranchProbabilityInfo> NewBPI = 1505 llvm::make_unique<BranchProbabilityInfo>(F, LI); 1506 std::unique_ptr<BlockFrequencyInfo> NewBFI = 1507 llvm::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); 1508 if (PGOViewCounts == PGOVCT_Graph) 1509 NewBFI->view(); 1510 else if (PGOViewCounts == PGOVCT_Text) { 1511 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n"; 1512 NewBFI->print(dbgs()); 1513 } 1514 } 1515 if (PGOViewRawCounts != PGOVCT_None && 1516 (ViewBlockFreqFuncName.empty() || 1517 F.getName().equals(ViewBlockFreqFuncName))) { 1518 if (PGOViewRawCounts == PGOVCT_Graph) 1519 if (ViewBlockFreqFuncName.empty()) 1520 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1521 else 1522 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1523 else if (PGOViewRawCounts == PGOVCT_Text) { 1524 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n"; 1525 Func.dumpInfo(); 1526 } 1527 } 1528 } 1529 M.setProfileSummary(PGOReader->getSummary().getMD(M.getContext())); 1530 // Set function hotness attribute from the profile. 1531 // We have to apply these attributes at the end because their presence 1532 // can affect the BranchProbabilityInfo of any callers, resulting in an 1533 // inconsistent MST between prof-gen and prof-use. 1534 for (auto &F : HotFunctions) { 1535 F->addFnAttr(Attribute::InlineHint); 1536 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 1537 << "\n"); 1538 } 1539 for (auto &F : ColdFunctions) { 1540 F->addFnAttr(Attribute::Cold); 1541 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() 1542 << "\n"); 1543 } 1544 return true; 1545 } 1546 1547 PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, 1548 std::string RemappingFilename) 1549 : ProfileFileName(std::move(Filename)), 1550 ProfileRemappingFileName(std::move(RemappingFilename)) { 1551 if (!PGOTestProfileFile.empty()) 1552 ProfileFileName = PGOTestProfileFile; 1553 if (!PGOTestProfileRemappingFile.empty()) 1554 ProfileRemappingFileName = PGOTestProfileRemappingFile; 1555 } 1556 1557 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 1558 ModuleAnalysisManager &AM) { 1559 1560 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1561 auto LookupBPI = [&FAM](Function &F) { 1562 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1563 }; 1564 1565 auto LookupBFI = [&FAM](Function &F) { 1566 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1567 }; 1568 1569 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, 1570 LookupBPI, LookupBFI)) 1571 return PreservedAnalyses::all(); 1572 1573 return PreservedAnalyses::none(); 1574 } 1575 1576 bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { 1577 if (skipModule(M)) 1578 return false; 1579 1580 auto LookupBPI = [this](Function &F) { 1581 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1582 }; 1583 auto LookupBFI = [this](Function &F) { 1584 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1585 }; 1586 1587 return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI); 1588 } 1589 1590 static std::string getSimpleNodeName(const BasicBlock *Node) { 1591 if (!Node->getName().empty()) 1592 return Node->getName(); 1593 1594 std::string SimpleNodeName; 1595 raw_string_ostream OS(SimpleNodeName); 1596 Node->printAsOperand(OS, false); 1597 return OS.str(); 1598 } 1599 1600 void llvm::setProfMetadata(Module *M, Instruction *TI, 1601 ArrayRef<uint64_t> EdgeCounts, 1602 uint64_t MaxCount) { 1603 MDBuilder MDB(M->getContext()); 1604 assert(MaxCount > 0 && "Bad max count"); 1605 uint64_t Scale = calculateCountScale(MaxCount); 1606 SmallVector<unsigned, 4> Weights; 1607 for (const auto &ECI : EdgeCounts) 1608 Weights.push_back(scaleBranchCount(ECI, Scale)); 1609 1610 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W 1611 : Weights) { 1612 dbgs() << W << " "; 1613 } dbgs() << "\n";); 1614 TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1615 if (EmitBranchProbability) { 1616 std::string BrCondStr = getBranchCondString(TI); 1617 if (BrCondStr.empty()) 1618 return; 1619 1620 uint64_t WSum = 1621 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0, 1622 [](uint64_t w1, uint64_t w2) { return w1 + w2; }); 1623 uint64_t TotalCount = 1624 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0, 1625 [](uint64_t c1, uint64_t c2) { return c1 + c2; }); 1626 Scale = calculateCountScale(WSum); 1627 BranchProbability BP(scaleBranchCount(Weights[0], Scale), 1628 scaleBranchCount(WSum, Scale)); 1629 std::string BranchProbStr; 1630 raw_string_ostream OS(BranchProbStr); 1631 OS << BP; 1632 OS << " (total count : " << TotalCount << ")"; 1633 OS.flush(); 1634 Function *F = TI->getParent()->getParent(); 1635 OptimizationRemarkEmitter ORE(F); 1636 ORE.emit([&]() { 1637 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) 1638 << BrCondStr << " is true with probability : " << BranchProbStr; 1639 }); 1640 } 1641 } 1642 1643 namespace llvm { 1644 1645 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { 1646 MDBuilder MDB(M->getContext()); 1647 TI->setMetadata(llvm::LLVMContext::MD_irr_loop, 1648 MDB.createIrrLoopHeaderWeight(Count)); 1649 } 1650 1651 template <> struct GraphTraits<PGOUseFunc *> { 1652 using NodeRef = const BasicBlock *; 1653 using ChildIteratorType = succ_const_iterator; 1654 using nodes_iterator = pointer_iterator<Function::const_iterator>; 1655 1656 static NodeRef getEntryNode(const PGOUseFunc *G) { 1657 return &G->getFunc().front(); 1658 } 1659 1660 static ChildIteratorType child_begin(const NodeRef N) { 1661 return succ_begin(N); 1662 } 1663 1664 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } 1665 1666 static nodes_iterator nodes_begin(const PGOUseFunc *G) { 1667 return nodes_iterator(G->getFunc().begin()); 1668 } 1669 1670 static nodes_iterator nodes_end(const PGOUseFunc *G) { 1671 return nodes_iterator(G->getFunc().end()); 1672 } 1673 }; 1674 1675 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { 1676 explicit DOTGraphTraits(bool isSimple = false) 1677 : DefaultDOTGraphTraits(isSimple) {} 1678 1679 static std::string getGraphName(const PGOUseFunc *G) { 1680 return G->getFunc().getName(); 1681 } 1682 1683 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { 1684 std::string Result; 1685 raw_string_ostream OS(Result); 1686 1687 OS << getSimpleNodeName(Node) << ":\\l"; 1688 UseBBInfo *BI = Graph->findBBInfo(Node); 1689 OS << "Count : "; 1690 if (BI && BI->CountValid) 1691 OS << BI->CountValue << "\\l"; 1692 else 1693 OS << "Unknown\\l"; 1694 1695 if (!PGOInstrSelect) 1696 return Result; 1697 1698 for (auto BI = Node->begin(); BI != Node->end(); ++BI) { 1699 auto *I = &*BI; 1700 if (!isa<SelectInst>(I)) 1701 continue; 1702 // Display scaled counts for SELECT instruction: 1703 OS << "SELECT : { T = "; 1704 uint64_t TC, FC; 1705 bool HasProf = I->extractProfMetadata(TC, FC); 1706 if (!HasProf) 1707 OS << "Unknown, F = Unknown }\\l"; 1708 else 1709 OS << TC << ", F = " << FC << " }\\l"; 1710 } 1711 return Result; 1712 } 1713 }; 1714 1715 } // end namespace llvm 1716