1 //===-- PGOInstrumentation.cpp - MST-based PGO Instrumentation ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements PGO instrumentation using a minimum spanning tree based 11 // on the following paper: 12 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 13 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 14 // Issue 3, pp 313-322 15 // The idea of the algorithm based on the fact that for each node (except for 16 // the entry and exit), the sum of incoming edge counts equals the sum of 17 // outgoing edge counts. The count of edge on spanning tree can be derived from 18 // those edges not on the spanning tree. Knuth proves this method instruments 19 // the minimum number of edges. 20 // 21 // The minimal spanning tree here is actually a maximum weight tree -- on-tree 22 // edges have higher frequencies (more likely to execute). The idea is to 23 // instrument those less frequently executed edges to reduce the runtime 24 // overhead of instrumented binaries. 25 // 26 // This file contains two passes: 27 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 28 // count profile, and generates the instrumentation for indirect call 29 // profiling. 30 // (2) Pass PGOInstrumentationUse which reads the edge count profile and 31 // annotates the branch weights. It also reads the indirect call value 32 // profiling records and annotate the indirect call instructions. 33 // 34 // To get the precise counter information, These two passes need to invoke at 35 // the same compilation point (so they see the same IR). For pass 36 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For 37 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 38 // the profile is opened in module level and passed to each PGOUseFunc instance. 39 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 40 // in class FuncPGOInstrumentation. 41 // 42 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 43 // BBInfo contains auxiliary information for each BB. These two classes are used 44 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 45 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 46 // used in populating profile counters. 47 // The MST implementation is in Class CFGMST (CFGMST.h). 48 // 49 //===----------------------------------------------------------------------===// 50 51 #include "llvm/Transforms/PGOInstrumentation.h" 52 #include "CFGMST.h" 53 #include "llvm/ADT/STLExtras.h" 54 #include "llvm/ADT/SmallVector.h" 55 #include "llvm/ADT/Statistic.h" 56 #include "llvm/ADT/Triple.h" 57 #include "llvm/Analysis/BlockFrequencyInfo.h" 58 #include "llvm/Analysis/BranchProbabilityInfo.h" 59 #include "llvm/Analysis/CFG.h" 60 #include "llvm/Analysis/IndirectCallSiteVisitor.h" 61 #include "llvm/Analysis/LoopInfo.h" 62 #include "llvm/IR/CallSite.h" 63 #include "llvm/IR/DiagnosticInfo.h" 64 #include "llvm/IR/Dominators.h" 65 #include "llvm/IR/GlobalValue.h" 66 #include "llvm/IR/IRBuilder.h" 67 #include "llvm/IR/InstIterator.h" 68 #include "llvm/IR/Instructions.h" 69 #include "llvm/IR/IntrinsicInst.h" 70 #include "llvm/IR/MDBuilder.h" 71 #include "llvm/IR/Module.h" 72 #include "llvm/Pass.h" 73 #include "llvm/ProfileData/InstrProfReader.h" 74 #include "llvm/ProfileData/ProfileCommon.h" 75 #include "llvm/Support/BranchProbability.h" 76 #include "llvm/Support/DOTGraphTraits.h" 77 #include "llvm/Support/Debug.h" 78 #include "llvm/Support/GraphWriter.h" 79 #include "llvm/Support/JamCRC.h" 80 #include "llvm/Transforms/Instrumentation.h" 81 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 82 #include <algorithm> 83 #include <string> 84 #include <unordered_map> 85 #include <utility> 86 #include <vector> 87 88 using namespace llvm; 89 90 #define DEBUG_TYPE "pgo-instrumentation" 91 92 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 93 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 94 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); 95 STATISTIC(NumOfPGOEdge, "Number of edges."); 96 STATISTIC(NumOfPGOBB, "Number of basic-blocks."); 97 STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 98 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 99 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 100 STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 101 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 102 103 // Command line option to specify the file to read profile from. This is 104 // mainly used for testing. 105 static cl::opt<std::string> 106 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, 107 cl::value_desc("filename"), 108 cl::desc("Specify the path of profile data file. This is" 109 "mainly for test purpose.")); 110 111 // Command line option to disable value profiling. The default is false: 112 // i.e. value profiling is enabled by default. This is for debug purpose. 113 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 114 cl::Hidden, 115 cl::desc("Disable Value Profiling")); 116 117 // Command line option to set the maximum number of VP annotations to write to 118 // the metadata for a single indirect call callsite. 119 static cl::opt<unsigned> MaxNumAnnotations( 120 "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore, 121 cl::desc("Max number of annotations for a single indirect " 122 "call callsite")); 123 124 // Command line option to set the maximum number of value annotations 125 // to write to the metadata for a single memop intrinsic. 126 static cl::opt<unsigned> MaxNumMemOPAnnotations( 127 "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore, 128 cl::desc("Max number of preicise value annotations for a single memop" 129 "intrinsic")); 130 131 // Command line option to control appending FunctionHash to the name of a COMDAT 132 // function. This is to avoid the hash mismatch caused by the preinliner. 133 static cl::opt<bool> DoComdatRenaming( 134 "do-comdat-renaming", cl::init(false), cl::Hidden, 135 cl::desc("Append function hash to the name of COMDAT function to avoid " 136 "function hash mismatch due to the preinliner")); 137 138 // Command line option to enable/disable the warning about missing profile 139 // information. 140 static cl::opt<bool> 141 PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, 142 cl::desc("Use this option to turn on/off " 143 "warnings about missing profile data for " 144 "functions.")); 145 146 // Command line option to enable/disable the warning about a hash mismatch in 147 // the profile data. 148 static cl::opt<bool> 149 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 150 cl::desc("Use this option to turn off/on " 151 "warnings about profile cfg mismatch.")); 152 153 // Command line option to enable/disable the warning about a hash mismatch in 154 // the profile data for Comdat functions, which often turns out to be false 155 // positive due to the pre-instrumentation inline. 156 static cl::opt<bool> 157 NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), 158 cl::Hidden, 159 cl::desc("The option is used to turn on/off " 160 "warnings about hash mismatch for comdat " 161 "functions.")); 162 163 // Command line option to enable/disable select instruction instrumentation. 164 static cl::opt<bool> 165 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 166 cl::desc("Use this option to turn on/off SELECT " 167 "instruction instrumentation. ")); 168 169 // Command line option to turn on CFG dot dump of raw profile counts 170 static cl::opt<bool> 171 PGOViewRawCounts("pgo-view-raw-counts", cl::init(false), cl::Hidden, 172 cl::desc("A boolean option to show CFG dag " 173 "with raw profile counts from " 174 "profile data. See also option " 175 "-pgo-view-counts. To limit graph " 176 "display to only one function, use " 177 "filtering option -view-bfi-func-name.")); 178 179 // Command line option to enable/disable memop intrinsic call.size profiling. 180 static cl::opt<bool> 181 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 182 cl::desc("Use this option to turn on/off " 183 "memory intrinsic size profiling.")); 184 185 // Emit branch probability as optimization remarks. 186 static cl::opt<bool> 187 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 188 cl::desc("When this option is on, the annotated " 189 "branch probability will be emitted as " 190 " optimization remarks: -Rpass-analysis=" 191 "pgo-instr-use")); 192 193 // Command line option to turn on CFG dot dump after profile annotation. 194 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts 195 extern cl::opt<bool> PGOViewCounts; 196 197 // Command line option to specify the name of the function for CFG dump 198 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= 199 extern cl::opt<std::string> ViewBlockFreqFuncName; 200 201 namespace { 202 203 // Return a string describing the branch condition that can be 204 // used in static branch probability heuristics: 205 std::string getBranchCondString(Instruction *TI) { 206 BranchInst *BI = dyn_cast<BranchInst>(TI); 207 if (!BI || !BI->isConditional()) 208 return std::string(); 209 210 Value *Cond = BI->getCondition(); 211 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 212 if (!CI) 213 return std::string(); 214 215 std::string result; 216 raw_string_ostream OS(result); 217 OS << CmpInst::getPredicateName(CI->getPredicate()) << "_"; 218 CI->getOperand(0)->getType()->print(OS, true); 219 220 Value *RHS = CI->getOperand(1); 221 ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 222 if (CV) { 223 if (CV->isZero()) 224 OS << "_Zero"; 225 else if (CV->isOne()) 226 OS << "_One"; 227 else if (CV->isAllOnesValue()) 228 OS << "_MinusOne"; 229 else 230 OS << "_Const"; 231 } 232 OS.flush(); 233 return result; 234 } 235 236 /// The select instruction visitor plays three roles specified 237 /// by the mode. In \c VM_counting mode, it simply counts the number of 238 /// select instructions. In \c VM_instrument mode, it inserts code to count 239 /// the number times TrueValue of select is taken. In \c VM_annotate mode, 240 /// it reads the profile data and annotate the select instruction with metadata. 241 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 242 class PGOUseFunc; 243 244 /// Instruction Visitor class to visit select instructions. 245 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 246 Function &F; 247 unsigned NSIs = 0; // Number of select instructions instrumented. 248 VisitMode Mode = VM_counting; // Visiting mode. 249 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 250 unsigned TotalNumCtrs = 0; // Total number of counters 251 GlobalVariable *FuncNameVar = nullptr; 252 uint64_t FuncHash = 0; 253 PGOUseFunc *UseFunc = nullptr; 254 255 SelectInstVisitor(Function &Func) : F(Func) {} 256 257 void countSelects(Function &Func) { 258 NSIs = 0; 259 Mode = VM_counting; 260 visit(Func); 261 } 262 // Visit the IR stream and instrument all select instructions. \p 263 // Ind is a pointer to the counter index variable; \p TotalNC 264 // is the total number of counters; \p FNV is the pointer to the 265 // PGO function name var; \p FHash is the function hash. 266 void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC, 267 GlobalVariable *FNV, uint64_t FHash) { 268 Mode = VM_instrument; 269 CurCtrIdx = Ind; 270 TotalNumCtrs = TotalNC; 271 FuncHash = FHash; 272 FuncNameVar = FNV; 273 visit(Func); 274 } 275 276 // Visit the IR stream and annotate all select instructions. 277 void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) { 278 Mode = VM_annotate; 279 UseFunc = UF; 280 CurCtrIdx = Ind; 281 visit(Func); 282 } 283 284 void instrumentOneSelectInst(SelectInst &SI); 285 void annotateOneSelectInst(SelectInst &SI); 286 // Visit \p SI instruction and perform tasks according to visit mode. 287 void visitSelectInst(SelectInst &SI); 288 // Return the number of select instructions. This needs be called after 289 // countSelects(). 290 unsigned getNumOfSelectInsts() const { return NSIs; } 291 }; 292 293 /// Instruction Visitor class to visit memory intrinsic calls. 294 struct MemIntrinsicVisitor : public InstVisitor<MemIntrinsicVisitor> { 295 Function &F; 296 unsigned NMemIs = 0; // Number of memIntrinsics instrumented. 297 VisitMode Mode = VM_counting; // Visiting mode. 298 unsigned CurCtrId = 0; // Current counter index. 299 unsigned TotalNumCtrs = 0; // Total number of counters 300 GlobalVariable *FuncNameVar = nullptr; 301 uint64_t FuncHash = 0; 302 PGOUseFunc *UseFunc = nullptr; 303 std::vector<Instruction *> Candidates; 304 305 MemIntrinsicVisitor(Function &Func) : F(Func) {} 306 307 void countMemIntrinsics(Function &Func) { 308 NMemIs = 0; 309 Mode = VM_counting; 310 visit(Func); 311 } 312 313 void instrumentMemIntrinsics(Function &Func, unsigned TotalNC, 314 GlobalVariable *FNV, uint64_t FHash) { 315 Mode = VM_instrument; 316 TotalNumCtrs = TotalNC; 317 FuncHash = FHash; 318 FuncNameVar = FNV; 319 visit(Func); 320 } 321 322 std::vector<Instruction *> findMemIntrinsics(Function &Func) { 323 Candidates.clear(); 324 Mode = VM_annotate; 325 visit(Func); 326 return Candidates; 327 } 328 329 // Visit the IR stream and annotate all mem intrinsic call instructions. 330 void instrumentOneMemIntrinsic(MemIntrinsic &MI); 331 // Visit \p MI instruction and perform tasks according to visit mode. 332 void visitMemIntrinsic(MemIntrinsic &SI); 333 unsigned getNumOfMemIntrinsics() const { return NMemIs; } 334 }; 335 336 class PGOInstrumentationGenLegacyPass : public ModulePass { 337 public: 338 static char ID; 339 340 PGOInstrumentationGenLegacyPass() : ModulePass(ID) { 341 initializePGOInstrumentationGenLegacyPassPass( 342 *PassRegistry::getPassRegistry()); 343 } 344 345 StringRef getPassName() const override { return "PGOInstrumentationGenPass"; } 346 347 private: 348 bool runOnModule(Module &M) override; 349 350 void getAnalysisUsage(AnalysisUsage &AU) const override { 351 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 352 } 353 }; 354 355 class PGOInstrumentationUseLegacyPass : public ModulePass { 356 public: 357 static char ID; 358 359 // Provide the profile filename as the parameter. 360 PGOInstrumentationUseLegacyPass(std::string Filename = "") 361 : ModulePass(ID), ProfileFileName(std::move(Filename)) { 362 if (!PGOTestProfileFile.empty()) 363 ProfileFileName = PGOTestProfileFile; 364 initializePGOInstrumentationUseLegacyPassPass( 365 *PassRegistry::getPassRegistry()); 366 } 367 368 StringRef getPassName() const override { return "PGOInstrumentationUsePass"; } 369 370 private: 371 std::string ProfileFileName; 372 373 bool runOnModule(Module &M) override; 374 void getAnalysisUsage(AnalysisUsage &AU) const override { 375 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 376 } 377 }; 378 379 } // end anonymous namespace 380 381 char PGOInstrumentationGenLegacyPass::ID = 0; 382 INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 383 "PGO instrumentation.", false, false) 384 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 385 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 386 INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 387 "PGO instrumentation.", false, false) 388 389 ModulePass *llvm::createPGOInstrumentationGenLegacyPass() { 390 return new PGOInstrumentationGenLegacyPass(); 391 } 392 393 char PGOInstrumentationUseLegacyPass::ID = 0; 394 INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 395 "Read PGO instrumentation profile.", false, false) 396 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 397 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 398 INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 399 "Read PGO instrumentation profile.", false, false) 400 401 ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename) { 402 return new PGOInstrumentationUseLegacyPass(Filename.str()); 403 } 404 405 namespace { 406 /// \brief An MST based instrumentation for PGO 407 /// 408 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO 409 /// in the function level. 410 struct PGOEdge { 411 // This class implements the CFG edges. Note the CFG can be a multi-graph. 412 // So there might be multiple edges with same SrcBB and DestBB. 413 const BasicBlock *SrcBB; 414 const BasicBlock *DestBB; 415 uint64_t Weight; 416 bool InMST; 417 bool Removed; 418 bool IsCritical; 419 PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1) 420 : SrcBB(Src), DestBB(Dest), Weight(W), InMST(false), Removed(false), 421 IsCritical(false) {} 422 // Return the information string of an edge. 423 const std::string infoString() const { 424 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 425 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str(); 426 } 427 }; 428 429 // This class stores the auxiliary information for each BB. 430 struct BBInfo { 431 BBInfo *Group; 432 uint32_t Index; 433 uint32_t Rank; 434 435 BBInfo(unsigned IX) : Group(this), Index(IX), Rank(0) {} 436 437 // Return the information string of this object. 438 const std::string infoString() const { 439 return (Twine("Index=") + Twine(Index)).str(); 440 } 441 }; 442 443 // This class implements the CFG edges. Note the CFG can be a multi-graph. 444 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 445 private: 446 Function &F; 447 void computeCFGHash(); 448 void renameComdatFunction(); 449 // A map that stores the Comdat group in function F. 450 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 451 452 public: 453 std::vector<std::vector<Instruction *>> ValueSites; 454 SelectInstVisitor SIVisitor; 455 MemIntrinsicVisitor MIVisitor; 456 std::string FuncName; 457 GlobalVariable *FuncNameVar; 458 // CFG hash value for this function. 459 uint64_t FunctionHash; 460 461 // The Minimum Spanning Tree of function CFG. 462 CFGMST<Edge, BBInfo> MST; 463 464 // Give an edge, find the BB that will be instrumented. 465 // Return nullptr if there is no BB to be instrumented. 466 BasicBlock *getInstrBB(Edge *E); 467 468 // Return the auxiliary BB information. 469 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 470 471 // Return the auxiliary BB information if available. 472 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 473 474 // Dump edges and BB information. 475 void dumpInfo(std::string Str = "") const { 476 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " + 477 Twine(FunctionHash) + "\t" + Str); 478 } 479 480 FuncPGOInstrumentation( 481 Function &Func, 482 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 483 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 484 BlockFrequencyInfo *BFI = nullptr) 485 : F(Func), ComdatMembers(ComdatMembers), ValueSites(IPVK_Last + 1), 486 SIVisitor(Func), MIVisitor(Func), FunctionHash(0), MST(F, BPI, BFI) { 487 488 // This should be done before CFG hash computation. 489 SIVisitor.countSelects(Func); 490 MIVisitor.countMemIntrinsics(Func); 491 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 492 NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics(); 493 ValueSites[IPVK_IndirectCallTarget] = findIndirectCallSites(Func); 494 ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func); 495 496 FuncName = getPGOFuncName(F); 497 computeCFGHash(); 498 if (ComdatMembers.size()) 499 renameComdatFunction(); 500 DEBUG(dumpInfo("after CFGMST")); 501 502 NumOfPGOBB += MST.BBInfos.size(); 503 for (auto &E : MST.AllEdges) { 504 if (E->Removed) 505 continue; 506 NumOfPGOEdge++; 507 if (!E->InMST) 508 NumOfPGOInstrument++; 509 } 510 511 if (CreateGlobalVar) 512 FuncNameVar = createPGOFuncNameVar(F, FuncName); 513 } 514 515 // Return the number of profile counters needed for the function. 516 unsigned getNumCounters() { 517 unsigned NumCounters = 0; 518 for (auto &E : this->MST.AllEdges) { 519 if (!E->InMST && !E->Removed) 520 NumCounters++; 521 } 522 return NumCounters + SIVisitor.getNumOfSelectInsts(); 523 } 524 }; 525 526 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 527 // value of each BB in the CFG. The higher 32 bits record the number of edges. 528 template <class Edge, class BBInfo> 529 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 530 std::vector<char> Indexes; 531 JamCRC JC; 532 for (auto &BB : F) { 533 const TerminatorInst *TI = BB.getTerminator(); 534 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 535 BasicBlock *Succ = TI->getSuccessor(I); 536 auto BI = findBBInfo(Succ); 537 if (BI == nullptr) 538 continue; 539 uint32_t Index = BI->Index; 540 for (int J = 0; J < 4; J++) 541 Indexes.push_back((char)(Index >> (J * 8))); 542 } 543 } 544 JC.update(Indexes); 545 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | 546 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | 547 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); 548 } 549 550 // Check if we can safely rename this Comdat function. 551 static bool canRenameComdat( 552 Function &F, 553 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 554 if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 555 return false; 556 557 // FIXME: Current only handle those Comdat groups that only containing one 558 // function and function aliases. 559 // (1) For a Comdat group containing multiple functions, we need to have a 560 // unique postfix based on the hashes for each function. There is a 561 // non-trivial code refactoring to do this efficiently. 562 // (2) Variables can not be renamed, so we can not rename Comdat function in a 563 // group including global vars. 564 Comdat *C = F.getComdat(); 565 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 566 if (dyn_cast<GlobalAlias>(CM.second)) 567 continue; 568 Function *FM = dyn_cast<Function>(CM.second); 569 if (FM != &F) 570 return false; 571 } 572 return true; 573 } 574 575 // Append the CFGHash to the Comdat function name. 576 template <class Edge, class BBInfo> 577 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 578 if (!canRenameComdat(F, ComdatMembers)) 579 return; 580 std::string OrigName = F.getName().str(); 581 std::string NewFuncName = 582 Twine(F.getName() + "." + Twine(FunctionHash)).str(); 583 F.setName(Twine(NewFuncName)); 584 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 585 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 586 Comdat *NewComdat; 587 Module *M = F.getParent(); 588 // For AvailableExternallyLinkage functions, change the linkage to 589 // LinkOnceODR and put them into comdat. This is because after renaming, there 590 // is no backup external copy available for the function. 591 if (!F.hasComdat()) { 592 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 593 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 594 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 595 F.setComdat(NewComdat); 596 return; 597 } 598 599 // This function belongs to a single function Comdat group. 600 Comdat *OrigComdat = F.getComdat(); 601 std::string NewComdatName = 602 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 603 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 604 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 605 606 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 607 if (GlobalAlias *GA = dyn_cast<GlobalAlias>(CM.second)) { 608 // For aliases, change the name directly. 609 assert(dyn_cast<Function>(GA->getAliasee()->stripPointerCasts()) == &F); 610 std::string OrigGAName = GA->getName().str(); 611 GA->setName(Twine(GA->getName() + "." + Twine(FunctionHash))); 612 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigGAName, GA); 613 continue; 614 } 615 // Must be a function. 616 Function *CF = dyn_cast<Function>(CM.second); 617 assert(CF); 618 CF->setComdat(NewComdat); 619 } 620 } 621 622 // Given a CFG E to be instrumented, find which BB to place the instrumented 623 // code. The function will split the critical edge if necessary. 624 template <class Edge, class BBInfo> 625 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 626 if (E->InMST || E->Removed) 627 return nullptr; 628 629 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 630 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 631 // For a fake edge, instrument the real BB. 632 if (SrcBB == nullptr) 633 return DestBB; 634 if (DestBB == nullptr) 635 return SrcBB; 636 637 // Instrument the SrcBB if it has a single successor, 638 // otherwise, the DestBB if this is not a critical edge. 639 TerminatorInst *TI = SrcBB->getTerminator(); 640 if (TI->getNumSuccessors() <= 1) 641 return SrcBB; 642 if (!E->IsCritical) 643 return DestBB; 644 645 // For a critical edge, we have to split. Instrument the newly 646 // created BB. 647 NumOfPGOSplit++; 648 DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index << " --> " 649 << getBBInfo(DestBB).Index << "\n"); 650 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 651 BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum); 652 assert(InstrBB && "Critical edge is not split"); 653 654 E->Removed = true; 655 return InstrBB; 656 } 657 658 // Visit all edge and instrument the edges not in MST, and do value profiling. 659 // Critical edges will be split. 660 static void instrumentOneFunc( 661 Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, 662 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 663 FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI, 664 BFI); 665 unsigned NumCounters = FuncInfo.getNumCounters(); 666 667 uint32_t I = 0; 668 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); 669 for (auto &E : FuncInfo.MST.AllEdges) { 670 BasicBlock *InstrBB = FuncInfo.getInstrBB(E.get()); 671 if (!InstrBB) 672 continue; 673 674 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 675 assert(Builder.GetInsertPoint() != InstrBB->end() && 676 "Cannot get the Instrumentation point"); 677 Builder.CreateCall( 678 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), 679 {llvm::ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 680 Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters), 681 Builder.getInt32(I++)}); 682 } 683 684 // Now instrument select instructions: 685 FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar, 686 FuncInfo.FunctionHash); 687 assert(I == NumCounters); 688 689 if (DisableValueProfiling) 690 return; 691 692 unsigned NumIndirectCallSites = 0; 693 for (auto &I : FuncInfo.ValueSites[IPVK_IndirectCallTarget]) { 694 CallSite CS(I); 695 Value *Callee = CS.getCalledValue(); 696 DEBUG(dbgs() << "Instrument one indirect call: CallSite Index = " 697 << NumIndirectCallSites << "\n"); 698 IRBuilder<> Builder(I); 699 assert(Builder.GetInsertPoint() != I->getParent()->end() && 700 "Cannot get the Instrumentation point"); 701 Builder.CreateCall( 702 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 703 {llvm::ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 704 Builder.getInt64(FuncInfo.FunctionHash), 705 Builder.CreatePtrToInt(Callee, Builder.getInt64Ty()), 706 Builder.getInt32(IPVK_IndirectCallTarget), 707 Builder.getInt32(NumIndirectCallSites++)}); 708 } 709 NumOfPGOICall += NumIndirectCallSites; 710 711 // Now instrument memop intrinsic calls. 712 FuncInfo.MIVisitor.instrumentMemIntrinsics( 713 F, NumCounters, FuncInfo.FuncNameVar, FuncInfo.FunctionHash); 714 } 715 716 // This class represents a CFG edge in profile use compilation. 717 struct PGOUseEdge : public PGOEdge { 718 bool CountValid; 719 uint64_t CountValue; 720 PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1) 721 : PGOEdge(Src, Dest, W), CountValid(false), CountValue(0) {} 722 723 // Set edge count value 724 void setEdgeCount(uint64_t Value) { 725 CountValue = Value; 726 CountValid = true; 727 } 728 729 // Return the information string for this object. 730 const std::string infoString() const { 731 if (!CountValid) 732 return PGOEdge::infoString(); 733 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) 734 .str(); 735 } 736 }; 737 738 typedef SmallVector<PGOUseEdge *, 2> DirectEdges; 739 740 // This class stores the auxiliary information for each BB. 741 struct UseBBInfo : public BBInfo { 742 uint64_t CountValue; 743 bool CountValid; 744 int32_t UnknownCountInEdge; 745 int32_t UnknownCountOutEdge; 746 DirectEdges InEdges; 747 DirectEdges OutEdges; 748 UseBBInfo(unsigned IX) 749 : BBInfo(IX), CountValue(0), CountValid(false), UnknownCountInEdge(0), 750 UnknownCountOutEdge(0) {} 751 UseBBInfo(unsigned IX, uint64_t C) 752 : BBInfo(IX), CountValue(C), CountValid(true), UnknownCountInEdge(0), 753 UnknownCountOutEdge(0) {} 754 755 // Set the profile count value for this BB. 756 void setBBInfoCount(uint64_t Value) { 757 CountValue = Value; 758 CountValid = true; 759 } 760 761 // Return the information string of this object. 762 const std::string infoString() const { 763 if (!CountValid) 764 return BBInfo::infoString(); 765 return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str(); 766 } 767 }; 768 769 // Sum up the count values for all the edges. 770 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 771 uint64_t Total = 0; 772 for (auto &E : Edges) { 773 if (E->Removed) 774 continue; 775 Total += E->CountValue; 776 } 777 return Total; 778 } 779 780 class PGOUseFunc { 781 public: 782 PGOUseFunc(Function &Func, Module *Modu, 783 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 784 BranchProbabilityInfo *BPI = nullptr, 785 BlockFrequencyInfo *BFI = nullptr) 786 : F(Func), M(Modu), FuncInfo(Func, ComdatMembers, false, BPI, BFI), 787 CountPosition(0), ProfileCountSize(0), FreqAttr(FFA_Normal) {} 788 789 // Read counts for the instrumented BB from profile. 790 bool readCounters(IndexedInstrProfReader *PGOReader); 791 792 // Populate the counts for all BBs. 793 void populateCounters(); 794 795 // Set the branch weights based on the count values. 796 void setBranchWeights(); 797 798 // Annotate the value profile call sites all all value kind. 799 void annotateValueSites(); 800 801 // Annotate the value profile call sites for one value kind. 802 void annotateValueSites(uint32_t Kind); 803 804 // The hotness of the function from the profile count. 805 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 806 807 // Return the function hotness from the profile. 808 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 809 810 // Return the function hash. 811 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 812 // Return the profile record for this function; 813 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 814 815 // Return the auxiliary BB information. 816 UseBBInfo &getBBInfo(const BasicBlock *BB) const { 817 return FuncInfo.getBBInfo(BB); 818 } 819 820 // Return the auxiliary BB information if available. 821 UseBBInfo *findBBInfo(const BasicBlock *BB) const { 822 return FuncInfo.findBBInfo(BB); 823 } 824 825 Function &getFunc() const { return F; } 826 827 private: 828 Function &F; 829 Module *M; 830 // This member stores the shared information with class PGOGenFunc. 831 FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo; 832 833 // The maximum count value in the profile. This is only used in PGO use 834 // compilation. 835 uint64_t ProgramMaxCount; 836 837 // Position of counter that remains to be read. 838 uint32_t CountPosition; 839 840 // Total size of the profile count for this function. 841 uint32_t ProfileCountSize; 842 843 // ProfileRecord for this function. 844 InstrProfRecord ProfileRecord; 845 846 // Function hotness info derived from profile. 847 FuncFreqAttr FreqAttr; 848 849 // Find the Instrumented BB and set the value. 850 void setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 851 852 // Set the edge counter value for the unknown edge -- there should be only 853 // one unknown edge. 854 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 855 856 // Return FuncName string; 857 const std::string getFuncName() const { return FuncInfo.FuncName; } 858 859 // Set the hot/cold inline hints based on the count values. 860 // FIXME: This function should be removed once the functionality in 861 // the inliner is implemented. 862 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 863 if (ProgramMaxCount == 0) 864 return; 865 // Threshold of the hot functions. 866 const BranchProbability HotFunctionThreshold(1, 100); 867 // Threshold of the cold functions. 868 const BranchProbability ColdFunctionThreshold(2, 10000); 869 if (EntryCount >= HotFunctionThreshold.scale(ProgramMaxCount)) 870 FreqAttr = FFA_Hot; 871 else if (MaxCount <= ColdFunctionThreshold.scale(ProgramMaxCount)) 872 FreqAttr = FFA_Cold; 873 } 874 }; 875 876 // Visit all the edges and assign the count value for the instrumented 877 // edges and the BB. 878 void PGOUseFunc::setInstrumentedCounts( 879 const std::vector<uint64_t> &CountFromProfile) { 880 881 assert(FuncInfo.getNumCounters() == CountFromProfile.size()); 882 // Use a worklist as we will update the vector during the iteration. 883 std::vector<PGOUseEdge *> WorkList; 884 for (auto &E : FuncInfo.MST.AllEdges) 885 WorkList.push_back(E.get()); 886 887 uint32_t I = 0; 888 for (auto &E : WorkList) { 889 BasicBlock *InstrBB = FuncInfo.getInstrBB(E); 890 if (!InstrBB) 891 continue; 892 uint64_t CountValue = CountFromProfile[I++]; 893 if (!E->Removed) { 894 getBBInfo(InstrBB).setBBInfoCount(CountValue); 895 E->setEdgeCount(CountValue); 896 continue; 897 } 898 899 // Need to add two new edges. 900 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 901 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 902 // Add new edge of SrcBB->InstrBB. 903 PGOUseEdge &NewEdge = FuncInfo.MST.addEdge(SrcBB, InstrBB, 0); 904 NewEdge.setEdgeCount(CountValue); 905 // Add new edge of InstrBB->DestBB. 906 PGOUseEdge &NewEdge1 = FuncInfo.MST.addEdge(InstrBB, DestBB, 0); 907 NewEdge1.setEdgeCount(CountValue); 908 NewEdge1.InMST = true; 909 getBBInfo(InstrBB).setBBInfoCount(CountValue); 910 } 911 ProfileCountSize = CountFromProfile.size(); 912 CountPosition = I; 913 } 914 915 // Set the count value for the unknown edge. There should be one and only one 916 // unknown edge in Edges vector. 917 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 918 for (auto &E : Edges) { 919 if (E->CountValid) 920 continue; 921 E->setEdgeCount(Value); 922 923 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 924 getBBInfo(E->DestBB).UnknownCountInEdge--; 925 return; 926 } 927 llvm_unreachable("Cannot find the unknown count edge"); 928 } 929 930 // Read the profile from ProfileFileName and assign the value to the 931 // instrumented BB and the edges. This function also updates ProgramMaxCount. 932 // Return true if the profile are successfully read, and false on errors. 933 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader) { 934 auto &Ctx = M->getContext(); 935 Expected<InstrProfRecord> Result = 936 PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); 937 if (Error E = Result.takeError()) { 938 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 939 auto Err = IPE.get(); 940 bool SkipWarning = false; 941 if (Err == instrprof_error::unknown_function) { 942 NumOfPGOMissing++; 943 SkipWarning = !PGOWarnMissing; 944 } else if (Err == instrprof_error::hash_mismatch || 945 Err == instrprof_error::malformed) { 946 NumOfPGOMismatch++; 947 SkipWarning = 948 NoPGOWarnMismatch || 949 (NoPGOWarnMismatchComdat && 950 (F.hasComdat() || 951 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 952 } 953 954 if (SkipWarning) 955 return; 956 957 std::string Msg = IPE.message() + std::string(" ") + F.getName().str(); 958 Ctx.diagnose( 959 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 960 }); 961 return false; 962 } 963 ProfileRecord = std::move(Result.get()); 964 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 965 966 NumOfPGOFunc++; 967 DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 968 uint64_t ValueSum = 0; 969 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 970 DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 971 ValueSum += CountFromProfile[I]; 972 } 973 974 DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 975 976 getBBInfo(nullptr).UnknownCountOutEdge = 2; 977 getBBInfo(nullptr).UnknownCountInEdge = 2; 978 979 setInstrumentedCounts(CountFromProfile); 980 ProgramMaxCount = PGOReader->getMaximumFunctionCount(); 981 return true; 982 } 983 984 // Populate the counters from instrumented BBs to all BBs. 985 // In the end of this operation, all BBs should have a valid count value. 986 void PGOUseFunc::populateCounters() { 987 // First set up Count variable for all BBs. 988 for (auto &E : FuncInfo.MST.AllEdges) { 989 if (E->Removed) 990 continue; 991 992 const BasicBlock *SrcBB = E->SrcBB; 993 const BasicBlock *DestBB = E->DestBB; 994 UseBBInfo &SrcInfo = getBBInfo(SrcBB); 995 UseBBInfo &DestInfo = getBBInfo(DestBB); 996 SrcInfo.OutEdges.push_back(E.get()); 997 DestInfo.InEdges.push_back(E.get()); 998 SrcInfo.UnknownCountOutEdge++; 999 DestInfo.UnknownCountInEdge++; 1000 1001 if (!E->CountValid) 1002 continue; 1003 DestInfo.UnknownCountInEdge--; 1004 SrcInfo.UnknownCountOutEdge--; 1005 } 1006 1007 bool Changes = true; 1008 unsigned NumPasses = 0; 1009 while (Changes) { 1010 NumPasses++; 1011 Changes = false; 1012 1013 // For efficient traversal, it's better to start from the end as most 1014 // of the instrumented edges are at the end. 1015 for (auto &BB : reverse(F)) { 1016 UseBBInfo *Count = findBBInfo(&BB); 1017 if (Count == nullptr) 1018 continue; 1019 if (!Count->CountValid) { 1020 if (Count->UnknownCountOutEdge == 0) { 1021 Count->CountValue = sumEdgeCount(Count->OutEdges); 1022 Count->CountValid = true; 1023 Changes = true; 1024 } else if (Count->UnknownCountInEdge == 0) { 1025 Count->CountValue = sumEdgeCount(Count->InEdges); 1026 Count->CountValid = true; 1027 Changes = true; 1028 } 1029 } 1030 if (Count->CountValid) { 1031 if (Count->UnknownCountOutEdge == 1) { 1032 uint64_t Total = 0; 1033 uint64_t OutSum = sumEdgeCount(Count->OutEdges); 1034 // If the one of the successor block can early terminate (no-return), 1035 // we can end up with situation where out edge sum count is larger as 1036 // the source BB's count is collected by a post-dominated block. 1037 if (Count->CountValue > OutSum) 1038 Total = Count->CountValue - OutSum; 1039 setEdgeCount(Count->OutEdges, Total); 1040 Changes = true; 1041 } 1042 if (Count->UnknownCountInEdge == 1) { 1043 uint64_t Total = 0; 1044 uint64_t InSum = sumEdgeCount(Count->InEdges); 1045 if (Count->CountValue > InSum) 1046 Total = Count->CountValue - InSum; 1047 setEdgeCount(Count->InEdges, Total); 1048 Changes = true; 1049 } 1050 } 1051 } 1052 } 1053 1054 DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 1055 #ifndef NDEBUG 1056 // Assert every BB has a valid counter. 1057 for (auto &BB : F) { 1058 auto BI = findBBInfo(&BB); 1059 if (BI == nullptr) 1060 continue; 1061 assert(BI->CountValid && "BB count is not valid"); 1062 } 1063 #endif 1064 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 1065 F.setEntryCount(FuncEntryCount); 1066 uint64_t FuncMaxCount = FuncEntryCount; 1067 for (auto &BB : F) { 1068 auto BI = findBBInfo(&BB); 1069 if (BI == nullptr) 1070 continue; 1071 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); 1072 } 1073 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 1074 1075 // Now annotate select instructions 1076 FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition); 1077 assert(CountPosition == ProfileCountSize); 1078 1079 DEBUG(FuncInfo.dumpInfo("after reading profile.")); 1080 } 1081 1082 // Assign the scaled count values to the BB with multiple out edges. 1083 void PGOUseFunc::setBranchWeights() { 1084 // Generate MD_prof metadata for every branch instruction. 1085 DEBUG(dbgs() << "\nSetting branch weights.\n"); 1086 for (auto &BB : F) { 1087 TerminatorInst *TI = BB.getTerminator(); 1088 if (TI->getNumSuccessors() < 2) 1089 continue; 1090 if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI)) 1091 continue; 1092 if (getBBInfo(&BB).CountValue == 0) 1093 continue; 1094 1095 // We have a non-zero Branch BB. 1096 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1097 unsigned Size = BBCountInfo.OutEdges.size(); 1098 SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 1099 uint64_t MaxCount = 0; 1100 for (unsigned s = 0; s < Size; s++) { 1101 const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 1102 const BasicBlock *SrcBB = E->SrcBB; 1103 const BasicBlock *DestBB = E->DestBB; 1104 if (DestBB == nullptr) 1105 continue; 1106 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 1107 uint64_t EdgeCount = E->CountValue; 1108 if (EdgeCount > MaxCount) 1109 MaxCount = EdgeCount; 1110 EdgeCounts[SuccNum] = EdgeCount; 1111 } 1112 setProfMetadata(M, TI, EdgeCounts, MaxCount); 1113 } 1114 } 1115 1116 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 1117 Module *M = F.getParent(); 1118 IRBuilder<> Builder(&SI); 1119 Type *Int64Ty = Builder.getInt64Ty(); 1120 Type *I8PtrTy = Builder.getInt8PtrTy(); 1121 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 1122 Builder.CreateCall( 1123 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 1124 {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1125 Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), 1126 Builder.getInt32(*CurCtrIdx), Step}); 1127 ++(*CurCtrIdx); 1128 } 1129 1130 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 1131 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1132 assert(*CurCtrIdx < CountFromProfile.size() && 1133 "Out of bound access of counters"); 1134 uint64_t SCounts[2]; 1135 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1136 ++(*CurCtrIdx); 1137 uint64_t TotalCount = 0; 1138 auto BI = UseFunc->findBBInfo(SI.getParent()); 1139 if (BI != nullptr) 1140 TotalCount = BI->CountValue; 1141 // False Count 1142 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1143 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1144 if (MaxCount) 1145 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1146 } 1147 1148 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1149 if (!PGOInstrSelect) 1150 return; 1151 // FIXME: do not handle this yet. 1152 if (SI.getCondition()->getType()->isVectorTy()) 1153 return; 1154 1155 switch (Mode) { 1156 case VM_counting: 1157 NSIs++; 1158 return; 1159 case VM_instrument: 1160 instrumentOneSelectInst(SI); 1161 return; 1162 case VM_annotate: 1163 annotateOneSelectInst(SI); 1164 return; 1165 } 1166 1167 llvm_unreachable("Unknown visiting mode"); 1168 } 1169 1170 void MemIntrinsicVisitor::instrumentOneMemIntrinsic(MemIntrinsic &MI) { 1171 Module *M = F.getParent(); 1172 IRBuilder<> Builder(&MI); 1173 Type *Int64Ty = Builder.getInt64Ty(); 1174 Type *I8PtrTy = Builder.getInt8PtrTy(); 1175 Value *Length = MI.getLength(); 1176 assert(!dyn_cast<ConstantInt>(Length)); 1177 Builder.CreateCall( 1178 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 1179 {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1180 Builder.getInt64(FuncHash), Builder.CreateZExtOrTrunc(Length, Int64Ty), 1181 Builder.getInt32(IPVK_MemOPSize), Builder.getInt32(CurCtrId)}); 1182 ++CurCtrId; 1183 } 1184 1185 void MemIntrinsicVisitor::visitMemIntrinsic(MemIntrinsic &MI) { 1186 if (!PGOInstrMemOP) 1187 return; 1188 Value *Length = MI.getLength(); 1189 // Not instrument constant length calls. 1190 if (dyn_cast<ConstantInt>(Length)) 1191 return; 1192 1193 switch (Mode) { 1194 case VM_counting: 1195 NMemIs++; 1196 return; 1197 case VM_instrument: 1198 instrumentOneMemIntrinsic(MI); 1199 return; 1200 case VM_annotate: 1201 Candidates.push_back(&MI); 1202 return; 1203 } 1204 llvm_unreachable("Unknown visiting mode"); 1205 } 1206 1207 // Traverse all valuesites and annotate the instructions for all value kind. 1208 void PGOUseFunc::annotateValueSites() { 1209 if (DisableValueProfiling) 1210 return; 1211 1212 // Create the PGOFuncName meta data. 1213 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1214 1215 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1216 annotateValueSites(Kind); 1217 } 1218 1219 // Annotate the instructions for a specific value kind. 1220 void PGOUseFunc::annotateValueSites(uint32_t Kind) { 1221 unsigned ValueSiteIndex = 0; 1222 auto &ValueSites = FuncInfo.ValueSites[Kind]; 1223 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 1224 if (NumValueSites != ValueSites.size()) { 1225 auto &Ctx = M->getContext(); 1226 Ctx.diagnose(DiagnosticInfoPGOProfile( 1227 M->getName().data(), 1228 Twine("Inconsistent number of value sites for kind = ") + Twine(Kind) + 1229 " in " + F.getName().str(), 1230 DS_Warning)); 1231 return; 1232 } 1233 1234 for (auto &I : ValueSites) { 1235 DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 1236 << "): Index = " << ValueSiteIndex << " out of " 1237 << NumValueSites << "\n"); 1238 annotateValueSite(*M, *I, ProfileRecord, 1239 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 1240 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations 1241 : MaxNumAnnotations); 1242 ValueSiteIndex++; 1243 } 1244 } 1245 } // end anonymous namespace 1246 1247 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime 1248 // aware this is an ir_level profile so it can set the version flag. 1249 static void createIRLevelProfileFlagVariable(Module &M) { 1250 Type *IntTy64 = Type::getInt64Ty(M.getContext()); 1251 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); 1252 auto IRLevelVersionVariable = new GlobalVariable( 1253 M, IntTy64, true, GlobalVariable::ExternalLinkage, 1254 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), 1255 INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); 1256 IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility); 1257 Triple TT(M.getTargetTriple()); 1258 if (!TT.supportsCOMDAT()) 1259 IRLevelVersionVariable->setLinkage(GlobalValue::WeakAnyLinkage); 1260 else 1261 IRLevelVersionVariable->setComdat(M.getOrInsertComdat( 1262 StringRef(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)))); 1263 } 1264 1265 // Collect the set of members for each Comdat in module M and store 1266 // in ComdatMembers. 1267 static void collectComdatMembers( 1268 Module &M, 1269 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1270 if (!DoComdatRenaming) 1271 return; 1272 for (Function &F : M) 1273 if (Comdat *C = F.getComdat()) 1274 ComdatMembers.insert(std::make_pair(C, &F)); 1275 for (GlobalVariable &GV : M.globals()) 1276 if (Comdat *C = GV.getComdat()) 1277 ComdatMembers.insert(std::make_pair(C, &GV)); 1278 for (GlobalAlias &GA : M.aliases()) 1279 if (Comdat *C = GA.getComdat()) 1280 ComdatMembers.insert(std::make_pair(C, &GA)); 1281 } 1282 1283 static bool InstrumentAllFunctions( 1284 Module &M, function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1285 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) { 1286 createIRLevelProfileFlagVariable(M); 1287 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1288 collectComdatMembers(M, ComdatMembers); 1289 1290 for (auto &F : M) { 1291 if (F.isDeclaration()) 1292 continue; 1293 auto *BPI = LookupBPI(F); 1294 auto *BFI = LookupBFI(F); 1295 instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers); 1296 } 1297 return true; 1298 } 1299 1300 bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) { 1301 if (skipModule(M)) 1302 return false; 1303 1304 auto LookupBPI = [this](Function &F) { 1305 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1306 }; 1307 auto LookupBFI = [this](Function &F) { 1308 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1309 }; 1310 return InstrumentAllFunctions(M, LookupBPI, LookupBFI); 1311 } 1312 1313 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1314 ModuleAnalysisManager &AM) { 1315 1316 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1317 auto LookupBPI = [&FAM](Function &F) { 1318 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1319 }; 1320 1321 auto LookupBFI = [&FAM](Function &F) { 1322 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1323 }; 1324 1325 if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI)) 1326 return PreservedAnalyses::all(); 1327 1328 return PreservedAnalyses::none(); 1329 } 1330 1331 static bool annotateAllFunctions( 1332 Module &M, StringRef ProfileFileName, 1333 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1334 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) { 1335 DEBUG(dbgs() << "Read in profile counters: "); 1336 auto &Ctx = M.getContext(); 1337 // Read the counter array from file. 1338 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName); 1339 if (Error E = ReaderOrErr.takeError()) { 1340 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 1341 Ctx.diagnose( 1342 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 1343 }); 1344 return false; 1345 } 1346 1347 std::unique_ptr<IndexedInstrProfReader> PGOReader = 1348 std::move(ReaderOrErr.get()); 1349 if (!PGOReader) { 1350 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 1351 StringRef("Cannot get PGOReader"))); 1352 return false; 1353 } 1354 // TODO: might need to change the warning once the clang option is finalized. 1355 if (!PGOReader->isIRLevelProfile()) { 1356 Ctx.diagnose(DiagnosticInfoPGOProfile( 1357 ProfileFileName.data(), "Not an IR level instrumentation profile")); 1358 return false; 1359 } 1360 1361 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1362 collectComdatMembers(M, ComdatMembers); 1363 std::vector<Function *> HotFunctions; 1364 std::vector<Function *> ColdFunctions; 1365 for (auto &F : M) { 1366 if (F.isDeclaration()) 1367 continue; 1368 auto *BPI = LookupBPI(F); 1369 auto *BFI = LookupBFI(F); 1370 PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI); 1371 if (!Func.readCounters(PGOReader.get())) 1372 continue; 1373 Func.populateCounters(); 1374 Func.setBranchWeights(); 1375 Func.annotateValueSites(); 1376 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 1377 if (FreqAttr == PGOUseFunc::FFA_Cold) 1378 ColdFunctions.push_back(&F); 1379 else if (FreqAttr == PGOUseFunc::FFA_Hot) 1380 HotFunctions.push_back(&F); 1381 if (PGOViewCounts && (ViewBlockFreqFuncName.empty() || 1382 F.getName().equals(ViewBlockFreqFuncName))) { 1383 LoopInfo LI{DominatorTree(F)}; 1384 std::unique_ptr<BranchProbabilityInfo> NewBPI = 1385 llvm::make_unique<BranchProbabilityInfo>(F, LI); 1386 std::unique_ptr<BlockFrequencyInfo> NewBFI = 1387 llvm::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); 1388 1389 NewBFI->view(); 1390 } 1391 if (PGOViewRawCounts && (ViewBlockFreqFuncName.empty() || 1392 F.getName().equals(ViewBlockFreqFuncName))) { 1393 if (ViewBlockFreqFuncName.empty()) 1394 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1395 else 1396 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1397 } 1398 } 1399 M.setProfileSummary(PGOReader->getSummary().getMD(M.getContext())); 1400 // Set function hotness attribute from the profile. 1401 // We have to apply these attributes at the end because their presence 1402 // can affect the BranchProbabilityInfo of any callers, resulting in an 1403 // inconsistent MST between prof-gen and prof-use. 1404 for (auto &F : HotFunctions) { 1405 F->addFnAttr(llvm::Attribute::InlineHint); 1406 DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 1407 << "\n"); 1408 } 1409 for (auto &F : ColdFunctions) { 1410 F->addFnAttr(llvm::Attribute::Cold); 1411 DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() << "\n"); 1412 } 1413 return true; 1414 } 1415 1416 PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename) 1417 : ProfileFileName(std::move(Filename)) { 1418 if (!PGOTestProfileFile.empty()) 1419 ProfileFileName = PGOTestProfileFile; 1420 } 1421 1422 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 1423 ModuleAnalysisManager &AM) { 1424 1425 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1426 auto LookupBPI = [&FAM](Function &F) { 1427 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1428 }; 1429 1430 auto LookupBFI = [&FAM](Function &F) { 1431 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1432 }; 1433 1434 if (!annotateAllFunctions(M, ProfileFileName, LookupBPI, LookupBFI)) 1435 return PreservedAnalyses::all(); 1436 1437 return PreservedAnalyses::none(); 1438 } 1439 1440 bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { 1441 if (skipModule(M)) 1442 return false; 1443 1444 auto LookupBPI = [this](Function &F) { 1445 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1446 }; 1447 auto LookupBFI = [this](Function &F) { 1448 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1449 }; 1450 1451 return annotateAllFunctions(M, ProfileFileName, LookupBPI, LookupBFI); 1452 } 1453 1454 namespace llvm { 1455 void setProfMetadata(Module *M, Instruction *TI, ArrayRef<uint64_t> EdgeCounts, 1456 uint64_t MaxCount) { 1457 MDBuilder MDB(M->getContext()); 1458 assert(MaxCount > 0 && "Bad max count"); 1459 uint64_t Scale = calculateCountScale(MaxCount); 1460 SmallVector<unsigned, 4> Weights; 1461 for (const auto &ECI : EdgeCounts) 1462 Weights.push_back(scaleBranchCount(ECI, Scale)); 1463 1464 DEBUG(dbgs() << "Weight is: "; 1465 for (const auto &W : Weights) { dbgs() << W << " "; } 1466 dbgs() << "\n";); 1467 TI->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1468 if (EmitBranchProbability) { 1469 std::string BrCondStr = getBranchCondString(TI); 1470 if (BrCondStr.empty()) 1471 return; 1472 1473 unsigned WSum = 1474 std::accumulate(Weights.begin(), Weights.end(), 0, 1475 [](unsigned w1, unsigned w2) { return w1 + w2; }); 1476 uint64_t TotalCount = 1477 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), 0, 1478 [](uint64_t c1, uint64_t c2) { return c1 + c2; }); 1479 BranchProbability BP(Weights[0], WSum); 1480 std::string BranchProbStr; 1481 raw_string_ostream OS(BranchProbStr); 1482 OS << BP; 1483 OS << " (total count : " << TotalCount << ")"; 1484 OS.flush(); 1485 Function *F = TI->getParent()->getParent(); 1486 emitOptimizationRemarkAnalysis( 1487 F->getContext(), "pgo-use-annot", *F, TI->getDebugLoc(), 1488 Twine(BrCondStr) + 1489 " is true with probability : " + Twine(BranchProbStr)); 1490 } 1491 } 1492 1493 template <> struct GraphTraits<PGOUseFunc *> { 1494 typedef const BasicBlock *NodeRef; 1495 typedef succ_const_iterator ChildIteratorType; 1496 typedef pointer_iterator<Function::const_iterator> nodes_iterator; 1497 1498 static NodeRef getEntryNode(const PGOUseFunc *G) { 1499 return &G->getFunc().front(); 1500 } 1501 static ChildIteratorType child_begin(const NodeRef N) { 1502 return succ_begin(N); 1503 } 1504 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } 1505 static nodes_iterator nodes_begin(const PGOUseFunc *G) { 1506 return nodes_iterator(G->getFunc().begin()); 1507 } 1508 static nodes_iterator nodes_end(const PGOUseFunc *G) { 1509 return nodes_iterator(G->getFunc().end()); 1510 } 1511 }; 1512 1513 static std::string getSimpleNodeName(const BasicBlock *Node) { 1514 if (!Node->getName().empty()) 1515 return Node->getName(); 1516 1517 std::string SimpleNodeName; 1518 raw_string_ostream OS(SimpleNodeName); 1519 Node->printAsOperand(OS, false); 1520 return OS.str(); 1521 } 1522 1523 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { 1524 explicit DOTGraphTraits(bool isSimple = false) 1525 : DefaultDOTGraphTraits(isSimple) {} 1526 1527 static std::string getGraphName(const PGOUseFunc *G) { 1528 return G->getFunc().getName(); 1529 } 1530 1531 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { 1532 std::string Result; 1533 raw_string_ostream OS(Result); 1534 1535 OS << getSimpleNodeName(Node) << ":\\l"; 1536 UseBBInfo *BI = Graph->findBBInfo(Node); 1537 OS << "Count : "; 1538 if (BI && BI->CountValid) 1539 OS << BI->CountValue << "\\l"; 1540 else 1541 OS << "Unknown\\l"; 1542 1543 if (!PGOInstrSelect) 1544 return Result; 1545 1546 for (auto BI = Node->begin(); BI != Node->end(); ++BI) { 1547 auto *I = &*BI; 1548 if (!isa<SelectInst>(I)) 1549 continue; 1550 // Display scaled counts for SELECT instruction: 1551 OS << "SELECT : { T = "; 1552 uint64_t TC, FC; 1553 bool HasProf = I->extractProfMetadata(TC, FC); 1554 if (!HasProf) 1555 OS << "Unknown, F = Unknown }\\l"; 1556 else 1557 OS << TC << ", F = " << FC << " }\\l"; 1558 } 1559 return Result; 1560 } 1561 }; 1562 } // namespace llvm 1563