//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements PGO instrumentation using a minimum spanning tree based
// on the following paper:
//   [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
//   for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
//   Issue 3, pp 313-322
// The idea of the algorithm is based on the fact that for each node (except
// for the entry and exit), the sum of incoming edge counts equals the sum of
// outgoing edge counts. The count of an edge on the spanning tree can be
// derived from those edges not on the spanning tree. Knuth proves this method
// instruments the minimum number of edges.
//
// The minimal spanning tree here is actually a maximum weight tree -- on-tree
// edges have higher frequencies (more likely to execute). The idea is to
// instrument those less frequently executed edges to reduce the runtime
// overhead of instrumented binaries.
//
// This file contains two passes:
// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
// count profile, and generates the instrumentation for indirect call
// profiling.
// (2) Pass PGOInstrumentationUse which reads the edge count profile and
// annotates the branch weights. It also reads the indirect call value
// profiling records and annotates the indirect call instructions.
//
// To get the precise counter information, these two passes need to be invoked
// at the same compilation point (so they see the same IR). For pass
// PGOInstrumentationGen, the real work is done in instrumentOneFunc().
// For pass PGOInstrumentationUse, the real work is done in class PGOUseFunc
// and the profile is opened in module level and passed to each PGOUseFunc
// instance. The shared code for PGOInstrumentationGen and
// PGOInstrumentationUse is put in class FuncPGOInstrumentation.
//
// Class PGOEdge represents a CFG edge and some auxiliary information. Class
// BBInfo contains auxiliary information for each BB. These two classes are used
// in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived
// class of PGOEdge and BBInfo, respectively. They contain extra data structures
// used in populating profile counters.
// The MST implementation is in Class CFGMST (CFGMST.h).
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "CFGMST.h"
#include "ValueProfileCollector.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CRC.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <memory>
#include <numeric>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

using namespace llvm;
using ProfileCount = Function::ProfileCount;
using VPCandidateInfo = ValueProfileCollector::CandidateInfo;

#define DEBUG_TYPE "pgo-instrumentation"

// Statistics counters. Each non-CS counter has a "CSPGO" twin that is bumped
// instead when the instrumentation is context-sensitive (IsCS).
STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
STATISTIC(NumOfPGOEdge, "Number of edges.");
STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
STATISTIC(NumOfCSPGOSelectInsts,
          "Number of select instruction instrumented in CSPGO.");
STATISTIC(NumOfCSPGOMemIntrinsics,
          "Number of mem intrinsics instrumented in CSPGO.");
STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
STATISTIC(NumOfCSPGOFunc,
          "Number of functions having valid profile counts in CSPGO.");
STATISTIC(NumOfCSPGOMismatch,
          "Number of functions having mismatch profile in CSPGO.");
STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");

// Command line option to specify the file to read profile from. This is
// mainly used for testing.
// Note: adjacent string literals are concatenated verbatim, so the first
// fragment must end with a space to keep the help text readable.
static cl::opt<std::string>
    PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
                       cl::value_desc("filename"),
                       cl::desc("Specify the path of profile data file. This is "
                                "mainly for test purpose."));
static cl::opt<std::string> PGOTestProfileRemappingFile(
    "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
    cl::value_desc("filename"),
    cl::desc("Specify the path of profile remapping file. This is mainly for "
             "test purpose."));

// Command line option to disable value profiling. The default is false:
// i.e. value profiling is enabled by default. This is for debug purpose.
169 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 170 cl::Hidden, 171 cl::desc("Disable Value Profiling")); 172 173 // Command line option to set the maximum number of VP annotations to write to 174 // the metadata for a single indirect call callsite. 175 static cl::opt<unsigned> MaxNumAnnotations( 176 "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore, 177 cl::desc("Max number of annotations for a single indirect " 178 "call callsite")); 179 180 // Command line option to set the maximum number of value annotations 181 // to write to the metadata for a single memop intrinsic. 182 static cl::opt<unsigned> MaxNumMemOPAnnotations( 183 "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore, 184 cl::desc("Max number of preicise value annotations for a single memop" 185 "intrinsic")); 186 187 // Command line option to control appending FunctionHash to the name of a COMDAT 188 // function. This is to avoid the hash mismatch caused by the preinliner. 189 static cl::opt<bool> DoComdatRenaming( 190 "do-comdat-renaming", cl::init(false), cl::Hidden, 191 cl::desc("Append function hash to the name of COMDAT function to avoid " 192 "function hash mismatch due to the preinliner")); 193 194 // Command line option to enable/disable the warning about missing profile 195 // information. 196 static cl::opt<bool> 197 PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, 198 cl::desc("Use this option to turn on/off " 199 "warnings about missing profile data for " 200 "functions.")); 201 202 namespace llvm { 203 // Command line option to enable/disable the warning about a hash mismatch in 204 // the profile data. 
205 cl::opt<bool> 206 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 207 cl::desc("Use this option to turn off/on " 208 "warnings about profile cfg mismatch.")); 209 } // namespace llvm 210 211 // Command line option to enable/disable the warning about a hash mismatch in 212 // the profile data for Comdat functions, which often turns out to be false 213 // positive due to the pre-instrumentation inline. 214 static cl::opt<bool> 215 NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), 216 cl::Hidden, 217 cl::desc("The option is used to turn on/off " 218 "warnings about hash mismatch for comdat " 219 "functions.")); 220 221 // Command line option to enable/disable select instruction instrumentation. 222 static cl::opt<bool> 223 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 224 cl::desc("Use this option to turn on/off SELECT " 225 "instruction instrumentation. ")); 226 227 // Command line option to turn on CFG dot or text dump of raw profile counts 228 static cl::opt<PGOViewCountsType> PGOViewRawCounts( 229 "pgo-view-raw-counts", cl::Hidden, 230 cl::desc("A boolean option to show CFG dag or text " 231 "with raw profile counts from " 232 "profile data. See also option " 233 "-pgo-view-counts. To limit graph " 234 "display to only one function, use " 235 "filtering option -view-bfi-func-name."), 236 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), 237 clEnumValN(PGOVCT_Graph, "graph", "show a graph."), 238 clEnumValN(PGOVCT_Text, "text", "show in text."))); 239 240 // Command line option to enable/disable memop intrinsic call.size profiling. 241 static cl::opt<bool> 242 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 243 cl::desc("Use this option to turn on/off " 244 "memory intrinsic size profiling.")); 245 246 // Emit branch probability as optimization remarks. 
247 static cl::opt<bool> 248 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 249 cl::desc("When this option is on, the annotated " 250 "branch probability will be emitted as " 251 "optimization remarks: -{Rpass|" 252 "pass-remarks}=pgo-instrumentation")); 253 254 static cl::opt<bool> PGOInstrumentEntry( 255 "pgo-instrument-entry", cl::init(false), cl::Hidden, 256 cl::desc("Force to instrument function entry basicblock.")); 257 258 static cl::opt<bool> 259 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, 260 cl::desc("Fix function entry count in profile use.")); 261 262 static cl::opt<bool> PGOVerifyHotBFI( 263 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden, 264 cl::desc("Print out the non-match BFI count if a hot raw profile count " 265 "becomes non-hot, or a cold raw profile count becomes hot. " 266 "The print is enabled under -Rpass-analysis=pgo, or " 267 "internal option -pass-remakrs-analysis=pgo.")); 268 269 static cl::opt<bool> PGOVerifyBFI( 270 "pgo-verify-bfi", cl::init(false), cl::Hidden, 271 cl::desc("Print out mismatched BFI counts after setting profile metadata " 272 "The print is enabled under -Rpass-analysis=pgo, or " 273 "internal option -pass-remakrs-analysis=pgo.")); 274 275 static cl::opt<unsigned> PGOVerifyBFIRatio( 276 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, 277 cl::desc("Set the threshold for pgo-verify-bfi: only print out " 278 "mismatched BFI if the difference percentage is greater than " 279 "this value (in percentage).")); 280 281 static cl::opt<unsigned> PGOVerifyBFICutoff( 282 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, 283 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " 284 "profile count value is below.")); 285 286 namespace llvm { 287 // Command line option to turn on CFG dot dump after profile annotation. 
288 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts 289 extern cl::opt<PGOViewCountsType> PGOViewCounts; 290 291 // Command line option to specify the name of the function for CFG dump 292 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= 293 extern cl::opt<std::string> ViewBlockFreqFuncName; 294 295 extern cl::opt<bool> DebugInfoCorrelate; 296 } // namespace llvm 297 298 static cl::opt<bool> 299 PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden, 300 cl::desc("Use the old CFG function hashing")); 301 302 // Return a string describing the branch condition that can be 303 // used in static branch probability heuristics: 304 static std::string getBranchCondString(Instruction *TI) { 305 BranchInst *BI = dyn_cast<BranchInst>(TI); 306 if (!BI || !BI->isConditional()) 307 return std::string(); 308 309 Value *Cond = BI->getCondition(); 310 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 311 if (!CI) 312 return std::string(); 313 314 std::string result; 315 raw_string_ostream OS(result); 316 OS << CmpInst::getPredicateName(CI->getPredicate()) << "_"; 317 CI->getOperand(0)->getType()->print(OS, true); 318 319 Value *RHS = CI->getOperand(1); 320 ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 321 if (CV) { 322 if (CV->isZero()) 323 OS << "_Zero"; 324 else if (CV->isOne()) 325 OS << "_One"; 326 else if (CV->isMinusOne()) 327 OS << "_MinusOne"; 328 else 329 OS << "_Const"; 330 } 331 OS.flush(); 332 return result; 333 } 334 335 static const char *ValueProfKindDescr[] = { 336 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, 337 #include "llvm/ProfileData/InstrProfData.inc" 338 }; 339 340 namespace { 341 342 /// The select instruction visitor plays three roles specified 343 /// by the mode. In \c VM_counting mode, it simply counts the number of 344 /// select instructions. In \c VM_instrument mode, it inserts code to count 345 /// the number times TrueValue of select is taken. 
In \c VM_annotate mode, 346 /// it reads the profile data and annotate the select instruction with metadata. 347 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 348 class PGOUseFunc; 349 350 /// Instruction Visitor class to visit select instructions. 351 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 352 Function &F; 353 unsigned NSIs = 0; // Number of select instructions instrumented. 354 VisitMode Mode = VM_counting; // Visiting mode. 355 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 356 unsigned TotalNumCtrs = 0; // Total number of counters 357 GlobalVariable *FuncNameVar = nullptr; 358 uint64_t FuncHash = 0; 359 PGOUseFunc *UseFunc = nullptr; 360 361 SelectInstVisitor(Function &Func) : F(Func) {} 362 363 void countSelects(Function &Func) { 364 NSIs = 0; 365 Mode = VM_counting; 366 visit(Func); 367 } 368 369 // Visit the IR stream and instrument all select instructions. \p 370 // Ind is a pointer to the counter index variable; \p TotalNC 371 // is the total number of counters; \p FNV is the pointer to the 372 // PGO function name var; \p FHash is the function hash. 373 void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC, 374 GlobalVariable *FNV, uint64_t FHash) { 375 Mode = VM_instrument; 376 CurCtrIdx = Ind; 377 TotalNumCtrs = TotalNC; 378 FuncHash = FHash; 379 FuncNameVar = FNV; 380 visit(Func); 381 } 382 383 // Visit the IR stream and annotate all select instructions. 384 void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) { 385 Mode = VM_annotate; 386 UseFunc = UF; 387 CurCtrIdx = Ind; 388 visit(Func); 389 } 390 391 void instrumentOneSelectInst(SelectInst &SI); 392 void annotateOneSelectInst(SelectInst &SI); 393 394 // Visit \p SI instruction and perform tasks according to visit mode. 395 void visitSelectInst(SelectInst &SI); 396 397 // Return the number of select instructions. This needs be called after 398 // countSelects(). 
399 unsigned getNumOfSelectInsts() const { return NSIs; } 400 }; 401 402 403 class PGOInstrumentationGenLegacyPass : public ModulePass { 404 public: 405 static char ID; 406 407 PGOInstrumentationGenLegacyPass(bool IsCS = false) 408 : ModulePass(ID), IsCS(IsCS) { 409 initializePGOInstrumentationGenLegacyPassPass( 410 *PassRegistry::getPassRegistry()); 411 } 412 413 StringRef getPassName() const override { return "PGOInstrumentationGenPass"; } 414 415 private: 416 // Is this is context-sensitive instrumentation. 417 bool IsCS; 418 bool runOnModule(Module &M) override; 419 420 void getAnalysisUsage(AnalysisUsage &AU) const override { 421 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 422 AU.addRequired<TargetLibraryInfoWrapperPass>(); 423 } 424 }; 425 426 class PGOInstrumentationUseLegacyPass : public ModulePass { 427 public: 428 static char ID; 429 430 // Provide the profile filename as the parameter. 431 PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false) 432 : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) { 433 if (!PGOTestProfileFile.empty()) 434 ProfileFileName = PGOTestProfileFile; 435 initializePGOInstrumentationUseLegacyPassPass( 436 *PassRegistry::getPassRegistry()); 437 } 438 439 StringRef getPassName() const override { return "PGOInstrumentationUsePass"; } 440 441 private: 442 std::string ProfileFileName; 443 // Is this is context-sensitive instrumentation use. 
444 bool IsCS; 445 446 bool runOnModule(Module &M) override; 447 448 void getAnalysisUsage(AnalysisUsage &AU) const override { 449 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 450 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 451 AU.addRequired<TargetLibraryInfoWrapperPass>(); 452 } 453 }; 454 455 class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass { 456 public: 457 static char ID; 458 StringRef getPassName() const override { 459 return "PGOInstrumentationGenCreateVarPass"; 460 } 461 PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "") 462 : ModulePass(ID), InstrProfileOutput(CSInstrName) { 463 initializePGOInstrumentationGenCreateVarLegacyPassPass( 464 *PassRegistry::getPassRegistry()); 465 } 466 467 private: 468 bool runOnModule(Module &M) override { 469 createProfileFileNameVar(M, InstrProfileOutput); 470 // The variable in a comdat may be discarded by LTO. Ensure the 471 // declaration will be retained. 472 appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true, 473 PGOInstrumentEntry, 474 DebugInfoCorrelate)); 475 return false; 476 } 477 std::string InstrProfileOutput; 478 }; 479 480 } // end anonymous namespace 481 482 char PGOInstrumentationGenLegacyPass::ID = 0; 483 484 INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 485 "PGO instrumentation.", false, false) 486 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 487 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 488 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 489 INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 490 "PGO instrumentation.", false, false) 491 492 ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) { 493 return new PGOInstrumentationGenLegacyPass(IsCS); 494 } 495 496 char PGOInstrumentationUseLegacyPass::ID = 0; 497 498 INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 499 "Read PGO instrumentation profile.", false, false) 500 
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 501 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 502 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 503 INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 504 "Read PGO instrumentation profile.", false, false) 505 506 ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename, 507 bool IsCS) { 508 return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS); 509 } 510 511 char PGOInstrumentationGenCreateVarLegacyPass::ID = 0; 512 513 INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass, 514 "pgo-instr-gen-create-var", 515 "Create PGO instrumentation version variable for CSPGO.", false, 516 false) 517 518 ModulePass * 519 llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) { 520 return new PGOInstrumentationGenCreateVarLegacyPass(std::string(CSInstrName)); 521 } 522 523 namespace { 524 525 /// An MST based instrumentation for PGO 526 /// 527 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO 528 /// in the function level. 529 struct PGOEdge { 530 // This class implements the CFG edges. Note the CFG can be a multi-graph. 531 // So there might be multiple edges with same SrcBB and DestBB. 532 const BasicBlock *SrcBB; 533 const BasicBlock *DestBB; 534 uint64_t Weight; 535 bool InMST = false; 536 bool Removed = false; 537 bool IsCritical = false; 538 539 PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 540 : SrcBB(Src), DestBB(Dest), Weight(W) {} 541 542 // Return the information string of an edge. 543 std::string infoString() const { 544 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 545 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str(); 546 } 547 }; 548 549 // This class stores the auxiliary information for each BB. 
550 struct BBInfo { 551 BBInfo *Group; 552 uint32_t Index; 553 uint32_t Rank = 0; 554 555 BBInfo(unsigned IX) : Group(this), Index(IX) {} 556 557 // Return the information string of this object. 558 std::string infoString() const { 559 return (Twine("Index=") + Twine(Index)).str(); 560 } 561 562 // Empty function -- only applicable to UseBBInfo. 563 void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 564 565 // Empty function -- only applicable to UseBBInfo. 566 void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 567 }; 568 569 // This class implements the CFG edges. Note the CFG can be a multi-graph. 570 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 571 private: 572 Function &F; 573 574 // Is this is context-sensitive instrumentation. 575 bool IsCS; 576 577 // A map that stores the Comdat group in function F. 578 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 579 580 ValueProfileCollector VPC; 581 582 void computeCFGHash(); 583 void renameComdatFunction(); 584 585 public: 586 std::vector<std::vector<VPCandidateInfo>> ValueSites; 587 SelectInstVisitor SIVisitor; 588 std::string FuncName; 589 GlobalVariable *FuncNameVar; 590 591 // CFG hash value for this function. 592 uint64_t FunctionHash = 0; 593 594 // The Minimum Spanning Tree of function CFG. 595 CFGMST<Edge, BBInfo> MST; 596 597 // Collect all the BBs that will be instrumented, and store them in 598 // InstrumentBBs. 599 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); 600 601 // Give an edge, find the BB that will be instrumented. 602 // Return nullptr if there is no BB to be instrumented. 603 BasicBlock *getInstrBB(Edge *E); 604 605 // Return the auxiliary BB information. 606 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 607 608 // Return the auxiliary BB information if available. 609 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 610 611 // Dump edges and BB information. 
612 void dumpInfo(std::string Str = "") const { 613 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " + 614 Twine(FunctionHash) + "\t" + Str); 615 } 616 617 FuncPGOInstrumentation( 618 Function &Func, TargetLibraryInfo &TLI, 619 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 620 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 621 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false, 622 bool InstrumentFuncEntry = true) 623 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), 624 ValueSites(IPVK_Last + 1), SIVisitor(Func), 625 MST(F, InstrumentFuncEntry, BPI, BFI) { 626 // This should be done before CFG hash computation. 627 SIVisitor.countSelects(Func); 628 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); 629 if (!IsCS) { 630 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 631 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 632 NumOfPGOBB += MST.BBInfos.size(); 633 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); 634 } else { 635 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 636 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 637 NumOfCSPGOBB += MST.BBInfos.size(); 638 } 639 640 FuncName = getPGOFuncName(F); 641 computeCFGHash(); 642 if (!ComdatMembers.empty()) 643 renameComdatFunction(); 644 LLVM_DEBUG(dumpInfo("after CFGMST")); 645 646 for (auto &E : MST.AllEdges) { 647 if (E->Removed) 648 continue; 649 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; 650 if (!E->InMST) 651 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; 652 } 653 654 if (CreateGlobalVar) 655 FuncNameVar = createPGOFuncNameVar(F, FuncName); 656 } 657 }; 658 659 } // end anonymous namespace 660 661 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 662 // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers 663 // of selects, indirect calls, mem ops and edges. 
664 template <class Edge, class BBInfo> 665 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 666 std::vector<uint8_t> Indexes; 667 JamCRC JC; 668 for (auto &BB : F) { 669 const Instruction *TI = BB.getTerminator(); 670 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 671 BasicBlock *Succ = TI->getSuccessor(I); 672 auto BI = findBBInfo(Succ); 673 if (BI == nullptr) 674 continue; 675 uint32_t Index = BI->Index; 676 for (int J = 0; J < 4; J++) 677 Indexes.push_back((uint8_t)(Index >> (J * 8))); 678 } 679 } 680 JC.update(Indexes); 681 682 JamCRC JCH; 683 if (PGOOldCFGHashing) { 684 // Hash format for context sensitive profile. Reserve 4 bits for other 685 // information. 686 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | 687 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | 688 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 | 689 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); 690 } else { 691 // The higher 32 bits. 692 auto updateJCH = [&JCH](uint64_t Num) { 693 uint8_t Data[8]; 694 support::endian::write64le(Data, Num); 695 JCH.update(Data); 696 }; 697 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts()); 698 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size()); 699 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size()); 700 updateJCH((uint64_t)MST.AllEdges.size()); 701 702 // Hash format for context sensitive profile. Reserve 4 bits for other 703 // information. 704 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); 705 } 706 707 // Reserve bit 60-63 for other information purpose. 
708 FunctionHash &= 0x0FFFFFFFFFFFFFFF; 709 if (IsCS) 710 NamedInstrProfRecord::setCSFlagInHash(FunctionHash); 711 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" 712 << " CRC = " << JC.getCRC() 713 << ", Selects = " << SIVisitor.getNumOfSelectInsts() 714 << ", Edges = " << MST.AllEdges.size() << ", ICSites = " 715 << ValueSites[IPVK_IndirectCallTarget].size()); 716 if (!PGOOldCFGHashing) { 717 LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size() 718 << ", High32 CRC = " << JCH.getCRC()); 719 } 720 LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";); 721 } 722 723 // Check if we can safely rename this Comdat function. 724 static bool canRenameComdat( 725 Function &F, 726 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 727 if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 728 return false; 729 730 // FIXME: Current only handle those Comdat groups that only containing one 731 // function. 732 // (1) For a Comdat group containing multiple functions, we need to have a 733 // unique postfix based on the hashes for each function. There is a 734 // non-trivial code refactoring to do this efficiently. 735 // (2) Variables can not be renamed, so we can not rename Comdat function in a 736 // group including global vars. 737 Comdat *C = F.getComdat(); 738 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 739 assert(!isa<GlobalAlias>(CM.second)); 740 Function *FM = dyn_cast<Function>(CM.second); 741 if (FM != &F) 742 return false; 743 } 744 return true; 745 } 746 747 // Append the CFGHash to the Comdat function name. 748 template <class Edge, class BBInfo> 749 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 750 if (!canRenameComdat(F, ComdatMembers)) 751 return; 752 std::string OrigName = F.getName().str(); 753 std::string NewFuncName = 754 Twine(F.getName() + "." 
+ Twine(FunctionHash)).str(); 755 F.setName(Twine(NewFuncName)); 756 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 757 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 758 Comdat *NewComdat; 759 Module *M = F.getParent(); 760 // For AvailableExternallyLinkage functions, change the linkage to 761 // LinkOnceODR and put them into comdat. This is because after renaming, there 762 // is no backup external copy available for the function. 763 if (!F.hasComdat()) { 764 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 765 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 766 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 767 F.setComdat(NewComdat); 768 return; 769 } 770 771 // This function belongs to a single function Comdat group. 772 Comdat *OrigComdat = F.getComdat(); 773 std::string NewComdatName = 774 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 775 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 776 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 777 778 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 779 // Must be a function. 780 cast<Function>(CM.second)->setComdat(NewComdat); 781 } 782 } 783 784 // Collect all the BBs that will be instruments and return them in 785 // InstrumentBBs and setup InEdges/OutEdge for UseBBInfo. 786 template <class Edge, class BBInfo> 787 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( 788 std::vector<BasicBlock *> &InstrumentBBs) { 789 // Use a worklist as we will update the vector during the iteration. 790 std::vector<Edge *> EdgeList; 791 EdgeList.reserve(MST.AllEdges.size()); 792 for (auto &E : MST.AllEdges) 793 EdgeList.push_back(E.get()); 794 795 for (auto &E : EdgeList) { 796 BasicBlock *InstrBB = getInstrBB(E); 797 if (InstrBB) 798 InstrumentBBs.push_back(InstrBB); 799 } 800 801 // Set up InEdges/OutEdges for all BBs. 
802 for (auto &E : MST.AllEdges) { 803 if (E->Removed) 804 continue; 805 const BasicBlock *SrcBB = E->SrcBB; 806 const BasicBlock *DestBB = E->DestBB; 807 BBInfo &SrcInfo = getBBInfo(SrcBB); 808 BBInfo &DestInfo = getBBInfo(DestBB); 809 SrcInfo.addOutEdge(E.get()); 810 DestInfo.addInEdge(E.get()); 811 } 812 } 813 814 // Given a CFG E to be instrumented, find which BB to place the instrumented 815 // code. The function will split the critical edge if necessary. 816 template <class Edge, class BBInfo> 817 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 818 if (E->InMST || E->Removed) 819 return nullptr; 820 821 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 822 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 823 // For a fake edge, instrument the real BB. 824 if (SrcBB == nullptr) 825 return DestBB; 826 if (DestBB == nullptr) 827 return SrcBB; 828 829 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { 830 // There are basic blocks (such as catchswitch) cannot be instrumented. 831 // If the returned first insertion point is the end of BB, skip this BB. 832 if (BB->getFirstInsertionPt() == BB->end()) 833 return nullptr; 834 return BB; 835 }; 836 837 // Instrument the SrcBB if it has a single successor, 838 // otherwise, the DestBB if this is not a critical edge. 839 Instruction *TI = SrcBB->getTerminator(); 840 if (TI->getNumSuccessors() <= 1) 841 return canInstrument(SrcBB); 842 if (!E->IsCritical) 843 return canInstrument(DestBB); 844 845 // Some IndirectBr critical edges cannot be split by the previous 846 // SplitIndirectBrCriticalEdges call. Bail out. 847 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 848 BasicBlock *InstrBB = 849 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum); 850 if (!InstrBB) { 851 LLVM_DEBUG( 852 dbgs() << "Fail to split critical edge: not instrument this edge.\n"); 853 return nullptr; 854 } 855 // For a critical edge, we have to split. 
Instrument the newly 856 // created BB. 857 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; 858 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index 859 << " --> " << getBBInfo(DestBB).Index << "\n"); 860 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. 861 MST.addEdge(SrcBB, InstrBB, 0); 862 // Second one: Add new edge of InstrBB->DestBB. 863 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); 864 NewEdge1.InMST = true; 865 E->Removed = true; 866 867 return canInstrument(InstrBB); 868 } 869 870 // When generating value profiling calls on Windows routines that make use of 871 // handler funclets for exception processing an operand bundle needs to attached 872 // to the called function. This routine will set \p OpBundles to contain the 873 // funclet information, if any is needed, that should be placed on the generated 874 // value profiling call for the value profile candidate call. 875 static void 876 populateEHOperandBundle(VPCandidateInfo &Cand, 877 DenseMap<BasicBlock *, ColorVector> &BlockColors, 878 SmallVectorImpl<OperandBundleDef> &OpBundles) { 879 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst); 880 if (!OrigCall) 881 return; 882 883 if (!isa<IntrinsicInst>(OrigCall)) { 884 // The instrumentation call should belong to the same funclet as a 885 // non-intrinsic call, so just copy the operand bundle, if any exists. 886 Optional<OperandBundleUse> ParentFunclet = 887 OrigCall->getOperandBundle(LLVMContext::OB_funclet); 888 if (ParentFunclet) 889 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet)); 890 } else { 891 // Intrinsics or other instructions do not get funclet information from the 892 // front-end. Need to use the BlockColors that was computed by the routine 893 // colorEHFunclets to determine whether a funclet is needed. 
894 if (!BlockColors.empty()) { 895 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second; 896 assert(CV.size() == 1 && "non-unique color for block!"); 897 Instruction *EHPad = CV.front()->getFirstNonPHI(); 898 if (EHPad->isEHPad()) 899 OpBundles.emplace_back("funclet", EHPad); 900 } 901 } 902 } 903 904 // Visit all edge and instrument the edges not in MST, and do value profiling. 905 // Critical edges will be split. 906 static void instrumentOneFunc( 907 Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, 908 BlockFrequencyInfo *BFI, 909 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 910 bool IsCS) { 911 // Split indirectbr critical edges here before computing the MST rather than 912 // later in getInstrBB() to avoid invalidating it. 913 SplitIndirectBrCriticalEdges(F, BPI, BFI); 914 915 FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo( 916 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry); 917 std::vector<BasicBlock *> InstrumentBBs; 918 FuncInfo.getInstrumentBBs(InstrumentBBs); 919 unsigned NumCounters = 920 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 921 922 uint32_t I = 0; 923 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); 924 for (auto *InstrBB : InstrumentBBs) { 925 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 926 assert(Builder.GetInsertPoint() != InstrBB->end() && 927 "Cannot get the Instrumentation point"); 928 Builder.CreateCall( 929 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), 930 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 931 Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters), 932 Builder.getInt32(I++)}); 933 } 934 935 // Now instrument select instructions: 936 FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar, 937 FuncInfo.FunctionHash); 938 assert(I == NumCounters); 939 940 if (DisableValueProfiling) 941 return; 942 943 NumOfPGOICall += 
FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); 944 945 // Intrinsic function calls do not have funclet operand bundles needed for 946 // Windows exception handling attached to them. However, if value profiling is 947 // inserted for one of these calls, then a funclet value will need to be set 948 // on the instrumentation call based on the funclet coloring. 949 DenseMap<BasicBlock *, ColorVector> BlockColors; 950 if (F.hasPersonalityFn() && 951 isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) 952 BlockColors = colorEHFunclets(F); 953 954 // For each VP Kind, walk the VP candidates and instrument each one. 955 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { 956 unsigned SiteIndex = 0; 957 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) 958 continue; 959 960 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { 961 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] 962 << " site: CallSite Index = " << SiteIndex << "\n"); 963 964 IRBuilder<> Builder(Cand.InsertPt); 965 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && 966 "Cannot get the Instrumentation point"); 967 968 Value *ToProfile = nullptr; 969 if (Cand.V->getType()->isIntegerTy()) 970 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty()); 971 else if (Cand.V->getType()->isPointerTy()) 972 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); 973 assert(ToProfile && "value profiling Value is of unexpected type"); 974 975 SmallVector<OperandBundleDef, 1> OpBundles; 976 populateEHOperandBundle(Cand, BlockColors, OpBundles); 977 Builder.CreateCall( 978 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 979 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 980 Builder.getInt64(FuncInfo.FunctionHash), ToProfile, 981 Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}, 982 OpBundles); 983 } 984 } // IPVK_First <= Kind <= IPVK_Last 985 } 986 987 namespace { 988 989 // This class 
represents a CFG edge in profile use compilation. 990 struct PGOUseEdge : public PGOEdge { 991 bool CountValid = false; 992 uint64_t CountValue = 0; 993 994 PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 995 : PGOEdge(Src, Dest, W) {} 996 997 // Set edge count value 998 void setEdgeCount(uint64_t Value) { 999 CountValue = Value; 1000 CountValid = true; 1001 } 1002 1003 // Return the information string for this object. 1004 std::string infoString() const { 1005 if (!CountValid) 1006 return PGOEdge::infoString(); 1007 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) 1008 .str(); 1009 } 1010 }; 1011 1012 using DirectEdges = SmallVector<PGOUseEdge *, 2>; 1013 1014 // This class stores the auxiliary information for each BB. 1015 struct UseBBInfo : public BBInfo { 1016 uint64_t CountValue = 0; 1017 bool CountValid; 1018 int32_t UnknownCountInEdge = 0; 1019 int32_t UnknownCountOutEdge = 0; 1020 DirectEdges InEdges; 1021 DirectEdges OutEdges; 1022 1023 UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {} 1024 1025 UseBBInfo(unsigned IX, uint64_t C) 1026 : BBInfo(IX), CountValue(C), CountValid(true) {} 1027 1028 // Set the profile count value for this BB. 1029 void setBBInfoCount(uint64_t Value) { 1030 CountValue = Value; 1031 CountValid = true; 1032 } 1033 1034 // Return the information string of this object. 1035 std::string infoString() const { 1036 if (!CountValid) 1037 return BBInfo::infoString(); 1038 return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str(); 1039 } 1040 1041 // Add an OutEdge and update the edge count. 1042 void addOutEdge(PGOUseEdge *E) { 1043 OutEdges.push_back(E); 1044 UnknownCountOutEdge++; 1045 } 1046 1047 // Add an InEdge and update the edge count. 1048 void addInEdge(PGOUseEdge *E) { 1049 InEdges.push_back(E); 1050 UnknownCountInEdge++; 1051 } 1052 }; 1053 1054 } // end anonymous namespace 1055 1056 // Sum up the count values for all the edges. 
1057 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 1058 uint64_t Total = 0; 1059 for (auto &E : Edges) { 1060 if (E->Removed) 1061 continue; 1062 Total += E->CountValue; 1063 } 1064 return Total; 1065 } 1066 1067 namespace { 1068 1069 class PGOUseFunc { 1070 public: 1071 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, 1072 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 1073 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, 1074 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry) 1075 : F(Func), M(Modu), BFI(BFIin), PSI(PSI), 1076 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS, 1077 InstrumentFuncEntry), 1078 FreqAttr(FFA_Normal), IsCS(IsCS) {} 1079 1080 // Read counts for the instrumented BB from profile. 1081 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1082 bool &AllMinusOnes); 1083 1084 // Populate the counts for all BBs. 1085 void populateCounters(); 1086 1087 // Set the branch weights based on the count values. 1088 void setBranchWeights(); 1089 1090 // Annotate the value profile call sites for all value kind. 1091 void annotateValueSites(); 1092 1093 // Annotate the value profile call sites for one value kind. 1094 void annotateValueSites(uint32_t Kind); 1095 1096 // Annotate the irreducible loop header weights. 1097 void annotateIrrLoopHeaderWeights(); 1098 1099 // The hotness of the function from the profile count. 1100 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 1101 1102 // Return the function hotness from the profile. 1103 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 1104 1105 // Return the function hash. 1106 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 1107 1108 // Return the profile record for this function; 1109 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 1110 1111 // Return the auxiliary BB information. 
1112 UseBBInfo &getBBInfo(const BasicBlock *BB) const { 1113 return FuncInfo.getBBInfo(BB); 1114 } 1115 1116 // Return the auxiliary BB information if available. 1117 UseBBInfo *findBBInfo(const BasicBlock *BB) const { 1118 return FuncInfo.findBBInfo(BB); 1119 } 1120 1121 Function &getFunc() const { return F; } 1122 1123 void dumpInfo(std::string Str = "") const { 1124 FuncInfo.dumpInfo(Str); 1125 } 1126 1127 uint64_t getProgramMaxCount() const { return ProgramMaxCount; } 1128 private: 1129 Function &F; 1130 Module *M; 1131 BlockFrequencyInfo *BFI; 1132 ProfileSummaryInfo *PSI; 1133 1134 // This member stores the shared information with class PGOGenFunc. 1135 FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo; 1136 1137 // The maximum count value in the profile. This is only used in PGO use 1138 // compilation. 1139 uint64_t ProgramMaxCount; 1140 1141 // Position of counter that remains to be read. 1142 uint32_t CountPosition = 0; 1143 1144 // Total size of the profile count for this function. 1145 uint32_t ProfileCountSize = 0; 1146 1147 // ProfileRecord for this function. 1148 InstrProfRecord ProfileRecord; 1149 1150 // Function hotness info derived from profile. 1151 FuncFreqAttr FreqAttr; 1152 1153 // Is to use the context sensitive profile. 1154 bool IsCS; 1155 1156 // Find the Instrumented BB and set the value. Return false on error. 1157 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 1158 1159 // Set the edge counter value for the unknown edge -- there should be only 1160 // one unknown edge. 1161 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 1162 1163 // Return FuncName string; 1164 std::string getFuncName() const { return FuncInfo.FuncName; } 1165 1166 // Set the hot/cold inline hints based on the count values. 1167 // FIXME: This function should be removed once the functionality in 1168 // the inliner is implemented. 
1169 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 1170 if (PSI->isHotCount(EntryCount)) 1171 FreqAttr = FFA_Hot; 1172 else if (PSI->isColdCount(MaxCount)) 1173 FreqAttr = FFA_Cold; 1174 } 1175 }; 1176 1177 } // end anonymous namespace 1178 1179 // Visit all the edges and assign the count value for the instrumented 1180 // edges and the BB. Return false on error. 1181 bool PGOUseFunc::setInstrumentedCounts( 1182 const std::vector<uint64_t> &CountFromProfile) { 1183 1184 std::vector<BasicBlock *> InstrumentBBs; 1185 FuncInfo.getInstrumentBBs(InstrumentBBs); 1186 unsigned NumCounters = 1187 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 1188 // The number of counters here should match the number of counters 1189 // in profile. Return if they mismatch. 1190 if (NumCounters != CountFromProfile.size()) { 1191 return false; 1192 } 1193 auto *FuncEntry = &*F.begin(); 1194 1195 // Set the profile count to the Instrumented BBs. 1196 uint32_t I = 0; 1197 for (BasicBlock *InstrBB : InstrumentBBs) { 1198 uint64_t CountValue = CountFromProfile[I++]; 1199 UseBBInfo &Info = getBBInfo(InstrBB); 1200 // If we reach here, we know that we have some nonzero count 1201 // values in this function. The entry count should not be 0. 1202 // Fix it if necessary. 1203 if (InstrBB == FuncEntry && CountValue == 0) 1204 CountValue = 1; 1205 Info.setBBInfoCount(CountValue); 1206 } 1207 ProfileCountSize = CountFromProfile.size(); 1208 CountPosition = I; 1209 1210 // Set the edge count and update the count of unknown edges for BBs. 1211 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { 1212 E->setEdgeCount(Value); 1213 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1214 this->getBBInfo(E->DestBB).UnknownCountInEdge--; 1215 }; 1216 1217 // Set the profile count the Instrumented edges. There are BBs that not in 1218 // MST but not instrumented. Need to set the edge count value so that we can 1219 // populate the profile counts later. 
1220 for (auto &E : FuncInfo.MST.AllEdges) { 1221 if (E->Removed || E->InMST) 1222 continue; 1223 const BasicBlock *SrcBB = E->SrcBB; 1224 UseBBInfo &SrcInfo = getBBInfo(SrcBB); 1225 1226 // If only one out-edge, the edge profile count should be the same as BB 1227 // profile count. 1228 if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1) 1229 setEdgeCount(E.get(), SrcInfo.CountValue); 1230 else { 1231 const BasicBlock *DestBB = E->DestBB; 1232 UseBBInfo &DestInfo = getBBInfo(DestBB); 1233 // If only one in-edge, the edge profile count should be the same as BB 1234 // profile count. 1235 if (DestInfo.CountValid && DestInfo.InEdges.size() == 1) 1236 setEdgeCount(E.get(), DestInfo.CountValue); 1237 } 1238 if (E->CountValid) 1239 continue; 1240 // E's count should have been set from profile. If not, this meenas E skips 1241 // the instrumentation. We set the count to 0. 1242 setEdgeCount(E.get(), 0); 1243 } 1244 return true; 1245 } 1246 1247 // Set the count value for the unknown edge. There should be one and only one 1248 // unknown edge in Edges vector. 1249 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 1250 for (auto &E : Edges) { 1251 if (E->CountValid) 1252 continue; 1253 E->setEdgeCount(Value); 1254 1255 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1256 getBBInfo(E->DestBB).UnknownCountInEdge--; 1257 return; 1258 } 1259 llvm_unreachable("Cannot find the unknown count edge"); 1260 } 1261 1262 // Emit function metadata indicating PGO profile mismatch. 1263 static void annotateFunctionWithHashMismatch(Function &F, 1264 LLVMContext &ctx) { 1265 const char MetadataName[] = "instr_prof_hash_mismatch"; 1266 SmallVector<Metadata *, 2> Names; 1267 // If this metadata already exists, ignore. 
1268 auto *Existing = F.getMetadata(LLVMContext::MD_annotation); 1269 if (Existing) { 1270 MDTuple *Tuple = cast<MDTuple>(Existing); 1271 for (auto &N : Tuple->operands()) { 1272 if (cast<MDString>(N.get())->getString() == MetadataName) 1273 return; 1274 Names.push_back(N.get()); 1275 } 1276 } 1277 1278 MDBuilder MDB(ctx); 1279 Names.push_back(MDB.createString(MetadataName)); 1280 MDNode *MD = MDTuple::get(ctx, Names); 1281 F.setMetadata(LLVMContext::MD_annotation, MD); 1282 } 1283 1284 // Read the profile from ProfileFileName and assign the value to the 1285 // instrumented BB and the edges. This function also updates ProgramMaxCount. 1286 // Return true if the profile are successfully read, and false on errors. 1287 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1288 bool &AllMinusOnes) { 1289 auto &Ctx = M->getContext(); 1290 Expected<InstrProfRecord> Result = 1291 PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); 1292 if (Error E = Result.takeError()) { 1293 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 1294 auto Err = IPE.get(); 1295 bool SkipWarning = false; 1296 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " 1297 << FuncInfo.FuncName << ": "); 1298 if (Err == instrprof_error::unknown_function) { 1299 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; 1300 SkipWarning = !PGOWarnMissing; 1301 LLVM_DEBUG(dbgs() << "unknown function"); 1302 } else if (Err == instrprof_error::hash_mismatch || 1303 Err == instrprof_error::malformed) { 1304 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; 1305 SkipWarning = 1306 NoPGOWarnMismatch || 1307 (NoPGOWarnMismatchComdat && 1308 (F.hasComdat() || 1309 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 1310 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); 1311 // Emit function metadata indicating PGO profile mismatch. 
1312 annotateFunctionWithHashMismatch(F, M->getContext()); 1313 } 1314 1315 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); 1316 if (SkipWarning) 1317 return; 1318 1319 std::string Msg = IPE.message() + std::string(" ") + F.getName().str() + 1320 std::string(" Hash = ") + 1321 std::to_string(FuncInfo.FunctionHash); 1322 1323 Ctx.diagnose( 1324 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1325 }); 1326 return false; 1327 } 1328 ProfileRecord = std::move(Result.get()); 1329 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 1330 1331 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 1332 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 1333 AllMinusOnes = (CountFromProfile.size() > 0); 1334 uint64_t ValueSum = 0; 1335 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 1336 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 1337 ValueSum += CountFromProfile[I]; 1338 if (CountFromProfile[I] != (uint64_t)-1) 1339 AllMinusOnes = false; 1340 } 1341 AllZeros = (ValueSum == 0); 1342 1343 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 1344 1345 getBBInfo(nullptr).UnknownCountOutEdge = 2; 1346 getBBInfo(nullptr).UnknownCountInEdge = 2; 1347 1348 if (!setInstrumentedCounts(CountFromProfile)) { 1349 LLVM_DEBUG( 1350 dbgs() << "Inconsistent number of counts, skipping this function"); 1351 Ctx.diagnose(DiagnosticInfoPGOProfile( 1352 M->getName().data(), 1353 Twine("Inconsistent number of counts in ") + F.getName().str() 1354 + Twine(": the profile may be stale or there is a function name collision."), 1355 DS_Warning)); 1356 return false; 1357 } 1358 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); 1359 return true; 1360 } 1361 1362 // Populate the counters from instrumented BBs to all BBs. 1363 // In the end of this operation, all BBs should have a valid count value. 
1364 void PGOUseFunc::populateCounters() { 1365 bool Changes = true; 1366 unsigned NumPasses = 0; 1367 while (Changes) { 1368 NumPasses++; 1369 Changes = false; 1370 1371 // For efficient traversal, it's better to start from the end as most 1372 // of the instrumented edges are at the end. 1373 for (auto &BB : reverse(F)) { 1374 UseBBInfo *Count = findBBInfo(&BB); 1375 if (Count == nullptr) 1376 continue; 1377 if (!Count->CountValid) { 1378 if (Count->UnknownCountOutEdge == 0) { 1379 Count->CountValue = sumEdgeCount(Count->OutEdges); 1380 Count->CountValid = true; 1381 Changes = true; 1382 } else if (Count->UnknownCountInEdge == 0) { 1383 Count->CountValue = sumEdgeCount(Count->InEdges); 1384 Count->CountValid = true; 1385 Changes = true; 1386 } 1387 } 1388 if (Count->CountValid) { 1389 if (Count->UnknownCountOutEdge == 1) { 1390 uint64_t Total = 0; 1391 uint64_t OutSum = sumEdgeCount(Count->OutEdges); 1392 // If the one of the successor block can early terminate (no-return), 1393 // we can end up with situation where out edge sum count is larger as 1394 // the source BB's count is collected by a post-dominated block. 1395 if (Count->CountValue > OutSum) 1396 Total = Count->CountValue - OutSum; 1397 setEdgeCount(Count->OutEdges, Total); 1398 Changes = true; 1399 } 1400 if (Count->UnknownCountInEdge == 1) { 1401 uint64_t Total = 0; 1402 uint64_t InSum = sumEdgeCount(Count->InEdges); 1403 if (Count->CountValue > InSum) 1404 Total = Count->CountValue - InSum; 1405 setEdgeCount(Count->InEdges, Total); 1406 Changes = true; 1407 } 1408 } 1409 } 1410 } 1411 1412 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 1413 #ifndef NDEBUG 1414 // Assert every BB has a valid counter. 
1415 for (auto &BB : F) { 1416 auto BI = findBBInfo(&BB); 1417 if (BI == nullptr) 1418 continue; 1419 assert(BI->CountValid && "BB count is not valid"); 1420 } 1421 #endif 1422 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 1423 uint64_t FuncMaxCount = FuncEntryCount; 1424 for (auto &BB : F) { 1425 auto BI = findBBInfo(&BB); 1426 if (BI == nullptr) 1427 continue; 1428 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); 1429 } 1430 1431 // Fix the obviously inconsistent entry count. 1432 if (FuncMaxCount > 0 && FuncEntryCount == 0) 1433 FuncEntryCount = 1; 1434 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); 1435 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 1436 1437 // Now annotate select instructions 1438 FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition); 1439 assert(CountPosition == ProfileCountSize); 1440 1441 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); 1442 } 1443 1444 // Assign the scaled count values to the BB with multiple out edges. 1445 void PGOUseFunc::setBranchWeights() { 1446 // Generate MD_prof metadata for every branch instruction. 1447 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() 1448 << " IsCS=" << IsCS << "\n"); 1449 for (auto &BB : F) { 1450 Instruction *TI = BB.getTerminator(); 1451 if (TI->getNumSuccessors() < 2) 1452 continue; 1453 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || 1454 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI))) 1455 continue; 1456 1457 if (getBBInfo(&BB).CountValue == 0) 1458 continue; 1459 1460 // We have a non-zero Branch BB. 
1461 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1462 unsigned Size = BBCountInfo.OutEdges.size(); 1463 SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 1464 uint64_t MaxCount = 0; 1465 for (unsigned s = 0; s < Size; s++) { 1466 const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 1467 const BasicBlock *SrcBB = E->SrcBB; 1468 const BasicBlock *DestBB = E->DestBB; 1469 if (DestBB == nullptr) 1470 continue; 1471 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 1472 uint64_t EdgeCount = E->CountValue; 1473 if (EdgeCount > MaxCount) 1474 MaxCount = EdgeCount; 1475 EdgeCounts[SuccNum] = EdgeCount; 1476 } 1477 setProfMetadata(M, TI, EdgeCounts, MaxCount); 1478 } 1479 } 1480 1481 static bool isIndirectBrTarget(BasicBlock *BB) { 1482 for (BasicBlock *Pred : predecessors(BB)) { 1483 if (isa<IndirectBrInst>(Pred->getTerminator())) 1484 return true; 1485 } 1486 return false; 1487 } 1488 1489 void PGOUseFunc::annotateIrrLoopHeaderWeights() { 1490 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); 1491 // Find irr loop headers 1492 for (auto &BB : F) { 1493 // As a heuristic also annotate indrectbr targets as they have a high chance 1494 // to become an irreducible loop header after the indirectbr tail 1495 // duplication. 
1496 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { 1497 Instruction *TI = BB.getTerminator(); 1498 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1499 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); 1500 } 1501 } 1502 } 1503 1504 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 1505 Module *M = F.getParent(); 1506 IRBuilder<> Builder(&SI); 1507 Type *Int64Ty = Builder.getInt64Ty(); 1508 Type *I8PtrTy = Builder.getInt8PtrTy(); 1509 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 1510 Builder.CreateCall( 1511 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 1512 {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1513 Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), 1514 Builder.getInt32(*CurCtrIdx), Step}); 1515 ++(*CurCtrIdx); 1516 } 1517 1518 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 1519 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1520 assert(*CurCtrIdx < CountFromProfile.size() && 1521 "Out of bound access of counters"); 1522 uint64_t SCounts[2]; 1523 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1524 ++(*CurCtrIdx); 1525 uint64_t TotalCount = 0; 1526 auto BI = UseFunc->findBBInfo(SI.getParent()); 1527 if (BI != nullptr) 1528 TotalCount = BI->CountValue; 1529 // False Count 1530 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1531 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1532 if (MaxCount) 1533 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1534 } 1535 1536 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1537 if (!PGOInstrSelect) 1538 return; 1539 // FIXME: do not handle this yet. 
1540 if (SI.getCondition()->getType()->isVectorTy()) 1541 return; 1542 1543 switch (Mode) { 1544 case VM_counting: 1545 NSIs++; 1546 return; 1547 case VM_instrument: 1548 instrumentOneSelectInst(SI); 1549 return; 1550 case VM_annotate: 1551 annotateOneSelectInst(SI); 1552 return; 1553 } 1554 1555 llvm_unreachable("Unknown visiting mode"); 1556 } 1557 1558 // Traverse all valuesites and annotate the instructions for all value kind. 1559 void PGOUseFunc::annotateValueSites() { 1560 if (DisableValueProfiling) 1561 return; 1562 1563 // Create the PGOFuncName meta data. 1564 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1565 1566 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1567 annotateValueSites(Kind); 1568 } 1569 1570 // Annotate the instructions for a specific value kind. 1571 void PGOUseFunc::annotateValueSites(uint32_t Kind) { 1572 assert(Kind <= IPVK_Last); 1573 unsigned ValueSiteIndex = 0; 1574 auto &ValueSites = FuncInfo.ValueSites[Kind]; 1575 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 1576 if (NumValueSites != ValueSites.size()) { 1577 auto &Ctx = M->getContext(); 1578 Ctx.diagnose(DiagnosticInfoPGOProfile( 1579 M->getName().data(), 1580 Twine("Inconsistent number of value sites for ") + 1581 Twine(ValueProfKindDescr[Kind]) + 1582 Twine(" profiling in \"") + F.getName().str() + 1583 Twine("\", possibly due to the use of a stale profile."), 1584 DS_Warning)); 1585 return; 1586 } 1587 1588 for (VPCandidateInfo &I : ValueSites) { 1589 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 1590 << "): Index = " << ValueSiteIndex << " out of " 1591 << NumValueSites << "\n"); 1592 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, 1593 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 1594 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations 1595 : MaxNumAnnotations); 1596 ValueSiteIndex++; 1597 } 1598 } 1599 1600 // Collect the set of members for each Comdat in module M and store 1601 // in ComdatMembers. 
1602 static void collectComdatMembers( 1603 Module &M, 1604 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1605 if (!DoComdatRenaming) 1606 return; 1607 for (Function &F : M) 1608 if (Comdat *C = F.getComdat()) 1609 ComdatMembers.insert(std::make_pair(C, &F)); 1610 for (GlobalVariable &GV : M.globals()) 1611 if (Comdat *C = GV.getComdat()) 1612 ComdatMembers.insert(std::make_pair(C, &GV)); 1613 for (GlobalAlias &GA : M.aliases()) 1614 if (Comdat *C = GA.getComdat()) 1615 ComdatMembers.insert(std::make_pair(C, &GA)); 1616 } 1617 1618 static bool InstrumentAllFunctions( 1619 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1620 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1621 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { 1622 // For the context-sensitve instrumentation, we should have a separated pass 1623 // (before LTO/ThinLTO linking) to create these variables. 1624 if (!IsCS) 1625 createIRLevelProfileFlagVar(M, /*IsCS=*/false, PGOInstrumentEntry, 1626 DebugInfoCorrelate); 1627 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1628 collectComdatMembers(M, ComdatMembers); 1629 1630 for (auto &F : M) { 1631 if (F.isDeclaration()) 1632 continue; 1633 if (F.hasFnAttribute(llvm::Attribute::NoProfile)) 1634 continue; 1635 auto &TLI = LookupTLI(F); 1636 auto *BPI = LookupBPI(F); 1637 auto *BFI = LookupBFI(F); 1638 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS); 1639 } 1640 return true; 1641 } 1642 1643 PreservedAnalyses 1644 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) { 1645 createProfileFileNameVar(M, CSInstrName); 1646 // The variable in a comdat may be discarded by LTO. Ensure the declaration 1647 // will be retained. 
1648 appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true, 1649 PGOInstrumentEntry, 1650 DebugInfoCorrelate)); 1651 return PreservedAnalyses::all(); 1652 } 1653 1654 bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) { 1655 if (skipModule(M)) 1656 return false; 1657 1658 auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { 1659 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 1660 }; 1661 auto LookupBPI = [this](Function &F) { 1662 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1663 }; 1664 auto LookupBFI = [this](Function &F) { 1665 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1666 }; 1667 return InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS); 1668 } 1669 1670 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1671 ModuleAnalysisManager &AM) { 1672 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1673 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1674 return FAM.getResult<TargetLibraryAnalysis>(F); 1675 }; 1676 auto LookupBPI = [&FAM](Function &F) { 1677 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1678 }; 1679 auto LookupBFI = [&FAM](Function &F) { 1680 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1681 }; 1682 1683 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS)) 1684 return PreservedAnalyses::all(); 1685 1686 return PreservedAnalyses::none(); 1687 } 1688 1689 // Using the ratio b/w sums of profile count values and BFI count values to 1690 // adjust the func entry count. 
1691 static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, 1692 BranchProbabilityInfo &NBPI) { 1693 Function &F = Func.getFunc(); 1694 BlockFrequencyInfo NBFI(F, NBPI, LI); 1695 #ifndef NDEBUG 1696 auto BFIEntryCount = F.getEntryCount(); 1697 assert(BFIEntryCount.hasValue() && (BFIEntryCount->getCount() > 0) && 1698 "Invalid BFI Entrycount"); 1699 #endif 1700 auto SumCount = APFloat::getZero(APFloat::IEEEdouble()); 1701 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble()); 1702 for (auto &BBI : F) { 1703 uint64_t CountValue = 0; 1704 uint64_t BFICountValue = 0; 1705 if (!Func.findBBInfo(&BBI)) 1706 continue; 1707 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1708 CountValue = Func.getBBInfo(&BBI).CountValue; 1709 BFICountValue = BFICount.getValue(); 1710 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven); 1711 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven); 1712 } 1713 if (SumCount.isZero()) 1714 return; 1715 1716 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan && 1717 "Incorrect sum of BFI counts"); 1718 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual) 1719 return; 1720 double Scale = (SumCount / SumBFICount).convertToDouble(); 1721 if (Scale < 1.001 && Scale > 0.999) 1722 return; 1723 1724 uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue; 1725 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale; 1726 if (NewEntryCount == 0) 1727 NewEntryCount = 1; 1728 if (NewEntryCount != FuncEntryCount) { 1729 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real)); 1730 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName() 1731 << ", entry_count " << FuncEntryCount << " --> " 1732 << NewEntryCount << "\n"); 1733 } 1734 } 1735 1736 // Compare the profile count values with BFI count values, and print out 1737 // the non-matching ones. 
1738 static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, 1739 BranchProbabilityInfo &NBPI, 1740 uint64_t HotCountThreshold, 1741 uint64_t ColdCountThreshold) { 1742 Function &F = Func.getFunc(); 1743 BlockFrequencyInfo NBFI(F, NBPI, LI); 1744 // bool PrintFunc = false; 1745 bool HotBBOnly = PGOVerifyHotBFI; 1746 std::string Msg; 1747 OptimizationRemarkEmitter ORE(&F); 1748 1749 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0; 1750 for (auto &BBI : F) { 1751 uint64_t CountValue = 0; 1752 uint64_t BFICountValue = 0; 1753 1754 if (Func.getBBInfo(&BBI).CountValid) 1755 CountValue = Func.getBBInfo(&BBI).CountValue; 1756 1757 BBNum++; 1758 if (CountValue) 1759 NonZeroBBNum++; 1760 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1761 if (BFICount) 1762 BFICountValue = BFICount.getValue(); 1763 1764 if (HotBBOnly) { 1765 bool rawIsHot = CountValue >= HotCountThreshold; 1766 bool BFIIsHot = BFICountValue >= HotCountThreshold; 1767 bool rawIsCold = CountValue <= ColdCountThreshold; 1768 bool ShowCount = false; 1769 if (rawIsHot && !BFIIsHot) { 1770 Msg = "raw-Hot to BFI-nonHot"; 1771 ShowCount = true; 1772 } else if (rawIsCold && BFIIsHot) { 1773 Msg = "raw-Cold to BFI-Hot"; 1774 ShowCount = true; 1775 } 1776 if (!ShowCount) 1777 continue; 1778 } else { 1779 if ((CountValue < PGOVerifyBFICutoff) && 1780 (BFICountValue < PGOVerifyBFICutoff)) 1781 continue; 1782 uint64_t Diff = (BFICountValue >= CountValue) 1783 ? 
BFICountValue - CountValue 1784 : CountValue - BFICountValue; 1785 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio) 1786 continue; 1787 } 1788 BBMisMatchNum++; 1789 1790 ORE.emit([&]() { 1791 OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "bfi-verify", 1792 F.getSubprogram(), &BBI); 1793 Remark << "BB " << ore::NV("Block", BBI.getName()) 1794 << " Count=" << ore::NV("Count", CountValue) 1795 << " BFI_Count=" << ore::NV("Count", BFICountValue); 1796 if (!Msg.empty()) 1797 Remark << " (" << Msg << ")"; 1798 return Remark; 1799 }); 1800 } 1801 if (BBMisMatchNum) 1802 ORE.emit([&]() { 1803 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify", 1804 F.getSubprogram(), &F.getEntryBlock()) 1805 << "In Func " << ore::NV("Function", F.getName()) 1806 << ": Num_of_BB=" << ore::NV("Count", BBNum) 1807 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum) 1808 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum); 1809 }); 1810 } 1811 1812 static bool annotateAllFunctions( 1813 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, 1814 function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1815 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1816 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, 1817 ProfileSummaryInfo *PSI, bool IsCS) { 1818 LLVM_DEBUG(dbgs() << "Read in profile counters: "); 1819 auto &Ctx = M.getContext(); 1820 // Read the counter array from file. 
1821 auto ReaderOrErr = 1822 IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName); 1823 if (Error E = ReaderOrErr.takeError()) { 1824 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 1825 Ctx.diagnose( 1826 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 1827 }); 1828 return false; 1829 } 1830 1831 std::unique_ptr<IndexedInstrProfReader> PGOReader = 1832 std::move(ReaderOrErr.get()); 1833 if (!PGOReader) { 1834 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 1835 StringRef("Cannot get PGOReader"))); 1836 return false; 1837 } 1838 if (!PGOReader->hasCSIRLevelProfile() && IsCS) 1839 return false; 1840 1841 // TODO: might need to change the warning once the clang option is finalized. 1842 if (!PGOReader->isIRLevelProfile()) { 1843 Ctx.diagnose(DiagnosticInfoPGOProfile( 1844 ProfileFileName.data(), "Not an IR level instrumentation profile")); 1845 return false; 1846 } 1847 1848 // Add the profile summary (read from the header of the indexed summary) here 1849 // so that we can use it below when reading counters (which checks if the 1850 // function should be marked with a cold or inlinehint attribute). 1851 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), 1852 IsCS ? ProfileSummary::PSK_CSInstr 1853 : ProfileSummary::PSK_Instr); 1854 PSI->refresh(); 1855 1856 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1857 collectComdatMembers(M, ComdatMembers); 1858 std::vector<Function *> HotFunctions; 1859 std::vector<Function *> ColdFunctions; 1860 1861 // If the profile marked as always instrument the entry BB, do the 1862 // same. 
Note this can be overwritten by the internal option in CFGMST.h 1863 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled(); 1864 if (PGOInstrumentEntry.getNumOccurrences() > 0) 1865 InstrumentFuncEntry = PGOInstrumentEntry; 1866 for (auto &F : M) { 1867 if (F.isDeclaration()) 1868 continue; 1869 auto &TLI = LookupTLI(F); 1870 auto *BPI = LookupBPI(F); 1871 auto *BFI = LookupBFI(F); 1872 // Split indirectbr critical edges here before computing the MST rather than 1873 // later in getInstrBB() to avoid invalidating it. 1874 SplitIndirectBrCriticalEdges(F, BPI, BFI); 1875 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, 1876 InstrumentFuncEntry); 1877 // When AllMinusOnes is true, it means the profile for the function 1878 // is unrepresentative and this function is actually hot. Set the 1879 // entry count of the function to be multiple times of hot threshold 1880 // and drop all its internal counters. 1881 bool AllMinusOnes = false; 1882 bool AllZeros = false; 1883 if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes)) 1884 continue; 1885 if (AllZeros) { 1886 F.setEntryCount(ProfileCount(0, Function::PCT_Real)); 1887 if (Func.getProgramMaxCount() != 0) 1888 ColdFunctions.push_back(&F); 1889 continue; 1890 } 1891 const unsigned MultiplyFactor = 3; 1892 if (AllMinusOnes) { 1893 uint64_t HotThreshold = PSI->getHotCountThreshold(); 1894 if (HotThreshold) 1895 F.setEntryCount( 1896 ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real)); 1897 HotFunctions.push_back(&F); 1898 continue; 1899 } 1900 Func.populateCounters(); 1901 Func.setBranchWeights(); 1902 Func.annotateValueSites(); 1903 Func.annotateIrrLoopHeaderWeights(); 1904 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 1905 if (FreqAttr == PGOUseFunc::FFA_Cold) 1906 ColdFunctions.push_back(&F); 1907 else if (FreqAttr == PGOUseFunc::FFA_Hot) 1908 HotFunctions.push_back(&F); 1909 if (PGOViewCounts != PGOVCT_None && 1910 (ViewBlockFreqFuncName.empty() || 1911 
F.getName().equals(ViewBlockFreqFuncName))) { 1912 LoopInfo LI{DominatorTree(F)}; 1913 std::unique_ptr<BranchProbabilityInfo> NewBPI = 1914 std::make_unique<BranchProbabilityInfo>(F, LI); 1915 std::unique_ptr<BlockFrequencyInfo> NewBFI = 1916 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); 1917 if (PGOViewCounts == PGOVCT_Graph) 1918 NewBFI->view(); 1919 else if (PGOViewCounts == PGOVCT_Text) { 1920 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n"; 1921 NewBFI->print(dbgs()); 1922 } 1923 } 1924 if (PGOViewRawCounts != PGOVCT_None && 1925 (ViewBlockFreqFuncName.empty() || 1926 F.getName().equals(ViewBlockFreqFuncName))) { 1927 if (PGOViewRawCounts == PGOVCT_Graph) 1928 if (ViewBlockFreqFuncName.empty()) 1929 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1930 else 1931 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1932 else if (PGOViewRawCounts == PGOVCT_Text) { 1933 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n"; 1934 Func.dumpInfo(); 1935 } 1936 } 1937 1938 if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) { 1939 LoopInfo LI{DominatorTree(F)}; 1940 BranchProbabilityInfo NBPI(F, LI); 1941 1942 // Fix func entry count. 1943 if (PGOFixEntryCount) 1944 fixFuncEntryCount(Func, LI, NBPI); 1945 1946 // Verify BlockFrequency information. 1947 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0; 1948 if (PGOVerifyHotBFI) { 1949 HotCountThreshold = PSI->getOrCompHotCountThreshold(); 1950 ColdCountThreshold = PSI->getOrCompColdCountThreshold(); 1951 } 1952 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold); 1953 } 1954 } 1955 1956 // Set function hotness attribute from the profile. 1957 // We have to apply these attributes at the end because their presence 1958 // can affect the BranchProbabilityInfo of any callers, resulting in an 1959 // inconsistent MST between prof-gen and prof-use. 
1960 for (auto &F : HotFunctions) { 1961 F->addFnAttr(Attribute::InlineHint); 1962 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 1963 << "\n"); 1964 } 1965 for (auto &F : ColdFunctions) { 1966 // Only set when there is no Attribute::Hot set by the user. For Hot 1967 // attribute, user's annotation has the precedence over the profile. 1968 if (F->hasFnAttribute(Attribute::Hot)) { 1969 auto &Ctx = M.getContext(); 1970 std::string Msg = std::string("Function ") + F->getName().str() + 1971 std::string(" is annotated as a hot function but" 1972 " the profile is cold"); 1973 Ctx.diagnose( 1974 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); 1975 continue; 1976 } 1977 F->addFnAttr(Attribute::Cold); 1978 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() 1979 << "\n"); 1980 } 1981 return true; 1982 } 1983 1984 PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, 1985 std::string RemappingFilename, 1986 bool IsCS) 1987 : ProfileFileName(std::move(Filename)), 1988 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) { 1989 if (!PGOTestProfileFile.empty()) 1990 ProfileFileName = PGOTestProfileFile; 1991 if (!PGOTestProfileRemappingFile.empty()) 1992 ProfileRemappingFileName = PGOTestProfileRemappingFile; 1993 } 1994 1995 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 1996 ModuleAnalysisManager &AM) { 1997 1998 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1999 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 2000 return FAM.getResult<TargetLibraryAnalysis>(F); 2001 }; 2002 auto LookupBPI = [&FAM](Function &F) { 2003 return &FAM.getResult<BranchProbabilityAnalysis>(F); 2004 }; 2005 auto LookupBFI = [&FAM](Function &F) { 2006 return &FAM.getResult<BlockFrequencyAnalysis>(F); 2007 }; 2008 2009 auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); 2010 2011 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, 2012 
LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) 2013 return PreservedAnalyses::all(); 2014 2015 return PreservedAnalyses::none(); 2016 } 2017 2018 bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { 2019 if (skipModule(M)) 2020 return false; 2021 2022 auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { 2023 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 2024 }; 2025 auto LookupBPI = [this](Function &F) { 2026 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 2027 }; 2028 auto LookupBFI = [this](Function &F) { 2029 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 2030 }; 2031 2032 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 2033 return annotateAllFunctions(M, ProfileFileName, "", LookupTLI, LookupBPI, 2034 LookupBFI, PSI, IsCS); 2035 } 2036 2037 static std::string getSimpleNodeName(const BasicBlock *Node) { 2038 if (!Node->getName().empty()) 2039 return std::string(Node->getName()); 2040 2041 std::string SimpleNodeName; 2042 raw_string_ostream OS(SimpleNodeName); 2043 Node->printAsOperand(OS, false); 2044 return OS.str(); 2045 } 2046 2047 void llvm::setProfMetadata(Module *M, Instruction *TI, 2048 ArrayRef<uint64_t> EdgeCounts, 2049 uint64_t MaxCount) { 2050 MDBuilder MDB(M->getContext()); 2051 assert(MaxCount > 0 && "Bad max count"); 2052 uint64_t Scale = calculateCountScale(MaxCount); 2053 SmallVector<unsigned, 4> Weights; 2054 for (const auto &ECI : EdgeCounts) 2055 Weights.push_back(scaleBranchCount(ECI, Scale)); 2056 2057 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W 2058 : Weights) { 2059 dbgs() << W << " "; 2060 } dbgs() << "\n";); 2061 2062 TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 2063 if (EmitBranchProbability) { 2064 std::string BrCondStr = getBranchCondString(TI); 2065 if (BrCondStr.empty()) 2066 return; 2067 2068 uint64_t WSum = 2069 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0, 2070 
[](uint64_t w1, uint64_t w2) { return w1 + w2; }); 2071 uint64_t TotalCount = 2072 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0, 2073 [](uint64_t c1, uint64_t c2) { return c1 + c2; }); 2074 Scale = calculateCountScale(WSum); 2075 BranchProbability BP(scaleBranchCount(Weights[0], Scale), 2076 scaleBranchCount(WSum, Scale)); 2077 std::string BranchProbStr; 2078 raw_string_ostream OS(BranchProbStr); 2079 OS << BP; 2080 OS << " (total count : " << TotalCount << ")"; 2081 OS.flush(); 2082 Function *F = TI->getParent()->getParent(); 2083 OptimizationRemarkEmitter ORE(F); 2084 ORE.emit([&]() { 2085 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) 2086 << BrCondStr << " is true with probability : " << BranchProbStr; 2087 }); 2088 } 2089 } 2090 2091 namespace llvm { 2092 2093 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { 2094 MDBuilder MDB(M->getContext()); 2095 TI->setMetadata(llvm::LLVMContext::MD_irr_loop, 2096 MDB.createIrrLoopHeaderWeight(Count)); 2097 } 2098 2099 template <> struct GraphTraits<PGOUseFunc *> { 2100 using NodeRef = const BasicBlock *; 2101 using ChildIteratorType = const_succ_iterator; 2102 using nodes_iterator = pointer_iterator<Function::const_iterator>; 2103 2104 static NodeRef getEntryNode(const PGOUseFunc *G) { 2105 return &G->getFunc().front(); 2106 } 2107 2108 static ChildIteratorType child_begin(const NodeRef N) { 2109 return succ_begin(N); 2110 } 2111 2112 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } 2113 2114 static nodes_iterator nodes_begin(const PGOUseFunc *G) { 2115 return nodes_iterator(G->getFunc().begin()); 2116 } 2117 2118 static nodes_iterator nodes_end(const PGOUseFunc *G) { 2119 return nodes_iterator(G->getFunc().end()); 2120 } 2121 }; 2122 2123 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { 2124 explicit DOTGraphTraits(bool isSimple = false) 2125 : DefaultDOTGraphTraits(isSimple) {} 2126 2127 static 
std::string getGraphName(const PGOUseFunc *G) { 2128 return std::string(G->getFunc().getName()); 2129 } 2130 2131 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { 2132 std::string Result; 2133 raw_string_ostream OS(Result); 2134 2135 OS << getSimpleNodeName(Node) << ":\\l"; 2136 UseBBInfo *BI = Graph->findBBInfo(Node); 2137 OS << "Count : "; 2138 if (BI && BI->CountValid) 2139 OS << BI->CountValue << "\\l"; 2140 else 2141 OS << "Unknown\\l"; 2142 2143 if (!PGOInstrSelect) 2144 return Result; 2145 2146 for (const Instruction &I : *Node) { 2147 if (!isa<SelectInst>(&I)) 2148 continue; 2149 // Display scaled counts for SELECT instruction: 2150 OS << "SELECT : { T = "; 2151 uint64_t TC, FC; 2152 bool HasProf = I.extractProfMetadata(TC, FC); 2153 if (!HasProf) 2154 OS << "Unknown, F = Unknown }\\l"; 2155 else 2156 OS << TC << ", F = " << FC << " }\\l"; 2157 } 2158 return Result; 2159 } 2160 }; 2161 2162 } // end namespace llvm 2163