1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements PGO instrumentation using a minimum spanning tree based 10 // on the following paper: 11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 13 // Issue 3, pp 313-322 14 // The idea of the algorithm based on the fact that for each node (except for 15 // the entry and exit), the sum of incoming edge counts equals the sum of 16 // outgoing edge counts. The count of edge on spanning tree can be derived from 17 // those edges not on the spanning tree. Knuth proves this method instruments 18 // the minimum number of edges. 19 // 20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree 21 // edges have higher frequencies (more likely to execute). The idea is to 22 // instrument those less frequently executed edges to reduce the runtime 23 // overhead of instrumented binaries. 24 // 25 // This file contains two passes: 26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 27 // count profile, and generates the instrumentation for indirect call 28 // profiling. 29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and 30 // annotates the branch weights. It also reads the indirect call value 31 // profiling records and annotate the indirect call instructions. 32 // 33 // To get the precise counter information, These two passes need to invoke at 34 // the same compilation point (so they see the same IR). For pass 35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For 36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 37 // the profile is opened in module level and passed to each PGOUseFunc instance. 38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 39 // in class FuncPGOInstrumentation. 40 // 41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 42 // BBInfo contains auxiliary information for each BB. These two classes are used 43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 45 // used in populating profile counters. 46 // The MST implementation is in Class CFGMST (CFGMST.h). 47 // 48 //===----------------------------------------------------------------------===// 49 50 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 51 #include "CFGMST.h" 52 #include "ValueProfileCollector.h" 53 #include "llvm/ADT/APInt.h" 54 #include "llvm/ADT/ArrayRef.h" 55 #include "llvm/ADT/MapVector.h" 56 #include "llvm/ADT/STLExtras.h" 57 #include "llvm/ADT/SmallVector.h" 58 #include "llvm/ADT/Statistic.h" 59 #include "llvm/ADT/StringRef.h" 60 #include "llvm/ADT/Triple.h" 61 #include "llvm/ADT/Twine.h" 62 #include "llvm/ADT/iterator.h" 63 #include "llvm/ADT/iterator_range.h" 64 #include "llvm/Analysis/BlockFrequencyInfo.h" 65 #include "llvm/Analysis/BranchProbabilityInfo.h" 66 #include "llvm/Analysis/CFG.h" 67 #include "llvm/Analysis/EHPersonalities.h" 68 #include "llvm/Analysis/LoopInfo.h" 69 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 70 #include "llvm/Analysis/ProfileSummaryInfo.h" 71 #include "llvm/IR/Attributes.h" 72 #include "llvm/IR/BasicBlock.h" 73 #include "llvm/IR/CFG.h" 74 #include "llvm/IR/Comdat.h" 75 #include "llvm/IR/Constant.h" 76 #include "llvm/IR/Constants.h" 77 #include "llvm/IR/DiagnosticInfo.h" 78 #include "llvm/IR/Dominators.h" 79 #include "llvm/IR/Function.h" 80 #include "llvm/IR/GlobalAlias.h" 81 #include "llvm/IR/GlobalValue.h" 82 #include "llvm/IR/GlobalVariable.h" 83 #include "llvm/IR/IRBuilder.h" 84 #include "llvm/IR/InstVisitor.h" 85 #include "llvm/IR/InstrTypes.h" 86 #include "llvm/IR/Instruction.h" 87 #include "llvm/IR/Instructions.h" 88 #include "llvm/IR/IntrinsicInst.h" 89 #include "llvm/IR/Intrinsics.h" 90 #include "llvm/IR/LLVMContext.h" 91 #include "llvm/IR/MDBuilder.h" 92 #include "llvm/IR/Module.h" 93 #include "llvm/IR/PassManager.h" 94 #include "llvm/IR/ProfileSummary.h" 95 #include "llvm/IR/Type.h" 96 #include "llvm/IR/Value.h" 97 #include "llvm/InitializePasses.h" 98 #include "llvm/Pass.h" 99 #include "llvm/ProfileData/InstrProf.h" 100 #include "llvm/ProfileData/InstrProfReader.h" 101 #include "llvm/Support/BranchProbability.h" 102 #include "llvm/Support/CRC.h" 103 #include "llvm/Support/Casting.h" 104 #include "llvm/Support/CommandLine.h" 105 #include "llvm/Support/DOTGraphTraits.h" 106 #include "llvm/Support/Debug.h" 107 #include "llvm/Support/Error.h" 108 #include "llvm/Support/ErrorHandling.h" 109 #include "llvm/Support/GraphWriter.h" 110 #include "llvm/Support/raw_ostream.h" 111 #include "llvm/Transforms/Instrumentation.h" 112 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 113 #include <algorithm> 114 #include <cassert> 115 #include <cstdint> 116 #include <memory> 117 #include <numeric> 118 #include <string> 119 #include <unordered_map> 120 #include <utility> 121 #include <vector> 122 123 using namespace llvm; 124 using ProfileCount = Function::ProfileCount; 125 using VPCandidateInfo = ValueProfileCollector::CandidateInfo; 126 127 #define DEBUG_TYPE "pgo-instrumentation" 128 129 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 130 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 131 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); 132 STATISTIC(NumOfPGOEdge, "Number of edges."); 133 STATISTIC(NumOfPGOBB, "Number of basic-blocks."); 134 STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 135 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 136 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 137 STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 138 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 139 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); 140 STATISTIC(NumOfCSPGOSelectInsts, 141 "Number of select instruction instrumented in CSPGO."); 142 STATISTIC(NumOfCSPGOMemIntrinsics, 143 "Number of mem intrinsics instrumented in CSPGO."); 144 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); 145 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); 146 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO."); 147 STATISTIC(NumOfCSPGOFunc, 148 "Number of functions having valid profile counts in CSPGO."); 149 STATISTIC(NumOfCSPGOMismatch, 150 "Number of functions having mismatch profile in CSPGO."); 151 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); 152 153 // Command line option to specify the file to read profile from. This is 154 // mainly used for testing. 155 static cl::opt<std::string> 156 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, 157 cl::value_desc("filename"), 158 cl::desc("Specify the path of profile data file. This is" 159 "mainly for test purpose.")); 160 static cl::opt<std::string> PGOTestProfileRemappingFile( 161 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, 162 cl::value_desc("filename"), 163 cl::desc("Specify the path of profile remapping file. This is mainly for " 164 "test purpose.")); 165 166 // Command line option to disable value profiling. The default is false: 167 // i.e. value profiling is enabled by default. This is for debug purpose. 168 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 169 cl::Hidden, 170 cl::desc("Disable Value Profiling")); 171 172 // Command line option to set the maximum number of VP annotations to write to 173 // the metadata for a single indirect call callsite. 174 static cl::opt<unsigned> MaxNumAnnotations( 175 "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore, 176 cl::desc("Max number of annotations for a single indirect " 177 "call callsite")); 178 179 // Command line option to set the maximum number of value annotations 180 // to write to the metadata for a single memop intrinsic. 181 static cl::opt<unsigned> MaxNumMemOPAnnotations( 182 "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore, 183 cl::desc("Max number of preicise value annotations for a single memop" 184 "intrinsic")); 185 186 // Command line option to control appending FunctionHash to the name of a COMDAT 187 // function. This is to avoid the hash mismatch caused by the preinliner. 188 static cl::opt<bool> DoComdatRenaming( 189 "do-comdat-renaming", cl::init(false), cl::Hidden, 190 cl::desc("Append function hash to the name of COMDAT function to avoid " 191 "function hash mismatch due to the preinliner")); 192 193 // Command line option to enable/disable the warning about missing profile 194 // information. 195 static cl::opt<bool> 196 PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, 197 cl::desc("Use this option to turn on/off " 198 "warnings about missing profile data for " 199 "functions.")); 200 201 namespace llvm { 202 // Command line option to enable/disable the warning about a hash mismatch in 203 // the profile data. 204 cl::opt<bool> 205 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 206 cl::desc("Use this option to turn off/on " 207 "warnings about profile cfg mismatch.")); 208 } // namespace llvm 209 210 // Command line option to enable/disable the warning about a hash mismatch in 211 // the profile data for Comdat functions, which often turns out to be false 212 // positive due to the pre-instrumentation inline. 213 static cl::opt<bool> 214 NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), 215 cl::Hidden, 216 cl::desc("The option is used to turn on/off " 217 "warnings about hash mismatch for comdat " 218 "functions.")); 219 220 // Command line option to enable/disable select instruction instrumentation. 221 static cl::opt<bool> 222 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 223 cl::desc("Use this option to turn on/off SELECT " 224 "instruction instrumentation. ")); 225 226 // Command line option to turn on CFG dot or text dump of raw profile counts 227 static cl::opt<PGOViewCountsType> PGOViewRawCounts( 228 "pgo-view-raw-counts", cl::Hidden, 229 cl::desc("A boolean option to show CFG dag or text " 230 "with raw profile counts from " 231 "profile data. See also option " 232 "-pgo-view-counts. To limit graph " 233 "display to only one function, use " 234 "filtering option -view-bfi-func-name."), 235 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), 236 clEnumValN(PGOVCT_Graph, "graph", "show a graph."), 237 clEnumValN(PGOVCT_Text, "text", "show in text."))); 238 239 // Command line option to enable/disable memop intrinsic call.size profiling. 240 static cl::opt<bool> 241 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 242 cl::desc("Use this option to turn on/off " 243 "memory intrinsic size profiling.")); 244 245 // Emit branch probability as optimization remarks. 246 static cl::opt<bool> 247 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 248 cl::desc("When this option is on, the annotated " 249 "branch probability will be emitted as " 250 "optimization remarks: -{Rpass|" 251 "pass-remarks}=pgo-instrumentation")); 252 253 static cl::opt<bool> PGOInstrumentEntry( 254 "pgo-instrument-entry", cl::init(false), cl::Hidden, 255 cl::desc("Force to instrument function entry basicblock.")); 256 257 static cl::opt<bool> 258 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, 259 cl::desc("Fix function entry count in profile use.")); 260 261 static cl::opt<bool> PGOVerifyHotBFI( 262 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden, 263 cl::desc("Print out the non-match BFI count if a hot raw profile count " 264 "becomes non-hot, or a cold raw profile count becomes hot. " 265 "The print is enabled under -Rpass-analysis=pgo, or " 266 "internal option -pass-remakrs-analysis=pgo.")); 267 268 static cl::opt<bool> PGOVerifyBFI( 269 "pgo-verify-bfi", cl::init(false), cl::Hidden, 270 cl::desc("Print out mismatched BFI counts after setting profile metadata " 271 "The print is enabled under -Rpass-analysis=pgo, or " 272 "internal option -pass-remakrs-analysis=pgo.")); 273 274 static cl::opt<unsigned> PGOVerifyBFIRatio( 275 "pgo-verify-bfi-ratio", cl::init(5), cl::Hidden, 276 cl::desc("Set the threshold for pgo-verify-big -- only print out " 277 "mismatched BFI if the difference percentage is greater than " 278 "this value (in percentage).")); 279 280 static cl::opt<unsigned> PGOVerifyBFICutoff( 281 "pgo-verify-bfi-cutoff", cl::init(1), cl::Hidden, 282 cl::desc("Set the threshold for pgo-verify-bfi -- skip the counts whose " 283 "profile count value is below.")); 284 285 namespace llvm { 286 // Command line option to turn on CFG dot dump after profile annotation. 287 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts 288 extern cl::opt<PGOViewCountsType> PGOViewCounts; 289 290 // Command line option to specify the name of the function for CFG dump 291 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= 292 extern cl::opt<std::string> ViewBlockFreqFuncName; 293 } // namespace llvm 294 295 static cl::opt<bool> 296 PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden, 297 cl::desc("Use the old CFG function hashing")); 298 299 // Return a string describing the branch condition that can be 300 // used in static branch probability heuristics: 301 static std::string getBranchCondString(Instruction *TI) { 302 BranchInst *BI = dyn_cast<BranchInst>(TI); 303 if (!BI || !BI->isConditional()) 304 return std::string(); 305 306 Value *Cond = BI->getCondition(); 307 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 308 if (!CI) 309 return std::string(); 310 311 std::string result; 312 raw_string_ostream OS(result); 313 OS << CmpInst::getPredicateName(CI->getPredicate()) << "_"; 314 CI->getOperand(0)->getType()->print(OS, true); 315 316 Value *RHS = CI->getOperand(1); 317 ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 318 if (CV) { 319 if (CV->isZero()) 320 OS << "_Zero"; 321 else if (CV->isOne()) 322 OS << "_One"; 323 else if (CV->isMinusOne()) 324 OS << "_MinusOne"; 325 else 326 OS << "_Const"; 327 } 328 OS.flush(); 329 return result; 330 } 331 332 static const char *ValueProfKindDescr[] = { 333 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, 334 #include "llvm/ProfileData/InstrProfData.inc" 335 }; 336 337 namespace { 338 339 /// The select instruction visitor plays three roles specified 340 /// by the mode. In \c VM_counting mode, it simply counts the number of 341 /// select instructions. In \c VM_instrument mode, it inserts code to count 342 /// the number times TrueValue of select is taken. In \c VM_annotate mode, 343 /// it reads the profile data and annotate the select instruction with metadata. 344 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 345 class PGOUseFunc; 346 347 /// Instruction Visitor class to visit select instructions. 348 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 349 Function &F; 350 unsigned NSIs = 0; // Number of select instructions instrumented. 351 VisitMode Mode = VM_counting; // Visiting mode. 352 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 353 unsigned TotalNumCtrs = 0; // Total number of counters 354 GlobalVariable *FuncNameVar = nullptr; 355 uint64_t FuncHash = 0; 356 PGOUseFunc *UseFunc = nullptr; 357 358 SelectInstVisitor(Function &Func) : F(Func) {} 359 360 void countSelects(Function &Func) { 361 NSIs = 0; 362 Mode = VM_counting; 363 visit(Func); 364 } 365 366 // Visit the IR stream and instrument all select instructions. \p 367 // Ind is a pointer to the counter index variable; \p TotalNC 368 // is the total number of counters; \p FNV is the pointer to the 369 // PGO function name var; \p FHash is the function hash. 370 void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC, 371 GlobalVariable *FNV, uint64_t FHash) { 372 Mode = VM_instrument; 373 CurCtrIdx = Ind; 374 TotalNumCtrs = TotalNC; 375 FuncHash = FHash; 376 FuncNameVar = FNV; 377 visit(Func); 378 } 379 380 // Visit the IR stream and annotate all select instructions. 381 void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) { 382 Mode = VM_annotate; 383 UseFunc = UF; 384 CurCtrIdx = Ind; 385 visit(Func); 386 } 387 388 void instrumentOneSelectInst(SelectInst &SI); 389 void annotateOneSelectInst(SelectInst &SI); 390 391 // Visit \p SI instruction and perform tasks according to visit mode. 392 void visitSelectInst(SelectInst &SI); 393 394 // Return the number of select instructions. This needs be called after 395 // countSelects(). 396 unsigned getNumOfSelectInsts() const { return NSIs; } 397 }; 398 399 400 class PGOInstrumentationGenLegacyPass : public ModulePass { 401 public: 402 static char ID; 403 404 PGOInstrumentationGenLegacyPass(bool IsCS = false) 405 : ModulePass(ID), IsCS(IsCS) { 406 initializePGOInstrumentationGenLegacyPassPass( 407 *PassRegistry::getPassRegistry()); 408 } 409 410 StringRef getPassName() const override { return "PGOInstrumentationGenPass"; } 411 412 private: 413 // Is this is context-sensitive instrumentation. 414 bool IsCS; 415 bool runOnModule(Module &M) override; 416 417 void getAnalysisUsage(AnalysisUsage &AU) const override { 418 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 419 AU.addRequired<TargetLibraryInfoWrapperPass>(); 420 } 421 }; 422 423 class PGOInstrumentationUseLegacyPass : public ModulePass { 424 public: 425 static char ID; 426 427 // Provide the profile filename as the parameter. 428 PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false) 429 : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) { 430 if (!PGOTestProfileFile.empty()) 431 ProfileFileName = PGOTestProfileFile; 432 initializePGOInstrumentationUseLegacyPassPass( 433 *PassRegistry::getPassRegistry()); 434 } 435 436 StringRef getPassName() const override { return "PGOInstrumentationUsePass"; } 437 438 private: 439 std::string ProfileFileName; 440 // Is this is context-sensitive instrumentation use. 441 bool IsCS; 442 443 bool runOnModule(Module &M) override; 444 445 void getAnalysisUsage(AnalysisUsage &AU) const override { 446 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 447 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 448 AU.addRequired<TargetLibraryInfoWrapperPass>(); 449 } 450 }; 451 452 class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass { 453 public: 454 static char ID; 455 StringRef getPassName() const override { 456 return "PGOInstrumentationGenCreateVarPass"; 457 } 458 PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "") 459 : ModulePass(ID), InstrProfileOutput(CSInstrName) { 460 initializePGOInstrumentationGenCreateVarLegacyPassPass( 461 *PassRegistry::getPassRegistry()); 462 } 463 464 private: 465 bool runOnModule(Module &M) override { 466 createProfileFileNameVar(M, InstrProfileOutput); 467 createIRLevelProfileFlagVar(M, /* IsCS */ true, PGOInstrumentEntry); 468 return false; 469 } 470 std::string InstrProfileOutput; 471 }; 472 473 } // end anonymous namespace 474 475 char PGOInstrumentationGenLegacyPass::ID = 0; 476 477 INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 478 "PGO instrumentation.", false, false) 479 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 480 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 481 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 482 INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 483 "PGO instrumentation.", false, false) 484 485 ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) { 486 return new PGOInstrumentationGenLegacyPass(IsCS); 487 } 488 489 char PGOInstrumentationUseLegacyPass::ID = 0; 490 491 INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 492 "Read PGO instrumentation profile.", false, false) 493 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 494 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 495 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 496 INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 497 "Read PGO instrumentation profile.", false, false) 498 499 ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename, 500 bool IsCS) { 501 return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS); 502 } 503 504 char PGOInstrumentationGenCreateVarLegacyPass::ID = 0; 505 506 INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass, 507 "pgo-instr-gen-create-var", 508 "Create PGO instrumentation version variable for CSPGO.", false, 509 false) 510 511 ModulePass * 512 llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) { 513 return new PGOInstrumentationGenCreateVarLegacyPass(std::string(CSInstrName)); 514 } 515 516 namespace { 517 518 /// An MST based instrumentation for PGO 519 /// 520 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO 521 /// in the function level. 522 struct PGOEdge { 523 // This class implements the CFG edges. Note the CFG can be a multi-graph. 524 // So there might be multiple edges with same SrcBB and DestBB. 525 const BasicBlock *SrcBB; 526 const BasicBlock *DestBB; 527 uint64_t Weight; 528 bool InMST = false; 529 bool Removed = false; 530 bool IsCritical = false; 531 532 PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 533 : SrcBB(Src), DestBB(Dest), Weight(W) {} 534 535 // Return the information string of an edge. 536 std::string infoString() const { 537 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 538 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str(); 539 } 540 }; 541 542 // This class stores the auxiliary information for each BB. 543 struct BBInfo { 544 BBInfo *Group; 545 uint32_t Index; 546 uint32_t Rank = 0; 547 548 BBInfo(unsigned IX) : Group(this), Index(IX) {} 549 550 // Return the information string of this object. 551 std::string infoString() const { 552 return (Twine("Index=") + Twine(Index)).str(); 553 } 554 555 // Empty function -- only applicable to UseBBInfo. 556 void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 557 558 // Empty function -- only applicable to UseBBInfo. 559 void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 560 }; 561 562 // This class implements the CFG edges. Note the CFG can be a multi-graph. 563 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 564 private: 565 Function &F; 566 567 // Is this is context-sensitive instrumentation. 568 bool IsCS; 569 570 // A map that stores the Comdat group in function F. 571 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 572 573 ValueProfileCollector VPC; 574 575 void computeCFGHash(); 576 void renameComdatFunction(); 577 578 public: 579 std::vector<std::vector<VPCandidateInfo>> ValueSites; 580 SelectInstVisitor SIVisitor; 581 std::string FuncName; 582 GlobalVariable *FuncNameVar; 583 584 // CFG hash value for this function. 585 uint64_t FunctionHash = 0; 586 587 // The Minimum Spanning Tree of function CFG. 588 CFGMST<Edge, BBInfo> MST; 589 590 // Collect all the BBs that will be instrumented, and store them in 591 // InstrumentBBs. 592 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); 593 594 // Give an edge, find the BB that will be instrumented. 595 // Return nullptr if there is no BB to be instrumented. 596 BasicBlock *getInstrBB(Edge *E); 597 598 // Return the auxiliary BB information. 599 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 600 601 // Return the auxiliary BB information if available. 602 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 603 604 // Dump edges and BB information. 605 void dumpInfo(std::string Str = "") const { 606 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " + 607 Twine(FunctionHash) + "\t" + Str); 608 } 609 610 FuncPGOInstrumentation( 611 Function &Func, TargetLibraryInfo &TLI, 612 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 613 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 614 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false, 615 bool InstrumentFuncEntry = true) 616 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), 617 ValueSites(IPVK_Last + 1), SIVisitor(Func), 618 MST(F, InstrumentFuncEntry, BPI, BFI) { 619 // This should be done before CFG hash computation. 620 SIVisitor.countSelects(Func); 621 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); 622 if (!IsCS) { 623 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 624 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 625 NumOfPGOBB += MST.BBInfos.size(); 626 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); 627 } else { 628 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 629 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 630 NumOfCSPGOBB += MST.BBInfos.size(); 631 } 632 633 FuncName = getPGOFuncName(F); 634 computeCFGHash(); 635 if (!ComdatMembers.empty()) 636 renameComdatFunction(); 637 LLVM_DEBUG(dumpInfo("after CFGMST")); 638 639 for (auto &E : MST.AllEdges) { 640 if (E->Removed) 641 continue; 642 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; 643 if (!E->InMST) 644 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; 645 } 646 647 if (CreateGlobalVar) 648 FuncNameVar = createPGOFuncNameVar(F, FuncName); 649 } 650 }; 651 652 } // end anonymous namespace 653 654 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 655 // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers 656 // of selects, indirect calls, mem ops and edges. 657 template <class Edge, class BBInfo> 658 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 659 std::vector<uint8_t> Indexes; 660 JamCRC JC; 661 for (auto &BB : F) { 662 const Instruction *TI = BB.getTerminator(); 663 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 664 BasicBlock *Succ = TI->getSuccessor(I); 665 auto BI = findBBInfo(Succ); 666 if (BI == nullptr) 667 continue; 668 uint32_t Index = BI->Index; 669 for (int J = 0; J < 4; J++) 670 Indexes.push_back((uint8_t)(Index >> (J * 8))); 671 } 672 } 673 JC.update(Indexes); 674 675 JamCRC JCH; 676 if (PGOOldCFGHashing) { 677 // Hash format for context sensitive profile. Reserve 4 bits for other 678 // information. 679 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | 680 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | 681 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 | 682 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); 683 } else { 684 // The higher 32 bits. 685 auto updateJCH = [&JCH](uint64_t Num) { 686 uint8_t Data[8]; 687 support::endian::write64le(Data, Num); 688 JCH.update(Data); 689 }; 690 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts()); 691 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size()); 692 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size()); 693 updateJCH((uint64_t)MST.AllEdges.size()); 694 695 // Hash format for context sensitive profile. Reserve 4 bits for other 696 // information. 697 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); 698 } 699 700 // Reserve bit 60-63 for other information purpose. 701 FunctionHash &= 0x0FFFFFFFFFFFFFFF; 702 if (IsCS) 703 NamedInstrProfRecord::setCSFlagInHash(FunctionHash); 704 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" 705 << " CRC = " << JC.getCRC() 706 << ", Selects = " << SIVisitor.getNumOfSelectInsts() 707 << ", Edges = " << MST.AllEdges.size() << ", ICSites = " 708 << ValueSites[IPVK_IndirectCallTarget].size()); 709 if (!PGOOldCFGHashing) { 710 LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size() 711 << ", High32 CRC = " << JCH.getCRC()); 712 } 713 LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";); 714 } 715 716 // Check if we can safely rename this Comdat function. 717 static bool canRenameComdat( 718 Function &F, 719 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 720 if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 721 return false; 722 723 // FIXME: Current only handle those Comdat groups that only containing one 724 // function. 725 // (1) For a Comdat group containing multiple functions, we need to have a 726 // unique postfix based on the hashes for each function. There is a 727 // non-trivial code refactoring to do this efficiently. 728 // (2) Variables can not be renamed, so we can not rename Comdat function in a 729 // group including global vars. 730 Comdat *C = F.getComdat(); 731 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 732 assert(!isa<GlobalAlias>(CM.second)); 733 Function *FM = dyn_cast<Function>(CM.second); 734 if (FM != &F) 735 return false; 736 } 737 return true; 738 } 739 740 // Append the CFGHash to the Comdat function name. 741 template <class Edge, class BBInfo> 742 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 743 if (!canRenameComdat(F, ComdatMembers)) 744 return; 745 std::string OrigName = F.getName().str(); 746 std::string NewFuncName = 747 Twine(F.getName() + "." + Twine(FunctionHash)).str(); 748 F.setName(Twine(NewFuncName)); 749 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 750 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 751 Comdat *NewComdat; 752 Module *M = F.getParent(); 753 // For AvailableExternallyLinkage functions, change the linkage to 754 // LinkOnceODR and put them into comdat. This is because after renaming, there 755 // is no backup external copy available for the function. 756 if (!F.hasComdat()) { 757 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 758 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 759 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 760 F.setComdat(NewComdat); 761 return; 762 } 763 764 // This function belongs to a single function Comdat group. 765 Comdat *OrigComdat = F.getComdat(); 766 std::string NewComdatName = 767 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 768 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 769 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 770 771 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 772 // Must be a function. 773 cast<Function>(CM.second)->setComdat(NewComdat); 774 } 775 } 776 777 // Collect all the BBs that will be instruments and return them in 778 // InstrumentBBs and setup InEdges/OutEdge for UseBBInfo. 779 template <class Edge, class BBInfo> 780 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( 781 std::vector<BasicBlock *> &InstrumentBBs) { 782 // Use a worklist as we will update the vector during the iteration. 783 std::vector<Edge *> EdgeList; 784 EdgeList.reserve(MST.AllEdges.size()); 785 for (auto &E : MST.AllEdges) 786 EdgeList.push_back(E.get()); 787 788 for (auto &E : EdgeList) { 789 BasicBlock *InstrBB = getInstrBB(E); 790 if (InstrBB) 791 InstrumentBBs.push_back(InstrBB); 792 } 793 794 // Set up InEdges/OutEdges for all BBs. 795 for (auto &E : MST.AllEdges) { 796 if (E->Removed) 797 continue; 798 const BasicBlock *SrcBB = E->SrcBB; 799 const BasicBlock *DestBB = E->DestBB; 800 BBInfo &SrcInfo = getBBInfo(SrcBB); 801 BBInfo &DestInfo = getBBInfo(DestBB); 802 SrcInfo.addOutEdge(E.get()); 803 DestInfo.addInEdge(E.get()); 804 } 805 } 806 807 // Given a CFG E to be instrumented, find which BB to place the instrumented 808 // code. The function will split the critical edge if necessary. 809 template <class Edge, class BBInfo> 810 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 811 if (E->InMST || E->Removed) 812 return nullptr; 813 814 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 815 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 816 // For a fake edge, instrument the real BB. 817 if (SrcBB == nullptr) 818 return DestBB; 819 if (DestBB == nullptr) 820 return SrcBB; 821 822 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { 823 // There are basic blocks (such as catchswitch) cannot be instrumented. 824 // If the returned first insertion point is the end of BB, skip this BB. 825 if (BB->getFirstInsertionPt() == BB->end()) 826 return nullptr; 827 return BB; 828 }; 829 830 // Instrument the SrcBB if it has a single successor, 831 // otherwise, the DestBB if this is not a critical edge. 832 Instruction *TI = SrcBB->getTerminator(); 833 if (TI->getNumSuccessors() <= 1) 834 return canInstrument(SrcBB); 835 if (!E->IsCritical) 836 return canInstrument(DestBB); 837 838 // Some IndirectBr critical edges cannot be split by the previous 839 // SplitIndirectBrCriticalEdges call. Bail out. 840 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 841 BasicBlock *InstrBB = 842 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum); 843 if (!InstrBB) { 844 LLVM_DEBUG( 845 dbgs() << "Fail to split critical edge: not instrument this edge.\n"); 846 return nullptr; 847 } 848 // For a critical edge, we have to split. Instrument the newly 849 // created BB. 850 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; 851 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index 852 << " --> " << getBBInfo(DestBB).Index << "\n"); 853 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. 854 MST.addEdge(SrcBB, InstrBB, 0); 855 // Second one: Add new edge of InstrBB->DestBB. 856 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); 857 NewEdge1.InMST = true; 858 E->Removed = true; 859 860 return canInstrument(InstrBB); 861 } 862 863 // When generating value profiling calls on Windows routines that make use of 864 // handler funclets for exception processing an operand bundle needs to attached 865 // to the called function. This routine will set \p OpBundles to contain the 866 // funclet information, if any is needed, that should be placed on the generated 867 // value profiling call for the value profile candidate call. 868 static void 869 populateEHOperandBundle(VPCandidateInfo &Cand, 870 DenseMap<BasicBlock *, ColorVector> &BlockColors, 871 SmallVectorImpl<OperandBundleDef> &OpBundles) { 872 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst); 873 if (OrigCall && !isa<IntrinsicInst>(OrigCall)) { 874 // The instrumentation call should belong to the same funclet as a 875 // non-intrinsic call, so just copy the operand bundle, if any exists. 876 Optional<OperandBundleUse> ParentFunclet = 877 OrigCall->getOperandBundle(LLVMContext::OB_funclet); 878 if (ParentFunclet) 879 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet)); 880 } else { 881 // Intrinsics or other instructions do not get funclet information from the 882 // front-end. Need to use the BlockColors that was computed by the routine 883 // colorEHFunclets to determine whether a funclet is needed. 884 if (!BlockColors.empty()) { 885 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second; 886 assert(CV.size() == 1 && "non-unique color for block!"); 887 Instruction *EHPad = CV.front()->getFirstNonPHI(); 888 if (EHPad->isEHPad()) 889 OpBundles.emplace_back("funclet", EHPad); 890 } 891 } 892 } 893 894 // Visit all edge and instrument the edges not in MST, and do value profiling. 895 // Critical edges will be split. 896 static void instrumentOneFunc( 897 Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, 898 BlockFrequencyInfo *BFI, 899 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 900 bool IsCS) { 901 // Split indirectbr critical edges here before computing the MST rather than 902 // later in getInstrBB() to avoid invalidating it. 903 SplitIndirectBrCriticalEdges(F, BPI, BFI); 904 905 FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo( 906 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry); 907 std::vector<BasicBlock *> InstrumentBBs; 908 FuncInfo.getInstrumentBBs(InstrumentBBs); 909 unsigned NumCounters = 910 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 911 912 uint32_t I = 0; 913 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); 914 for (auto *InstrBB : InstrumentBBs) { 915 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 916 assert(Builder.GetInsertPoint() != InstrBB->end() && 917 "Cannot get the Instrumentation point"); 918 Builder.CreateCall( 919 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), 920 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 921 Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters), 922 Builder.getInt32(I++)}); 923 } 924 925 // Now instrument select instructions: 926 FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar, 927 FuncInfo.FunctionHash); 928 assert(I == NumCounters); 929 930 if (DisableValueProfiling) 931 return; 932 933 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); 934 935 // Intrinsic function calls do not have funclet operand bundles needed for 936 // Windows exception handling attached to them. However, if value profiling is 937 // inserted for one of these calls, then a funclet value will need to be set 938 // on the instrumentation call based on the funclet coloring. 939 DenseMap<BasicBlock *, ColorVector> BlockColors; 940 if (F.hasPersonalityFn() && 941 isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) 942 BlockColors = colorEHFunclets(F); 943 944 // For each VP Kind, walk the VP candidates and instrument each one. 945 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { 946 unsigned SiteIndex = 0; 947 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) 948 continue; 949 950 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { 951 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] 952 << " site: CallSite Index = " << SiteIndex << "\n"); 953 954 IRBuilder<> Builder(Cand.InsertPt); 955 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && 956 "Cannot get the Instrumentation point"); 957 958 Value *ToProfile = nullptr; 959 if (Cand.V->getType()->isIntegerTy()) 960 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty()); 961 else if (Cand.V->getType()->isPointerTy()) 962 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); 963 assert(ToProfile && "value profiling Value is of unexpected type"); 964 965 SmallVector<OperandBundleDef, 1> OpBundles; 966 populateEHOperandBundle(Cand, BlockColors, OpBundles); 967 Builder.CreateCall( 968 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 969 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 970 Builder.getInt64(FuncInfo.FunctionHash), ToProfile, 971 Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}, 972 OpBundles); 973 } 974 } // IPVK_First <= Kind <= IPVK_Last 975 } 976 977 namespace { 978 979 // This class represents a CFG edge in profile use compilation. 980 struct PGOUseEdge : public PGOEdge { 981 bool CountValid = false; 982 uint64_t CountValue = 0; 983 984 PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 985 : PGOEdge(Src, Dest, W) {} 986 987 // Set edge count value 988 void setEdgeCount(uint64_t Value) { 989 CountValue = Value; 990 CountValid = true; 991 } 992 993 // Return the information string for this object. 994 std::string infoString() const { 995 if (!CountValid) 996 return PGOEdge::infoString(); 997 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) 998 .str(); 999 } 1000 }; 1001 1002 using DirectEdges = SmallVector<PGOUseEdge *, 2>; 1003 1004 // This class stores the auxiliary information for each BB. 1005 struct UseBBInfo : public BBInfo { 1006 uint64_t CountValue = 0; 1007 bool CountValid; 1008 int32_t UnknownCountInEdge = 0; 1009 int32_t UnknownCountOutEdge = 0; 1010 DirectEdges InEdges; 1011 DirectEdges OutEdges; 1012 1013 UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {} 1014 1015 UseBBInfo(unsigned IX, uint64_t C) 1016 : BBInfo(IX), CountValue(C), CountValid(true) {} 1017 1018 // Set the profile count value for this BB. 1019 void setBBInfoCount(uint64_t Value) { 1020 CountValue = Value; 1021 CountValid = true; 1022 } 1023 1024 // Return the information string of this object. 1025 std::string infoString() const { 1026 if (!CountValid) 1027 return BBInfo::infoString(); 1028 return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str(); 1029 } 1030 1031 // Add an OutEdge and update the edge count. 1032 void addOutEdge(PGOUseEdge *E) { 1033 OutEdges.push_back(E); 1034 UnknownCountOutEdge++; 1035 } 1036 1037 // Add an InEdge and update the edge count. 1038 void addInEdge(PGOUseEdge *E) { 1039 InEdges.push_back(E); 1040 UnknownCountInEdge++; 1041 } 1042 }; 1043 1044 } // end anonymous namespace 1045 1046 // Sum up the count values for all the edges. 1047 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 1048 uint64_t Total = 0; 1049 for (auto &E : Edges) { 1050 if (E->Removed) 1051 continue; 1052 Total += E->CountValue; 1053 } 1054 return Total; 1055 } 1056 1057 namespace { 1058 1059 class PGOUseFunc { 1060 public: 1061 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, 1062 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 1063 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, 1064 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry) 1065 : F(Func), M(Modu), BFI(BFIin), PSI(PSI), 1066 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS, 1067 InstrumentFuncEntry), 1068 FreqAttr(FFA_Normal), IsCS(IsCS) {} 1069 1070 // Read counts for the instrumented BB from profile. 1071 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1072 bool &AllMinusOnes); 1073 1074 // Populate the counts for all BBs. 1075 void populateCounters(); 1076 1077 // Set the branch weights based on the count values. 1078 void setBranchWeights(); 1079 1080 // Annotate the value profile call sites for all value kind. 1081 void annotateValueSites(); 1082 1083 // Annotate the value profile call sites for one value kind. 1084 void annotateValueSites(uint32_t Kind); 1085 1086 // Annotate the irreducible loop header weights. 1087 void annotateIrrLoopHeaderWeights(); 1088 1089 // The hotness of the function from the profile count. 1090 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 1091 1092 // Return the function hotness from the profile. 1093 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 1094 1095 // Return the function hash. 1096 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 1097 1098 // Return the profile record for this function; 1099 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 1100 1101 // Return the auxiliary BB information. 1102 UseBBInfo &getBBInfo(const BasicBlock *BB) const { 1103 return FuncInfo.getBBInfo(BB); 1104 } 1105 1106 // Return the auxiliary BB information if available. 1107 UseBBInfo *findBBInfo(const BasicBlock *BB) const { 1108 return FuncInfo.findBBInfo(BB); 1109 } 1110 1111 Function &getFunc() const { return F; } 1112 1113 void dumpInfo(std::string Str = "") const { 1114 FuncInfo.dumpInfo(Str); 1115 } 1116 1117 uint64_t getProgramMaxCount() const { return ProgramMaxCount; } 1118 private: 1119 Function &F; 1120 Module *M; 1121 BlockFrequencyInfo *BFI; 1122 ProfileSummaryInfo *PSI; 1123 1124 // This member stores the shared information with class PGOGenFunc. 1125 FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo; 1126 1127 // The maximum count value in the profile. This is only used in PGO use 1128 // compilation. 1129 uint64_t ProgramMaxCount; 1130 1131 // Position of counter that remains to be read. 1132 uint32_t CountPosition = 0; 1133 1134 // Total size of the profile count for this function. 1135 uint32_t ProfileCountSize = 0; 1136 1137 // ProfileRecord for this function. 1138 InstrProfRecord ProfileRecord; 1139 1140 // Function hotness info derived from profile. 1141 FuncFreqAttr FreqAttr; 1142 1143 // Is to use the context sensitive profile. 1144 bool IsCS; 1145 1146 // Find the Instrumented BB and set the value. Return false on error. 1147 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 1148 1149 // Set the edge counter value for the unknown edge -- there should be only 1150 // one unknown edge. 1151 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 1152 1153 // Return FuncName string; 1154 std::string getFuncName() const { return FuncInfo.FuncName; } 1155 1156 // Set the hot/cold inline hints based on the count values. 1157 // FIXME: This function should be removed once the functionality in 1158 // the inliner is implemented. 1159 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 1160 if (PSI->isHotCount(EntryCount)) 1161 FreqAttr = FFA_Hot; 1162 else if (PSI->isColdCount(MaxCount)) 1163 FreqAttr = FFA_Cold; 1164 } 1165 }; 1166 1167 } // end anonymous namespace 1168 1169 // Visit all the edges and assign the count value for the instrumented 1170 // edges and the BB. Return false on error. 1171 bool PGOUseFunc::setInstrumentedCounts( 1172 const std::vector<uint64_t> &CountFromProfile) { 1173 1174 std::vector<BasicBlock *> InstrumentBBs; 1175 FuncInfo.getInstrumentBBs(InstrumentBBs); 1176 unsigned NumCounters = 1177 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 1178 // The number of counters here should match the number of counters 1179 // in profile. Return if they mismatch. 1180 if (NumCounters != CountFromProfile.size()) { 1181 return false; 1182 } 1183 auto *FuncEntry = &*F.begin(); 1184 1185 // Set the profile count to the Instrumented BBs. 1186 uint32_t I = 0; 1187 for (BasicBlock *InstrBB : InstrumentBBs) { 1188 uint64_t CountValue = CountFromProfile[I++]; 1189 UseBBInfo &Info = getBBInfo(InstrBB); 1190 // If we reach here, we know that we have some nonzero count 1191 // values in this function. The entry count should not be 0. 1192 // Fix it if necessary. 1193 if (InstrBB == FuncEntry && CountValue == 0) 1194 CountValue = 1; 1195 Info.setBBInfoCount(CountValue); 1196 } 1197 ProfileCountSize = CountFromProfile.size(); 1198 CountPosition = I; 1199 1200 // Set the edge count and update the count of unknown edges for BBs. 1201 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { 1202 E->setEdgeCount(Value); 1203 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1204 this->getBBInfo(E->DestBB).UnknownCountInEdge--; 1205 }; 1206 1207 // Set the profile count the Instrumented edges. There are BBs that not in 1208 // MST but not instrumented. Need to set the edge count value so that we can 1209 // populate the profile counts later. 1210 for (auto &E : FuncInfo.MST.AllEdges) { 1211 if (E->Removed || E->InMST) 1212 continue; 1213 const BasicBlock *SrcBB = E->SrcBB; 1214 UseBBInfo &SrcInfo = getBBInfo(SrcBB); 1215 1216 // If only one out-edge, the edge profile count should be the same as BB 1217 // profile count. 1218 if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1) 1219 setEdgeCount(E.get(), SrcInfo.CountValue); 1220 else { 1221 const BasicBlock *DestBB = E->DestBB; 1222 UseBBInfo &DestInfo = getBBInfo(DestBB); 1223 // If only one in-edge, the edge profile count should be the same as BB 1224 // profile count. 1225 if (DestInfo.CountValid && DestInfo.InEdges.size() == 1) 1226 setEdgeCount(E.get(), DestInfo.CountValue); 1227 } 1228 if (E->CountValid) 1229 continue; 1230 // E's count should have been set from profile. If not, this meenas E skips 1231 // the instrumentation. We set the count to 0. 1232 setEdgeCount(E.get(), 0); 1233 } 1234 return true; 1235 } 1236 1237 // Set the count value for the unknown edge. There should be one and only one 1238 // unknown edge in Edges vector. 1239 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 1240 for (auto &E : Edges) { 1241 if (E->CountValid) 1242 continue; 1243 E->setEdgeCount(Value); 1244 1245 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1246 getBBInfo(E->DestBB).UnknownCountInEdge--; 1247 return; 1248 } 1249 llvm_unreachable("Cannot find the unknown count edge"); 1250 } 1251 1252 // Emit function metadata indicating PGO profile mismatch. 1253 static void annotateFunctionWithHashMismatch(Function &F, 1254 LLVMContext &ctx) { 1255 const char MetadataName[] = "instr_prof_hash_mismatch"; 1256 SmallVector<Metadata *, 2> Names; 1257 // If this metadata already exists, ignore. 1258 auto *Existing = F.getMetadata(LLVMContext::MD_annotation); 1259 if (Existing) { 1260 MDTuple *Tuple = cast<MDTuple>(Existing); 1261 for (auto &N : Tuple->operands()) { 1262 if (cast<MDString>(N.get())->getString() == MetadataName) 1263 return; 1264 Names.push_back(N.get()); 1265 } 1266 } 1267 1268 MDBuilder MDB(ctx); 1269 Names.push_back(MDB.createString(MetadataName)); 1270 MDNode *MD = MDTuple::get(ctx, Names); 1271 F.setMetadata(LLVMContext::MD_annotation, MD); 1272 } 1273 1274 // Read the profile from ProfileFileName and assign the value to the 1275 // instrumented BB and the edges. This function also updates ProgramMaxCount. 1276 // Return true if the profile are successfully read, and false on errors. 1277 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1278 bool &AllMinusOnes) { 1279 auto &Ctx = M->getContext(); 1280 Expected<InstrProfRecord> Result = 1281 PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); 1282 if (Error E = Result.takeError()) { 1283 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 1284 auto Err = IPE.get(); 1285 bool SkipWarning = false; 1286 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " 1287 << FuncInfo.FuncName << ": "); 1288 if (Err == instrprof_error::unknown_function) { 1289 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; 1290 SkipWarning = !PGOWarnMissing; 1291 LLVM_DEBUG(dbgs() << "unknown function"); 1292 } else if (Err == instrprof_error::hash_mismatch || 1293 Err == instrprof_error::malformed) { 1294 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; 1295 SkipWarning = 1296 NoPGOWarnMismatch || 1297 (NoPGOWarnMismatchComdat && 1298 (F.hasComdat() || 1299 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 1300 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); 1301 // Emit function metadata indicating PGO profile mismatch. 1302 annotateFunctionWithHashMismatch(F, M->getContext()); 1303 } 1304 1305 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); 1306 if (SkipWarning) 1307 return; 1308 1309 std::string Msg = IPE.message() + std::string(" ") + F.getName().str() + 1310 std::string(" Hash = ") + 1311 std::to_string(FuncInfo.FunctionHash); 1312 1313 Ctx.diagnose( 1314 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1315 }); 1316 return false; 1317 } 1318 ProfileRecord = std::move(Result.get()); 1319 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 1320 1321 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 1322 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 1323 AllMinusOnes = (CountFromProfile.size() > 0); 1324 uint64_t ValueSum = 0; 1325 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 1326 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 1327 ValueSum += CountFromProfile[I]; 1328 if (CountFromProfile[I] != (uint64_t)-1) 1329 AllMinusOnes = false; 1330 } 1331 AllZeros = (ValueSum == 0); 1332 1333 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 1334 1335 getBBInfo(nullptr).UnknownCountOutEdge = 2; 1336 getBBInfo(nullptr).UnknownCountInEdge = 2; 1337 1338 if (!setInstrumentedCounts(CountFromProfile)) { 1339 LLVM_DEBUG( 1340 dbgs() << "Inconsistent number of counts, skipping this function"); 1341 Ctx.diagnose(DiagnosticInfoPGOProfile( 1342 M->getName().data(), 1343 Twine("Inconsistent number of counts in ") + F.getName().str() 1344 + Twine(": the profile may be stale or there is a function name collision."), 1345 DS_Warning)); 1346 return false; 1347 } 1348 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); 1349 return true; 1350 } 1351 1352 // Populate the counters from instrumented BBs to all BBs. 1353 // In the end of this operation, all BBs should have a valid count value. 1354 void PGOUseFunc::populateCounters() { 1355 bool Changes = true; 1356 unsigned NumPasses = 0; 1357 while (Changes) { 1358 NumPasses++; 1359 Changes = false; 1360 1361 // For efficient traversal, it's better to start from the end as most 1362 // of the instrumented edges are at the end. 1363 for (auto &BB : reverse(F)) { 1364 UseBBInfo *Count = findBBInfo(&BB); 1365 if (Count == nullptr) 1366 continue; 1367 if (!Count->CountValid) { 1368 if (Count->UnknownCountOutEdge == 0) { 1369 Count->CountValue = sumEdgeCount(Count->OutEdges); 1370 Count->CountValid = true; 1371 Changes = true; 1372 } else if (Count->UnknownCountInEdge == 0) { 1373 Count->CountValue = sumEdgeCount(Count->InEdges); 1374 Count->CountValid = true; 1375 Changes = true; 1376 } 1377 } 1378 if (Count->CountValid) { 1379 if (Count->UnknownCountOutEdge == 1) { 1380 uint64_t Total = 0; 1381 uint64_t OutSum = sumEdgeCount(Count->OutEdges); 1382 // If the one of the successor block can early terminate (no-return), 1383 // we can end up with situation where out edge sum count is larger as 1384 // the source BB's count is collected by a post-dominated block. 1385 if (Count->CountValue > OutSum) 1386 Total = Count->CountValue - OutSum; 1387 setEdgeCount(Count->OutEdges, Total); 1388 Changes = true; 1389 } 1390 if (Count->UnknownCountInEdge == 1) { 1391 uint64_t Total = 0; 1392 uint64_t InSum = sumEdgeCount(Count->InEdges); 1393 if (Count->CountValue > InSum) 1394 Total = Count->CountValue - InSum; 1395 setEdgeCount(Count->InEdges, Total); 1396 Changes = true; 1397 } 1398 } 1399 } 1400 } 1401 1402 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 1403 #ifndef NDEBUG 1404 // Assert every BB has a valid counter. 1405 for (auto &BB : F) { 1406 auto BI = findBBInfo(&BB); 1407 if (BI == nullptr) 1408 continue; 1409 assert(BI->CountValid && "BB count is not valid"); 1410 } 1411 #endif 1412 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 1413 uint64_t FuncMaxCount = FuncEntryCount; 1414 for (auto &BB : F) { 1415 auto BI = findBBInfo(&BB); 1416 if (BI == nullptr) 1417 continue; 1418 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); 1419 } 1420 1421 // Fix the obviously inconsistent entry count. 1422 if (FuncMaxCount > 0 && FuncEntryCount == 0) 1423 FuncEntryCount = 1; 1424 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); 1425 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 1426 1427 // Now annotate select instructions 1428 FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition); 1429 assert(CountPosition == ProfileCountSize); 1430 1431 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); 1432 } 1433 1434 // Assign the scaled count values to the BB with multiple out edges. 1435 void PGOUseFunc::setBranchWeights() { 1436 // Generate MD_prof metadata for every branch instruction. 1437 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() 1438 << " IsCS=" << IsCS << "\n"); 1439 for (auto &BB : F) { 1440 Instruction *TI = BB.getTerminator(); 1441 if (TI->getNumSuccessors() < 2) 1442 continue; 1443 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || 1444 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI))) 1445 continue; 1446 1447 if (getBBInfo(&BB).CountValue == 0) 1448 continue; 1449 1450 // We have a non-zero Branch BB. 1451 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1452 unsigned Size = BBCountInfo.OutEdges.size(); 1453 SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 1454 uint64_t MaxCount = 0; 1455 for (unsigned s = 0; s < Size; s++) { 1456 const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 1457 const BasicBlock *SrcBB = E->SrcBB; 1458 const BasicBlock *DestBB = E->DestBB; 1459 if (DestBB == nullptr) 1460 continue; 1461 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 1462 uint64_t EdgeCount = E->CountValue; 1463 if (EdgeCount > MaxCount) 1464 MaxCount = EdgeCount; 1465 EdgeCounts[SuccNum] = EdgeCount; 1466 } 1467 setProfMetadata(M, TI, EdgeCounts, MaxCount); 1468 } 1469 } 1470 1471 static bool isIndirectBrTarget(BasicBlock *BB) { 1472 for (BasicBlock *Pred : predecessors(BB)) { 1473 if (isa<IndirectBrInst>(Pred->getTerminator())) 1474 return true; 1475 } 1476 return false; 1477 } 1478 1479 void PGOUseFunc::annotateIrrLoopHeaderWeights() { 1480 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); 1481 // Find irr loop headers 1482 for (auto &BB : F) { 1483 // As a heuristic also annotate indrectbr targets as they have a high chance 1484 // to become an irreducible loop header after the indirectbr tail 1485 // duplication. 1486 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { 1487 Instruction *TI = BB.getTerminator(); 1488 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1489 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); 1490 } 1491 } 1492 } 1493 1494 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 1495 Module *M = F.getParent(); 1496 IRBuilder<> Builder(&SI); 1497 Type *Int64Ty = Builder.getInt64Ty(); 1498 Type *I8PtrTy = Builder.getInt8PtrTy(); 1499 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 1500 Builder.CreateCall( 1501 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 1502 {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1503 Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), 1504 Builder.getInt32(*CurCtrIdx), Step}); 1505 ++(*CurCtrIdx); 1506 } 1507 1508 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 1509 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1510 assert(*CurCtrIdx < CountFromProfile.size() && 1511 "Out of bound access of counters"); 1512 uint64_t SCounts[2]; 1513 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1514 ++(*CurCtrIdx); 1515 uint64_t TotalCount = 0; 1516 auto BI = UseFunc->findBBInfo(SI.getParent()); 1517 if (BI != nullptr) 1518 TotalCount = BI->CountValue; 1519 // False Count 1520 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1521 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1522 if (MaxCount) 1523 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1524 } 1525 1526 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1527 if (!PGOInstrSelect) 1528 return; 1529 // FIXME: do not handle this yet. 1530 if (SI.getCondition()->getType()->isVectorTy()) 1531 return; 1532 1533 switch (Mode) { 1534 case VM_counting: 1535 NSIs++; 1536 return; 1537 case VM_instrument: 1538 instrumentOneSelectInst(SI); 1539 return; 1540 case VM_annotate: 1541 annotateOneSelectInst(SI); 1542 return; 1543 } 1544 1545 llvm_unreachable("Unknown visiting mode"); 1546 } 1547 1548 // Traverse all valuesites and annotate the instructions for all value kind. 1549 void PGOUseFunc::annotateValueSites() { 1550 if (DisableValueProfiling) 1551 return; 1552 1553 // Create the PGOFuncName meta data. 1554 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1555 1556 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1557 annotateValueSites(Kind); 1558 } 1559 1560 // Annotate the instructions for a specific value kind. 1561 void PGOUseFunc::annotateValueSites(uint32_t Kind) { 1562 assert(Kind <= IPVK_Last); 1563 unsigned ValueSiteIndex = 0; 1564 auto &ValueSites = FuncInfo.ValueSites[Kind]; 1565 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 1566 if (NumValueSites != ValueSites.size()) { 1567 auto &Ctx = M->getContext(); 1568 Ctx.diagnose(DiagnosticInfoPGOProfile( 1569 M->getName().data(), 1570 Twine("Inconsistent number of value sites for ") + 1571 Twine(ValueProfKindDescr[Kind]) + 1572 Twine(" profiling in \"") + F.getName().str() + 1573 Twine("\", possibly due to the use of a stale profile."), 1574 DS_Warning)); 1575 return; 1576 } 1577 1578 for (VPCandidateInfo &I : ValueSites) { 1579 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 1580 << "): Index = " << ValueSiteIndex << " out of " 1581 << NumValueSites << "\n"); 1582 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, 1583 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 1584 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations 1585 : MaxNumAnnotations); 1586 ValueSiteIndex++; 1587 } 1588 } 1589 1590 // Collect the set of members for each Comdat in module M and store 1591 // in ComdatMembers. 1592 static void collectComdatMembers( 1593 Module &M, 1594 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1595 if (!DoComdatRenaming) 1596 return; 1597 for (Function &F : M) 1598 if (Comdat *C = F.getComdat()) 1599 ComdatMembers.insert(std::make_pair(C, &F)); 1600 for (GlobalVariable &GV : M.globals()) 1601 if (Comdat *C = GV.getComdat()) 1602 ComdatMembers.insert(std::make_pair(C, &GV)); 1603 for (GlobalAlias &GA : M.aliases()) 1604 if (Comdat *C = GA.getComdat()) 1605 ComdatMembers.insert(std::make_pair(C, &GA)); 1606 } 1607 1608 static bool InstrumentAllFunctions( 1609 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1610 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1611 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { 1612 // For the context-sensitve instrumentation, we should have a separated pass 1613 // (before LTO/ThinLTO linking) to create these variables. 1614 if (!IsCS) 1615 createIRLevelProfileFlagVar(M, /* IsCS */ false, PGOInstrumentEntry); 1616 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1617 collectComdatMembers(M, ComdatMembers); 1618 1619 for (auto &F : M) { 1620 if (F.isDeclaration()) 1621 continue; 1622 if (F.hasFnAttribute(llvm::Attribute::NoProfile)) 1623 continue; 1624 auto &TLI = LookupTLI(F); 1625 auto *BPI = LookupBPI(F); 1626 auto *BFI = LookupBFI(F); 1627 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS); 1628 } 1629 return true; 1630 } 1631 1632 PreservedAnalyses 1633 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) { 1634 createProfileFileNameVar(M, CSInstrName); 1635 createIRLevelProfileFlagVar(M, /* IsCS */ true, PGOInstrumentEntry); 1636 return PreservedAnalyses::all(); 1637 } 1638 1639 bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) { 1640 if (skipModule(M)) 1641 return false; 1642 1643 auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { 1644 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 1645 }; 1646 auto LookupBPI = [this](Function &F) { 1647 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1648 }; 1649 auto LookupBFI = [this](Function &F) { 1650 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1651 }; 1652 return InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS); 1653 } 1654 1655 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1656 ModuleAnalysisManager &AM) { 1657 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1658 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1659 return FAM.getResult<TargetLibraryAnalysis>(F); 1660 }; 1661 auto LookupBPI = [&FAM](Function &F) { 1662 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1663 }; 1664 auto LookupBFI = [&FAM](Function &F) { 1665 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1666 }; 1667 1668 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS)) 1669 return PreservedAnalyses::all(); 1670 1671 return PreservedAnalyses::none(); 1672 } 1673 1674 // Using the ratio b/w sums of profile count values and BFI count values to 1675 // adjust the func entry count. 1676 static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, 1677 BranchProbabilityInfo &NBPI) { 1678 Function &F = Func.getFunc(); 1679 BlockFrequencyInfo NBFI(F, NBPI, LI); 1680 #ifndef NDEBUG 1681 auto BFIEntryCount = F.getEntryCount(); 1682 assert(BFIEntryCount.hasValue() && (BFIEntryCount.getCount() > 0) && 1683 "Invalid BFI Entrycount"); 1684 #endif 1685 auto SumCount = APFloat::getZero(APFloat::IEEEdouble()); 1686 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble()); 1687 for (auto &BBI : F) { 1688 uint64_t CountValue = 0; 1689 uint64_t BFICountValue = 0; 1690 if (!Func.findBBInfo(&BBI)) 1691 continue; 1692 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1693 CountValue = Func.getBBInfo(&BBI).CountValue; 1694 BFICountValue = BFICount.getValue(); 1695 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven); 1696 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven); 1697 } 1698 if (SumCount.isZero()) 1699 return; 1700 1701 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan && 1702 "Incorrect sum of BFI counts"); 1703 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual) 1704 return; 1705 double Scale = (SumCount / SumBFICount).convertToDouble(); 1706 if (Scale < 1.001 && Scale > 0.999) 1707 return; 1708 1709 uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue; 1710 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale; 1711 if (NewEntryCount == 0) 1712 NewEntryCount = 1; 1713 if (NewEntryCount != FuncEntryCount) { 1714 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real)); 1715 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName() 1716 << ", entry_count " << FuncEntryCount << " --> " 1717 << NewEntryCount << "\n"); 1718 } 1719 } 1720 1721 // Compare the profile count values with BFI count values, and print out 1722 // the non-matching ones. 1723 static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, 1724 BranchProbabilityInfo &NBPI, 1725 uint64_t HotCountThreshold, 1726 uint64_t ColdCountThreshold) { 1727 Function &F = Func.getFunc(); 1728 BlockFrequencyInfo NBFI(F, NBPI, LI); 1729 // bool PrintFunc = false; 1730 bool HotBBOnly = PGOVerifyHotBFI; 1731 std::string Msg; 1732 OptimizationRemarkEmitter ORE(&F); 1733 1734 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0; 1735 for (auto &BBI : F) { 1736 uint64_t CountValue = 0; 1737 uint64_t BFICountValue = 0; 1738 1739 if (Func.getBBInfo(&BBI).CountValid) 1740 CountValue = Func.getBBInfo(&BBI).CountValue; 1741 1742 BBNum++; 1743 if (CountValue) 1744 NonZeroBBNum++; 1745 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1746 if (BFICount) 1747 BFICountValue = BFICount.getValue(); 1748 1749 if (HotBBOnly) { 1750 bool rawIsHot = CountValue >= HotCountThreshold; 1751 bool BFIIsHot = BFICountValue >= HotCountThreshold; 1752 bool rawIsCold = CountValue <= ColdCountThreshold; 1753 bool ShowCount = false; 1754 if (rawIsHot && !BFIIsHot) { 1755 Msg = "raw-Hot to BFI-nonHot"; 1756 ShowCount = true; 1757 } else if (rawIsCold && BFIIsHot) { 1758 Msg = "raw-Cold to BFI-Hot"; 1759 ShowCount = true; 1760 } 1761 if (!ShowCount) 1762 continue; 1763 } else { 1764 if ((CountValue < PGOVerifyBFICutoff) && 1765 (BFICountValue < PGOVerifyBFICutoff)) 1766 continue; 1767 uint64_t Diff = (BFICountValue >= CountValue) 1768 ? BFICountValue - CountValue 1769 : CountValue - BFICountValue; 1770 if (Diff < CountValue / 100 * PGOVerifyBFIRatio) 1771 continue; 1772 } 1773 BBMisMatchNum++; 1774 1775 ORE.emit([&]() { 1776 OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "bfi-verify", 1777 F.getSubprogram(), &BBI); 1778 Remark << "BB " << ore::NV("Block", BBI.getName()) 1779 << " Count=" << ore::NV("Count", CountValue) 1780 << " BFI_Count=" << ore::NV("Count", BFICountValue); 1781 if (!Msg.empty()) 1782 Remark << " (" << Msg << ")"; 1783 return Remark; 1784 }); 1785 } 1786 if (BBMisMatchNum) 1787 ORE.emit([&]() { 1788 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify", 1789 F.getSubprogram(), &F.getEntryBlock()) 1790 << "In Func " << ore::NV("Function", F.getName()) 1791 << ": Num_of_BB=" << ore::NV("Count", BBNum) 1792 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum) 1793 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum); 1794 }); 1795 } 1796 1797 static bool annotateAllFunctions( 1798 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, 1799 function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1800 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1801 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, 1802 ProfileSummaryInfo *PSI, bool IsCS) { 1803 LLVM_DEBUG(dbgs() << "Read in profile counters: "); 1804 auto &Ctx = M.getContext(); 1805 // Read the counter array from file. 1806 auto ReaderOrErr = 1807 IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName); 1808 if (Error E = ReaderOrErr.takeError()) { 1809 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 1810 Ctx.diagnose( 1811 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 1812 }); 1813 return false; 1814 } 1815 1816 std::unique_ptr<IndexedInstrProfReader> PGOReader = 1817 std::move(ReaderOrErr.get()); 1818 if (!PGOReader) { 1819 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 1820 StringRef("Cannot get PGOReader"))); 1821 return false; 1822 } 1823 if (!PGOReader->hasCSIRLevelProfile() && IsCS) 1824 return false; 1825 1826 // TODO: might need to change the warning once the clang option is finalized. 1827 if (!PGOReader->isIRLevelProfile()) { 1828 Ctx.diagnose(DiagnosticInfoPGOProfile( 1829 ProfileFileName.data(), "Not an IR level instrumentation profile")); 1830 return false; 1831 } 1832 1833 // Add the profile summary (read from the header of the indexed summary) here 1834 // so that we can use it below when reading counters (which checks if the 1835 // function should be marked with a cold or inlinehint attribute). 1836 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), 1837 IsCS ? ProfileSummary::PSK_CSInstr 1838 : ProfileSummary::PSK_Instr); 1839 PSI->refresh(); 1840 1841 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1842 collectComdatMembers(M, ComdatMembers); 1843 std::vector<Function *> HotFunctions; 1844 std::vector<Function *> ColdFunctions; 1845 1846 // If the profile marked as always instrument the entry BB, do the 1847 // same. Note this can be overwritten by the internal option in CFGMST.h 1848 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled(); 1849 if (PGOInstrumentEntry.getNumOccurrences() > 0) 1850 InstrumentFuncEntry = PGOInstrumentEntry; 1851 for (auto &F : M) { 1852 if (F.isDeclaration()) 1853 continue; 1854 auto &TLI = LookupTLI(F); 1855 auto *BPI = LookupBPI(F); 1856 auto *BFI = LookupBFI(F); 1857 // Split indirectbr critical edges here before computing the MST rather than 1858 // later in getInstrBB() to avoid invalidating it. 1859 SplitIndirectBrCriticalEdges(F, BPI, BFI); 1860 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, 1861 InstrumentFuncEntry); 1862 // When AllMinusOnes is true, it means the profile for the function 1863 // is unrepresentative and this function is actually hot. Set the 1864 // entry count of the function to be multiple times of hot threshold 1865 // and drop all its internal counters. 1866 bool AllMinusOnes = false; 1867 bool AllZeros = false; 1868 if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes)) 1869 continue; 1870 if (AllZeros) { 1871 F.setEntryCount(ProfileCount(0, Function::PCT_Real)); 1872 if (Func.getProgramMaxCount() != 0) 1873 ColdFunctions.push_back(&F); 1874 continue; 1875 } 1876 const unsigned MultiplyFactor = 3; 1877 if (AllMinusOnes) { 1878 uint64_t HotThreshold = PSI->getHotCountThreshold(); 1879 if (HotThreshold) 1880 F.setEntryCount( 1881 ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real)); 1882 HotFunctions.push_back(&F); 1883 continue; 1884 } 1885 Func.populateCounters(); 1886 Func.setBranchWeights(); 1887 Func.annotateValueSites(); 1888 Func.annotateIrrLoopHeaderWeights(); 1889 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 1890 if (FreqAttr == PGOUseFunc::FFA_Cold) 1891 ColdFunctions.push_back(&F); 1892 else if (FreqAttr == PGOUseFunc::FFA_Hot) 1893 HotFunctions.push_back(&F); 1894 if (PGOViewCounts != PGOVCT_None && 1895 (ViewBlockFreqFuncName.empty() || 1896 F.getName().equals(ViewBlockFreqFuncName))) { 1897 LoopInfo LI{DominatorTree(F)}; 1898 std::unique_ptr<BranchProbabilityInfo> NewBPI = 1899 std::make_unique<BranchProbabilityInfo>(F, LI); 1900 std::unique_ptr<BlockFrequencyInfo> NewBFI = 1901 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); 1902 if (PGOViewCounts == PGOVCT_Graph) 1903 NewBFI->view(); 1904 else if (PGOViewCounts == PGOVCT_Text) { 1905 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n"; 1906 NewBFI->print(dbgs()); 1907 } 1908 } 1909 if (PGOViewRawCounts != PGOVCT_None && 1910 (ViewBlockFreqFuncName.empty() || 1911 F.getName().equals(ViewBlockFreqFuncName))) { 1912 if (PGOViewRawCounts == PGOVCT_Graph) 1913 if (ViewBlockFreqFuncName.empty()) 1914 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1915 else 1916 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1917 else if (PGOViewRawCounts == PGOVCT_Text) { 1918 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n"; 1919 Func.dumpInfo(); 1920 } 1921 } 1922 1923 if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) { 1924 LoopInfo LI{DominatorTree(F)}; 1925 BranchProbabilityInfo NBPI(F, LI); 1926 1927 // Fix func entry count. 1928 if (PGOFixEntryCount) 1929 fixFuncEntryCount(Func, LI, NBPI); 1930 1931 // Verify BlockFrequency information. 1932 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0; 1933 if (PGOVerifyHotBFI) { 1934 HotCountThreshold = PSI->getOrCompHotCountThreshold(); 1935 ColdCountThreshold = PSI->getOrCompColdCountThreshold(); 1936 } 1937 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold); 1938 } 1939 } 1940 1941 // Set function hotness attribute from the profile. 1942 // We have to apply these attributes at the end because their presence 1943 // can affect the BranchProbabilityInfo of any callers, resulting in an 1944 // inconsistent MST between prof-gen and prof-use. 1945 for (auto &F : HotFunctions) { 1946 F->addFnAttr(Attribute::InlineHint); 1947 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 1948 << "\n"); 1949 } 1950 for (auto &F : ColdFunctions) { 1951 // Only set when there is no Attribute::Hot set by the user. For Hot 1952 // attribute, user's annotation has the precedence over the profile. 1953 if (F->hasFnAttribute(Attribute::Hot)) { 1954 auto &Ctx = M.getContext(); 1955 std::string Msg = std::string("Function ") + F->getName().str() + 1956 std::string(" is annotated as a hot function but" 1957 " the profile is cold"); 1958 Ctx.diagnose( 1959 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); 1960 continue; 1961 } 1962 F->addFnAttr(Attribute::Cold); 1963 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() 1964 << "\n"); 1965 } 1966 return true; 1967 } 1968 1969 PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, 1970 std::string RemappingFilename, 1971 bool IsCS) 1972 : ProfileFileName(std::move(Filename)), 1973 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) { 1974 if (!PGOTestProfileFile.empty()) 1975 ProfileFileName = PGOTestProfileFile; 1976 if (!PGOTestProfileRemappingFile.empty()) 1977 ProfileRemappingFileName = PGOTestProfileRemappingFile; 1978 } 1979 1980 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 1981 ModuleAnalysisManager &AM) { 1982 1983 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1984 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1985 return FAM.getResult<TargetLibraryAnalysis>(F); 1986 }; 1987 auto LookupBPI = [&FAM](Function &F) { 1988 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1989 }; 1990 auto LookupBFI = [&FAM](Function &F) { 1991 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1992 }; 1993 1994 auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); 1995 1996 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, 1997 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) 1998 return PreservedAnalyses::all(); 1999 2000 return PreservedAnalyses::none(); 2001 } 2002 2003 bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { 2004 if (skipModule(M)) 2005 return false; 2006 2007 auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { 2008 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 2009 }; 2010 auto LookupBPI = [this](Function &F) { 2011 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 2012 }; 2013 auto LookupBFI = [this](Function &F) { 2014 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 2015 }; 2016 2017 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 2018 return annotateAllFunctions(M, ProfileFileName, "", LookupTLI, LookupBPI, 2019 LookupBFI, PSI, IsCS); 2020 } 2021 2022 static std::string getSimpleNodeName(const BasicBlock *Node) { 2023 if (!Node->getName().empty()) 2024 return std::string(Node->getName()); 2025 2026 std::string SimpleNodeName; 2027 raw_string_ostream OS(SimpleNodeName); 2028 Node->printAsOperand(OS, false); 2029 return OS.str(); 2030 } 2031 2032 void llvm::setProfMetadata(Module *M, Instruction *TI, 2033 ArrayRef<uint64_t> EdgeCounts, 2034 uint64_t MaxCount) { 2035 MDBuilder MDB(M->getContext()); 2036 assert(MaxCount > 0 && "Bad max count"); 2037 uint64_t Scale = calculateCountScale(MaxCount); 2038 SmallVector<unsigned, 4> Weights; 2039 for (const auto &ECI : EdgeCounts) 2040 Weights.push_back(scaleBranchCount(ECI, Scale)); 2041 2042 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W 2043 : Weights) { 2044 dbgs() << W << " "; 2045 } dbgs() << "\n";); 2046 2047 TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 2048 if (EmitBranchProbability) { 2049 std::string BrCondStr = getBranchCondString(TI); 2050 if (BrCondStr.empty()) 2051 return; 2052 2053 uint64_t WSum = 2054 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0, 2055 [](uint64_t w1, uint64_t w2) { return w1 + w2; }); 2056 uint64_t TotalCount = 2057 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0, 2058 [](uint64_t c1, uint64_t c2) { return c1 + c2; }); 2059 Scale = calculateCountScale(WSum); 2060 BranchProbability BP(scaleBranchCount(Weights[0], Scale), 2061 scaleBranchCount(WSum, Scale)); 2062 std::string BranchProbStr; 2063 raw_string_ostream OS(BranchProbStr); 2064 OS << BP; 2065 OS << " (total count : " << TotalCount << ")"; 2066 OS.flush(); 2067 Function *F = TI->getParent()->getParent(); 2068 OptimizationRemarkEmitter ORE(F); 2069 ORE.emit([&]() { 2070 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) 2071 << BrCondStr << " is true with probability : " << BranchProbStr; 2072 }); 2073 } 2074 } 2075 2076 namespace llvm { 2077 2078 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { 2079 MDBuilder MDB(M->getContext()); 2080 TI->setMetadata(llvm::LLVMContext::MD_irr_loop, 2081 MDB.createIrrLoopHeaderWeight(Count)); 2082 } 2083 2084 template <> struct GraphTraits<PGOUseFunc *> { 2085 using NodeRef = const BasicBlock *; 2086 using ChildIteratorType = const_succ_iterator; 2087 using nodes_iterator = pointer_iterator<Function::const_iterator>; 2088 2089 static NodeRef getEntryNode(const PGOUseFunc *G) { 2090 return &G->getFunc().front(); 2091 } 2092 2093 static ChildIteratorType child_begin(const NodeRef N) { 2094 return succ_begin(N); 2095 } 2096 2097 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } 2098 2099 static nodes_iterator nodes_begin(const PGOUseFunc *G) { 2100 return nodes_iterator(G->getFunc().begin()); 2101 } 2102 2103 static nodes_iterator nodes_end(const PGOUseFunc *G) { 2104 return nodes_iterator(G->getFunc().end()); 2105 } 2106 }; 2107 2108 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { 2109 explicit DOTGraphTraits(bool isSimple = false) 2110 : DefaultDOTGraphTraits(isSimple) {} 2111 2112 static std::string getGraphName(const PGOUseFunc *G) { 2113 return std::string(G->getFunc().getName()); 2114 } 2115 2116 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { 2117 std::string Result; 2118 raw_string_ostream OS(Result); 2119 2120 OS << getSimpleNodeName(Node) << ":\\l"; 2121 UseBBInfo *BI = Graph->findBBInfo(Node); 2122 OS << "Count : "; 2123 if (BI && BI->CountValid) 2124 OS << BI->CountValue << "\\l"; 2125 else 2126 OS << "Unknown\\l"; 2127 2128 if (!PGOInstrSelect) 2129 return Result; 2130 2131 for (const Instruction &I : *Node) { 2132 if (!isa<SelectInst>(&I)) 2133 continue; 2134 // Display scaled counts for SELECT instruction: 2135 OS << "SELECT : { T = "; 2136 uint64_t TC, FC; 2137 bool HasProf = I.extractProfMetadata(TC, FC); 2138 if (!HasProf) 2139 OS << "Unknown, F = Unknown }\\l"; 2140 else 2141 OS << TC << ", F = " << FC << " }\\l"; 2142 } 2143 return Result; 2144 } 2145 }; 2146 2147 } // end namespace llvm 2148