1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements PGO instrumentation using a minimum spanning tree based 10 // on the following paper: 11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 13 // Issue 3, pp 313-322 14 // The idea of the algorithm based on the fact that for each node (except for 15 // the entry and exit), the sum of incoming edge counts equals the sum of 16 // outgoing edge counts. The count of edge on spanning tree can be derived from 17 // those edges not on the spanning tree. Knuth proves this method instruments 18 // the minimum number of edges. 19 // 20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree 21 // edges have higher frequencies (more likely to execute). The idea is to 22 // instrument those less frequently executed edges to reduce the runtime 23 // overhead of instrumented binaries. 24 // 25 // This file contains two passes: 26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 27 // count profile, and generates the instrumentation for indirect call 28 // profiling. 29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and 30 // annotates the branch weights. It also reads the indirect call value 31 // profiling records and annotate the indirect call instructions. 32 // 33 // To get the precise counter information, These two passes need to invoke at 34 // the same compilation point (so they see the same IR). For pass 35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For 36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 37 // the profile is opened in module level and passed to each PGOUseFunc instance. 38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 39 // in class FuncPGOInstrumentation. 40 // 41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 42 // BBInfo contains auxiliary information for each BB. These two classes are used 43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 45 // used in populating profile counters. 46 // The MST implementation is in Class CFGMST (CFGMST.h). 47 // 48 //===----------------------------------------------------------------------===// 49 50 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 51 #include "CFGMST.h" 52 #include "ValueProfileCollector.h" 53 #include "llvm/ADT/APInt.h" 54 #include "llvm/ADT/ArrayRef.h" 55 #include "llvm/ADT/MapVector.h" 56 #include "llvm/ADT/STLExtras.h" 57 #include "llvm/ADT/SmallVector.h" 58 #include "llvm/ADT/Statistic.h" 59 #include "llvm/ADT/StringRef.h" 60 #include "llvm/ADT/Triple.h" 61 #include "llvm/ADT/Twine.h" 62 #include "llvm/ADT/iterator.h" 63 #include "llvm/ADT/iterator_range.h" 64 #include "llvm/Analysis/BlockFrequencyInfo.h" 65 #include "llvm/Analysis/BranchProbabilityInfo.h" 66 #include "llvm/Analysis/CFG.h" 67 #include "llvm/Analysis/EHPersonalities.h" 68 #include "llvm/Analysis/LoopInfo.h" 69 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 70 #include "llvm/Analysis/ProfileSummaryInfo.h" 71 #include "llvm/IR/Attributes.h" 72 #include "llvm/IR/BasicBlock.h" 73 #include "llvm/IR/CFG.h" 74 #include "llvm/IR/Comdat.h" 75 #include "llvm/IR/Constant.h" 76 #include "llvm/IR/Constants.h" 77 #include "llvm/IR/DiagnosticInfo.h" 78 #include "llvm/IR/Dominators.h" 79 #include "llvm/IR/Function.h" 80 #include "llvm/IR/GlobalAlias.h" 81 #include "llvm/IR/GlobalValue.h" 82 #include "llvm/IR/GlobalVariable.h" 83 #include "llvm/IR/IRBuilder.h" 84 #include "llvm/IR/InstVisitor.h" 85 #include "llvm/IR/InstrTypes.h" 86 #include "llvm/IR/Instruction.h" 87 #include "llvm/IR/Instructions.h" 88 #include "llvm/IR/IntrinsicInst.h" 89 #include "llvm/IR/Intrinsics.h" 90 #include "llvm/IR/LLVMContext.h" 91 #include "llvm/IR/MDBuilder.h" 92 #include "llvm/IR/Module.h" 93 #include "llvm/IR/PassManager.h" 94 #include "llvm/IR/ProfileSummary.h" 95 #include "llvm/IR/Type.h" 96 #include "llvm/IR/Value.h" 97 #include "llvm/InitializePasses.h" 98 #include "llvm/Pass.h" 99 #include "llvm/ProfileData/InstrProf.h" 100 #include "llvm/ProfileData/InstrProfReader.h" 101 #include "llvm/Support/BranchProbability.h" 102 #include "llvm/Support/CRC.h" 103 #include "llvm/Support/Casting.h" 104 #include "llvm/Support/CommandLine.h" 105 #include "llvm/Support/DOTGraphTraits.h" 106 #include "llvm/Support/Debug.h" 107 #include "llvm/Support/Error.h" 108 #include "llvm/Support/ErrorHandling.h" 109 #include "llvm/Support/GraphWriter.h" 110 #include "llvm/Support/raw_ostream.h" 111 #include "llvm/Transforms/Instrumentation.h" 112 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 113 #include <algorithm> 114 #include <cassert> 115 #include <cstdint> 116 #include <memory> 117 #include <numeric> 118 #include <string> 119 #include <unordered_map> 120 #include <utility> 121 #include <vector> 122 123 using namespace llvm; 124 using ProfileCount = Function::ProfileCount; 125 using VPCandidateInfo = ValueProfileCollector::CandidateInfo; 126 127 #define DEBUG_TYPE "pgo-instrumentation" 128 129 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 130 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 131 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); 132 STATISTIC(NumOfPGOEdge, "Number of edges."); 133 STATISTIC(NumOfPGOBB, "Number of basic-blocks."); 134 STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 135 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 136 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 137 STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 138 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 139 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); 140 STATISTIC(NumOfCSPGOSelectInsts, 141 "Number of select instruction instrumented in CSPGO."); 142 STATISTIC(NumOfCSPGOMemIntrinsics, 143 "Number of mem intrinsics instrumented in CSPGO."); 144 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); 145 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); 146 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO."); 147 STATISTIC(NumOfCSPGOFunc, 148 "Number of functions having valid profile counts in CSPGO."); 149 STATISTIC(NumOfCSPGOMismatch, 150 "Number of functions having mismatch profile in CSPGO."); 151 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); 152 153 // Command line option to specify the file to read profile from. This is 154 // mainly used for testing. 155 static cl::opt<std::string> 156 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, 157 cl::value_desc("filename"), 158 cl::desc("Specify the path of profile data file. This is" 159 "mainly for test purpose.")); 160 static cl::opt<std::string> PGOTestProfileRemappingFile( 161 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, 162 cl::value_desc("filename"), 163 cl::desc("Specify the path of profile remapping file. This is mainly for " 164 "test purpose.")); 165 166 // Command line option to disable value profiling. The default is false: 167 // i.e. value profiling is enabled by default. This is for debug purpose. 168 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 169 cl::Hidden, 170 cl::desc("Disable Value Profiling")); 171 172 // Command line option to set the maximum number of VP annotations to write to 173 // the metadata for a single indirect call callsite. 174 static cl::opt<unsigned> MaxNumAnnotations( 175 "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore, 176 cl::desc("Max number of annotations for a single indirect " 177 "call callsite")); 178 179 // Command line option to set the maximum number of value annotations 180 // to write to the metadata for a single memop intrinsic. 181 static cl::opt<unsigned> MaxNumMemOPAnnotations( 182 "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore, 183 cl::desc("Max number of preicise value annotations for a single memop" 184 "intrinsic")); 185 186 // Command line option to control appending FunctionHash to the name of a COMDAT 187 // function. This is to avoid the hash mismatch caused by the preinliner. 188 static cl::opt<bool> DoComdatRenaming( 189 "do-comdat-renaming", cl::init(false), cl::Hidden, 190 cl::desc("Append function hash to the name of COMDAT function to avoid " 191 "function hash mismatch due to the preinliner")); 192 193 // Command line option to enable/disable the warning about missing profile 194 // information. 195 static cl::opt<bool> 196 PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, 197 cl::desc("Use this option to turn on/off " 198 "warnings about missing profile data for " 199 "functions.")); 200 201 // Command line option to enable/disable the warning about a hash mismatch in 202 // the profile data. 203 static cl::opt<bool> 204 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 205 cl::desc("Use this option to turn off/on " 206 "warnings about profile cfg mismatch.")); 207 208 // Command line option to enable/disable the warning about a hash mismatch in 209 // the profile data for Comdat functions, which often turns out to be false 210 // positive due to the pre-instrumentation inline. 211 static cl::opt<bool> 212 NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), 213 cl::Hidden, 214 cl::desc("The option is used to turn on/off " 215 "warnings about hash mismatch for comdat " 216 "functions.")); 217 218 // Command line option to enable/disable select instruction instrumentation. 219 static cl::opt<bool> 220 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 221 cl::desc("Use this option to turn on/off SELECT " 222 "instruction instrumentation. ")); 223 224 // Command line option to turn on CFG dot or text dump of raw profile counts 225 static cl::opt<PGOViewCountsType> PGOViewRawCounts( 226 "pgo-view-raw-counts", cl::Hidden, 227 cl::desc("A boolean option to show CFG dag or text " 228 "with raw profile counts from " 229 "profile data. See also option " 230 "-pgo-view-counts. To limit graph " 231 "display to only one function, use " 232 "filtering option -view-bfi-func-name."), 233 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), 234 clEnumValN(PGOVCT_Graph, "graph", "show a graph."), 235 clEnumValN(PGOVCT_Text, "text", "show in text."))); 236 237 // Command line option to enable/disable memop intrinsic call.size profiling. 238 static cl::opt<bool> 239 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 240 cl::desc("Use this option to turn on/off " 241 "memory intrinsic size profiling.")); 242 243 // Emit branch probability as optimization remarks. 244 static cl::opt<bool> 245 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 246 cl::desc("When this option is on, the annotated " 247 "branch probability will be emitted as " 248 "optimization remarks: -{Rpass|" 249 "pass-remarks}=pgo-instrumentation")); 250 251 static cl::opt<bool> PGOInstrumentEntry( 252 "pgo-instrument-entry", cl::init(false), cl::Hidden, 253 cl::desc("Force to instrument function entry basicblock.")); 254 255 static cl::opt<bool> 256 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, 257 cl::desc("Fix function entry count in profile use.")); 258 259 static cl::opt<bool> PGOVerifyHotBFI( 260 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden, 261 cl::desc("Print out the non-match BFI count if a hot raw profile count " 262 "becomes non-hot, or a cold raw profile count becomes hot. " 263 "The print is enabled under -Rpass-analysis=pgo, or " 264 "internal option -pass-remakrs-analysis=pgo.")); 265 266 static cl::opt<bool> PGOVerifyBFI( 267 "pgo-verify-bfi", cl::init(false), cl::Hidden, 268 cl::desc("Print out mismatched BFI counts after setting profile metadata " 269 "The print is enabled under -Rpass-analysis=pgo, or " 270 "internal option -pass-remakrs-analysis=pgo.")); 271 272 static cl::opt<unsigned> PGOVerifyBFIRatio( 273 "pgo-verify-bfi-ratio", cl::init(5), cl::Hidden, 274 cl::desc("Set the threshold for pgo-verify-big -- only print out " 275 "mismatched BFI if the difference percentage is greater than " 276 "this value (in percentage).")); 277 278 static cl::opt<unsigned> PGOVerifyBFICutoff( 279 "pgo-verify-bfi-cutoff", cl::init(1), cl::Hidden, 280 cl::desc("Set the threshold for pgo-verify-bfi -- skip the counts whose " 281 "profile count value is below.")); 282 283 // Command line option to turn on CFG dot dump after profile annotation. 284 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts 285 extern cl::opt<PGOViewCountsType> PGOViewCounts; 286 287 // Command line option to specify the name of the function for CFG dump 288 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= 289 extern cl::opt<std::string> ViewBlockFreqFuncName; 290 291 static cl::opt<bool> 292 PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden, 293 cl::desc("Use the old CFG function hashing")); 294 295 // Return a string describing the branch condition that can be 296 // used in static branch probability heuristics: 297 static std::string getBranchCondString(Instruction *TI) { 298 BranchInst *BI = dyn_cast<BranchInst>(TI); 299 if (!BI || !BI->isConditional()) 300 return std::string(); 301 302 Value *Cond = BI->getCondition(); 303 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 304 if (!CI) 305 return std::string(); 306 307 std::string result; 308 raw_string_ostream OS(result); 309 OS << CmpInst::getPredicateName(CI->getPredicate()) << "_"; 310 CI->getOperand(0)->getType()->print(OS, true); 311 312 Value *RHS = CI->getOperand(1); 313 ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 314 if (CV) { 315 if (CV->isZero()) 316 OS << "_Zero"; 317 else if (CV->isOne()) 318 OS << "_One"; 319 else if (CV->isMinusOne()) 320 OS << "_MinusOne"; 321 else 322 OS << "_Const"; 323 } 324 OS.flush(); 325 return result; 326 } 327 328 static const char *ValueProfKindDescr[] = { 329 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, 330 #include "llvm/ProfileData/InstrProfData.inc" 331 }; 332 333 namespace { 334 335 /// The select instruction visitor plays three roles specified 336 /// by the mode. In \c VM_counting mode, it simply counts the number of 337 /// select instructions. In \c VM_instrument mode, it inserts code to count 338 /// the number times TrueValue of select is taken. In \c VM_annotate mode, 339 /// it reads the profile data and annotate the select instruction with metadata. 340 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 341 class PGOUseFunc; 342 343 /// Instruction Visitor class to visit select instructions. 344 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 345 Function &F; 346 unsigned NSIs = 0; // Number of select instructions instrumented. 347 VisitMode Mode = VM_counting; // Visiting mode. 348 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 349 unsigned TotalNumCtrs = 0; // Total number of counters 350 GlobalVariable *FuncNameVar = nullptr; 351 uint64_t FuncHash = 0; 352 PGOUseFunc *UseFunc = nullptr; 353 354 SelectInstVisitor(Function &Func) : F(Func) {} 355 356 void countSelects(Function &Func) { 357 NSIs = 0; 358 Mode = VM_counting; 359 visit(Func); 360 } 361 362 // Visit the IR stream and instrument all select instructions. \p 363 // Ind is a pointer to the counter index variable; \p TotalNC 364 // is the total number of counters; \p FNV is the pointer to the 365 // PGO function name var; \p FHash is the function hash. 366 void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC, 367 GlobalVariable *FNV, uint64_t FHash) { 368 Mode = VM_instrument; 369 CurCtrIdx = Ind; 370 TotalNumCtrs = TotalNC; 371 FuncHash = FHash; 372 FuncNameVar = FNV; 373 visit(Func); 374 } 375 376 // Visit the IR stream and annotate all select instructions. 377 void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) { 378 Mode = VM_annotate; 379 UseFunc = UF; 380 CurCtrIdx = Ind; 381 visit(Func); 382 } 383 384 void instrumentOneSelectInst(SelectInst &SI); 385 void annotateOneSelectInst(SelectInst &SI); 386 387 // Visit \p SI instruction and perform tasks according to visit mode. 388 void visitSelectInst(SelectInst &SI); 389 390 // Return the number of select instructions. This needs be called after 391 // countSelects(). 392 unsigned getNumOfSelectInsts() const { return NSIs; } 393 }; 394 395 396 class PGOInstrumentationGenLegacyPass : public ModulePass { 397 public: 398 static char ID; 399 400 PGOInstrumentationGenLegacyPass(bool IsCS = false) 401 : ModulePass(ID), IsCS(IsCS) { 402 initializePGOInstrumentationGenLegacyPassPass( 403 *PassRegistry::getPassRegistry()); 404 } 405 406 StringRef getPassName() const override { return "PGOInstrumentationGenPass"; } 407 408 private: 409 // Is this is context-sensitive instrumentation. 410 bool IsCS; 411 bool runOnModule(Module &M) override; 412 413 void getAnalysisUsage(AnalysisUsage &AU) const override { 414 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 415 AU.addRequired<TargetLibraryInfoWrapperPass>(); 416 } 417 }; 418 419 class PGOInstrumentationUseLegacyPass : public ModulePass { 420 public: 421 static char ID; 422 423 // Provide the profile filename as the parameter. 424 PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false) 425 : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) { 426 if (!PGOTestProfileFile.empty()) 427 ProfileFileName = PGOTestProfileFile; 428 initializePGOInstrumentationUseLegacyPassPass( 429 *PassRegistry::getPassRegistry()); 430 } 431 432 StringRef getPassName() const override { return "PGOInstrumentationUsePass"; } 433 434 private: 435 std::string ProfileFileName; 436 // Is this is context-sensitive instrumentation use. 437 bool IsCS; 438 439 bool runOnModule(Module &M) override; 440 441 void getAnalysisUsage(AnalysisUsage &AU) const override { 442 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 443 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 444 AU.addRequired<TargetLibraryInfoWrapperPass>(); 445 } 446 }; 447 448 class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass { 449 public: 450 static char ID; 451 StringRef getPassName() const override { 452 return "PGOInstrumentationGenCreateVarPass"; 453 } 454 PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "") 455 : ModulePass(ID), InstrProfileOutput(CSInstrName) { 456 initializePGOInstrumentationGenCreateVarLegacyPassPass( 457 *PassRegistry::getPassRegistry()); 458 } 459 460 private: 461 bool runOnModule(Module &M) override { 462 createProfileFileNameVar(M, InstrProfileOutput); 463 createIRLevelProfileFlagVar(M, /* IsCS */ true, PGOInstrumentEntry); 464 return false; 465 } 466 std::string InstrProfileOutput; 467 }; 468 469 } // end anonymous namespace 470 471 char PGOInstrumentationGenLegacyPass::ID = 0; 472 473 INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 474 "PGO instrumentation.", false, false) 475 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 476 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 477 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 478 INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 479 "PGO instrumentation.", false, false) 480 481 ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) { 482 return new PGOInstrumentationGenLegacyPass(IsCS); 483 } 484 485 char PGOInstrumentationUseLegacyPass::ID = 0; 486 487 INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 488 "Read PGO instrumentation profile.", false, false) 489 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 490 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 491 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 492 INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 493 "Read PGO instrumentation profile.", false, false) 494 495 ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename, 496 bool IsCS) { 497 return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS); 498 } 499 500 char PGOInstrumentationGenCreateVarLegacyPass::ID = 0; 501 502 INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass, 503 "pgo-instr-gen-create-var", 504 "Create PGO instrumentation version variable for CSPGO.", false, 505 false) 506 507 ModulePass * 508 llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) { 509 return new PGOInstrumentationGenCreateVarLegacyPass(std::string(CSInstrName)); 510 } 511 512 namespace { 513 514 /// An MST based instrumentation for PGO 515 /// 516 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO 517 /// in the function level. 518 struct PGOEdge { 519 // This class implements the CFG edges. Note the CFG can be a multi-graph. 520 // So there might be multiple edges with same SrcBB and DestBB. 521 const BasicBlock *SrcBB; 522 const BasicBlock *DestBB; 523 uint64_t Weight; 524 bool InMST = false; 525 bool Removed = false; 526 bool IsCritical = false; 527 528 PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 529 : SrcBB(Src), DestBB(Dest), Weight(W) {} 530 531 // Return the information string of an edge. 532 std::string infoString() const { 533 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 534 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str(); 535 } 536 }; 537 538 // This class stores the auxiliary information for each BB. 539 struct BBInfo { 540 BBInfo *Group; 541 uint32_t Index; 542 uint32_t Rank = 0; 543 544 BBInfo(unsigned IX) : Group(this), Index(IX) {} 545 546 // Return the information string of this object. 547 std::string infoString() const { 548 return (Twine("Index=") + Twine(Index)).str(); 549 } 550 551 // Empty function -- only applicable to UseBBInfo. 552 void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 553 554 // Empty function -- only applicable to UseBBInfo. 555 void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 556 }; 557 558 // This class implements the CFG edges. Note the CFG can be a multi-graph. 559 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 560 private: 561 Function &F; 562 563 // Is this is context-sensitive instrumentation. 564 bool IsCS; 565 566 // A map that stores the Comdat group in function F. 567 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 568 569 ValueProfileCollector VPC; 570 571 void computeCFGHash(); 572 void renameComdatFunction(); 573 574 public: 575 std::vector<std::vector<VPCandidateInfo>> ValueSites; 576 SelectInstVisitor SIVisitor; 577 std::string FuncName; 578 GlobalVariable *FuncNameVar; 579 580 // CFG hash value for this function. 581 uint64_t FunctionHash = 0; 582 583 // The Minimum Spanning Tree of function CFG. 584 CFGMST<Edge, BBInfo> MST; 585 586 // Collect all the BBs that will be instrumented, and store them in 587 // InstrumentBBs. 588 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); 589 590 // Give an edge, find the BB that will be instrumented. 591 // Return nullptr if there is no BB to be instrumented. 592 BasicBlock *getInstrBB(Edge *E); 593 594 // Return the auxiliary BB information. 595 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 596 597 // Return the auxiliary BB information if available. 598 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 599 600 // Dump edges and BB information. 601 void dumpInfo(std::string Str = "") const { 602 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " + 603 Twine(FunctionHash) + "\t" + Str); 604 } 605 606 FuncPGOInstrumentation( 607 Function &Func, TargetLibraryInfo &TLI, 608 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 609 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 610 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false, 611 bool InstrumentFuncEntry = true) 612 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), 613 ValueSites(IPVK_Last + 1), SIVisitor(Func), 614 MST(F, InstrumentFuncEntry, BPI, BFI) { 615 // This should be done before CFG hash computation. 616 SIVisitor.countSelects(Func); 617 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); 618 if (!IsCS) { 619 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 620 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 621 NumOfPGOBB += MST.BBInfos.size(); 622 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); 623 } else { 624 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 625 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 626 NumOfCSPGOBB += MST.BBInfos.size(); 627 } 628 629 FuncName = getPGOFuncName(F); 630 computeCFGHash(); 631 if (!ComdatMembers.empty()) 632 renameComdatFunction(); 633 LLVM_DEBUG(dumpInfo("after CFGMST")); 634 635 for (auto &E : MST.AllEdges) { 636 if (E->Removed) 637 continue; 638 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; 639 if (!E->InMST) 640 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; 641 } 642 643 if (CreateGlobalVar) 644 FuncNameVar = createPGOFuncNameVar(F, FuncName); 645 } 646 }; 647 648 } // end anonymous namespace 649 650 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 651 // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers 652 // of selects, indirect calls, mem ops and edges. 653 template <class Edge, class BBInfo> 654 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 655 std::vector<uint8_t> Indexes; 656 JamCRC JC; 657 for (auto &BB : F) { 658 const Instruction *TI = BB.getTerminator(); 659 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 660 BasicBlock *Succ = TI->getSuccessor(I); 661 auto BI = findBBInfo(Succ); 662 if (BI == nullptr) 663 continue; 664 uint32_t Index = BI->Index; 665 for (int J = 0; J < 4; J++) 666 Indexes.push_back((uint8_t)(Index >> (J * 8))); 667 } 668 } 669 JC.update(Indexes); 670 671 JamCRC JCH; 672 if (PGOOldCFGHashing) { 673 // Hash format for context sensitive profile. Reserve 4 bits for other 674 // information. 675 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | 676 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | 677 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 | 678 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); 679 } else { 680 // The higher 32 bits. 681 auto updateJCH = [&JCH](uint64_t Num) { 682 uint8_t Data[8]; 683 support::endian::write64le(Data, Num); 684 JCH.update(Data); 685 }; 686 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts()); 687 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size()); 688 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size()); 689 updateJCH((uint64_t)MST.AllEdges.size()); 690 691 // Hash format for context sensitive profile. Reserve 4 bits for other 692 // information. 693 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); 694 } 695 696 // Reserve bit 60-63 for other information purpose. 697 FunctionHash &= 0x0FFFFFFFFFFFFFFF; 698 if (IsCS) 699 NamedInstrProfRecord::setCSFlagInHash(FunctionHash); 700 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" 701 << " CRC = " << JC.getCRC() 702 << ", Selects = " << SIVisitor.getNumOfSelectInsts() 703 << ", Edges = " << MST.AllEdges.size() << ", ICSites = " 704 << ValueSites[IPVK_IndirectCallTarget].size()); 705 if (!PGOOldCFGHashing) { 706 LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size() 707 << ", High32 CRC = " << JCH.getCRC()); 708 } 709 LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";); 710 } 711 712 // Check if we can safely rename this Comdat function. 713 static bool canRenameComdat( 714 Function &F, 715 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 716 if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 717 return false; 718 719 // FIXME: Current only handle those Comdat groups that only containing one 720 // function. 721 // (1) For a Comdat group containing multiple functions, we need to have a 722 // unique postfix based on the hashes for each function. There is a 723 // non-trivial code refactoring to do this efficiently. 724 // (2) Variables can not be renamed, so we can not rename Comdat function in a 725 // group including global vars. 726 Comdat *C = F.getComdat(); 727 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 728 assert(!isa<GlobalAlias>(CM.second)); 729 Function *FM = dyn_cast<Function>(CM.second); 730 if (FM != &F) 731 return false; 732 } 733 return true; 734 } 735 736 // Append the CFGHash to the Comdat function name. 737 template <class Edge, class BBInfo> 738 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 739 if (!canRenameComdat(F, ComdatMembers)) 740 return; 741 std::string OrigName = F.getName().str(); 742 std::string NewFuncName = 743 Twine(F.getName() + "." + Twine(FunctionHash)).str(); 744 F.setName(Twine(NewFuncName)); 745 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 746 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 747 Comdat *NewComdat; 748 Module *M = F.getParent(); 749 // For AvailableExternallyLinkage functions, change the linkage to 750 // LinkOnceODR and put them into comdat. This is because after renaming, there 751 // is no backup external copy available for the function. 752 if (!F.hasComdat()) { 753 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 754 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 755 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 756 F.setComdat(NewComdat); 757 return; 758 } 759 760 // This function belongs to a single function Comdat group. 761 Comdat *OrigComdat = F.getComdat(); 762 std::string NewComdatName = 763 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 764 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 765 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 766 767 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 768 // Must be a function. 769 cast<Function>(CM.second)->setComdat(NewComdat); 770 } 771 } 772 773 // Collect all the BBs that will be instruments and return them in 774 // InstrumentBBs and setup InEdges/OutEdge for UseBBInfo. 775 template <class Edge, class BBInfo> 776 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( 777 std::vector<BasicBlock *> &InstrumentBBs) { 778 // Use a worklist as we will update the vector during the iteration. 779 std::vector<Edge *> EdgeList; 780 EdgeList.reserve(MST.AllEdges.size()); 781 for (auto &E : MST.AllEdges) 782 EdgeList.push_back(E.get()); 783 784 for (auto &E : EdgeList) { 785 BasicBlock *InstrBB = getInstrBB(E); 786 if (InstrBB) 787 InstrumentBBs.push_back(InstrBB); 788 } 789 790 // Set up InEdges/OutEdges for all BBs. 791 for (auto &E : MST.AllEdges) { 792 if (E->Removed) 793 continue; 794 const BasicBlock *SrcBB = E->SrcBB; 795 const BasicBlock *DestBB = E->DestBB; 796 BBInfo &SrcInfo = getBBInfo(SrcBB); 797 BBInfo &DestInfo = getBBInfo(DestBB); 798 SrcInfo.addOutEdge(E.get()); 799 DestInfo.addInEdge(E.get()); 800 } 801 } 802 803 // Given a CFG E to be instrumented, find which BB to place the instrumented 804 // code. The function will split the critical edge if necessary. 805 template <class Edge, class BBInfo> 806 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 807 if (E->InMST || E->Removed) 808 return nullptr; 809 810 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 811 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 812 // For a fake edge, instrument the real BB. 813 if (SrcBB == nullptr) 814 return DestBB; 815 if (DestBB == nullptr) 816 return SrcBB; 817 818 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { 819 // There are basic blocks (such as catchswitch) cannot be instrumented. 820 // If the returned first insertion point is the end of BB, skip this BB. 821 if (BB->getFirstInsertionPt() == BB->end()) 822 return nullptr; 823 return BB; 824 }; 825 826 // Instrument the SrcBB if it has a single successor, 827 // otherwise, the DestBB if this is not a critical edge. 828 Instruction *TI = SrcBB->getTerminator(); 829 if (TI->getNumSuccessors() <= 1) 830 return canInstrument(SrcBB); 831 if (!E->IsCritical) 832 return canInstrument(DestBB); 833 834 // Some IndirectBr critical edges cannot be split by the previous 835 // SplitIndirectBrCriticalEdges call. Bail out. 836 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 837 BasicBlock *InstrBB = 838 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum); 839 if (!InstrBB) { 840 LLVM_DEBUG( 841 dbgs() << "Fail to split critical edge: not instrument this edge.\n"); 842 return nullptr; 843 } 844 // For a critical edge, we have to split. Instrument the newly 845 // created BB. 846 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; 847 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index 848 << " --> " << getBBInfo(DestBB).Index << "\n"); 849 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. 850 MST.addEdge(SrcBB, InstrBB, 0); 851 // Second one: Add new edge of InstrBB->DestBB. 852 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); 853 NewEdge1.InMST = true; 854 E->Removed = true; 855 856 return canInstrument(InstrBB); 857 } 858 859 // When generating value profiling calls on Windows routines that make use of 860 // handler funclets for exception processing an operand bundle needs to attached 861 // to the called function. This routine will set \p OpBundles to contain the 862 // funclet information, if any is needed, that should be placed on the generated 863 // value profiling call for the value profile candidate call. 864 static void 865 populateEHOperandBundle(VPCandidateInfo &Cand, 866 DenseMap<BasicBlock *, ColorVector> &BlockColors, 867 SmallVectorImpl<OperandBundleDef> &OpBundles) { 868 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst); 869 if (OrigCall && !isa<IntrinsicInst>(OrigCall)) { 870 // The instrumentation call should belong to the same funclet as a 871 // non-intrinsic call, so just copy the operand bundle, if any exists. 872 Optional<OperandBundleUse> ParentFunclet = 873 OrigCall->getOperandBundle(LLVMContext::OB_funclet); 874 if (ParentFunclet) 875 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet)); 876 } else { 877 // Intrinsics or other instructions do not get funclet information from the 878 // front-end. Need to use the BlockColors that was computed by the routine 879 // colorEHFunclets to determine whether a funclet is needed. 880 if (!BlockColors.empty()) { 881 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second; 882 assert(CV.size() == 1 && "non-unique color for block!"); 883 Instruction *EHPad = CV.front()->getFirstNonPHI(); 884 if (EHPad->isEHPad()) 885 OpBundles.emplace_back("funclet", EHPad); 886 } 887 } 888 } 889 890 // Visit all edge and instrument the edges not in MST, and do value profiling. 891 // Critical edges will be split. 892 static void instrumentOneFunc( 893 Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, 894 BlockFrequencyInfo *BFI, 895 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 896 bool IsCS) { 897 // Split indirectbr critical edges here before computing the MST rather than 898 // later in getInstrBB() to avoid invalidating it. 899 SplitIndirectBrCriticalEdges(F, BPI, BFI); 900 901 FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo( 902 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry); 903 std::vector<BasicBlock *> InstrumentBBs; 904 FuncInfo.getInstrumentBBs(InstrumentBBs); 905 unsigned NumCounters = 906 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 907 908 uint32_t I = 0; 909 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); 910 for (auto *InstrBB : InstrumentBBs) { 911 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 912 assert(Builder.GetInsertPoint() != InstrBB->end() && 913 "Cannot get the Instrumentation point"); 914 Builder.CreateCall( 915 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), 916 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 917 Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters), 918 Builder.getInt32(I++)}); 919 } 920 921 // Now instrument select instructions: 922 FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar, 923 FuncInfo.FunctionHash); 924 assert(I == NumCounters); 925 926 if (DisableValueProfiling) 927 return; 928 929 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); 930 931 // Intrinsic function calls do not have funclet operand bundles needed for 932 // Windows exception handling attached to them. However, if value profiling is 933 // inserted for one of these calls, then a funclet value will need to be set 934 // on the instrumentation call based on the funclet coloring. 935 DenseMap<BasicBlock *, ColorVector> BlockColors; 936 if (F.hasPersonalityFn() && 937 isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) 938 BlockColors = colorEHFunclets(F); 939 940 // For each VP Kind, walk the VP candidates and instrument each one. 941 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { 942 unsigned SiteIndex = 0; 943 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) 944 continue; 945 946 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { 947 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] 948 << " site: CallSite Index = " << SiteIndex << "\n"); 949 950 IRBuilder<> Builder(Cand.InsertPt); 951 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && 952 "Cannot get the Instrumentation point"); 953 954 Value *ToProfile = nullptr; 955 if (Cand.V->getType()->isIntegerTy()) 956 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty()); 957 else if (Cand.V->getType()->isPointerTy()) 958 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); 959 assert(ToProfile && "value profiling Value is of unexpected type"); 960 961 SmallVector<OperandBundleDef, 1> OpBundles; 962 populateEHOperandBundle(Cand, BlockColors, OpBundles); 963 Builder.CreateCall( 964 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 965 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 966 Builder.getInt64(FuncInfo.FunctionHash), ToProfile, 967 Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}, 968 OpBundles); 969 } 970 } // IPVK_First <= Kind <= IPVK_Last 971 } 972 973 namespace { 974 975 // This class represents a CFG edge in profile use compilation. 976 struct PGOUseEdge : public PGOEdge { 977 bool CountValid = false; 978 uint64_t CountValue = 0; 979 980 PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 981 : PGOEdge(Src, Dest, W) {} 982 983 // Set edge count value 984 void setEdgeCount(uint64_t Value) { 985 CountValue = Value; 986 CountValid = true; 987 } 988 989 // Return the information string for this object. 990 std::string infoString() const { 991 if (!CountValid) 992 return PGOEdge::infoString(); 993 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) 994 .str(); 995 } 996 }; 997 998 using DirectEdges = SmallVector<PGOUseEdge *, 2>; 999 1000 // This class stores the auxiliary information for each BB. 1001 struct UseBBInfo : public BBInfo { 1002 uint64_t CountValue = 0; 1003 bool CountValid; 1004 int32_t UnknownCountInEdge = 0; 1005 int32_t UnknownCountOutEdge = 0; 1006 DirectEdges InEdges; 1007 DirectEdges OutEdges; 1008 1009 UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {} 1010 1011 UseBBInfo(unsigned IX, uint64_t C) 1012 : BBInfo(IX), CountValue(C), CountValid(true) {} 1013 1014 // Set the profile count value for this BB. 1015 void setBBInfoCount(uint64_t Value) { 1016 CountValue = Value; 1017 CountValid = true; 1018 } 1019 1020 // Return the information string of this object. 1021 std::string infoString() const { 1022 if (!CountValid) 1023 return BBInfo::infoString(); 1024 return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str(); 1025 } 1026 1027 // Add an OutEdge and update the edge count. 1028 void addOutEdge(PGOUseEdge *E) { 1029 OutEdges.push_back(E); 1030 UnknownCountOutEdge++; 1031 } 1032 1033 // Add an InEdge and update the edge count. 1034 void addInEdge(PGOUseEdge *E) { 1035 InEdges.push_back(E); 1036 UnknownCountInEdge++; 1037 } 1038 }; 1039 1040 } // end anonymous namespace 1041 1042 // Sum up the count values for all the edges. 1043 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 1044 uint64_t Total = 0; 1045 for (auto &E : Edges) { 1046 if (E->Removed) 1047 continue; 1048 Total += E->CountValue; 1049 } 1050 return Total; 1051 } 1052 1053 namespace { 1054 1055 class PGOUseFunc { 1056 public: 1057 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, 1058 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 1059 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, 1060 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry) 1061 : F(Func), M(Modu), BFI(BFIin), PSI(PSI), 1062 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS, 1063 InstrumentFuncEntry), 1064 FreqAttr(FFA_Normal), IsCS(IsCS) {} 1065 1066 // Read counts for the instrumented BB from profile. 1067 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1068 bool &AllMinusOnes); 1069 1070 // Populate the counts for all BBs. 1071 void populateCounters(); 1072 1073 // Set the branch weights based on the count values. 1074 void setBranchWeights(); 1075 1076 // Annotate the value profile call sites for all value kind. 1077 void annotateValueSites(); 1078 1079 // Annotate the value profile call sites for one value kind. 1080 void annotateValueSites(uint32_t Kind); 1081 1082 // Annotate the irreducible loop header weights. 1083 void annotateIrrLoopHeaderWeights(); 1084 1085 // The hotness of the function from the profile count. 1086 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 1087 1088 // Return the function hotness from the profile. 1089 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 1090 1091 // Return the function hash. 1092 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 1093 1094 // Return the profile record for this function; 1095 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 1096 1097 // Return the auxiliary BB information. 1098 UseBBInfo &getBBInfo(const BasicBlock *BB) const { 1099 return FuncInfo.getBBInfo(BB); 1100 } 1101 1102 // Return the auxiliary BB information if available. 1103 UseBBInfo *findBBInfo(const BasicBlock *BB) const { 1104 return FuncInfo.findBBInfo(BB); 1105 } 1106 1107 Function &getFunc() const { return F; } 1108 1109 void dumpInfo(std::string Str = "") const { 1110 FuncInfo.dumpInfo(Str); 1111 } 1112 1113 uint64_t getProgramMaxCount() const { return ProgramMaxCount; } 1114 private: 1115 Function &F; 1116 Module *M; 1117 BlockFrequencyInfo *BFI; 1118 ProfileSummaryInfo *PSI; 1119 1120 // This member stores the shared information with class PGOGenFunc. 1121 FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo; 1122 1123 // The maximum count value in the profile. This is only used in PGO use 1124 // compilation. 1125 uint64_t ProgramMaxCount; 1126 1127 // Position of counter that remains to be read. 1128 uint32_t CountPosition = 0; 1129 1130 // Total size of the profile count for this function. 1131 uint32_t ProfileCountSize = 0; 1132 1133 // ProfileRecord for this function. 1134 InstrProfRecord ProfileRecord; 1135 1136 // Function hotness info derived from profile. 1137 FuncFreqAttr FreqAttr; 1138 1139 // Is to use the context sensitive profile. 1140 bool IsCS; 1141 1142 // Find the Instrumented BB and set the value. Return false on error. 1143 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 1144 1145 // Set the edge counter value for the unknown edge -- there should be only 1146 // one unknown edge. 1147 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 1148 1149 // Return FuncName string; 1150 std::string getFuncName() const { return FuncInfo.FuncName; } 1151 1152 // Set the hot/cold inline hints based on the count values. 1153 // FIXME: This function should be removed once the functionality in 1154 // the inliner is implemented. 1155 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 1156 if (PSI->isHotCount(EntryCount)) 1157 FreqAttr = FFA_Hot; 1158 else if (PSI->isColdCount(MaxCount)) 1159 FreqAttr = FFA_Cold; 1160 } 1161 }; 1162 1163 } // end anonymous namespace 1164 1165 // Visit all the edges and assign the count value for the instrumented 1166 // edges and the BB. Return false on error. 1167 bool PGOUseFunc::setInstrumentedCounts( 1168 const std::vector<uint64_t> &CountFromProfile) { 1169 1170 std::vector<BasicBlock *> InstrumentBBs; 1171 FuncInfo.getInstrumentBBs(InstrumentBBs); 1172 unsigned NumCounters = 1173 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 1174 // The number of counters here should match the number of counters 1175 // in profile. Return if they mismatch. 1176 if (NumCounters != CountFromProfile.size()) { 1177 return false; 1178 } 1179 auto *FuncEntry = &*F.begin(); 1180 1181 // Set the profile count to the Instrumented BBs. 1182 uint32_t I = 0; 1183 for (BasicBlock *InstrBB : InstrumentBBs) { 1184 uint64_t CountValue = CountFromProfile[I++]; 1185 UseBBInfo &Info = getBBInfo(InstrBB); 1186 // If we reach here, we know that we have some nonzero count 1187 // values in this function. The entry count should not be 0. 1188 // Fix it if necessary. 1189 if (InstrBB == FuncEntry && CountValue == 0) 1190 CountValue = 1; 1191 Info.setBBInfoCount(CountValue); 1192 } 1193 ProfileCountSize = CountFromProfile.size(); 1194 CountPosition = I; 1195 1196 // Set the edge count and update the count of unknown edges for BBs. 1197 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { 1198 E->setEdgeCount(Value); 1199 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1200 this->getBBInfo(E->DestBB).UnknownCountInEdge--; 1201 }; 1202 1203 // Set the profile count the Instrumented edges. There are BBs that not in 1204 // MST but not instrumented. Need to set the edge count value so that we can 1205 // populate the profile counts later. 1206 for (auto &E : FuncInfo.MST.AllEdges) { 1207 if (E->Removed || E->InMST) 1208 continue; 1209 const BasicBlock *SrcBB = E->SrcBB; 1210 UseBBInfo &SrcInfo = getBBInfo(SrcBB); 1211 1212 // If only one out-edge, the edge profile count should be the same as BB 1213 // profile count. 1214 if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1) 1215 setEdgeCount(E.get(), SrcInfo.CountValue); 1216 else { 1217 const BasicBlock *DestBB = E->DestBB; 1218 UseBBInfo &DestInfo = getBBInfo(DestBB); 1219 // If only one in-edge, the edge profile count should be the same as BB 1220 // profile count. 1221 if (DestInfo.CountValid && DestInfo.InEdges.size() == 1) 1222 setEdgeCount(E.get(), DestInfo.CountValue); 1223 } 1224 if (E->CountValid) 1225 continue; 1226 // E's count should have been set from profile. If not, this meenas E skips 1227 // the instrumentation. We set the count to 0. 1228 setEdgeCount(E.get(), 0); 1229 } 1230 return true; 1231 } 1232 1233 // Set the count value for the unknown edge. There should be one and only one 1234 // unknown edge in Edges vector. 1235 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 1236 for (auto &E : Edges) { 1237 if (E->CountValid) 1238 continue; 1239 E->setEdgeCount(Value); 1240 1241 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1242 getBBInfo(E->DestBB).UnknownCountInEdge--; 1243 return; 1244 } 1245 llvm_unreachable("Cannot find the unknown count edge"); 1246 } 1247 1248 // Emit function metadata indicating PGO profile mismatch. 1249 static void annotateFunctionWithHashMismatch(Function &F, 1250 LLVMContext &ctx) { 1251 const char MetadataName[] = "instr_prof_hash_mismatch"; 1252 SmallVector<Metadata *, 2> Names; 1253 // If this metadata already exists, ignore. 1254 auto *Existing = F.getMetadata(LLVMContext::MD_annotation); 1255 if (Existing) { 1256 MDTuple *Tuple = cast<MDTuple>(Existing); 1257 for (auto &N : Tuple->operands()) { 1258 if (cast<MDString>(N.get())->getString() == MetadataName) 1259 return; 1260 Names.push_back(N.get()); 1261 } 1262 } 1263 1264 MDBuilder MDB(ctx); 1265 Names.push_back(MDB.createString(MetadataName)); 1266 MDNode *MD = MDTuple::get(ctx, Names); 1267 F.setMetadata(LLVMContext::MD_annotation, MD); 1268 } 1269 1270 // Read the profile from ProfileFileName and assign the value to the 1271 // instrumented BB and the edges. This function also updates ProgramMaxCount. 1272 // Return true if the profile are successfully read, and false on errors. 1273 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1274 bool &AllMinusOnes) { 1275 auto &Ctx = M->getContext(); 1276 Expected<InstrProfRecord> Result = 1277 PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); 1278 if (Error E = Result.takeError()) { 1279 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 1280 auto Err = IPE.get(); 1281 bool SkipWarning = false; 1282 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " 1283 << FuncInfo.FuncName << ": "); 1284 if (Err == instrprof_error::unknown_function) { 1285 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; 1286 SkipWarning = !PGOWarnMissing; 1287 LLVM_DEBUG(dbgs() << "unknown function"); 1288 } else if (Err == instrprof_error::hash_mismatch || 1289 Err == instrprof_error::malformed) { 1290 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; 1291 SkipWarning = 1292 NoPGOWarnMismatch || 1293 (NoPGOWarnMismatchComdat && 1294 (F.hasComdat() || 1295 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 1296 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); 1297 // Emit function metadata indicating PGO profile mismatch. 1298 annotateFunctionWithHashMismatch(F, M->getContext()); 1299 } 1300 1301 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); 1302 if (SkipWarning) 1303 return; 1304 1305 std::string Msg = IPE.message() + std::string(" ") + F.getName().str() + 1306 std::string(" Hash = ") + 1307 std::to_string(FuncInfo.FunctionHash); 1308 1309 Ctx.diagnose( 1310 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1311 }); 1312 return false; 1313 } 1314 ProfileRecord = std::move(Result.get()); 1315 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 1316 1317 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 1318 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 1319 AllMinusOnes = (CountFromProfile.size() > 0); 1320 uint64_t ValueSum = 0; 1321 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 1322 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 1323 ValueSum += CountFromProfile[I]; 1324 if (CountFromProfile[I] != (uint64_t)-1) 1325 AllMinusOnes = false; 1326 } 1327 AllZeros = (ValueSum == 0); 1328 1329 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 1330 1331 getBBInfo(nullptr).UnknownCountOutEdge = 2; 1332 getBBInfo(nullptr).UnknownCountInEdge = 2; 1333 1334 if (!setInstrumentedCounts(CountFromProfile)) { 1335 LLVM_DEBUG( 1336 dbgs() << "Inconsistent number of counts, skipping this function"); 1337 Ctx.diagnose(DiagnosticInfoPGOProfile( 1338 M->getName().data(), 1339 Twine("Inconsistent number of counts in ") + F.getName().str() 1340 + Twine(": the profile may be stale or there is a function name collision."), 1341 DS_Warning)); 1342 return false; 1343 } 1344 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); 1345 return true; 1346 } 1347 1348 // Populate the counters from instrumented BBs to all BBs. 1349 // In the end of this operation, all BBs should have a valid count value. 1350 void PGOUseFunc::populateCounters() { 1351 bool Changes = true; 1352 unsigned NumPasses = 0; 1353 while (Changes) { 1354 NumPasses++; 1355 Changes = false; 1356 1357 // For efficient traversal, it's better to start from the end as most 1358 // of the instrumented edges are at the end. 1359 for (auto &BB : reverse(F)) { 1360 UseBBInfo *Count = findBBInfo(&BB); 1361 if (Count == nullptr) 1362 continue; 1363 if (!Count->CountValid) { 1364 if (Count->UnknownCountOutEdge == 0) { 1365 Count->CountValue = sumEdgeCount(Count->OutEdges); 1366 Count->CountValid = true; 1367 Changes = true; 1368 } else if (Count->UnknownCountInEdge == 0) { 1369 Count->CountValue = sumEdgeCount(Count->InEdges); 1370 Count->CountValid = true; 1371 Changes = true; 1372 } 1373 } 1374 if (Count->CountValid) { 1375 if (Count->UnknownCountOutEdge == 1) { 1376 uint64_t Total = 0; 1377 uint64_t OutSum = sumEdgeCount(Count->OutEdges); 1378 // If the one of the successor block can early terminate (no-return), 1379 // we can end up with situation where out edge sum count is larger as 1380 // the source BB's count is collected by a post-dominated block. 1381 if (Count->CountValue > OutSum) 1382 Total = Count->CountValue - OutSum; 1383 setEdgeCount(Count->OutEdges, Total); 1384 Changes = true; 1385 } 1386 if (Count->UnknownCountInEdge == 1) { 1387 uint64_t Total = 0; 1388 uint64_t InSum = sumEdgeCount(Count->InEdges); 1389 if (Count->CountValue > InSum) 1390 Total = Count->CountValue - InSum; 1391 setEdgeCount(Count->InEdges, Total); 1392 Changes = true; 1393 } 1394 } 1395 } 1396 } 1397 1398 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 1399 #ifndef NDEBUG 1400 // Assert every BB has a valid counter. 1401 for (auto &BB : F) { 1402 auto BI = findBBInfo(&BB); 1403 if (BI == nullptr) 1404 continue; 1405 assert(BI->CountValid && "BB count is not valid"); 1406 } 1407 #endif 1408 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 1409 uint64_t FuncMaxCount = FuncEntryCount; 1410 for (auto &BB : F) { 1411 auto BI = findBBInfo(&BB); 1412 if (BI == nullptr) 1413 continue; 1414 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); 1415 } 1416 1417 // Fix the obviously inconsistent entry count. 1418 if (FuncMaxCount > 0 && FuncEntryCount == 0) 1419 FuncEntryCount = 1; 1420 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); 1421 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 1422 1423 // Now annotate select instructions 1424 FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition); 1425 assert(CountPosition == ProfileCountSize); 1426 1427 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); 1428 } 1429 1430 // Assign the scaled count values to the BB with multiple out edges. 1431 void PGOUseFunc::setBranchWeights() { 1432 // Generate MD_prof metadata for every branch instruction. 1433 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() 1434 << " IsCS=" << IsCS << "\n"); 1435 for (auto &BB : F) { 1436 Instruction *TI = BB.getTerminator(); 1437 if (TI->getNumSuccessors() < 2) 1438 continue; 1439 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || 1440 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI))) 1441 continue; 1442 1443 if (getBBInfo(&BB).CountValue == 0) 1444 continue; 1445 1446 // We have a non-zero Branch BB. 1447 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1448 unsigned Size = BBCountInfo.OutEdges.size(); 1449 SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 1450 uint64_t MaxCount = 0; 1451 for (unsigned s = 0; s < Size; s++) { 1452 const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 1453 const BasicBlock *SrcBB = E->SrcBB; 1454 const BasicBlock *DestBB = E->DestBB; 1455 if (DestBB == nullptr) 1456 continue; 1457 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 1458 uint64_t EdgeCount = E->CountValue; 1459 if (EdgeCount > MaxCount) 1460 MaxCount = EdgeCount; 1461 EdgeCounts[SuccNum] = EdgeCount; 1462 } 1463 setProfMetadata(M, TI, EdgeCounts, MaxCount); 1464 } 1465 } 1466 1467 static bool isIndirectBrTarget(BasicBlock *BB) { 1468 for (BasicBlock *Pred : predecessors(BB)) { 1469 if (isa<IndirectBrInst>(Pred->getTerminator())) 1470 return true; 1471 } 1472 return false; 1473 } 1474 1475 void PGOUseFunc::annotateIrrLoopHeaderWeights() { 1476 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); 1477 // Find irr loop headers 1478 for (auto &BB : F) { 1479 // As a heuristic also annotate indrectbr targets as they have a high chance 1480 // to become an irreducible loop header after the indirectbr tail 1481 // duplication. 1482 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { 1483 Instruction *TI = BB.getTerminator(); 1484 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1485 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); 1486 } 1487 } 1488 } 1489 1490 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 1491 Module *M = F.getParent(); 1492 IRBuilder<> Builder(&SI); 1493 Type *Int64Ty = Builder.getInt64Ty(); 1494 Type *I8PtrTy = Builder.getInt8PtrTy(); 1495 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 1496 Builder.CreateCall( 1497 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 1498 {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1499 Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), 1500 Builder.getInt32(*CurCtrIdx), Step}); 1501 ++(*CurCtrIdx); 1502 } 1503 1504 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 1505 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1506 assert(*CurCtrIdx < CountFromProfile.size() && 1507 "Out of bound access of counters"); 1508 uint64_t SCounts[2]; 1509 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1510 ++(*CurCtrIdx); 1511 uint64_t TotalCount = 0; 1512 auto BI = UseFunc->findBBInfo(SI.getParent()); 1513 if (BI != nullptr) 1514 TotalCount = BI->CountValue; 1515 // False Count 1516 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1517 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1518 if (MaxCount) 1519 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1520 } 1521 1522 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1523 if (!PGOInstrSelect) 1524 return; 1525 // FIXME: do not handle this yet. 1526 if (SI.getCondition()->getType()->isVectorTy()) 1527 return; 1528 1529 switch (Mode) { 1530 case VM_counting: 1531 NSIs++; 1532 return; 1533 case VM_instrument: 1534 instrumentOneSelectInst(SI); 1535 return; 1536 case VM_annotate: 1537 annotateOneSelectInst(SI); 1538 return; 1539 } 1540 1541 llvm_unreachable("Unknown visiting mode"); 1542 } 1543 1544 // Traverse all valuesites and annotate the instructions for all value kind. 1545 void PGOUseFunc::annotateValueSites() { 1546 if (DisableValueProfiling) 1547 return; 1548 1549 // Create the PGOFuncName meta data. 1550 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1551 1552 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1553 annotateValueSites(Kind); 1554 } 1555 1556 // Annotate the instructions for a specific value kind. 1557 void PGOUseFunc::annotateValueSites(uint32_t Kind) { 1558 assert(Kind <= IPVK_Last); 1559 unsigned ValueSiteIndex = 0; 1560 auto &ValueSites = FuncInfo.ValueSites[Kind]; 1561 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 1562 if (NumValueSites != ValueSites.size()) { 1563 auto &Ctx = M->getContext(); 1564 Ctx.diagnose(DiagnosticInfoPGOProfile( 1565 M->getName().data(), 1566 Twine("Inconsistent number of value sites for ") + 1567 Twine(ValueProfKindDescr[Kind]) + 1568 Twine(" profiling in \"") + F.getName().str() + 1569 Twine("\", possibly due to the use of a stale profile."), 1570 DS_Warning)); 1571 return; 1572 } 1573 1574 for (VPCandidateInfo &I : ValueSites) { 1575 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 1576 << "): Index = " << ValueSiteIndex << " out of " 1577 << NumValueSites << "\n"); 1578 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, 1579 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 1580 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations 1581 : MaxNumAnnotations); 1582 ValueSiteIndex++; 1583 } 1584 } 1585 1586 // Collect the set of members for each Comdat in module M and store 1587 // in ComdatMembers. 1588 static void collectComdatMembers( 1589 Module &M, 1590 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1591 if (!DoComdatRenaming) 1592 return; 1593 for (Function &F : M) 1594 if (Comdat *C = F.getComdat()) 1595 ComdatMembers.insert(std::make_pair(C, &F)); 1596 for (GlobalVariable &GV : M.globals()) 1597 if (Comdat *C = GV.getComdat()) 1598 ComdatMembers.insert(std::make_pair(C, &GV)); 1599 for (GlobalAlias &GA : M.aliases()) 1600 if (Comdat *C = GA.getComdat()) 1601 ComdatMembers.insert(std::make_pair(C, &GA)); 1602 } 1603 1604 static bool InstrumentAllFunctions( 1605 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1606 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1607 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { 1608 // For the context-sensitve instrumentation, we should have a separated pass 1609 // (before LTO/ThinLTO linking) to create these variables. 1610 if (!IsCS) 1611 createIRLevelProfileFlagVar(M, /* IsCS */ false, PGOInstrumentEntry); 1612 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1613 collectComdatMembers(M, ComdatMembers); 1614 1615 for (auto &F : M) { 1616 if (F.isDeclaration()) 1617 continue; 1618 if (F.hasFnAttribute(llvm::Attribute::NoProfile)) 1619 continue; 1620 auto &TLI = LookupTLI(F); 1621 auto *BPI = LookupBPI(F); 1622 auto *BFI = LookupBFI(F); 1623 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS); 1624 } 1625 return true; 1626 } 1627 1628 PreservedAnalyses 1629 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) { 1630 createProfileFileNameVar(M, CSInstrName); 1631 createIRLevelProfileFlagVar(M, /* IsCS */ true, PGOInstrumentEntry); 1632 return PreservedAnalyses::all(); 1633 } 1634 1635 bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) { 1636 if (skipModule(M)) 1637 return false; 1638 1639 auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { 1640 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 1641 }; 1642 auto LookupBPI = [this](Function &F) { 1643 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1644 }; 1645 auto LookupBFI = [this](Function &F) { 1646 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1647 }; 1648 return InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS); 1649 } 1650 1651 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1652 ModuleAnalysisManager &AM) { 1653 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1654 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1655 return FAM.getResult<TargetLibraryAnalysis>(F); 1656 }; 1657 auto LookupBPI = [&FAM](Function &F) { 1658 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1659 }; 1660 auto LookupBFI = [&FAM](Function &F) { 1661 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1662 }; 1663 1664 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS)) 1665 return PreservedAnalyses::all(); 1666 1667 return PreservedAnalyses::none(); 1668 } 1669 1670 // Using the ratio b/w sums of profile count values and BFI count values to 1671 // adjust the func entry count. 1672 static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, 1673 BranchProbabilityInfo &NBPI) { 1674 Function &F = Func.getFunc(); 1675 BlockFrequencyInfo NBFI(F, NBPI, LI); 1676 #ifndef NDEBUG 1677 auto BFIEntryCount = F.getEntryCount(); 1678 assert(BFIEntryCount.hasValue() && (BFIEntryCount.getCount() > 0) && 1679 "Invalid BFI Entrycount"); 1680 #endif 1681 auto SumCount = APFloat::getZero(APFloat::IEEEdouble()); 1682 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble()); 1683 for (auto &BBI : F) { 1684 uint64_t CountValue = 0; 1685 uint64_t BFICountValue = 0; 1686 if (!Func.findBBInfo(&BBI)) 1687 continue; 1688 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1689 CountValue = Func.getBBInfo(&BBI).CountValue; 1690 BFICountValue = BFICount.getValue(); 1691 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven); 1692 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven); 1693 } 1694 if (SumCount.isZero()) 1695 return; 1696 1697 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan && 1698 "Incorrect sum of BFI counts"); 1699 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual) 1700 return; 1701 double Scale = (SumCount / SumBFICount).convertToDouble(); 1702 if (Scale < 1.001 && Scale > 0.999) 1703 return; 1704 1705 uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue; 1706 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale; 1707 if (NewEntryCount == 0) 1708 NewEntryCount = 1; 1709 if (NewEntryCount != FuncEntryCount) { 1710 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real)); 1711 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName() 1712 << ", entry_count " << FuncEntryCount << " --> " 1713 << NewEntryCount << "\n"); 1714 } 1715 } 1716 1717 // Compare the profile count values with BFI count values, and print out 1718 // the non-matching ones. 1719 static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, 1720 BranchProbabilityInfo &NBPI, 1721 uint64_t HotCountThreshold, 1722 uint64_t ColdCountThreshold) { 1723 Function &F = Func.getFunc(); 1724 BlockFrequencyInfo NBFI(F, NBPI, LI); 1725 // bool PrintFunc = false; 1726 bool HotBBOnly = PGOVerifyHotBFI; 1727 std::string Msg; 1728 OptimizationRemarkEmitter ORE(&F); 1729 1730 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0; 1731 for (auto &BBI : F) { 1732 uint64_t CountValue = 0; 1733 uint64_t BFICountValue = 0; 1734 1735 if (Func.getBBInfo(&BBI).CountValid) 1736 CountValue = Func.getBBInfo(&BBI).CountValue; 1737 1738 BBNum++; 1739 if (CountValue) 1740 NonZeroBBNum++; 1741 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1742 if (BFICount) 1743 BFICountValue = BFICount.getValue(); 1744 1745 if (HotBBOnly) { 1746 bool rawIsHot = CountValue >= HotCountThreshold; 1747 bool BFIIsHot = BFICountValue >= HotCountThreshold; 1748 bool rawIsCold = CountValue <= ColdCountThreshold; 1749 bool ShowCount = false; 1750 if (rawIsHot && !BFIIsHot) { 1751 Msg = "raw-Hot to BFI-nonHot"; 1752 ShowCount = true; 1753 } else if (rawIsCold && BFIIsHot) { 1754 Msg = "raw-Cold to BFI-Hot"; 1755 ShowCount = true; 1756 } 1757 if (!ShowCount) 1758 continue; 1759 } else { 1760 if ((CountValue < PGOVerifyBFICutoff) && 1761 (BFICountValue < PGOVerifyBFICutoff)) 1762 continue; 1763 uint64_t Diff = (BFICountValue >= CountValue) 1764 ? BFICountValue - CountValue 1765 : CountValue - BFICountValue; 1766 if (Diff < CountValue / 100 * PGOVerifyBFIRatio) 1767 continue; 1768 } 1769 BBMisMatchNum++; 1770 1771 ORE.emit([&]() { 1772 OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "bfi-verify", 1773 F.getSubprogram(), &BBI); 1774 Remark << "BB " << ore::NV("Block", BBI.getName()) 1775 << " Count=" << ore::NV("Count", CountValue) 1776 << " BFI_Count=" << ore::NV("Count", BFICountValue); 1777 if (!Msg.empty()) 1778 Remark << " (" << Msg << ")"; 1779 return Remark; 1780 }); 1781 } 1782 if (BBMisMatchNum) 1783 ORE.emit([&]() { 1784 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify", 1785 F.getSubprogram(), &F.getEntryBlock()) 1786 << "In Func " << ore::NV("Function", F.getName()) 1787 << ": Num_of_BB=" << ore::NV("Count", BBNum) 1788 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum) 1789 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum); 1790 }); 1791 } 1792 1793 static bool annotateAllFunctions( 1794 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, 1795 function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1796 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1797 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, 1798 ProfileSummaryInfo *PSI, bool IsCS) { 1799 LLVM_DEBUG(dbgs() << "Read in profile counters: "); 1800 auto &Ctx = M.getContext(); 1801 // Read the counter array from file. 1802 auto ReaderOrErr = 1803 IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName); 1804 if (Error E = ReaderOrErr.takeError()) { 1805 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 1806 Ctx.diagnose( 1807 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 1808 }); 1809 return false; 1810 } 1811 1812 std::unique_ptr<IndexedInstrProfReader> PGOReader = 1813 std::move(ReaderOrErr.get()); 1814 if (!PGOReader) { 1815 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 1816 StringRef("Cannot get PGOReader"))); 1817 return false; 1818 } 1819 if (!PGOReader->hasCSIRLevelProfile() && IsCS) 1820 return false; 1821 1822 // TODO: might need to change the warning once the clang option is finalized. 1823 if (!PGOReader->isIRLevelProfile()) { 1824 Ctx.diagnose(DiagnosticInfoPGOProfile( 1825 ProfileFileName.data(), "Not an IR level instrumentation profile")); 1826 return false; 1827 } 1828 1829 // Add the profile summary (read from the header of the indexed summary) here 1830 // so that we can use it below when reading counters (which checks if the 1831 // function should be marked with a cold or inlinehint attribute). 1832 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), 1833 IsCS ? ProfileSummary::PSK_CSInstr 1834 : ProfileSummary::PSK_Instr); 1835 PSI->refresh(); 1836 1837 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1838 collectComdatMembers(M, ComdatMembers); 1839 std::vector<Function *> HotFunctions; 1840 std::vector<Function *> ColdFunctions; 1841 1842 // If the profile marked as always instrument the entry BB, do the 1843 // same. Note this can be overwritten by the internal option in CFGMST.h 1844 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled(); 1845 if (PGOInstrumentEntry.getNumOccurrences() > 0) 1846 InstrumentFuncEntry = PGOInstrumentEntry; 1847 for (auto &F : M) { 1848 if (F.isDeclaration()) 1849 continue; 1850 auto &TLI = LookupTLI(F); 1851 auto *BPI = LookupBPI(F); 1852 auto *BFI = LookupBFI(F); 1853 // Split indirectbr critical edges here before computing the MST rather than 1854 // later in getInstrBB() to avoid invalidating it. 1855 SplitIndirectBrCriticalEdges(F, BPI, BFI); 1856 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, 1857 InstrumentFuncEntry); 1858 // When AllMinusOnes is true, it means the profile for the function 1859 // is unrepresentative and this function is actually hot. Set the 1860 // entry count of the function to be multiple times of hot threshold 1861 // and drop all its internal counters. 1862 bool AllMinusOnes = false; 1863 bool AllZeros = false; 1864 if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes)) 1865 continue; 1866 if (AllZeros) { 1867 F.setEntryCount(ProfileCount(0, Function::PCT_Real)); 1868 if (Func.getProgramMaxCount() != 0) 1869 ColdFunctions.push_back(&F); 1870 continue; 1871 } 1872 const unsigned MultiplyFactor = 3; 1873 if (AllMinusOnes) { 1874 uint64_t HotThreshold = PSI->getHotCountThreshold(); 1875 if (HotThreshold) 1876 F.setEntryCount( 1877 ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real)); 1878 HotFunctions.push_back(&F); 1879 continue; 1880 } 1881 Func.populateCounters(); 1882 Func.setBranchWeights(); 1883 Func.annotateValueSites(); 1884 Func.annotateIrrLoopHeaderWeights(); 1885 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 1886 if (FreqAttr == PGOUseFunc::FFA_Cold) 1887 ColdFunctions.push_back(&F); 1888 else if (FreqAttr == PGOUseFunc::FFA_Hot) 1889 HotFunctions.push_back(&F); 1890 if (PGOViewCounts != PGOVCT_None && 1891 (ViewBlockFreqFuncName.empty() || 1892 F.getName().equals(ViewBlockFreqFuncName))) { 1893 LoopInfo LI{DominatorTree(F)}; 1894 std::unique_ptr<BranchProbabilityInfo> NewBPI = 1895 std::make_unique<BranchProbabilityInfo>(F, LI); 1896 std::unique_ptr<BlockFrequencyInfo> NewBFI = 1897 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); 1898 if (PGOViewCounts == PGOVCT_Graph) 1899 NewBFI->view(); 1900 else if (PGOViewCounts == PGOVCT_Text) { 1901 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n"; 1902 NewBFI->print(dbgs()); 1903 } 1904 } 1905 if (PGOViewRawCounts != PGOVCT_None && 1906 (ViewBlockFreqFuncName.empty() || 1907 F.getName().equals(ViewBlockFreqFuncName))) { 1908 if (PGOViewRawCounts == PGOVCT_Graph) 1909 if (ViewBlockFreqFuncName.empty()) 1910 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1911 else 1912 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1913 else if (PGOViewRawCounts == PGOVCT_Text) { 1914 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n"; 1915 Func.dumpInfo(); 1916 } 1917 } 1918 1919 if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) { 1920 LoopInfo LI{DominatorTree(F)}; 1921 BranchProbabilityInfo NBPI(F, LI); 1922 1923 // Fix func entry count. 1924 if (PGOFixEntryCount) 1925 fixFuncEntryCount(Func, LI, NBPI); 1926 1927 // Verify BlockFrequency information. 1928 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0; 1929 if (PGOVerifyHotBFI) { 1930 HotCountThreshold = PSI->getOrCompHotCountThreshold(); 1931 ColdCountThreshold = PSI->getOrCompColdCountThreshold(); 1932 } 1933 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold); 1934 } 1935 } 1936 1937 // Set function hotness attribute from the profile. 1938 // We have to apply these attributes at the end because their presence 1939 // can affect the BranchProbabilityInfo of any callers, resulting in an 1940 // inconsistent MST between prof-gen and prof-use. 1941 for (auto &F : HotFunctions) { 1942 F->addFnAttr(Attribute::InlineHint); 1943 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 1944 << "\n"); 1945 } 1946 for (auto &F : ColdFunctions) { 1947 // Only set when there is no Attribute::Hot set by the user. For Hot 1948 // attribute, user's annotation has the precedence over the profile. 1949 if (F->hasFnAttribute(Attribute::Hot)) { 1950 auto &Ctx = M.getContext(); 1951 std::string Msg = std::string("Function ") + F->getName().str() + 1952 std::string(" is annotated as a hot function but" 1953 " the profile is cold"); 1954 Ctx.diagnose( 1955 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); 1956 continue; 1957 } 1958 F->addFnAttr(Attribute::Cold); 1959 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() 1960 << "\n"); 1961 } 1962 return true; 1963 } 1964 1965 PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, 1966 std::string RemappingFilename, 1967 bool IsCS) 1968 : ProfileFileName(std::move(Filename)), 1969 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) { 1970 if (!PGOTestProfileFile.empty()) 1971 ProfileFileName = PGOTestProfileFile; 1972 if (!PGOTestProfileRemappingFile.empty()) 1973 ProfileRemappingFileName = PGOTestProfileRemappingFile; 1974 } 1975 1976 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 1977 ModuleAnalysisManager &AM) { 1978 1979 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1980 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1981 return FAM.getResult<TargetLibraryAnalysis>(F); 1982 }; 1983 auto LookupBPI = [&FAM](Function &F) { 1984 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1985 }; 1986 auto LookupBFI = [&FAM](Function &F) { 1987 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1988 }; 1989 1990 auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); 1991 1992 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, 1993 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) 1994 return PreservedAnalyses::all(); 1995 1996 return PreservedAnalyses::none(); 1997 } 1998 1999 bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { 2000 if (skipModule(M)) 2001 return false; 2002 2003 auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { 2004 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 2005 }; 2006 auto LookupBPI = [this](Function &F) { 2007 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 2008 }; 2009 auto LookupBFI = [this](Function &F) { 2010 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 2011 }; 2012 2013 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 2014 return annotateAllFunctions(M, ProfileFileName, "", LookupTLI, LookupBPI, 2015 LookupBFI, PSI, IsCS); 2016 } 2017 2018 static std::string getSimpleNodeName(const BasicBlock *Node) { 2019 if (!Node->getName().empty()) 2020 return std::string(Node->getName()); 2021 2022 std::string SimpleNodeName; 2023 raw_string_ostream OS(SimpleNodeName); 2024 Node->printAsOperand(OS, false); 2025 return OS.str(); 2026 } 2027 2028 void llvm::setProfMetadata(Module *M, Instruction *TI, 2029 ArrayRef<uint64_t> EdgeCounts, 2030 uint64_t MaxCount) { 2031 MDBuilder MDB(M->getContext()); 2032 assert(MaxCount > 0 && "Bad max count"); 2033 uint64_t Scale = calculateCountScale(MaxCount); 2034 SmallVector<unsigned, 4> Weights; 2035 for (const auto &ECI : EdgeCounts) 2036 Weights.push_back(scaleBranchCount(ECI, Scale)); 2037 2038 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W 2039 : Weights) { 2040 dbgs() << W << " "; 2041 } dbgs() << "\n";); 2042 2043 TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 2044 if (EmitBranchProbability) { 2045 std::string BrCondStr = getBranchCondString(TI); 2046 if (BrCondStr.empty()) 2047 return; 2048 2049 uint64_t WSum = 2050 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0, 2051 [](uint64_t w1, uint64_t w2) { return w1 + w2; }); 2052 uint64_t TotalCount = 2053 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0, 2054 [](uint64_t c1, uint64_t c2) { return c1 + c2; }); 2055 Scale = calculateCountScale(WSum); 2056 BranchProbability BP(scaleBranchCount(Weights[0], Scale), 2057 scaleBranchCount(WSum, Scale)); 2058 std::string BranchProbStr; 2059 raw_string_ostream OS(BranchProbStr); 2060 OS << BP; 2061 OS << " (total count : " << TotalCount << ")"; 2062 OS.flush(); 2063 Function *F = TI->getParent()->getParent(); 2064 OptimizationRemarkEmitter ORE(F); 2065 ORE.emit([&]() { 2066 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) 2067 << BrCondStr << " is true with probability : " << BranchProbStr; 2068 }); 2069 } 2070 } 2071 2072 namespace llvm { 2073 2074 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { 2075 MDBuilder MDB(M->getContext()); 2076 TI->setMetadata(llvm::LLVMContext::MD_irr_loop, 2077 MDB.createIrrLoopHeaderWeight(Count)); 2078 } 2079 2080 template <> struct GraphTraits<PGOUseFunc *> { 2081 using NodeRef = const BasicBlock *; 2082 using ChildIteratorType = const_succ_iterator; 2083 using nodes_iterator = pointer_iterator<Function::const_iterator>; 2084 2085 static NodeRef getEntryNode(const PGOUseFunc *G) { 2086 return &G->getFunc().front(); 2087 } 2088 2089 static ChildIteratorType child_begin(const NodeRef N) { 2090 return succ_begin(N); 2091 } 2092 2093 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } 2094 2095 static nodes_iterator nodes_begin(const PGOUseFunc *G) { 2096 return nodes_iterator(G->getFunc().begin()); 2097 } 2098 2099 static nodes_iterator nodes_end(const PGOUseFunc *G) { 2100 return nodes_iterator(G->getFunc().end()); 2101 } 2102 }; 2103 2104 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { 2105 explicit DOTGraphTraits(bool isSimple = false) 2106 : DefaultDOTGraphTraits(isSimple) {} 2107 2108 static std::string getGraphName(const PGOUseFunc *G) { 2109 return std::string(G->getFunc().getName()); 2110 } 2111 2112 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { 2113 std::string Result; 2114 raw_string_ostream OS(Result); 2115 2116 OS << getSimpleNodeName(Node) << ":\\l"; 2117 UseBBInfo *BI = Graph->findBBInfo(Node); 2118 OS << "Count : "; 2119 if (BI && BI->CountValid) 2120 OS << BI->CountValue << "\\l"; 2121 else 2122 OS << "Unknown\\l"; 2123 2124 if (!PGOInstrSelect) 2125 return Result; 2126 2127 for (const Instruction &I : *Node) { 2128 if (!isa<SelectInst>(&I)) 2129 continue; 2130 // Display scaled counts for SELECT instruction: 2131 OS << "SELECT : { T = "; 2132 uint64_t TC, FC; 2133 bool HasProf = I.extractProfMetadata(TC, FC); 2134 if (!HasProf) 2135 OS << "Unknown, F = Unknown }\\l"; 2136 else 2137 OS << TC << ", F = " << FC << " }\\l"; 2138 } 2139 return Result; 2140 } 2141 }; 2142 2143 } // end namespace llvm 2144