1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements PGO instrumentation using a minimum spanning tree based 10 // on the following paper: 11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 13 // Issue 3, pp 313-322 14 // The idea of the algorithm based on the fact that for each node (except for 15 // the entry and exit), the sum of incoming edge counts equals the sum of 16 // outgoing edge counts. The count of edge on spanning tree can be derived from 17 // those edges not on the spanning tree. Knuth proves this method instruments 18 // the minimum number of edges. 19 // 20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree 21 // edges have higher frequencies (more likely to execute). The idea is to 22 // instrument those less frequently executed edges to reduce the runtime 23 // overhead of instrumented binaries. 24 // 25 // This file contains two passes: 26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 27 // count profile, and generates the instrumentation for indirect call 28 // profiling. 29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and 30 // annotates the branch weights. It also reads the indirect call value 31 // profiling records and annotate the indirect call instructions. 32 // 33 // To get the precise counter information, These two passes need to invoke at 34 // the same compilation point (so they see the same IR). For pass 35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For 36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 37 // the profile is opened in module level and passed to each PGOUseFunc instance. 38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 39 // in class FuncPGOInstrumentation. 40 // 41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 42 // BBInfo contains auxiliary information for each BB. These two classes are used 43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 45 // used in populating profile counters. 46 // The MST implementation is in Class CFGMST (CFGMST.h). 47 // 48 //===----------------------------------------------------------------------===// 49 50 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 51 #include "CFGMST.h" 52 #include "ValueProfileCollector.h" 53 #include "llvm/ADT/APInt.h" 54 #include "llvm/ADT/ArrayRef.h" 55 #include "llvm/ADT/STLExtras.h" 56 #include "llvm/ADT/SmallVector.h" 57 #include "llvm/ADT/Statistic.h" 58 #include "llvm/ADT/StringRef.h" 59 #include "llvm/ADT/Triple.h" 60 #include "llvm/ADT/Twine.h" 61 #include "llvm/ADT/iterator.h" 62 #include "llvm/ADT/iterator_range.h" 63 #include "llvm/Analysis/BlockFrequencyInfo.h" 64 #include "llvm/Analysis/BranchProbabilityInfo.h" 65 #include "llvm/Analysis/CFG.h" 66 #include "llvm/Analysis/EHPersonalities.h" 67 #include "llvm/Analysis/LoopInfo.h" 68 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 69 #include "llvm/Analysis/ProfileSummaryInfo.h" 70 #include "llvm/IR/Attributes.h" 71 #include "llvm/IR/BasicBlock.h" 72 #include "llvm/IR/CFG.h" 73 #include "llvm/IR/Comdat.h" 74 #include "llvm/IR/Constant.h" 75 #include "llvm/IR/Constants.h" 76 #include "llvm/IR/DiagnosticInfo.h" 77 #include "llvm/IR/Dominators.h" 78 #include "llvm/IR/Function.h" 79 #include "llvm/IR/GlobalAlias.h" 80 #include "llvm/IR/GlobalValue.h" 81 #include "llvm/IR/GlobalVariable.h" 82 #include "llvm/IR/IRBuilder.h" 83 #include "llvm/IR/InstVisitor.h" 84 #include "llvm/IR/InstrTypes.h" 85 #include "llvm/IR/Instruction.h" 86 #include "llvm/IR/Instructions.h" 87 #include "llvm/IR/IntrinsicInst.h" 88 #include "llvm/IR/Intrinsics.h" 89 #include "llvm/IR/LLVMContext.h" 90 #include "llvm/IR/MDBuilder.h" 91 #include "llvm/IR/Module.h" 92 #include "llvm/IR/PassManager.h" 93 #include "llvm/IR/ProfileSummary.h" 94 #include "llvm/IR/Type.h" 95 #include "llvm/IR/Value.h" 96 #include "llvm/InitializePasses.h" 97 #include "llvm/Pass.h" 98 #include "llvm/ProfileData/InstrProf.h" 99 #include "llvm/ProfileData/InstrProfReader.h" 100 #include "llvm/Support/BranchProbability.h" 101 #include "llvm/Support/CRC.h" 102 #include "llvm/Support/Casting.h" 103 #include "llvm/Support/CommandLine.h" 104 #include "llvm/Support/DOTGraphTraits.h" 105 #include "llvm/Support/Debug.h" 106 #include "llvm/Support/Error.h" 107 #include "llvm/Support/ErrorHandling.h" 108 #include "llvm/Support/GraphWriter.h" 109 #include "llvm/Support/raw_ostream.h" 110 #include "llvm/Transforms/Instrumentation.h" 111 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 112 #include "llvm/Transforms/Utils/ModuleUtils.h" 113 #include <algorithm> 114 #include <cassert> 115 #include <cstdint> 116 #include <memory> 117 #include <numeric> 118 #include <string> 119 #include <unordered_map> 120 #include <utility> 121 #include <vector> 122 123 using namespace llvm; 124 using ProfileCount = Function::ProfileCount; 125 using VPCandidateInfo = ValueProfileCollector::CandidateInfo; 126 127 #define DEBUG_TYPE "pgo-instrumentation" 128 129 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 130 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 131 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); 132 STATISTIC(NumOfPGOEdge, "Number of edges."); 133 STATISTIC(NumOfPGOBB, "Number of basic-blocks."); 134 STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 135 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 136 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 137 STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 138 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 139 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); 140 STATISTIC(NumOfCSPGOSelectInsts, 141 "Number of select instruction instrumented in CSPGO."); 142 STATISTIC(NumOfCSPGOMemIntrinsics, 143 "Number of mem intrinsics instrumented in CSPGO."); 144 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); 145 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); 146 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO."); 147 STATISTIC(NumOfCSPGOFunc, 148 "Number of functions having valid profile counts in CSPGO."); 149 STATISTIC(NumOfCSPGOMismatch, 150 "Number of functions having mismatch profile in CSPGO."); 151 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); 152 153 // Command line option to specify the file to read profile from. This is 154 // mainly used for testing. 155 static cl::opt<std::string> 156 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, 157 cl::value_desc("filename"), 158 cl::desc("Specify the path of profile data file. This is" 159 "mainly for test purpose.")); 160 static cl::opt<std::string> PGOTestProfileRemappingFile( 161 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, 162 cl::value_desc("filename"), 163 cl::desc("Specify the path of profile remapping file. This is mainly for " 164 "test purpose.")); 165 166 // Command line option to disable value profiling. The default is false: 167 // i.e. value profiling is enabled by default. This is for debug purpose. 168 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 169 cl::Hidden, 170 cl::desc("Disable Value Profiling")); 171 172 // Command line option to set the maximum number of VP annotations to write to 173 // the metadata for a single indirect call callsite. 174 static cl::opt<unsigned> MaxNumAnnotations( 175 "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore, 176 cl::desc("Max number of annotations for a single indirect " 177 "call callsite")); 178 179 // Command line option to set the maximum number of value annotations 180 // to write to the metadata for a single memop intrinsic. 181 static cl::opt<unsigned> MaxNumMemOPAnnotations( 182 "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore, 183 cl::desc("Max number of preicise value annotations for a single memop" 184 "intrinsic")); 185 186 // Command line option to control appending FunctionHash to the name of a COMDAT 187 // function. This is to avoid the hash mismatch caused by the preinliner. 188 static cl::opt<bool> DoComdatRenaming( 189 "do-comdat-renaming", cl::init(false), cl::Hidden, 190 cl::desc("Append function hash to the name of COMDAT function to avoid " 191 "function hash mismatch due to the preinliner")); 192 193 // Command line option to enable/disable the warning about missing profile 194 // information. 195 static cl::opt<bool> 196 PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, 197 cl::desc("Use this option to turn on/off " 198 "warnings about missing profile data for " 199 "functions.")); 200 201 namespace llvm { 202 // Command line option to enable/disable the warning about a hash mismatch in 203 // the profile data. 204 cl::opt<bool> 205 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 206 cl::desc("Use this option to turn off/on " 207 "warnings about profile cfg mismatch.")); 208 } // namespace llvm 209 210 // Command line option to enable/disable the warning about a hash mismatch in 211 // the profile data for Comdat functions, which often turns out to be false 212 // positive due to the pre-instrumentation inline. 213 static cl::opt<bool> 214 NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), 215 cl::Hidden, 216 cl::desc("The option is used to turn on/off " 217 "warnings about hash mismatch for comdat " 218 "functions.")); 219 220 // Command line option to enable/disable select instruction instrumentation. 221 static cl::opt<bool> 222 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 223 cl::desc("Use this option to turn on/off SELECT " 224 "instruction instrumentation. ")); 225 226 // Command line option to turn on CFG dot or text dump of raw profile counts 227 static cl::opt<PGOViewCountsType> PGOViewRawCounts( 228 "pgo-view-raw-counts", cl::Hidden, 229 cl::desc("A boolean option to show CFG dag or text " 230 "with raw profile counts from " 231 "profile data. See also option " 232 "-pgo-view-counts. To limit graph " 233 "display to only one function, use " 234 "filtering option -view-bfi-func-name."), 235 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), 236 clEnumValN(PGOVCT_Graph, "graph", "show a graph."), 237 clEnumValN(PGOVCT_Text, "text", "show in text."))); 238 239 // Command line option to enable/disable memop intrinsic call.size profiling. 240 static cl::opt<bool> 241 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 242 cl::desc("Use this option to turn on/off " 243 "memory intrinsic size profiling.")); 244 245 // Emit branch probability as optimization remarks. 246 static cl::opt<bool> 247 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 248 cl::desc("When this option is on, the annotated " 249 "branch probability will be emitted as " 250 "optimization remarks: -{Rpass|" 251 "pass-remarks}=pgo-instrumentation")); 252 253 static cl::opt<bool> PGOInstrumentEntry( 254 "pgo-instrument-entry", cl::init(false), cl::Hidden, 255 cl::desc("Force to instrument function entry basicblock.")); 256 257 static cl::opt<bool> PGOFunctionEntryCoverage( 258 "pgo-function-entry-coverage", cl::init(false), cl::Hidden, cl::ZeroOrMore, 259 cl::desc( 260 "Use this option to enable function entry coverage instrumentation.")); 261 262 static cl::opt<bool> 263 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, 264 cl::desc("Fix function entry count in profile use.")); 265 266 static cl::opt<bool> PGOVerifyHotBFI( 267 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden, 268 cl::desc("Print out the non-match BFI count if a hot raw profile count " 269 "becomes non-hot, or a cold raw profile count becomes hot. " 270 "The print is enabled under -Rpass-analysis=pgo, or " 271 "internal option -pass-remakrs-analysis=pgo.")); 272 273 static cl::opt<bool> PGOVerifyBFI( 274 "pgo-verify-bfi", cl::init(false), cl::Hidden, 275 cl::desc("Print out mismatched BFI counts after setting profile metadata " 276 "The print is enabled under -Rpass-analysis=pgo, or " 277 "internal option -pass-remakrs-analysis=pgo.")); 278 279 static cl::opt<unsigned> PGOVerifyBFIRatio( 280 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, 281 cl::desc("Set the threshold for pgo-verify-bfi: only print out " 282 "mismatched BFI if the difference percentage is greater than " 283 "this value (in percentage).")); 284 285 static cl::opt<unsigned> PGOVerifyBFICutoff( 286 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, 287 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " 288 "profile count value is below.")); 289 290 namespace llvm { 291 // Command line option to turn on CFG dot dump after profile annotation. 292 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts 293 extern cl::opt<PGOViewCountsType> PGOViewCounts; 294 295 // Command line option to specify the name of the function for CFG dump 296 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= 297 extern cl::opt<std::string> ViewBlockFreqFuncName; 298 299 extern cl::opt<bool> DebugInfoCorrelate; 300 } // namespace llvm 301 302 static cl::opt<bool> 303 PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden, 304 cl::desc("Use the old CFG function hashing")); 305 306 // Return a string describing the branch condition that can be 307 // used in static branch probability heuristics: 308 static std::string getBranchCondString(Instruction *TI) { 309 BranchInst *BI = dyn_cast<BranchInst>(TI); 310 if (!BI || !BI->isConditional()) 311 return std::string(); 312 313 Value *Cond = BI->getCondition(); 314 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 315 if (!CI) 316 return std::string(); 317 318 std::string result; 319 raw_string_ostream OS(result); 320 OS << CmpInst::getPredicateName(CI->getPredicate()) << "_"; 321 CI->getOperand(0)->getType()->print(OS, true); 322 323 Value *RHS = CI->getOperand(1); 324 ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 325 if (CV) { 326 if (CV->isZero()) 327 OS << "_Zero"; 328 else if (CV->isOne()) 329 OS << "_One"; 330 else if (CV->isMinusOne()) 331 OS << "_MinusOne"; 332 else 333 OS << "_Const"; 334 } 335 OS.flush(); 336 return result; 337 } 338 339 static const char *ValueProfKindDescr[] = { 340 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, 341 #include "llvm/ProfileData/InstrProfData.inc" 342 }; 343 344 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime 345 // aware this is an ir_level profile so it can set the version flag. 346 static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) { 347 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); 348 Type *IntTy64 = Type::getInt64Ty(M.getContext()); 349 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); 350 if (IsCS) 351 ProfileVersion |= VARIANT_MASK_CSIR_PROF; 352 if (PGOInstrumentEntry) 353 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY; 354 if (DebugInfoCorrelate) 355 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE; 356 if (PGOFunctionEntryCoverage) 357 ProfileVersion |= 358 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY; 359 auto IRLevelVersionVariable = new GlobalVariable( 360 M, IntTy64, true, GlobalValue::WeakAnyLinkage, 361 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName); 362 IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility); 363 Triple TT(M.getTargetTriple()); 364 if (TT.supportsCOMDAT()) { 365 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage); 366 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName)); 367 } 368 return IRLevelVersionVariable; 369 } 370 371 namespace { 372 373 /// The select instruction visitor plays three roles specified 374 /// by the mode. In \c VM_counting mode, it simply counts the number of 375 /// select instructions. In \c VM_instrument mode, it inserts code to count 376 /// the number times TrueValue of select is taken. In \c VM_annotate mode, 377 /// it reads the profile data and annotate the select instruction with metadata. 378 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 379 class PGOUseFunc; 380 381 /// Instruction Visitor class to visit select instructions. 382 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 383 Function &F; 384 unsigned NSIs = 0; // Number of select instructions instrumented. 385 VisitMode Mode = VM_counting; // Visiting mode. 386 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 387 unsigned TotalNumCtrs = 0; // Total number of counters 388 GlobalVariable *FuncNameVar = nullptr; 389 uint64_t FuncHash = 0; 390 PGOUseFunc *UseFunc = nullptr; 391 392 SelectInstVisitor(Function &Func) : F(Func) {} 393 394 void countSelects(Function &Func) { 395 NSIs = 0; 396 Mode = VM_counting; 397 visit(Func); 398 } 399 400 // Visit the IR stream and instrument all select instructions. \p 401 // Ind is a pointer to the counter index variable; \p TotalNC 402 // is the total number of counters; \p FNV is the pointer to the 403 // PGO function name var; \p FHash is the function hash. 404 void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC, 405 GlobalVariable *FNV, uint64_t FHash) { 406 Mode = VM_instrument; 407 CurCtrIdx = Ind; 408 TotalNumCtrs = TotalNC; 409 FuncHash = FHash; 410 FuncNameVar = FNV; 411 visit(Func); 412 } 413 414 // Visit the IR stream and annotate all select instructions. 415 void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) { 416 Mode = VM_annotate; 417 UseFunc = UF; 418 CurCtrIdx = Ind; 419 visit(Func); 420 } 421 422 void instrumentOneSelectInst(SelectInst &SI); 423 void annotateOneSelectInst(SelectInst &SI); 424 425 // Visit \p SI instruction and perform tasks according to visit mode. 426 void visitSelectInst(SelectInst &SI); 427 428 // Return the number of select instructions. This needs be called after 429 // countSelects(). 430 unsigned getNumOfSelectInsts() const { return NSIs; } 431 }; 432 433 434 class PGOInstrumentationGenLegacyPass : public ModulePass { 435 public: 436 static char ID; 437 438 PGOInstrumentationGenLegacyPass(bool IsCS = false) 439 : ModulePass(ID), IsCS(IsCS) { 440 initializePGOInstrumentationGenLegacyPassPass( 441 *PassRegistry::getPassRegistry()); 442 } 443 444 StringRef getPassName() const override { return "PGOInstrumentationGenPass"; } 445 446 private: 447 // Is this is context-sensitive instrumentation. 448 bool IsCS; 449 bool runOnModule(Module &M) override; 450 451 void getAnalysisUsage(AnalysisUsage &AU) const override { 452 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 453 AU.addRequired<TargetLibraryInfoWrapperPass>(); 454 } 455 }; 456 457 class PGOInstrumentationUseLegacyPass : public ModulePass { 458 public: 459 static char ID; 460 461 // Provide the profile filename as the parameter. 462 PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false) 463 : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) { 464 if (!PGOTestProfileFile.empty()) 465 ProfileFileName = PGOTestProfileFile; 466 initializePGOInstrumentationUseLegacyPassPass( 467 *PassRegistry::getPassRegistry()); 468 } 469 470 StringRef getPassName() const override { return "PGOInstrumentationUsePass"; } 471 472 private: 473 std::string ProfileFileName; 474 // Is this is context-sensitive instrumentation use. 475 bool IsCS; 476 477 bool runOnModule(Module &M) override; 478 479 void getAnalysisUsage(AnalysisUsage &AU) const override { 480 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 481 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 482 AU.addRequired<TargetLibraryInfoWrapperPass>(); 483 } 484 }; 485 486 class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass { 487 public: 488 static char ID; 489 StringRef getPassName() const override { 490 return "PGOInstrumentationGenCreateVarPass"; 491 } 492 PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "") 493 : ModulePass(ID), InstrProfileOutput(CSInstrName) { 494 initializePGOInstrumentationGenCreateVarLegacyPassPass( 495 *PassRegistry::getPassRegistry()); 496 } 497 498 private: 499 bool runOnModule(Module &M) override { 500 createProfileFileNameVar(M, InstrProfileOutput); 501 // The variable in a comdat may be discarded by LTO. Ensure the 502 // declaration will be retained. 503 appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true)); 504 return false; 505 } 506 std::string InstrProfileOutput; 507 }; 508 509 } // end anonymous namespace 510 511 char PGOInstrumentationGenLegacyPass::ID = 0; 512 513 INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 514 "PGO instrumentation.", false, false) 515 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 516 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 517 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 518 INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 519 "PGO instrumentation.", false, false) 520 521 ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) { 522 return new PGOInstrumentationGenLegacyPass(IsCS); 523 } 524 525 char PGOInstrumentationUseLegacyPass::ID = 0; 526 527 INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 528 "Read PGO instrumentation profile.", false, false) 529 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 530 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 531 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 532 INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 533 "Read PGO instrumentation profile.", false, false) 534 535 ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename, 536 bool IsCS) { 537 return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS); 538 } 539 540 char PGOInstrumentationGenCreateVarLegacyPass::ID = 0; 541 542 INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass, 543 "pgo-instr-gen-create-var", 544 "Create PGO instrumentation version variable for CSPGO.", false, 545 false) 546 547 ModulePass * 548 llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) { 549 return new PGOInstrumentationGenCreateVarLegacyPass(std::string(CSInstrName)); 550 } 551 552 namespace { 553 554 /// An MST based instrumentation for PGO 555 /// 556 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO 557 /// in the function level. 558 struct PGOEdge { 559 // This class implements the CFG edges. Note the CFG can be a multi-graph. 560 // So there might be multiple edges with same SrcBB and DestBB. 561 const BasicBlock *SrcBB; 562 const BasicBlock *DestBB; 563 uint64_t Weight; 564 bool InMST = false; 565 bool Removed = false; 566 bool IsCritical = false; 567 568 PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 569 : SrcBB(Src), DestBB(Dest), Weight(W) {} 570 571 // Return the information string of an edge. 572 std::string infoString() const { 573 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 574 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str(); 575 } 576 }; 577 578 // This class stores the auxiliary information for each BB. 579 struct BBInfo { 580 BBInfo *Group; 581 uint32_t Index; 582 uint32_t Rank = 0; 583 584 BBInfo(unsigned IX) : Group(this), Index(IX) {} 585 586 // Return the information string of this object. 587 std::string infoString() const { 588 return (Twine("Index=") + Twine(Index)).str(); 589 } 590 591 // Empty function -- only applicable to UseBBInfo. 592 void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 593 594 // Empty function -- only applicable to UseBBInfo. 595 void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 596 }; 597 598 // This class implements the CFG edges. Note the CFG can be a multi-graph. 599 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 600 private: 601 Function &F; 602 603 // Is this is context-sensitive instrumentation. 604 bool IsCS; 605 606 // A map that stores the Comdat group in function F. 607 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 608 609 ValueProfileCollector VPC; 610 611 void computeCFGHash(); 612 void renameComdatFunction(); 613 614 public: 615 std::vector<std::vector<VPCandidateInfo>> ValueSites; 616 SelectInstVisitor SIVisitor; 617 std::string FuncName; 618 GlobalVariable *FuncNameVar; 619 620 // CFG hash value for this function. 621 uint64_t FunctionHash = 0; 622 623 // The Minimum Spanning Tree of function CFG. 624 CFGMST<Edge, BBInfo> MST; 625 626 // Collect all the BBs that will be instrumented, and store them in 627 // InstrumentBBs. 628 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); 629 630 // Give an edge, find the BB that will be instrumented. 631 // Return nullptr if there is no BB to be instrumented. 632 BasicBlock *getInstrBB(Edge *E); 633 634 // Return the auxiliary BB information. 635 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 636 637 // Return the auxiliary BB information if available. 638 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 639 640 // Dump edges and BB information. 641 void dumpInfo(std::string Str = "") const { 642 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " + 643 Twine(FunctionHash) + "\t" + Str); 644 } 645 646 FuncPGOInstrumentation( 647 Function &Func, TargetLibraryInfo &TLI, 648 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 649 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 650 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false, 651 bool InstrumentFuncEntry = true) 652 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), 653 ValueSites(IPVK_Last + 1), SIVisitor(Func), 654 MST(F, InstrumentFuncEntry, BPI, BFI) { 655 // This should be done before CFG hash computation. 656 SIVisitor.countSelects(Func); 657 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); 658 if (!IsCS) { 659 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 660 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 661 NumOfPGOBB += MST.BBInfos.size(); 662 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); 663 } else { 664 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 665 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 666 NumOfCSPGOBB += MST.BBInfos.size(); 667 } 668 669 FuncName = getPGOFuncName(F); 670 computeCFGHash(); 671 if (!ComdatMembers.empty()) 672 renameComdatFunction(); 673 LLVM_DEBUG(dumpInfo("after CFGMST")); 674 675 for (auto &E : MST.AllEdges) { 676 if (E->Removed) 677 continue; 678 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; 679 if (!E->InMST) 680 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; 681 } 682 683 if (CreateGlobalVar) 684 FuncNameVar = createPGOFuncNameVar(F, FuncName); 685 } 686 }; 687 688 } // end anonymous namespace 689 690 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 691 // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers 692 // of selects, indirect calls, mem ops and edges. 693 template <class Edge, class BBInfo> 694 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 695 std::vector<uint8_t> Indexes; 696 JamCRC JC; 697 for (auto &BB : F) { 698 const Instruction *TI = BB.getTerminator(); 699 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 700 BasicBlock *Succ = TI->getSuccessor(I); 701 auto BI = findBBInfo(Succ); 702 if (BI == nullptr) 703 continue; 704 uint32_t Index = BI->Index; 705 for (int J = 0; J < 4; J++) 706 Indexes.push_back((uint8_t)(Index >> (J * 8))); 707 } 708 } 709 JC.update(Indexes); 710 711 JamCRC JCH; 712 if (PGOOldCFGHashing) { 713 // Hash format for context sensitive profile. Reserve 4 bits for other 714 // information. 715 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | 716 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | 717 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 | 718 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); 719 } else { 720 // The higher 32 bits. 721 auto updateJCH = [&JCH](uint64_t Num) { 722 uint8_t Data[8]; 723 support::endian::write64le(Data, Num); 724 JCH.update(Data); 725 }; 726 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts()); 727 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size()); 728 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size()); 729 updateJCH((uint64_t)MST.AllEdges.size()); 730 731 // Hash format for context sensitive profile. Reserve 4 bits for other 732 // information. 733 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); 734 } 735 736 // Reserve bit 60-63 for other information purpose. 737 FunctionHash &= 0x0FFFFFFFFFFFFFFF; 738 if (IsCS) 739 NamedInstrProfRecord::setCSFlagInHash(FunctionHash); 740 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" 741 << " CRC = " << JC.getCRC() 742 << ", Selects = " << SIVisitor.getNumOfSelectInsts() 743 << ", Edges = " << MST.AllEdges.size() << ", ICSites = " 744 << ValueSites[IPVK_IndirectCallTarget].size()); 745 if (!PGOOldCFGHashing) { 746 LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size() 747 << ", High32 CRC = " << JCH.getCRC()); 748 } 749 LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";); 750 } 751 752 // Check if we can safely rename this Comdat function. 753 static bool canRenameComdat( 754 Function &F, 755 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 756 if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 757 return false; 758 759 // FIXME: Current only handle those Comdat groups that only containing one 760 // function. 761 // (1) For a Comdat group containing multiple functions, we need to have a 762 // unique postfix based on the hashes for each function. There is a 763 // non-trivial code refactoring to do this efficiently. 764 // (2) Variables can not be renamed, so we can not rename Comdat function in a 765 // group including global vars. 766 Comdat *C = F.getComdat(); 767 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 768 assert(!isa<GlobalAlias>(CM.second)); 769 Function *FM = dyn_cast<Function>(CM.second); 770 if (FM != &F) 771 return false; 772 } 773 return true; 774 } 775 776 // Append the CFGHash to the Comdat function name. 777 template <class Edge, class BBInfo> 778 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 779 if (!canRenameComdat(F, ComdatMembers)) 780 return; 781 std::string OrigName = F.getName().str(); 782 std::string NewFuncName = 783 Twine(F.getName() + "." + Twine(FunctionHash)).str(); 784 F.setName(Twine(NewFuncName)); 785 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 786 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 787 Comdat *NewComdat; 788 Module *M = F.getParent(); 789 // For AvailableExternallyLinkage functions, change the linkage to 790 // LinkOnceODR and put them into comdat. This is because after renaming, there 791 // is no backup external copy available for the function. 792 if (!F.hasComdat()) { 793 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 794 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 795 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 796 F.setComdat(NewComdat); 797 return; 798 } 799 800 // This function belongs to a single function Comdat group. 801 Comdat *OrigComdat = F.getComdat(); 802 std::string NewComdatName = 803 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 804 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 805 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 806 807 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 808 // Must be a function. 809 cast<Function>(CM.second)->setComdat(NewComdat); 810 } 811 } 812 813 // Collect all the BBs that will be instruments and return them in 814 // InstrumentBBs and setup InEdges/OutEdge for UseBBInfo. 815 template <class Edge, class BBInfo> 816 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( 817 std::vector<BasicBlock *> &InstrumentBBs) { 818 // Use a worklist as we will update the vector during the iteration. 819 std::vector<Edge *> EdgeList; 820 EdgeList.reserve(MST.AllEdges.size()); 821 for (auto &E : MST.AllEdges) 822 EdgeList.push_back(E.get()); 823 824 for (auto &E : EdgeList) { 825 BasicBlock *InstrBB = getInstrBB(E); 826 if (InstrBB) 827 InstrumentBBs.push_back(InstrBB); 828 } 829 830 // Set up InEdges/OutEdges for all BBs. 831 for (auto &E : MST.AllEdges) { 832 if (E->Removed) 833 continue; 834 const BasicBlock *SrcBB = E->SrcBB; 835 const BasicBlock *DestBB = E->DestBB; 836 BBInfo &SrcInfo = getBBInfo(SrcBB); 837 BBInfo &DestInfo = getBBInfo(DestBB); 838 SrcInfo.addOutEdge(E.get()); 839 DestInfo.addInEdge(E.get()); 840 } 841 } 842 843 // Given a CFG E to be instrumented, find which BB to place the instrumented 844 // code. The function will split the critical edge if necessary. 845 template <class Edge, class BBInfo> 846 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 847 if (E->InMST || E->Removed) 848 return nullptr; 849 850 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 851 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 852 // For a fake edge, instrument the real BB. 853 if (SrcBB == nullptr) 854 return DestBB; 855 if (DestBB == nullptr) 856 return SrcBB; 857 858 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { 859 // There are basic blocks (such as catchswitch) cannot be instrumented. 860 // If the returned first insertion point is the end of BB, skip this BB. 861 if (BB->getFirstInsertionPt() == BB->end()) 862 return nullptr; 863 return BB; 864 }; 865 866 // Instrument the SrcBB if it has a single successor, 867 // otherwise, the DestBB if this is not a critical edge. 868 Instruction *TI = SrcBB->getTerminator(); 869 if (TI->getNumSuccessors() <= 1) 870 return canInstrument(SrcBB); 871 if (!E->IsCritical) 872 return canInstrument(DestBB); 873 874 // Some IndirectBr critical edges cannot be split by the previous 875 // SplitIndirectBrCriticalEdges call. Bail out. 876 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 877 BasicBlock *InstrBB = 878 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum); 879 if (!InstrBB) { 880 LLVM_DEBUG( 881 dbgs() << "Fail to split critical edge: not instrument this edge.\n"); 882 return nullptr; 883 } 884 // For a critical edge, we have to split. Instrument the newly 885 // created BB. 886 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; 887 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index 888 << " --> " << getBBInfo(DestBB).Index << "\n"); 889 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. 890 MST.addEdge(SrcBB, InstrBB, 0); 891 // Second one: Add new edge of InstrBB->DestBB. 892 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); 893 NewEdge1.InMST = true; 894 E->Removed = true; 895 896 return canInstrument(InstrBB); 897 } 898 899 // When generating value profiling calls on Windows routines that make use of 900 // handler funclets for exception processing an operand bundle needs to attached 901 // to the called function. This routine will set \p OpBundles to contain the 902 // funclet information, if any is needed, that should be placed on the generated 903 // value profiling call for the value profile candidate call. 904 static void 905 populateEHOperandBundle(VPCandidateInfo &Cand, 906 DenseMap<BasicBlock *, ColorVector> &BlockColors, 907 SmallVectorImpl<OperandBundleDef> &OpBundles) { 908 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst); 909 if (!OrigCall) 910 return; 911 912 if (!isa<IntrinsicInst>(OrigCall)) { 913 // The instrumentation call should belong to the same funclet as a 914 // non-intrinsic call, so just copy the operand bundle, if any exists. 915 Optional<OperandBundleUse> ParentFunclet = 916 OrigCall->getOperandBundle(LLVMContext::OB_funclet); 917 if (ParentFunclet) 918 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet)); 919 } else { 920 // Intrinsics or other instructions do not get funclet information from the 921 // front-end. Need to use the BlockColors that was computed by the routine 922 // colorEHFunclets to determine whether a funclet is needed. 923 if (!BlockColors.empty()) { 924 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second; 925 assert(CV.size() == 1 && "non-unique color for block!"); 926 Instruction *EHPad = CV.front()->getFirstNonPHI(); 927 if (EHPad->isEHPad()) 928 OpBundles.emplace_back("funclet", EHPad); 929 } 930 } 931 } 932 933 // Visit all edge and instrument the edges not in MST, and do value profiling. 934 // Critical edges will be split. 935 static void instrumentOneFunc( 936 Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, 937 BlockFrequencyInfo *BFI, 938 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 939 bool IsCS) { 940 // Split indirectbr critical edges here before computing the MST rather than 941 // later in getInstrBB() to avoid invalidating it. 942 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); 943 944 FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo( 945 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry); 946 947 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); 948 auto Name = ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy); 949 auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()), 950 FuncInfo.FunctionHash); 951 if (PGOFunctionEntryCoverage) { 952 assert(!IsCS && 953 "entry coverge does not support context-sensitive instrumentation"); 954 auto &EntryBB = F.getEntryBlock(); 955 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); 956 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>, 957 // i32 <index>) 958 Builder.CreateCall( 959 Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover), 960 {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)}); 961 return; 962 } 963 964 std::vector<BasicBlock *> InstrumentBBs; 965 FuncInfo.getInstrumentBBs(InstrumentBBs); 966 unsigned NumCounters = 967 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 968 969 uint32_t I = 0; 970 for (auto *InstrBB : InstrumentBBs) { 971 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 972 assert(Builder.GetInsertPoint() != InstrBB->end() && 973 "Cannot get the Instrumentation point"); 974 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>, 975 // i32 <index>) 976 Builder.CreateCall( 977 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), 978 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)}); 979 } 980 981 // Now instrument select instructions: 982 FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar, 983 FuncInfo.FunctionHash); 984 assert(I == NumCounters); 985 986 if (DisableValueProfiling) 987 return; 988 989 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); 990 991 // Intrinsic function calls do not have funclet operand bundles needed for 992 // Windows exception handling attached to them. However, if value profiling is 993 // inserted for one of these calls, then a funclet value will need to be set 994 // on the instrumentation call based on the funclet coloring. 995 DenseMap<BasicBlock *, ColorVector> BlockColors; 996 if (F.hasPersonalityFn() && 997 isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) 998 BlockColors = colorEHFunclets(F); 999 1000 // For each VP Kind, walk the VP candidates and instrument each one. 1001 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { 1002 unsigned SiteIndex = 0; 1003 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) 1004 continue; 1005 1006 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { 1007 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] 1008 << " site: CallSite Index = " << SiteIndex << "\n"); 1009 1010 IRBuilder<> Builder(Cand.InsertPt); 1011 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && 1012 "Cannot get the Instrumentation point"); 1013 1014 Value *ToProfile = nullptr; 1015 if (Cand.V->getType()->isIntegerTy()) 1016 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty()); 1017 else if (Cand.V->getType()->isPointerTy()) 1018 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); 1019 assert(ToProfile && "value profiling Value is of unexpected type"); 1020 1021 SmallVector<OperandBundleDef, 1> OpBundles; 1022 populateEHOperandBundle(Cand, BlockColors, OpBundles); 1023 Builder.CreateCall( 1024 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 1025 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 1026 Builder.getInt64(FuncInfo.FunctionHash), ToProfile, 1027 Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}, 1028 OpBundles); 1029 } 1030 } // IPVK_First <= Kind <= IPVK_Last 1031 } 1032 1033 namespace { 1034 1035 // This class represents a CFG edge in profile use compilation. 1036 struct PGOUseEdge : public PGOEdge { 1037 bool CountValid = false; 1038 uint64_t CountValue = 0; 1039 1040 PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 1041 : PGOEdge(Src, Dest, W) {} 1042 1043 // Set edge count value 1044 void setEdgeCount(uint64_t Value) { 1045 CountValue = Value; 1046 CountValid = true; 1047 } 1048 1049 // Return the information string for this object. 1050 std::string infoString() const { 1051 if (!CountValid) 1052 return PGOEdge::infoString(); 1053 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) 1054 .str(); 1055 } 1056 }; 1057 1058 using DirectEdges = SmallVector<PGOUseEdge *, 2>; 1059 1060 // This class stores the auxiliary information for each BB. 1061 struct UseBBInfo : public BBInfo { 1062 uint64_t CountValue = 0; 1063 bool CountValid; 1064 int32_t UnknownCountInEdge = 0; 1065 int32_t UnknownCountOutEdge = 0; 1066 DirectEdges InEdges; 1067 DirectEdges OutEdges; 1068 1069 UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {} 1070 1071 UseBBInfo(unsigned IX, uint64_t C) 1072 : BBInfo(IX), CountValue(C), CountValid(true) {} 1073 1074 // Set the profile count value for this BB. 1075 void setBBInfoCount(uint64_t Value) { 1076 CountValue = Value; 1077 CountValid = true; 1078 } 1079 1080 // Return the information string of this object. 1081 std::string infoString() const { 1082 if (!CountValid) 1083 return BBInfo::infoString(); 1084 return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str(); 1085 } 1086 1087 // Add an OutEdge and update the edge count. 1088 void addOutEdge(PGOUseEdge *E) { 1089 OutEdges.push_back(E); 1090 UnknownCountOutEdge++; 1091 } 1092 1093 // Add an InEdge and update the edge count. 1094 void addInEdge(PGOUseEdge *E) { 1095 InEdges.push_back(E); 1096 UnknownCountInEdge++; 1097 } 1098 }; 1099 1100 } // end anonymous namespace 1101 1102 // Sum up the count values for all the edges. 1103 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 1104 uint64_t Total = 0; 1105 for (auto &E : Edges) { 1106 if (E->Removed) 1107 continue; 1108 Total += E->CountValue; 1109 } 1110 return Total; 1111 } 1112 1113 namespace { 1114 1115 class PGOUseFunc { 1116 public: 1117 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, 1118 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 1119 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, 1120 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry) 1121 : F(Func), M(Modu), BFI(BFIin), PSI(PSI), 1122 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS, 1123 InstrumentFuncEntry), 1124 FreqAttr(FFA_Normal), IsCS(IsCS) {} 1125 1126 // Read counts for the instrumented BB from profile. 1127 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1128 bool &AllMinusOnes); 1129 1130 // Populate the counts for all BBs. 1131 void populateCounters(); 1132 1133 // Set the branch weights based on the count values. 1134 void setBranchWeights(); 1135 1136 // Annotate the value profile call sites for all value kind. 1137 void annotateValueSites(); 1138 1139 // Annotate the value profile call sites for one value kind. 1140 void annotateValueSites(uint32_t Kind); 1141 1142 // Annotate the irreducible loop header weights. 1143 void annotateIrrLoopHeaderWeights(); 1144 1145 // The hotness of the function from the profile count. 1146 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 1147 1148 // Return the function hotness from the profile. 1149 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 1150 1151 // Return the function hash. 1152 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 1153 1154 // Return the profile record for this function; 1155 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 1156 1157 // Return the auxiliary BB information. 1158 UseBBInfo &getBBInfo(const BasicBlock *BB) const { 1159 return FuncInfo.getBBInfo(BB); 1160 } 1161 1162 // Return the auxiliary BB information if available. 1163 UseBBInfo *findBBInfo(const BasicBlock *BB) const { 1164 return FuncInfo.findBBInfo(BB); 1165 } 1166 1167 Function &getFunc() const { return F; } 1168 1169 void dumpInfo(std::string Str = "") const { 1170 FuncInfo.dumpInfo(Str); 1171 } 1172 1173 uint64_t getProgramMaxCount() const { return ProgramMaxCount; } 1174 private: 1175 Function &F; 1176 Module *M; 1177 BlockFrequencyInfo *BFI; 1178 ProfileSummaryInfo *PSI; 1179 1180 // This member stores the shared information with class PGOGenFunc. 1181 FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo; 1182 1183 // The maximum count value in the profile. This is only used in PGO use 1184 // compilation. 1185 uint64_t ProgramMaxCount; 1186 1187 // Position of counter that remains to be read. 1188 uint32_t CountPosition = 0; 1189 1190 // Total size of the profile count for this function. 1191 uint32_t ProfileCountSize = 0; 1192 1193 // ProfileRecord for this function. 1194 InstrProfRecord ProfileRecord; 1195 1196 // Function hotness info derived from profile. 1197 FuncFreqAttr FreqAttr; 1198 1199 // Is to use the context sensitive profile. 1200 bool IsCS; 1201 1202 // Find the Instrumented BB and set the value. Return false on error. 1203 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 1204 1205 // Set the edge counter value for the unknown edge -- there should be only 1206 // one unknown edge. 1207 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 1208 1209 // Return FuncName string; 1210 std::string getFuncName() const { return FuncInfo.FuncName; } 1211 1212 // Set the hot/cold inline hints based on the count values. 1213 // FIXME: This function should be removed once the functionality in 1214 // the inliner is implemented. 1215 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 1216 if (PSI->isHotCount(EntryCount)) 1217 FreqAttr = FFA_Hot; 1218 else if (PSI->isColdCount(MaxCount)) 1219 FreqAttr = FFA_Cold; 1220 } 1221 }; 1222 1223 } // end anonymous namespace 1224 1225 // Visit all the edges and assign the count value for the instrumented 1226 // edges and the BB. Return false on error. 1227 bool PGOUseFunc::setInstrumentedCounts( 1228 const std::vector<uint64_t> &CountFromProfile) { 1229 1230 std::vector<BasicBlock *> InstrumentBBs; 1231 FuncInfo.getInstrumentBBs(InstrumentBBs); 1232 unsigned NumCounters = 1233 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 1234 // The number of counters here should match the number of counters 1235 // in profile. Return if they mismatch. 1236 if (NumCounters != CountFromProfile.size()) { 1237 return false; 1238 } 1239 auto *FuncEntry = &*F.begin(); 1240 1241 // Set the profile count to the Instrumented BBs. 1242 uint32_t I = 0; 1243 for (BasicBlock *InstrBB : InstrumentBBs) { 1244 uint64_t CountValue = CountFromProfile[I++]; 1245 UseBBInfo &Info = getBBInfo(InstrBB); 1246 // If we reach here, we know that we have some nonzero count 1247 // values in this function. The entry count should not be 0. 1248 // Fix it if necessary. 1249 if (InstrBB == FuncEntry && CountValue == 0) 1250 CountValue = 1; 1251 Info.setBBInfoCount(CountValue); 1252 } 1253 ProfileCountSize = CountFromProfile.size(); 1254 CountPosition = I; 1255 1256 // Set the edge count and update the count of unknown edges for BBs. 1257 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { 1258 E->setEdgeCount(Value); 1259 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1260 this->getBBInfo(E->DestBB).UnknownCountInEdge--; 1261 }; 1262 1263 // Set the profile count the Instrumented edges. There are BBs that not in 1264 // MST but not instrumented. Need to set the edge count value so that we can 1265 // populate the profile counts later. 1266 for (auto &E : FuncInfo.MST.AllEdges) { 1267 if (E->Removed || E->InMST) 1268 continue; 1269 const BasicBlock *SrcBB = E->SrcBB; 1270 UseBBInfo &SrcInfo = getBBInfo(SrcBB); 1271 1272 // If only one out-edge, the edge profile count should be the same as BB 1273 // profile count. 1274 if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1) 1275 setEdgeCount(E.get(), SrcInfo.CountValue); 1276 else { 1277 const BasicBlock *DestBB = E->DestBB; 1278 UseBBInfo &DestInfo = getBBInfo(DestBB); 1279 // If only one in-edge, the edge profile count should be the same as BB 1280 // profile count. 1281 if (DestInfo.CountValid && DestInfo.InEdges.size() == 1) 1282 setEdgeCount(E.get(), DestInfo.CountValue); 1283 } 1284 if (E->CountValid) 1285 continue; 1286 // E's count should have been set from profile. If not, this meenas E skips 1287 // the instrumentation. We set the count to 0. 1288 setEdgeCount(E.get(), 0); 1289 } 1290 return true; 1291 } 1292 1293 // Set the count value for the unknown edge. There should be one and only one 1294 // unknown edge in Edges vector. 1295 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 1296 for (auto &E : Edges) { 1297 if (E->CountValid) 1298 continue; 1299 E->setEdgeCount(Value); 1300 1301 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1302 getBBInfo(E->DestBB).UnknownCountInEdge--; 1303 return; 1304 } 1305 llvm_unreachable("Cannot find the unknown count edge"); 1306 } 1307 1308 // Emit function metadata indicating PGO profile mismatch. 1309 static void annotateFunctionWithHashMismatch(Function &F, 1310 LLVMContext &ctx) { 1311 const char MetadataName[] = "instr_prof_hash_mismatch"; 1312 SmallVector<Metadata *, 2> Names; 1313 // If this metadata already exists, ignore. 1314 auto *Existing = F.getMetadata(LLVMContext::MD_annotation); 1315 if (Existing) { 1316 MDTuple *Tuple = cast<MDTuple>(Existing); 1317 for (auto &N : Tuple->operands()) { 1318 if (cast<MDString>(N.get())->getString() == MetadataName) 1319 return; 1320 Names.push_back(N.get()); 1321 } 1322 } 1323 1324 MDBuilder MDB(ctx); 1325 Names.push_back(MDB.createString(MetadataName)); 1326 MDNode *MD = MDTuple::get(ctx, Names); 1327 F.setMetadata(LLVMContext::MD_annotation, MD); 1328 } 1329 1330 // Read the profile from ProfileFileName and assign the value to the 1331 // instrumented BB and the edges. This function also updates ProgramMaxCount. 1332 // Return true if the profile are successfully read, and false on errors. 1333 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1334 bool &AllMinusOnes) { 1335 auto &Ctx = M->getContext(); 1336 Expected<InstrProfRecord> Result = 1337 PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); 1338 if (Error E = Result.takeError()) { 1339 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 1340 auto Err = IPE.get(); 1341 bool SkipWarning = false; 1342 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " 1343 << FuncInfo.FuncName << ": "); 1344 if (Err == instrprof_error::unknown_function) { 1345 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; 1346 SkipWarning = !PGOWarnMissing; 1347 LLVM_DEBUG(dbgs() << "unknown function"); 1348 } else if (Err == instrprof_error::hash_mismatch || 1349 Err == instrprof_error::malformed) { 1350 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; 1351 SkipWarning = 1352 NoPGOWarnMismatch || 1353 (NoPGOWarnMismatchComdat && 1354 (F.hasComdat() || 1355 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 1356 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); 1357 // Emit function metadata indicating PGO profile mismatch. 1358 annotateFunctionWithHashMismatch(F, M->getContext()); 1359 } 1360 1361 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); 1362 if (SkipWarning) 1363 return; 1364 1365 std::string Msg = IPE.message() + std::string(" ") + F.getName().str() + 1366 std::string(" Hash = ") + 1367 std::to_string(FuncInfo.FunctionHash); 1368 1369 Ctx.diagnose( 1370 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1371 }); 1372 return false; 1373 } 1374 ProfileRecord = std::move(Result.get()); 1375 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 1376 1377 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 1378 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 1379 AllMinusOnes = (CountFromProfile.size() > 0); 1380 uint64_t ValueSum = 0; 1381 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 1382 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 1383 ValueSum += CountFromProfile[I]; 1384 if (CountFromProfile[I] != (uint64_t)-1) 1385 AllMinusOnes = false; 1386 } 1387 AllZeros = (ValueSum == 0); 1388 1389 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 1390 1391 getBBInfo(nullptr).UnknownCountOutEdge = 2; 1392 getBBInfo(nullptr).UnknownCountInEdge = 2; 1393 1394 if (!setInstrumentedCounts(CountFromProfile)) { 1395 LLVM_DEBUG( 1396 dbgs() << "Inconsistent number of counts, skipping this function"); 1397 Ctx.diagnose(DiagnosticInfoPGOProfile( 1398 M->getName().data(), 1399 Twine("Inconsistent number of counts in ") + F.getName().str() 1400 + Twine(": the profile may be stale or there is a function name collision."), 1401 DS_Warning)); 1402 return false; 1403 } 1404 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); 1405 return true; 1406 } 1407 1408 // Populate the counters from instrumented BBs to all BBs. 1409 // In the end of this operation, all BBs should have a valid count value. 1410 void PGOUseFunc::populateCounters() { 1411 bool Changes = true; 1412 unsigned NumPasses = 0; 1413 while (Changes) { 1414 NumPasses++; 1415 Changes = false; 1416 1417 // For efficient traversal, it's better to start from the end as most 1418 // of the instrumented edges are at the end. 1419 for (auto &BB : reverse(F)) { 1420 UseBBInfo *Count = findBBInfo(&BB); 1421 if (Count == nullptr) 1422 continue; 1423 if (!Count->CountValid) { 1424 if (Count->UnknownCountOutEdge == 0) { 1425 Count->CountValue = sumEdgeCount(Count->OutEdges); 1426 Count->CountValid = true; 1427 Changes = true; 1428 } else if (Count->UnknownCountInEdge == 0) { 1429 Count->CountValue = sumEdgeCount(Count->InEdges); 1430 Count->CountValid = true; 1431 Changes = true; 1432 } 1433 } 1434 if (Count->CountValid) { 1435 if (Count->UnknownCountOutEdge == 1) { 1436 uint64_t Total = 0; 1437 uint64_t OutSum = sumEdgeCount(Count->OutEdges); 1438 // If the one of the successor block can early terminate (no-return), 1439 // we can end up with situation where out edge sum count is larger as 1440 // the source BB's count is collected by a post-dominated block. 1441 if (Count->CountValue > OutSum) 1442 Total = Count->CountValue - OutSum; 1443 setEdgeCount(Count->OutEdges, Total); 1444 Changes = true; 1445 } 1446 if (Count->UnknownCountInEdge == 1) { 1447 uint64_t Total = 0; 1448 uint64_t InSum = sumEdgeCount(Count->InEdges); 1449 if (Count->CountValue > InSum) 1450 Total = Count->CountValue - InSum; 1451 setEdgeCount(Count->InEdges, Total); 1452 Changes = true; 1453 } 1454 } 1455 } 1456 } 1457 1458 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 1459 #ifndef NDEBUG 1460 // Assert every BB has a valid counter. 1461 for (auto &BB : F) { 1462 auto BI = findBBInfo(&BB); 1463 if (BI == nullptr) 1464 continue; 1465 assert(BI->CountValid && "BB count is not valid"); 1466 } 1467 #endif 1468 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 1469 uint64_t FuncMaxCount = FuncEntryCount; 1470 for (auto &BB : F) { 1471 auto BI = findBBInfo(&BB); 1472 if (BI == nullptr) 1473 continue; 1474 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); 1475 } 1476 1477 // Fix the obviously inconsistent entry count. 1478 if (FuncMaxCount > 0 && FuncEntryCount == 0) 1479 FuncEntryCount = 1; 1480 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); 1481 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 1482 1483 // Now annotate select instructions 1484 FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition); 1485 assert(CountPosition == ProfileCountSize); 1486 1487 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); 1488 } 1489 1490 // Assign the scaled count values to the BB with multiple out edges. 1491 void PGOUseFunc::setBranchWeights() { 1492 // Generate MD_prof metadata for every branch instruction. 1493 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() 1494 << " IsCS=" << IsCS << "\n"); 1495 for (auto &BB : F) { 1496 Instruction *TI = BB.getTerminator(); 1497 if (TI->getNumSuccessors() < 2) 1498 continue; 1499 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || 1500 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI))) 1501 continue; 1502 1503 if (getBBInfo(&BB).CountValue == 0) 1504 continue; 1505 1506 // We have a non-zero Branch BB. 1507 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1508 unsigned Size = BBCountInfo.OutEdges.size(); 1509 SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 1510 uint64_t MaxCount = 0; 1511 for (unsigned s = 0; s < Size; s++) { 1512 const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 1513 const BasicBlock *SrcBB = E->SrcBB; 1514 const BasicBlock *DestBB = E->DestBB; 1515 if (DestBB == nullptr) 1516 continue; 1517 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 1518 uint64_t EdgeCount = E->CountValue; 1519 if (EdgeCount > MaxCount) 1520 MaxCount = EdgeCount; 1521 EdgeCounts[SuccNum] = EdgeCount; 1522 } 1523 setProfMetadata(M, TI, EdgeCounts, MaxCount); 1524 } 1525 } 1526 1527 static bool isIndirectBrTarget(BasicBlock *BB) { 1528 for (BasicBlock *Pred : predecessors(BB)) { 1529 if (isa<IndirectBrInst>(Pred->getTerminator())) 1530 return true; 1531 } 1532 return false; 1533 } 1534 1535 void PGOUseFunc::annotateIrrLoopHeaderWeights() { 1536 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); 1537 // Find irr loop headers 1538 for (auto &BB : F) { 1539 // As a heuristic also annotate indrectbr targets as they have a high chance 1540 // to become an irreducible loop header after the indirectbr tail 1541 // duplication. 1542 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { 1543 Instruction *TI = BB.getTerminator(); 1544 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1545 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); 1546 } 1547 } 1548 } 1549 1550 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 1551 if (PGOFunctionEntryCoverage) 1552 return; 1553 Module *M = F.getParent(); 1554 IRBuilder<> Builder(&SI); 1555 Type *Int64Ty = Builder.getInt64Ty(); 1556 Type *I8PtrTy = Builder.getInt8PtrTy(); 1557 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 1558 Builder.CreateCall( 1559 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 1560 {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1561 Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), 1562 Builder.getInt32(*CurCtrIdx), Step}); 1563 ++(*CurCtrIdx); 1564 } 1565 1566 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 1567 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1568 assert(*CurCtrIdx < CountFromProfile.size() && 1569 "Out of bound access of counters"); 1570 uint64_t SCounts[2]; 1571 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1572 ++(*CurCtrIdx); 1573 uint64_t TotalCount = 0; 1574 auto BI = UseFunc->findBBInfo(SI.getParent()); 1575 if (BI != nullptr) 1576 TotalCount = BI->CountValue; 1577 // False Count 1578 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1579 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1580 if (MaxCount) 1581 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1582 } 1583 1584 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1585 if (!PGOInstrSelect) 1586 return; 1587 // FIXME: do not handle this yet. 1588 if (SI.getCondition()->getType()->isVectorTy()) 1589 return; 1590 1591 switch (Mode) { 1592 case VM_counting: 1593 NSIs++; 1594 return; 1595 case VM_instrument: 1596 instrumentOneSelectInst(SI); 1597 return; 1598 case VM_annotate: 1599 annotateOneSelectInst(SI); 1600 return; 1601 } 1602 1603 llvm_unreachable("Unknown visiting mode"); 1604 } 1605 1606 // Traverse all valuesites and annotate the instructions for all value kind. 1607 void PGOUseFunc::annotateValueSites() { 1608 if (DisableValueProfiling) 1609 return; 1610 1611 // Create the PGOFuncName meta data. 1612 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1613 1614 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1615 annotateValueSites(Kind); 1616 } 1617 1618 // Annotate the instructions for a specific value kind. 1619 void PGOUseFunc::annotateValueSites(uint32_t Kind) { 1620 assert(Kind <= IPVK_Last); 1621 unsigned ValueSiteIndex = 0; 1622 auto &ValueSites = FuncInfo.ValueSites[Kind]; 1623 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 1624 if (NumValueSites != ValueSites.size()) { 1625 auto &Ctx = M->getContext(); 1626 Ctx.diagnose(DiagnosticInfoPGOProfile( 1627 M->getName().data(), 1628 Twine("Inconsistent number of value sites for ") + 1629 Twine(ValueProfKindDescr[Kind]) + 1630 Twine(" profiling in \"") + F.getName().str() + 1631 Twine("\", possibly due to the use of a stale profile."), 1632 DS_Warning)); 1633 return; 1634 } 1635 1636 for (VPCandidateInfo &I : ValueSites) { 1637 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 1638 << "): Index = " << ValueSiteIndex << " out of " 1639 << NumValueSites << "\n"); 1640 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, 1641 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 1642 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations 1643 : MaxNumAnnotations); 1644 ValueSiteIndex++; 1645 } 1646 } 1647 1648 // Collect the set of members for each Comdat in module M and store 1649 // in ComdatMembers. 1650 static void collectComdatMembers( 1651 Module &M, 1652 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1653 if (!DoComdatRenaming) 1654 return; 1655 for (Function &F : M) 1656 if (Comdat *C = F.getComdat()) 1657 ComdatMembers.insert(std::make_pair(C, &F)); 1658 for (GlobalVariable &GV : M.globals()) 1659 if (Comdat *C = GV.getComdat()) 1660 ComdatMembers.insert(std::make_pair(C, &GV)); 1661 for (GlobalAlias &GA : M.aliases()) 1662 if (Comdat *C = GA.getComdat()) 1663 ComdatMembers.insert(std::make_pair(C, &GA)); 1664 } 1665 1666 static bool InstrumentAllFunctions( 1667 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1668 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1669 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { 1670 // For the context-sensitve instrumentation, we should have a separated pass 1671 // (before LTO/ThinLTO linking) to create these variables. 1672 if (!IsCS) 1673 createIRLevelProfileFlagVar(M, /*IsCS=*/false); 1674 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1675 collectComdatMembers(M, ComdatMembers); 1676 1677 for (auto &F : M) { 1678 if (F.isDeclaration()) 1679 continue; 1680 if (F.hasFnAttribute(llvm::Attribute::NoProfile)) 1681 continue; 1682 auto &TLI = LookupTLI(F); 1683 auto *BPI = LookupBPI(F); 1684 auto *BFI = LookupBFI(F); 1685 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS); 1686 } 1687 return true; 1688 } 1689 1690 PreservedAnalyses 1691 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) { 1692 createProfileFileNameVar(M, CSInstrName); 1693 // The variable in a comdat may be discarded by LTO. Ensure the declaration 1694 // will be retained. 1695 appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true)); 1696 return PreservedAnalyses::all(); 1697 } 1698 1699 bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) { 1700 if (skipModule(M)) 1701 return false; 1702 1703 auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { 1704 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 1705 }; 1706 auto LookupBPI = [this](Function &F) { 1707 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1708 }; 1709 auto LookupBFI = [this](Function &F) { 1710 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1711 }; 1712 return InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS); 1713 } 1714 1715 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1716 ModuleAnalysisManager &AM) { 1717 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1718 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1719 return FAM.getResult<TargetLibraryAnalysis>(F); 1720 }; 1721 auto LookupBPI = [&FAM](Function &F) { 1722 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1723 }; 1724 auto LookupBFI = [&FAM](Function &F) { 1725 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1726 }; 1727 1728 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS)) 1729 return PreservedAnalyses::all(); 1730 1731 return PreservedAnalyses::none(); 1732 } 1733 1734 // Using the ratio b/w sums of profile count values and BFI count values to 1735 // adjust the func entry count. 1736 static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, 1737 BranchProbabilityInfo &NBPI) { 1738 Function &F = Func.getFunc(); 1739 BlockFrequencyInfo NBFI(F, NBPI, LI); 1740 #ifndef NDEBUG 1741 auto BFIEntryCount = F.getEntryCount(); 1742 assert(BFIEntryCount.hasValue() && (BFIEntryCount->getCount() > 0) && 1743 "Invalid BFI Entrycount"); 1744 #endif 1745 auto SumCount = APFloat::getZero(APFloat::IEEEdouble()); 1746 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble()); 1747 for (auto &BBI : F) { 1748 uint64_t CountValue = 0; 1749 uint64_t BFICountValue = 0; 1750 if (!Func.findBBInfo(&BBI)) 1751 continue; 1752 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1753 CountValue = Func.getBBInfo(&BBI).CountValue; 1754 BFICountValue = BFICount.getValue(); 1755 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven); 1756 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven); 1757 } 1758 if (SumCount.isZero()) 1759 return; 1760 1761 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan && 1762 "Incorrect sum of BFI counts"); 1763 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual) 1764 return; 1765 double Scale = (SumCount / SumBFICount).convertToDouble(); 1766 if (Scale < 1.001 && Scale > 0.999) 1767 return; 1768 1769 uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue; 1770 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale; 1771 if (NewEntryCount == 0) 1772 NewEntryCount = 1; 1773 if (NewEntryCount != FuncEntryCount) { 1774 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real)); 1775 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName() 1776 << ", entry_count " << FuncEntryCount << " --> " 1777 << NewEntryCount << "\n"); 1778 } 1779 } 1780 1781 // Compare the profile count values with BFI count values, and print out 1782 // the non-matching ones. 1783 static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, 1784 BranchProbabilityInfo &NBPI, 1785 uint64_t HotCountThreshold, 1786 uint64_t ColdCountThreshold) { 1787 Function &F = Func.getFunc(); 1788 BlockFrequencyInfo NBFI(F, NBPI, LI); 1789 // bool PrintFunc = false; 1790 bool HotBBOnly = PGOVerifyHotBFI; 1791 std::string Msg; 1792 OptimizationRemarkEmitter ORE(&F); 1793 1794 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0; 1795 for (auto &BBI : F) { 1796 uint64_t CountValue = 0; 1797 uint64_t BFICountValue = 0; 1798 1799 if (Func.getBBInfo(&BBI).CountValid) 1800 CountValue = Func.getBBInfo(&BBI).CountValue; 1801 1802 BBNum++; 1803 if (CountValue) 1804 NonZeroBBNum++; 1805 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1806 if (BFICount) 1807 BFICountValue = BFICount.getValue(); 1808 1809 if (HotBBOnly) { 1810 bool rawIsHot = CountValue >= HotCountThreshold; 1811 bool BFIIsHot = BFICountValue >= HotCountThreshold; 1812 bool rawIsCold = CountValue <= ColdCountThreshold; 1813 bool ShowCount = false; 1814 if (rawIsHot && !BFIIsHot) { 1815 Msg = "raw-Hot to BFI-nonHot"; 1816 ShowCount = true; 1817 } else if (rawIsCold && BFIIsHot) { 1818 Msg = "raw-Cold to BFI-Hot"; 1819 ShowCount = true; 1820 } 1821 if (!ShowCount) 1822 continue; 1823 } else { 1824 if ((CountValue < PGOVerifyBFICutoff) && 1825 (BFICountValue < PGOVerifyBFICutoff)) 1826 continue; 1827 uint64_t Diff = (BFICountValue >= CountValue) 1828 ? BFICountValue - CountValue 1829 : CountValue - BFICountValue; 1830 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio) 1831 continue; 1832 } 1833 BBMisMatchNum++; 1834 1835 ORE.emit([&]() { 1836 OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "bfi-verify", 1837 F.getSubprogram(), &BBI); 1838 Remark << "BB " << ore::NV("Block", BBI.getName()) 1839 << " Count=" << ore::NV("Count", CountValue) 1840 << " BFI_Count=" << ore::NV("Count", BFICountValue); 1841 if (!Msg.empty()) 1842 Remark << " (" << Msg << ")"; 1843 return Remark; 1844 }); 1845 } 1846 if (BBMisMatchNum) 1847 ORE.emit([&]() { 1848 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify", 1849 F.getSubprogram(), &F.getEntryBlock()) 1850 << "In Func " << ore::NV("Function", F.getName()) 1851 << ": Num_of_BB=" << ore::NV("Count", BBNum) 1852 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum) 1853 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum); 1854 }); 1855 } 1856 1857 static bool annotateAllFunctions( 1858 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, 1859 function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1860 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1861 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, 1862 ProfileSummaryInfo *PSI, bool IsCS) { 1863 LLVM_DEBUG(dbgs() << "Read in profile counters: "); 1864 auto &Ctx = M.getContext(); 1865 // Read the counter array from file. 1866 auto ReaderOrErr = 1867 IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName); 1868 if (Error E = ReaderOrErr.takeError()) { 1869 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 1870 Ctx.diagnose( 1871 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 1872 }); 1873 return false; 1874 } 1875 1876 std::unique_ptr<IndexedInstrProfReader> PGOReader = 1877 std::move(ReaderOrErr.get()); 1878 if (!PGOReader) { 1879 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 1880 StringRef("Cannot get PGOReader"))); 1881 return false; 1882 } 1883 if (!PGOReader->hasCSIRLevelProfile() && IsCS) 1884 return false; 1885 1886 // TODO: might need to change the warning once the clang option is finalized. 1887 if (!PGOReader->isIRLevelProfile()) { 1888 Ctx.diagnose(DiagnosticInfoPGOProfile( 1889 ProfileFileName.data(), "Not an IR level instrumentation profile")); 1890 return false; 1891 } 1892 if (PGOReader->hasSingleByteCoverage()) { 1893 Ctx.diagnose(DiagnosticInfoPGOProfile( 1894 ProfileFileName.data(), 1895 "Cannot use coverage profiles for optimization")); 1896 return false; 1897 } 1898 if (PGOReader->functionEntryOnly()) { 1899 Ctx.diagnose(DiagnosticInfoPGOProfile( 1900 ProfileFileName.data(), 1901 "Function entry profiles are not yet supported for optimization")); 1902 return false; 1903 } 1904 1905 // Add the profile summary (read from the header of the indexed summary) here 1906 // so that we can use it below when reading counters (which checks if the 1907 // function should be marked with a cold or inlinehint attribute). 1908 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), 1909 IsCS ? ProfileSummary::PSK_CSInstr 1910 : ProfileSummary::PSK_Instr); 1911 PSI->refresh(); 1912 1913 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1914 collectComdatMembers(M, ComdatMembers); 1915 std::vector<Function *> HotFunctions; 1916 std::vector<Function *> ColdFunctions; 1917 1918 // If the profile marked as always instrument the entry BB, do the 1919 // same. Note this can be overwritten by the internal option in CFGMST.h 1920 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled(); 1921 if (PGOInstrumentEntry.getNumOccurrences() > 0) 1922 InstrumentFuncEntry = PGOInstrumentEntry; 1923 for (auto &F : M) { 1924 if (F.isDeclaration()) 1925 continue; 1926 auto &TLI = LookupTLI(F); 1927 auto *BPI = LookupBPI(F); 1928 auto *BFI = LookupBFI(F); 1929 // Split indirectbr critical edges here before computing the MST rather than 1930 // later in getInstrBB() to avoid invalidating it. 1931 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); 1932 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, 1933 InstrumentFuncEntry); 1934 // When AllMinusOnes is true, it means the profile for the function 1935 // is unrepresentative and this function is actually hot. Set the 1936 // entry count of the function to be multiple times of hot threshold 1937 // and drop all its internal counters. 1938 bool AllMinusOnes = false; 1939 bool AllZeros = false; 1940 if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes)) 1941 continue; 1942 if (AllZeros) { 1943 F.setEntryCount(ProfileCount(0, Function::PCT_Real)); 1944 if (Func.getProgramMaxCount() != 0) 1945 ColdFunctions.push_back(&F); 1946 continue; 1947 } 1948 const unsigned MultiplyFactor = 3; 1949 if (AllMinusOnes) { 1950 uint64_t HotThreshold = PSI->getHotCountThreshold(); 1951 if (HotThreshold) 1952 F.setEntryCount( 1953 ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real)); 1954 HotFunctions.push_back(&F); 1955 continue; 1956 } 1957 Func.populateCounters(); 1958 Func.setBranchWeights(); 1959 Func.annotateValueSites(); 1960 Func.annotateIrrLoopHeaderWeights(); 1961 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 1962 if (FreqAttr == PGOUseFunc::FFA_Cold) 1963 ColdFunctions.push_back(&F); 1964 else if (FreqAttr == PGOUseFunc::FFA_Hot) 1965 HotFunctions.push_back(&F); 1966 if (PGOViewCounts != PGOVCT_None && 1967 (ViewBlockFreqFuncName.empty() || 1968 F.getName().equals(ViewBlockFreqFuncName))) { 1969 LoopInfo LI{DominatorTree(F)}; 1970 std::unique_ptr<BranchProbabilityInfo> NewBPI = 1971 std::make_unique<BranchProbabilityInfo>(F, LI); 1972 std::unique_ptr<BlockFrequencyInfo> NewBFI = 1973 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); 1974 if (PGOViewCounts == PGOVCT_Graph) 1975 NewBFI->view(); 1976 else if (PGOViewCounts == PGOVCT_Text) { 1977 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n"; 1978 NewBFI->print(dbgs()); 1979 } 1980 } 1981 if (PGOViewRawCounts != PGOVCT_None && 1982 (ViewBlockFreqFuncName.empty() || 1983 F.getName().equals(ViewBlockFreqFuncName))) { 1984 if (PGOViewRawCounts == PGOVCT_Graph) 1985 if (ViewBlockFreqFuncName.empty()) 1986 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1987 else 1988 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1989 else if (PGOViewRawCounts == PGOVCT_Text) { 1990 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n"; 1991 Func.dumpInfo(); 1992 } 1993 } 1994 1995 if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) { 1996 LoopInfo LI{DominatorTree(F)}; 1997 BranchProbabilityInfo NBPI(F, LI); 1998 1999 // Fix func entry count. 2000 if (PGOFixEntryCount) 2001 fixFuncEntryCount(Func, LI, NBPI); 2002 2003 // Verify BlockFrequency information. 2004 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0; 2005 if (PGOVerifyHotBFI) { 2006 HotCountThreshold = PSI->getOrCompHotCountThreshold(); 2007 ColdCountThreshold = PSI->getOrCompColdCountThreshold(); 2008 } 2009 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold); 2010 } 2011 } 2012 2013 // Set function hotness attribute from the profile. 2014 // We have to apply these attributes at the end because their presence 2015 // can affect the BranchProbabilityInfo of any callers, resulting in an 2016 // inconsistent MST between prof-gen and prof-use. 2017 for (auto &F : HotFunctions) { 2018 F->addFnAttr(Attribute::InlineHint); 2019 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 2020 << "\n"); 2021 } 2022 for (auto &F : ColdFunctions) { 2023 // Only set when there is no Attribute::Hot set by the user. For Hot 2024 // attribute, user's annotation has the precedence over the profile. 2025 if (F->hasFnAttribute(Attribute::Hot)) { 2026 auto &Ctx = M.getContext(); 2027 std::string Msg = std::string("Function ") + F->getName().str() + 2028 std::string(" is annotated as a hot function but" 2029 " the profile is cold"); 2030 Ctx.diagnose( 2031 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); 2032 continue; 2033 } 2034 F->addFnAttr(Attribute::Cold); 2035 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() 2036 << "\n"); 2037 } 2038 return true; 2039 } 2040 2041 PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, 2042 std::string RemappingFilename, 2043 bool IsCS) 2044 : ProfileFileName(std::move(Filename)), 2045 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) { 2046 if (!PGOTestProfileFile.empty()) 2047 ProfileFileName = PGOTestProfileFile; 2048 if (!PGOTestProfileRemappingFile.empty()) 2049 ProfileRemappingFileName = PGOTestProfileRemappingFile; 2050 } 2051 2052 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 2053 ModuleAnalysisManager &AM) { 2054 2055 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 2056 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 2057 return FAM.getResult<TargetLibraryAnalysis>(F); 2058 }; 2059 auto LookupBPI = [&FAM](Function &F) { 2060 return &FAM.getResult<BranchProbabilityAnalysis>(F); 2061 }; 2062 auto LookupBFI = [&FAM](Function &F) { 2063 return &FAM.getResult<BlockFrequencyAnalysis>(F); 2064 }; 2065 2066 auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); 2067 2068 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, 2069 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) 2070 return PreservedAnalyses::all(); 2071 2072 return PreservedAnalyses::none(); 2073 } 2074 2075 bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { 2076 if (skipModule(M)) 2077 return false; 2078 2079 auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { 2080 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 2081 }; 2082 auto LookupBPI = [this](Function &F) { 2083 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 2084 }; 2085 auto LookupBFI = [this](Function &F) { 2086 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 2087 }; 2088 2089 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 2090 return annotateAllFunctions(M, ProfileFileName, "", LookupTLI, LookupBPI, 2091 LookupBFI, PSI, IsCS); 2092 } 2093 2094 static std::string getSimpleNodeName(const BasicBlock *Node) { 2095 if (!Node->getName().empty()) 2096 return std::string(Node->getName()); 2097 2098 std::string SimpleNodeName; 2099 raw_string_ostream OS(SimpleNodeName); 2100 Node->printAsOperand(OS, false); 2101 return OS.str(); 2102 } 2103 2104 void llvm::setProfMetadata(Module *M, Instruction *TI, 2105 ArrayRef<uint64_t> EdgeCounts, 2106 uint64_t MaxCount) { 2107 MDBuilder MDB(M->getContext()); 2108 assert(MaxCount > 0 && "Bad max count"); 2109 uint64_t Scale = calculateCountScale(MaxCount); 2110 SmallVector<unsigned, 4> Weights; 2111 for (const auto &ECI : EdgeCounts) 2112 Weights.push_back(scaleBranchCount(ECI, Scale)); 2113 2114 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W 2115 : Weights) { 2116 dbgs() << W << " "; 2117 } dbgs() << "\n";); 2118 2119 TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 2120 if (EmitBranchProbability) { 2121 std::string BrCondStr = getBranchCondString(TI); 2122 if (BrCondStr.empty()) 2123 return; 2124 2125 uint64_t WSum = 2126 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0, 2127 [](uint64_t w1, uint64_t w2) { return w1 + w2; }); 2128 uint64_t TotalCount = 2129 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0, 2130 [](uint64_t c1, uint64_t c2) { return c1 + c2; }); 2131 Scale = calculateCountScale(WSum); 2132 BranchProbability BP(scaleBranchCount(Weights[0], Scale), 2133 scaleBranchCount(WSum, Scale)); 2134 std::string BranchProbStr; 2135 raw_string_ostream OS(BranchProbStr); 2136 OS << BP; 2137 OS << " (total count : " << TotalCount << ")"; 2138 OS.flush(); 2139 Function *F = TI->getParent()->getParent(); 2140 OptimizationRemarkEmitter ORE(F); 2141 ORE.emit([&]() { 2142 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) 2143 << BrCondStr << " is true with probability : " << BranchProbStr; 2144 }); 2145 } 2146 } 2147 2148 namespace llvm { 2149 2150 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { 2151 MDBuilder MDB(M->getContext()); 2152 TI->setMetadata(llvm::LLVMContext::MD_irr_loop, 2153 MDB.createIrrLoopHeaderWeight(Count)); 2154 } 2155 2156 template <> struct GraphTraits<PGOUseFunc *> { 2157 using NodeRef = const BasicBlock *; 2158 using ChildIteratorType = const_succ_iterator; 2159 using nodes_iterator = pointer_iterator<Function::const_iterator>; 2160 2161 static NodeRef getEntryNode(const PGOUseFunc *G) { 2162 return &G->getFunc().front(); 2163 } 2164 2165 static ChildIteratorType child_begin(const NodeRef N) { 2166 return succ_begin(N); 2167 } 2168 2169 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } 2170 2171 static nodes_iterator nodes_begin(const PGOUseFunc *G) { 2172 return nodes_iterator(G->getFunc().begin()); 2173 } 2174 2175 static nodes_iterator nodes_end(const PGOUseFunc *G) { 2176 return nodes_iterator(G->getFunc().end()); 2177 } 2178 }; 2179 2180 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { 2181 explicit DOTGraphTraits(bool isSimple = false) 2182 : DefaultDOTGraphTraits(isSimple) {} 2183 2184 static std::string getGraphName(const PGOUseFunc *G) { 2185 return std::string(G->getFunc().getName()); 2186 } 2187 2188 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { 2189 std::string Result; 2190 raw_string_ostream OS(Result); 2191 2192 OS << getSimpleNodeName(Node) << ":\\l"; 2193 UseBBInfo *BI = Graph->findBBInfo(Node); 2194 OS << "Count : "; 2195 if (BI && BI->CountValid) 2196 OS << BI->CountValue << "\\l"; 2197 else 2198 OS << "Unknown\\l"; 2199 2200 if (!PGOInstrSelect) 2201 return Result; 2202 2203 for (const Instruction &I : *Node) { 2204 if (!isa<SelectInst>(&I)) 2205 continue; 2206 // Display scaled counts for SELECT instruction: 2207 OS << "SELECT : { T = "; 2208 uint64_t TC, FC; 2209 bool HasProf = I.extractProfMetadata(TC, FC); 2210 if (!HasProf) 2211 OS << "Unknown, F = Unknown }\\l"; 2212 else 2213 OS << TC << ", F = " << FC << " }\\l"; 2214 } 2215 return Result; 2216 } 2217 }; 2218 2219 } // end namespace llvm 2220