1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements PGO instrumentation using a minimum spanning tree based 10 // on the following paper: 11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 13 // Issue 3, pp 313-322 14 // The idea of the algorithm based on the fact that for each node (except for 15 // the entry and exit), the sum of incoming edge counts equals the sum of 16 // outgoing edge counts. The count of edge on spanning tree can be derived from 17 // those edges not on the spanning tree. Knuth proves this method instruments 18 // the minimum number of edges. 19 // 20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree 21 // edges have higher frequencies (more likely to execute). The idea is to 22 // instrument those less frequently executed edges to reduce the runtime 23 // overhead of instrumented binaries. 24 // 25 // This file contains two passes: 26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 27 // count profile, and generates the instrumentation for indirect call 28 // profiling. 29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and 30 // annotates the branch weights. It also reads the indirect call value 31 // profiling records and annotate the indirect call instructions. 32 // 33 // To get the precise counter information, These two passes need to invoke at 34 // the same compilation point (so they see the same IR). For pass 35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For 36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 37 // the profile is opened in module level and passed to each PGOUseFunc instance. 38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 39 // in class FuncPGOInstrumentation. 40 // 41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 42 // BBInfo contains auxiliary information for each BB. These two classes are used 43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 45 // used in populating profile counters. 46 // The MST implementation is in Class CFGMST (CFGMST.h). 47 // 48 //===----------------------------------------------------------------------===// 49 50 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 51 #include "CFGMST.h" 52 #include "ValueProfileCollector.h" 53 #include "llvm/ADT/APInt.h" 54 #include "llvm/ADT/ArrayRef.h" 55 #include "llvm/ADT/MapVector.h" 56 #include "llvm/ADT/STLExtras.h" 57 #include "llvm/ADT/SmallVector.h" 58 #include "llvm/ADT/Statistic.h" 59 #include "llvm/ADT/StringRef.h" 60 #include "llvm/ADT/Triple.h" 61 #include "llvm/ADT/Twine.h" 62 #include "llvm/ADT/iterator.h" 63 #include "llvm/ADT/iterator_range.h" 64 #include "llvm/Analysis/BlockFrequencyInfo.h" 65 #include "llvm/Analysis/BranchProbabilityInfo.h" 66 #include "llvm/Analysis/CFG.h" 67 #include "llvm/Analysis/EHPersonalities.h" 68 #include "llvm/Analysis/LoopInfo.h" 69 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 70 #include "llvm/Analysis/ProfileSummaryInfo.h" 71 #include "llvm/IR/Attributes.h" 72 #include "llvm/IR/BasicBlock.h" 73 #include "llvm/IR/CFG.h" 74 #include "llvm/IR/Comdat.h" 75 #include "llvm/IR/Constant.h" 76 #include "llvm/IR/Constants.h" 77 #include "llvm/IR/DiagnosticInfo.h" 78 #include "llvm/IR/Dominators.h" 79 #include "llvm/IR/Function.h" 80 #include "llvm/IR/GlobalAlias.h" 81 #include "llvm/IR/GlobalValue.h" 82 #include "llvm/IR/GlobalVariable.h" 83 #include "llvm/IR/IRBuilder.h" 84 #include "llvm/IR/InstVisitor.h" 85 #include "llvm/IR/InstrTypes.h" 86 #include "llvm/IR/Instruction.h" 87 #include "llvm/IR/Instructions.h" 88 #include "llvm/IR/IntrinsicInst.h" 89 #include "llvm/IR/Intrinsics.h" 90 #include "llvm/IR/LLVMContext.h" 91 #include "llvm/IR/MDBuilder.h" 92 #include "llvm/IR/Module.h" 93 #include "llvm/IR/PassManager.h" 94 #include "llvm/IR/ProfileSummary.h" 95 #include "llvm/IR/Type.h" 96 #include "llvm/IR/Value.h" 97 #include "llvm/InitializePasses.h" 98 #include "llvm/Pass.h" 99 #include "llvm/ProfileData/InstrProf.h" 100 #include "llvm/ProfileData/InstrProfReader.h" 101 #include "llvm/Support/BranchProbability.h" 102 #include "llvm/Support/CRC.h" 103 #include "llvm/Support/Casting.h" 104 #include "llvm/Support/CommandLine.h" 105 #include "llvm/Support/DOTGraphTraits.h" 106 #include "llvm/Support/Debug.h" 107 #include "llvm/Support/Error.h" 108 #include "llvm/Support/ErrorHandling.h" 109 #include "llvm/Support/GraphWriter.h" 110 #include "llvm/Support/raw_ostream.h" 111 #include "llvm/Transforms/Instrumentation.h" 112 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 113 #include "llvm/Transforms/Utils/MisExpect.h" 114 #include <algorithm> 115 #include <cassert> 116 #include <cstdint> 117 #include <memory> 118 #include <numeric> 119 #include <string> 120 #include <unordered_map> 121 #include <utility> 122 #include <vector> 123 124 using namespace llvm; 125 using ProfileCount = Function::ProfileCount; 126 using VPCandidateInfo = ValueProfileCollector::CandidateInfo; 127 128 #define DEBUG_TYPE "pgo-instrumentation" 129 130 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 131 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 132 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); 133 STATISTIC(NumOfPGOEdge, "Number of edges."); 134 STATISTIC(NumOfPGOBB, "Number of basic-blocks."); 135 STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 136 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 137 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 138 STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 139 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 140 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); 141 STATISTIC(NumOfCSPGOSelectInsts, 142 "Number of select instruction instrumented in CSPGO."); 143 STATISTIC(NumOfCSPGOMemIntrinsics, 144 "Number of mem intrinsics instrumented in CSPGO."); 145 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); 146 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); 147 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO."); 148 STATISTIC(NumOfCSPGOFunc, 149 "Number of functions having valid profile counts in CSPGO."); 150 STATISTIC(NumOfCSPGOMismatch, 151 "Number of functions having mismatch profile in CSPGO."); 152 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); 153 154 // Command line option to specify the file to read profile from. This is 155 // mainly used for testing. 156 static cl::opt<std::string> 157 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, 158 cl::value_desc("filename"), 159 cl::desc("Specify the path of profile data file. This is" 160 "mainly for test purpose.")); 161 static cl::opt<std::string> PGOTestProfileRemappingFile( 162 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, 163 cl::value_desc("filename"), 164 cl::desc("Specify the path of profile remapping file. This is mainly for " 165 "test purpose.")); 166 167 // Command line option to disable value profiling. The default is false: 168 // i.e. value profiling is enabled by default. This is for debug purpose. 169 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 170 cl::Hidden, 171 cl::desc("Disable Value Profiling")); 172 173 // Command line option to set the maximum number of VP annotations to write to 174 // the metadata for a single indirect call callsite. 175 static cl::opt<unsigned> MaxNumAnnotations( 176 "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore, 177 cl::desc("Max number of annotations for a single indirect " 178 "call callsite")); 179 180 // Command line option to set the maximum number of value annotations 181 // to write to the metadata for a single memop intrinsic. 182 static cl::opt<unsigned> MaxNumMemOPAnnotations( 183 "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore, 184 cl::desc("Max number of preicise value annotations for a single memop" 185 "intrinsic")); 186 187 // Command line option to control appending FunctionHash to the name of a COMDAT 188 // function. This is to avoid the hash mismatch caused by the preinliner. 189 static cl::opt<bool> DoComdatRenaming( 190 "do-comdat-renaming", cl::init(false), cl::Hidden, 191 cl::desc("Append function hash to the name of COMDAT function to avoid " 192 "function hash mismatch due to the preinliner")); 193 194 // Command line option to enable/disable the warning about missing profile 195 // information. 196 static cl::opt<bool> 197 PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, 198 cl::desc("Use this option to turn on/off " 199 "warnings about missing profile data for " 200 "functions.")); 201 202 // Command line option to enable/disable the warning about a hash mismatch in 203 // the profile data. 204 static cl::opt<bool> 205 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 206 cl::desc("Use this option to turn off/on " 207 "warnings about profile cfg mismatch.")); 208 209 // Command line option to enable/disable the warning about a hash mismatch in 210 // the profile data for Comdat functions, which often turns out to be false 211 // positive due to the pre-instrumentation inline. 212 static cl::opt<bool> 213 NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), 214 cl::Hidden, 215 cl::desc("The option is used to turn on/off " 216 "warnings about hash mismatch for comdat " 217 "functions.")); 218 219 // Command line option to enable/disable select instruction instrumentation. 220 static cl::opt<bool> 221 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 222 cl::desc("Use this option to turn on/off SELECT " 223 "instruction instrumentation. ")); 224 225 // Command line option to turn on CFG dot or text dump of raw profile counts 226 static cl::opt<PGOViewCountsType> PGOViewRawCounts( 227 "pgo-view-raw-counts", cl::Hidden, 228 cl::desc("A boolean option to show CFG dag or text " 229 "with raw profile counts from " 230 "profile data. See also option " 231 "-pgo-view-counts. To limit graph " 232 "display to only one function, use " 233 "filtering option -view-bfi-func-name."), 234 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), 235 clEnumValN(PGOVCT_Graph, "graph", "show a graph."), 236 clEnumValN(PGOVCT_Text, "text", "show in text."))); 237 238 // Command line option to enable/disable memop intrinsic call.size profiling. 239 static cl::opt<bool> 240 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 241 cl::desc("Use this option to turn on/off " 242 "memory intrinsic size profiling.")); 243 244 // Emit branch probability as optimization remarks. 245 static cl::opt<bool> 246 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 247 cl::desc("When this option is on, the annotated " 248 "branch probability will be emitted as " 249 "optimization remarks: -{Rpass|" 250 "pass-remarks}=pgo-instrumentation")); 251 252 static cl::opt<bool> PGOInstrumentEntry( 253 "pgo-instrument-entry", cl::init(false), cl::Hidden, 254 cl::desc("Force to instrument function entry basicblock.")); 255 256 // Command line option to turn on CFG dot dump after profile annotation. 257 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts 258 extern cl::opt<PGOViewCountsType> PGOViewCounts; 259 260 // Command line option to specify the name of the function for CFG dump 261 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= 262 extern cl::opt<std::string> ViewBlockFreqFuncName; 263 264 static cl::opt<bool> 265 PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden, 266 cl::desc("Use the old CFG function hashing")); 267 268 // Return a string describing the branch condition that can be 269 // used in static branch probability heuristics: 270 static std::string getBranchCondString(Instruction *TI) { 271 BranchInst *BI = dyn_cast<BranchInst>(TI); 272 if (!BI || !BI->isConditional()) 273 return std::string(); 274 275 Value *Cond = BI->getCondition(); 276 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 277 if (!CI) 278 return std::string(); 279 280 std::string result; 281 raw_string_ostream OS(result); 282 OS << CmpInst::getPredicateName(CI->getPredicate()) << "_"; 283 CI->getOperand(0)->getType()->print(OS, true); 284 285 Value *RHS = CI->getOperand(1); 286 ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 287 if (CV) { 288 if (CV->isZero()) 289 OS << "_Zero"; 290 else if (CV->isOne()) 291 OS << "_One"; 292 else if (CV->isMinusOne()) 293 OS << "_MinusOne"; 294 else 295 OS << "_Const"; 296 } 297 OS.flush(); 298 return result; 299 } 300 301 static const char *ValueProfKindDescr[] = { 302 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, 303 #include "llvm/ProfileData/InstrProfData.inc" 304 }; 305 306 namespace { 307 308 /// The select instruction visitor plays three roles specified 309 /// by the mode. In \c VM_counting mode, it simply counts the number of 310 /// select instructions. In \c VM_instrument mode, it inserts code to count 311 /// the number times TrueValue of select is taken. In \c VM_annotate mode, 312 /// it reads the profile data and annotate the select instruction with metadata. 313 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 314 class PGOUseFunc; 315 316 /// Instruction Visitor class to visit select instructions. 317 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 318 Function &F; 319 unsigned NSIs = 0; // Number of select instructions instrumented. 320 VisitMode Mode = VM_counting; // Visiting mode. 321 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 322 unsigned TotalNumCtrs = 0; // Total number of counters 323 GlobalVariable *FuncNameVar = nullptr; 324 uint64_t FuncHash = 0; 325 PGOUseFunc *UseFunc = nullptr; 326 327 SelectInstVisitor(Function &Func) : F(Func) {} 328 329 void countSelects(Function &Func) { 330 NSIs = 0; 331 Mode = VM_counting; 332 visit(Func); 333 } 334 335 // Visit the IR stream and instrument all select instructions. \p 336 // Ind is a pointer to the counter index variable; \p TotalNC 337 // is the total number of counters; \p FNV is the pointer to the 338 // PGO function name var; \p FHash is the function hash. 339 void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC, 340 GlobalVariable *FNV, uint64_t FHash) { 341 Mode = VM_instrument; 342 CurCtrIdx = Ind; 343 TotalNumCtrs = TotalNC; 344 FuncHash = FHash; 345 FuncNameVar = FNV; 346 visit(Func); 347 } 348 349 // Visit the IR stream and annotate all select instructions. 350 void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) { 351 Mode = VM_annotate; 352 UseFunc = UF; 353 CurCtrIdx = Ind; 354 visit(Func); 355 } 356 357 void instrumentOneSelectInst(SelectInst &SI); 358 void annotateOneSelectInst(SelectInst &SI); 359 360 // Visit \p SI instruction and perform tasks according to visit mode. 361 void visitSelectInst(SelectInst &SI); 362 363 // Return the number of select instructions. This needs be called after 364 // countSelects(). 365 unsigned getNumOfSelectInsts() const { return NSIs; } 366 }; 367 368 369 class PGOInstrumentationGenLegacyPass : public ModulePass { 370 public: 371 static char ID; 372 373 PGOInstrumentationGenLegacyPass(bool IsCS = false) 374 : ModulePass(ID), IsCS(IsCS) { 375 initializePGOInstrumentationGenLegacyPassPass( 376 *PassRegistry::getPassRegistry()); 377 } 378 379 StringRef getPassName() const override { return "PGOInstrumentationGenPass"; } 380 381 private: 382 // Is this is context-sensitive instrumentation. 383 bool IsCS; 384 bool runOnModule(Module &M) override; 385 386 void getAnalysisUsage(AnalysisUsage &AU) const override { 387 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 388 AU.addRequired<TargetLibraryInfoWrapperPass>(); 389 } 390 }; 391 392 class PGOInstrumentationUseLegacyPass : public ModulePass { 393 public: 394 static char ID; 395 396 // Provide the profile filename as the parameter. 397 PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false) 398 : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) { 399 if (!PGOTestProfileFile.empty()) 400 ProfileFileName = PGOTestProfileFile; 401 initializePGOInstrumentationUseLegacyPassPass( 402 *PassRegistry::getPassRegistry()); 403 } 404 405 StringRef getPassName() const override { return "PGOInstrumentationUsePass"; } 406 407 private: 408 std::string ProfileFileName; 409 // Is this is context-sensitive instrumentation use. 410 bool IsCS; 411 412 bool runOnModule(Module &M) override; 413 414 void getAnalysisUsage(AnalysisUsage &AU) const override { 415 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 416 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 417 AU.addRequired<TargetLibraryInfoWrapperPass>(); 418 } 419 }; 420 421 class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass { 422 public: 423 static char ID; 424 StringRef getPassName() const override { 425 return "PGOInstrumentationGenCreateVarPass"; 426 } 427 PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "") 428 : ModulePass(ID), InstrProfileOutput(CSInstrName) { 429 initializePGOInstrumentationGenCreateVarLegacyPassPass( 430 *PassRegistry::getPassRegistry()); 431 } 432 433 private: 434 bool runOnModule(Module &M) override { 435 createProfileFileNameVar(M, InstrProfileOutput); 436 createIRLevelProfileFlagVar(M, /* IsCS */ true, PGOInstrumentEntry); 437 return false; 438 } 439 std::string InstrProfileOutput; 440 }; 441 442 } // end anonymous namespace 443 444 char PGOInstrumentationGenLegacyPass::ID = 0; 445 446 INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 447 "PGO instrumentation.", false, false) 448 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 449 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 450 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 451 INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 452 "PGO instrumentation.", false, false) 453 454 ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) { 455 return new PGOInstrumentationGenLegacyPass(IsCS); 456 } 457 458 char PGOInstrumentationUseLegacyPass::ID = 0; 459 460 INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 461 "Read PGO instrumentation profile.", false, false) 462 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 463 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 464 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 465 INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 466 "Read PGO instrumentation profile.", false, false) 467 468 ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename, 469 bool IsCS) { 470 return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS); 471 } 472 473 char PGOInstrumentationGenCreateVarLegacyPass::ID = 0; 474 475 INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass, 476 "pgo-instr-gen-create-var", 477 "Create PGO instrumentation version variable for CSPGO.", false, 478 false) 479 480 ModulePass * 481 llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) { 482 return new PGOInstrumentationGenCreateVarLegacyPass(std::string(CSInstrName)); 483 } 484 485 namespace { 486 487 /// An MST based instrumentation for PGO 488 /// 489 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO 490 /// in the function level. 491 struct PGOEdge { 492 // This class implements the CFG edges. Note the CFG can be a multi-graph. 493 // So there might be multiple edges with same SrcBB and DestBB. 494 const BasicBlock *SrcBB; 495 const BasicBlock *DestBB; 496 uint64_t Weight; 497 bool InMST = false; 498 bool Removed = false; 499 bool IsCritical = false; 500 501 PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 502 : SrcBB(Src), DestBB(Dest), Weight(W) {} 503 504 // Return the information string of an edge. 505 const std::string infoString() const { 506 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 507 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str(); 508 } 509 }; 510 511 // This class stores the auxiliary information for each BB. 512 struct BBInfo { 513 BBInfo *Group; 514 uint32_t Index; 515 uint32_t Rank = 0; 516 517 BBInfo(unsigned IX) : Group(this), Index(IX) {} 518 519 // Return the information string of this object. 520 const std::string infoString() const { 521 return (Twine("Index=") + Twine(Index)).str(); 522 } 523 524 // Empty function -- only applicable to UseBBInfo. 525 void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 526 527 // Empty function -- only applicable to UseBBInfo. 528 void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 529 }; 530 531 // This class implements the CFG edges. Note the CFG can be a multi-graph. 532 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 533 private: 534 Function &F; 535 536 // Is this is context-sensitive instrumentation. 537 bool IsCS; 538 539 // If we instrument function entry BB by default. 540 bool InstrumentFuncEntry; 541 542 // A map that stores the Comdat group in function F. 543 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 544 545 ValueProfileCollector VPC; 546 547 void computeCFGHash(); 548 void renameComdatFunction(); 549 550 public: 551 std::vector<std::vector<VPCandidateInfo>> ValueSites; 552 SelectInstVisitor SIVisitor; 553 std::string FuncName; 554 GlobalVariable *FuncNameVar; 555 556 // CFG hash value for this function. 557 uint64_t FunctionHash = 0; 558 559 // The Minimum Spanning Tree of function CFG. 560 CFGMST<Edge, BBInfo> MST; 561 562 // Collect all the BBs that will be instrumented, and store them in 563 // InstrumentBBs. 564 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); 565 566 // Give an edge, find the BB that will be instrumented. 567 // Return nullptr if there is no BB to be instrumented. 568 BasicBlock *getInstrBB(Edge *E); 569 570 // Return the auxiliary BB information. 571 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 572 573 // Return the auxiliary BB information if available. 574 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 575 576 // Dump edges and BB information. 577 void dumpInfo(std::string Str = "") const { 578 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " + 579 Twine(FunctionHash) + "\t" + Str); 580 } 581 582 FuncPGOInstrumentation( 583 Function &Func, TargetLibraryInfo &TLI, 584 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 585 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 586 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false, 587 bool InstrumentFuncEntry = true) 588 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), 589 ValueSites(IPVK_Last + 1), SIVisitor(Func), 590 MST(F, InstrumentFuncEntry, BPI, BFI) { 591 // This should be done before CFG hash computation. 592 SIVisitor.countSelects(Func); 593 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); 594 if (!IsCS) { 595 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 596 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 597 NumOfPGOBB += MST.BBInfos.size(); 598 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); 599 } else { 600 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 601 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 602 NumOfCSPGOBB += MST.BBInfos.size(); 603 } 604 605 FuncName = getPGOFuncName(F); 606 computeCFGHash(); 607 if (!ComdatMembers.empty()) 608 renameComdatFunction(); 609 LLVM_DEBUG(dumpInfo("after CFGMST")); 610 611 for (auto &E : MST.AllEdges) { 612 if (E->Removed) 613 continue; 614 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; 615 if (!E->InMST) 616 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; 617 } 618 619 if (CreateGlobalVar) 620 FuncNameVar = createPGOFuncNameVar(F, FuncName); 621 } 622 }; 623 624 } // end anonymous namespace 625 626 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 627 // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers 628 // of selects, indirect calls, mem ops and edges. 629 template <class Edge, class BBInfo> 630 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 631 std::vector<uint8_t> Indexes; 632 JamCRC JC; 633 for (auto &BB : F) { 634 const Instruction *TI = BB.getTerminator(); 635 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 636 BasicBlock *Succ = TI->getSuccessor(I); 637 auto BI = findBBInfo(Succ); 638 if (BI == nullptr) 639 continue; 640 uint32_t Index = BI->Index; 641 for (int J = 0; J < 4; J++) 642 Indexes.push_back((uint8_t)(Index >> (J * 8))); 643 } 644 } 645 JC.update(Indexes); 646 647 JamCRC JCH; 648 if (PGOOldCFGHashing) { 649 // Hash format for context sensitive profile. Reserve 4 bits for other 650 // information. 651 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | 652 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | 653 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 | 654 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); 655 } else { 656 // The higher 32 bits. 657 auto updateJCH = [&JCH](uint64_t Num) { 658 uint8_t Data[8]; 659 support::endian::write64le(Data, Num); 660 JCH.update(Data); 661 }; 662 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts()); 663 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size()); 664 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size()); 665 updateJCH((uint64_t)MST.AllEdges.size()); 666 667 // Hash format for context sensitive profile. Reserve 4 bits for other 668 // information. 669 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); 670 } 671 672 // Reserve bit 60-63 for other information purpose. 673 FunctionHash &= 0x0FFFFFFFFFFFFFFF; 674 if (IsCS) 675 NamedInstrProfRecord::setCSFlagInHash(FunctionHash); 676 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" 677 << " CRC = " << JC.getCRC() 678 << ", Selects = " << SIVisitor.getNumOfSelectInsts() 679 << ", Edges = " << MST.AllEdges.size() << ", ICSites = " 680 << ValueSites[IPVK_IndirectCallTarget].size()); 681 if (!PGOOldCFGHashing) { 682 LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size() 683 << ", High32 CRC = " << JCH.getCRC()); 684 } 685 LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";); 686 } 687 688 // Check if we can safely rename this Comdat function. 689 static bool canRenameComdat( 690 Function &F, 691 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 692 if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 693 return false; 694 695 // FIXME: Current only handle those Comdat groups that only containing one 696 // function and function aliases. 697 // (1) For a Comdat group containing multiple functions, we need to have a 698 // unique postfix based on the hashes for each function. There is a 699 // non-trivial code refactoring to do this efficiently. 700 // (2) Variables can not be renamed, so we can not rename Comdat function in a 701 // group including global vars. 702 Comdat *C = F.getComdat(); 703 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 704 if (dyn_cast<GlobalAlias>(CM.second)) 705 continue; 706 Function *FM = dyn_cast<Function>(CM.second); 707 if (FM != &F) 708 return false; 709 } 710 return true; 711 } 712 713 // Append the CFGHash to the Comdat function name. 714 template <class Edge, class BBInfo> 715 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 716 if (!canRenameComdat(F, ComdatMembers)) 717 return; 718 std::string OrigName = F.getName().str(); 719 std::string NewFuncName = 720 Twine(F.getName() + "." + Twine(FunctionHash)).str(); 721 F.setName(Twine(NewFuncName)); 722 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 723 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 724 Comdat *NewComdat; 725 Module *M = F.getParent(); 726 // For AvailableExternallyLinkage functions, change the linkage to 727 // LinkOnceODR and put them into comdat. This is because after renaming, there 728 // is no backup external copy available for the function. 729 if (!F.hasComdat()) { 730 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 731 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 732 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 733 F.setComdat(NewComdat); 734 return; 735 } 736 737 // This function belongs to a single function Comdat group. 738 Comdat *OrigComdat = F.getComdat(); 739 std::string NewComdatName = 740 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 741 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 742 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 743 744 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 745 if (GlobalAlias *GA = dyn_cast<GlobalAlias>(CM.second)) { 746 // For aliases, change the name directly. 747 assert(dyn_cast<Function>(GA->getAliasee()->stripPointerCasts()) == &F); 748 std::string OrigGAName = GA->getName().str(); 749 GA->setName(Twine(GA->getName() + "." + Twine(FunctionHash))); 750 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigGAName, GA); 751 continue; 752 } 753 // Must be a function. 754 Function *CF = dyn_cast<Function>(CM.second); 755 assert(CF); 756 CF->setComdat(NewComdat); 757 } 758 } 759 760 // Collect all the BBs that will be instruments and return them in 761 // InstrumentBBs and setup InEdges/OutEdge for UseBBInfo. 762 template <class Edge, class BBInfo> 763 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( 764 std::vector<BasicBlock *> &InstrumentBBs) { 765 // Use a worklist as we will update the vector during the iteration. 766 std::vector<Edge *> EdgeList; 767 EdgeList.reserve(MST.AllEdges.size()); 768 for (auto &E : MST.AllEdges) 769 EdgeList.push_back(E.get()); 770 771 for (auto &E : EdgeList) { 772 BasicBlock *InstrBB = getInstrBB(E); 773 if (InstrBB) 774 InstrumentBBs.push_back(InstrBB); 775 } 776 777 // Set up InEdges/OutEdges for all BBs. 778 for (auto &E : MST.AllEdges) { 779 if (E->Removed) 780 continue; 781 const BasicBlock *SrcBB = E->SrcBB; 782 const BasicBlock *DestBB = E->DestBB; 783 BBInfo &SrcInfo = getBBInfo(SrcBB); 784 BBInfo &DestInfo = getBBInfo(DestBB); 785 SrcInfo.addOutEdge(E.get()); 786 DestInfo.addInEdge(E.get()); 787 } 788 } 789 790 // Given a CFG E to be instrumented, find which BB to place the instrumented 791 // code. The function will split the critical edge if necessary. 792 template <class Edge, class BBInfo> 793 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 794 if (E->InMST || E->Removed) 795 return nullptr; 796 797 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 798 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 799 // For a fake edge, instrument the real BB. 800 if (SrcBB == nullptr) 801 return DestBB; 802 if (DestBB == nullptr) 803 return SrcBB; 804 805 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { 806 // There are basic blocks (such as catchswitch) cannot be instrumented. 807 // If the returned first insertion point is the end of BB, skip this BB. 808 if (BB->getFirstInsertionPt() == BB->end()) 809 return nullptr; 810 return BB; 811 }; 812 813 // Instrument the SrcBB if it has a single successor, 814 // otherwise, the DestBB if this is not a critical edge. 815 Instruction *TI = SrcBB->getTerminator(); 816 if (TI->getNumSuccessors() <= 1) 817 return canInstrument(SrcBB); 818 if (!E->IsCritical) 819 return canInstrument(DestBB); 820 821 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 822 BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum); 823 if (!InstrBB) { 824 LLVM_DEBUG( 825 dbgs() << "Fail to split critical edge: not instrument this edge.\n"); 826 return nullptr; 827 } 828 // For a critical edge, we have to split. Instrument the newly 829 // created BB. 830 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; 831 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index 832 << " --> " << getBBInfo(DestBB).Index << "\n"); 833 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. 834 MST.addEdge(SrcBB, InstrBB, 0); 835 // Second one: Add new edge of InstrBB->DestBB. 836 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); 837 NewEdge1.InMST = true; 838 E->Removed = true; 839 840 return canInstrument(InstrBB); 841 } 842 843 // When generating value profiling calls on Windows routines that make use of 844 // handler funclets for exception processing an operand bundle needs to attached 845 // to the called function. This routine will set \p OpBundles to contain the 846 // funclet information, if any is needed, that should be placed on the generated 847 // value profiling call for the value profile candidate call. 848 static void 849 populateEHOperandBundle(VPCandidateInfo &Cand, 850 DenseMap<BasicBlock *, ColorVector> &BlockColors, 851 SmallVectorImpl<OperandBundleDef> &OpBundles) { 852 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst); 853 if (OrigCall && !isa<IntrinsicInst>(OrigCall)) { 854 // The instrumentation call should belong to the same funclet as a 855 // non-intrinsic call, so just copy the operand bundle, if any exists. 856 Optional<OperandBundleUse> ParentFunclet = 857 OrigCall->getOperandBundle(LLVMContext::OB_funclet); 858 if (ParentFunclet) 859 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet)); 860 } else { 861 // Intrinsics or other instructions do not get funclet information from the 862 // front-end. Need to use the BlockColors that was computed by the routine 863 // colorEHFunclets to determine whether a funclet is needed. 864 if (!BlockColors.empty()) { 865 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second; 866 assert(CV.size() == 1 && "non-unique color for block!"); 867 Instruction *EHPad = CV.front()->getFirstNonPHI(); 868 if (EHPad->isEHPad()) 869 OpBundles.emplace_back("funclet", EHPad); 870 } 871 } 872 } 873 874 // Visit all edge and instrument the edges not in MST, and do value profiling. 875 // Critical edges will be split. 876 static void instrumentOneFunc( 877 Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, 878 BlockFrequencyInfo *BFI, 879 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 880 bool IsCS) { 881 // Split indirectbr critical edges here before computing the MST rather than 882 // later in getInstrBB() to avoid invalidating it. 883 SplitIndirectBrCriticalEdges(F, BPI, BFI); 884 885 FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo( 886 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry); 887 std::vector<BasicBlock *> InstrumentBBs; 888 FuncInfo.getInstrumentBBs(InstrumentBBs); 889 unsigned NumCounters = 890 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 891 892 uint32_t I = 0; 893 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); 894 for (auto *InstrBB : InstrumentBBs) { 895 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 896 assert(Builder.GetInsertPoint() != InstrBB->end() && 897 "Cannot get the Instrumentation point"); 898 Builder.CreateCall( 899 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), 900 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 901 Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters), 902 Builder.getInt32(I++)}); 903 } 904 905 // Now instrument select instructions: 906 FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar, 907 FuncInfo.FunctionHash); 908 assert(I == NumCounters); 909 910 if (DisableValueProfiling) 911 return; 912 913 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); 914 915 // Intrinsic function calls do not have funclet operand bundles needed for 916 // Windows exception handling attached to them. However, if value profiling is 917 // inserted for one of these calls, then a funclet value will need to be set 918 // on the instrumentation call based on the funclet coloring. 919 DenseMap<BasicBlock *, ColorVector> BlockColors; 920 if (F.hasPersonalityFn() && 921 isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) 922 BlockColors = colorEHFunclets(F); 923 924 // For each VP Kind, walk the VP candidates and instrument each one. 925 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { 926 unsigned SiteIndex = 0; 927 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) 928 continue; 929 930 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { 931 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] 932 << " site: CallSite Index = " << SiteIndex << "\n"); 933 934 IRBuilder<> Builder(Cand.InsertPt); 935 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && 936 "Cannot get the Instrumentation point"); 937 938 Value *ToProfile = nullptr; 939 if (Cand.V->getType()->isIntegerTy()) 940 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty()); 941 else if (Cand.V->getType()->isPointerTy()) 942 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); 943 assert(ToProfile && "value profiling Value is of unexpected type"); 944 945 SmallVector<OperandBundleDef, 1> OpBundles; 946 populateEHOperandBundle(Cand, BlockColors, OpBundles); 947 Builder.CreateCall( 948 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 949 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 950 Builder.getInt64(FuncInfo.FunctionHash), ToProfile, 951 Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}, 952 OpBundles); 953 } 954 } // IPVK_First <= Kind <= IPVK_Last 955 } 956 957 namespace { 958 959 // This class represents a CFG edge in profile use compilation. 960 struct PGOUseEdge : public PGOEdge { 961 bool CountValid = false; 962 uint64_t CountValue = 0; 963 964 PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 965 : PGOEdge(Src, Dest, W) {} 966 967 // Set edge count value 968 void setEdgeCount(uint64_t Value) { 969 CountValue = Value; 970 CountValid = true; 971 } 972 973 // Return the information string for this object. 974 const std::string infoString() const { 975 if (!CountValid) 976 return PGOEdge::infoString(); 977 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) 978 .str(); 979 } 980 }; 981 982 using DirectEdges = SmallVector<PGOUseEdge *, 2>; 983 984 // This class stores the auxiliary information for each BB. 985 struct UseBBInfo : public BBInfo { 986 uint64_t CountValue = 0; 987 bool CountValid; 988 int32_t UnknownCountInEdge = 0; 989 int32_t UnknownCountOutEdge = 0; 990 DirectEdges InEdges; 991 DirectEdges OutEdges; 992 993 UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {} 994 995 UseBBInfo(unsigned IX, uint64_t C) 996 : BBInfo(IX), CountValue(C), CountValid(true) {} 997 998 // Set the profile count value for this BB. 999 void setBBInfoCount(uint64_t Value) { 1000 CountValue = Value; 1001 CountValid = true; 1002 } 1003 1004 // Return the information string of this object. 1005 const std::string infoString() const { 1006 if (!CountValid) 1007 return BBInfo::infoString(); 1008 return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str(); 1009 } 1010 1011 // Add an OutEdge and update the edge count. 1012 void addOutEdge(PGOUseEdge *E) { 1013 OutEdges.push_back(E); 1014 UnknownCountOutEdge++; 1015 } 1016 1017 // Add an InEdge and update the edge count. 1018 void addInEdge(PGOUseEdge *E) { 1019 InEdges.push_back(E); 1020 UnknownCountInEdge++; 1021 } 1022 }; 1023 1024 } // end anonymous namespace 1025 1026 // Sum up the count values for all the edges. 1027 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 1028 uint64_t Total = 0; 1029 for (auto &E : Edges) { 1030 if (E->Removed) 1031 continue; 1032 Total += E->CountValue; 1033 } 1034 return Total; 1035 } 1036 1037 namespace { 1038 1039 class PGOUseFunc { 1040 public: 1041 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, 1042 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 1043 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, 1044 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry) 1045 : F(Func), M(Modu), BFI(BFIin), PSI(PSI), 1046 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS, 1047 InstrumentFuncEntry), 1048 FreqAttr(FFA_Normal), IsCS(IsCS) {} 1049 1050 // Read counts for the instrumented BB from profile. 1051 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1052 bool &AllMinusOnes); 1053 1054 // Populate the counts for all BBs. 1055 void populateCounters(); 1056 1057 // Set the branch weights based on the count values. 1058 void setBranchWeights(); 1059 1060 // Annotate the value profile call sites for all value kind. 1061 void annotateValueSites(); 1062 1063 // Annotate the value profile call sites for one value kind. 1064 void annotateValueSites(uint32_t Kind); 1065 1066 // Annotate the irreducible loop header weights. 1067 void annotateIrrLoopHeaderWeights(); 1068 1069 // The hotness of the function from the profile count. 1070 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 1071 1072 // Return the function hotness from the profile. 1073 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 1074 1075 // Return the function hash. 1076 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 1077 1078 // Return the profile record for this function; 1079 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 1080 1081 // Return the auxiliary BB information. 1082 UseBBInfo &getBBInfo(const BasicBlock *BB) const { 1083 return FuncInfo.getBBInfo(BB); 1084 } 1085 1086 // Return the auxiliary BB information if available. 1087 UseBBInfo *findBBInfo(const BasicBlock *BB) const { 1088 return FuncInfo.findBBInfo(BB); 1089 } 1090 1091 Function &getFunc() const { return F; } 1092 1093 void dumpInfo(std::string Str = "") const { 1094 FuncInfo.dumpInfo(Str); 1095 } 1096 1097 uint64_t getProgramMaxCount() const { return ProgramMaxCount; } 1098 private: 1099 Function &F; 1100 Module *M; 1101 BlockFrequencyInfo *BFI; 1102 ProfileSummaryInfo *PSI; 1103 1104 // This member stores the shared information with class PGOGenFunc. 1105 FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo; 1106 1107 // The maximum count value in the profile. This is only used in PGO use 1108 // compilation. 1109 uint64_t ProgramMaxCount; 1110 1111 // Position of counter that remains to be read. 1112 uint32_t CountPosition = 0; 1113 1114 // Total size of the profile count for this function. 1115 uint32_t ProfileCountSize = 0; 1116 1117 // ProfileRecord for this function. 1118 InstrProfRecord ProfileRecord; 1119 1120 // Function hotness info derived from profile. 1121 FuncFreqAttr FreqAttr; 1122 1123 // Is to use the context sensitive profile. 1124 bool IsCS; 1125 1126 // Find the Instrumented BB and set the value. Return false on error. 1127 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 1128 1129 // Set the edge counter value for the unknown edge -- there should be only 1130 // one unknown edge. 1131 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 1132 1133 // Return FuncName string; 1134 const std::string getFuncName() const { return FuncInfo.FuncName; } 1135 1136 // Set the hot/cold inline hints based on the count values. 1137 // FIXME: This function should be removed once the functionality in 1138 // the inliner is implemented. 1139 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 1140 if (PSI->isHotCount(EntryCount)) 1141 FreqAttr = FFA_Hot; 1142 else if (PSI->isColdCount(MaxCount)) 1143 FreqAttr = FFA_Cold; 1144 } 1145 }; 1146 1147 } // end anonymous namespace 1148 1149 // Visit all the edges and assign the count value for the instrumented 1150 // edges and the BB. Return false on error. 1151 bool PGOUseFunc::setInstrumentedCounts( 1152 const std::vector<uint64_t> &CountFromProfile) { 1153 1154 std::vector<BasicBlock *> InstrumentBBs; 1155 FuncInfo.getInstrumentBBs(InstrumentBBs); 1156 unsigned NumCounters = 1157 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 1158 // The number of counters here should match the number of counters 1159 // in profile. Return if they mismatch. 1160 if (NumCounters != CountFromProfile.size()) { 1161 return false; 1162 } 1163 auto *FuncEntry = &*F.begin(); 1164 1165 // Set the profile count to the Instrumented BBs. 1166 uint32_t I = 0; 1167 for (BasicBlock *InstrBB : InstrumentBBs) { 1168 uint64_t CountValue = CountFromProfile[I++]; 1169 UseBBInfo &Info = getBBInfo(InstrBB); 1170 // If we reach here, we know that we have some nonzero count 1171 // values in this function. The entry count should not be 0. 1172 // Fix it if necessary. 1173 if (InstrBB == FuncEntry && CountValue == 0) 1174 CountValue = 1; 1175 Info.setBBInfoCount(CountValue); 1176 } 1177 ProfileCountSize = CountFromProfile.size(); 1178 CountPosition = I; 1179 1180 // Set the edge count and update the count of unknown edges for BBs. 1181 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { 1182 E->setEdgeCount(Value); 1183 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1184 this->getBBInfo(E->DestBB).UnknownCountInEdge--; 1185 }; 1186 1187 // Set the profile count the Instrumented edges. There are BBs that not in 1188 // MST but not instrumented. Need to set the edge count value so that we can 1189 // populate the profile counts later. 1190 for (auto &E : FuncInfo.MST.AllEdges) { 1191 if (E->Removed || E->InMST) 1192 continue; 1193 const BasicBlock *SrcBB = E->SrcBB; 1194 UseBBInfo &SrcInfo = getBBInfo(SrcBB); 1195 1196 // If only one out-edge, the edge profile count should be the same as BB 1197 // profile count. 1198 if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1) 1199 setEdgeCount(E.get(), SrcInfo.CountValue); 1200 else { 1201 const BasicBlock *DestBB = E->DestBB; 1202 UseBBInfo &DestInfo = getBBInfo(DestBB); 1203 // If only one in-edge, the edge profile count should be the same as BB 1204 // profile count. 1205 if (DestInfo.CountValid && DestInfo.InEdges.size() == 1) 1206 setEdgeCount(E.get(), DestInfo.CountValue); 1207 } 1208 if (E->CountValid) 1209 continue; 1210 // E's count should have been set from profile. If not, this meenas E skips 1211 // the instrumentation. We set the count to 0. 1212 setEdgeCount(E.get(), 0); 1213 } 1214 return true; 1215 } 1216 1217 // Set the count value for the unknown edge. There should be one and only one 1218 // unknown edge in Edges vector. 1219 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 1220 for (auto &E : Edges) { 1221 if (E->CountValid) 1222 continue; 1223 E->setEdgeCount(Value); 1224 1225 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1226 getBBInfo(E->DestBB).UnknownCountInEdge--; 1227 return; 1228 } 1229 llvm_unreachable("Cannot find the unknown count edge"); 1230 } 1231 1232 // Read the profile from ProfileFileName and assign the value to the 1233 // instrumented BB and the edges. This function also updates ProgramMaxCount. 1234 // Return true if the profile are successfully read, and false on errors. 1235 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1236 bool &AllMinusOnes) { 1237 auto &Ctx = M->getContext(); 1238 Expected<InstrProfRecord> Result = 1239 PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); 1240 if (Error E = Result.takeError()) { 1241 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 1242 auto Err = IPE.get(); 1243 bool SkipWarning = false; 1244 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " 1245 << FuncInfo.FuncName << ": "); 1246 if (Err == instrprof_error::unknown_function) { 1247 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; 1248 SkipWarning = !PGOWarnMissing; 1249 LLVM_DEBUG(dbgs() << "unknown function"); 1250 } else if (Err == instrprof_error::hash_mismatch || 1251 Err == instrprof_error::malformed) { 1252 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; 1253 SkipWarning = 1254 NoPGOWarnMismatch || 1255 (NoPGOWarnMismatchComdat && 1256 (F.hasComdat() || 1257 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 1258 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); 1259 } 1260 1261 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); 1262 if (SkipWarning) 1263 return; 1264 1265 std::string Msg = IPE.message() + std::string(" ") + F.getName().str() + 1266 std::string(" Hash = ") + 1267 std::to_string(FuncInfo.FunctionHash); 1268 1269 Ctx.diagnose( 1270 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1271 }); 1272 return false; 1273 } 1274 ProfileRecord = std::move(Result.get()); 1275 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 1276 1277 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 1278 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 1279 AllMinusOnes = (CountFromProfile.size() > 0); 1280 uint64_t ValueSum = 0; 1281 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 1282 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 1283 ValueSum += CountFromProfile[I]; 1284 if (CountFromProfile[I] != (uint64_t)-1) 1285 AllMinusOnes = false; 1286 } 1287 AllZeros = (ValueSum == 0); 1288 1289 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 1290 1291 getBBInfo(nullptr).UnknownCountOutEdge = 2; 1292 getBBInfo(nullptr).UnknownCountInEdge = 2; 1293 1294 if (!setInstrumentedCounts(CountFromProfile)) { 1295 LLVM_DEBUG( 1296 dbgs() << "Inconsistent number of counts, skipping this function"); 1297 Ctx.diagnose(DiagnosticInfoPGOProfile( 1298 M->getName().data(), 1299 Twine("Inconsistent number of counts in ") + F.getName().str() 1300 + Twine(": the profile may be stale or there is a function name collision."), 1301 DS_Warning)); 1302 return false; 1303 } 1304 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); 1305 return true; 1306 } 1307 1308 // Populate the counters from instrumented BBs to all BBs. 1309 // In the end of this operation, all BBs should have a valid count value. 1310 void PGOUseFunc::populateCounters() { 1311 bool Changes = true; 1312 unsigned NumPasses = 0; 1313 while (Changes) { 1314 NumPasses++; 1315 Changes = false; 1316 1317 // For efficient traversal, it's better to start from the end as most 1318 // of the instrumented edges are at the end. 1319 for (auto &BB : reverse(F)) { 1320 UseBBInfo *Count = findBBInfo(&BB); 1321 if (Count == nullptr) 1322 continue; 1323 if (!Count->CountValid) { 1324 if (Count->UnknownCountOutEdge == 0) { 1325 Count->CountValue = sumEdgeCount(Count->OutEdges); 1326 Count->CountValid = true; 1327 Changes = true; 1328 } else if (Count->UnknownCountInEdge == 0) { 1329 Count->CountValue = sumEdgeCount(Count->InEdges); 1330 Count->CountValid = true; 1331 Changes = true; 1332 } 1333 } 1334 if (Count->CountValid) { 1335 if (Count->UnknownCountOutEdge == 1) { 1336 uint64_t Total = 0; 1337 uint64_t OutSum = sumEdgeCount(Count->OutEdges); 1338 // If the one of the successor block can early terminate (no-return), 1339 // we can end up with situation where out edge sum count is larger as 1340 // the source BB's count is collected by a post-dominated block. 1341 if (Count->CountValue > OutSum) 1342 Total = Count->CountValue - OutSum; 1343 setEdgeCount(Count->OutEdges, Total); 1344 Changes = true; 1345 } 1346 if (Count->UnknownCountInEdge == 1) { 1347 uint64_t Total = 0; 1348 uint64_t InSum = sumEdgeCount(Count->InEdges); 1349 if (Count->CountValue > InSum) 1350 Total = Count->CountValue - InSum; 1351 setEdgeCount(Count->InEdges, Total); 1352 Changes = true; 1353 } 1354 } 1355 } 1356 } 1357 1358 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 1359 #ifndef NDEBUG 1360 // Assert every BB has a valid counter. 1361 for (auto &BB : F) { 1362 auto BI = findBBInfo(&BB); 1363 if (BI == nullptr) 1364 continue; 1365 assert(BI->CountValid && "BB count is not valid"); 1366 } 1367 #endif 1368 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 1369 uint64_t FuncMaxCount = FuncEntryCount; 1370 for (auto &BB : F) { 1371 auto BI = findBBInfo(&BB); 1372 if (BI == nullptr) 1373 continue; 1374 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); 1375 } 1376 1377 // Fix the obviously inconsistent entry count. 1378 if (FuncMaxCount > 0 && FuncEntryCount == 0) 1379 FuncEntryCount = 1; 1380 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); 1381 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 1382 1383 // Now annotate select instructions 1384 FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition); 1385 assert(CountPosition == ProfileCountSize); 1386 1387 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); 1388 } 1389 1390 // Assign the scaled count values to the BB with multiple out edges. 1391 void PGOUseFunc::setBranchWeights() { 1392 // Generate MD_prof metadata for every branch instruction. 1393 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() 1394 << " IsCS=" << IsCS << "\n"); 1395 for (auto &BB : F) { 1396 Instruction *TI = BB.getTerminator(); 1397 if (TI->getNumSuccessors() < 2) 1398 continue; 1399 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || 1400 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI))) 1401 continue; 1402 1403 if (getBBInfo(&BB).CountValue == 0) 1404 continue; 1405 1406 // We have a non-zero Branch BB. 1407 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1408 unsigned Size = BBCountInfo.OutEdges.size(); 1409 SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 1410 uint64_t MaxCount = 0; 1411 for (unsigned s = 0; s < Size; s++) { 1412 const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 1413 const BasicBlock *SrcBB = E->SrcBB; 1414 const BasicBlock *DestBB = E->DestBB; 1415 if (DestBB == nullptr) 1416 continue; 1417 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 1418 uint64_t EdgeCount = E->CountValue; 1419 if (EdgeCount > MaxCount) 1420 MaxCount = EdgeCount; 1421 EdgeCounts[SuccNum] = EdgeCount; 1422 } 1423 setProfMetadata(M, TI, EdgeCounts, MaxCount); 1424 } 1425 } 1426 1427 static bool isIndirectBrTarget(BasicBlock *BB) { 1428 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { 1429 if (isa<IndirectBrInst>((*PI)->getTerminator())) 1430 return true; 1431 } 1432 return false; 1433 } 1434 1435 void PGOUseFunc::annotateIrrLoopHeaderWeights() { 1436 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); 1437 // Find irr loop headers 1438 for (auto &BB : F) { 1439 // As a heuristic also annotate indrectbr targets as they have a high chance 1440 // to become an irreducible loop header after the indirectbr tail 1441 // duplication. 1442 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { 1443 Instruction *TI = BB.getTerminator(); 1444 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1445 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); 1446 } 1447 } 1448 } 1449 1450 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 1451 Module *M = F.getParent(); 1452 IRBuilder<> Builder(&SI); 1453 Type *Int64Ty = Builder.getInt64Ty(); 1454 Type *I8PtrTy = Builder.getInt8PtrTy(); 1455 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 1456 Builder.CreateCall( 1457 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 1458 {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1459 Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), 1460 Builder.getInt32(*CurCtrIdx), Step}); 1461 ++(*CurCtrIdx); 1462 } 1463 1464 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 1465 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1466 assert(*CurCtrIdx < CountFromProfile.size() && 1467 "Out of bound access of counters"); 1468 uint64_t SCounts[2]; 1469 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1470 ++(*CurCtrIdx); 1471 uint64_t TotalCount = 0; 1472 auto BI = UseFunc->findBBInfo(SI.getParent()); 1473 if (BI != nullptr) 1474 TotalCount = BI->CountValue; 1475 // False Count 1476 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1477 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1478 if (MaxCount) 1479 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1480 } 1481 1482 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1483 if (!PGOInstrSelect) 1484 return; 1485 // FIXME: do not handle this yet. 1486 if (SI.getCondition()->getType()->isVectorTy()) 1487 return; 1488 1489 switch (Mode) { 1490 case VM_counting: 1491 NSIs++; 1492 return; 1493 case VM_instrument: 1494 instrumentOneSelectInst(SI); 1495 return; 1496 case VM_annotate: 1497 annotateOneSelectInst(SI); 1498 return; 1499 } 1500 1501 llvm_unreachable("Unknown visiting mode"); 1502 } 1503 1504 // Traverse all valuesites and annotate the instructions for all value kind. 1505 void PGOUseFunc::annotateValueSites() { 1506 if (DisableValueProfiling) 1507 return; 1508 1509 // Create the PGOFuncName meta data. 1510 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1511 1512 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1513 annotateValueSites(Kind); 1514 } 1515 1516 // Annotate the instructions for a specific value kind. 1517 void PGOUseFunc::annotateValueSites(uint32_t Kind) { 1518 assert(Kind <= IPVK_Last); 1519 unsigned ValueSiteIndex = 0; 1520 auto &ValueSites = FuncInfo.ValueSites[Kind]; 1521 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 1522 if (NumValueSites != ValueSites.size()) { 1523 auto &Ctx = M->getContext(); 1524 Ctx.diagnose(DiagnosticInfoPGOProfile( 1525 M->getName().data(), 1526 Twine("Inconsistent number of value sites for ") + 1527 Twine(ValueProfKindDescr[Kind]) + 1528 Twine(" profiling in \"") + F.getName().str() + 1529 Twine("\", possibly due to the use of a stale profile."), 1530 DS_Warning)); 1531 return; 1532 } 1533 1534 for (VPCandidateInfo &I : ValueSites) { 1535 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 1536 << "): Index = " << ValueSiteIndex << " out of " 1537 << NumValueSites << "\n"); 1538 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, 1539 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 1540 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations 1541 : MaxNumAnnotations); 1542 ValueSiteIndex++; 1543 } 1544 } 1545 1546 // Collect the set of members for each Comdat in module M and store 1547 // in ComdatMembers. 1548 static void collectComdatMembers( 1549 Module &M, 1550 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1551 if (!DoComdatRenaming) 1552 return; 1553 for (Function &F : M) 1554 if (Comdat *C = F.getComdat()) 1555 ComdatMembers.insert(std::make_pair(C, &F)); 1556 for (GlobalVariable &GV : M.globals()) 1557 if (Comdat *C = GV.getComdat()) 1558 ComdatMembers.insert(std::make_pair(C, &GV)); 1559 for (GlobalAlias &GA : M.aliases()) 1560 if (Comdat *C = GA.getComdat()) 1561 ComdatMembers.insert(std::make_pair(C, &GA)); 1562 } 1563 1564 static bool InstrumentAllFunctions( 1565 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1566 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1567 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { 1568 // For the context-sensitve instrumentation, we should have a separated pass 1569 // (before LTO/ThinLTO linking) to create these variables. 1570 if (!IsCS) 1571 createIRLevelProfileFlagVar(M, /* IsCS */ false, PGOInstrumentEntry); 1572 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1573 collectComdatMembers(M, ComdatMembers); 1574 1575 for (auto &F : M) { 1576 if (F.isDeclaration()) 1577 continue; 1578 auto &TLI = LookupTLI(F); 1579 auto *BPI = LookupBPI(F); 1580 auto *BFI = LookupBFI(F); 1581 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS); 1582 } 1583 return true; 1584 } 1585 1586 PreservedAnalyses 1587 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) { 1588 createProfileFileNameVar(M, CSInstrName); 1589 createIRLevelProfileFlagVar(M, /* IsCS */ true, PGOInstrumentEntry); 1590 return PreservedAnalyses::all(); 1591 } 1592 1593 bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) { 1594 if (skipModule(M)) 1595 return false; 1596 1597 auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { 1598 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 1599 }; 1600 auto LookupBPI = [this](Function &F) { 1601 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1602 }; 1603 auto LookupBFI = [this](Function &F) { 1604 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1605 }; 1606 return InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS); 1607 } 1608 1609 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1610 ModuleAnalysisManager &AM) { 1611 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1612 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1613 return FAM.getResult<TargetLibraryAnalysis>(F); 1614 }; 1615 auto LookupBPI = [&FAM](Function &F) { 1616 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1617 }; 1618 auto LookupBFI = [&FAM](Function &F) { 1619 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1620 }; 1621 1622 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS)) 1623 return PreservedAnalyses::all(); 1624 1625 return PreservedAnalyses::none(); 1626 } 1627 1628 static bool annotateAllFunctions( 1629 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, 1630 function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1631 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1632 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, 1633 ProfileSummaryInfo *PSI, bool IsCS) { 1634 LLVM_DEBUG(dbgs() << "Read in profile counters: "); 1635 auto &Ctx = M.getContext(); 1636 // Read the counter array from file. 1637 auto ReaderOrErr = 1638 IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName); 1639 if (Error E = ReaderOrErr.takeError()) { 1640 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 1641 Ctx.diagnose( 1642 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 1643 }); 1644 return false; 1645 } 1646 1647 std::unique_ptr<IndexedInstrProfReader> PGOReader = 1648 std::move(ReaderOrErr.get()); 1649 if (!PGOReader) { 1650 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 1651 StringRef("Cannot get PGOReader"))); 1652 return false; 1653 } 1654 if (!PGOReader->hasCSIRLevelProfile() && IsCS) 1655 return false; 1656 1657 // TODO: might need to change the warning once the clang option is finalized. 1658 if (!PGOReader->isIRLevelProfile()) { 1659 Ctx.diagnose(DiagnosticInfoPGOProfile( 1660 ProfileFileName.data(), "Not an IR level instrumentation profile")); 1661 return false; 1662 } 1663 1664 // Add the profile summary (read from the header of the indexed summary) here 1665 // so that we can use it below when reading counters (which checks if the 1666 // function should be marked with a cold or inlinehint attribute). 1667 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), 1668 IsCS ? ProfileSummary::PSK_CSInstr 1669 : ProfileSummary::PSK_Instr); 1670 PSI->refresh(); 1671 1672 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1673 collectComdatMembers(M, ComdatMembers); 1674 std::vector<Function *> HotFunctions; 1675 std::vector<Function *> ColdFunctions; 1676 1677 // If the profile marked as always instrument the entry BB, do the 1678 // same. Note this can be overwritten by the internal option in CFGMST.h 1679 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled(); 1680 if (PGOInstrumentEntry.getNumOccurrences() > 0) 1681 InstrumentFuncEntry = PGOInstrumentEntry; 1682 for (auto &F : M) { 1683 if (F.isDeclaration()) 1684 continue; 1685 auto &TLI = LookupTLI(F); 1686 auto *BPI = LookupBPI(F); 1687 auto *BFI = LookupBFI(F); 1688 // Split indirectbr critical edges here before computing the MST rather than 1689 // later in getInstrBB() to avoid invalidating it. 1690 SplitIndirectBrCriticalEdges(F, BPI, BFI); 1691 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, 1692 InstrumentFuncEntry); 1693 // When AllMinusOnes is true, it means the profile for the function 1694 // is unrepresentative and this function is actually hot. Set the 1695 // entry count of the function to be multiple times of hot threshold 1696 // and drop all its internal counters. 1697 bool AllMinusOnes = false; 1698 bool AllZeros = false; 1699 if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes)) 1700 continue; 1701 if (AllZeros) { 1702 F.setEntryCount(ProfileCount(0, Function::PCT_Real)); 1703 if (Func.getProgramMaxCount() != 0) 1704 ColdFunctions.push_back(&F); 1705 continue; 1706 } 1707 const unsigned MultiplyFactor = 3; 1708 if (AllMinusOnes) { 1709 uint64_t HotThreshold = PSI->getHotCountThreshold(); 1710 if (HotThreshold) 1711 F.setEntryCount( 1712 ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real)); 1713 HotFunctions.push_back(&F); 1714 continue; 1715 } 1716 Func.populateCounters(); 1717 Func.setBranchWeights(); 1718 Func.annotateValueSites(); 1719 Func.annotateIrrLoopHeaderWeights(); 1720 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 1721 if (FreqAttr == PGOUseFunc::FFA_Cold) 1722 ColdFunctions.push_back(&F); 1723 else if (FreqAttr == PGOUseFunc::FFA_Hot) 1724 HotFunctions.push_back(&F); 1725 if (PGOViewCounts != PGOVCT_None && 1726 (ViewBlockFreqFuncName.empty() || 1727 F.getName().equals(ViewBlockFreqFuncName))) { 1728 LoopInfo LI{DominatorTree(F)}; 1729 std::unique_ptr<BranchProbabilityInfo> NewBPI = 1730 std::make_unique<BranchProbabilityInfo>(F, LI); 1731 std::unique_ptr<BlockFrequencyInfo> NewBFI = 1732 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); 1733 if (PGOViewCounts == PGOVCT_Graph) 1734 NewBFI->view(); 1735 else if (PGOViewCounts == PGOVCT_Text) { 1736 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n"; 1737 NewBFI->print(dbgs()); 1738 } 1739 } 1740 if (PGOViewRawCounts != PGOVCT_None && 1741 (ViewBlockFreqFuncName.empty() || 1742 F.getName().equals(ViewBlockFreqFuncName))) { 1743 if (PGOViewRawCounts == PGOVCT_Graph) 1744 if (ViewBlockFreqFuncName.empty()) 1745 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1746 else 1747 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1748 else if (PGOViewRawCounts == PGOVCT_Text) { 1749 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n"; 1750 Func.dumpInfo(); 1751 } 1752 } 1753 } 1754 1755 // Set function hotness attribute from the profile. 1756 // We have to apply these attributes at the end because their presence 1757 // can affect the BranchProbabilityInfo of any callers, resulting in an 1758 // inconsistent MST between prof-gen and prof-use. 1759 for (auto &F : HotFunctions) { 1760 F->addFnAttr(Attribute::InlineHint); 1761 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 1762 << "\n"); 1763 } 1764 for (auto &F : ColdFunctions) { 1765 F->addFnAttr(Attribute::Cold); 1766 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() 1767 << "\n"); 1768 } 1769 return true; 1770 } 1771 1772 PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, 1773 std::string RemappingFilename, 1774 bool IsCS) 1775 : ProfileFileName(std::move(Filename)), 1776 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) { 1777 if (!PGOTestProfileFile.empty()) 1778 ProfileFileName = PGOTestProfileFile; 1779 if (!PGOTestProfileRemappingFile.empty()) 1780 ProfileRemappingFileName = PGOTestProfileRemappingFile; 1781 } 1782 1783 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 1784 ModuleAnalysisManager &AM) { 1785 1786 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1787 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1788 return FAM.getResult<TargetLibraryAnalysis>(F); 1789 }; 1790 auto LookupBPI = [&FAM](Function &F) { 1791 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1792 }; 1793 auto LookupBFI = [&FAM](Function &F) { 1794 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1795 }; 1796 1797 auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); 1798 1799 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, 1800 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) 1801 return PreservedAnalyses::all(); 1802 1803 return PreservedAnalyses::none(); 1804 } 1805 1806 bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { 1807 if (skipModule(M)) 1808 return false; 1809 1810 auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { 1811 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 1812 }; 1813 auto LookupBPI = [this](Function &F) { 1814 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1815 }; 1816 auto LookupBFI = [this](Function &F) { 1817 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1818 }; 1819 1820 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 1821 return annotateAllFunctions(M, ProfileFileName, "", LookupTLI, LookupBPI, 1822 LookupBFI, PSI, IsCS); 1823 } 1824 1825 static std::string getSimpleNodeName(const BasicBlock *Node) { 1826 if (!Node->getName().empty()) 1827 return std::string(Node->getName()); 1828 1829 std::string SimpleNodeName; 1830 raw_string_ostream OS(SimpleNodeName); 1831 Node->printAsOperand(OS, false); 1832 return OS.str(); 1833 } 1834 1835 void llvm::setProfMetadata(Module *M, Instruction *TI, 1836 ArrayRef<uint64_t> EdgeCounts, 1837 uint64_t MaxCount) { 1838 MDBuilder MDB(M->getContext()); 1839 assert(MaxCount > 0 && "Bad max count"); 1840 uint64_t Scale = calculateCountScale(MaxCount); 1841 SmallVector<unsigned, 4> Weights; 1842 for (const auto &ECI : EdgeCounts) 1843 Weights.push_back(scaleBranchCount(ECI, Scale)); 1844 1845 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W 1846 : Weights) { 1847 dbgs() << W << " "; 1848 } dbgs() << "\n";); 1849 1850 misexpect::verifyMisExpect(TI, Weights, TI->getContext()); 1851 1852 TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1853 if (EmitBranchProbability) { 1854 std::string BrCondStr = getBranchCondString(TI); 1855 if (BrCondStr.empty()) 1856 return; 1857 1858 uint64_t WSum = 1859 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0, 1860 [](uint64_t w1, uint64_t w2) { return w1 + w2; }); 1861 uint64_t TotalCount = 1862 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0, 1863 [](uint64_t c1, uint64_t c2) { return c1 + c2; }); 1864 Scale = calculateCountScale(WSum); 1865 BranchProbability BP(scaleBranchCount(Weights[0], Scale), 1866 scaleBranchCount(WSum, Scale)); 1867 std::string BranchProbStr; 1868 raw_string_ostream OS(BranchProbStr); 1869 OS << BP; 1870 OS << " (total count : " << TotalCount << ")"; 1871 OS.flush(); 1872 Function *F = TI->getParent()->getParent(); 1873 OptimizationRemarkEmitter ORE(F); 1874 ORE.emit([&]() { 1875 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) 1876 << BrCondStr << " is true with probability : " << BranchProbStr; 1877 }); 1878 } 1879 } 1880 1881 namespace llvm { 1882 1883 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { 1884 MDBuilder MDB(M->getContext()); 1885 TI->setMetadata(llvm::LLVMContext::MD_irr_loop, 1886 MDB.createIrrLoopHeaderWeight(Count)); 1887 } 1888 1889 template <> struct GraphTraits<PGOUseFunc *> { 1890 using NodeRef = const BasicBlock *; 1891 using ChildIteratorType = const_succ_iterator; 1892 using nodes_iterator = pointer_iterator<Function::const_iterator>; 1893 1894 static NodeRef getEntryNode(const PGOUseFunc *G) { 1895 return &G->getFunc().front(); 1896 } 1897 1898 static ChildIteratorType child_begin(const NodeRef N) { 1899 return succ_begin(N); 1900 } 1901 1902 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } 1903 1904 static nodes_iterator nodes_begin(const PGOUseFunc *G) { 1905 return nodes_iterator(G->getFunc().begin()); 1906 } 1907 1908 static nodes_iterator nodes_end(const PGOUseFunc *G) { 1909 return nodes_iterator(G->getFunc().end()); 1910 } 1911 }; 1912 1913 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { 1914 explicit DOTGraphTraits(bool isSimple = false) 1915 : DefaultDOTGraphTraits(isSimple) {} 1916 1917 static std::string getGraphName(const PGOUseFunc *G) { 1918 return std::string(G->getFunc().getName()); 1919 } 1920 1921 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { 1922 std::string Result; 1923 raw_string_ostream OS(Result); 1924 1925 OS << getSimpleNodeName(Node) << ":\\l"; 1926 UseBBInfo *BI = Graph->findBBInfo(Node); 1927 OS << "Count : "; 1928 if (BI && BI->CountValid) 1929 OS << BI->CountValue << "\\l"; 1930 else 1931 OS << "Unknown\\l"; 1932 1933 if (!PGOInstrSelect) 1934 return Result; 1935 1936 for (auto BI = Node->begin(); BI != Node->end(); ++BI) { 1937 auto *I = &*BI; 1938 if (!isa<SelectInst>(I)) 1939 continue; 1940 // Display scaled counts for SELECT instruction: 1941 OS << "SELECT : { T = "; 1942 uint64_t TC, FC; 1943 bool HasProf = I->extractProfMetadata(TC, FC); 1944 if (!HasProf) 1945 OS << "Unknown, F = Unknown }\\l"; 1946 else 1947 OS << TC << ", F = " << FC << " }\\l"; 1948 } 1949 return Result; 1950 } 1951 }; 1952 1953 } // end namespace llvm 1954