1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements PGO instrumentation using a minimum spanning tree based 10 // on the following paper: 11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 13 // Issue 3, pp 313-322 14 // The idea of the algorithm based on the fact that for each node (except for 15 // the entry and exit), the sum of incoming edge counts equals the sum of 16 // outgoing edge counts. The count of edge on spanning tree can be derived from 17 // those edges not on the spanning tree. Knuth proves this method instruments 18 // the minimum number of edges. 19 // 20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree 21 // edges have higher frequencies (more likely to execute). The idea is to 22 // instrument those less frequently executed edges to reduce the runtime 23 // overhead of instrumented binaries. 24 // 25 // This file contains two passes: 26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 27 // count profile, and generates the instrumentation for indirect call 28 // profiling. 29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and 30 // annotates the branch weights. It also reads the indirect call value 31 // profiling records and annotate the indirect call instructions. 32 // 33 // To get the precise counter information, These two passes need to invoke at 34 // the same compilation point (so they see the same IR). For pass 35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For 36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 37 // the profile is opened in module level and passed to each PGOUseFunc instance. 38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 39 // in class FuncPGOInstrumentation. 40 // 41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 42 // BBInfo contains auxiliary information for each BB. These two classes are used 43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 45 // used in populating profile counters. 46 // The MST implementation is in Class CFGMST (CFGMST.h). 47 // 48 //===----------------------------------------------------------------------===// 49 50 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 51 #include "CFGMST.h" 52 #include "ValueProfileCollector.h" 53 #include "llvm/ADT/APInt.h" 54 #include "llvm/ADT/ArrayRef.h" 55 #include "llvm/ADT/MapVector.h" 56 #include "llvm/ADT/STLExtras.h" 57 #include "llvm/ADT/SmallVector.h" 58 #include "llvm/ADT/Statistic.h" 59 #include "llvm/ADT/StringRef.h" 60 #include "llvm/ADT/Triple.h" 61 #include "llvm/ADT/Twine.h" 62 #include "llvm/ADT/iterator.h" 63 #include "llvm/ADT/iterator_range.h" 64 #include "llvm/Analysis/BlockFrequencyInfo.h" 65 #include "llvm/Analysis/BranchProbabilityInfo.h" 66 #include "llvm/Analysis/CFG.h" 67 #include "llvm/Analysis/EHPersonalities.h" 68 #include "llvm/Analysis/LoopInfo.h" 69 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 70 #include "llvm/Analysis/ProfileSummaryInfo.h" 71 #include "llvm/IR/Attributes.h" 72 #include "llvm/IR/BasicBlock.h" 73 #include "llvm/IR/CFG.h" 74 #include "llvm/IR/CallSite.h" 75 #include "llvm/IR/Comdat.h" 76 #include "llvm/IR/Constant.h" 77 #include "llvm/IR/Constants.h" 78 #include "llvm/IR/DiagnosticInfo.h" 79 #include "llvm/IR/Dominators.h" 80 #include "llvm/IR/Function.h" 81 #include "llvm/IR/GlobalAlias.h" 82 #include "llvm/IR/GlobalValue.h" 83 #include "llvm/IR/GlobalVariable.h" 84 #include "llvm/IR/IRBuilder.h" 85 #include "llvm/IR/InstVisitor.h" 86 #include "llvm/IR/InstrTypes.h" 87 #include "llvm/IR/Instruction.h" 88 #include "llvm/IR/Instructions.h" 89 #include "llvm/IR/IntrinsicInst.h" 90 #include "llvm/IR/Intrinsics.h" 91 #include "llvm/IR/LLVMContext.h" 92 #include "llvm/IR/MDBuilder.h" 93 #include "llvm/IR/Module.h" 94 #include "llvm/IR/PassManager.h" 95 #include "llvm/IR/ProfileSummary.h" 96 #include "llvm/IR/Type.h" 97 #include "llvm/IR/Value.h" 98 #include "llvm/InitializePasses.h" 99 #include "llvm/Pass.h" 100 #include "llvm/ProfileData/InstrProf.h" 101 #include "llvm/ProfileData/InstrProfReader.h" 102 #include "llvm/Support/BranchProbability.h" 103 #include "llvm/Support/CRC.h" 104 #include "llvm/Support/Casting.h" 105 #include "llvm/Support/CommandLine.h" 106 #include "llvm/Support/DOTGraphTraits.h" 107 #include "llvm/Support/Debug.h" 108 #include "llvm/Support/Error.h" 109 #include "llvm/Support/ErrorHandling.h" 110 #include "llvm/Support/GraphWriter.h" 111 #include "llvm/Support/raw_ostream.h" 112 #include "llvm/Transforms/Instrumentation.h" 113 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 114 #include "llvm/Transforms/Utils/MisExpect.h" 115 #include <algorithm> 116 #include <cassert> 117 #include <cstdint> 118 #include <memory> 119 #include <numeric> 120 #include <string> 121 #include <unordered_map> 122 #include <utility> 123 #include <vector> 124 125 using namespace llvm; 126 using ProfileCount = Function::ProfileCount; 127 using VPCandidateInfo = ValueProfileCollector::CandidateInfo; 128 129 #define DEBUG_TYPE "pgo-instrumentation" 130 131 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 132 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 133 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); 134 STATISTIC(NumOfPGOEdge, "Number of edges."); 135 STATISTIC(NumOfPGOBB, "Number of basic-blocks."); 136 STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 137 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 138 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 139 STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 140 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 141 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); 142 STATISTIC(NumOfCSPGOSelectInsts, 143 "Number of select instruction instrumented in CSPGO."); 144 STATISTIC(NumOfCSPGOMemIntrinsics, 145 "Number of mem intrinsics instrumented in CSPGO."); 146 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); 147 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); 148 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO."); 149 STATISTIC(NumOfCSPGOFunc, 150 "Number of functions having valid profile counts in CSPGO."); 151 STATISTIC(NumOfCSPGOMismatch, 152 "Number of functions having mismatch profile in CSPGO."); 153 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); 154 155 // Command line option to specify the file to read profile from. This is 156 // mainly used for testing. 157 static cl::opt<std::string> 158 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, 159 cl::value_desc("filename"), 160 cl::desc("Specify the path of profile data file. This is" 161 "mainly for test purpose.")); 162 static cl::opt<std::string> PGOTestProfileRemappingFile( 163 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, 164 cl::value_desc("filename"), 165 cl::desc("Specify the path of profile remapping file. This is mainly for " 166 "test purpose.")); 167 168 // Command line option to disable value profiling. The default is false: 169 // i.e. value profiling is enabled by default. This is for debug purpose. 170 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 171 cl::Hidden, 172 cl::desc("Disable Value Profiling")); 173 174 // Command line option to set the maximum number of VP annotations to write to 175 // the metadata for a single indirect call callsite. 176 static cl::opt<unsigned> MaxNumAnnotations( 177 "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore, 178 cl::desc("Max number of annotations for a single indirect " 179 "call callsite")); 180 181 // Command line option to set the maximum number of value annotations 182 // to write to the metadata for a single memop intrinsic. 183 static cl::opt<unsigned> MaxNumMemOPAnnotations( 184 "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore, 185 cl::desc("Max number of preicise value annotations for a single memop" 186 "intrinsic")); 187 188 // Command line option to control appending FunctionHash to the name of a COMDAT 189 // function. This is to avoid the hash mismatch caused by the preinliner. 190 static cl::opt<bool> DoComdatRenaming( 191 "do-comdat-renaming", cl::init(false), cl::Hidden, 192 cl::desc("Append function hash to the name of COMDAT function to avoid " 193 "function hash mismatch due to the preinliner")); 194 195 // Command line option to enable/disable the warning about missing profile 196 // information. 197 static cl::opt<bool> 198 PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, 199 cl::desc("Use this option to turn on/off " 200 "warnings about missing profile data for " 201 "functions.")); 202 203 // Command line option to enable/disable the warning about a hash mismatch in 204 // the profile data. 205 static cl::opt<bool> 206 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 207 cl::desc("Use this option to turn off/on " 208 "warnings about profile cfg mismatch.")); 209 210 // Command line option to enable/disable the warning about a hash mismatch in 211 // the profile data for Comdat functions, which often turns out to be false 212 // positive due to the pre-instrumentation inline. 213 static cl::opt<bool> 214 NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), 215 cl::Hidden, 216 cl::desc("The option is used to turn on/off " 217 "warnings about hash mismatch for comdat " 218 "functions.")); 219 220 // Command line option to enable/disable select instruction instrumentation. 221 static cl::opt<bool> 222 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 223 cl::desc("Use this option to turn on/off SELECT " 224 "instruction instrumentation. ")); 225 226 // Command line option to turn on CFG dot or text dump of raw profile counts 227 static cl::opt<PGOViewCountsType> PGOViewRawCounts( 228 "pgo-view-raw-counts", cl::Hidden, 229 cl::desc("A boolean option to show CFG dag or text " 230 "with raw profile counts from " 231 "profile data. See also option " 232 "-pgo-view-counts. To limit graph " 233 "display to only one function, use " 234 "filtering option -view-bfi-func-name."), 235 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), 236 clEnumValN(PGOVCT_Graph, "graph", "show a graph."), 237 clEnumValN(PGOVCT_Text, "text", "show in text."))); 238 239 // Command line option to enable/disable memop intrinsic call.size profiling. 240 static cl::opt<bool> 241 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 242 cl::desc("Use this option to turn on/off " 243 "memory intrinsic size profiling.")); 244 245 // Emit branch probability as optimization remarks. 246 static cl::opt<bool> 247 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 248 cl::desc("When this option is on, the annotated " 249 "branch probability will be emitted as " 250 "optimization remarks: -{Rpass|" 251 "pass-remarks}=pgo-instrumentation")); 252 253 // Command line option to turn on CFG dot dump after profile annotation. 254 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts 255 extern cl::opt<PGOViewCountsType> PGOViewCounts; 256 257 // Command line option to specify the name of the function for CFG dump 258 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= 259 extern cl::opt<std::string> ViewBlockFreqFuncName; 260 261 // Return a string describing the branch condition that can be 262 // used in static branch probability heuristics: 263 static std::string getBranchCondString(Instruction *TI) { 264 BranchInst *BI = dyn_cast<BranchInst>(TI); 265 if (!BI || !BI->isConditional()) 266 return std::string(); 267 268 Value *Cond = BI->getCondition(); 269 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 270 if (!CI) 271 return std::string(); 272 273 std::string result; 274 raw_string_ostream OS(result); 275 OS << CmpInst::getPredicateName(CI->getPredicate()) << "_"; 276 CI->getOperand(0)->getType()->print(OS, true); 277 278 Value *RHS = CI->getOperand(1); 279 ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 280 if (CV) { 281 if (CV->isZero()) 282 OS << "_Zero"; 283 else if (CV->isOne()) 284 OS << "_One"; 285 else if (CV->isMinusOne()) 286 OS << "_MinusOne"; 287 else 288 OS << "_Const"; 289 } 290 OS.flush(); 291 return result; 292 } 293 294 static const char *ValueProfKindDescr[] = { 295 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, 296 #include "llvm/ProfileData/InstrProfData.inc" 297 }; 298 299 namespace { 300 301 /// The select instruction visitor plays three roles specified 302 /// by the mode. In \c VM_counting mode, it simply counts the number of 303 /// select instructions. In \c VM_instrument mode, it inserts code to count 304 /// the number times TrueValue of select is taken. In \c VM_annotate mode, 305 /// it reads the profile data and annotate the select instruction with metadata. 306 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 307 class PGOUseFunc; 308 309 /// Instruction Visitor class to visit select instructions. 310 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 311 Function &F; 312 unsigned NSIs = 0; // Number of select instructions instrumented. 313 VisitMode Mode = VM_counting; // Visiting mode. 314 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 315 unsigned TotalNumCtrs = 0; // Total number of counters 316 GlobalVariable *FuncNameVar = nullptr; 317 uint64_t FuncHash = 0; 318 PGOUseFunc *UseFunc = nullptr; 319 320 SelectInstVisitor(Function &Func) : F(Func) {} 321 322 void countSelects(Function &Func) { 323 NSIs = 0; 324 Mode = VM_counting; 325 visit(Func); 326 } 327 328 // Visit the IR stream and instrument all select instructions. \p 329 // Ind is a pointer to the counter index variable; \p TotalNC 330 // is the total number of counters; \p FNV is the pointer to the 331 // PGO function name var; \p FHash is the function hash. 332 void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC, 333 GlobalVariable *FNV, uint64_t FHash) { 334 Mode = VM_instrument; 335 CurCtrIdx = Ind; 336 TotalNumCtrs = TotalNC; 337 FuncHash = FHash; 338 FuncNameVar = FNV; 339 visit(Func); 340 } 341 342 // Visit the IR stream and annotate all select instructions. 343 void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) { 344 Mode = VM_annotate; 345 UseFunc = UF; 346 CurCtrIdx = Ind; 347 visit(Func); 348 } 349 350 void instrumentOneSelectInst(SelectInst &SI); 351 void annotateOneSelectInst(SelectInst &SI); 352 353 // Visit \p SI instruction and perform tasks according to visit mode. 354 void visitSelectInst(SelectInst &SI); 355 356 // Return the number of select instructions. This needs be called after 357 // countSelects(). 358 unsigned getNumOfSelectInsts() const { return NSIs; } 359 }; 360 361 362 class PGOInstrumentationGenLegacyPass : public ModulePass { 363 public: 364 static char ID; 365 366 PGOInstrumentationGenLegacyPass(bool IsCS = false) 367 : ModulePass(ID), IsCS(IsCS) { 368 initializePGOInstrumentationGenLegacyPassPass( 369 *PassRegistry::getPassRegistry()); 370 } 371 372 StringRef getPassName() const override { return "PGOInstrumentationGenPass"; } 373 374 private: 375 // Is this is context-sensitive instrumentation. 376 bool IsCS; 377 bool runOnModule(Module &M) override; 378 379 void getAnalysisUsage(AnalysisUsage &AU) const override { 380 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 381 } 382 }; 383 384 class PGOInstrumentationUseLegacyPass : public ModulePass { 385 public: 386 static char ID; 387 388 // Provide the profile filename as the parameter. 389 PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false) 390 : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) { 391 if (!PGOTestProfileFile.empty()) 392 ProfileFileName = PGOTestProfileFile; 393 initializePGOInstrumentationUseLegacyPassPass( 394 *PassRegistry::getPassRegistry()); 395 } 396 397 StringRef getPassName() const override { return "PGOInstrumentationUsePass"; } 398 399 private: 400 std::string ProfileFileName; 401 // Is this is context-sensitive instrumentation use. 402 bool IsCS; 403 404 bool runOnModule(Module &M) override; 405 406 void getAnalysisUsage(AnalysisUsage &AU) const override { 407 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 408 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 409 } 410 }; 411 412 class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass { 413 public: 414 static char ID; 415 StringRef getPassName() const override { 416 return "PGOInstrumentationGenCreateVarPass"; 417 } 418 PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "") 419 : ModulePass(ID), InstrProfileOutput(CSInstrName) { 420 initializePGOInstrumentationGenCreateVarLegacyPassPass( 421 *PassRegistry::getPassRegistry()); 422 } 423 424 private: 425 bool runOnModule(Module &M) override { 426 createProfileFileNameVar(M, InstrProfileOutput); 427 createIRLevelProfileFlagVar(M, true); 428 return false; 429 } 430 std::string InstrProfileOutput; 431 }; 432 433 } // end anonymous namespace 434 435 char PGOInstrumentationGenLegacyPass::ID = 0; 436 437 INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 438 "PGO instrumentation.", false, false) 439 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 440 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 441 INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 442 "PGO instrumentation.", false, false) 443 444 ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) { 445 return new PGOInstrumentationGenLegacyPass(IsCS); 446 } 447 448 char PGOInstrumentationUseLegacyPass::ID = 0; 449 450 INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 451 "Read PGO instrumentation profile.", false, false) 452 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 453 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 454 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 455 INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 456 "Read PGO instrumentation profile.", false, false) 457 458 ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename, 459 bool IsCS) { 460 return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS); 461 } 462 463 char PGOInstrumentationGenCreateVarLegacyPass::ID = 0; 464 465 INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass, 466 "pgo-instr-gen-create-var", 467 "Create PGO instrumentation version variable for CSPGO.", false, 468 false) 469 470 ModulePass * 471 llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) { 472 return new PGOInstrumentationGenCreateVarLegacyPass(std::string(CSInstrName)); 473 } 474 475 namespace { 476 477 /// An MST based instrumentation for PGO 478 /// 479 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO 480 /// in the function level. 481 struct PGOEdge { 482 // This class implements the CFG edges. Note the CFG can be a multi-graph. 483 // So there might be multiple edges with same SrcBB and DestBB. 484 const BasicBlock *SrcBB; 485 const BasicBlock *DestBB; 486 uint64_t Weight; 487 bool InMST = false; 488 bool Removed = false; 489 bool IsCritical = false; 490 491 PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 492 : SrcBB(Src), DestBB(Dest), Weight(W) {} 493 494 // Return the information string of an edge. 495 const std::string infoString() const { 496 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 497 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str(); 498 } 499 }; 500 501 // This class stores the auxiliary information for each BB. 502 struct BBInfo { 503 BBInfo *Group; 504 uint32_t Index; 505 uint32_t Rank = 0; 506 507 BBInfo(unsigned IX) : Group(this), Index(IX) {} 508 509 // Return the information string of this object. 510 const std::string infoString() const { 511 return (Twine("Index=") + Twine(Index)).str(); 512 } 513 514 // Empty function -- only applicable to UseBBInfo. 515 void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 516 517 // Empty function -- only applicable to UseBBInfo. 518 void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 519 }; 520 521 // This class implements the CFG edges. Note the CFG can be a multi-graph. 522 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 523 private: 524 Function &F; 525 526 // Is this is context-sensitive instrumentation. 527 bool IsCS; 528 529 // A map that stores the Comdat group in function F. 530 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 531 532 ValueProfileCollector VPC; 533 534 void computeCFGHash(); 535 void renameComdatFunction(); 536 537 public: 538 std::vector<std::vector<VPCandidateInfo>> ValueSites; 539 SelectInstVisitor SIVisitor; 540 std::string FuncName; 541 GlobalVariable *FuncNameVar; 542 543 // CFG hash value for this function. 544 uint64_t FunctionHash = 0; 545 546 // The Minimum Spanning Tree of function CFG. 547 CFGMST<Edge, BBInfo> MST; 548 549 // Collect all the BBs that will be instrumented, and store them in 550 // InstrumentBBs. 551 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); 552 553 // Give an edge, find the BB that will be instrumented. 554 // Return nullptr if there is no BB to be instrumented. 555 BasicBlock *getInstrBB(Edge *E); 556 557 // Return the auxiliary BB information. 558 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 559 560 // Return the auxiliary BB information if available. 561 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 562 563 // Dump edges and BB information. 564 void dumpInfo(std::string Str = "") const { 565 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " + 566 Twine(FunctionHash) + "\t" + Str); 567 } 568 569 FuncPGOInstrumentation( 570 Function &Func, 571 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 572 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 573 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false) 574 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func), 575 ValueSites(IPVK_Last + 1), SIVisitor(Func), MST(F, BPI, BFI) { 576 // This should be done before CFG hash computation. 577 SIVisitor.countSelects(Func); 578 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); 579 if (!IsCS) { 580 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 581 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 582 NumOfPGOBB += MST.BBInfos.size(); 583 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); 584 } else { 585 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 586 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 587 NumOfCSPGOBB += MST.BBInfos.size(); 588 } 589 590 FuncName = getPGOFuncName(F); 591 computeCFGHash(); 592 if (!ComdatMembers.empty()) 593 renameComdatFunction(); 594 LLVM_DEBUG(dumpInfo("after CFGMST")); 595 596 for (auto &E : MST.AllEdges) { 597 if (E->Removed) 598 continue; 599 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; 600 if (!E->InMST) 601 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; 602 } 603 604 if (CreateGlobalVar) 605 FuncNameVar = createPGOFuncNameVar(F, FuncName); 606 } 607 }; 608 609 } // end anonymous namespace 610 611 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 612 // value of each BB in the CFG. The higher 32 bits record the number of edges. 613 template <class Edge, class BBInfo> 614 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 615 std::vector<uint8_t> Indexes; 616 JamCRC JC; 617 for (auto &BB : F) { 618 const Instruction *TI = BB.getTerminator(); 619 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 620 BasicBlock *Succ = TI->getSuccessor(I); 621 auto BI = findBBInfo(Succ); 622 if (BI == nullptr) 623 continue; 624 uint32_t Index = BI->Index; 625 for (int J = 0; J < 4; J++) 626 Indexes.push_back((uint8_t)(Index >> (J * 8))); 627 } 628 } 629 JC.update(Indexes); 630 631 // Hash format for context sensitive profile. Reserve 4 bits for other 632 // information. 633 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | 634 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | 635 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 | 636 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); 637 // Reserve bit 60-63 for other information purpose. 638 FunctionHash &= 0x0FFFFFFFFFFFFFFF; 639 if (IsCS) 640 NamedInstrProfRecord::setCSFlagInHash(FunctionHash); 641 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" 642 << " CRC = " << JC.getCRC() 643 << ", Selects = " << SIVisitor.getNumOfSelectInsts() 644 << ", Edges = " << MST.AllEdges.size() << ", ICSites = " 645 << ValueSites[IPVK_IndirectCallTarget].size() 646 << ", Hash = " << FunctionHash << "\n";); 647 } 648 649 // Check if we can safely rename this Comdat function. 650 static bool canRenameComdat( 651 Function &F, 652 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 653 if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 654 return false; 655 656 // FIXME: Current only handle those Comdat groups that only containing one 657 // function and function aliases. 658 // (1) For a Comdat group containing multiple functions, we need to have a 659 // unique postfix based on the hashes for each function. There is a 660 // non-trivial code refactoring to do this efficiently. 661 // (2) Variables can not be renamed, so we can not rename Comdat function in a 662 // group including global vars. 663 Comdat *C = F.getComdat(); 664 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 665 if (dyn_cast<GlobalAlias>(CM.second)) 666 continue; 667 Function *FM = dyn_cast<Function>(CM.second); 668 if (FM != &F) 669 return false; 670 } 671 return true; 672 } 673 674 // Append the CFGHash to the Comdat function name. 675 template <class Edge, class BBInfo> 676 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 677 if (!canRenameComdat(F, ComdatMembers)) 678 return; 679 std::string OrigName = F.getName().str(); 680 std::string NewFuncName = 681 Twine(F.getName() + "." + Twine(FunctionHash)).str(); 682 F.setName(Twine(NewFuncName)); 683 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 684 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 685 Comdat *NewComdat; 686 Module *M = F.getParent(); 687 // For AvailableExternallyLinkage functions, change the linkage to 688 // LinkOnceODR and put them into comdat. This is because after renaming, there 689 // is no backup external copy available for the function. 690 if (!F.hasComdat()) { 691 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 692 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 693 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 694 F.setComdat(NewComdat); 695 return; 696 } 697 698 // This function belongs to a single function Comdat group. 699 Comdat *OrigComdat = F.getComdat(); 700 std::string NewComdatName = 701 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 702 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 703 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 704 705 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 706 if (GlobalAlias *GA = dyn_cast<GlobalAlias>(CM.second)) { 707 // For aliases, change the name directly. 708 assert(dyn_cast<Function>(GA->getAliasee()->stripPointerCasts()) == &F); 709 std::string OrigGAName = GA->getName().str(); 710 GA->setName(Twine(GA->getName() + "." + Twine(FunctionHash))); 711 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigGAName, GA); 712 continue; 713 } 714 // Must be a function. 715 Function *CF = dyn_cast<Function>(CM.second); 716 assert(CF); 717 CF->setComdat(NewComdat); 718 } 719 } 720 721 // Collect all the BBs that will be instruments and return them in 722 // InstrumentBBs and setup InEdges/OutEdge for UseBBInfo. 723 template <class Edge, class BBInfo> 724 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( 725 std::vector<BasicBlock *> &InstrumentBBs) { 726 // Use a worklist as we will update the vector during the iteration. 727 std::vector<Edge *> EdgeList; 728 EdgeList.reserve(MST.AllEdges.size()); 729 for (auto &E : MST.AllEdges) 730 EdgeList.push_back(E.get()); 731 732 for (auto &E : EdgeList) { 733 BasicBlock *InstrBB = getInstrBB(E); 734 if (InstrBB) 735 InstrumentBBs.push_back(InstrBB); 736 } 737 738 // Set up InEdges/OutEdges for all BBs. 739 for (auto &E : MST.AllEdges) { 740 if (E->Removed) 741 continue; 742 const BasicBlock *SrcBB = E->SrcBB; 743 const BasicBlock *DestBB = E->DestBB; 744 BBInfo &SrcInfo = getBBInfo(SrcBB); 745 BBInfo &DestInfo = getBBInfo(DestBB); 746 SrcInfo.addOutEdge(E.get()); 747 DestInfo.addInEdge(E.get()); 748 } 749 } 750 751 // Given a CFG E to be instrumented, find which BB to place the instrumented 752 // code. The function will split the critical edge if necessary. 753 template <class Edge, class BBInfo> 754 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 755 if (E->InMST || E->Removed) 756 return nullptr; 757 758 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 759 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 760 // For a fake edge, instrument the real BB. 761 if (SrcBB == nullptr) 762 return DestBB; 763 if (DestBB == nullptr) 764 return SrcBB; 765 766 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { 767 // There are basic blocks (such as catchswitch) cannot be instrumented. 768 // If the returned first insertion point is the end of BB, skip this BB. 769 if (BB->getFirstInsertionPt() == BB->end()) 770 return nullptr; 771 return BB; 772 }; 773 774 // Instrument the SrcBB if it has a single successor, 775 // otherwise, the DestBB if this is not a critical edge. 776 Instruction *TI = SrcBB->getTerminator(); 777 if (TI->getNumSuccessors() <= 1) 778 return canInstrument(SrcBB); 779 if (!E->IsCritical) 780 return canInstrument(DestBB); 781 782 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 783 BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum); 784 if (!InstrBB) { 785 LLVM_DEBUG( 786 dbgs() << "Fail to split critical edge: not instrument this edge.\n"); 787 return nullptr; 788 } 789 // For a critical edge, we have to split. Instrument the newly 790 // created BB. 791 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; 792 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index 793 << " --> " << getBBInfo(DestBB).Index << "\n"); 794 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. 795 MST.addEdge(SrcBB, InstrBB, 0); 796 // Second one: Add new edge of InstrBB->DestBB. 797 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); 798 NewEdge1.InMST = true; 799 E->Removed = true; 800 801 return canInstrument(InstrBB); 802 } 803 804 // When generating value profiling calls on Windows routines that make use of 805 // handler funclets for exception processing an operand bundle needs to attached 806 // to the called function. This routine will set \p OpBundles to contain the 807 // funclet information, if any is needed, that should be placed on the generated 808 // value profiling call for the value profile candidate call. 809 static void 810 populateEHOperandBundle(VPCandidateInfo &Cand, 811 DenseMap<BasicBlock *, ColorVector> &BlockColors, 812 SmallVectorImpl<OperandBundleDef> &OpBundles) { 813 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst); 814 if (OrigCall && !isa<IntrinsicInst>(OrigCall)) { 815 // The instrumentation call should belong to the same funclet as a 816 // non-intrinsic call, so just copy the operand bundle, if any exists. 817 Optional<OperandBundleUse> ParentFunclet = 818 OrigCall->getOperandBundle(LLVMContext::OB_funclet); 819 if (ParentFunclet) 820 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet)); 821 } else { 822 // Intrinsics or other instructions do not get funclet information from the 823 // front-end. Need to use the BlockColors that was computed by the routine 824 // colorEHFunclets to determine whether a funclet is needed. 825 if (!BlockColors.empty()) { 826 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second; 827 assert(CV.size() == 1 && "non-unique color for block!"); 828 Instruction *EHPad = CV.front()->getFirstNonPHI(); 829 if (EHPad->isEHPad()) 830 OpBundles.emplace_back("funclet", EHPad); 831 } 832 } 833 } 834 835 // Visit all edge and instrument the edges not in MST, and do value profiling. 836 // Critical edges will be split. 837 static void instrumentOneFunc( 838 Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, 839 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 840 bool IsCS) { 841 // Split indirectbr critical edges here before computing the MST rather than 842 // later in getInstrBB() to avoid invalidating it. 843 SplitIndirectBrCriticalEdges(F, BPI, BFI); 844 845 FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI, 846 BFI, IsCS); 847 std::vector<BasicBlock *> InstrumentBBs; 848 FuncInfo.getInstrumentBBs(InstrumentBBs); 849 unsigned NumCounters = 850 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 851 852 uint32_t I = 0; 853 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); 854 for (auto *InstrBB : InstrumentBBs) { 855 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 856 assert(Builder.GetInsertPoint() != InstrBB->end() && 857 "Cannot get the Instrumentation point"); 858 Builder.CreateCall( 859 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), 860 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 861 Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters), 862 Builder.getInt32(I++)}); 863 } 864 865 // Now instrument select instructions: 866 FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar, 867 FuncInfo.FunctionHash); 868 assert(I == NumCounters); 869 870 if (DisableValueProfiling) 871 return; 872 873 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); 874 875 // Intrinsic function calls do not have funclet operand bundles needed for 876 // Windows exception handling attached to them. However, if value profiling is 877 // inserted for one of these calls, then a funclet value will need to be set 878 // on the instrumentation call based on the funclet coloring. 879 DenseMap<BasicBlock *, ColorVector> BlockColors; 880 if (F.hasPersonalityFn() && 881 isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) 882 BlockColors = colorEHFunclets(F); 883 884 // For each VP Kind, walk the VP candidates and instrument each one. 885 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { 886 unsigned SiteIndex = 0; 887 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) 888 continue; 889 890 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { 891 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] 892 << " site: CallSite Index = " << SiteIndex << "\n"); 893 894 IRBuilder<> Builder(Cand.InsertPt); 895 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && 896 "Cannot get the Instrumentation point"); 897 898 Value *ToProfile = nullptr; 899 if (Cand.V->getType()->isIntegerTy()) 900 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty()); 901 else if (Cand.V->getType()->isPointerTy()) 902 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); 903 assert(ToProfile && "value profiling Value is of unexpected type"); 904 905 SmallVector<OperandBundleDef, 1> OpBundles; 906 populateEHOperandBundle(Cand, BlockColors, OpBundles); 907 Builder.CreateCall( 908 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 909 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 910 Builder.getInt64(FuncInfo.FunctionHash), ToProfile, 911 Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}, 912 OpBundles); 913 } 914 } // IPVK_First <= Kind <= IPVK_Last 915 } 916 917 namespace { 918 919 // This class represents a CFG edge in profile use compilation. 920 struct PGOUseEdge : public PGOEdge { 921 bool CountValid = false; 922 uint64_t CountValue = 0; 923 924 PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 925 : PGOEdge(Src, Dest, W) {} 926 927 // Set edge count value 928 void setEdgeCount(uint64_t Value) { 929 CountValue = Value; 930 CountValid = true; 931 } 932 933 // Return the information string for this object. 934 const std::string infoString() const { 935 if (!CountValid) 936 return PGOEdge::infoString(); 937 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) 938 .str(); 939 } 940 }; 941 942 using DirectEdges = SmallVector<PGOUseEdge *, 2>; 943 944 // This class stores the auxiliary information for each BB. 945 struct UseBBInfo : public BBInfo { 946 uint64_t CountValue = 0; 947 bool CountValid; 948 int32_t UnknownCountInEdge = 0; 949 int32_t UnknownCountOutEdge = 0; 950 DirectEdges InEdges; 951 DirectEdges OutEdges; 952 953 UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {} 954 955 UseBBInfo(unsigned IX, uint64_t C) 956 : BBInfo(IX), CountValue(C), CountValid(true) {} 957 958 // Set the profile count value for this BB. 959 void setBBInfoCount(uint64_t Value) { 960 CountValue = Value; 961 CountValid = true; 962 } 963 964 // Return the information string of this object. 965 const std::string infoString() const { 966 if (!CountValid) 967 return BBInfo::infoString(); 968 return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str(); 969 } 970 971 // Add an OutEdge and update the edge count. 972 void addOutEdge(PGOUseEdge *E) { 973 OutEdges.push_back(E); 974 UnknownCountOutEdge++; 975 } 976 977 // Add an InEdge and update the edge count. 978 void addInEdge(PGOUseEdge *E) { 979 InEdges.push_back(E); 980 UnknownCountInEdge++; 981 } 982 }; 983 984 } // end anonymous namespace 985 986 // Sum up the count values for all the edges. 987 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 988 uint64_t Total = 0; 989 for (auto &E : Edges) { 990 if (E->Removed) 991 continue; 992 Total += E->CountValue; 993 } 994 return Total; 995 } 996 997 namespace { 998 999 class PGOUseFunc { 1000 public: 1001 PGOUseFunc(Function &Func, Module *Modu, 1002 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 1003 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, 1004 ProfileSummaryInfo *PSI, bool IsCS) 1005 : F(Func), M(Modu), BFI(BFIin), PSI(PSI), 1006 FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS), 1007 FreqAttr(FFA_Normal), IsCS(IsCS) {} 1008 1009 // Read counts for the instrumented BB from profile. 1010 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros); 1011 1012 // Populate the counts for all BBs. 1013 void populateCounters(); 1014 1015 // Set the branch weights based on the count values. 1016 void setBranchWeights(); 1017 1018 // Annotate the value profile call sites for all value kind. 1019 void annotateValueSites(); 1020 1021 // Annotate the value profile call sites for one value kind. 1022 void annotateValueSites(uint32_t Kind); 1023 1024 // Annotate the irreducible loop header weights. 1025 void annotateIrrLoopHeaderWeights(); 1026 1027 // The hotness of the function from the profile count. 1028 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 1029 1030 // Return the function hotness from the profile. 1031 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 1032 1033 // Return the function hash. 1034 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 1035 1036 // Return the profile record for this function; 1037 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 1038 1039 // Return the auxiliary BB information. 1040 UseBBInfo &getBBInfo(const BasicBlock *BB) const { 1041 return FuncInfo.getBBInfo(BB); 1042 } 1043 1044 // Return the auxiliary BB information if available. 1045 UseBBInfo *findBBInfo(const BasicBlock *BB) const { 1046 return FuncInfo.findBBInfo(BB); 1047 } 1048 1049 Function &getFunc() const { return F; } 1050 1051 void dumpInfo(std::string Str = "") const { 1052 FuncInfo.dumpInfo(Str); 1053 } 1054 1055 uint64_t getProgramMaxCount() const { return ProgramMaxCount; } 1056 private: 1057 Function &F; 1058 Module *M; 1059 BlockFrequencyInfo *BFI; 1060 ProfileSummaryInfo *PSI; 1061 1062 // This member stores the shared information with class PGOGenFunc. 1063 FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo; 1064 1065 // The maximum count value in the profile. This is only used in PGO use 1066 // compilation. 1067 uint64_t ProgramMaxCount; 1068 1069 // Position of counter that remains to be read. 1070 uint32_t CountPosition = 0; 1071 1072 // Total size of the profile count for this function. 1073 uint32_t ProfileCountSize = 0; 1074 1075 // ProfileRecord for this function. 1076 InstrProfRecord ProfileRecord; 1077 1078 // Function hotness info derived from profile. 1079 FuncFreqAttr FreqAttr; 1080 1081 // Is to use the context sensitive profile. 1082 bool IsCS; 1083 1084 // Find the Instrumented BB and set the value. Return false on error. 1085 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 1086 1087 // Set the edge counter value for the unknown edge -- there should be only 1088 // one unknown edge. 1089 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 1090 1091 // Return FuncName string; 1092 const std::string getFuncName() const { return FuncInfo.FuncName; } 1093 1094 // Set the hot/cold inline hints based on the count values. 1095 // FIXME: This function should be removed once the functionality in 1096 // the inliner is implemented. 1097 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 1098 if (PSI->isHotCount(EntryCount)) 1099 FreqAttr = FFA_Hot; 1100 else if (PSI->isColdCount(MaxCount)) 1101 FreqAttr = FFA_Cold; 1102 } 1103 }; 1104 1105 } // end anonymous namespace 1106 1107 // Visit all the edges and assign the count value for the instrumented 1108 // edges and the BB. Return false on error. 1109 bool PGOUseFunc::setInstrumentedCounts( 1110 const std::vector<uint64_t> &CountFromProfile) { 1111 1112 std::vector<BasicBlock *> InstrumentBBs; 1113 FuncInfo.getInstrumentBBs(InstrumentBBs); 1114 unsigned NumCounters = 1115 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 1116 // The number of counters here should match the number of counters 1117 // in profile. Return if they mismatch. 1118 if (NumCounters != CountFromProfile.size()) { 1119 return false; 1120 } 1121 // Set the profile count to the Instrumented BBs. 1122 uint32_t I = 0; 1123 for (BasicBlock *InstrBB : InstrumentBBs) { 1124 uint64_t CountValue = CountFromProfile[I++]; 1125 UseBBInfo &Info = getBBInfo(InstrBB); 1126 Info.setBBInfoCount(CountValue); 1127 } 1128 ProfileCountSize = CountFromProfile.size(); 1129 CountPosition = I; 1130 1131 // Set the edge count and update the count of unknown edges for BBs. 1132 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { 1133 E->setEdgeCount(Value); 1134 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1135 this->getBBInfo(E->DestBB).UnknownCountInEdge--; 1136 }; 1137 1138 // Set the profile count the Instrumented edges. There are BBs that not in 1139 // MST but not instrumented. Need to set the edge count value so that we can 1140 // populate the profile counts later. 1141 for (auto &E : FuncInfo.MST.AllEdges) { 1142 if (E->Removed || E->InMST) 1143 continue; 1144 const BasicBlock *SrcBB = E->SrcBB; 1145 UseBBInfo &SrcInfo = getBBInfo(SrcBB); 1146 1147 // If only one out-edge, the edge profile count should be the same as BB 1148 // profile count. 1149 if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1) 1150 setEdgeCount(E.get(), SrcInfo.CountValue); 1151 else { 1152 const BasicBlock *DestBB = E->DestBB; 1153 UseBBInfo &DestInfo = getBBInfo(DestBB); 1154 // If only one in-edge, the edge profile count should be the same as BB 1155 // profile count. 1156 if (DestInfo.CountValid && DestInfo.InEdges.size() == 1) 1157 setEdgeCount(E.get(), DestInfo.CountValue); 1158 } 1159 if (E->CountValid) 1160 continue; 1161 // E's count should have been set from profile. If not, this meenas E skips 1162 // the instrumentation. We set the count to 0. 1163 setEdgeCount(E.get(), 0); 1164 } 1165 return true; 1166 } 1167 1168 // Set the count value for the unknown edge. There should be one and only one 1169 // unknown edge in Edges vector. 1170 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 1171 for (auto &E : Edges) { 1172 if (E->CountValid) 1173 continue; 1174 E->setEdgeCount(Value); 1175 1176 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1177 getBBInfo(E->DestBB).UnknownCountInEdge--; 1178 return; 1179 } 1180 llvm_unreachable("Cannot find the unknown count edge"); 1181 } 1182 1183 // Read the profile from ProfileFileName and assign the value to the 1184 // instrumented BB and the edges. This function also updates ProgramMaxCount. 1185 // Return true if the profile are successfully read, and false on errors. 1186 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros) { 1187 auto &Ctx = M->getContext(); 1188 Expected<InstrProfRecord> Result = 1189 PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); 1190 if (Error E = Result.takeError()) { 1191 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 1192 auto Err = IPE.get(); 1193 bool SkipWarning = false; 1194 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " 1195 << FuncInfo.FuncName << ": "); 1196 if (Err == instrprof_error::unknown_function) { 1197 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; 1198 SkipWarning = !PGOWarnMissing; 1199 LLVM_DEBUG(dbgs() << "unknown function"); 1200 } else if (Err == instrprof_error::hash_mismatch || 1201 Err == instrprof_error::malformed) { 1202 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; 1203 SkipWarning = 1204 NoPGOWarnMismatch || 1205 (NoPGOWarnMismatchComdat && 1206 (F.hasComdat() || 1207 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 1208 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); 1209 } 1210 1211 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); 1212 if (SkipWarning) 1213 return; 1214 1215 std::string Msg = IPE.message() + std::string(" ") + F.getName().str() + 1216 std::string(" Hash = ") + 1217 std::to_string(FuncInfo.FunctionHash); 1218 1219 Ctx.diagnose( 1220 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1221 }); 1222 return false; 1223 } 1224 ProfileRecord = std::move(Result.get()); 1225 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 1226 1227 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 1228 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 1229 uint64_t ValueSum = 0; 1230 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 1231 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 1232 ValueSum += CountFromProfile[I]; 1233 } 1234 AllZeros = (ValueSum == 0); 1235 1236 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 1237 1238 getBBInfo(nullptr).UnknownCountOutEdge = 2; 1239 getBBInfo(nullptr).UnknownCountInEdge = 2; 1240 1241 if (!setInstrumentedCounts(CountFromProfile)) { 1242 LLVM_DEBUG( 1243 dbgs() << "Inconsistent number of counts, skipping this function"); 1244 Ctx.diagnose(DiagnosticInfoPGOProfile( 1245 M->getName().data(), 1246 Twine("Inconsistent number of counts in ") + F.getName().str() 1247 + Twine(": the profile may be stale or there is a function name collision."), 1248 DS_Warning)); 1249 return false; 1250 } 1251 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); 1252 return true; 1253 } 1254 1255 // Populate the counters from instrumented BBs to all BBs. 1256 // In the end of this operation, all BBs should have a valid count value. 1257 void PGOUseFunc::populateCounters() { 1258 bool Changes = true; 1259 unsigned NumPasses = 0; 1260 while (Changes) { 1261 NumPasses++; 1262 Changes = false; 1263 1264 // For efficient traversal, it's better to start from the end as most 1265 // of the instrumented edges are at the end. 1266 for (auto &BB : reverse(F)) { 1267 UseBBInfo *Count = findBBInfo(&BB); 1268 if (Count == nullptr) 1269 continue; 1270 if (!Count->CountValid) { 1271 if (Count->UnknownCountOutEdge == 0) { 1272 Count->CountValue = sumEdgeCount(Count->OutEdges); 1273 Count->CountValid = true; 1274 Changes = true; 1275 } else if (Count->UnknownCountInEdge == 0) { 1276 Count->CountValue = sumEdgeCount(Count->InEdges); 1277 Count->CountValid = true; 1278 Changes = true; 1279 } 1280 } 1281 if (Count->CountValid) { 1282 if (Count->UnknownCountOutEdge == 1) { 1283 uint64_t Total = 0; 1284 uint64_t OutSum = sumEdgeCount(Count->OutEdges); 1285 // If the one of the successor block can early terminate (no-return), 1286 // we can end up with situation where out edge sum count is larger as 1287 // the source BB's count is collected by a post-dominated block. 1288 if (Count->CountValue > OutSum) 1289 Total = Count->CountValue - OutSum; 1290 setEdgeCount(Count->OutEdges, Total); 1291 Changes = true; 1292 } 1293 if (Count->UnknownCountInEdge == 1) { 1294 uint64_t Total = 0; 1295 uint64_t InSum = sumEdgeCount(Count->InEdges); 1296 if (Count->CountValue > InSum) 1297 Total = Count->CountValue - InSum; 1298 setEdgeCount(Count->InEdges, Total); 1299 Changes = true; 1300 } 1301 } 1302 } 1303 } 1304 1305 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 1306 #ifndef NDEBUG 1307 // Assert every BB has a valid counter. 1308 for (auto &BB : F) { 1309 auto BI = findBBInfo(&BB); 1310 if (BI == nullptr) 1311 continue; 1312 assert(BI->CountValid && "BB count is not valid"); 1313 } 1314 #endif 1315 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 1316 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); 1317 uint64_t FuncMaxCount = FuncEntryCount; 1318 for (auto &BB : F) { 1319 auto BI = findBBInfo(&BB); 1320 if (BI == nullptr) 1321 continue; 1322 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); 1323 } 1324 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 1325 1326 // Now annotate select instructions 1327 FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition); 1328 assert(CountPosition == ProfileCountSize); 1329 1330 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); 1331 } 1332 1333 // Assign the scaled count values to the BB with multiple out edges. 1334 void PGOUseFunc::setBranchWeights() { 1335 // Generate MD_prof metadata for every branch instruction. 1336 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() 1337 << " IsCS=" << IsCS << "\n"); 1338 for (auto &BB : F) { 1339 Instruction *TI = BB.getTerminator(); 1340 if (TI->getNumSuccessors() < 2) 1341 continue; 1342 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || 1343 isa<IndirectBrInst>(TI))) 1344 continue; 1345 1346 if (getBBInfo(&BB).CountValue == 0) 1347 continue; 1348 1349 // We have a non-zero Branch BB. 1350 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1351 unsigned Size = BBCountInfo.OutEdges.size(); 1352 SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 1353 uint64_t MaxCount = 0; 1354 for (unsigned s = 0; s < Size; s++) { 1355 const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 1356 const BasicBlock *SrcBB = E->SrcBB; 1357 const BasicBlock *DestBB = E->DestBB; 1358 if (DestBB == nullptr) 1359 continue; 1360 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 1361 uint64_t EdgeCount = E->CountValue; 1362 if (EdgeCount > MaxCount) 1363 MaxCount = EdgeCount; 1364 EdgeCounts[SuccNum] = EdgeCount; 1365 } 1366 setProfMetadata(M, TI, EdgeCounts, MaxCount); 1367 } 1368 } 1369 1370 static bool isIndirectBrTarget(BasicBlock *BB) { 1371 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { 1372 if (isa<IndirectBrInst>((*PI)->getTerminator())) 1373 return true; 1374 } 1375 return false; 1376 } 1377 1378 void PGOUseFunc::annotateIrrLoopHeaderWeights() { 1379 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); 1380 // Find irr loop headers 1381 for (auto &BB : F) { 1382 // As a heuristic also annotate indrectbr targets as they have a high chance 1383 // to become an irreducible loop header after the indirectbr tail 1384 // duplication. 1385 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { 1386 Instruction *TI = BB.getTerminator(); 1387 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1388 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); 1389 } 1390 } 1391 } 1392 1393 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 1394 Module *M = F.getParent(); 1395 IRBuilder<> Builder(&SI); 1396 Type *Int64Ty = Builder.getInt64Ty(); 1397 Type *I8PtrTy = Builder.getInt8PtrTy(); 1398 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 1399 Builder.CreateCall( 1400 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 1401 {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1402 Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), 1403 Builder.getInt32(*CurCtrIdx), Step}); 1404 ++(*CurCtrIdx); 1405 } 1406 1407 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 1408 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1409 assert(*CurCtrIdx < CountFromProfile.size() && 1410 "Out of bound access of counters"); 1411 uint64_t SCounts[2]; 1412 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1413 ++(*CurCtrIdx); 1414 uint64_t TotalCount = 0; 1415 auto BI = UseFunc->findBBInfo(SI.getParent()); 1416 if (BI != nullptr) 1417 TotalCount = BI->CountValue; 1418 // False Count 1419 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1420 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1421 if (MaxCount) 1422 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1423 } 1424 1425 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1426 if (!PGOInstrSelect) 1427 return; 1428 // FIXME: do not handle this yet. 1429 if (SI.getCondition()->getType()->isVectorTy()) 1430 return; 1431 1432 switch (Mode) { 1433 case VM_counting: 1434 NSIs++; 1435 return; 1436 case VM_instrument: 1437 instrumentOneSelectInst(SI); 1438 return; 1439 case VM_annotate: 1440 annotateOneSelectInst(SI); 1441 return; 1442 } 1443 1444 llvm_unreachable("Unknown visiting mode"); 1445 } 1446 1447 // Traverse all valuesites and annotate the instructions for all value kind. 1448 void PGOUseFunc::annotateValueSites() { 1449 if (DisableValueProfiling) 1450 return; 1451 1452 // Create the PGOFuncName meta data. 1453 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1454 1455 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1456 annotateValueSites(Kind); 1457 } 1458 1459 // Annotate the instructions for a specific value kind. 1460 void PGOUseFunc::annotateValueSites(uint32_t Kind) { 1461 assert(Kind <= IPVK_Last); 1462 unsigned ValueSiteIndex = 0; 1463 auto &ValueSites = FuncInfo.ValueSites[Kind]; 1464 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 1465 if (NumValueSites != ValueSites.size()) { 1466 auto &Ctx = M->getContext(); 1467 Ctx.diagnose(DiagnosticInfoPGOProfile( 1468 M->getName().data(), 1469 Twine("Inconsistent number of value sites for ") + 1470 Twine(ValueProfKindDescr[Kind]) + 1471 Twine(" profiling in \"") + F.getName().str() + 1472 Twine("\", possibly due to the use of a stale profile."), 1473 DS_Warning)); 1474 return; 1475 } 1476 1477 for (VPCandidateInfo &I : ValueSites) { 1478 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 1479 << "): Index = " << ValueSiteIndex << " out of " 1480 << NumValueSites << "\n"); 1481 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, 1482 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 1483 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations 1484 : MaxNumAnnotations); 1485 ValueSiteIndex++; 1486 } 1487 } 1488 1489 // Collect the set of members for each Comdat in module M and store 1490 // in ComdatMembers. 1491 static void collectComdatMembers( 1492 Module &M, 1493 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1494 if (!DoComdatRenaming) 1495 return; 1496 for (Function &F : M) 1497 if (Comdat *C = F.getComdat()) 1498 ComdatMembers.insert(std::make_pair(C, &F)); 1499 for (GlobalVariable &GV : M.globals()) 1500 if (Comdat *C = GV.getComdat()) 1501 ComdatMembers.insert(std::make_pair(C, &GV)); 1502 for (GlobalAlias &GA : M.aliases()) 1503 if (Comdat *C = GA.getComdat()) 1504 ComdatMembers.insert(std::make_pair(C, &GA)); 1505 } 1506 1507 static bool InstrumentAllFunctions( 1508 Module &M, function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1509 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { 1510 // For the context-sensitve instrumentation, we should have a separated pass 1511 // (before LTO/ThinLTO linking) to create these variables. 1512 if (!IsCS) 1513 createIRLevelProfileFlagVar(M, /* IsCS */ false); 1514 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1515 collectComdatMembers(M, ComdatMembers); 1516 1517 for (auto &F : M) { 1518 if (F.isDeclaration()) 1519 continue; 1520 auto *BPI = LookupBPI(F); 1521 auto *BFI = LookupBFI(F); 1522 instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers, IsCS); 1523 } 1524 return true; 1525 } 1526 1527 PreservedAnalyses 1528 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) { 1529 createProfileFileNameVar(M, CSInstrName); 1530 createIRLevelProfileFlagVar(M, /* IsCS */ true); 1531 return PreservedAnalyses::all(); 1532 } 1533 1534 bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) { 1535 if (skipModule(M)) 1536 return false; 1537 1538 auto LookupBPI = [this](Function &F) { 1539 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1540 }; 1541 auto LookupBFI = [this](Function &F) { 1542 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1543 }; 1544 return InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS); 1545 } 1546 1547 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1548 ModuleAnalysisManager &AM) { 1549 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1550 auto LookupBPI = [&FAM](Function &F) { 1551 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1552 }; 1553 1554 auto LookupBFI = [&FAM](Function &F) { 1555 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1556 }; 1557 1558 if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS)) 1559 return PreservedAnalyses::all(); 1560 1561 return PreservedAnalyses::none(); 1562 } 1563 1564 static bool annotateAllFunctions( 1565 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, 1566 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1567 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, 1568 ProfileSummaryInfo *PSI, bool IsCS) { 1569 LLVM_DEBUG(dbgs() << "Read in profile counters: "); 1570 auto &Ctx = M.getContext(); 1571 // Read the counter array from file. 1572 auto ReaderOrErr = 1573 IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName); 1574 if (Error E = ReaderOrErr.takeError()) { 1575 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 1576 Ctx.diagnose( 1577 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 1578 }); 1579 return false; 1580 } 1581 1582 std::unique_ptr<IndexedInstrProfReader> PGOReader = 1583 std::move(ReaderOrErr.get()); 1584 if (!PGOReader) { 1585 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 1586 StringRef("Cannot get PGOReader"))); 1587 return false; 1588 } 1589 if (!PGOReader->hasCSIRLevelProfile() && IsCS) 1590 return false; 1591 1592 // TODO: might need to change the warning once the clang option is finalized. 1593 if (!PGOReader->isIRLevelProfile()) { 1594 Ctx.diagnose(DiagnosticInfoPGOProfile( 1595 ProfileFileName.data(), "Not an IR level instrumentation profile")); 1596 return false; 1597 } 1598 1599 // Add the profile summary (read from the header of the indexed summary) here 1600 // so that we can use it below when reading counters (which checks if the 1601 // function should be marked with a cold or inlinehint attribute). 1602 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), 1603 IsCS ? ProfileSummary::PSK_CSInstr 1604 : ProfileSummary::PSK_Instr); 1605 1606 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1607 collectComdatMembers(M, ComdatMembers); 1608 std::vector<Function *> HotFunctions; 1609 std::vector<Function *> ColdFunctions; 1610 for (auto &F : M) { 1611 if (F.isDeclaration()) 1612 continue; 1613 auto *BPI = LookupBPI(F); 1614 auto *BFI = LookupBFI(F); 1615 // Split indirectbr critical edges here before computing the MST rather than 1616 // later in getInstrBB() to avoid invalidating it. 1617 SplitIndirectBrCriticalEdges(F, BPI, BFI); 1618 PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, PSI, IsCS); 1619 bool AllZeros = false; 1620 if (!Func.readCounters(PGOReader.get(), AllZeros)) 1621 continue; 1622 if (AllZeros) { 1623 F.setEntryCount(ProfileCount(0, Function::PCT_Real)); 1624 if (Func.getProgramMaxCount() != 0) 1625 ColdFunctions.push_back(&F); 1626 continue; 1627 } 1628 Func.populateCounters(); 1629 Func.setBranchWeights(); 1630 Func.annotateValueSites(); 1631 Func.annotateIrrLoopHeaderWeights(); 1632 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 1633 if (FreqAttr == PGOUseFunc::FFA_Cold) 1634 ColdFunctions.push_back(&F); 1635 else if (FreqAttr == PGOUseFunc::FFA_Hot) 1636 HotFunctions.push_back(&F); 1637 if (PGOViewCounts != PGOVCT_None && 1638 (ViewBlockFreqFuncName.empty() || 1639 F.getName().equals(ViewBlockFreqFuncName))) { 1640 LoopInfo LI{DominatorTree(F)}; 1641 std::unique_ptr<BranchProbabilityInfo> NewBPI = 1642 std::make_unique<BranchProbabilityInfo>(F, LI); 1643 std::unique_ptr<BlockFrequencyInfo> NewBFI = 1644 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); 1645 if (PGOViewCounts == PGOVCT_Graph) 1646 NewBFI->view(); 1647 else if (PGOViewCounts == PGOVCT_Text) { 1648 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n"; 1649 NewBFI->print(dbgs()); 1650 } 1651 } 1652 if (PGOViewRawCounts != PGOVCT_None && 1653 (ViewBlockFreqFuncName.empty() || 1654 F.getName().equals(ViewBlockFreqFuncName))) { 1655 if (PGOViewRawCounts == PGOVCT_Graph) 1656 if (ViewBlockFreqFuncName.empty()) 1657 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1658 else 1659 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1660 else if (PGOViewRawCounts == PGOVCT_Text) { 1661 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n"; 1662 Func.dumpInfo(); 1663 } 1664 } 1665 } 1666 1667 // Set function hotness attribute from the profile. 1668 // We have to apply these attributes at the end because their presence 1669 // can affect the BranchProbabilityInfo of any callers, resulting in an 1670 // inconsistent MST between prof-gen and prof-use. 1671 for (auto &F : HotFunctions) { 1672 F->addFnAttr(Attribute::InlineHint); 1673 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 1674 << "\n"); 1675 } 1676 for (auto &F : ColdFunctions) { 1677 F->addFnAttr(Attribute::Cold); 1678 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() 1679 << "\n"); 1680 } 1681 return true; 1682 } 1683 1684 PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, 1685 std::string RemappingFilename, 1686 bool IsCS) 1687 : ProfileFileName(std::move(Filename)), 1688 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) { 1689 if (!PGOTestProfileFile.empty()) 1690 ProfileFileName = PGOTestProfileFile; 1691 if (!PGOTestProfileRemappingFile.empty()) 1692 ProfileRemappingFileName = PGOTestProfileRemappingFile; 1693 } 1694 1695 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 1696 ModuleAnalysisManager &AM) { 1697 1698 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1699 auto LookupBPI = [&FAM](Function &F) { 1700 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1701 }; 1702 1703 auto LookupBFI = [&FAM](Function &F) { 1704 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1705 }; 1706 1707 auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); 1708 1709 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, 1710 LookupBPI, LookupBFI, PSI, IsCS)) 1711 return PreservedAnalyses::all(); 1712 1713 return PreservedAnalyses::none(); 1714 } 1715 1716 bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { 1717 if (skipModule(M)) 1718 return false; 1719 1720 auto LookupBPI = [this](Function &F) { 1721 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1722 }; 1723 auto LookupBFI = [this](Function &F) { 1724 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1725 }; 1726 1727 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 1728 return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI, PSI, 1729 IsCS); 1730 } 1731 1732 static std::string getSimpleNodeName(const BasicBlock *Node) { 1733 if (!Node->getName().empty()) 1734 return std::string(Node->getName()); 1735 1736 std::string SimpleNodeName; 1737 raw_string_ostream OS(SimpleNodeName); 1738 Node->printAsOperand(OS, false); 1739 return OS.str(); 1740 } 1741 1742 void llvm::setProfMetadata(Module *M, Instruction *TI, 1743 ArrayRef<uint64_t> EdgeCounts, 1744 uint64_t MaxCount) { 1745 MDBuilder MDB(M->getContext()); 1746 assert(MaxCount > 0 && "Bad max count"); 1747 uint64_t Scale = calculateCountScale(MaxCount); 1748 SmallVector<unsigned, 4> Weights; 1749 for (const auto &ECI : EdgeCounts) 1750 Weights.push_back(scaleBranchCount(ECI, Scale)); 1751 1752 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W 1753 : Weights) { 1754 dbgs() << W << " "; 1755 } dbgs() << "\n";); 1756 1757 misexpect::verifyMisExpect(TI, Weights, TI->getContext()); 1758 1759 TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1760 if (EmitBranchProbability) { 1761 std::string BrCondStr = getBranchCondString(TI); 1762 if (BrCondStr.empty()) 1763 return; 1764 1765 uint64_t WSum = 1766 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0, 1767 [](uint64_t w1, uint64_t w2) { return w1 + w2; }); 1768 uint64_t TotalCount = 1769 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0, 1770 [](uint64_t c1, uint64_t c2) { return c1 + c2; }); 1771 Scale = calculateCountScale(WSum); 1772 BranchProbability BP(scaleBranchCount(Weights[0], Scale), 1773 scaleBranchCount(WSum, Scale)); 1774 std::string BranchProbStr; 1775 raw_string_ostream OS(BranchProbStr); 1776 OS << BP; 1777 OS << " (total count : " << TotalCount << ")"; 1778 OS.flush(); 1779 Function *F = TI->getParent()->getParent(); 1780 OptimizationRemarkEmitter ORE(F); 1781 ORE.emit([&]() { 1782 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) 1783 << BrCondStr << " is true with probability : " << BranchProbStr; 1784 }); 1785 } 1786 } 1787 1788 namespace llvm { 1789 1790 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { 1791 MDBuilder MDB(M->getContext()); 1792 TI->setMetadata(llvm::LLVMContext::MD_irr_loop, 1793 MDB.createIrrLoopHeaderWeight(Count)); 1794 } 1795 1796 template <> struct GraphTraits<PGOUseFunc *> { 1797 using NodeRef = const BasicBlock *; 1798 using ChildIteratorType = const_succ_iterator; 1799 using nodes_iterator = pointer_iterator<Function::const_iterator>; 1800 1801 static NodeRef getEntryNode(const PGOUseFunc *G) { 1802 return &G->getFunc().front(); 1803 } 1804 1805 static ChildIteratorType child_begin(const NodeRef N) { 1806 return succ_begin(N); 1807 } 1808 1809 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } 1810 1811 static nodes_iterator nodes_begin(const PGOUseFunc *G) { 1812 return nodes_iterator(G->getFunc().begin()); 1813 } 1814 1815 static nodes_iterator nodes_end(const PGOUseFunc *G) { 1816 return nodes_iterator(G->getFunc().end()); 1817 } 1818 }; 1819 1820 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { 1821 explicit DOTGraphTraits(bool isSimple = false) 1822 : DefaultDOTGraphTraits(isSimple) {} 1823 1824 static std::string getGraphName(const PGOUseFunc *G) { 1825 return std::string(G->getFunc().getName()); 1826 } 1827 1828 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { 1829 std::string Result; 1830 raw_string_ostream OS(Result); 1831 1832 OS << getSimpleNodeName(Node) << ":\\l"; 1833 UseBBInfo *BI = Graph->findBBInfo(Node); 1834 OS << "Count : "; 1835 if (BI && BI->CountValid) 1836 OS << BI->CountValue << "\\l"; 1837 else 1838 OS << "Unknown\\l"; 1839 1840 if (!PGOInstrSelect) 1841 return Result; 1842 1843 for (auto BI = Node->begin(); BI != Node->end(); ++BI) { 1844 auto *I = &*BI; 1845 if (!isa<SelectInst>(I)) 1846 continue; 1847 // Display scaled counts for SELECT instruction: 1848 OS << "SELECT : { T = "; 1849 uint64_t TC, FC; 1850 bool HasProf = I->extractProfMetadata(TC, FC); 1851 if (!HasProf) 1852 OS << "Unknown, F = Unknown }\\l"; 1853 else 1854 OS << TC << ", F = " << FC << " }\\l"; 1855 } 1856 return Result; 1857 } 1858 }; 1859 1860 } // end namespace llvm 1861