1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements PGO instrumentation using a minimum spanning tree based 10 // on the following paper: 11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 13 // Issue 3, pp 313-322 14 // The idea of the algorithm based on the fact that for each node (except for 15 // the entry and exit), the sum of incoming edge counts equals the sum of 16 // outgoing edge counts. The count of edge on spanning tree can be derived from 17 // those edges not on the spanning tree. Knuth proves this method instruments 18 // the minimum number of edges. 19 // 20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree 21 // edges have higher frequencies (more likely to execute). The idea is to 22 // instrument those less frequently executed edges to reduce the runtime 23 // overhead of instrumented binaries. 24 // 25 // This file contains two passes: 26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 27 // count profile, and generates the instrumentation for indirect call 28 // profiling. 29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and 30 // annotates the branch weights. It also reads the indirect call value 31 // profiling records and annotate the indirect call instructions. 32 // 33 // To get the precise counter information, These two passes need to invoke at 34 // the same compilation point (so they see the same IR). For pass 35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For 36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 37 // the profile is opened in module level and passed to each PGOUseFunc instance. 38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 39 // in class FuncPGOInstrumentation. 40 // 41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 42 // BBInfo contains auxiliary information for each BB. These two classes are used 43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 45 // used in populating profile counters. 46 // The MST implementation is in Class CFGMST (CFGMST.h). 47 // 48 //===----------------------------------------------------------------------===// 49 50 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 51 #include "CFGMST.h" 52 #include "ValueProfileCollector.h" 53 #include "llvm/ADT/APInt.h" 54 #include "llvm/ADT/ArrayRef.h" 55 #include "llvm/ADT/STLExtras.h" 56 #include "llvm/ADT/SmallVector.h" 57 #include "llvm/ADT/Statistic.h" 58 #include "llvm/ADT/StringRef.h" 59 #include "llvm/ADT/Triple.h" 60 #include "llvm/ADT/Twine.h" 61 #include "llvm/ADT/iterator.h" 62 #include "llvm/ADT/iterator_range.h" 63 #include "llvm/Analysis/BlockFrequencyInfo.h" 64 #include "llvm/Analysis/BranchProbabilityInfo.h" 65 #include "llvm/Analysis/CFG.h" 66 #include "llvm/Analysis/EHPersonalities.h" 67 #include "llvm/Analysis/LoopInfo.h" 68 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 69 #include "llvm/Analysis/ProfileSummaryInfo.h" 70 #include "llvm/Analysis/TargetLibraryInfo.h" 71 #include "llvm/IR/Attributes.h" 72 #include "llvm/IR/BasicBlock.h" 73 #include "llvm/IR/CFG.h" 74 #include "llvm/IR/Comdat.h" 75 #include "llvm/IR/Constant.h" 76 #include "llvm/IR/Constants.h" 77 #include "llvm/IR/DiagnosticInfo.h" 78 #include "llvm/IR/Dominators.h" 79 #include "llvm/IR/Function.h" 80 #include "llvm/IR/GlobalAlias.h" 81 #include "llvm/IR/GlobalValue.h" 82 #include "llvm/IR/GlobalVariable.h" 83 #include "llvm/IR/IRBuilder.h" 84 #include "llvm/IR/InstVisitor.h" 85 #include "llvm/IR/InstrTypes.h" 86 #include "llvm/IR/Instruction.h" 87 #include "llvm/IR/Instructions.h" 88 #include "llvm/IR/IntrinsicInst.h" 89 #include "llvm/IR/Intrinsics.h" 90 #include "llvm/IR/LLVMContext.h" 91 #include "llvm/IR/MDBuilder.h" 92 #include "llvm/IR/Module.h" 93 #include "llvm/IR/PassManager.h" 94 #include "llvm/IR/ProfileSummary.h" 95 #include "llvm/IR/Type.h" 96 #include "llvm/IR/Value.h" 97 #include "llvm/InitializePasses.h" 98 #include "llvm/Pass.h" 99 #include "llvm/ProfileData/InstrProf.h" 100 #include "llvm/ProfileData/InstrProfReader.h" 101 #include "llvm/Support/BranchProbability.h" 102 #include "llvm/Support/CRC.h" 103 #include "llvm/Support/Casting.h" 104 #include "llvm/Support/CommandLine.h" 105 #include "llvm/Support/DOTGraphTraits.h" 106 #include "llvm/Support/Debug.h" 107 #include "llvm/Support/Error.h" 108 #include "llvm/Support/ErrorHandling.h" 109 #include "llvm/Support/GraphWriter.h" 110 #include "llvm/Support/raw_ostream.h" 111 #include "llvm/Transforms/Instrumentation.h" 112 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 113 #include "llvm/Transforms/Utils/MisExpect.h" 114 #include "llvm/Transforms/Utils/ModuleUtils.h" 115 #include <algorithm> 116 #include <cassert> 117 #include <cstdint> 118 #include <memory> 119 #include <numeric> 120 #include <string> 121 #include <unordered_map> 122 #include <utility> 123 #include <vector> 124 125 using namespace llvm; 126 using ProfileCount = Function::ProfileCount; 127 using VPCandidateInfo = ValueProfileCollector::CandidateInfo; 128 129 #define DEBUG_TYPE "pgo-instrumentation" 130 131 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 132 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 133 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); 134 STATISTIC(NumOfPGOEdge, "Number of edges."); 135 STATISTIC(NumOfPGOBB, "Number of basic-blocks."); 136 STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 137 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 138 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 139 STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 140 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 141 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); 142 STATISTIC(NumOfCSPGOSelectInsts, 143 "Number of select instruction instrumented in CSPGO."); 144 STATISTIC(NumOfCSPGOMemIntrinsics, 145 "Number of mem intrinsics instrumented in CSPGO."); 146 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); 147 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); 148 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO."); 149 STATISTIC(NumOfCSPGOFunc, 150 "Number of functions having valid profile counts in CSPGO."); 151 STATISTIC(NumOfCSPGOMismatch, 152 "Number of functions having mismatch profile in CSPGO."); 153 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); 154 155 // Command line option to specify the file to read profile from. This is 156 // mainly used for testing. 157 static cl::opt<std::string> 158 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, 159 cl::value_desc("filename"), 160 cl::desc("Specify the path of profile data file. This is" 161 "mainly for test purpose.")); 162 static cl::opt<std::string> PGOTestProfileRemappingFile( 163 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, 164 cl::value_desc("filename"), 165 cl::desc("Specify the path of profile remapping file. This is mainly for " 166 "test purpose.")); 167 168 // Command line option to disable value profiling. The default is false: 169 // i.e. value profiling is enabled by default. This is for debug purpose. 170 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 171 cl::Hidden, 172 cl::desc("Disable Value Profiling")); 173 174 // Command line option to set the maximum number of VP annotations to write to 175 // the metadata for a single indirect call callsite. 176 static cl::opt<unsigned> MaxNumAnnotations( 177 "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore, 178 cl::desc("Max number of annotations for a single indirect " 179 "call callsite")); 180 181 // Command line option to set the maximum number of value annotations 182 // to write to the metadata for a single memop intrinsic. 183 static cl::opt<unsigned> MaxNumMemOPAnnotations( 184 "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore, 185 cl::desc("Max number of preicise value annotations for a single memop" 186 "intrinsic")); 187 188 // Command line option to control appending FunctionHash to the name of a COMDAT 189 // function. This is to avoid the hash mismatch caused by the preinliner. 190 static cl::opt<bool> DoComdatRenaming( 191 "do-comdat-renaming", cl::init(false), cl::Hidden, 192 cl::desc("Append function hash to the name of COMDAT function to avoid " 193 "function hash mismatch due to the preinliner")); 194 195 // Command line option to enable/disable the warning about missing profile 196 // information. 197 static cl::opt<bool> 198 PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, 199 cl::desc("Use this option to turn on/off " 200 "warnings about missing profile data for " 201 "functions.")); 202 203 namespace llvm { 204 // Command line option to enable/disable the warning about a hash mismatch in 205 // the profile data. 206 cl::opt<bool> 207 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 208 cl::desc("Use this option to turn off/on " 209 "warnings about profile cfg mismatch.")); 210 } // namespace llvm 211 212 // Command line option to enable/disable the warning about a hash mismatch in 213 // the profile data for Comdat functions, which often turns out to be false 214 // positive due to the pre-instrumentation inline. 215 static cl::opt<bool> 216 NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), 217 cl::Hidden, 218 cl::desc("The option is used to turn on/off " 219 "warnings about hash mismatch for comdat " 220 "functions.")); 221 222 // Command line option to enable/disable select instruction instrumentation. 223 static cl::opt<bool> 224 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 225 cl::desc("Use this option to turn on/off SELECT " 226 "instruction instrumentation. ")); 227 228 // Command line option to turn on CFG dot or text dump of raw profile counts 229 static cl::opt<PGOViewCountsType> PGOViewRawCounts( 230 "pgo-view-raw-counts", cl::Hidden, 231 cl::desc("A boolean option to show CFG dag or text " 232 "with raw profile counts from " 233 "profile data. See also option " 234 "-pgo-view-counts. To limit graph " 235 "display to only one function, use " 236 "filtering option -view-bfi-func-name."), 237 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), 238 clEnumValN(PGOVCT_Graph, "graph", "show a graph."), 239 clEnumValN(PGOVCT_Text, "text", "show in text."))); 240 241 // Command line option to enable/disable memop intrinsic call.size profiling. 242 static cl::opt<bool> 243 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 244 cl::desc("Use this option to turn on/off " 245 "memory intrinsic size profiling.")); 246 247 // Emit branch probability as optimization remarks. 248 static cl::opt<bool> 249 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 250 cl::desc("When this option is on, the annotated " 251 "branch probability will be emitted as " 252 "optimization remarks: -{Rpass|" 253 "pass-remarks}=pgo-instrumentation")); 254 255 static cl::opt<bool> PGOInstrumentEntry( 256 "pgo-instrument-entry", cl::init(false), cl::Hidden, 257 cl::desc("Force to instrument function entry basicblock.")); 258 259 static cl::opt<bool> PGOFunctionEntryCoverage( 260 "pgo-function-entry-coverage", cl::init(false), cl::Hidden, cl::ZeroOrMore, 261 cl::desc( 262 "Use this option to enable function entry coverage instrumentation.")); 263 264 static cl::opt<bool> 265 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, 266 cl::desc("Fix function entry count in profile use.")); 267 268 static cl::opt<bool> PGOVerifyHotBFI( 269 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden, 270 cl::desc("Print out the non-match BFI count if a hot raw profile count " 271 "becomes non-hot, or a cold raw profile count becomes hot. " 272 "The print is enabled under -Rpass-analysis=pgo, or " 273 "internal option -pass-remakrs-analysis=pgo.")); 274 275 static cl::opt<bool> PGOVerifyBFI( 276 "pgo-verify-bfi", cl::init(false), cl::Hidden, 277 cl::desc("Print out mismatched BFI counts after setting profile metadata " 278 "The print is enabled under -Rpass-analysis=pgo, or " 279 "internal option -pass-remakrs-analysis=pgo.")); 280 281 static cl::opt<unsigned> PGOVerifyBFIRatio( 282 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, 283 cl::desc("Set the threshold for pgo-verify-bfi: only print out " 284 "mismatched BFI if the difference percentage is greater than " 285 "this value (in percentage).")); 286 287 static cl::opt<unsigned> PGOVerifyBFICutoff( 288 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, 289 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " 290 "profile count value is below.")); 291 292 namespace llvm { 293 // Command line option to turn on CFG dot dump after profile annotation. 294 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts 295 extern cl::opt<PGOViewCountsType> PGOViewCounts; 296 297 // Command line option to specify the name of the function for CFG dump 298 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= 299 extern cl::opt<std::string> ViewBlockFreqFuncName; 300 301 extern cl::opt<bool> DebugInfoCorrelate; 302 } // namespace llvm 303 304 static cl::opt<bool> 305 PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden, 306 cl::desc("Use the old CFG function hashing")); 307 308 // Return a string describing the branch condition that can be 309 // used in static branch probability heuristics: 310 static std::string getBranchCondString(Instruction *TI) { 311 BranchInst *BI = dyn_cast<BranchInst>(TI); 312 if (!BI || !BI->isConditional()) 313 return std::string(); 314 315 Value *Cond = BI->getCondition(); 316 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 317 if (!CI) 318 return std::string(); 319 320 std::string result; 321 raw_string_ostream OS(result); 322 OS << CmpInst::getPredicateName(CI->getPredicate()) << "_"; 323 CI->getOperand(0)->getType()->print(OS, true); 324 325 Value *RHS = CI->getOperand(1); 326 ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 327 if (CV) { 328 if (CV->isZero()) 329 OS << "_Zero"; 330 else if (CV->isOne()) 331 OS << "_One"; 332 else if (CV->isMinusOne()) 333 OS << "_MinusOne"; 334 else 335 OS << "_Const"; 336 } 337 OS.flush(); 338 return result; 339 } 340 341 static const char *ValueProfKindDescr[] = { 342 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, 343 #include "llvm/ProfileData/InstrProfData.inc" 344 }; 345 346 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime 347 // aware this is an ir_level profile so it can set the version flag. 348 static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) { 349 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); 350 Type *IntTy64 = Type::getInt64Ty(M.getContext()); 351 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); 352 if (IsCS) 353 ProfileVersion |= VARIANT_MASK_CSIR_PROF; 354 if (PGOInstrumentEntry) 355 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY; 356 if (DebugInfoCorrelate) 357 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE; 358 if (PGOFunctionEntryCoverage) 359 ProfileVersion |= 360 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY; 361 auto IRLevelVersionVariable = new GlobalVariable( 362 M, IntTy64, true, GlobalValue::WeakAnyLinkage, 363 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName); 364 IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility); 365 Triple TT(M.getTargetTriple()); 366 if (TT.supportsCOMDAT()) { 367 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage); 368 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName)); 369 } 370 return IRLevelVersionVariable; 371 } 372 373 namespace { 374 375 /// The select instruction visitor plays three roles specified 376 /// by the mode. In \c VM_counting mode, it simply counts the number of 377 /// select instructions. In \c VM_instrument mode, it inserts code to count 378 /// the number times TrueValue of select is taken. In \c VM_annotate mode, 379 /// it reads the profile data and annotate the select instruction with metadata. 380 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 381 class PGOUseFunc; 382 383 /// Instruction Visitor class to visit select instructions. 384 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 385 Function &F; 386 unsigned NSIs = 0; // Number of select instructions instrumented. 387 VisitMode Mode = VM_counting; // Visiting mode. 388 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 389 unsigned TotalNumCtrs = 0; // Total number of counters 390 GlobalVariable *FuncNameVar = nullptr; 391 uint64_t FuncHash = 0; 392 PGOUseFunc *UseFunc = nullptr; 393 394 SelectInstVisitor(Function &Func) : F(Func) {} 395 396 void countSelects(Function &Func) { 397 NSIs = 0; 398 Mode = VM_counting; 399 visit(Func); 400 } 401 402 // Visit the IR stream and instrument all select instructions. \p 403 // Ind is a pointer to the counter index variable; \p TotalNC 404 // is the total number of counters; \p FNV is the pointer to the 405 // PGO function name var; \p FHash is the function hash. 406 void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC, 407 GlobalVariable *FNV, uint64_t FHash) { 408 Mode = VM_instrument; 409 CurCtrIdx = Ind; 410 TotalNumCtrs = TotalNC; 411 FuncHash = FHash; 412 FuncNameVar = FNV; 413 visit(Func); 414 } 415 416 // Visit the IR stream and annotate all select instructions. 417 void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) { 418 Mode = VM_annotate; 419 UseFunc = UF; 420 CurCtrIdx = Ind; 421 visit(Func); 422 } 423 424 void instrumentOneSelectInst(SelectInst &SI); 425 void annotateOneSelectInst(SelectInst &SI); 426 427 // Visit \p SI instruction and perform tasks according to visit mode. 428 void visitSelectInst(SelectInst &SI); 429 430 // Return the number of select instructions. This needs be called after 431 // countSelects(). 432 unsigned getNumOfSelectInsts() const { return NSIs; } 433 }; 434 435 } // end anonymous namespace 436 437 namespace { 438 439 /// An MST based instrumentation for PGO 440 /// 441 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO 442 /// in the function level. 443 struct PGOEdge { 444 // This class implements the CFG edges. Note the CFG can be a multi-graph. 445 // So there might be multiple edges with same SrcBB and DestBB. 446 const BasicBlock *SrcBB; 447 const BasicBlock *DestBB; 448 uint64_t Weight; 449 bool InMST = false; 450 bool Removed = false; 451 bool IsCritical = false; 452 453 PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 454 : SrcBB(Src), DestBB(Dest), Weight(W) {} 455 456 // Return the information string of an edge. 457 std::string infoString() const { 458 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 459 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str(); 460 } 461 }; 462 463 // This class stores the auxiliary information for each BB. 464 struct BBInfo { 465 BBInfo *Group; 466 uint32_t Index; 467 uint32_t Rank = 0; 468 469 BBInfo(unsigned IX) : Group(this), Index(IX) {} 470 471 // Return the information string of this object. 472 std::string infoString() const { 473 return (Twine("Index=") + Twine(Index)).str(); 474 } 475 476 // Empty function -- only applicable to UseBBInfo. 477 void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 478 479 // Empty function -- only applicable to UseBBInfo. 480 void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 481 }; 482 483 // This class implements the CFG edges. Note the CFG can be a multi-graph. 484 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 485 private: 486 Function &F; 487 488 // Is this is context-sensitive instrumentation. 489 bool IsCS; 490 491 // A map that stores the Comdat group in function F. 492 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 493 494 ValueProfileCollector VPC; 495 496 void computeCFGHash(); 497 void renameComdatFunction(); 498 499 public: 500 std::vector<std::vector<VPCandidateInfo>> ValueSites; 501 SelectInstVisitor SIVisitor; 502 std::string FuncName; 503 GlobalVariable *FuncNameVar; 504 505 // CFG hash value for this function. 506 uint64_t FunctionHash = 0; 507 508 // The Minimum Spanning Tree of function CFG. 509 CFGMST<Edge, BBInfo> MST; 510 511 // Collect all the BBs that will be instrumented, and store them in 512 // InstrumentBBs. 513 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); 514 515 // Give an edge, find the BB that will be instrumented. 516 // Return nullptr if there is no BB to be instrumented. 517 BasicBlock *getInstrBB(Edge *E); 518 519 // Return the auxiliary BB information. 520 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 521 522 // Return the auxiliary BB information if available. 523 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 524 525 // Dump edges and BB information. 526 void dumpInfo(std::string Str = "") const { 527 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " + 528 Twine(FunctionHash) + "\t" + Str); 529 } 530 531 FuncPGOInstrumentation( 532 Function &Func, TargetLibraryInfo &TLI, 533 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 534 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 535 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false, 536 bool InstrumentFuncEntry = true) 537 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), 538 ValueSites(IPVK_Last + 1), SIVisitor(Func), 539 MST(F, InstrumentFuncEntry, BPI, BFI) { 540 // This should be done before CFG hash computation. 541 SIVisitor.countSelects(Func); 542 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); 543 if (!IsCS) { 544 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 545 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 546 NumOfPGOBB += MST.BBInfos.size(); 547 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); 548 } else { 549 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 550 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 551 NumOfCSPGOBB += MST.BBInfos.size(); 552 } 553 554 FuncName = getPGOFuncName(F); 555 computeCFGHash(); 556 if (!ComdatMembers.empty()) 557 renameComdatFunction(); 558 LLVM_DEBUG(dumpInfo("after CFGMST")); 559 560 for (auto &E : MST.AllEdges) { 561 if (E->Removed) 562 continue; 563 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; 564 if (!E->InMST) 565 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; 566 } 567 568 if (CreateGlobalVar) 569 FuncNameVar = createPGOFuncNameVar(F, FuncName); 570 } 571 }; 572 573 } // end anonymous namespace 574 575 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 576 // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers 577 // of selects, indirect calls, mem ops and edges. 578 template <class Edge, class BBInfo> 579 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 580 std::vector<uint8_t> Indexes; 581 JamCRC JC; 582 for (auto &BB : F) { 583 const Instruction *TI = BB.getTerminator(); 584 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 585 BasicBlock *Succ = TI->getSuccessor(I); 586 auto BI = findBBInfo(Succ); 587 if (BI == nullptr) 588 continue; 589 uint32_t Index = BI->Index; 590 for (int J = 0; J < 4; J++) 591 Indexes.push_back((uint8_t)(Index >> (J * 8))); 592 } 593 } 594 JC.update(Indexes); 595 596 JamCRC JCH; 597 if (PGOOldCFGHashing) { 598 // Hash format for context sensitive profile. Reserve 4 bits for other 599 // information. 600 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | 601 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | 602 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 | 603 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); 604 } else { 605 // The higher 32 bits. 606 auto updateJCH = [&JCH](uint64_t Num) { 607 uint8_t Data[8]; 608 support::endian::write64le(Data, Num); 609 JCH.update(Data); 610 }; 611 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts()); 612 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size()); 613 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size()); 614 updateJCH((uint64_t)MST.AllEdges.size()); 615 616 // Hash format for context sensitive profile. Reserve 4 bits for other 617 // information. 618 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); 619 } 620 621 // Reserve bit 60-63 for other information purpose. 622 FunctionHash &= 0x0FFFFFFFFFFFFFFF; 623 if (IsCS) 624 NamedInstrProfRecord::setCSFlagInHash(FunctionHash); 625 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" 626 << " CRC = " << JC.getCRC() 627 << ", Selects = " << SIVisitor.getNumOfSelectInsts() 628 << ", Edges = " << MST.AllEdges.size() << ", ICSites = " 629 << ValueSites[IPVK_IndirectCallTarget].size()); 630 if (!PGOOldCFGHashing) { 631 LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size() 632 << ", High32 CRC = " << JCH.getCRC()); 633 } 634 LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";); 635 } 636 637 // Check if we can safely rename this Comdat function. 638 static bool canRenameComdat( 639 Function &F, 640 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 641 if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 642 return false; 643 644 // FIXME: Current only handle those Comdat groups that only containing one 645 // function. 646 // (1) For a Comdat group containing multiple functions, we need to have a 647 // unique postfix based on the hashes for each function. There is a 648 // non-trivial code refactoring to do this efficiently. 649 // (2) Variables can not be renamed, so we can not rename Comdat function in a 650 // group including global vars. 651 Comdat *C = F.getComdat(); 652 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 653 assert(!isa<GlobalAlias>(CM.second)); 654 Function *FM = dyn_cast<Function>(CM.second); 655 if (FM != &F) 656 return false; 657 } 658 return true; 659 } 660 661 // Append the CFGHash to the Comdat function name. 662 template <class Edge, class BBInfo> 663 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 664 if (!canRenameComdat(F, ComdatMembers)) 665 return; 666 std::string OrigName = F.getName().str(); 667 std::string NewFuncName = 668 Twine(F.getName() + "." + Twine(FunctionHash)).str(); 669 F.setName(Twine(NewFuncName)); 670 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 671 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 672 Comdat *NewComdat; 673 Module *M = F.getParent(); 674 // For AvailableExternallyLinkage functions, change the linkage to 675 // LinkOnceODR and put them into comdat. This is because after renaming, there 676 // is no backup external copy available for the function. 677 if (!F.hasComdat()) { 678 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 679 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 680 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 681 F.setComdat(NewComdat); 682 return; 683 } 684 685 // This function belongs to a single function Comdat group. 686 Comdat *OrigComdat = F.getComdat(); 687 std::string NewComdatName = 688 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 689 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 690 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 691 692 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 693 // Must be a function. 694 cast<Function>(CM.second)->setComdat(NewComdat); 695 } 696 } 697 698 // Collect all the BBs that will be instruments and return them in 699 // InstrumentBBs and setup InEdges/OutEdge for UseBBInfo. 700 template <class Edge, class BBInfo> 701 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( 702 std::vector<BasicBlock *> &InstrumentBBs) { 703 // Use a worklist as we will update the vector during the iteration. 704 std::vector<Edge *> EdgeList; 705 EdgeList.reserve(MST.AllEdges.size()); 706 for (auto &E : MST.AllEdges) 707 EdgeList.push_back(E.get()); 708 709 for (auto &E : EdgeList) { 710 BasicBlock *InstrBB = getInstrBB(E); 711 if (InstrBB) 712 InstrumentBBs.push_back(InstrBB); 713 } 714 715 // Set up InEdges/OutEdges for all BBs. 716 for (auto &E : MST.AllEdges) { 717 if (E->Removed) 718 continue; 719 const BasicBlock *SrcBB = E->SrcBB; 720 const BasicBlock *DestBB = E->DestBB; 721 BBInfo &SrcInfo = getBBInfo(SrcBB); 722 BBInfo &DestInfo = getBBInfo(DestBB); 723 SrcInfo.addOutEdge(E.get()); 724 DestInfo.addInEdge(E.get()); 725 } 726 } 727 728 // Given a CFG E to be instrumented, find which BB to place the instrumented 729 // code. The function will split the critical edge if necessary. 730 template <class Edge, class BBInfo> 731 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 732 if (E->InMST || E->Removed) 733 return nullptr; 734 735 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 736 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 737 // For a fake edge, instrument the real BB. 738 if (SrcBB == nullptr) 739 return DestBB; 740 if (DestBB == nullptr) 741 return SrcBB; 742 743 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { 744 // There are basic blocks (such as catchswitch) cannot be instrumented. 745 // If the returned first insertion point is the end of BB, skip this BB. 746 if (BB->getFirstInsertionPt() == BB->end()) 747 return nullptr; 748 return BB; 749 }; 750 751 // Instrument the SrcBB if it has a single successor, 752 // otherwise, the DestBB if this is not a critical edge. 753 Instruction *TI = SrcBB->getTerminator(); 754 if (TI->getNumSuccessors() <= 1) 755 return canInstrument(SrcBB); 756 if (!E->IsCritical) 757 return canInstrument(DestBB); 758 759 // Some IndirectBr critical edges cannot be split by the previous 760 // SplitIndirectBrCriticalEdges call. Bail out. 761 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 762 BasicBlock *InstrBB = 763 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum); 764 if (!InstrBB) { 765 LLVM_DEBUG( 766 dbgs() << "Fail to split critical edge: not instrument this edge.\n"); 767 return nullptr; 768 } 769 // For a critical edge, we have to split. Instrument the newly 770 // created BB. 771 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; 772 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index 773 << " --> " << getBBInfo(DestBB).Index << "\n"); 774 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. 775 MST.addEdge(SrcBB, InstrBB, 0); 776 // Second one: Add new edge of InstrBB->DestBB. 777 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); 778 NewEdge1.InMST = true; 779 E->Removed = true; 780 781 return canInstrument(InstrBB); 782 } 783 784 // When generating value profiling calls on Windows routines that make use of 785 // handler funclets for exception processing an operand bundle needs to attached 786 // to the called function. This routine will set \p OpBundles to contain the 787 // funclet information, if any is needed, that should be placed on the generated 788 // value profiling call for the value profile candidate call. 789 static void 790 populateEHOperandBundle(VPCandidateInfo &Cand, 791 DenseMap<BasicBlock *, ColorVector> &BlockColors, 792 SmallVectorImpl<OperandBundleDef> &OpBundles) { 793 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst); 794 if (!OrigCall) 795 return; 796 797 if (!isa<IntrinsicInst>(OrigCall)) { 798 // The instrumentation call should belong to the same funclet as a 799 // non-intrinsic call, so just copy the operand bundle, if any exists. 800 Optional<OperandBundleUse> ParentFunclet = 801 OrigCall->getOperandBundle(LLVMContext::OB_funclet); 802 if (ParentFunclet) 803 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet)); 804 } else { 805 // Intrinsics or other instructions do not get funclet information from the 806 // front-end. Need to use the BlockColors that was computed by the routine 807 // colorEHFunclets to determine whether a funclet is needed. 808 if (!BlockColors.empty()) { 809 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second; 810 assert(CV.size() == 1 && "non-unique color for block!"); 811 Instruction *EHPad = CV.front()->getFirstNonPHI(); 812 if (EHPad->isEHPad()) 813 OpBundles.emplace_back("funclet", EHPad); 814 } 815 } 816 } 817 818 // Visit all edge and instrument the edges not in MST, and do value profiling. 819 // Critical edges will be split. 820 static void instrumentOneFunc( 821 Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, 822 BlockFrequencyInfo *BFI, 823 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 824 bool IsCS) { 825 // Split indirectbr critical edges here before computing the MST rather than 826 // later in getInstrBB() to avoid invalidating it. 827 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); 828 829 FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo( 830 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry); 831 832 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); 833 auto Name = ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy); 834 auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()), 835 FuncInfo.FunctionHash); 836 if (PGOFunctionEntryCoverage) { 837 assert(!IsCS && 838 "entry coverge does not support context-sensitive instrumentation"); 839 auto &EntryBB = F.getEntryBlock(); 840 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); 841 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>, 842 // i32 <index>) 843 Builder.CreateCall( 844 Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover), 845 {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)}); 846 return; 847 } 848 849 std::vector<BasicBlock *> InstrumentBBs; 850 FuncInfo.getInstrumentBBs(InstrumentBBs); 851 unsigned NumCounters = 852 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 853 854 uint32_t I = 0; 855 for (auto *InstrBB : InstrumentBBs) { 856 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 857 assert(Builder.GetInsertPoint() != InstrBB->end() && 858 "Cannot get the Instrumentation point"); 859 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>, 860 // i32 <index>) 861 Builder.CreateCall( 862 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), 863 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)}); 864 } 865 866 // Now instrument select instructions: 867 FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar, 868 FuncInfo.FunctionHash); 869 assert(I == NumCounters); 870 871 if (DisableValueProfiling) 872 return; 873 874 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); 875 876 // Intrinsic function calls do not have funclet operand bundles needed for 877 // Windows exception handling attached to them. However, if value profiling is 878 // inserted for one of these calls, then a funclet value will need to be set 879 // on the instrumentation call based on the funclet coloring. 880 DenseMap<BasicBlock *, ColorVector> BlockColors; 881 if (F.hasPersonalityFn() && 882 isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) 883 BlockColors = colorEHFunclets(F); 884 885 // For each VP Kind, walk the VP candidates and instrument each one. 886 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { 887 unsigned SiteIndex = 0; 888 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) 889 continue; 890 891 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { 892 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] 893 << " site: CallSite Index = " << SiteIndex << "\n"); 894 895 IRBuilder<> Builder(Cand.InsertPt); 896 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && 897 "Cannot get the Instrumentation point"); 898 899 Value *ToProfile = nullptr; 900 if (Cand.V->getType()->isIntegerTy()) 901 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty()); 902 else if (Cand.V->getType()->isPointerTy()) 903 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); 904 assert(ToProfile && "value profiling Value is of unexpected type"); 905 906 SmallVector<OperandBundleDef, 1> OpBundles; 907 populateEHOperandBundle(Cand, BlockColors, OpBundles); 908 Builder.CreateCall( 909 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 910 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 911 Builder.getInt64(FuncInfo.FunctionHash), ToProfile, 912 Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}, 913 OpBundles); 914 } 915 } // IPVK_First <= Kind <= IPVK_Last 916 } 917 918 namespace { 919 920 // This class represents a CFG edge in profile use compilation. 921 struct PGOUseEdge : public PGOEdge { 922 bool CountValid = false; 923 uint64_t CountValue = 0; 924 925 PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 926 : PGOEdge(Src, Dest, W) {} 927 928 // Set edge count value 929 void setEdgeCount(uint64_t Value) { 930 CountValue = Value; 931 CountValid = true; 932 } 933 934 // Return the information string for this object. 935 std::string infoString() const { 936 if (!CountValid) 937 return PGOEdge::infoString(); 938 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) 939 .str(); 940 } 941 }; 942 943 using DirectEdges = SmallVector<PGOUseEdge *, 2>; 944 945 // This class stores the auxiliary information for each BB. 946 struct UseBBInfo : public BBInfo { 947 uint64_t CountValue = 0; 948 bool CountValid; 949 int32_t UnknownCountInEdge = 0; 950 int32_t UnknownCountOutEdge = 0; 951 DirectEdges InEdges; 952 DirectEdges OutEdges; 953 954 UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {} 955 956 UseBBInfo(unsigned IX, uint64_t C) 957 : BBInfo(IX), CountValue(C), CountValid(true) {} 958 959 // Set the profile count value for this BB. 960 void setBBInfoCount(uint64_t Value) { 961 CountValue = Value; 962 CountValid = true; 963 } 964 965 // Return the information string of this object. 966 std::string infoString() const { 967 if (!CountValid) 968 return BBInfo::infoString(); 969 return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str(); 970 } 971 972 // Add an OutEdge and update the edge count. 973 void addOutEdge(PGOUseEdge *E) { 974 OutEdges.push_back(E); 975 UnknownCountOutEdge++; 976 } 977 978 // Add an InEdge and update the edge count. 979 void addInEdge(PGOUseEdge *E) { 980 InEdges.push_back(E); 981 UnknownCountInEdge++; 982 } 983 }; 984 985 } // end anonymous namespace 986 987 // Sum up the count values for all the edges. 988 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 989 uint64_t Total = 0; 990 for (auto &E : Edges) { 991 if (E->Removed) 992 continue; 993 Total += E->CountValue; 994 } 995 return Total; 996 } 997 998 namespace { 999 1000 class PGOUseFunc { 1001 public: 1002 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, 1003 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 1004 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, 1005 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry) 1006 : F(Func), M(Modu), BFI(BFIin), PSI(PSI), 1007 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS, 1008 InstrumentFuncEntry), 1009 FreqAttr(FFA_Normal), IsCS(IsCS) {} 1010 1011 // Read counts for the instrumented BB from profile. 1012 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1013 bool &AllMinusOnes); 1014 1015 // Populate the counts for all BBs. 1016 void populateCounters(); 1017 1018 // Set the branch weights based on the count values. 1019 void setBranchWeights(); 1020 1021 // Annotate the value profile call sites for all value kind. 1022 void annotateValueSites(); 1023 1024 // Annotate the value profile call sites for one value kind. 1025 void annotateValueSites(uint32_t Kind); 1026 1027 // Annotate the irreducible loop header weights. 1028 void annotateIrrLoopHeaderWeights(); 1029 1030 // The hotness of the function from the profile count. 1031 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 1032 1033 // Return the function hotness from the profile. 1034 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 1035 1036 // Return the function hash. 1037 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 1038 1039 // Return the profile record for this function; 1040 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 1041 1042 // Return the auxiliary BB information. 1043 UseBBInfo &getBBInfo(const BasicBlock *BB) const { 1044 return FuncInfo.getBBInfo(BB); 1045 } 1046 1047 // Return the auxiliary BB information if available. 1048 UseBBInfo *findBBInfo(const BasicBlock *BB) const { 1049 return FuncInfo.findBBInfo(BB); 1050 } 1051 1052 Function &getFunc() const { return F; } 1053 1054 void dumpInfo(std::string Str = "") const { 1055 FuncInfo.dumpInfo(Str); 1056 } 1057 1058 uint64_t getProgramMaxCount() const { return ProgramMaxCount; } 1059 private: 1060 Function &F; 1061 Module *M; 1062 BlockFrequencyInfo *BFI; 1063 ProfileSummaryInfo *PSI; 1064 1065 // This member stores the shared information with class PGOGenFunc. 1066 FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo; 1067 1068 // The maximum count value in the profile. This is only used in PGO use 1069 // compilation. 1070 uint64_t ProgramMaxCount; 1071 1072 // Position of counter that remains to be read. 1073 uint32_t CountPosition = 0; 1074 1075 // Total size of the profile count for this function. 1076 uint32_t ProfileCountSize = 0; 1077 1078 // ProfileRecord for this function. 1079 InstrProfRecord ProfileRecord; 1080 1081 // Function hotness info derived from profile. 1082 FuncFreqAttr FreqAttr; 1083 1084 // Is to use the context sensitive profile. 1085 bool IsCS; 1086 1087 // Find the Instrumented BB and set the value. Return false on error. 1088 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 1089 1090 // Set the edge counter value for the unknown edge -- there should be only 1091 // one unknown edge. 1092 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 1093 1094 // Return FuncName string; 1095 std::string getFuncName() const { return FuncInfo.FuncName; } 1096 1097 // Set the hot/cold inline hints based on the count values. 1098 // FIXME: This function should be removed once the functionality in 1099 // the inliner is implemented. 1100 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 1101 if (PSI->isHotCount(EntryCount)) 1102 FreqAttr = FFA_Hot; 1103 else if (PSI->isColdCount(MaxCount)) 1104 FreqAttr = FFA_Cold; 1105 } 1106 }; 1107 1108 } // end anonymous namespace 1109 1110 // Visit all the edges and assign the count value for the instrumented 1111 // edges and the BB. Return false on error. 1112 bool PGOUseFunc::setInstrumentedCounts( 1113 const std::vector<uint64_t> &CountFromProfile) { 1114 1115 std::vector<BasicBlock *> InstrumentBBs; 1116 FuncInfo.getInstrumentBBs(InstrumentBBs); 1117 unsigned NumCounters = 1118 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 1119 // The number of counters here should match the number of counters 1120 // in profile. Return if they mismatch. 1121 if (NumCounters != CountFromProfile.size()) { 1122 return false; 1123 } 1124 auto *FuncEntry = &*F.begin(); 1125 1126 // Set the profile count to the Instrumented BBs. 1127 uint32_t I = 0; 1128 for (BasicBlock *InstrBB : InstrumentBBs) { 1129 uint64_t CountValue = CountFromProfile[I++]; 1130 UseBBInfo &Info = getBBInfo(InstrBB); 1131 // If we reach here, we know that we have some nonzero count 1132 // values in this function. The entry count should not be 0. 1133 // Fix it if necessary. 1134 if (InstrBB == FuncEntry && CountValue == 0) 1135 CountValue = 1; 1136 Info.setBBInfoCount(CountValue); 1137 } 1138 ProfileCountSize = CountFromProfile.size(); 1139 CountPosition = I; 1140 1141 // Set the edge count and update the count of unknown edges for BBs. 1142 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { 1143 E->setEdgeCount(Value); 1144 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1145 this->getBBInfo(E->DestBB).UnknownCountInEdge--; 1146 }; 1147 1148 // Set the profile count the Instrumented edges. There are BBs that not in 1149 // MST but not instrumented. Need to set the edge count value so that we can 1150 // populate the profile counts later. 1151 for (auto &E : FuncInfo.MST.AllEdges) { 1152 if (E->Removed || E->InMST) 1153 continue; 1154 const BasicBlock *SrcBB = E->SrcBB; 1155 UseBBInfo &SrcInfo = getBBInfo(SrcBB); 1156 1157 // If only one out-edge, the edge profile count should be the same as BB 1158 // profile count. 1159 if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1) 1160 setEdgeCount(E.get(), SrcInfo.CountValue); 1161 else { 1162 const BasicBlock *DestBB = E->DestBB; 1163 UseBBInfo &DestInfo = getBBInfo(DestBB); 1164 // If only one in-edge, the edge profile count should be the same as BB 1165 // profile count. 1166 if (DestInfo.CountValid && DestInfo.InEdges.size() == 1) 1167 setEdgeCount(E.get(), DestInfo.CountValue); 1168 } 1169 if (E->CountValid) 1170 continue; 1171 // E's count should have been set from profile. If not, this meenas E skips 1172 // the instrumentation. We set the count to 0. 1173 setEdgeCount(E.get(), 0); 1174 } 1175 return true; 1176 } 1177 1178 // Set the count value for the unknown edge. There should be one and only one 1179 // unknown edge in Edges vector. 1180 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 1181 for (auto &E : Edges) { 1182 if (E->CountValid) 1183 continue; 1184 E->setEdgeCount(Value); 1185 1186 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1187 getBBInfo(E->DestBB).UnknownCountInEdge--; 1188 return; 1189 } 1190 llvm_unreachable("Cannot find the unknown count edge"); 1191 } 1192 1193 // Emit function metadata indicating PGO profile mismatch. 1194 static void annotateFunctionWithHashMismatch(Function &F, 1195 LLVMContext &ctx) { 1196 const char MetadataName[] = "instr_prof_hash_mismatch"; 1197 SmallVector<Metadata *, 2> Names; 1198 // If this metadata already exists, ignore. 1199 auto *Existing = F.getMetadata(LLVMContext::MD_annotation); 1200 if (Existing) { 1201 MDTuple *Tuple = cast<MDTuple>(Existing); 1202 for (auto &N : Tuple->operands()) { 1203 if (cast<MDString>(N.get())->getString() == MetadataName) 1204 return; 1205 Names.push_back(N.get()); 1206 } 1207 } 1208 1209 MDBuilder MDB(ctx); 1210 Names.push_back(MDB.createString(MetadataName)); 1211 MDNode *MD = MDTuple::get(ctx, Names); 1212 F.setMetadata(LLVMContext::MD_annotation, MD); 1213 } 1214 1215 // Read the profile from ProfileFileName and assign the value to the 1216 // instrumented BB and the edges. This function also updates ProgramMaxCount. 1217 // Return true if the profile are successfully read, and false on errors. 1218 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1219 bool &AllMinusOnes) { 1220 auto &Ctx = M->getContext(); 1221 Expected<InstrProfRecord> Result = 1222 PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); 1223 if (Error E = Result.takeError()) { 1224 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 1225 auto Err = IPE.get(); 1226 bool SkipWarning = false; 1227 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " 1228 << FuncInfo.FuncName << ": "); 1229 if (Err == instrprof_error::unknown_function) { 1230 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; 1231 SkipWarning = !PGOWarnMissing; 1232 LLVM_DEBUG(dbgs() << "unknown function"); 1233 } else if (Err == instrprof_error::hash_mismatch || 1234 Err == instrprof_error::malformed) { 1235 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; 1236 SkipWarning = 1237 NoPGOWarnMismatch || 1238 (NoPGOWarnMismatchComdat && 1239 (F.hasComdat() || 1240 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 1241 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); 1242 // Emit function metadata indicating PGO profile mismatch. 1243 annotateFunctionWithHashMismatch(F, M->getContext()); 1244 } 1245 1246 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); 1247 if (SkipWarning) 1248 return; 1249 1250 std::string Msg = IPE.message() + std::string(" ") + F.getName().str() + 1251 std::string(" Hash = ") + 1252 std::to_string(FuncInfo.FunctionHash); 1253 1254 Ctx.diagnose( 1255 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1256 }); 1257 return false; 1258 } 1259 ProfileRecord = std::move(Result.get()); 1260 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 1261 1262 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 1263 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 1264 AllMinusOnes = (CountFromProfile.size() > 0); 1265 uint64_t ValueSum = 0; 1266 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 1267 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 1268 ValueSum += CountFromProfile[I]; 1269 if (CountFromProfile[I] != (uint64_t)-1) 1270 AllMinusOnes = false; 1271 } 1272 AllZeros = (ValueSum == 0); 1273 1274 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 1275 1276 getBBInfo(nullptr).UnknownCountOutEdge = 2; 1277 getBBInfo(nullptr).UnknownCountInEdge = 2; 1278 1279 if (!setInstrumentedCounts(CountFromProfile)) { 1280 LLVM_DEBUG( 1281 dbgs() << "Inconsistent number of counts, skipping this function"); 1282 Ctx.diagnose(DiagnosticInfoPGOProfile( 1283 M->getName().data(), 1284 Twine("Inconsistent number of counts in ") + F.getName().str() 1285 + Twine(": the profile may be stale or there is a function name collision."), 1286 DS_Warning)); 1287 return false; 1288 } 1289 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); 1290 return true; 1291 } 1292 1293 // Populate the counters from instrumented BBs to all BBs. 1294 // In the end of this operation, all BBs should have a valid count value. 1295 void PGOUseFunc::populateCounters() { 1296 bool Changes = true; 1297 unsigned NumPasses = 0; 1298 while (Changes) { 1299 NumPasses++; 1300 Changes = false; 1301 1302 // For efficient traversal, it's better to start from the end as most 1303 // of the instrumented edges are at the end. 1304 for (auto &BB : reverse(F)) { 1305 UseBBInfo *Count = findBBInfo(&BB); 1306 if (Count == nullptr) 1307 continue; 1308 if (!Count->CountValid) { 1309 if (Count->UnknownCountOutEdge == 0) { 1310 Count->CountValue = sumEdgeCount(Count->OutEdges); 1311 Count->CountValid = true; 1312 Changes = true; 1313 } else if (Count->UnknownCountInEdge == 0) { 1314 Count->CountValue = sumEdgeCount(Count->InEdges); 1315 Count->CountValid = true; 1316 Changes = true; 1317 } 1318 } 1319 if (Count->CountValid) { 1320 if (Count->UnknownCountOutEdge == 1) { 1321 uint64_t Total = 0; 1322 uint64_t OutSum = sumEdgeCount(Count->OutEdges); 1323 // If the one of the successor block can early terminate (no-return), 1324 // we can end up with situation where out edge sum count is larger as 1325 // the source BB's count is collected by a post-dominated block. 1326 if (Count->CountValue > OutSum) 1327 Total = Count->CountValue - OutSum; 1328 setEdgeCount(Count->OutEdges, Total); 1329 Changes = true; 1330 } 1331 if (Count->UnknownCountInEdge == 1) { 1332 uint64_t Total = 0; 1333 uint64_t InSum = sumEdgeCount(Count->InEdges); 1334 if (Count->CountValue > InSum) 1335 Total = Count->CountValue - InSum; 1336 setEdgeCount(Count->InEdges, Total); 1337 Changes = true; 1338 } 1339 } 1340 } 1341 } 1342 1343 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 1344 (void) NumPasses; 1345 #ifndef NDEBUG 1346 // Assert every BB has a valid counter. 1347 for (auto &BB : F) { 1348 auto BI = findBBInfo(&BB); 1349 if (BI == nullptr) 1350 continue; 1351 assert(BI->CountValid && "BB count is not valid"); 1352 } 1353 #endif 1354 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 1355 uint64_t FuncMaxCount = FuncEntryCount; 1356 for (auto &BB : F) { 1357 auto BI = findBBInfo(&BB); 1358 if (BI == nullptr) 1359 continue; 1360 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); 1361 } 1362 1363 // Fix the obviously inconsistent entry count. 1364 if (FuncMaxCount > 0 && FuncEntryCount == 0) 1365 FuncEntryCount = 1; 1366 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); 1367 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 1368 1369 // Now annotate select instructions 1370 FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition); 1371 assert(CountPosition == ProfileCountSize); 1372 1373 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); 1374 } 1375 1376 // Assign the scaled count values to the BB with multiple out edges. 1377 void PGOUseFunc::setBranchWeights() { 1378 // Generate MD_prof metadata for every branch instruction. 1379 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() 1380 << " IsCS=" << IsCS << "\n"); 1381 for (auto &BB : F) { 1382 Instruction *TI = BB.getTerminator(); 1383 if (TI->getNumSuccessors() < 2) 1384 continue; 1385 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || 1386 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI))) 1387 continue; 1388 1389 if (getBBInfo(&BB).CountValue == 0) 1390 continue; 1391 1392 // We have a non-zero Branch BB. 1393 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1394 unsigned Size = BBCountInfo.OutEdges.size(); 1395 SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 1396 uint64_t MaxCount = 0; 1397 for (unsigned s = 0; s < Size; s++) { 1398 const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 1399 const BasicBlock *SrcBB = E->SrcBB; 1400 const BasicBlock *DestBB = E->DestBB; 1401 if (DestBB == nullptr) 1402 continue; 1403 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 1404 uint64_t EdgeCount = E->CountValue; 1405 if (EdgeCount > MaxCount) 1406 MaxCount = EdgeCount; 1407 EdgeCounts[SuccNum] = EdgeCount; 1408 } 1409 setProfMetadata(M, TI, EdgeCounts, MaxCount); 1410 } 1411 } 1412 1413 static bool isIndirectBrTarget(BasicBlock *BB) { 1414 for (BasicBlock *Pred : predecessors(BB)) { 1415 if (isa<IndirectBrInst>(Pred->getTerminator())) 1416 return true; 1417 } 1418 return false; 1419 } 1420 1421 void PGOUseFunc::annotateIrrLoopHeaderWeights() { 1422 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); 1423 // Find irr loop headers 1424 for (auto &BB : F) { 1425 // As a heuristic also annotate indrectbr targets as they have a high chance 1426 // to become an irreducible loop header after the indirectbr tail 1427 // duplication. 1428 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { 1429 Instruction *TI = BB.getTerminator(); 1430 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1431 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); 1432 } 1433 } 1434 } 1435 1436 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 1437 if (PGOFunctionEntryCoverage) 1438 return; 1439 Module *M = F.getParent(); 1440 IRBuilder<> Builder(&SI); 1441 Type *Int64Ty = Builder.getInt64Ty(); 1442 Type *I8PtrTy = Builder.getInt8PtrTy(); 1443 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 1444 Builder.CreateCall( 1445 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 1446 {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1447 Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), 1448 Builder.getInt32(*CurCtrIdx), Step}); 1449 ++(*CurCtrIdx); 1450 } 1451 1452 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 1453 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1454 assert(*CurCtrIdx < CountFromProfile.size() && 1455 "Out of bound access of counters"); 1456 uint64_t SCounts[2]; 1457 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1458 ++(*CurCtrIdx); 1459 uint64_t TotalCount = 0; 1460 auto BI = UseFunc->findBBInfo(SI.getParent()); 1461 if (BI != nullptr) 1462 TotalCount = BI->CountValue; 1463 // False Count 1464 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1465 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1466 if (MaxCount) 1467 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1468 } 1469 1470 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1471 if (!PGOInstrSelect) 1472 return; 1473 // FIXME: do not handle this yet. 1474 if (SI.getCondition()->getType()->isVectorTy()) 1475 return; 1476 1477 switch (Mode) { 1478 case VM_counting: 1479 NSIs++; 1480 return; 1481 case VM_instrument: 1482 instrumentOneSelectInst(SI); 1483 return; 1484 case VM_annotate: 1485 annotateOneSelectInst(SI); 1486 return; 1487 } 1488 1489 llvm_unreachable("Unknown visiting mode"); 1490 } 1491 1492 // Traverse all valuesites and annotate the instructions for all value kind. 1493 void PGOUseFunc::annotateValueSites() { 1494 if (DisableValueProfiling) 1495 return; 1496 1497 // Create the PGOFuncName meta data. 1498 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1499 1500 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1501 annotateValueSites(Kind); 1502 } 1503 1504 // Annotate the instructions for a specific value kind. 1505 void PGOUseFunc::annotateValueSites(uint32_t Kind) { 1506 assert(Kind <= IPVK_Last); 1507 unsigned ValueSiteIndex = 0; 1508 auto &ValueSites = FuncInfo.ValueSites[Kind]; 1509 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 1510 if (NumValueSites != ValueSites.size()) { 1511 auto &Ctx = M->getContext(); 1512 Ctx.diagnose(DiagnosticInfoPGOProfile( 1513 M->getName().data(), 1514 Twine("Inconsistent number of value sites for ") + 1515 Twine(ValueProfKindDescr[Kind]) + 1516 Twine(" profiling in \"") + F.getName().str() + 1517 Twine("\", possibly due to the use of a stale profile."), 1518 DS_Warning)); 1519 return; 1520 } 1521 1522 for (VPCandidateInfo &I : ValueSites) { 1523 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 1524 << "): Index = " << ValueSiteIndex << " out of " 1525 << NumValueSites << "\n"); 1526 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, 1527 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 1528 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations 1529 : MaxNumAnnotations); 1530 ValueSiteIndex++; 1531 } 1532 } 1533 1534 // Collect the set of members for each Comdat in module M and store 1535 // in ComdatMembers. 1536 static void collectComdatMembers( 1537 Module &M, 1538 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1539 if (!DoComdatRenaming) 1540 return; 1541 for (Function &F : M) 1542 if (Comdat *C = F.getComdat()) 1543 ComdatMembers.insert(std::make_pair(C, &F)); 1544 for (GlobalVariable &GV : M.globals()) 1545 if (Comdat *C = GV.getComdat()) 1546 ComdatMembers.insert(std::make_pair(C, &GV)); 1547 for (GlobalAlias &GA : M.aliases()) 1548 if (Comdat *C = GA.getComdat()) 1549 ComdatMembers.insert(std::make_pair(C, &GA)); 1550 } 1551 1552 static bool InstrumentAllFunctions( 1553 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1554 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1555 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { 1556 // For the context-sensitve instrumentation, we should have a separated pass 1557 // (before LTO/ThinLTO linking) to create these variables. 1558 if (!IsCS) 1559 createIRLevelProfileFlagVar(M, /*IsCS=*/false); 1560 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1561 collectComdatMembers(M, ComdatMembers); 1562 1563 for (auto &F : M) { 1564 if (F.isDeclaration()) 1565 continue; 1566 if (F.hasFnAttribute(llvm::Attribute::NoProfile)) 1567 continue; 1568 auto &TLI = LookupTLI(F); 1569 auto *BPI = LookupBPI(F); 1570 auto *BFI = LookupBFI(F); 1571 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS); 1572 } 1573 return true; 1574 } 1575 1576 PreservedAnalyses 1577 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) { 1578 createProfileFileNameVar(M, CSInstrName); 1579 // The variable in a comdat may be discarded by LTO. Ensure the declaration 1580 // will be retained. 1581 appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true)); 1582 return PreservedAnalyses::all(); 1583 } 1584 1585 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1586 ModuleAnalysisManager &AM) { 1587 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1588 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1589 return FAM.getResult<TargetLibraryAnalysis>(F); 1590 }; 1591 auto LookupBPI = [&FAM](Function &F) { 1592 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1593 }; 1594 auto LookupBFI = [&FAM](Function &F) { 1595 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1596 }; 1597 1598 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS)) 1599 return PreservedAnalyses::all(); 1600 1601 return PreservedAnalyses::none(); 1602 } 1603 1604 // Using the ratio b/w sums of profile count values and BFI count values to 1605 // adjust the func entry count. 1606 static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, 1607 BranchProbabilityInfo &NBPI) { 1608 Function &F = Func.getFunc(); 1609 BlockFrequencyInfo NBFI(F, NBPI, LI); 1610 #ifndef NDEBUG 1611 auto BFIEntryCount = F.getEntryCount(); 1612 assert(BFIEntryCount.hasValue() && (BFIEntryCount->getCount() > 0) && 1613 "Invalid BFI Entrycount"); 1614 #endif 1615 auto SumCount = APFloat::getZero(APFloat::IEEEdouble()); 1616 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble()); 1617 for (auto &BBI : F) { 1618 uint64_t CountValue = 0; 1619 uint64_t BFICountValue = 0; 1620 if (!Func.findBBInfo(&BBI)) 1621 continue; 1622 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1623 CountValue = Func.getBBInfo(&BBI).CountValue; 1624 BFICountValue = BFICount.getValue(); 1625 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven); 1626 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven); 1627 } 1628 if (SumCount.isZero()) 1629 return; 1630 1631 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan && 1632 "Incorrect sum of BFI counts"); 1633 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual) 1634 return; 1635 double Scale = (SumCount / SumBFICount).convertToDouble(); 1636 if (Scale < 1.001 && Scale > 0.999) 1637 return; 1638 1639 uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue; 1640 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale; 1641 if (NewEntryCount == 0) 1642 NewEntryCount = 1; 1643 if (NewEntryCount != FuncEntryCount) { 1644 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real)); 1645 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName() 1646 << ", entry_count " << FuncEntryCount << " --> " 1647 << NewEntryCount << "\n"); 1648 } 1649 } 1650 1651 // Compare the profile count values with BFI count values, and print out 1652 // the non-matching ones. 1653 static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, 1654 BranchProbabilityInfo &NBPI, 1655 uint64_t HotCountThreshold, 1656 uint64_t ColdCountThreshold) { 1657 Function &F = Func.getFunc(); 1658 BlockFrequencyInfo NBFI(F, NBPI, LI); 1659 // bool PrintFunc = false; 1660 bool HotBBOnly = PGOVerifyHotBFI; 1661 std::string Msg; 1662 OptimizationRemarkEmitter ORE(&F); 1663 1664 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0; 1665 for (auto &BBI : F) { 1666 uint64_t CountValue = 0; 1667 uint64_t BFICountValue = 0; 1668 1669 if (Func.getBBInfo(&BBI).CountValid) 1670 CountValue = Func.getBBInfo(&BBI).CountValue; 1671 1672 BBNum++; 1673 if (CountValue) 1674 NonZeroBBNum++; 1675 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1676 if (BFICount) 1677 BFICountValue = BFICount.getValue(); 1678 1679 if (HotBBOnly) { 1680 bool rawIsHot = CountValue >= HotCountThreshold; 1681 bool BFIIsHot = BFICountValue >= HotCountThreshold; 1682 bool rawIsCold = CountValue <= ColdCountThreshold; 1683 bool ShowCount = false; 1684 if (rawIsHot && !BFIIsHot) { 1685 Msg = "raw-Hot to BFI-nonHot"; 1686 ShowCount = true; 1687 } else if (rawIsCold && BFIIsHot) { 1688 Msg = "raw-Cold to BFI-Hot"; 1689 ShowCount = true; 1690 } 1691 if (!ShowCount) 1692 continue; 1693 } else { 1694 if ((CountValue < PGOVerifyBFICutoff) && 1695 (BFICountValue < PGOVerifyBFICutoff)) 1696 continue; 1697 uint64_t Diff = (BFICountValue >= CountValue) 1698 ? BFICountValue - CountValue 1699 : CountValue - BFICountValue; 1700 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio) 1701 continue; 1702 } 1703 BBMisMatchNum++; 1704 1705 ORE.emit([&]() { 1706 OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "bfi-verify", 1707 F.getSubprogram(), &BBI); 1708 Remark << "BB " << ore::NV("Block", BBI.getName()) 1709 << " Count=" << ore::NV("Count", CountValue) 1710 << " BFI_Count=" << ore::NV("Count", BFICountValue); 1711 if (!Msg.empty()) 1712 Remark << " (" << Msg << ")"; 1713 return Remark; 1714 }); 1715 } 1716 if (BBMisMatchNum) 1717 ORE.emit([&]() { 1718 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify", 1719 F.getSubprogram(), &F.getEntryBlock()) 1720 << "In Func " << ore::NV("Function", F.getName()) 1721 << ": Num_of_BB=" << ore::NV("Count", BBNum) 1722 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum) 1723 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum); 1724 }); 1725 } 1726 1727 static bool annotateAllFunctions( 1728 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, 1729 function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1730 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1731 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, 1732 ProfileSummaryInfo *PSI, bool IsCS) { 1733 LLVM_DEBUG(dbgs() << "Read in profile counters: "); 1734 auto &Ctx = M.getContext(); 1735 // Read the counter array from file. 1736 auto ReaderOrErr = 1737 IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName); 1738 if (Error E = ReaderOrErr.takeError()) { 1739 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 1740 Ctx.diagnose( 1741 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 1742 }); 1743 return false; 1744 } 1745 1746 std::unique_ptr<IndexedInstrProfReader> PGOReader = 1747 std::move(ReaderOrErr.get()); 1748 if (!PGOReader) { 1749 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 1750 StringRef("Cannot get PGOReader"))); 1751 return false; 1752 } 1753 if (!PGOReader->hasCSIRLevelProfile() && IsCS) 1754 return false; 1755 1756 // TODO: might need to change the warning once the clang option is finalized. 1757 if (!PGOReader->isIRLevelProfile()) { 1758 Ctx.diagnose(DiagnosticInfoPGOProfile( 1759 ProfileFileName.data(), "Not an IR level instrumentation profile")); 1760 return false; 1761 } 1762 if (PGOReader->hasSingleByteCoverage()) { 1763 Ctx.diagnose(DiagnosticInfoPGOProfile( 1764 ProfileFileName.data(), 1765 "Cannot use coverage profiles for optimization")); 1766 return false; 1767 } 1768 if (PGOReader->functionEntryOnly()) { 1769 Ctx.diagnose(DiagnosticInfoPGOProfile( 1770 ProfileFileName.data(), 1771 "Function entry profiles are not yet supported for optimization")); 1772 return false; 1773 } 1774 1775 // Add the profile summary (read from the header of the indexed summary) here 1776 // so that we can use it below when reading counters (which checks if the 1777 // function should be marked with a cold or inlinehint attribute). 1778 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), 1779 IsCS ? ProfileSummary::PSK_CSInstr 1780 : ProfileSummary::PSK_Instr); 1781 PSI->refresh(); 1782 1783 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1784 collectComdatMembers(M, ComdatMembers); 1785 std::vector<Function *> HotFunctions; 1786 std::vector<Function *> ColdFunctions; 1787 1788 // If the profile marked as always instrument the entry BB, do the 1789 // same. Note this can be overwritten by the internal option in CFGMST.h 1790 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled(); 1791 if (PGOInstrumentEntry.getNumOccurrences() > 0) 1792 InstrumentFuncEntry = PGOInstrumentEntry; 1793 for (auto &F : M) { 1794 if (F.isDeclaration()) 1795 continue; 1796 auto &TLI = LookupTLI(F); 1797 auto *BPI = LookupBPI(F); 1798 auto *BFI = LookupBFI(F); 1799 // Split indirectbr critical edges here before computing the MST rather than 1800 // later in getInstrBB() to avoid invalidating it. 1801 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); 1802 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, 1803 InstrumentFuncEntry); 1804 // When AllMinusOnes is true, it means the profile for the function 1805 // is unrepresentative and this function is actually hot. Set the 1806 // entry count of the function to be multiple times of hot threshold 1807 // and drop all its internal counters. 1808 bool AllMinusOnes = false; 1809 bool AllZeros = false; 1810 if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes)) 1811 continue; 1812 if (AllZeros) { 1813 F.setEntryCount(ProfileCount(0, Function::PCT_Real)); 1814 if (Func.getProgramMaxCount() != 0) 1815 ColdFunctions.push_back(&F); 1816 continue; 1817 } 1818 const unsigned MultiplyFactor = 3; 1819 if (AllMinusOnes) { 1820 uint64_t HotThreshold = PSI->getHotCountThreshold(); 1821 if (HotThreshold) 1822 F.setEntryCount( 1823 ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real)); 1824 HotFunctions.push_back(&F); 1825 continue; 1826 } 1827 Func.populateCounters(); 1828 Func.setBranchWeights(); 1829 Func.annotateValueSites(); 1830 Func.annotateIrrLoopHeaderWeights(); 1831 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 1832 if (FreqAttr == PGOUseFunc::FFA_Cold) 1833 ColdFunctions.push_back(&F); 1834 else if (FreqAttr == PGOUseFunc::FFA_Hot) 1835 HotFunctions.push_back(&F); 1836 if (PGOViewCounts != PGOVCT_None && 1837 (ViewBlockFreqFuncName.empty() || 1838 F.getName().equals(ViewBlockFreqFuncName))) { 1839 LoopInfo LI{DominatorTree(F)}; 1840 std::unique_ptr<BranchProbabilityInfo> NewBPI = 1841 std::make_unique<BranchProbabilityInfo>(F, LI); 1842 std::unique_ptr<BlockFrequencyInfo> NewBFI = 1843 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); 1844 if (PGOViewCounts == PGOVCT_Graph) 1845 NewBFI->view(); 1846 else if (PGOViewCounts == PGOVCT_Text) { 1847 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n"; 1848 NewBFI->print(dbgs()); 1849 } 1850 } 1851 if (PGOViewRawCounts != PGOVCT_None && 1852 (ViewBlockFreqFuncName.empty() || 1853 F.getName().equals(ViewBlockFreqFuncName))) { 1854 if (PGOViewRawCounts == PGOVCT_Graph) 1855 if (ViewBlockFreqFuncName.empty()) 1856 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1857 else 1858 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1859 else if (PGOViewRawCounts == PGOVCT_Text) { 1860 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n"; 1861 Func.dumpInfo(); 1862 } 1863 } 1864 1865 if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) { 1866 LoopInfo LI{DominatorTree(F)}; 1867 BranchProbabilityInfo NBPI(F, LI); 1868 1869 // Fix func entry count. 1870 if (PGOFixEntryCount) 1871 fixFuncEntryCount(Func, LI, NBPI); 1872 1873 // Verify BlockFrequency information. 1874 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0; 1875 if (PGOVerifyHotBFI) { 1876 HotCountThreshold = PSI->getOrCompHotCountThreshold(); 1877 ColdCountThreshold = PSI->getOrCompColdCountThreshold(); 1878 } 1879 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold); 1880 } 1881 } 1882 1883 // Set function hotness attribute from the profile. 1884 // We have to apply these attributes at the end because their presence 1885 // can affect the BranchProbabilityInfo of any callers, resulting in an 1886 // inconsistent MST between prof-gen and prof-use. 1887 for (auto &F : HotFunctions) { 1888 F->addFnAttr(Attribute::InlineHint); 1889 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 1890 << "\n"); 1891 } 1892 for (auto &F : ColdFunctions) { 1893 // Only set when there is no Attribute::Hot set by the user. For Hot 1894 // attribute, user's annotation has the precedence over the profile. 1895 if (F->hasFnAttribute(Attribute::Hot)) { 1896 auto &Ctx = M.getContext(); 1897 std::string Msg = std::string("Function ") + F->getName().str() + 1898 std::string(" is annotated as a hot function but" 1899 " the profile is cold"); 1900 Ctx.diagnose( 1901 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); 1902 continue; 1903 } 1904 F->addFnAttr(Attribute::Cold); 1905 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() 1906 << "\n"); 1907 } 1908 return true; 1909 } 1910 1911 PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, 1912 std::string RemappingFilename, 1913 bool IsCS) 1914 : ProfileFileName(std::move(Filename)), 1915 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) { 1916 if (!PGOTestProfileFile.empty()) 1917 ProfileFileName = PGOTestProfileFile; 1918 if (!PGOTestProfileRemappingFile.empty()) 1919 ProfileRemappingFileName = PGOTestProfileRemappingFile; 1920 } 1921 1922 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 1923 ModuleAnalysisManager &AM) { 1924 1925 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1926 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1927 return FAM.getResult<TargetLibraryAnalysis>(F); 1928 }; 1929 auto LookupBPI = [&FAM](Function &F) { 1930 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1931 }; 1932 auto LookupBFI = [&FAM](Function &F) { 1933 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1934 }; 1935 1936 auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); 1937 1938 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, 1939 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) 1940 return PreservedAnalyses::all(); 1941 1942 return PreservedAnalyses::none(); 1943 } 1944 1945 static std::string getSimpleNodeName(const BasicBlock *Node) { 1946 if (!Node->getName().empty()) 1947 return std::string(Node->getName()); 1948 1949 std::string SimpleNodeName; 1950 raw_string_ostream OS(SimpleNodeName); 1951 Node->printAsOperand(OS, false); 1952 return OS.str(); 1953 } 1954 1955 void llvm::setProfMetadata(Module *M, Instruction *TI, 1956 ArrayRef<uint64_t> EdgeCounts, 1957 uint64_t MaxCount) { 1958 MDBuilder MDB(M->getContext()); 1959 assert(MaxCount > 0 && "Bad max count"); 1960 uint64_t Scale = calculateCountScale(MaxCount); 1961 SmallVector<unsigned, 4> Weights; 1962 for (const auto &ECI : EdgeCounts) 1963 Weights.push_back(scaleBranchCount(ECI, Scale)); 1964 1965 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W 1966 : Weights) { 1967 dbgs() << W << " "; 1968 } dbgs() << "\n";); 1969 1970 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false); 1971 1972 TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1973 if (EmitBranchProbability) { 1974 std::string BrCondStr = getBranchCondString(TI); 1975 if (BrCondStr.empty()) 1976 return; 1977 1978 uint64_t WSum = 1979 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0, 1980 [](uint64_t w1, uint64_t w2) { return w1 + w2; }); 1981 uint64_t TotalCount = 1982 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0, 1983 [](uint64_t c1, uint64_t c2) { return c1 + c2; }); 1984 Scale = calculateCountScale(WSum); 1985 BranchProbability BP(scaleBranchCount(Weights[0], Scale), 1986 scaleBranchCount(WSum, Scale)); 1987 std::string BranchProbStr; 1988 raw_string_ostream OS(BranchProbStr); 1989 OS << BP; 1990 OS << " (total count : " << TotalCount << ")"; 1991 OS.flush(); 1992 Function *F = TI->getParent()->getParent(); 1993 OptimizationRemarkEmitter ORE(F); 1994 ORE.emit([&]() { 1995 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) 1996 << BrCondStr << " is true with probability : " << BranchProbStr; 1997 }); 1998 } 1999 } 2000 2001 namespace llvm { 2002 2003 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { 2004 MDBuilder MDB(M->getContext()); 2005 TI->setMetadata(llvm::LLVMContext::MD_irr_loop, 2006 MDB.createIrrLoopHeaderWeight(Count)); 2007 } 2008 2009 template <> struct GraphTraits<PGOUseFunc *> { 2010 using NodeRef = const BasicBlock *; 2011 using ChildIteratorType = const_succ_iterator; 2012 using nodes_iterator = pointer_iterator<Function::const_iterator>; 2013 2014 static NodeRef getEntryNode(const PGOUseFunc *G) { 2015 return &G->getFunc().front(); 2016 } 2017 2018 static ChildIteratorType child_begin(const NodeRef N) { 2019 return succ_begin(N); 2020 } 2021 2022 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } 2023 2024 static nodes_iterator nodes_begin(const PGOUseFunc *G) { 2025 return nodes_iterator(G->getFunc().begin()); 2026 } 2027 2028 static nodes_iterator nodes_end(const PGOUseFunc *G) { 2029 return nodes_iterator(G->getFunc().end()); 2030 } 2031 }; 2032 2033 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { 2034 explicit DOTGraphTraits(bool isSimple = false) 2035 : DefaultDOTGraphTraits(isSimple) {} 2036 2037 static std::string getGraphName(const PGOUseFunc *G) { 2038 return std::string(G->getFunc().getName()); 2039 } 2040 2041 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { 2042 std::string Result; 2043 raw_string_ostream OS(Result); 2044 2045 OS << getSimpleNodeName(Node) << ":\\l"; 2046 UseBBInfo *BI = Graph->findBBInfo(Node); 2047 OS << "Count : "; 2048 if (BI && BI->CountValid) 2049 OS << BI->CountValue << "\\l"; 2050 else 2051 OS << "Unknown\\l"; 2052 2053 if (!PGOInstrSelect) 2054 return Result; 2055 2056 for (const Instruction &I : *Node) { 2057 if (!isa<SelectInst>(&I)) 2058 continue; 2059 // Display scaled counts for SELECT instruction: 2060 OS << "SELECT : { T = "; 2061 uint64_t TC, FC; 2062 bool HasProf = I.extractProfMetadata(TC, FC); 2063 if (!HasProf) 2064 OS << "Unknown, F = Unknown }\\l"; 2065 else 2066 OS << TC << ", F = " << FC << " }\\l"; 2067 } 2068 return Result; 2069 } 2070 }; 2071 2072 } // end namespace llvm 2073