1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements PGO instrumentation using a minimum spanning tree based 10 // on the following paper: 11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 13 // Issue 3, pp 313-322 14 // The idea of the algorithm based on the fact that for each node (except for 15 // the entry and exit), the sum of incoming edge counts equals the sum of 16 // outgoing edge counts. The count of edge on spanning tree can be derived from 17 // those edges not on the spanning tree. Knuth proves this method instruments 18 // the minimum number of edges. 19 // 20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree 21 // edges have higher frequencies (more likely to execute). The idea is to 22 // instrument those less frequently executed edges to reduce the runtime 23 // overhead of instrumented binaries. 24 // 25 // This file contains two passes: 26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 27 // count profile, and generates the instrumentation for indirect call 28 // profiling. 29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and 30 // annotates the branch weights. It also reads the indirect call value 31 // profiling records and annotate the indirect call instructions. 32 // 33 // To get the precise counter information, These two passes need to invoke at 34 // the same compilation point (so they see the same IR). For pass 35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For 36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 37 // the profile is opened in module level and passed to each PGOUseFunc instance. 38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 39 // in class FuncPGOInstrumentation. 40 // 41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 42 // BBInfo contains auxiliary information for each BB. These two classes are used 43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 45 // used in populating profile counters. 46 // The MST implementation is in Class CFGMST (CFGMST.h). 47 // 48 //===----------------------------------------------------------------------===// 49 50 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 51 #include "CFGMST.h" 52 #include "ValueProfileCollector.h" 53 #include "llvm/ADT/APInt.h" 54 #include "llvm/ADT/ArrayRef.h" 55 #include "llvm/ADT/STLExtras.h" 56 #include "llvm/ADT/SmallVector.h" 57 #include "llvm/ADT/Statistic.h" 58 #include "llvm/ADT/StringRef.h" 59 #include "llvm/ADT/Triple.h" 60 #include "llvm/ADT/Twine.h" 61 #include "llvm/ADT/iterator.h" 62 #include "llvm/ADT/iterator_range.h" 63 #include "llvm/Analysis/BlockFrequencyInfo.h" 64 #include "llvm/Analysis/BranchProbabilityInfo.h" 65 #include "llvm/Analysis/CFG.h" 66 #include "llvm/Analysis/EHPersonalities.h" 67 #include "llvm/Analysis/LoopInfo.h" 68 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 69 #include "llvm/Analysis/ProfileSummaryInfo.h" 70 #include "llvm/Analysis/TargetLibraryInfo.h" 71 #include "llvm/IR/Attributes.h" 72 #include "llvm/IR/BasicBlock.h" 73 #include "llvm/IR/CFG.h" 74 #include "llvm/IR/Comdat.h" 75 #include "llvm/IR/Constant.h" 76 #include "llvm/IR/Constants.h" 77 #include "llvm/IR/DiagnosticInfo.h" 78 #include "llvm/IR/Dominators.h" 79 #include "llvm/IR/Function.h" 80 #include "llvm/IR/GlobalAlias.h" 81 #include "llvm/IR/GlobalValue.h" 82 #include "llvm/IR/GlobalVariable.h" 83 #include "llvm/IR/IRBuilder.h" 84 #include "llvm/IR/InstVisitor.h" 85 #include "llvm/IR/InstrTypes.h" 86 #include "llvm/IR/Instruction.h" 87 #include "llvm/IR/Instructions.h" 88 #include "llvm/IR/IntrinsicInst.h" 89 #include "llvm/IR/Intrinsics.h" 90 #include "llvm/IR/LLVMContext.h" 91 #include "llvm/IR/MDBuilder.h" 92 #include "llvm/IR/Module.h" 93 #include "llvm/IR/PassManager.h" 94 #include "llvm/IR/ProfileSummary.h" 95 #include "llvm/IR/Type.h" 96 #include "llvm/IR/Value.h" 97 #include "llvm/ProfileData/InstrProf.h" 98 #include "llvm/ProfileData/InstrProfReader.h" 99 #include "llvm/Support/BranchProbability.h" 100 #include "llvm/Support/CRC.h" 101 #include "llvm/Support/Casting.h" 102 #include "llvm/Support/CommandLine.h" 103 #include "llvm/Support/DOTGraphTraits.h" 104 #include "llvm/Support/Debug.h" 105 #include "llvm/Support/Error.h" 106 #include "llvm/Support/ErrorHandling.h" 107 #include "llvm/Support/GraphWriter.h" 108 #include "llvm/Support/raw_ostream.h" 109 #include "llvm/Transforms/Instrumentation.h" 110 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 111 #include "llvm/Transforms/Utils/MisExpect.h" 112 #include "llvm/Transforms/Utils/ModuleUtils.h" 113 #include <algorithm> 114 #include <cassert> 115 #include <cstdint> 116 #include <memory> 117 #include <numeric> 118 #include <string> 119 #include <unordered_map> 120 #include <utility> 121 #include <vector> 122 123 using namespace llvm; 124 using ProfileCount = Function::ProfileCount; 125 using VPCandidateInfo = ValueProfileCollector::CandidateInfo; 126 127 #define DEBUG_TYPE "pgo-instrumentation" 128 129 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 130 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 131 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); 132 STATISTIC(NumOfPGOEdge, "Number of edges."); 133 STATISTIC(NumOfPGOBB, "Number of basic-blocks."); 134 STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 135 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 136 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 137 STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 138 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 139 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); 140 STATISTIC(NumOfCSPGOSelectInsts, 141 "Number of select instruction instrumented in CSPGO."); 142 STATISTIC(NumOfCSPGOMemIntrinsics, 143 "Number of mem intrinsics instrumented in CSPGO."); 144 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); 145 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); 146 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO."); 147 STATISTIC(NumOfCSPGOFunc, 148 "Number of functions having valid profile counts in CSPGO."); 149 STATISTIC(NumOfCSPGOMismatch, 150 "Number of functions having mismatch profile in CSPGO."); 151 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); 152 153 // Command line option to specify the file to read profile from. This is 154 // mainly used for testing. 155 static cl::opt<std::string> 156 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, 157 cl::value_desc("filename"), 158 cl::desc("Specify the path of profile data file. This is" 159 "mainly for test purpose.")); 160 static cl::opt<std::string> PGOTestProfileRemappingFile( 161 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, 162 cl::value_desc("filename"), 163 cl::desc("Specify the path of profile remapping file. This is mainly for " 164 "test purpose.")); 165 166 // Command line option to disable value profiling. The default is false: 167 // i.e. value profiling is enabled by default. This is for debug purpose. 168 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 169 cl::Hidden, 170 cl::desc("Disable Value Profiling")); 171 172 // Command line option to set the maximum number of VP annotations to write to 173 // the metadata for a single indirect call callsite. 174 static cl::opt<unsigned> MaxNumAnnotations( 175 "icp-max-annotations", cl::init(3), cl::Hidden, 176 cl::desc("Max number of annotations for a single indirect " 177 "call callsite")); 178 179 // Command line option to set the maximum number of value annotations 180 // to write to the metadata for a single memop intrinsic. 181 static cl::opt<unsigned> MaxNumMemOPAnnotations( 182 "memop-max-annotations", cl::init(4), cl::Hidden, 183 cl::desc("Max number of preicise value annotations for a single memop" 184 "intrinsic")); 185 186 // Command line option to control appending FunctionHash to the name of a COMDAT 187 // function. This is to avoid the hash mismatch caused by the preinliner. 188 static cl::opt<bool> DoComdatRenaming( 189 "do-comdat-renaming", cl::init(false), cl::Hidden, 190 cl::desc("Append function hash to the name of COMDAT function to avoid " 191 "function hash mismatch due to the preinliner")); 192 193 // Command line option to enable/disable the warning about missing profile 194 // information. 195 static cl::opt<bool> 196 PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, 197 cl::desc("Use this option to turn on/off " 198 "warnings about missing profile data for " 199 "functions.")); 200 201 namespace llvm { 202 // Command line option to enable/disable the warning about a hash mismatch in 203 // the profile data. 204 cl::opt<bool> 205 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 206 cl::desc("Use this option to turn off/on " 207 "warnings about profile cfg mismatch.")); 208 } // namespace llvm 209 210 // Command line option to enable/disable the warning about a hash mismatch in 211 // the profile data for Comdat functions, which often turns out to be false 212 // positive due to the pre-instrumentation inline. 213 static cl::opt<bool> NoPGOWarnMismatchComdatWeak( 214 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden, 215 cl::desc("The option is used to turn on/off " 216 "warnings about hash mismatch for comdat " 217 "or weak functions.")); 218 219 // Command line option to enable/disable select instruction instrumentation. 220 static cl::opt<bool> 221 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 222 cl::desc("Use this option to turn on/off SELECT " 223 "instruction instrumentation. ")); 224 225 // Command line option to turn on CFG dot or text dump of raw profile counts 226 static cl::opt<PGOViewCountsType> PGOViewRawCounts( 227 "pgo-view-raw-counts", cl::Hidden, 228 cl::desc("A boolean option to show CFG dag or text " 229 "with raw profile counts from " 230 "profile data. See also option " 231 "-pgo-view-counts. To limit graph " 232 "display to only one function, use " 233 "filtering option -view-bfi-func-name."), 234 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), 235 clEnumValN(PGOVCT_Graph, "graph", "show a graph."), 236 clEnumValN(PGOVCT_Text, "text", "show in text."))); 237 238 // Command line option to enable/disable memop intrinsic call.size profiling. 239 static cl::opt<bool> 240 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 241 cl::desc("Use this option to turn on/off " 242 "memory intrinsic size profiling.")); 243 244 // Emit branch probability as optimization remarks. 245 static cl::opt<bool> 246 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 247 cl::desc("When this option is on, the annotated " 248 "branch probability will be emitted as " 249 "optimization remarks: -{Rpass|" 250 "pass-remarks}=pgo-instrumentation")); 251 252 static cl::opt<bool> PGOInstrumentEntry( 253 "pgo-instrument-entry", cl::init(false), cl::Hidden, 254 cl::desc("Force to instrument function entry basicblock.")); 255 256 static cl::opt<bool> PGOFunctionEntryCoverage( 257 "pgo-function-entry-coverage", cl::Hidden, 258 cl::desc( 259 "Use this option to enable function entry coverage instrumentation.")); 260 261 static cl::opt<bool> 262 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, 263 cl::desc("Fix function entry count in profile use.")); 264 265 static cl::opt<bool> PGOVerifyHotBFI( 266 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden, 267 cl::desc("Print out the non-match BFI count if a hot raw profile count " 268 "becomes non-hot, or a cold raw profile count becomes hot. " 269 "The print is enabled under -Rpass-analysis=pgo, or " 270 "internal option -pass-remakrs-analysis=pgo.")); 271 272 static cl::opt<bool> PGOVerifyBFI( 273 "pgo-verify-bfi", cl::init(false), cl::Hidden, 274 cl::desc("Print out mismatched BFI counts after setting profile metadata " 275 "The print is enabled under -Rpass-analysis=pgo, or " 276 "internal option -pass-remakrs-analysis=pgo.")); 277 278 static cl::opt<unsigned> PGOVerifyBFIRatio( 279 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, 280 cl::desc("Set the threshold for pgo-verify-bfi: only print out " 281 "mismatched BFI if the difference percentage is greater than " 282 "this value (in percentage).")); 283 284 static cl::opt<unsigned> PGOVerifyBFICutoff( 285 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, 286 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " 287 "profile count value is below.")); 288 289 static cl::opt<std::string> PGOTraceFuncHash( 290 "pgo-trace-func-hash", cl::init("-"), cl::Hidden, 291 cl::value_desc("function name"), 292 cl::desc("Trace the hash of the function with this name.")); 293 294 namespace llvm { 295 // Command line option to turn on CFG dot dump after profile annotation. 296 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts 297 extern cl::opt<PGOViewCountsType> PGOViewCounts; 298 299 // Command line option to specify the name of the function for CFG dump 300 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= 301 extern cl::opt<std::string> ViewBlockFreqFuncName; 302 303 extern cl::opt<bool> DebugInfoCorrelate; 304 } // namespace llvm 305 306 static cl::opt<bool> 307 PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden, 308 cl::desc("Use the old CFG function hashing")); 309 310 // Return a string describing the branch condition that can be 311 // used in static branch probability heuristics: 312 static std::string getBranchCondString(Instruction *TI) { 313 BranchInst *BI = dyn_cast<BranchInst>(TI); 314 if (!BI || !BI->isConditional()) 315 return std::string(); 316 317 Value *Cond = BI->getCondition(); 318 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 319 if (!CI) 320 return std::string(); 321 322 std::string result; 323 raw_string_ostream OS(result); 324 OS << CmpInst::getPredicateName(CI->getPredicate()) << "_"; 325 CI->getOperand(0)->getType()->print(OS, true); 326 327 Value *RHS = CI->getOperand(1); 328 ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 329 if (CV) { 330 if (CV->isZero()) 331 OS << "_Zero"; 332 else if (CV->isOne()) 333 OS << "_One"; 334 else if (CV->isMinusOne()) 335 OS << "_MinusOne"; 336 else 337 OS << "_Const"; 338 } 339 OS.flush(); 340 return result; 341 } 342 343 static const char *ValueProfKindDescr[] = { 344 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, 345 #include "llvm/ProfileData/InstrProfData.inc" 346 }; 347 348 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime 349 // aware this is an ir_level profile so it can set the version flag. 350 static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) { 351 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); 352 Type *IntTy64 = Type::getInt64Ty(M.getContext()); 353 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); 354 if (IsCS) 355 ProfileVersion |= VARIANT_MASK_CSIR_PROF; 356 if (PGOInstrumentEntry) 357 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY; 358 if (DebugInfoCorrelate) 359 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE; 360 if (PGOFunctionEntryCoverage) 361 ProfileVersion |= 362 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY; 363 auto IRLevelVersionVariable = new GlobalVariable( 364 M, IntTy64, true, GlobalValue::WeakAnyLinkage, 365 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName); 366 IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility); 367 Triple TT(M.getTargetTriple()); 368 if (TT.supportsCOMDAT()) { 369 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage); 370 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName)); 371 } 372 return IRLevelVersionVariable; 373 } 374 375 namespace { 376 377 /// The select instruction visitor plays three roles specified 378 /// by the mode. In \c VM_counting mode, it simply counts the number of 379 /// select instructions. In \c VM_instrument mode, it inserts code to count 380 /// the number times TrueValue of select is taken. In \c VM_annotate mode, 381 /// it reads the profile data and annotate the select instruction with metadata. 382 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 383 class PGOUseFunc; 384 385 /// Instruction Visitor class to visit select instructions. 386 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 387 Function &F; 388 unsigned NSIs = 0; // Number of select instructions instrumented. 389 VisitMode Mode = VM_counting; // Visiting mode. 390 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 391 unsigned TotalNumCtrs = 0; // Total number of counters 392 GlobalVariable *FuncNameVar = nullptr; 393 uint64_t FuncHash = 0; 394 PGOUseFunc *UseFunc = nullptr; 395 396 SelectInstVisitor(Function &Func) : F(Func) {} 397 398 void countSelects(Function &Func) { 399 NSIs = 0; 400 Mode = VM_counting; 401 visit(Func); 402 } 403 404 // Visit the IR stream and instrument all select instructions. \p 405 // Ind is a pointer to the counter index variable; \p TotalNC 406 // is the total number of counters; \p FNV is the pointer to the 407 // PGO function name var; \p FHash is the function hash. 408 void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC, 409 GlobalVariable *FNV, uint64_t FHash) { 410 Mode = VM_instrument; 411 CurCtrIdx = Ind; 412 TotalNumCtrs = TotalNC; 413 FuncHash = FHash; 414 FuncNameVar = FNV; 415 visit(Func); 416 } 417 418 // Visit the IR stream and annotate all select instructions. 419 void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) { 420 Mode = VM_annotate; 421 UseFunc = UF; 422 CurCtrIdx = Ind; 423 visit(Func); 424 } 425 426 void instrumentOneSelectInst(SelectInst &SI); 427 void annotateOneSelectInst(SelectInst &SI); 428 429 // Visit \p SI instruction and perform tasks according to visit mode. 430 void visitSelectInst(SelectInst &SI); 431 432 // Return the number of select instructions. This needs be called after 433 // countSelects(). 434 unsigned getNumOfSelectInsts() const { return NSIs; } 435 }; 436 437 } // end anonymous namespace 438 439 namespace { 440 441 /// An MST based instrumentation for PGO 442 /// 443 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO 444 /// in the function level. 445 struct PGOEdge { 446 // This class implements the CFG edges. Note the CFG can be a multi-graph. 447 // So there might be multiple edges with same SrcBB and DestBB. 448 const BasicBlock *SrcBB; 449 const BasicBlock *DestBB; 450 uint64_t Weight; 451 bool InMST = false; 452 bool Removed = false; 453 bool IsCritical = false; 454 455 PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 456 : SrcBB(Src), DestBB(Dest), Weight(W) {} 457 458 // Return the information string of an edge. 459 std::string infoString() const { 460 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 461 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str(); 462 } 463 }; 464 465 // This class stores the auxiliary information for each BB. 466 struct BBInfo { 467 BBInfo *Group; 468 uint32_t Index; 469 uint32_t Rank = 0; 470 471 BBInfo(unsigned IX) : Group(this), Index(IX) {} 472 473 // Return the information string of this object. 474 std::string infoString() const { 475 return (Twine("Index=") + Twine(Index)).str(); 476 } 477 478 // Empty function -- only applicable to UseBBInfo. 479 void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 480 481 // Empty function -- only applicable to UseBBInfo. 482 void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 483 }; 484 485 // This class implements the CFG edges. Note the CFG can be a multi-graph. 486 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 487 private: 488 Function &F; 489 490 // Is this is context-sensitive instrumentation. 491 bool IsCS; 492 493 // A map that stores the Comdat group in function F. 494 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 495 496 ValueProfileCollector VPC; 497 498 void computeCFGHash(); 499 void renameComdatFunction(); 500 501 public: 502 std::vector<std::vector<VPCandidateInfo>> ValueSites; 503 SelectInstVisitor SIVisitor; 504 std::string FuncName; 505 GlobalVariable *FuncNameVar; 506 507 // CFG hash value for this function. 508 uint64_t FunctionHash = 0; 509 510 // The Minimum Spanning Tree of function CFG. 511 CFGMST<Edge, BBInfo> MST; 512 513 // Collect all the BBs that will be instrumented, and store them in 514 // InstrumentBBs. 515 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); 516 517 // Give an edge, find the BB that will be instrumented. 518 // Return nullptr if there is no BB to be instrumented. 519 BasicBlock *getInstrBB(Edge *E); 520 521 // Return the auxiliary BB information. 522 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 523 524 // Return the auxiliary BB information if available. 525 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 526 527 // Dump edges and BB information. 528 void dumpInfo(std::string Str = "") const { 529 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " + 530 Twine(FunctionHash) + "\t" + Str); 531 } 532 533 FuncPGOInstrumentation( 534 Function &Func, TargetLibraryInfo &TLI, 535 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 536 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 537 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false, 538 bool InstrumentFuncEntry = true) 539 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), 540 ValueSites(IPVK_Last + 1), SIVisitor(Func), 541 MST(F, InstrumentFuncEntry, BPI, BFI) { 542 // This should be done before CFG hash computation. 543 SIVisitor.countSelects(Func); 544 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); 545 if (!IsCS) { 546 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 547 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 548 NumOfPGOBB += MST.BBInfos.size(); 549 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); 550 } else { 551 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 552 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 553 NumOfCSPGOBB += MST.BBInfos.size(); 554 } 555 556 FuncName = getPGOFuncName(F); 557 computeCFGHash(); 558 if (!ComdatMembers.empty()) 559 renameComdatFunction(); 560 LLVM_DEBUG(dumpInfo("after CFGMST")); 561 562 for (auto &E : MST.AllEdges) { 563 if (E->Removed) 564 continue; 565 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; 566 if (!E->InMST) 567 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; 568 } 569 570 if (CreateGlobalVar) 571 FuncNameVar = createPGOFuncNameVar(F, FuncName); 572 } 573 }; 574 575 } // end anonymous namespace 576 577 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 578 // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers 579 // of selects, indirect calls, mem ops and edges. 580 template <class Edge, class BBInfo> 581 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 582 std::vector<uint8_t> Indexes; 583 JamCRC JC; 584 for (auto &BB : F) { 585 const Instruction *TI = BB.getTerminator(); 586 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 587 BasicBlock *Succ = TI->getSuccessor(I); 588 auto BI = findBBInfo(Succ); 589 if (BI == nullptr) 590 continue; 591 uint32_t Index = BI->Index; 592 for (int J = 0; J < 4; J++) 593 Indexes.push_back((uint8_t)(Index >> (J * 8))); 594 } 595 } 596 JC.update(Indexes); 597 598 JamCRC JCH; 599 if (PGOOldCFGHashing) { 600 // Hash format for context sensitive profile. Reserve 4 bits for other 601 // information. 602 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | 603 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | 604 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 | 605 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); 606 } else { 607 // The higher 32 bits. 608 auto updateJCH = [&JCH](uint64_t Num) { 609 uint8_t Data[8]; 610 support::endian::write64le(Data, Num); 611 JCH.update(Data); 612 }; 613 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts()); 614 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size()); 615 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size()); 616 updateJCH((uint64_t)MST.AllEdges.size()); 617 618 // Hash format for context sensitive profile. Reserve 4 bits for other 619 // information. 620 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); 621 } 622 623 // Reserve bit 60-63 for other information purpose. 624 FunctionHash &= 0x0FFFFFFFFFFFFFFF; 625 if (IsCS) 626 NamedInstrProfRecord::setCSFlagInHash(FunctionHash); 627 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" 628 << " CRC = " << JC.getCRC() 629 << ", Selects = " << SIVisitor.getNumOfSelectInsts() 630 << ", Edges = " << MST.AllEdges.size() << ", ICSites = " 631 << ValueSites[IPVK_IndirectCallTarget].size()); 632 if (!PGOOldCFGHashing) { 633 LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size() 634 << ", High32 CRC = " << JCH.getCRC()); 635 } 636 LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";); 637 638 if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash)) 639 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash 640 << " in building " << F.getParent()->getSourceFileName() << "\n"; 641 } 642 643 // Check if we can safely rename this Comdat function. 644 static bool canRenameComdat( 645 Function &F, 646 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 647 if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 648 return false; 649 650 // FIXME: Current only handle those Comdat groups that only containing one 651 // function. 652 // (1) For a Comdat group containing multiple functions, we need to have a 653 // unique postfix based on the hashes for each function. There is a 654 // non-trivial code refactoring to do this efficiently. 655 // (2) Variables can not be renamed, so we can not rename Comdat function in a 656 // group including global vars. 657 Comdat *C = F.getComdat(); 658 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 659 assert(!isa<GlobalAlias>(CM.second)); 660 Function *FM = dyn_cast<Function>(CM.second); 661 if (FM != &F) 662 return false; 663 } 664 return true; 665 } 666 667 // Append the CFGHash to the Comdat function name. 668 template <class Edge, class BBInfo> 669 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 670 if (!canRenameComdat(F, ComdatMembers)) 671 return; 672 std::string OrigName = F.getName().str(); 673 std::string NewFuncName = 674 Twine(F.getName() + "." + Twine(FunctionHash)).str(); 675 F.setName(Twine(NewFuncName)); 676 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 677 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 678 Comdat *NewComdat; 679 Module *M = F.getParent(); 680 // For AvailableExternallyLinkage functions, change the linkage to 681 // LinkOnceODR and put them into comdat. This is because after renaming, there 682 // is no backup external copy available for the function. 683 if (!F.hasComdat()) { 684 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 685 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 686 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 687 F.setComdat(NewComdat); 688 return; 689 } 690 691 // This function belongs to a single function Comdat group. 692 Comdat *OrigComdat = F.getComdat(); 693 std::string NewComdatName = 694 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 695 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 696 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 697 698 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 699 // Must be a function. 700 cast<Function>(CM.second)->setComdat(NewComdat); 701 } 702 } 703 704 // Collect all the BBs that will be instruments and return them in 705 // InstrumentBBs and setup InEdges/OutEdge for UseBBInfo. 706 template <class Edge, class BBInfo> 707 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( 708 std::vector<BasicBlock *> &InstrumentBBs) { 709 // Use a worklist as we will update the vector during the iteration. 710 std::vector<Edge *> EdgeList; 711 EdgeList.reserve(MST.AllEdges.size()); 712 for (auto &E : MST.AllEdges) 713 EdgeList.push_back(E.get()); 714 715 for (auto &E : EdgeList) { 716 BasicBlock *InstrBB = getInstrBB(E); 717 if (InstrBB) 718 InstrumentBBs.push_back(InstrBB); 719 } 720 721 // Set up InEdges/OutEdges for all BBs. 722 for (auto &E : MST.AllEdges) { 723 if (E->Removed) 724 continue; 725 const BasicBlock *SrcBB = E->SrcBB; 726 const BasicBlock *DestBB = E->DestBB; 727 BBInfo &SrcInfo = getBBInfo(SrcBB); 728 BBInfo &DestInfo = getBBInfo(DestBB); 729 SrcInfo.addOutEdge(E.get()); 730 DestInfo.addInEdge(E.get()); 731 } 732 } 733 734 // Given a CFG E to be instrumented, find which BB to place the instrumented 735 // code. The function will split the critical edge if necessary. 736 template <class Edge, class BBInfo> 737 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 738 if (E->InMST || E->Removed) 739 return nullptr; 740 741 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 742 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 743 // For a fake edge, instrument the real BB. 744 if (SrcBB == nullptr) 745 return DestBB; 746 if (DestBB == nullptr) 747 return SrcBB; 748 749 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { 750 // There are basic blocks (such as catchswitch) cannot be instrumented. 751 // If the returned first insertion point is the end of BB, skip this BB. 752 if (BB->getFirstInsertionPt() == BB->end()) 753 return nullptr; 754 return BB; 755 }; 756 757 // Instrument the SrcBB if it has a single successor, 758 // otherwise, the DestBB if this is not a critical edge. 759 Instruction *TI = SrcBB->getTerminator(); 760 if (TI->getNumSuccessors() <= 1) 761 return canInstrument(SrcBB); 762 if (!E->IsCritical) 763 return canInstrument(DestBB); 764 765 // Some IndirectBr critical edges cannot be split by the previous 766 // SplitIndirectBrCriticalEdges call. Bail out. 767 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 768 BasicBlock *InstrBB = 769 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum); 770 if (!InstrBB) { 771 LLVM_DEBUG( 772 dbgs() << "Fail to split critical edge: not instrument this edge.\n"); 773 return nullptr; 774 } 775 // For a critical edge, we have to split. Instrument the newly 776 // created BB. 777 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; 778 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index 779 << " --> " << getBBInfo(DestBB).Index << "\n"); 780 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. 781 MST.addEdge(SrcBB, InstrBB, 0); 782 // Second one: Add new edge of InstrBB->DestBB. 783 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); 784 NewEdge1.InMST = true; 785 E->Removed = true; 786 787 return canInstrument(InstrBB); 788 } 789 790 // When generating value profiling calls on Windows routines that make use of 791 // handler funclets for exception processing an operand bundle needs to attached 792 // to the called function. This routine will set \p OpBundles to contain the 793 // funclet information, if any is needed, that should be placed on the generated 794 // value profiling call for the value profile candidate call. 795 static void 796 populateEHOperandBundle(VPCandidateInfo &Cand, 797 DenseMap<BasicBlock *, ColorVector> &BlockColors, 798 SmallVectorImpl<OperandBundleDef> &OpBundles) { 799 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst); 800 if (!OrigCall) 801 return; 802 803 if (!isa<IntrinsicInst>(OrigCall)) { 804 // The instrumentation call should belong to the same funclet as a 805 // non-intrinsic call, so just copy the operand bundle, if any exists. 806 Optional<OperandBundleUse> ParentFunclet = 807 OrigCall->getOperandBundle(LLVMContext::OB_funclet); 808 if (ParentFunclet) 809 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet)); 810 } else { 811 // Intrinsics or other instructions do not get funclet information from the 812 // front-end. Need to use the BlockColors that was computed by the routine 813 // colorEHFunclets to determine whether a funclet is needed. 814 if (!BlockColors.empty()) { 815 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second; 816 assert(CV.size() == 1 && "non-unique color for block!"); 817 Instruction *EHPad = CV.front()->getFirstNonPHI(); 818 if (EHPad->isEHPad()) 819 OpBundles.emplace_back("funclet", EHPad); 820 } 821 } 822 } 823 824 // Visit all edge and instrument the edges not in MST, and do value profiling. 825 // Critical edges will be split. 826 static void instrumentOneFunc( 827 Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, 828 BlockFrequencyInfo *BFI, 829 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 830 bool IsCS) { 831 // Split indirectbr critical edges here before computing the MST rather than 832 // later in getInstrBB() to avoid invalidating it. 833 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); 834 835 FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo( 836 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry); 837 838 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); 839 auto Name = ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy); 840 auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()), 841 FuncInfo.FunctionHash); 842 if (PGOFunctionEntryCoverage) { 843 assert(!IsCS && 844 "entry coverge does not support context-sensitive instrumentation"); 845 auto &EntryBB = F.getEntryBlock(); 846 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); 847 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>, 848 // i32 <index>) 849 Builder.CreateCall( 850 Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover), 851 {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)}); 852 return; 853 } 854 855 std::vector<BasicBlock *> InstrumentBBs; 856 FuncInfo.getInstrumentBBs(InstrumentBBs); 857 unsigned NumCounters = 858 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 859 860 uint32_t I = 0; 861 for (auto *InstrBB : InstrumentBBs) { 862 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 863 assert(Builder.GetInsertPoint() != InstrBB->end() && 864 "Cannot get the Instrumentation point"); 865 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>, 866 // i32 <index>) 867 Builder.CreateCall( 868 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), 869 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)}); 870 } 871 872 // Now instrument select instructions: 873 FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar, 874 FuncInfo.FunctionHash); 875 assert(I == NumCounters); 876 877 if (DisableValueProfiling) 878 return; 879 880 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); 881 882 // Intrinsic function calls do not have funclet operand bundles needed for 883 // Windows exception handling attached to them. However, if value profiling is 884 // inserted for one of these calls, then a funclet value will need to be set 885 // on the instrumentation call based on the funclet coloring. 886 DenseMap<BasicBlock *, ColorVector> BlockColors; 887 if (F.hasPersonalityFn() && 888 isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) 889 BlockColors = colorEHFunclets(F); 890 891 // For each VP Kind, walk the VP candidates and instrument each one. 892 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { 893 unsigned SiteIndex = 0; 894 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) 895 continue; 896 897 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { 898 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] 899 << " site: CallSite Index = " << SiteIndex << "\n"); 900 901 IRBuilder<> Builder(Cand.InsertPt); 902 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && 903 "Cannot get the Instrumentation point"); 904 905 Value *ToProfile = nullptr; 906 if (Cand.V->getType()->isIntegerTy()) 907 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty()); 908 else if (Cand.V->getType()->isPointerTy()) 909 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); 910 assert(ToProfile && "value profiling Value is of unexpected type"); 911 912 SmallVector<OperandBundleDef, 1> OpBundles; 913 populateEHOperandBundle(Cand, BlockColors, OpBundles); 914 Builder.CreateCall( 915 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 916 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 917 Builder.getInt64(FuncInfo.FunctionHash), ToProfile, 918 Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}, 919 OpBundles); 920 } 921 } // IPVK_First <= Kind <= IPVK_Last 922 } 923 924 namespace { 925 926 // This class represents a CFG edge in profile use compilation. 927 struct PGOUseEdge : public PGOEdge { 928 bool CountValid = false; 929 uint64_t CountValue = 0; 930 931 PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 932 : PGOEdge(Src, Dest, W) {} 933 934 // Set edge count value 935 void setEdgeCount(uint64_t Value) { 936 CountValue = Value; 937 CountValid = true; 938 } 939 940 // Return the information string for this object. 941 std::string infoString() const { 942 if (!CountValid) 943 return PGOEdge::infoString(); 944 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) 945 .str(); 946 } 947 }; 948 949 using DirectEdges = SmallVector<PGOUseEdge *, 2>; 950 951 // This class stores the auxiliary information for each BB. 952 struct UseBBInfo : public BBInfo { 953 uint64_t CountValue = 0; 954 bool CountValid; 955 int32_t UnknownCountInEdge = 0; 956 int32_t UnknownCountOutEdge = 0; 957 DirectEdges InEdges; 958 DirectEdges OutEdges; 959 960 UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {} 961 962 UseBBInfo(unsigned IX, uint64_t C) 963 : BBInfo(IX), CountValue(C), CountValid(true) {} 964 965 // Set the profile count value for this BB. 966 void setBBInfoCount(uint64_t Value) { 967 CountValue = Value; 968 CountValid = true; 969 } 970 971 // Return the information string of this object. 972 std::string infoString() const { 973 if (!CountValid) 974 return BBInfo::infoString(); 975 return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str(); 976 } 977 978 // Add an OutEdge and update the edge count. 979 void addOutEdge(PGOUseEdge *E) { 980 OutEdges.push_back(E); 981 UnknownCountOutEdge++; 982 } 983 984 // Add an InEdge and update the edge count. 985 void addInEdge(PGOUseEdge *E) { 986 InEdges.push_back(E); 987 UnknownCountInEdge++; 988 } 989 }; 990 991 } // end anonymous namespace 992 993 // Sum up the count values for all the edges. 994 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 995 uint64_t Total = 0; 996 for (auto &E : Edges) { 997 if (E->Removed) 998 continue; 999 Total += E->CountValue; 1000 } 1001 return Total; 1002 } 1003 1004 namespace { 1005 1006 class PGOUseFunc { 1007 public: 1008 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, 1009 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 1010 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, 1011 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry) 1012 : F(Func), M(Modu), BFI(BFIin), PSI(PSI), 1013 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS, 1014 InstrumentFuncEntry), 1015 FreqAttr(FFA_Normal), IsCS(IsCS) {} 1016 1017 // Read counts for the instrumented BB from profile. 1018 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1019 bool &AllMinusOnes); 1020 1021 // Populate the counts for all BBs. 1022 void populateCounters(); 1023 1024 // Set the branch weights based on the count values. 1025 void setBranchWeights(); 1026 1027 // Annotate the value profile call sites for all value kind. 1028 void annotateValueSites(); 1029 1030 // Annotate the value profile call sites for one value kind. 1031 void annotateValueSites(uint32_t Kind); 1032 1033 // Annotate the irreducible loop header weights. 1034 void annotateIrrLoopHeaderWeights(); 1035 1036 // The hotness of the function from the profile count. 1037 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 1038 1039 // Return the function hotness from the profile. 1040 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 1041 1042 // Return the function hash. 1043 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 1044 1045 // Return the profile record for this function; 1046 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 1047 1048 // Return the auxiliary BB information. 1049 UseBBInfo &getBBInfo(const BasicBlock *BB) const { 1050 return FuncInfo.getBBInfo(BB); 1051 } 1052 1053 // Return the auxiliary BB information if available. 1054 UseBBInfo *findBBInfo(const BasicBlock *BB) const { 1055 return FuncInfo.findBBInfo(BB); 1056 } 1057 1058 Function &getFunc() const { return F; } 1059 1060 void dumpInfo(std::string Str = "") const { 1061 FuncInfo.dumpInfo(Str); 1062 } 1063 1064 uint64_t getProgramMaxCount() const { return ProgramMaxCount; } 1065 private: 1066 Function &F; 1067 Module *M; 1068 BlockFrequencyInfo *BFI; 1069 ProfileSummaryInfo *PSI; 1070 1071 // This member stores the shared information with class PGOGenFunc. 1072 FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo; 1073 1074 // The maximum count value in the profile. This is only used in PGO use 1075 // compilation. 1076 uint64_t ProgramMaxCount; 1077 1078 // Position of counter that remains to be read. 1079 uint32_t CountPosition = 0; 1080 1081 // Total size of the profile count for this function. 1082 uint32_t ProfileCountSize = 0; 1083 1084 // ProfileRecord for this function. 1085 InstrProfRecord ProfileRecord; 1086 1087 // Function hotness info derived from profile. 1088 FuncFreqAttr FreqAttr; 1089 1090 // Is to use the context sensitive profile. 1091 bool IsCS; 1092 1093 // Find the Instrumented BB and set the value. Return false on error. 1094 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 1095 1096 // Set the edge counter value for the unknown edge -- there should be only 1097 // one unknown edge. 1098 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 1099 1100 // Return FuncName string; 1101 std::string getFuncName() const { return FuncInfo.FuncName; } 1102 1103 // Set the hot/cold inline hints based on the count values. 1104 // FIXME: This function should be removed once the functionality in 1105 // the inliner is implemented. 1106 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 1107 if (PSI->isHotCount(EntryCount)) 1108 FreqAttr = FFA_Hot; 1109 else if (PSI->isColdCount(MaxCount)) 1110 FreqAttr = FFA_Cold; 1111 } 1112 }; 1113 1114 } // end anonymous namespace 1115 1116 // Visit all the edges and assign the count value for the instrumented 1117 // edges and the BB. Return false on error. 1118 bool PGOUseFunc::setInstrumentedCounts( 1119 const std::vector<uint64_t> &CountFromProfile) { 1120 1121 std::vector<BasicBlock *> InstrumentBBs; 1122 FuncInfo.getInstrumentBBs(InstrumentBBs); 1123 unsigned NumCounters = 1124 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 1125 // The number of counters here should match the number of counters 1126 // in profile. Return if they mismatch. 1127 if (NumCounters != CountFromProfile.size()) { 1128 return false; 1129 } 1130 auto *FuncEntry = &*F.begin(); 1131 1132 // Set the profile count to the Instrumented BBs. 1133 uint32_t I = 0; 1134 for (BasicBlock *InstrBB : InstrumentBBs) { 1135 uint64_t CountValue = CountFromProfile[I++]; 1136 UseBBInfo &Info = getBBInfo(InstrBB); 1137 // If we reach here, we know that we have some nonzero count 1138 // values in this function. The entry count should not be 0. 1139 // Fix it if necessary. 1140 if (InstrBB == FuncEntry && CountValue == 0) 1141 CountValue = 1; 1142 Info.setBBInfoCount(CountValue); 1143 } 1144 ProfileCountSize = CountFromProfile.size(); 1145 CountPosition = I; 1146 1147 // Set the edge count and update the count of unknown edges for BBs. 1148 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { 1149 E->setEdgeCount(Value); 1150 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1151 this->getBBInfo(E->DestBB).UnknownCountInEdge--; 1152 }; 1153 1154 // Set the profile count the Instrumented edges. There are BBs that not in 1155 // MST but not instrumented. Need to set the edge count value so that we can 1156 // populate the profile counts later. 1157 for (auto &E : FuncInfo.MST.AllEdges) { 1158 if (E->Removed || E->InMST) 1159 continue; 1160 const BasicBlock *SrcBB = E->SrcBB; 1161 UseBBInfo &SrcInfo = getBBInfo(SrcBB); 1162 1163 // If only one out-edge, the edge profile count should be the same as BB 1164 // profile count. 1165 if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1) 1166 setEdgeCount(E.get(), SrcInfo.CountValue); 1167 else { 1168 const BasicBlock *DestBB = E->DestBB; 1169 UseBBInfo &DestInfo = getBBInfo(DestBB); 1170 // If only one in-edge, the edge profile count should be the same as BB 1171 // profile count. 1172 if (DestInfo.CountValid && DestInfo.InEdges.size() == 1) 1173 setEdgeCount(E.get(), DestInfo.CountValue); 1174 } 1175 if (E->CountValid) 1176 continue; 1177 // E's count should have been set from profile. If not, this meenas E skips 1178 // the instrumentation. We set the count to 0. 1179 setEdgeCount(E.get(), 0); 1180 } 1181 return true; 1182 } 1183 1184 // Set the count value for the unknown edge. There should be one and only one 1185 // unknown edge in Edges vector. 1186 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 1187 for (auto &E : Edges) { 1188 if (E->CountValid) 1189 continue; 1190 E->setEdgeCount(Value); 1191 1192 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1193 getBBInfo(E->DestBB).UnknownCountInEdge--; 1194 return; 1195 } 1196 llvm_unreachable("Cannot find the unknown count edge"); 1197 } 1198 1199 // Emit function metadata indicating PGO profile mismatch. 1200 static void annotateFunctionWithHashMismatch(Function &F, 1201 LLVMContext &ctx) { 1202 const char MetadataName[] = "instr_prof_hash_mismatch"; 1203 SmallVector<Metadata *, 2> Names; 1204 // If this metadata already exists, ignore. 1205 auto *Existing = F.getMetadata(LLVMContext::MD_annotation); 1206 if (Existing) { 1207 MDTuple *Tuple = cast<MDTuple>(Existing); 1208 for (auto &N : Tuple->operands()) { 1209 if (cast<MDString>(N.get())->getString() == MetadataName) 1210 return; 1211 Names.push_back(N.get()); 1212 } 1213 } 1214 1215 MDBuilder MDB(ctx); 1216 Names.push_back(MDB.createString(MetadataName)); 1217 MDNode *MD = MDTuple::get(ctx, Names); 1218 F.setMetadata(LLVMContext::MD_annotation, MD); 1219 } 1220 1221 // Read the profile from ProfileFileName and assign the value to the 1222 // instrumented BB and the edges. This function also updates ProgramMaxCount. 1223 // Return true if the profile are successfully read, and false on errors. 1224 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1225 bool &AllMinusOnes) { 1226 auto &Ctx = M->getContext(); 1227 uint64_t MismatchedFuncSum = 0; 1228 Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord( 1229 FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum); 1230 if (Error E = Result.takeError()) { 1231 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 1232 auto Err = IPE.get(); 1233 bool SkipWarning = false; 1234 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " 1235 << FuncInfo.FuncName << ": "); 1236 if (Err == instrprof_error::unknown_function) { 1237 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; 1238 SkipWarning = !PGOWarnMissing; 1239 LLVM_DEBUG(dbgs() << "unknown function"); 1240 } else if (Err == instrprof_error::hash_mismatch || 1241 Err == instrprof_error::malformed) { 1242 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; 1243 SkipWarning = 1244 NoPGOWarnMismatch || 1245 (NoPGOWarnMismatchComdatWeak && 1246 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage || 1247 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 1248 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash 1249 << " skip=" << SkipWarning << ")"); 1250 // Emit function metadata indicating PGO profile mismatch. 1251 annotateFunctionWithHashMismatch(F, M->getContext()); 1252 } 1253 1254 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); 1255 if (SkipWarning) 1256 return; 1257 1258 std::string Msg = 1259 IPE.message() + std::string(" ") + F.getName().str() + 1260 std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) + 1261 std::string(" up to ") + std::to_string(MismatchedFuncSum) + 1262 std::string(" count discarded"); 1263 1264 Ctx.diagnose( 1265 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1266 }); 1267 return false; 1268 } 1269 ProfileRecord = std::move(Result.get()); 1270 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 1271 1272 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 1273 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 1274 AllMinusOnes = (CountFromProfile.size() > 0); 1275 uint64_t ValueSum = 0; 1276 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 1277 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 1278 ValueSum += CountFromProfile[I]; 1279 if (CountFromProfile[I] != (uint64_t)-1) 1280 AllMinusOnes = false; 1281 } 1282 AllZeros = (ValueSum == 0); 1283 1284 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 1285 1286 getBBInfo(nullptr).UnknownCountOutEdge = 2; 1287 getBBInfo(nullptr).UnknownCountInEdge = 2; 1288 1289 if (!setInstrumentedCounts(CountFromProfile)) { 1290 LLVM_DEBUG( 1291 dbgs() << "Inconsistent number of counts, skipping this function"); 1292 Ctx.diagnose(DiagnosticInfoPGOProfile( 1293 M->getName().data(), 1294 Twine("Inconsistent number of counts in ") + F.getName().str() 1295 + Twine(": the profile may be stale or there is a function name collision."), 1296 DS_Warning)); 1297 return false; 1298 } 1299 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); 1300 return true; 1301 } 1302 1303 // Populate the counters from instrumented BBs to all BBs. 1304 // In the end of this operation, all BBs should have a valid count value. 1305 void PGOUseFunc::populateCounters() { 1306 bool Changes = true; 1307 unsigned NumPasses = 0; 1308 while (Changes) { 1309 NumPasses++; 1310 Changes = false; 1311 1312 // For efficient traversal, it's better to start from the end as most 1313 // of the instrumented edges are at the end. 1314 for (auto &BB : reverse(F)) { 1315 UseBBInfo *Count = findBBInfo(&BB); 1316 if (Count == nullptr) 1317 continue; 1318 if (!Count->CountValid) { 1319 if (Count->UnknownCountOutEdge == 0) { 1320 Count->CountValue = sumEdgeCount(Count->OutEdges); 1321 Count->CountValid = true; 1322 Changes = true; 1323 } else if (Count->UnknownCountInEdge == 0) { 1324 Count->CountValue = sumEdgeCount(Count->InEdges); 1325 Count->CountValid = true; 1326 Changes = true; 1327 } 1328 } 1329 if (Count->CountValid) { 1330 if (Count->UnknownCountOutEdge == 1) { 1331 uint64_t Total = 0; 1332 uint64_t OutSum = sumEdgeCount(Count->OutEdges); 1333 // If the one of the successor block can early terminate (no-return), 1334 // we can end up with situation where out edge sum count is larger as 1335 // the source BB's count is collected by a post-dominated block. 1336 if (Count->CountValue > OutSum) 1337 Total = Count->CountValue - OutSum; 1338 setEdgeCount(Count->OutEdges, Total); 1339 Changes = true; 1340 } 1341 if (Count->UnknownCountInEdge == 1) { 1342 uint64_t Total = 0; 1343 uint64_t InSum = sumEdgeCount(Count->InEdges); 1344 if (Count->CountValue > InSum) 1345 Total = Count->CountValue - InSum; 1346 setEdgeCount(Count->InEdges, Total); 1347 Changes = true; 1348 } 1349 } 1350 } 1351 } 1352 1353 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 1354 (void) NumPasses; 1355 #ifndef NDEBUG 1356 // Assert every BB has a valid counter. 1357 for (auto &BB : F) { 1358 auto BI = findBBInfo(&BB); 1359 if (BI == nullptr) 1360 continue; 1361 assert(BI->CountValid && "BB count is not valid"); 1362 } 1363 #endif 1364 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 1365 uint64_t FuncMaxCount = FuncEntryCount; 1366 for (auto &BB : F) { 1367 auto BI = findBBInfo(&BB); 1368 if (BI == nullptr) 1369 continue; 1370 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); 1371 } 1372 1373 // Fix the obviously inconsistent entry count. 1374 if (FuncMaxCount > 0 && FuncEntryCount == 0) 1375 FuncEntryCount = 1; 1376 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); 1377 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 1378 1379 // Now annotate select instructions 1380 FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition); 1381 assert(CountPosition == ProfileCountSize); 1382 1383 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); 1384 } 1385 1386 // Assign the scaled count values to the BB with multiple out edges. 1387 void PGOUseFunc::setBranchWeights() { 1388 // Generate MD_prof metadata for every branch instruction. 1389 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() 1390 << " IsCS=" << IsCS << "\n"); 1391 for (auto &BB : F) { 1392 Instruction *TI = BB.getTerminator(); 1393 if (TI->getNumSuccessors() < 2) 1394 continue; 1395 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || 1396 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI))) 1397 continue; 1398 1399 if (getBBInfo(&BB).CountValue == 0) 1400 continue; 1401 1402 // We have a non-zero Branch BB. 1403 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1404 unsigned Size = BBCountInfo.OutEdges.size(); 1405 SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 1406 uint64_t MaxCount = 0; 1407 for (unsigned s = 0; s < Size; s++) { 1408 const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 1409 const BasicBlock *SrcBB = E->SrcBB; 1410 const BasicBlock *DestBB = E->DestBB; 1411 if (DestBB == nullptr) 1412 continue; 1413 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 1414 uint64_t EdgeCount = E->CountValue; 1415 if (EdgeCount > MaxCount) 1416 MaxCount = EdgeCount; 1417 EdgeCounts[SuccNum] = EdgeCount; 1418 } 1419 setProfMetadata(M, TI, EdgeCounts, MaxCount); 1420 } 1421 } 1422 1423 static bool isIndirectBrTarget(BasicBlock *BB) { 1424 for (BasicBlock *Pred : predecessors(BB)) { 1425 if (isa<IndirectBrInst>(Pred->getTerminator())) 1426 return true; 1427 } 1428 return false; 1429 } 1430 1431 void PGOUseFunc::annotateIrrLoopHeaderWeights() { 1432 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); 1433 // Find irr loop headers 1434 for (auto &BB : F) { 1435 // As a heuristic also annotate indrectbr targets as they have a high chance 1436 // to become an irreducible loop header after the indirectbr tail 1437 // duplication. 1438 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { 1439 Instruction *TI = BB.getTerminator(); 1440 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1441 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); 1442 } 1443 } 1444 } 1445 1446 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 1447 if (PGOFunctionEntryCoverage) 1448 return; 1449 Module *M = F.getParent(); 1450 IRBuilder<> Builder(&SI); 1451 Type *Int64Ty = Builder.getInt64Ty(); 1452 Type *I8PtrTy = Builder.getInt8PtrTy(); 1453 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 1454 Builder.CreateCall( 1455 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 1456 {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1457 Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), 1458 Builder.getInt32(*CurCtrIdx), Step}); 1459 ++(*CurCtrIdx); 1460 } 1461 1462 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 1463 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1464 assert(*CurCtrIdx < CountFromProfile.size() && 1465 "Out of bound access of counters"); 1466 uint64_t SCounts[2]; 1467 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1468 ++(*CurCtrIdx); 1469 uint64_t TotalCount = 0; 1470 auto BI = UseFunc->findBBInfo(SI.getParent()); 1471 if (BI != nullptr) 1472 TotalCount = BI->CountValue; 1473 // False Count 1474 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1475 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1476 if (MaxCount) 1477 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1478 } 1479 1480 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1481 if (!PGOInstrSelect) 1482 return; 1483 // FIXME: do not handle this yet. 1484 if (SI.getCondition()->getType()->isVectorTy()) 1485 return; 1486 1487 switch (Mode) { 1488 case VM_counting: 1489 NSIs++; 1490 return; 1491 case VM_instrument: 1492 instrumentOneSelectInst(SI); 1493 return; 1494 case VM_annotate: 1495 annotateOneSelectInst(SI); 1496 return; 1497 } 1498 1499 llvm_unreachable("Unknown visiting mode"); 1500 } 1501 1502 // Traverse all valuesites and annotate the instructions for all value kind. 1503 void PGOUseFunc::annotateValueSites() { 1504 if (DisableValueProfiling) 1505 return; 1506 1507 // Create the PGOFuncName meta data. 1508 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1509 1510 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1511 annotateValueSites(Kind); 1512 } 1513 1514 // Annotate the instructions for a specific value kind. 1515 void PGOUseFunc::annotateValueSites(uint32_t Kind) { 1516 assert(Kind <= IPVK_Last); 1517 unsigned ValueSiteIndex = 0; 1518 auto &ValueSites = FuncInfo.ValueSites[Kind]; 1519 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 1520 if (NumValueSites != ValueSites.size()) { 1521 auto &Ctx = M->getContext(); 1522 Ctx.diagnose(DiagnosticInfoPGOProfile( 1523 M->getName().data(), 1524 Twine("Inconsistent number of value sites for ") + 1525 Twine(ValueProfKindDescr[Kind]) + 1526 Twine(" profiling in \"") + F.getName().str() + 1527 Twine("\", possibly due to the use of a stale profile."), 1528 DS_Warning)); 1529 return; 1530 } 1531 1532 for (VPCandidateInfo &I : ValueSites) { 1533 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 1534 << "): Index = " << ValueSiteIndex << " out of " 1535 << NumValueSites << "\n"); 1536 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, 1537 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 1538 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations 1539 : MaxNumAnnotations); 1540 ValueSiteIndex++; 1541 } 1542 } 1543 1544 // Collect the set of members for each Comdat in module M and store 1545 // in ComdatMembers. 1546 static void collectComdatMembers( 1547 Module &M, 1548 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1549 if (!DoComdatRenaming) 1550 return; 1551 for (Function &F : M) 1552 if (Comdat *C = F.getComdat()) 1553 ComdatMembers.insert(std::make_pair(C, &F)); 1554 for (GlobalVariable &GV : M.globals()) 1555 if (Comdat *C = GV.getComdat()) 1556 ComdatMembers.insert(std::make_pair(C, &GV)); 1557 for (GlobalAlias &GA : M.aliases()) 1558 if (Comdat *C = GA.getComdat()) 1559 ComdatMembers.insert(std::make_pair(C, &GA)); 1560 } 1561 1562 static bool InstrumentAllFunctions( 1563 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1564 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1565 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { 1566 // For the context-sensitve instrumentation, we should have a separated pass 1567 // (before LTO/ThinLTO linking) to create these variables. 1568 if (!IsCS) 1569 createIRLevelProfileFlagVar(M, /*IsCS=*/false); 1570 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1571 collectComdatMembers(M, ComdatMembers); 1572 1573 for (auto &F : M) { 1574 if (F.isDeclaration()) 1575 continue; 1576 if (F.hasFnAttribute(llvm::Attribute::NoProfile)) 1577 continue; 1578 auto &TLI = LookupTLI(F); 1579 auto *BPI = LookupBPI(F); 1580 auto *BFI = LookupBFI(F); 1581 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS); 1582 } 1583 return true; 1584 } 1585 1586 PreservedAnalyses 1587 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) { 1588 createProfileFileNameVar(M, CSInstrName); 1589 // The variable in a comdat may be discarded by LTO. Ensure the declaration 1590 // will be retained. 1591 appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true)); 1592 return PreservedAnalyses::all(); 1593 } 1594 1595 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1596 ModuleAnalysisManager &AM) { 1597 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1598 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1599 return FAM.getResult<TargetLibraryAnalysis>(F); 1600 }; 1601 auto LookupBPI = [&FAM](Function &F) { 1602 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1603 }; 1604 auto LookupBFI = [&FAM](Function &F) { 1605 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1606 }; 1607 1608 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS)) 1609 return PreservedAnalyses::all(); 1610 1611 return PreservedAnalyses::none(); 1612 } 1613 1614 // Using the ratio b/w sums of profile count values and BFI count values to 1615 // adjust the func entry count. 1616 static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, 1617 BranchProbabilityInfo &NBPI) { 1618 Function &F = Func.getFunc(); 1619 BlockFrequencyInfo NBFI(F, NBPI, LI); 1620 #ifndef NDEBUG 1621 auto BFIEntryCount = F.getEntryCount(); 1622 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) && 1623 "Invalid BFI Entrycount"); 1624 #endif 1625 auto SumCount = APFloat::getZero(APFloat::IEEEdouble()); 1626 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble()); 1627 for (auto &BBI : F) { 1628 uint64_t CountValue = 0; 1629 uint64_t BFICountValue = 0; 1630 if (!Func.findBBInfo(&BBI)) 1631 continue; 1632 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1633 CountValue = Func.getBBInfo(&BBI).CountValue; 1634 BFICountValue = *BFICount; 1635 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven); 1636 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven); 1637 } 1638 if (SumCount.isZero()) 1639 return; 1640 1641 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan && 1642 "Incorrect sum of BFI counts"); 1643 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual) 1644 return; 1645 double Scale = (SumCount / SumBFICount).convertToDouble(); 1646 if (Scale < 1.001 && Scale > 0.999) 1647 return; 1648 1649 uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue; 1650 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale; 1651 if (NewEntryCount == 0) 1652 NewEntryCount = 1; 1653 if (NewEntryCount != FuncEntryCount) { 1654 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real)); 1655 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName() 1656 << ", entry_count " << FuncEntryCount << " --> " 1657 << NewEntryCount << "\n"); 1658 } 1659 } 1660 1661 // Compare the profile count values with BFI count values, and print out 1662 // the non-matching ones. 1663 static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, 1664 BranchProbabilityInfo &NBPI, 1665 uint64_t HotCountThreshold, 1666 uint64_t ColdCountThreshold) { 1667 Function &F = Func.getFunc(); 1668 BlockFrequencyInfo NBFI(F, NBPI, LI); 1669 // bool PrintFunc = false; 1670 bool HotBBOnly = PGOVerifyHotBFI; 1671 std::string Msg; 1672 OptimizationRemarkEmitter ORE(&F); 1673 1674 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0; 1675 for (auto &BBI : F) { 1676 uint64_t CountValue = 0; 1677 uint64_t BFICountValue = 0; 1678 1679 if (Func.getBBInfo(&BBI).CountValid) 1680 CountValue = Func.getBBInfo(&BBI).CountValue; 1681 1682 BBNum++; 1683 if (CountValue) 1684 NonZeroBBNum++; 1685 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1686 if (BFICount) 1687 BFICountValue = *BFICount; 1688 1689 if (HotBBOnly) { 1690 bool rawIsHot = CountValue >= HotCountThreshold; 1691 bool BFIIsHot = BFICountValue >= HotCountThreshold; 1692 bool rawIsCold = CountValue <= ColdCountThreshold; 1693 bool ShowCount = false; 1694 if (rawIsHot && !BFIIsHot) { 1695 Msg = "raw-Hot to BFI-nonHot"; 1696 ShowCount = true; 1697 } else if (rawIsCold && BFIIsHot) { 1698 Msg = "raw-Cold to BFI-Hot"; 1699 ShowCount = true; 1700 } 1701 if (!ShowCount) 1702 continue; 1703 } else { 1704 if ((CountValue < PGOVerifyBFICutoff) && 1705 (BFICountValue < PGOVerifyBFICutoff)) 1706 continue; 1707 uint64_t Diff = (BFICountValue >= CountValue) 1708 ? BFICountValue - CountValue 1709 : CountValue - BFICountValue; 1710 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio) 1711 continue; 1712 } 1713 BBMisMatchNum++; 1714 1715 ORE.emit([&]() { 1716 OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "bfi-verify", 1717 F.getSubprogram(), &BBI); 1718 Remark << "BB " << ore::NV("Block", BBI.getName()) 1719 << " Count=" << ore::NV("Count", CountValue) 1720 << " BFI_Count=" << ore::NV("Count", BFICountValue); 1721 if (!Msg.empty()) 1722 Remark << " (" << Msg << ")"; 1723 return Remark; 1724 }); 1725 } 1726 if (BBMisMatchNum) 1727 ORE.emit([&]() { 1728 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify", 1729 F.getSubprogram(), &F.getEntryBlock()) 1730 << "In Func " << ore::NV("Function", F.getName()) 1731 << ": Num_of_BB=" << ore::NV("Count", BBNum) 1732 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum) 1733 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum); 1734 }); 1735 } 1736 1737 static bool annotateAllFunctions( 1738 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, 1739 function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1740 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1741 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, 1742 ProfileSummaryInfo *PSI, bool IsCS) { 1743 LLVM_DEBUG(dbgs() << "Read in profile counters: "); 1744 auto &Ctx = M.getContext(); 1745 // Read the counter array from file. 1746 auto ReaderOrErr = 1747 IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName); 1748 if (Error E = ReaderOrErr.takeError()) { 1749 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 1750 Ctx.diagnose( 1751 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 1752 }); 1753 return false; 1754 } 1755 1756 std::unique_ptr<IndexedInstrProfReader> PGOReader = 1757 std::move(ReaderOrErr.get()); 1758 if (!PGOReader) { 1759 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 1760 StringRef("Cannot get PGOReader"))); 1761 return false; 1762 } 1763 if (!PGOReader->hasCSIRLevelProfile() && IsCS) 1764 return false; 1765 1766 // TODO: might need to change the warning once the clang option is finalized. 1767 if (!PGOReader->isIRLevelProfile()) { 1768 Ctx.diagnose(DiagnosticInfoPGOProfile( 1769 ProfileFileName.data(), "Not an IR level instrumentation profile")); 1770 return false; 1771 } 1772 if (PGOReader->hasSingleByteCoverage()) { 1773 Ctx.diagnose(DiagnosticInfoPGOProfile( 1774 ProfileFileName.data(), 1775 "Cannot use coverage profiles for optimization")); 1776 return false; 1777 } 1778 if (PGOReader->functionEntryOnly()) { 1779 Ctx.diagnose(DiagnosticInfoPGOProfile( 1780 ProfileFileName.data(), 1781 "Function entry profiles are not yet supported for optimization")); 1782 return false; 1783 } 1784 1785 // Add the profile summary (read from the header of the indexed summary) here 1786 // so that we can use it below when reading counters (which checks if the 1787 // function should be marked with a cold or inlinehint attribute). 1788 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), 1789 IsCS ? ProfileSummary::PSK_CSInstr 1790 : ProfileSummary::PSK_Instr); 1791 PSI->refresh(); 1792 1793 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1794 collectComdatMembers(M, ComdatMembers); 1795 std::vector<Function *> HotFunctions; 1796 std::vector<Function *> ColdFunctions; 1797 1798 // If the profile marked as always instrument the entry BB, do the 1799 // same. Note this can be overwritten by the internal option in CFGMST.h 1800 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled(); 1801 if (PGOInstrumentEntry.getNumOccurrences() > 0) 1802 InstrumentFuncEntry = PGOInstrumentEntry; 1803 for (auto &F : M) { 1804 if (F.isDeclaration()) 1805 continue; 1806 auto &TLI = LookupTLI(F); 1807 auto *BPI = LookupBPI(F); 1808 auto *BFI = LookupBFI(F); 1809 // Split indirectbr critical edges here before computing the MST rather than 1810 // later in getInstrBB() to avoid invalidating it. 1811 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); 1812 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, 1813 InstrumentFuncEntry); 1814 // When AllMinusOnes is true, it means the profile for the function 1815 // is unrepresentative and this function is actually hot. Set the 1816 // entry count of the function to be multiple times of hot threshold 1817 // and drop all its internal counters. 1818 bool AllMinusOnes = false; 1819 bool AllZeros = false; 1820 if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes)) 1821 continue; 1822 if (AllZeros) { 1823 F.setEntryCount(ProfileCount(0, Function::PCT_Real)); 1824 if (Func.getProgramMaxCount() != 0) 1825 ColdFunctions.push_back(&F); 1826 continue; 1827 } 1828 const unsigned MultiplyFactor = 3; 1829 if (AllMinusOnes) { 1830 uint64_t HotThreshold = PSI->getHotCountThreshold(); 1831 if (HotThreshold) 1832 F.setEntryCount( 1833 ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real)); 1834 HotFunctions.push_back(&F); 1835 continue; 1836 } 1837 Func.populateCounters(); 1838 Func.setBranchWeights(); 1839 Func.annotateValueSites(); 1840 Func.annotateIrrLoopHeaderWeights(); 1841 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 1842 if (FreqAttr == PGOUseFunc::FFA_Cold) 1843 ColdFunctions.push_back(&F); 1844 else if (FreqAttr == PGOUseFunc::FFA_Hot) 1845 HotFunctions.push_back(&F); 1846 if (PGOViewCounts != PGOVCT_None && 1847 (ViewBlockFreqFuncName.empty() || 1848 F.getName().equals(ViewBlockFreqFuncName))) { 1849 LoopInfo LI{DominatorTree(F)}; 1850 std::unique_ptr<BranchProbabilityInfo> NewBPI = 1851 std::make_unique<BranchProbabilityInfo>(F, LI); 1852 std::unique_ptr<BlockFrequencyInfo> NewBFI = 1853 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); 1854 if (PGOViewCounts == PGOVCT_Graph) 1855 NewBFI->view(); 1856 else if (PGOViewCounts == PGOVCT_Text) { 1857 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n"; 1858 NewBFI->print(dbgs()); 1859 } 1860 } 1861 if (PGOViewRawCounts != PGOVCT_None && 1862 (ViewBlockFreqFuncName.empty() || 1863 F.getName().equals(ViewBlockFreqFuncName))) { 1864 if (PGOViewRawCounts == PGOVCT_Graph) 1865 if (ViewBlockFreqFuncName.empty()) 1866 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1867 else 1868 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1869 else if (PGOViewRawCounts == PGOVCT_Text) { 1870 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n"; 1871 Func.dumpInfo(); 1872 } 1873 } 1874 1875 if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) { 1876 LoopInfo LI{DominatorTree(F)}; 1877 BranchProbabilityInfo NBPI(F, LI); 1878 1879 // Fix func entry count. 1880 if (PGOFixEntryCount) 1881 fixFuncEntryCount(Func, LI, NBPI); 1882 1883 // Verify BlockFrequency information. 1884 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0; 1885 if (PGOVerifyHotBFI) { 1886 HotCountThreshold = PSI->getOrCompHotCountThreshold(); 1887 ColdCountThreshold = PSI->getOrCompColdCountThreshold(); 1888 } 1889 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold); 1890 } 1891 } 1892 1893 // Set function hotness attribute from the profile. 1894 // We have to apply these attributes at the end because their presence 1895 // can affect the BranchProbabilityInfo of any callers, resulting in an 1896 // inconsistent MST between prof-gen and prof-use. 1897 for (auto &F : HotFunctions) { 1898 F->addFnAttr(Attribute::InlineHint); 1899 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 1900 << "\n"); 1901 } 1902 for (auto &F : ColdFunctions) { 1903 // Only set when there is no Attribute::Hot set by the user. For Hot 1904 // attribute, user's annotation has the precedence over the profile. 1905 if (F->hasFnAttribute(Attribute::Hot)) { 1906 auto &Ctx = M.getContext(); 1907 std::string Msg = std::string("Function ") + F->getName().str() + 1908 std::string(" is annotated as a hot function but" 1909 " the profile is cold"); 1910 Ctx.diagnose( 1911 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); 1912 continue; 1913 } 1914 F->addFnAttr(Attribute::Cold); 1915 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() 1916 << "\n"); 1917 } 1918 return true; 1919 } 1920 1921 PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, 1922 std::string RemappingFilename, 1923 bool IsCS) 1924 : ProfileFileName(std::move(Filename)), 1925 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) { 1926 if (!PGOTestProfileFile.empty()) 1927 ProfileFileName = PGOTestProfileFile; 1928 if (!PGOTestProfileRemappingFile.empty()) 1929 ProfileRemappingFileName = PGOTestProfileRemappingFile; 1930 } 1931 1932 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 1933 ModuleAnalysisManager &AM) { 1934 1935 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1936 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1937 return FAM.getResult<TargetLibraryAnalysis>(F); 1938 }; 1939 auto LookupBPI = [&FAM](Function &F) { 1940 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1941 }; 1942 auto LookupBFI = [&FAM](Function &F) { 1943 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1944 }; 1945 1946 auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); 1947 1948 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, 1949 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) 1950 return PreservedAnalyses::all(); 1951 1952 return PreservedAnalyses::none(); 1953 } 1954 1955 static std::string getSimpleNodeName(const BasicBlock *Node) { 1956 if (!Node->getName().empty()) 1957 return std::string(Node->getName()); 1958 1959 std::string SimpleNodeName; 1960 raw_string_ostream OS(SimpleNodeName); 1961 Node->printAsOperand(OS, false); 1962 return OS.str(); 1963 } 1964 1965 void llvm::setProfMetadata(Module *M, Instruction *TI, 1966 ArrayRef<uint64_t> EdgeCounts, 1967 uint64_t MaxCount) { 1968 MDBuilder MDB(M->getContext()); 1969 assert(MaxCount > 0 && "Bad max count"); 1970 uint64_t Scale = calculateCountScale(MaxCount); 1971 SmallVector<unsigned, 4> Weights; 1972 for (const auto &ECI : EdgeCounts) 1973 Weights.push_back(scaleBranchCount(ECI, Scale)); 1974 1975 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W 1976 : Weights) { 1977 dbgs() << W << " "; 1978 } dbgs() << "\n";); 1979 1980 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false); 1981 1982 TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1983 if (EmitBranchProbability) { 1984 std::string BrCondStr = getBranchCondString(TI); 1985 if (BrCondStr.empty()) 1986 return; 1987 1988 uint64_t WSum = 1989 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0, 1990 [](uint64_t w1, uint64_t w2) { return w1 + w2; }); 1991 uint64_t TotalCount = 1992 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0, 1993 [](uint64_t c1, uint64_t c2) { return c1 + c2; }); 1994 Scale = calculateCountScale(WSum); 1995 BranchProbability BP(scaleBranchCount(Weights[0], Scale), 1996 scaleBranchCount(WSum, Scale)); 1997 std::string BranchProbStr; 1998 raw_string_ostream OS(BranchProbStr); 1999 OS << BP; 2000 OS << " (total count : " << TotalCount << ")"; 2001 OS.flush(); 2002 Function *F = TI->getParent()->getParent(); 2003 OptimizationRemarkEmitter ORE(F); 2004 ORE.emit([&]() { 2005 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) 2006 << BrCondStr << " is true with probability : " << BranchProbStr; 2007 }); 2008 } 2009 } 2010 2011 namespace llvm { 2012 2013 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { 2014 MDBuilder MDB(M->getContext()); 2015 TI->setMetadata(llvm::LLVMContext::MD_irr_loop, 2016 MDB.createIrrLoopHeaderWeight(Count)); 2017 } 2018 2019 template <> struct GraphTraits<PGOUseFunc *> { 2020 using NodeRef = const BasicBlock *; 2021 using ChildIteratorType = const_succ_iterator; 2022 using nodes_iterator = pointer_iterator<Function::const_iterator>; 2023 2024 static NodeRef getEntryNode(const PGOUseFunc *G) { 2025 return &G->getFunc().front(); 2026 } 2027 2028 static ChildIteratorType child_begin(const NodeRef N) { 2029 return succ_begin(N); 2030 } 2031 2032 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } 2033 2034 static nodes_iterator nodes_begin(const PGOUseFunc *G) { 2035 return nodes_iterator(G->getFunc().begin()); 2036 } 2037 2038 static nodes_iterator nodes_end(const PGOUseFunc *G) { 2039 return nodes_iterator(G->getFunc().end()); 2040 } 2041 }; 2042 2043 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { 2044 explicit DOTGraphTraits(bool isSimple = false) 2045 : DefaultDOTGraphTraits(isSimple) {} 2046 2047 static std::string getGraphName(const PGOUseFunc *G) { 2048 return std::string(G->getFunc().getName()); 2049 } 2050 2051 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { 2052 std::string Result; 2053 raw_string_ostream OS(Result); 2054 2055 OS << getSimpleNodeName(Node) << ":\\l"; 2056 UseBBInfo *BI = Graph->findBBInfo(Node); 2057 OS << "Count : "; 2058 if (BI && BI->CountValid) 2059 OS << BI->CountValue << "\\l"; 2060 else 2061 OS << "Unknown\\l"; 2062 2063 if (!PGOInstrSelect) 2064 return Result; 2065 2066 for (const Instruction &I : *Node) { 2067 if (!isa<SelectInst>(&I)) 2068 continue; 2069 // Display scaled counts for SELECT instruction: 2070 OS << "SELECT : { T = "; 2071 uint64_t TC, FC; 2072 bool HasProf = I.extractProfMetadata(TC, FC); 2073 if (!HasProf) 2074 OS << "Unknown, F = Unknown }\\l"; 2075 else 2076 OS << TC << ", F = " << FC << " }\\l"; 2077 } 2078 return Result; 2079 } 2080 }; 2081 2082 } // end namespace llvm 2083