1 //===--- CodeGenPGO.h - PGO Instrumentation for LLVM CodeGen ----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Instrumentation-based profile-guided optimization 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef CLANG_CODEGEN_CODEGENPGO_H 15 #define CLANG_CODEGEN_CODEGENPGO_H 16 17 #include "CGBuilder.h" 18 #include "CodeGenModule.h" 19 #include "CodeGenTypes.h" 20 #include "clang/Frontend/CodeGenOptions.h" 21 #include "llvm/ADT/StringMap.h" 22 #include "llvm/Support/MemoryBuffer.h" 23 #include <memory> 24 25 namespace clang { 26 namespace CodeGen { 27 class RegionCounter; 28 29 /// The raw counter data from an instrumented PGO binary 30 class PGOProfileData { 31 private: 32 /// The PGO data 33 std::unique_ptr<llvm::MemoryBuffer> DataBuffer; 34 /// Offsets into DataBuffer for each function's counters 35 llvm::StringMap<unsigned> DataOffsets; 36 /// Execution counts for each function. 37 llvm::StringMap<uint64_t> FunctionCounts; 38 /// The maximal execution count among all functions. 39 uint64_t MaxFunctionCount; 40 CodeGenModule &CGM; 41 public: 42 PGOProfileData(CodeGenModule &CGM, std::string Path); 43 /// Fill Counts with the profile data for the given function name. Returns 44 /// false on success. 45 bool getFunctionCounts(StringRef FuncName, uint64_t &FuncHash, 46 std::vector<uint64_t> &Counts); 47 /// Return the maximum of all known function counts. 48 uint64_t getMaximumFunctionCount() { return MaxFunctionCount; } 49 }; 50 51 /// Per-function PGO state. This class should generally not be used directly, 52 /// but instead through the CodeGenFunction and RegionCounter types. 53 class CodeGenPGO { 54 private: 55 CodeGenModule &CGM; 56 std::unique_ptr<std::string> PrefixedFuncName; 57 StringRef RawFuncName; 58 llvm::GlobalValue::LinkageTypes VarLinkage; 59 60 unsigned NumRegionCounters; 61 uint64_t FunctionHash; 62 llvm::GlobalVariable *RegionCounters; 63 std::unique_ptr<llvm::DenseMap<const Stmt *, unsigned>> RegionCounterMap; 64 std::unique_ptr<llvm::DenseMap<const Stmt *, uint64_t>> StmtCountMap; 65 std::unique_ptr<std::vector<uint64_t>> RegionCounts; 66 uint64_t CurrentRegionCount; 67 68 public: 69 CodeGenPGO(CodeGenModule &CGM) 70 : CGM(CGM), NumRegionCounters(0), FunctionHash(0), RegionCounters(0), 71 CurrentRegionCount(0) {} 72 73 /// Whether or not we have PGO region data for the current function. This is 74 /// false both when we have no data at all and when our data has been 75 /// discarded. 76 bool haveRegionCounts() const { return RegionCounts != 0; } 77 78 /// Get the string used to identify this function in the profile data. 79 /// For functions with local linkage, this includes the main file name. 80 StringRef getFuncName() const { return StringRef(*PrefixedFuncName); } 81 std::string getFuncVarName(StringRef VarName) const { 82 return ("__llvm_profile_" + VarName + "_" + RawFuncName).str(); 83 } 84 85 /// Return the counter value of the current region. 86 uint64_t getCurrentRegionCount() const { return CurrentRegionCount; } 87 88 /// Set the counter value for the current region. This is used to keep track 89 /// of changes to the most recent counter from control flow and non-local 90 /// exits. 91 void setCurrentRegionCount(uint64_t Count) { CurrentRegionCount = Count; } 92 93 /// Indicate that the current region is never reached, and thus should have a 94 /// counter value of zero. This is important so that subsequent regions can 95 /// correctly track their parent counts. 96 void setCurrentRegionUnreachable() { setCurrentRegionCount(0); } 97 98 /// Check if an execution count is known for a given statement. If so, return 99 /// true and put the value in Count; else return false. 100 bool getStmtCount(const Stmt *S, uint64_t &Count) { 101 if (!StmtCountMap) 102 return false; 103 llvm::DenseMap<const Stmt*, uint64_t>::const_iterator 104 I = StmtCountMap->find(S); 105 if (I == StmtCountMap->end()) 106 return false; 107 Count = I->second; 108 return true; 109 } 110 111 /// If the execution count for the current statement is known, record that 112 /// as the current count. 113 void setCurrentStmt(const Stmt *S) { 114 uint64_t Count; 115 if (getStmtCount(S, Count)) 116 setCurrentRegionCount(Count); 117 } 118 119 /// Calculate branch weights appropriate for PGO data 120 llvm::MDNode *createBranchWeights(uint64_t TrueCount, uint64_t FalseCount); 121 llvm::MDNode *createBranchWeights(ArrayRef<uint64_t> Weights); 122 llvm::MDNode *createLoopWeights(const Stmt *Cond, RegionCounter &Cnt); 123 124 /// Assign counters to regions and configure them for PGO of a given 125 /// function. Does nothing if instrumentation is not enabled and either 126 /// generates global variables or associates PGO data with each of the 127 /// counters depending on whether we are generating or using instrumentation. 128 void assignRegionCounters(const Decl *D, llvm::Function *Fn); 129 /// Emit static data structures for instrumentation data. 130 void emitInstrumentationData(); 131 /// Clean up region counter state. Must be called if assignRegionCounters is 132 /// used. 133 void destroyRegionCounters(); 134 /// Emit static initialization code, if any. 135 static llvm::Function *emitInitialization(CodeGenModule &CGM); 136 137 private: 138 void setFuncName(llvm::Function *Fn); 139 void mapRegionCounters(const Decl *D); 140 void computeRegionCounts(const Decl *D); 141 void applyFunctionAttributes(PGOProfileData *PGOData, llvm::Function *Fn); 142 void loadRegionCounts(PGOProfileData *PGOData); 143 void emitCounterVariables(); 144 llvm::GlobalVariable *buildDataVar(); 145 146 /// Emit code to increment the counter at the given index 147 void emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter); 148 149 /// Return the region counter for the given statement. This should only be 150 /// called on statements that have a dedicated counter. 151 unsigned getRegionCounter(const Stmt *S) { 152 if (RegionCounterMap == 0) 153 return 0; 154 return (*RegionCounterMap)[S]; 155 } 156 157 /// Return the region count for the counter at the given index. 158 uint64_t getRegionCount(unsigned Counter) { 159 if (!haveRegionCounts()) 160 return 0; 161 return (*RegionCounts)[Counter]; 162 } 163 164 friend class RegionCounter; 165 }; 166 167 /// A counter for a particular region. This is the primary interface through 168 /// which clients manage PGO counters and their values. 169 class RegionCounter { 170 CodeGenPGO *PGO; 171 unsigned Counter; 172 uint64_t Count; 173 uint64_t ParentCount; 174 uint64_t RegionCount; 175 int64_t Adjust; 176 177 RegionCounter(CodeGenPGO &PGO, unsigned CounterIndex) 178 : PGO(&PGO), Counter(CounterIndex), Count(PGO.getRegionCount(Counter)), 179 ParentCount(PGO.getCurrentRegionCount()), Adjust(0) {} 180 181 public: 182 RegionCounter(CodeGenPGO &PGO, const Stmt *S) 183 : PGO(&PGO), Counter(PGO.getRegionCounter(S)), 184 Count(PGO.getRegionCount(Counter)), 185 ParentCount(PGO.getCurrentRegionCount()), Adjust(0) {} 186 187 /// Get the value of the counter. In most cases this is the number of times 188 /// the region of the counter was entered, but for switch labels it's the 189 /// number of direct jumps to that label. 190 uint64_t getCount() const { return Count; } 191 192 /// Get the value of the counter with adjustments applied. Adjustments occur 193 /// when control enters or leaves the region abnormally; i.e., if there is a 194 /// jump to a label within the region, or if the function can return from 195 /// within the region. The adjusted count, then, is the value of the counter 196 /// at the end of the region. 197 uint64_t getAdjustedCount() const { 198 return Count + Adjust; 199 } 200 201 /// Get the value of the counter in this region's parent, i.e., the region 202 /// that was active when this region began. This is useful for deriving 203 /// counts in implicitly counted regions, like the false case of a condition 204 /// or the normal exits of a loop. 205 uint64_t getParentCount() const { return ParentCount; } 206 207 /// Activate the counter by emitting an increment and starting to track 208 /// adjustments. If AddIncomingFallThrough is true, the current region count 209 /// will be added to the counter for the purposes of tracking the region. 210 void beginRegion(CGBuilderTy &Builder, bool AddIncomingFallThrough=false) { 211 beginRegion(AddIncomingFallThrough); 212 PGO->emitCounterIncrement(Builder, Counter); 213 } 214 void beginRegion(bool AddIncomingFallThrough=false) { 215 RegionCount = Count; 216 if (AddIncomingFallThrough) 217 RegionCount += PGO->getCurrentRegionCount(); 218 PGO->setCurrentRegionCount(RegionCount); 219 } 220 221 /// For counters on boolean branches, begins tracking adjustments for the 222 /// uncounted path. 223 void beginElseRegion() { 224 RegionCount = ParentCount - Count; 225 PGO->setCurrentRegionCount(RegionCount); 226 } 227 228 /// Reset the current region count. 229 void setCurrentRegionCount(uint64_t CurrentCount) { 230 RegionCount = CurrentCount; 231 PGO->setCurrentRegionCount(RegionCount); 232 } 233 234 /// Adjust for non-local control flow after emitting a subexpression or 235 /// substatement. This must be called to account for constructs such as gotos, 236 /// labels, and returns, so that we can ensure that our region's count is 237 /// correct in the code that follows. 238 void adjustForControlFlow() { 239 Adjust += PGO->getCurrentRegionCount() - RegionCount; 240 // Reset the region count in case this is called again later. 241 RegionCount = PGO->getCurrentRegionCount(); 242 } 243 244 /// Commit all adjustments to the current region. If the region is a loop, 245 /// the LoopAdjust value should be the count of all the breaks and continues 246 /// from the loop, to compensate for those counts being deducted from the 247 /// adjustments for the body of the loop. 248 void applyAdjustmentsToRegion(uint64_t LoopAdjust) { 249 PGO->setCurrentRegionCount(ParentCount + Adjust + LoopAdjust); 250 } 251 }; 252 253 } // end namespace CodeGen 254 } // end namespace clang 255 256 #endif 257