//===--- BinaryFunctionProfile.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements post-processing of profile data for BinaryFunction:
// fixing up basic block and edge counts, inferring fall-through execution
// counts, updating jump table counts, and merging profiles between functions.
//
//===----------------------------------------------------------------------===//

#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt-prof"

using namespace llvm;
using namespace bolt;

namespace opts {

extern cl::OptionCategory BoltOptCategory;

cl::opt<IndirectCallPromotionType>
IndirectCallPromotion("indirect-call-promotion",
  cl::init(ICP_NONE),
  cl::desc("indirect call promotion"),
  cl::values(
    clEnumValN(ICP_NONE, "none", "do not perform indirect call promotion"),
    clEnumValN(ICP_CALLS, "calls", "perform ICP on indirect calls"),
    clEnumValN(ICP_JUMP_TABLES, "jump-tables", "perform ICP on jump tables"),
    clEnumValN(ICP_ALL, "all", "perform ICP on calls and jump tables")),
  cl::ZeroOrMore,
  cl::cat(BoltOptCategory));

extern cl::opt<JumpTableSupportLevel> JumpTables;

static cl::opt<bool>
FixFuncCounts("fix-func-counts",
  cl::desc("adjust function counts based on basic blocks execution count"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

static cl::opt<bool>
FixBlockCounts("fix-block-counts",
  cl::desc("adjust block counts based on outgoing branch counts"),
  cl::init(true),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

static cl::opt<bool>
InferFallThroughs("infer-fall-throughs",
  cl::desc("infer execution count for fall-through blocks"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

} // namespace opts

namespace llvm {
namespace bolt {

void BinaryFunction::postProcessProfile() {
  if (!hasValidProfile()) {
    clearProfile();
    return;
  }

  if (!(getProfileFlags() & PF_LBR))
    return;

  // If we have at least some branch data for the function, indicate that it
  // was executed.
  if (opts::FixFuncCounts && ExecutionCount == 0) {
    ExecutionCount = 1;
  }

  // Compute preliminary execution count for each basic block.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    if ((!BB->isEntryPoint() && !BB->isLandingPad()) ||
        BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
      BB->ExecutionCount = 0;
  }
  for (BinaryBasicBlock *BB : BasicBlocks) {
    auto SuccBIIter = BB->branch_info_begin();
    for (BinaryBasicBlock *Succ : BB->successors()) {
      // All incoming edges to the primary entry have been accounted for, thus
      // we skip the update here.
      if (SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
          Succ != BasicBlocks.front())
        Succ->setExecutionCount(Succ->getExecutionCount() + SuccBIIter->Count);
      ++SuccBIIter;
    }
  }

  if (opts::FixBlockCounts) {
    for (BinaryBasicBlock *BB : BasicBlocks) {
      // Make sure that execution count of a block is at least the branch count
      // of an incoming/outgoing jump.
      auto SuccBIIter = BB->branch_info_begin();
      for (BinaryBasicBlock *Succ : BB->successors()) {
        uint64_t Count = SuccBIIter->Count;
        if (Count != BinaryBasicBlock::COUNT_NO_PROFILE && Count > 0) {
          Succ->setExecutionCount(std::max(Succ->getExecutionCount(), Count));
          BB->setExecutionCount(std::max(BB->getExecutionCount(), Count));
        }
        ++SuccBIIter;
      }
      // Make sure that execution count of a block is at least the number of
      // function calls from the block.
      for (MCInst &Inst : *BB) {
        // Ignore non-call instructions.
        if (!BC.MIB->isCall(Inst))
          continue;

        auto CountAnnt = BC.MIB->tryGetAnnotationAs<uint64_t>(Inst, "Count");
        if (CountAnnt) {
          BB->setExecutionCount(std::max(BB->getExecutionCount(), *CountAnnt));
        }
      }
    }
  }

  if (opts::InferFallThroughs)
    inferFallThroughCounts();

  // Update profile information for jump tables based on CFG branch data.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    const MCInst *LastInstr = BB->getLastNonPseudoInstr();
    if (!LastInstr)
      continue;
    const uint64_t JTAddress = BC.MIB->getJumpTable(*LastInstr);
    if (!JTAddress)
      continue;
    JumpTable *JT = getJumpTableContainingAddress(JTAddress);
    if (!JT)
      continue;

    uint64_t TotalBranchCount = 0;
    for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo :
         BB->branch_info()) {
      TotalBranchCount += BranchInfo.Count;
    }
    JT->Count += TotalBranchCount;

    if (opts::IndirectCallPromotion < ICP_JUMP_TABLES &&
        opts::JumpTables < JTS_AGGRESSIVE)
      continue;

    if (JT->Counts.empty())
      JT->Counts.resize(JT->Entries.size());
    auto EI = JT->Entries.begin();
    uint64_t Delta = (JTAddress - JT->getAddress()) / JT->EntrySize;
    EI += Delta;
    while (EI != JT->Entries.end()) {
      const BinaryBasicBlock *TargetBB = getBasicBlockForLabel(*EI);
      if (TargetBB) {
        const BinaryBasicBlock::BinaryBranchInfo &BranchInfo =
            BB->getBranchInfo(*TargetBB);
        assert(Delta < JT->Counts.size());
        JT->Counts[Delta].Count += BranchInfo.Count;
        JT->Counts[Delta].Mispreds += BranchInfo.MispredictedCount;
      }
      ++Delta;
      ++EI;
      // A label marks the start of another jump table.
      if (JT->Labels.count(Delta * JT->EntrySize))
        break;
    }
  }
}

void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const {
  // No reason to merge invalid or empty profiles into BF.
  if (!hasValidProfile())
    return;

  // Update function execution count.
  if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE) {
    BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount());
  }

  // Since we are merging a valid profile, the resulting profile should be
  // valid too: it was either already valid, or it has been cleaned up.
  BF.ProfileMatchRatio = 1.0f;

  // Update basic block and edge counts.
  auto BBMergeI = BF.begin();
  for (BinaryBasicBlock *BB : BasicBlocks) {
    BinaryBasicBlock *BBMerge = &*BBMergeI;
    assert(getIndex(BB) == BF.getIndex(BBMerge));

    // Update basic block count.
    if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) {
      BBMerge->setExecutionCount(BBMerge->getKnownExecutionCount() +
                                 BB->getExecutionCount());
    }

    // Update edge count for successors of this basic block.
    auto BBMergeSI = BBMerge->succ_begin();
    auto BIMergeI = BBMerge->branch_info_begin();
    auto BII = BB->branch_info_begin();
    for (const BinaryBasicBlock *BBSucc : BB->successors()) {
      (void)BBSucc;
      assert(getIndex(BBSucc) == BF.getIndex(*BBMergeSI));

      // At this point no branch count should be set to COUNT_NO_PROFILE.
      assert(BII->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "unexpected unknown branch profile");
      assert(BIMergeI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "unexpected unknown branch profile");

      BIMergeI->Count += BII->Count;

      // When we merge inferred and real fall-through branch data, the merged
      // data is considered inferred.
      if (BII->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED &&
          BIMergeI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
        BIMergeI->MispredictedCount += BII->MispredictedCount;
      } else {
        BIMergeI->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
      }

      ++BBMergeSI;
      ++BII;
      ++BIMergeI;
    }
    assert(BBMergeSI == BBMerge->succ_end());

    ++BBMergeI;
  }
  assert(BBMergeI == BF.end());

  // Merge jump tables profile info.
  auto JTMergeI = BF.JumpTables.begin();
  for (const auto &JTEntry : JumpTables) {
    if (JTMergeI->second->Counts.empty())
      JTMergeI->second->Counts.resize(JTEntry.second->Counts.size());
    auto CountMergeI = JTMergeI->second->Counts.begin();
    for (const JumpTable::JumpInfo &JI : JTEntry.second->Counts) {
      CountMergeI->Count += JI.Count;
      CountMergeI->Mispreds += JI.Mispreds;
      ++CountMergeI;
    }
    assert(CountMergeI == JTMergeI->second->Counts.end());

    ++JTMergeI;
  }
  assert(JTMergeI == BF.JumpTables.end());
}

void BinaryFunction::inferFallThroughCounts() {
  // Work on one basic block at a time, propagating frequency information
  // forward. It is important to walk in layout order.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    const uint64_t BBExecCount = BB->getExecutionCount();

    // Propagate this information to successors, filling in fall-through edges
    // with frequency information.
    if (BB->succ_size() == 0)
      continue;

    // Calculate frequency of outgoing branches from this node according to
    // LBR data.
    uint64_t ReportedBranches = 0;
    for (const BinaryBasicBlock::BinaryBranchInfo &SuccBI :
         BB->branch_info()) {
      if (SuccBI.Count != BinaryBasicBlock::COUNT_NO_PROFILE)
        ReportedBranches += SuccBI.Count;
    }

    // Get taken count of conditional tail call if the block ends with one.
    uint64_t CTCTakenCount = 0;
    const MCInst *CTCInstr = BB->getLastNonPseudoInstr();
    if (CTCInstr && BC.MIB->getConditionalTailCall(*CTCInstr)) {
      CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>(
          *CTCInstr, "CTCTakenCount");
    }

    // Calculate frequency of throws from this node according to LBR data
    // for branching into associated landing pads. Since it is possible
    // for a landing pad to be associated with more than one basic block,
    // we may overestimate the frequency of throws for such blocks.
    uint64_t ReportedThrows = 0;
    for (const BinaryBasicBlock *LP : BB->landing_pads()) {
      ReportedThrows += LP->getExecutionCount();
    }

    const uint64_t TotalReportedJumps =
        ReportedBranches + CTCTakenCount + ReportedThrows;

    // Infer the frequency of the fall-through edge, representing not taking
    // the branch.
    uint64_t Inferred = 0;
    if (BBExecCount > TotalReportedJumps)
      Inferred = BBExecCount - TotalReportedJumps;

    LLVM_DEBUG(
        if (BBExecCount < TotalReportedJumps) dbgs()
            << "Fall-through inference is slightly inconsistent. "
               "exec frequency is less than the outgoing edges frequency ("
            << BBExecCount << " < " << TotalReportedJumps
            << ") for BB at offset 0x"
            << Twine::utohexstr(getAddress() + BB->getOffset()) << '\n';);

    if (BB->succ_size() <= 2) {
      // Skip if the last instruction is an unconditional jump.
      const MCInst *LastInstr = BB->getLastNonPseudoInstr();
      if (LastInstr && (BC.MIB->isUnconditionalBranch(*LastInstr) ||
                        BC.MIB->isIndirectBranch(*LastInstr)))
        continue;
      // If there is a fall-through edge, it will be the last successor.
      auto &SuccBI = *BB->branch_info_rbegin();
      auto &Succ = *BB->succ_rbegin();
      if (SuccBI.Count == 0) {
        SuccBI.Count = Inferred;
        SuccBI.MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
        Succ->ExecutionCount += Inferred;
      }
    }
  }

  return;
}

void BinaryFunction::clearProfile() {
  // Keep the function execution count intact. Only clear basic block and edge
  // counts.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    BB->ExecutionCount = 0;
    for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) {
      BI.Count = 0;
      BI.MispredictedCount = 0;
    }
  }
}

} // namespace bolt
} // namespace llvm