1 //===- bolt/Core/BinaryFunctionProfile.cpp - Profile processing -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements BinaryFunction member functions related to processing 10 // the execution profile. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "bolt/Core/BinaryBasicBlock.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "llvm/Support/CommandLine.h" 17 #include "llvm/Support/Debug.h" 18 #include "llvm/Support/raw_ostream.h" 19 20 #undef DEBUG_TYPE 21 #define DEBUG_TYPE "bolt-prof" 22 23 using namespace llvm; 24 using namespace bolt; 25 26 namespace opts { 27 28 extern cl::OptionCategory BoltOptCategory; 29 30 cl::opt<IndirectCallPromotionType> 31 IndirectCallPromotion("indirect-call-promotion", 32 cl::init(ICP_NONE), 33 cl::desc("indirect call promotion"), 34 cl::values( 35 clEnumValN(ICP_NONE, "none", "do not perform indirect call promotion"), 36 clEnumValN(ICP_CALLS, "calls", "perform ICP on indirect calls"), 37 clEnumValN(ICP_JUMP_TABLES, "jump-tables", "perform ICP on jump tables"), 38 clEnumValN(ICP_ALL, "all", "perform ICP on calls and jump tables")), 39 cl::ZeroOrMore, 40 cl::cat(BoltOptCategory)); 41 42 extern cl::opt<JumpTableSupportLevel> JumpTables; 43 44 static cl::opt<bool> 45 FixFuncCounts("fix-func-counts", 46 cl::desc("adjust function counts based on basic blocks execution count"), 47 cl::init(false), 48 cl::ZeroOrMore, 49 cl::Hidden, 50 cl::cat(BoltOptCategory)); 51 52 static cl::opt<bool> 53 FixBlockCounts("fix-block-counts", 54 cl::desc("adjust block counts based on outgoing branch counts"), 55 cl::init(true), 56 cl::ZeroOrMore, 57 cl::Hidden, 58 cl::cat(BoltOptCategory)); 59 60 static cl::opt<bool> 61 InferFallThroughs("infer-fall-throughs", 62 cl::desc("infer execution count for fall-through blocks"), 63 cl::init(false), 64 cl::ZeroOrMore, 65 cl::Hidden, 66 cl::cat(BoltOptCategory)); 67 68 } // namespace opts 69 70 namespace llvm { 71 namespace bolt { 72 73 void BinaryFunction::postProcessProfile() { 74 if (!hasValidProfile()) { 75 clearProfile(); 76 return; 77 } 78 79 if (!(getProfileFlags() & PF_LBR)) 80 return; 81 82 // If we have at least some branch data for the function indicate that it 83 // was executed. 84 if (opts::FixFuncCounts && ExecutionCount == 0) 85 ExecutionCount = 1; 86 87 // Compute preliminary execution count for each basic block. 88 for (BinaryBasicBlock *BB : BasicBlocks) { 89 if ((!BB->isEntryPoint() && !BB->isLandingPad()) || 90 BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE) 91 BB->ExecutionCount = 0; 92 } 93 for (BinaryBasicBlock *BB : BasicBlocks) { 94 auto SuccBIIter = BB->branch_info_begin(); 95 for (BinaryBasicBlock *Succ : BB->successors()) { 96 // All incoming edges to the primary entry have been accounted for, thus 97 // we skip the update here. 98 if (SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE && 99 Succ != BasicBlocks.front()) 100 Succ->setExecutionCount(Succ->getExecutionCount() + SuccBIIter->Count); 101 ++SuccBIIter; 102 } 103 } 104 105 // Fix for old profiles. 106 for (BinaryBasicBlock *BB : BasicBlocks) { 107 if (BB->size() != 1 || BB->succ_size() != 1) 108 continue; 109 110 if (BB->getKnownExecutionCount() == 0) 111 continue; 112 113 MCInst *Instr = BB->getFirstNonPseudoInstr(); 114 assert(Instr && "expected non-pseudo instr"); 115 if (!BC.MIB->hasAnnotation(*Instr, "NOP")) 116 continue; 117 118 BinaryBasicBlock *FTSuccessor = BB->getSuccessor(); 119 BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*FTSuccessor); 120 if (!BI.Count) { 121 BI.Count = BB->getKnownExecutionCount(); 122 FTSuccessor->setExecutionCount(FTSuccessor->getKnownExecutionCount() + 123 BI.Count); 124 } 125 } 126 127 if (opts::FixBlockCounts) { 128 for (BinaryBasicBlock *BB : BasicBlocks) { 129 // Make sure that execution count of a block is at least the branch count 130 // of an incoming/outgoing jump. 131 auto SuccBIIter = BB->branch_info_begin(); 132 for (BinaryBasicBlock *Succ : BB->successors()) { 133 uint64_t Count = SuccBIIter->Count; 134 if (Count != BinaryBasicBlock::COUNT_NO_PROFILE && Count > 0) { 135 Succ->setExecutionCount(std::max(Succ->getExecutionCount(), Count)); 136 BB->setExecutionCount(std::max(BB->getExecutionCount(), Count)); 137 } 138 ++SuccBIIter; 139 } 140 // Make sure that execution count of a block is at least the number of 141 // function calls from the block. 142 for (MCInst &Inst : *BB) { 143 // Ignore non-call instruction 144 if (!BC.MIB->isCall(Inst)) 145 continue; 146 147 auto CountAnnt = BC.MIB->tryGetAnnotationAs<uint64_t>(Inst, "Count"); 148 if (CountAnnt) 149 BB->setExecutionCount(std::max(BB->getExecutionCount(), *CountAnnt)); 150 } 151 } 152 } 153 154 if (opts::InferFallThroughs) 155 inferFallThroughCounts(); 156 157 // Update profile information for jump tables based on CFG branch data. 158 for (BinaryBasicBlock *BB : BasicBlocks) { 159 const MCInst *LastInstr = BB->getLastNonPseudoInstr(); 160 if (!LastInstr) 161 continue; 162 const uint64_t JTAddress = BC.MIB->getJumpTable(*LastInstr); 163 if (!JTAddress) 164 continue; 165 JumpTable *JT = getJumpTableContainingAddress(JTAddress); 166 if (!JT) 167 continue; 168 169 uint64_t TotalBranchCount = 0; 170 for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo : 171 BB->branch_info()) { 172 TotalBranchCount += BranchInfo.Count; 173 } 174 JT->Count += TotalBranchCount; 175 176 if (opts::IndirectCallPromotion < ICP_JUMP_TABLES && 177 opts::JumpTables < JTS_AGGRESSIVE) 178 continue; 179 180 if (JT->Counts.empty()) 181 JT->Counts.resize(JT->Entries.size()); 182 auto EI = JT->Entries.begin(); 183 uint64_t Delta = (JTAddress - JT->getAddress()) / JT->EntrySize; 184 EI += Delta; 185 while (EI != JT->Entries.end()) { 186 const BinaryBasicBlock *TargetBB = getBasicBlockForLabel(*EI); 187 if (TargetBB) { 188 const BinaryBasicBlock::BinaryBranchInfo &BranchInfo = 189 BB->getBranchInfo(*TargetBB); 190 assert(Delta < JT->Counts.size()); 191 JT->Counts[Delta].Count += BranchInfo.Count; 192 JT->Counts[Delta].Mispreds += BranchInfo.MispredictedCount; 193 } 194 ++Delta; 195 ++EI; 196 // A label marks the start of another jump table. 197 if (JT->Labels.count(Delta * JT->EntrySize)) 198 break; 199 } 200 } 201 } 202 203 void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const { 204 // No reason to merge invalid or empty profiles into BF. 205 if (!hasValidProfile()) 206 return; 207 208 // Update function execution count. 209 if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE) 210 BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount()); 211 212 // Since we are merging a valid profile, the new profile should be valid too. 213 // It has either already been valid, or it has been cleaned up. 214 BF.ProfileMatchRatio = 1.0f; 215 216 // Update basic block and edge counts. 217 auto BBMergeI = BF.begin(); 218 for (BinaryBasicBlock *BB : BasicBlocks) { 219 BinaryBasicBlock *BBMerge = &*BBMergeI; 220 assert(getIndex(BB) == BF.getIndex(BBMerge)); 221 222 // Update basic block count. 223 if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) { 224 BBMerge->setExecutionCount(BBMerge->getKnownExecutionCount() + 225 BB->getExecutionCount()); 226 } 227 228 // Update edge count for successors of this basic block. 229 auto BBMergeSI = BBMerge->succ_begin(); 230 auto BIMergeI = BBMerge->branch_info_begin(); 231 auto BII = BB->branch_info_begin(); 232 for (const BinaryBasicBlock *BBSucc : BB->successors()) { 233 (void)BBSucc; 234 assert(getIndex(BBSucc) == BF.getIndex(*BBMergeSI)); 235 236 // At this point no branch count should be set to COUNT_NO_PROFILE. 237 assert(BII->Count != BinaryBasicBlock::COUNT_NO_PROFILE && 238 "unexpected unknown branch profile"); 239 assert(BIMergeI->Count != BinaryBasicBlock::COUNT_NO_PROFILE && 240 "unexpected unknown branch profile"); 241 242 BIMergeI->Count += BII->Count; 243 244 // When we merge inferred and real fall-through branch data, the merged 245 // data is considered inferred. 246 if (BII->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED && 247 BIMergeI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) { 248 BIMergeI->MispredictedCount += BII->MispredictedCount; 249 } else { 250 BIMergeI->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED; 251 } 252 253 ++BBMergeSI; 254 ++BII; 255 ++BIMergeI; 256 } 257 assert(BBMergeSI == BBMerge->succ_end()); 258 259 ++BBMergeI; 260 } 261 assert(BBMergeI == BF.end()); 262 263 // Merge jump tables profile info. 264 auto JTMergeI = BF.JumpTables.begin(); 265 for (const auto &JTEntry : JumpTables) { 266 if (JTMergeI->second->Counts.empty()) 267 JTMergeI->second->Counts.resize(JTEntry.second->Counts.size()); 268 auto CountMergeI = JTMergeI->second->Counts.begin(); 269 for (const JumpTable::JumpInfo &JI : JTEntry.second->Counts) { 270 CountMergeI->Count += JI.Count; 271 CountMergeI->Mispreds += JI.Mispreds; 272 ++CountMergeI; 273 } 274 assert(CountMergeI == JTMergeI->second->Counts.end()); 275 276 ++JTMergeI; 277 } 278 assert(JTMergeI == BF.JumpTables.end()); 279 } 280 281 void BinaryFunction::inferFallThroughCounts() { 282 // Work on a basic block at a time, propagating frequency information 283 // forwards. 284 // It is important to walk in the layout order. 285 for (BinaryBasicBlock *BB : BasicBlocks) { 286 const uint64_t BBExecCount = BB->getExecutionCount(); 287 288 // Propagate this information to successors, filling in fall-through edges 289 // with frequency information 290 if (BB->succ_size() == 0) 291 continue; 292 293 // Calculate frequency of outgoing branches from this node according to 294 // LBR data. 295 uint64_t ReportedBranches = 0; 296 for (const BinaryBasicBlock::BinaryBranchInfo &SuccBI : BB->branch_info()) 297 if (SuccBI.Count != BinaryBasicBlock::COUNT_NO_PROFILE) 298 ReportedBranches += SuccBI.Count; 299 300 // Get taken count of conditional tail call if the block ends with one. 301 uint64_t CTCTakenCount = 0; 302 const MCInst *CTCInstr = BB->getLastNonPseudoInstr(); 303 if (CTCInstr && BC.MIB->getConditionalTailCall(*CTCInstr)) { 304 CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>( 305 *CTCInstr, "CTCTakenCount"); 306 } 307 308 // Calculate frequency of throws from this node according to LBR data 309 // for branching into associated landing pads. Since it is possible 310 // for a landing pad to be associated with more than one basic blocks, 311 // we may overestimate the frequency of throws for such blocks. 312 uint64_t ReportedThrows = 0; 313 for (const BinaryBasicBlock *LP : BB->landing_pads()) 314 ReportedThrows += LP->getExecutionCount(); 315 316 const uint64_t TotalReportedJumps = 317 ReportedBranches + CTCTakenCount + ReportedThrows; 318 319 // Infer the frequency of the fall-through edge, representing not taking the 320 // branch. 321 uint64_t Inferred = 0; 322 if (BBExecCount > TotalReportedJumps) 323 Inferred = BBExecCount - TotalReportedJumps; 324 325 LLVM_DEBUG( 326 if (BBExecCount < TotalReportedJumps) dbgs() 327 << "Fall-through inference is slightly inconsistent. " 328 "exec frequency is less than the outgoing edges frequency (" 329 << BBExecCount << " < " << ReportedBranches 330 << ") for BB at offset 0x" 331 << Twine::utohexstr(getAddress() + BB->getOffset()) << '\n';); 332 333 if (BB->succ_size() <= 2) { 334 // Skip if the last instruction is an unconditional jump. 335 const MCInst *LastInstr = BB->getLastNonPseudoInstr(); 336 if (LastInstr && (BC.MIB->isUnconditionalBranch(*LastInstr) || 337 BC.MIB->isIndirectBranch(*LastInstr))) 338 continue; 339 // If there is an FT it will be the last successor. 340 auto &SuccBI = *BB->branch_info_rbegin(); 341 auto &Succ = *BB->succ_rbegin(); 342 if (SuccBI.Count == 0) { 343 SuccBI.Count = Inferred; 344 SuccBI.MispredictedCount = BinaryBasicBlock::COUNT_INFERRED; 345 Succ->ExecutionCount += Inferred; 346 } 347 } 348 } 349 350 return; 351 } 352 353 void BinaryFunction::clearProfile() { 354 // Keep function execution profile the same. Only clear basic block and edge 355 // counts. 356 for (BinaryBasicBlock *BB : BasicBlocks) { 357 BB->ExecutionCount = 0; 358 for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) { 359 BI.Count = 0; 360 BI.MispredictedCount = 0; 361 } 362 } 363 } 364 365 } // namespace bolt 366 } // namespace llvm 367