1 //===- bolt/Core/BinaryFunctionProfile.cpp - Profile processing -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements BinaryFunction member functions related to processing 10 // the execution profile. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "bolt/Core/BinaryBasicBlock.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "llvm/Support/CommandLine.h" 17 #include "llvm/Support/Debug.h" 18 #include "llvm/Support/raw_ostream.h" 19 20 #undef DEBUG_TYPE 21 #define DEBUG_TYPE "bolt-prof" 22 23 using namespace llvm; 24 using namespace bolt; 25 26 namespace opts { 27 28 extern cl::OptionCategory BoltOptCategory; 29 30 cl::opt<IndirectCallPromotionType> ICP( 31 "indirect-call-promotion", cl::init(ICP_NONE), 32 cl::desc("indirect call promotion"), 33 cl::values( 34 clEnumValN(ICP_NONE, "none", "do not perform indirect call promotion"), 35 clEnumValN(ICP_CALLS, "calls", "perform ICP on indirect calls"), 36 clEnumValN(ICP_JUMP_TABLES, "jump-tables", 37 "perform ICP on jump tables"), 38 clEnumValN(ICP_ALL, "all", "perform ICP on calls and jump tables")), 39 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 40 41 extern cl::opt<JumpTableSupportLevel> JumpTables; 42 43 static cl::opt<bool> 44 FixFuncCounts("fix-func-counts", 45 cl::desc("adjust function counts based on basic blocks execution count"), 46 cl::init(false), 47 cl::ZeroOrMore, 48 cl::Hidden, 49 cl::cat(BoltOptCategory)); 50 51 static cl::opt<bool> 52 FixBlockCounts("fix-block-counts", 53 cl::desc("adjust block counts based on outgoing branch counts"), 54 cl::init(true), 55 cl::ZeroOrMore, 56 cl::Hidden, 57 cl::cat(BoltOptCategory)); 58 59 static cl::opt<bool> 60 InferFallThroughs("infer-fall-throughs", 61 cl::desc("infer execution count for fall-through blocks"), 62 cl::init(false), 63 cl::ZeroOrMore, 64 cl::Hidden, 65 cl::cat(BoltOptCategory)); 66 67 } // namespace opts 68 69 namespace llvm { 70 namespace bolt { 71 72 void BinaryFunction::postProcessProfile() { 73 if (!hasValidProfile()) { 74 clearProfile(); 75 return; 76 } 77 78 if (!(getProfileFlags() & PF_LBR)) 79 return; 80 81 // If we have at least some branch data for the function indicate that it 82 // was executed. 83 if (opts::FixFuncCounts && ExecutionCount == 0) 84 ExecutionCount = 1; 85 86 // Compute preliminary execution count for each basic block. 87 for (BinaryBasicBlock *BB : BasicBlocks) { 88 if ((!BB->isEntryPoint() && !BB->isLandingPad()) || 89 BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE) 90 BB->ExecutionCount = 0; 91 } 92 for (BinaryBasicBlock *BB : BasicBlocks) { 93 auto SuccBIIter = BB->branch_info_begin(); 94 for (BinaryBasicBlock *Succ : BB->successors()) { 95 // All incoming edges to the primary entry have been accounted for, thus 96 // we skip the update here. 97 if (SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE && 98 Succ != BasicBlocks.front()) 99 Succ->setExecutionCount(Succ->getExecutionCount() + SuccBIIter->Count); 100 ++SuccBIIter; 101 } 102 } 103 104 // Fix for old profiles. 105 for (BinaryBasicBlock *BB : BasicBlocks) { 106 if (BB->size() != 1 || BB->succ_size() != 1) 107 continue; 108 109 if (BB->getKnownExecutionCount() == 0) 110 continue; 111 112 MCInst *Instr = BB->getFirstNonPseudoInstr(); 113 assert(Instr && "expected non-pseudo instr"); 114 if (!BC.MIB->hasAnnotation(*Instr, "NOP")) 115 continue; 116 117 BinaryBasicBlock *FTSuccessor = BB->getSuccessor(); 118 BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*FTSuccessor); 119 if (!BI.Count) { 120 BI.Count = BB->getKnownExecutionCount(); 121 FTSuccessor->setExecutionCount(FTSuccessor->getKnownExecutionCount() + 122 BI.Count); 123 } 124 } 125 126 if (opts::FixBlockCounts) { 127 for (BinaryBasicBlock *BB : BasicBlocks) { 128 // Make sure that execution count of a block is at least the branch count 129 // of an incoming/outgoing jump. 130 auto SuccBIIter = BB->branch_info_begin(); 131 for (BinaryBasicBlock *Succ : BB->successors()) { 132 uint64_t Count = SuccBIIter->Count; 133 if (Count != BinaryBasicBlock::COUNT_NO_PROFILE && Count > 0) { 134 Succ->setExecutionCount(std::max(Succ->getExecutionCount(), Count)); 135 BB->setExecutionCount(std::max(BB->getExecutionCount(), Count)); 136 } 137 ++SuccBIIter; 138 } 139 // Make sure that execution count of a block is at least the number of 140 // function calls from the block. 141 for (MCInst &Inst : *BB) { 142 // Ignore non-call instruction 143 if (!BC.MIB->isCall(Inst)) 144 continue; 145 146 auto CountAnnt = BC.MIB->tryGetAnnotationAs<uint64_t>(Inst, "Count"); 147 if (CountAnnt) 148 BB->setExecutionCount(std::max(BB->getExecutionCount(), *CountAnnt)); 149 } 150 } 151 } 152 153 if (opts::InferFallThroughs) 154 inferFallThroughCounts(); 155 156 // Update profile information for jump tables based on CFG branch data. 157 for (BinaryBasicBlock *BB : BasicBlocks) { 158 const MCInst *LastInstr = BB->getLastNonPseudoInstr(); 159 if (!LastInstr) 160 continue; 161 const uint64_t JTAddress = BC.MIB->getJumpTable(*LastInstr); 162 if (!JTAddress) 163 continue; 164 JumpTable *JT = getJumpTableContainingAddress(JTAddress); 165 if (!JT) 166 continue; 167 168 uint64_t TotalBranchCount = 0; 169 for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo : 170 BB->branch_info()) { 171 TotalBranchCount += BranchInfo.Count; 172 } 173 JT->Count += TotalBranchCount; 174 175 if (opts::ICP < ICP_JUMP_TABLES && opts::JumpTables < JTS_AGGRESSIVE) 176 continue; 177 178 if (JT->Counts.empty()) 179 JT->Counts.resize(JT->Entries.size()); 180 auto EI = JT->Entries.begin(); 181 uint64_t Delta = (JTAddress - JT->getAddress()) / JT->EntrySize; 182 EI += Delta; 183 while (EI != JT->Entries.end()) { 184 const BinaryBasicBlock *TargetBB = getBasicBlockForLabel(*EI); 185 if (TargetBB) { 186 const BinaryBasicBlock::BinaryBranchInfo &BranchInfo = 187 BB->getBranchInfo(*TargetBB); 188 assert(Delta < JT->Counts.size()); 189 JT->Counts[Delta].Count += BranchInfo.Count; 190 JT->Counts[Delta].Mispreds += BranchInfo.MispredictedCount; 191 } 192 ++Delta; 193 ++EI; 194 // A label marks the start of another jump table. 195 if (JT->Labels.count(Delta * JT->EntrySize)) 196 break; 197 } 198 } 199 } 200 201 void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const { 202 // No reason to merge invalid or empty profiles into BF. 203 if (!hasValidProfile()) 204 return; 205 206 // Update function execution count. 207 if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE) 208 BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount()); 209 210 // Since we are merging a valid profile, the new profile should be valid too. 211 // It has either already been valid, or it has been cleaned up. 212 BF.ProfileMatchRatio = 1.0f; 213 214 // Update basic block and edge counts. 215 auto BBMergeI = BF.begin(); 216 for (BinaryBasicBlock *BB : BasicBlocks) { 217 BinaryBasicBlock *BBMerge = &*BBMergeI; 218 assert(getIndex(BB) == BF.getIndex(BBMerge)); 219 220 // Update basic block count. 221 if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) { 222 BBMerge->setExecutionCount(BBMerge->getKnownExecutionCount() + 223 BB->getExecutionCount()); 224 } 225 226 // Update edge count for successors of this basic block. 227 auto BBMergeSI = BBMerge->succ_begin(); 228 auto BIMergeI = BBMerge->branch_info_begin(); 229 auto BII = BB->branch_info_begin(); 230 for (const BinaryBasicBlock *BBSucc : BB->successors()) { 231 (void)BBSucc; 232 assert(getIndex(BBSucc) == BF.getIndex(*BBMergeSI)); 233 234 // At this point no branch count should be set to COUNT_NO_PROFILE. 235 assert(BII->Count != BinaryBasicBlock::COUNT_NO_PROFILE && 236 "unexpected unknown branch profile"); 237 assert(BIMergeI->Count != BinaryBasicBlock::COUNT_NO_PROFILE && 238 "unexpected unknown branch profile"); 239 240 BIMergeI->Count += BII->Count; 241 242 // When we merge inferred and real fall-through branch data, the merged 243 // data is considered inferred. 244 if (BII->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED && 245 BIMergeI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) { 246 BIMergeI->MispredictedCount += BII->MispredictedCount; 247 } else { 248 BIMergeI->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED; 249 } 250 251 ++BBMergeSI; 252 ++BII; 253 ++BIMergeI; 254 } 255 assert(BBMergeSI == BBMerge->succ_end()); 256 257 ++BBMergeI; 258 } 259 assert(BBMergeI == BF.end()); 260 261 // Merge jump tables profile info. 262 auto JTMergeI = BF.JumpTables.begin(); 263 for (const auto &JTEntry : JumpTables) { 264 if (JTMergeI->second->Counts.empty()) 265 JTMergeI->second->Counts.resize(JTEntry.second->Counts.size()); 266 auto CountMergeI = JTMergeI->second->Counts.begin(); 267 for (const JumpTable::JumpInfo &JI : JTEntry.second->Counts) { 268 CountMergeI->Count += JI.Count; 269 CountMergeI->Mispreds += JI.Mispreds; 270 ++CountMergeI; 271 } 272 assert(CountMergeI == JTMergeI->second->Counts.end()); 273 274 ++JTMergeI; 275 } 276 assert(JTMergeI == BF.JumpTables.end()); 277 } 278 279 void BinaryFunction::inferFallThroughCounts() { 280 // Work on a basic block at a time, propagating frequency information 281 // forwards. 282 // It is important to walk in the layout order. 283 for (BinaryBasicBlock *BB : BasicBlocks) { 284 const uint64_t BBExecCount = BB->getExecutionCount(); 285 286 // Propagate this information to successors, filling in fall-through edges 287 // with frequency information 288 if (BB->succ_size() == 0) 289 continue; 290 291 // Calculate frequency of outgoing branches from this node according to 292 // LBR data. 293 uint64_t ReportedBranches = 0; 294 for (const BinaryBasicBlock::BinaryBranchInfo &SuccBI : BB->branch_info()) 295 if (SuccBI.Count != BinaryBasicBlock::COUNT_NO_PROFILE) 296 ReportedBranches += SuccBI.Count; 297 298 // Get taken count of conditional tail call if the block ends with one. 299 uint64_t CTCTakenCount = 0; 300 const MCInst *CTCInstr = BB->getLastNonPseudoInstr(); 301 if (CTCInstr && BC.MIB->getConditionalTailCall(*CTCInstr)) { 302 CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>( 303 *CTCInstr, "CTCTakenCount"); 304 } 305 306 // Calculate frequency of throws from this node according to LBR data 307 // for branching into associated landing pads. Since it is possible 308 // for a landing pad to be associated with more than one basic blocks, 309 // we may overestimate the frequency of throws for such blocks. 310 uint64_t ReportedThrows = 0; 311 for (const BinaryBasicBlock *LP : BB->landing_pads()) 312 ReportedThrows += LP->getExecutionCount(); 313 314 const uint64_t TotalReportedJumps = 315 ReportedBranches + CTCTakenCount + ReportedThrows; 316 317 // Infer the frequency of the fall-through edge, representing not taking the 318 // branch. 319 uint64_t Inferred = 0; 320 if (BBExecCount > TotalReportedJumps) 321 Inferred = BBExecCount - TotalReportedJumps; 322 323 LLVM_DEBUG( 324 if (BBExecCount < TotalReportedJumps) dbgs() 325 << "Fall-through inference is slightly inconsistent. " 326 "exec frequency is less than the outgoing edges frequency (" 327 << BBExecCount << " < " << ReportedBranches 328 << ") for BB at offset 0x" 329 << Twine::utohexstr(getAddress() + BB->getOffset()) << '\n';); 330 331 if (BB->succ_size() <= 2) { 332 // Skip if the last instruction is an unconditional jump. 333 const MCInst *LastInstr = BB->getLastNonPseudoInstr(); 334 if (LastInstr && (BC.MIB->isUnconditionalBranch(*LastInstr) || 335 BC.MIB->isIndirectBranch(*LastInstr))) 336 continue; 337 // If there is an FT it will be the last successor. 338 auto &SuccBI = *BB->branch_info_rbegin(); 339 auto &Succ = *BB->succ_rbegin(); 340 if (SuccBI.Count == 0) { 341 SuccBI.Count = Inferred; 342 SuccBI.MispredictedCount = BinaryBasicBlock::COUNT_INFERRED; 343 Succ->ExecutionCount += Inferred; 344 } 345 } 346 } 347 348 return; 349 } 350 351 void BinaryFunction::clearProfile() { 352 // Keep function execution profile the same. Only clear basic block and edge 353 // counts. 354 for (BinaryBasicBlock *BB : BasicBlocks) { 355 BB->ExecutionCount = 0; 356 for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) { 357 BI.Count = 0; 358 BI.MispredictedCount = 0; 359 } 360 } 361 } 362 363 } // namespace bolt 364 } // namespace llvm 365