1 //===- bolt/Core/BinaryFunctionProfile.cpp - Profile processing -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements BinaryFunction member functions related to processing
10 // the execution profile.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "bolt/Core/BinaryBasicBlock.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "llvm/Support/CommandLine.h"
17 #include "llvm/Support/Debug.h"
18 #include "llvm/Support/raw_ostream.h"
19 
20 #undef  DEBUG_TYPE
21 #define DEBUG_TYPE "bolt-prof"
22 
23 using namespace llvm;
24 using namespace bolt;
25 
namespace opts {

extern cl::OptionCategory BoltOptCategory;

// Selects which kinds of indirect control transfers are subject to indirect
// call promotion. postProcessProfile() consults this (together with
// JumpTables below) to decide whether per-entry jump table counts must be
// populated.
cl::opt<IndirectCallPromotionType> ICP(
    "indirect-call-promotion", cl::init(ICP_NONE),
    cl::desc("indirect call promotion"),
    cl::values(
        clEnumValN(ICP_NONE, "none", "do not perform indirect call promotion"),
        clEnumValN(ICP_CALLS, "calls", "perform ICP on indirect calls"),
        clEnumValN(ICP_JUMP_TABLES, "jump-tables",
                   "perform ICP on jump tables"),
        clEnumValN(ICP_ALL, "all", "perform ICP on calls and jump tables")),
    cl::ZeroOrMore, cl::cat(BoltOptCategory));

extern cl::opt<JumpTableSupportLevel> JumpTables;

// When set, a function whose recorded execution count is zero but that has
// some branch profile data gets its count bumped to 1 (see
// postProcessProfile()).
static cl::opt<bool>
FixFuncCounts("fix-func-counts",
  cl::desc("adjust function counts based on basic blocks execution count"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

// When set (default), a block's execution count is raised to at least the
// largest count on any of its incoming/outgoing edges and call annotations.
static cl::opt<bool>
FixBlockCounts("fix-block-counts",
  cl::desc("adjust block counts based on outgoing branch counts"),
  cl::init(true),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

// When set, run inferFallThroughCounts() to estimate counts for fall-through
// edges that LBR data does not record.
static cl::opt<bool>
InferFallThroughs("infer-fall-throughs",
  cl::desc("infer execution count for fall-through blocks"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

} // namespace opts
68 
69 namespace llvm {
70 namespace bolt {
71 
/// Post-process the execution profile for this function after the CFG has
/// been built: derive basic block counts from edge (LBR) data, apply
/// optional count-fixing heuristics, and propagate branch counts into jump
/// table profile info. Functions without a valid profile have their profile
/// cleared; non-LBR profiles are left untouched.
void BinaryFunction::postProcessProfile() {
  if (!hasValidProfile()) {
    clearProfile();
    return;
  }

  // Everything below assumes edge-level (LBR) data.
  if (!(getProfileFlags() & PF_LBR))
    return;

  // If we have at least some branch data for the function indicate that it
  // was executed.
  if (opts::FixFuncCounts && ExecutionCount == 0)
    ExecutionCount = 1;

  // Compute preliminary execution count for each basic block.
  // Entry points and landing pads keep any externally-assigned count
  // (unless it is COUNT_NO_PROFILE); all other blocks start from zero and
  // accumulate incoming edge counts in the pass below.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    if ((!BB->isEntryPoint() && !BB->isLandingPad()) ||
        BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
      BB->ExecutionCount = 0;
  }
  // Sum successor-edge counts into each successor's execution count.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    auto SuccBIIter = BB->branch_info_begin();
    for (BinaryBasicBlock *Succ : BB->successors()) {
      // All incoming edges to the primary entry have been accounted for, thus
      // we skip the update here.
      if (SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
          Succ != BasicBlocks.front())
        Succ->setExecutionCount(Succ->getExecutionCount() + SuccBIIter->Count);
      ++SuccBIIter;
    }
  }

  // Fix for old profiles: a single-NOP block with a known count but a
  // zero-count fall-through edge gets its count copied onto the edge and
  // added to the successor.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    if (BB->size() != 1 || BB->succ_size() != 1)
      continue;

    if (BB->getKnownExecutionCount() == 0)
      continue;

    MCInst *Instr = BB->getFirstNonPseudoInstr();
    assert(Instr && "expected non-pseudo instr");
    if (!BC.MIB->hasAnnotation(*Instr, "NOP"))
      continue;

    BinaryBasicBlock *FTSuccessor = BB->getSuccessor();
    BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*FTSuccessor);
    if (!BI.Count) {
      BI.Count = BB->getKnownExecutionCount();
      FTSuccessor->setExecutionCount(FTSuccessor->getKnownExecutionCount() +
                                     BI.Count);
    }
  }

  if (opts::FixBlockCounts) {
    for (BinaryBasicBlock *BB : BasicBlocks) {
      // Make sure that execution count of a block is at least the branch count
      // of an incoming/outgoing jump.
      auto SuccBIIter = BB->branch_info_begin();
      for (BinaryBasicBlock *Succ : BB->successors()) {
        uint64_t Count = SuccBIIter->Count;
        if (Count != BinaryBasicBlock::COUNT_NO_PROFILE && Count > 0) {
          Succ->setExecutionCount(std::max(Succ->getExecutionCount(), Count));
          BB->setExecutionCount(std::max(BB->getExecutionCount(), Count));
        }
        ++SuccBIIter;
      }
      // Make sure that execution count of a block is at least the number of
      // function calls from the block.
      for (MCInst &Inst : *BB) {
        // Ignore non-call instruction
        if (!BC.MIB->isCall(Inst))
          continue;

        auto CountAnnt = BC.MIB->tryGetAnnotationAs<uint64_t>(Inst, "Count");
        if (CountAnnt)
          BB->setExecutionCount(std::max(BB->getExecutionCount(), *CountAnnt));
      }
    }
  }

  if (opts::InferFallThroughs)
    inferFallThroughCounts();

  // Update profile information for jump tables based on CFG branch data.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    const MCInst *LastInstr = BB->getLastNonPseudoInstr();
    if (!LastInstr)
      continue;
    const uint64_t JTAddress = BC.MIB->getJumpTable(*LastInstr);
    if (!JTAddress)
      continue;
    JumpTable *JT = getJumpTableContainingAddress(JTAddress);
    if (!JT)
      continue;

    // Total count over all outgoing edges approximates how often the jump
    // table was exercised from this block.
    uint64_t TotalBranchCount = 0;
    for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo :
         BB->branch_info()) {
      TotalBranchCount += BranchInfo.Count;
    }
    JT->Count += TotalBranchCount;

    // Per-entry counts are only needed by ICP on jump tables or aggressive
    // jump table splitting; skip the extra work otherwise.
    if (opts::ICP < ICP_JUMP_TABLES && opts::JumpTables < JTS_AGGRESSIVE)
      continue;

    if (JT->Counts.empty())
      JT->Counts.resize(JT->Entries.size());
    // Start at the entry this block actually jumps through: JTAddress may
    // point into the middle of the jump table memory.
    auto EI = JT->Entries.begin();
    uint64_t Delta = (JTAddress - JT->getAddress()) / JT->EntrySize;
    EI += Delta;
    while (EI != JT->Entries.end()) {
      const BinaryBasicBlock *TargetBB = getBasicBlockForLabel(*EI);
      if (TargetBB) {
        const BinaryBasicBlock::BinaryBranchInfo &BranchInfo =
            BB->getBranchInfo(*TargetBB);
        assert(Delta < JT->Counts.size());
        JT->Counts[Delta].Count += BranchInfo.Count;
        JT->Counts[Delta].Mispreds += BranchInfo.MispredictedCount;
      }
      ++Delta;
      ++EI;
      // A label marks the start of another jump table.
      if (JT->Labels.count(Delta * JT->EntrySize))
        break;
    }
  }
}
200 
/// Merge this function's profile data into \p BF, which is assumed to have
/// an identical CFG (same block order, same successor lists, same jump
/// tables — enforced by asserts). Function, block, edge, and jump table
/// counts are added; BF's profile match ratio is set to 1.0.
void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const {
  // No reason to merge invalid or empty profiles into BF.
  if (!hasValidProfile())
    return;

  // Update function execution count.
  if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE)
    BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount());

  // Since we are merging a valid profile, the new profile should be valid too.
  // It has either already been valid, or it has been cleaned up.
  BF.ProfileMatchRatio = 1.0f;

  // Update basic block and edge counts. Both functions are walked in
  // lockstep; the asserts verify the CFGs stay in sync.
  auto BBMergeI = BF.begin();
  for (BinaryBasicBlock *BB : BasicBlocks) {
    BinaryBasicBlock *BBMerge = &*BBMergeI;
    assert(getIndex(BB) == BF.getIndex(BBMerge));

    // Update basic block count.
    if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) {
      BBMerge->setExecutionCount(BBMerge->getKnownExecutionCount() +
                                 BB->getExecutionCount());
    }

    // Update edge count for successors of this basic block.
    auto BBMergeSI = BBMerge->succ_begin();
    auto BIMergeI = BBMerge->branch_info_begin();
    auto BII = BB->branch_info_begin();
    for (const BinaryBasicBlock *BBSucc : BB->successors()) {
      (void)BBSucc;
      assert(getIndex(BBSucc) == BF.getIndex(*BBMergeSI));

      // At this point no branch count should be set to COUNT_NO_PROFILE.
      assert(BII->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "unexpected unknown branch profile");
      assert(BIMergeI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "unexpected unknown branch profile");

      BIMergeI->Count += BII->Count;

      // When we merge inferred and real fall-through branch data, the merged
      // data is considered inferred.
      if (BII->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED &&
          BIMergeI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
        BIMergeI->MispredictedCount += BII->MispredictedCount;
      } else {
        BIMergeI->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
      }

      ++BBMergeSI;
      ++BII;
      ++BIMergeI;
    }
    assert(BBMergeSI == BBMerge->succ_end());

    ++BBMergeI;
  }
  assert(BBMergeI == BF.end());

  // Merge jump tables profile info. Tables are walked in lockstep as well;
  // an empty destination Counts vector is sized to match before merging.
  auto JTMergeI = BF.JumpTables.begin();
  for (const auto &JTEntry : JumpTables) {
    if (JTMergeI->second->Counts.empty())
      JTMergeI->second->Counts.resize(JTEntry.second->Counts.size());
    auto CountMergeI = JTMergeI->second->Counts.begin();
    for (const JumpTable::JumpInfo &JI : JTEntry.second->Counts) {
      CountMergeI->Count += JI.Count;
      CountMergeI->Mispreds += JI.Mispreds;
      ++CountMergeI;
    }
    assert(CountMergeI == JTMergeI->second->Counts.end());

    ++JTMergeI;
  }
  assert(JTMergeI == BF.JumpTables.end());
}
278 
279 void BinaryFunction::inferFallThroughCounts() {
280   // Work on a basic block at a time, propagating frequency information
281   // forwards.
282   // It is important to walk in the layout order.
283   for (BinaryBasicBlock *BB : BasicBlocks) {
284     const uint64_t BBExecCount = BB->getExecutionCount();
285 
286     // Propagate this information to successors, filling in fall-through edges
287     // with frequency information
288     if (BB->succ_size() == 0)
289       continue;
290 
291     // Calculate frequency of outgoing branches from this node according to
292     // LBR data.
293     uint64_t ReportedBranches = 0;
294     for (const BinaryBasicBlock::BinaryBranchInfo &SuccBI : BB->branch_info())
295       if (SuccBI.Count != BinaryBasicBlock::COUNT_NO_PROFILE)
296         ReportedBranches += SuccBI.Count;
297 
298     // Get taken count of conditional tail call if the block ends with one.
299     uint64_t CTCTakenCount = 0;
300     const MCInst *CTCInstr = BB->getLastNonPseudoInstr();
301     if (CTCInstr && BC.MIB->getConditionalTailCall(*CTCInstr)) {
302       CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>(
303           *CTCInstr, "CTCTakenCount");
304     }
305 
306     // Calculate frequency of throws from this node according to LBR data
307     // for branching into associated landing pads. Since it is possible
308     // for a landing pad to be associated with more than one basic blocks,
309     // we may overestimate the frequency of throws for such blocks.
310     uint64_t ReportedThrows = 0;
311     for (const BinaryBasicBlock *LP : BB->landing_pads())
312       ReportedThrows += LP->getExecutionCount();
313 
314     const uint64_t TotalReportedJumps =
315         ReportedBranches + CTCTakenCount + ReportedThrows;
316 
317     // Infer the frequency of the fall-through edge, representing not taking the
318     // branch.
319     uint64_t Inferred = 0;
320     if (BBExecCount > TotalReportedJumps)
321       Inferred = BBExecCount - TotalReportedJumps;
322 
323     LLVM_DEBUG(
324         if (BBExecCount < TotalReportedJumps) dbgs()
325             << "Fall-through inference is slightly inconsistent. "
326                "exec frequency is less than the outgoing edges frequency ("
327             << BBExecCount << " < " << ReportedBranches
328             << ") for  BB at offset 0x"
329             << Twine::utohexstr(getAddress() + BB->getOffset()) << '\n';);
330 
331     if (BB->succ_size() <= 2) {
332       // Skip if the last instruction is an unconditional jump.
333       const MCInst *LastInstr = BB->getLastNonPseudoInstr();
334       if (LastInstr && (BC.MIB->isUnconditionalBranch(*LastInstr) ||
335                         BC.MIB->isIndirectBranch(*LastInstr)))
336         continue;
337       // If there is an FT it will be the last successor.
338       auto &SuccBI = *BB->branch_info_rbegin();
339       auto &Succ = *BB->succ_rbegin();
340       if (SuccBI.Count == 0) {
341         SuccBI.Count = Inferred;
342         SuccBI.MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
343         Succ->ExecutionCount += Inferred;
344       }
345     }
346   }
347 
348   return;
349 }
350 
351 void BinaryFunction::clearProfile() {
352   // Keep function execution profile the same. Only clear basic block and edge
353   // counts.
354   for (BinaryBasicBlock *BB : BasicBlocks) {
355     BB->ExecutionCount = 0;
356     for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) {
357       BI.Count = 0;
358       BI.MispredictedCount = 0;
359     }
360   }
361 }
362 
363 } // namespace bolt
364 } // namespace llvm
365