1 //===--- BinaryFunctionProfile.cpp                                 --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10 
11 #include "bolt/Core/BinaryBasicBlock.h"
12 #include "bolt/Core/BinaryFunction.h"
13 #include "llvm/Support/CommandLine.h"
14 #include "llvm/Support/Debug.h"
15 #include "llvm/Support/raw_ostream.h"
16 
17 #undef  DEBUG_TYPE
18 #define DEBUG_TYPE "bolt-prof"
19 
20 using namespace llvm;
21 using namespace bolt;
22 
23 namespace opts {
24 
25 extern cl::OptionCategory BoltOptCategory;
26 
27 cl::opt<IndirectCallPromotionType>
28 IndirectCallPromotion("indirect-call-promotion",
29   cl::init(ICP_NONE),
30   cl::desc("indirect call promotion"),
31   cl::values(
32     clEnumValN(ICP_NONE, "none", "do not perform indirect call promotion"),
33     clEnumValN(ICP_CALLS, "calls", "perform ICP on indirect calls"),
34     clEnumValN(ICP_JUMP_TABLES, "jump-tables", "perform ICP on jump tables"),
35     clEnumValN(ICP_ALL, "all", "perform ICP on calls and jump tables")),
36   cl::ZeroOrMore,
37   cl::cat(BoltOptCategory));
38 
39 extern cl::opt<JumpTableSupportLevel> JumpTables;
40 
41 static cl::opt<bool>
42 FixFuncCounts("fix-func-counts",
43   cl::desc("adjust function counts based on basic blocks execution count"),
44   cl::init(false),
45   cl::ZeroOrMore,
46   cl::Hidden,
47   cl::cat(BoltOptCategory));
48 
49 static cl::opt<bool>
50 FixBlockCounts("fix-block-counts",
51   cl::desc("adjust block counts based on outgoing branch counts"),
52   cl::init(true),
53   cl::ZeroOrMore,
54   cl::Hidden,
55   cl::cat(BoltOptCategory));
56 
57 static cl::opt<bool>
58 InferFallThroughs("infer-fall-throughs",
59   cl::desc("infer execution count for fall-through blocks"),
60   cl::init(false),
61   cl::ZeroOrMore,
62   cl::Hidden,
63   cl::cat(BoltOptCategory));
64 
65 } // namespace opts
66 
67 namespace llvm {
68 namespace bolt {
69 
70 void BinaryFunction::postProcessProfile() {
71   if (!hasValidProfile()) {
72     clearProfile();
73     return;
74   }
75 
76   if (!(getProfileFlags() & PF_LBR))
77     return;
78 
79   // If we have at least some branch data for the function indicate that it
80   // was executed.
81   if (opts::FixFuncCounts && ExecutionCount == 0) {
82     ExecutionCount = 1;
83   }
84 
85   // Compute preliminary execution count for each basic block.
86   for (BinaryBasicBlock *BB : BasicBlocks) {
87     if ((!BB->isEntryPoint() && !BB->isLandingPad()) ||
88         BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
89       BB->ExecutionCount = 0;
90   }
91   for (BinaryBasicBlock *BB : BasicBlocks) {
92     auto SuccBIIter = BB->branch_info_begin();
93     for (BinaryBasicBlock *Succ : BB->successors()) {
94       // All incoming edges to the primary entry have been accounted for, thus
95       // we skip the update here.
96       if (SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
97           Succ != BasicBlocks.front())
98         Succ->setExecutionCount(Succ->getExecutionCount() + SuccBIIter->Count);
99       ++SuccBIIter;
100     }
101   }
102 
103   if (opts::FixBlockCounts) {
104     for (BinaryBasicBlock *BB : BasicBlocks) {
105       // Make sure that execution count of a block is at least the branch count
106       // of an incoming/outgoing jump.
107       auto SuccBIIter = BB->branch_info_begin();
108       for (BinaryBasicBlock *Succ : BB->successors()) {
109         uint64_t Count = SuccBIIter->Count;
110         if (Count != BinaryBasicBlock::COUNT_NO_PROFILE && Count > 0) {
111           Succ->setExecutionCount(std::max(Succ->getExecutionCount(), Count));
112           BB->setExecutionCount(std::max(BB->getExecutionCount(), Count));
113         }
114         ++SuccBIIter;
115       }
116       // Make sure that execution count of a block is at least the number of
117       // function calls from the block.
118       for (MCInst &Inst : *BB) {
119         // Ignore non-call instruction
120         if (!BC.MIB->isCall(Inst))
121           continue;
122 
123         auto CountAnnt = BC.MIB->tryGetAnnotationAs<uint64_t>(Inst, "Count");
124         if (CountAnnt) {
125           BB->setExecutionCount(std::max(BB->getExecutionCount(), *CountAnnt));
126         }
127       }
128     }
129   }
130 
131   if (opts::InferFallThroughs)
132     inferFallThroughCounts();
133 
134   // Update profile information for jump tables based on CFG branch data.
135   for (BinaryBasicBlock *BB : BasicBlocks) {
136     const MCInst *LastInstr = BB->getLastNonPseudoInstr();
137     if (!LastInstr)
138       continue;
139     const uint64_t JTAddress = BC.MIB->getJumpTable(*LastInstr);
140     if (!JTAddress)
141       continue;
142     JumpTable *JT = getJumpTableContainingAddress(JTAddress);
143     if (!JT)
144       continue;
145 
146     uint64_t TotalBranchCount = 0;
147     for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo :
148          BB->branch_info()) {
149       TotalBranchCount += BranchInfo.Count;
150     }
151     JT->Count += TotalBranchCount;
152 
153     if (opts::IndirectCallPromotion < ICP_JUMP_TABLES &&
154         opts::JumpTables < JTS_AGGRESSIVE)
155       continue;
156 
157     if (JT->Counts.empty())
158       JT->Counts.resize(JT->Entries.size());
159     auto EI = JT->Entries.begin();
160     uint64_t Delta = (JTAddress - JT->getAddress()) / JT->EntrySize;
161     EI += Delta;
162     while (EI != JT->Entries.end()) {
163       const BinaryBasicBlock *TargetBB = getBasicBlockForLabel(*EI);
164       if (TargetBB) {
165         const BinaryBasicBlock::BinaryBranchInfo &BranchInfo =
166             BB->getBranchInfo(*TargetBB);
167         assert(Delta < JT->Counts.size());
168         JT->Counts[Delta].Count += BranchInfo.Count;
169         JT->Counts[Delta].Mispreds += BranchInfo.MispredictedCount;
170       }
171       ++Delta;
172       ++EI;
173       // A label marks the start of another jump table.
174       if (JT->Labels.count(Delta * JT->EntrySize))
175         break;
176     }
177   }
178 }
179 
180 void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const {
181   // No reason to merge invalid or empty profiles into BF.
182   if (!hasValidProfile())
183     return;
184 
185   // Update function execution count.
186   if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE) {
187     BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount());
188   }
189 
190   // Since we are merging a valid profile, the new profile should be valid too.
191   // It has either already been valid, or it has been cleaned up.
192   BF.ProfileMatchRatio = 1.0f;
193 
194   // Update basic block and edge counts.
195   auto BBMergeI = BF.begin();
196   for (BinaryBasicBlock *BB : BasicBlocks) {
197     BinaryBasicBlock *BBMerge = &*BBMergeI;
198     assert(getIndex(BB) == BF.getIndex(BBMerge));
199 
200     // Update basic block count.
201     if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) {
202       BBMerge->setExecutionCount(BBMerge->getKnownExecutionCount() +
203                                  BB->getExecutionCount());
204     }
205 
206     // Update edge count for successors of this basic block.
207     auto BBMergeSI = BBMerge->succ_begin();
208     auto BIMergeI = BBMerge->branch_info_begin();
209     auto BII = BB->branch_info_begin();
210     for (const BinaryBasicBlock *BBSucc : BB->successors()) {
211       (void)BBSucc;
212       assert(getIndex(BBSucc) == BF.getIndex(*BBMergeSI));
213 
214       // At this point no branch count should be set to COUNT_NO_PROFILE.
215       assert(BII->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
216              "unexpected unknown branch profile");
217       assert(BIMergeI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
218              "unexpected unknown branch profile");
219 
220       BIMergeI->Count += BII->Count;
221 
222       // When we merge inferred and real fall-through branch data, the merged
223       // data is considered inferred.
224       if (BII->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED &&
225           BIMergeI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
226         BIMergeI->MispredictedCount += BII->MispredictedCount;
227       } else {
228         BIMergeI->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
229       }
230 
231       ++BBMergeSI;
232       ++BII;
233       ++BIMergeI;
234     }
235     assert(BBMergeSI == BBMerge->succ_end());
236 
237     ++BBMergeI;
238   }
239   assert(BBMergeI == BF.end());
240 
241   // Merge jump tables profile info.
242   auto JTMergeI = BF.JumpTables.begin();
243   for (const auto &JTEntry : JumpTables) {
244     if (JTMergeI->second->Counts.empty())
245       JTMergeI->second->Counts.resize(JTEntry.second->Counts.size());
246     auto CountMergeI = JTMergeI->second->Counts.begin();
247     for (const JumpTable::JumpInfo &JI : JTEntry.second->Counts) {
248       CountMergeI->Count += JI.Count;
249       CountMergeI->Mispreds += JI.Mispreds;
250       ++CountMergeI;
251     }
252     assert(CountMergeI == JTMergeI->second->Counts.end());
253 
254     ++JTMergeI;
255   }
256   assert(JTMergeI == BF.JumpTables.end());
257 }
258 
259 void BinaryFunction::inferFallThroughCounts() {
260   // Work on a basic block at a time, propagating frequency information
261   // forwards.
262   // It is important to walk in the layout order.
263   for (BinaryBasicBlock *BB : BasicBlocks) {
264     const uint64_t BBExecCount = BB->getExecutionCount();
265 
266     // Propagate this information to successors, filling in fall-through edges
267     // with frequency information
268     if (BB->succ_size() == 0)
269       continue;
270 
271     // Calculate frequency of outgoing branches from this node according to
272     // LBR data.
273     uint64_t ReportedBranches = 0;
274     for (const BinaryBasicBlock::BinaryBranchInfo &SuccBI : BB->branch_info()) {
275       if (SuccBI.Count != BinaryBasicBlock::COUNT_NO_PROFILE)
276         ReportedBranches += SuccBI.Count;
277     }
278 
279     // Get taken count of conditional tail call if the block ends with one.
280     uint64_t CTCTakenCount = 0;
281     const MCInst *CTCInstr = BB->getLastNonPseudoInstr();
282     if (CTCInstr && BC.MIB->getConditionalTailCall(*CTCInstr)) {
283       CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>(
284           *CTCInstr, "CTCTakenCount");
285     }
286 
287     // Calculate frequency of throws from this node according to LBR data
288     // for branching into associated landing pads. Since it is possible
289     // for a landing pad to be associated with more than one basic blocks,
290     // we may overestimate the frequency of throws for such blocks.
291     uint64_t ReportedThrows = 0;
292     for (const BinaryBasicBlock *LP : BB->landing_pads()) {
293       ReportedThrows += LP->getExecutionCount();
294     }
295 
296     const uint64_t TotalReportedJumps =
297         ReportedBranches + CTCTakenCount + ReportedThrows;
298 
299     // Infer the frequency of the fall-through edge, representing not taking the
300     // branch.
301     uint64_t Inferred = 0;
302     if (BBExecCount > TotalReportedJumps)
303       Inferred = BBExecCount - TotalReportedJumps;
304 
305     LLVM_DEBUG(
306         if (BBExecCount < TotalReportedJumps) dbgs()
307             << "Fall-through inference is slightly inconsistent. "
308                "exec frequency is less than the outgoing edges frequency ("
309             << BBExecCount << " < " << ReportedBranches
310             << ") for  BB at offset 0x"
311             << Twine::utohexstr(getAddress() + BB->getOffset()) << '\n';);
312 
313     if (BB->succ_size() <= 2) {
314       // Skip if the last instruction is an unconditional jump.
315       const MCInst *LastInstr = BB->getLastNonPseudoInstr();
316       if (LastInstr && (BC.MIB->isUnconditionalBranch(*LastInstr) ||
317                         BC.MIB->isIndirectBranch(*LastInstr)))
318         continue;
319       // If there is an FT it will be the last successor.
320       auto &SuccBI = *BB->branch_info_rbegin();
321       auto &Succ = *BB->succ_rbegin();
322       if (SuccBI.Count == 0) {
323         SuccBI.Count = Inferred;
324         SuccBI.MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
325         Succ->ExecutionCount += Inferred;
326       }
327     }
328   }
329 
330   return;
331 }
332 
333 void BinaryFunction::clearProfile() {
334   // Keep function execution profile the same. Only clear basic block and edge
335   // counts.
336   for (BinaryBasicBlock *BB : BasicBlocks) {
337     BB->ExecutionCount = 0;
338     for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) {
339       BI.Count = 0;
340       BI.MispredictedCount = 0;
341     }
342   }
343 }
344 
345 } // namespace bolt
346 } // namespace llvm
347