12f09f445SMaksim Panchenko //===- bolt/Core/DynoStats.cpp - Dynamic execution stats ------------------===//
2a34c753fSRafael Auler //
3a34c753fSRafael Auler // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4a34c753fSRafael Auler // See https://llvm.org/LICENSE.txt for license information.
5a34c753fSRafael Auler // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6a34c753fSRafael Auler //
7a34c753fSRafael Auler //===----------------------------------------------------------------------===//
8a34c753fSRafael Auler //
92f09f445SMaksim Panchenko // This file implements the DynoStats class.
102f09f445SMaksim Panchenko //
11a34c753fSRafael Auler //===----------------------------------------------------------------------===//
12a34c753fSRafael Auler 
13a34c753fSRafael Auler #include "bolt/Core/DynoStats.h"
14a34c753fSRafael Auler #include "bolt/Core/BinaryBasicBlock.h"
15a34c753fSRafael Auler #include "bolt/Core/BinaryFunction.h"
16a34c753fSRafael Auler #include "llvm/ADT/StringRef.h"
17a34c753fSRafael Auler #include "llvm/Support/CommandLine.h"
18a34c753fSRafael Auler #include "llvm/Support/Debug.h"
19a34c753fSRafael Auler #include "llvm/Support/raw_ostream.h"
20a34c753fSRafael Auler #include <algorithm>
21a34c753fSRafael Auler #include <string>
22a34c753fSRafael Auler 
23a34c753fSRafael Auler #undef  DEBUG_TYPE
24a34c753fSRafael Auler #define DEBUG_TYPE "bolt"
25a34c753fSRafael Auler 
26a34c753fSRafael Auler using namespace llvm;
27a34c753fSRafael Auler using namespace bolt;
28a34c753fSRafael Auler 
29a34c753fSRafael Auler namespace opts {
30a34c753fSRafael Auler 
31a34c753fSRafael Auler extern cl::OptionCategory BoltCategory;
32a34c753fSRafael Auler 
33a34c753fSRafael Auler static cl::opt<uint32_t>
34a34c753fSRafael Auler DynoStatsScale("dyno-stats-scale",
35a34c753fSRafael Auler   cl::desc("scale to be applied while reporting dyno stats"),
36a34c753fSRafael Auler   cl::Optional,
37a34c753fSRafael Auler   cl::init(1),
38a34c753fSRafael Auler   cl::Hidden,
39a34c753fSRafael Auler   cl::cat(BoltCategory));
40a34c753fSRafael Auler 
41a34c753fSRafael Auler static cl::opt<uint32_t>
42a34c753fSRafael Auler PrintDynoOpcodeStat("print-dyno-opcode-stats",
43a34c753fSRafael Auler   cl::desc("print per instruction opcode dyno stats and the function"
44a34c753fSRafael Auler               "names:BB offsets of the nth highest execution counts"),
45a34c753fSRafael Auler   cl::init(0),
46a34c753fSRafael Auler   cl::Hidden,
47a34c753fSRafael Auler   cl::cat(BoltCategory));
48a34c753fSRafael Auler 
49a34c753fSRafael Auler } // namespace opts
50a34c753fSRafael Auler 
51a34c753fSRafael Auler namespace llvm {
52a34c753fSRafael Auler namespace bolt {
53a34c753fSRafael Auler 
54a34c753fSRafael Auler constexpr const char *DynoStats::Desc[];
55a34c753fSRafael Auler 
operator <(const DynoStats & Other) const56a34c753fSRafael Auler bool DynoStats::operator<(const DynoStats &Other) const {
57a34c753fSRafael Auler   return std::lexicographical_compare(
58a34c753fSRafael Auler       &Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT],
5940c2e0faSMaksim Panchenko       &Other.Stats[FIRST_DYNO_STAT], &Other.Stats[LAST_DYNO_STAT]);
60a34c753fSRafael Auler }
61a34c753fSRafael Auler 
operator ==(const DynoStats & Other) const62a34c753fSRafael Auler bool DynoStats::operator==(const DynoStats &Other) const {
6340c2e0faSMaksim Panchenko   return std::equal(&Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT],
6440c2e0faSMaksim Panchenko                     &Other.Stats[FIRST_DYNO_STAT]);
65a34c753fSRafael Auler }
66a34c753fSRafael Auler 
lessThan(const DynoStats & Other,ArrayRef<Category> Keys) const67a34c753fSRafael Auler bool DynoStats::lessThan(const DynoStats &Other,
68a34c753fSRafael Auler                          ArrayRef<Category> Keys) const {
69a34c753fSRafael Auler   return std::lexicographical_compare(
7040c2e0faSMaksim Panchenko       Keys.begin(), Keys.end(), Keys.begin(), Keys.end(),
71a34c753fSRafael Auler       [this, &Other](const Category A, const Category) {
72a34c753fSRafael Auler         return Stats[A] < Other.Stats[A];
7340c2e0faSMaksim Panchenko       });
74a34c753fSRafael Auler }
75a34c753fSRafael Auler 
print(raw_ostream & OS,const DynoStats * Other,MCInstPrinter * Printer) const76a34c753fSRafael Auler void DynoStats::print(raw_ostream &OS, const DynoStats *Other,
77a34c753fSRafael Auler                       MCInstPrinter *Printer) const {
78a34c753fSRafael Auler   auto printStatWithDelta = [&](const std::string &Name, uint64_t Stat,
79a34c753fSRafael Auler                                 uint64_t OtherStat) {
80a34c753fSRafael Auler     OS << format("%'20lld : ", Stat * opts::DynoStatsScale) << Name;
81a34c753fSRafael Auler     if (Other) {
82a34c753fSRafael Auler       if (Stat != OtherStat) {
83a34c753fSRafael Auler         OtherStat = std::max(OtherStat, uint64_t(1)); // to prevent divide by 0
8440c2e0faSMaksim Panchenko         OS << format(" (%+.1f%%)", ((float)Stat - (float)OtherStat) * 100.0 /
85a34c753fSRafael Auler                                        (float)(OtherStat));
86a34c753fSRafael Auler       } else {
87a34c753fSRafael Auler         OS << " (=)";
88a34c753fSRafael Auler       }
89a34c753fSRafael Auler     }
90a34c753fSRafael Auler     OS << '\n';
91a34c753fSRafael Auler   };
92a34c753fSRafael Auler 
93a34c753fSRafael Auler   for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
9440c2e0faSMaksim Panchenko        Stat < DynoStats::LAST_DYNO_STAT; ++Stat) {
95a34c753fSRafael Auler 
96a34c753fSRafael Auler     if (!PrintAArch64Stats && Stat == DynoStats::VENEER_CALLS_AARCH64)
97a34c753fSRafael Auler       continue;
98a34c753fSRafael Auler 
99a34c753fSRafael Auler     printStatWithDelta(Desc[Stat], Stats[Stat], Other ? (*Other)[Stat] : 0);
100a34c753fSRafael Auler   }
101a34c753fSRafael Auler   if (opts::PrintDynoOpcodeStat && Printer) {
102a34c753fSRafael Auler     outs() << "\nProgram-wide opcode histogram:\n";
103a34c753fSRafael Auler     OS << "              Opcode,   Execution Count,     Max Exec Count, "
104a34c753fSRafael Auler           "Function Name:Offset ...\n";
105a34c753fSRafael Auler     std::vector<std::pair<uint64_t, unsigned>> SortedHistogram;
106a34c753fSRafael Auler     for (const OpcodeStatTy &Stat : OpcodeHistogram)
107a34c753fSRafael Auler       SortedHistogram.emplace_back(Stat.second.first, Stat.first);
108a34c753fSRafael Auler 
109a34c753fSRafael Auler     // Sort using lexicographic ordering
110d2c87699SAmir Ayupov     llvm::sort(SortedHistogram);
111a34c753fSRafael Auler 
112a34c753fSRafael Auler     // Dump in ascending order: Start with Opcode with Highest execution
113a34c753fSRafael Auler     // count.
114a34c753fSRafael Auler     for (auto Stat = SortedHistogram.rbegin(); Stat != SortedHistogram.rend();
115a34c753fSRafael Auler          ++Stat) {
11640c2e0faSMaksim Panchenko       OS << format("%20s,%'18lld", Printer->getOpcodeName(Stat->second).data(),
117a34c753fSRafael Auler                    Stat->first * opts::DynoStatsScale);
118a34c753fSRafael Auler 
119a34c753fSRafael Auler       MaxOpcodeHistogramTy MaxMultiMap =
120a34c753fSRafael Auler           OpcodeHistogram.at(Stat->second).second;
121a34c753fSRafael Auler       // Start with function name:BB offset with highest execution count.
122a34c753fSRafael Auler       for (auto Max = MaxMultiMap.rbegin(); Max != MaxMultiMap.rend(); ++Max) {
123a34c753fSRafael Auler         OS << format(", %'18lld, ", Max->first * opts::DynoStatsScale)
124a34c753fSRafael Auler            << Max->second.first.str() << ':' << Max->second.second;
125a34c753fSRafael Auler       }
126a34c753fSRafael Auler       OS << '\n';
127a34c753fSRafael Auler     }
128a34c753fSRafael Auler   }
129a34c753fSRafael Auler }
130a34c753fSRafael Auler 
operator +=(const DynoStats & Other)131a34c753fSRafael Auler void DynoStats::operator+=(const DynoStats &Other) {
132a34c753fSRafael Auler   for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
13340c2e0faSMaksim Panchenko        Stat < DynoStats::LAST_DYNO_STAT; ++Stat) {
134a34c753fSRafael Auler     Stats[Stat] += Other[Stat];
135a34c753fSRafael Auler   }
136a34c753fSRafael Auler   for (const OpcodeStatTy &Stat : Other.OpcodeHistogram) {
137a34c753fSRafael Auler     auto I = OpcodeHistogram.find(Stat.first);
138a34c753fSRafael Auler     if (I == OpcodeHistogram.end()) {
139a34c753fSRafael Auler       OpcodeHistogram.emplace(Stat);
140a34c753fSRafael Auler     } else {
141a34c753fSRafael Auler       // Merge Other Historgrams, log only the opts::PrintDynoOpcodeStat'th
142a34c753fSRafael Auler       // maximum counts.
143a34c753fSRafael Auler       I->second.first += Stat.second.first;
144a34c753fSRafael Auler       auto &MMap = I->second.second;
145a34c753fSRafael Auler       auto &OtherMMap = Stat.second.second;
146a34c753fSRafael Auler       auto Size = MMap.size();
147a34c753fSRafael Auler       assert(Size <= opts::PrintDynoOpcodeStat);
148a34c753fSRafael Auler       for (auto Iter = OtherMMap.rbegin(); Iter != OtherMMap.rend(); ++Iter) {
149a34c753fSRafael Auler         if (Size++ >= opts::PrintDynoOpcodeStat) {
150a34c753fSRafael Auler           auto First = MMap.begin();
151a34c753fSRafael Auler           if (Iter->first <= First->first)
152a34c753fSRafael Auler             break;
153a34c753fSRafael Auler           MMap.erase(First);
154a34c753fSRafael Auler         }
155a34c753fSRafael Auler         MMap.emplace(*Iter);
156a34c753fSRafael Auler       }
157a34c753fSRafael Auler     }
158a34c753fSRafael Auler   }
159a34c753fSRafael Auler }
160a34c753fSRafael Auler 
getDynoStats(const BinaryFunction & BF)161a34c753fSRafael Auler DynoStats getDynoStats(const BinaryFunction &BF) {
162a34c753fSRafael Auler   auto &BC = BF.getBinaryContext();
163a34c753fSRafael Auler 
164a34c753fSRafael Auler   DynoStats Stats(/*PrintAArch64Stats*/ BC.isAArch64());
165a34c753fSRafael Auler 
166a34c753fSRafael Auler   // Return empty-stats about the function we don't completely understand.
167a34c753fSRafael Auler   if (!BF.isSimple() || !BF.hasValidProfile() || !BF.hasCanonicalCFG())
168a34c753fSRafael Auler     return Stats;
169a34c753fSRafael Auler 
170a34c753fSRafael Auler   // Update enumeration of basic blocks for correct detection of branch'
171a34c753fSRafael Auler   // direction.
172*8477bc67SFabian Parzefall   BF.getLayout().updateLayoutIndices();
173a34c753fSRafael Auler 
174*8477bc67SFabian Parzefall   for (BinaryBasicBlock *const BB : BF.getLayout().blocks()) {
175a34c753fSRafael Auler     // The basic block execution count equals to the sum of incoming branch
176a34c753fSRafael Auler     // frequencies. This may deviate from the sum of outgoing branches of the
177a34c753fSRafael Auler     // basic block especially since the block may contain a function that
178a34c753fSRafael Auler     // does not return or a function that throws an exception.
179a34c753fSRafael Auler     const uint64_t BBExecutionCount = BB->getKnownExecutionCount();
180a34c753fSRafael Auler 
181a34c753fSRafael Auler     // Ignore empty blocks and blocks that were not executed.
182a34c753fSRafael Auler     if (BB->getNumNonPseudos() == 0 || BBExecutionCount == 0)
183a34c753fSRafael Auler       continue;
184a34c753fSRafael Auler 
185a34c753fSRafael Auler     // Count AArch64 linker-inserted veneers
186a34c753fSRafael Auler     if (BF.isAArch64Veneer())
187a34c753fSRafael Auler       Stats[DynoStats::VENEER_CALLS_AARCH64] += BF.getKnownExecutionCount();
188a34c753fSRafael Auler 
189a34c753fSRafael Auler     // Count various instruction types by iterating through all instructions.
190a34c753fSRafael Auler     // When -print-dyno-opcode-stats is on, count per each opcode and record
191a34c753fSRafael Auler     // maximum execution counts.
192a34c753fSRafael Auler     for (const MCInst &Instr : *BB) {
193a34c753fSRafael Auler       if (opts::PrintDynoOpcodeStat) {
194a34c753fSRafael Auler         unsigned Opcode = Instr.getOpcode();
195a34c753fSRafael Auler         auto I = Stats.OpcodeHistogram.find(Opcode);
196a34c753fSRafael Auler         if (I == Stats.OpcodeHistogram.end()) {
197a34c753fSRafael Auler           DynoStats::MaxOpcodeHistogramTy MMap;
198a34c753fSRafael Auler           MMap.emplace(BBExecutionCount,
199a34c753fSRafael Auler                        std::make_pair(BF.getOneName(), BB->getOffset()));
200a34c753fSRafael Auler           Stats.OpcodeHistogram.emplace(Opcode,
201a34c753fSRafael Auler                                         std::make_pair(BBExecutionCount, MMap));
202a34c753fSRafael Auler         } else {
203a34c753fSRafael Auler           I->second.first += BBExecutionCount;
204a34c753fSRafael Auler           bool Insert = true;
205a34c753fSRafael Auler           if (I->second.second.size() == opts::PrintDynoOpcodeStat) {
206a34c753fSRafael Auler             auto First = I->second.second.begin();
207a34c753fSRafael Auler             if (First->first < BBExecutionCount)
208a34c753fSRafael Auler               I->second.second.erase(First);
209a34c753fSRafael Auler             else
210a34c753fSRafael Auler               Insert = false;
211a34c753fSRafael Auler           }
212a34c753fSRafael Auler           if (Insert) {
213a34c753fSRafael Auler             I->second.second.emplace(
214a34c753fSRafael Auler                 BBExecutionCount,
215a34c753fSRafael Auler                 std::make_pair(BF.getOneName(), BB->getOffset()));
216a34c753fSRafael Auler           }
217a34c753fSRafael Auler         }
218a34c753fSRafael Auler       }
219a34c753fSRafael Auler 
220a34c753fSRafael Auler       if (BC.MIB->isStore(Instr)) {
221a34c753fSRafael Auler         Stats[DynoStats::STORES] += BBExecutionCount;
222a34c753fSRafael Auler       }
223a34c753fSRafael Auler       if (BC.MIB->isLoad(Instr)) {
224a34c753fSRafael Auler         Stats[DynoStats::LOADS] += BBExecutionCount;
225a34c753fSRafael Auler       }
226a34c753fSRafael Auler       if (!BC.MIB->isCall(Instr))
227a34c753fSRafael Auler         continue;
228a34c753fSRafael Auler 
229a34c753fSRafael Auler       uint64_t CallFreq = BBExecutionCount;
230a34c753fSRafael Auler       if (BC.MIB->getConditionalTailCall(Instr)) {
231a34c753fSRafael Auler         CallFreq =
232a34c753fSRafael Auler             BC.MIB->getAnnotationWithDefault<uint64_t>(Instr, "CTCTakenCount");
233a34c753fSRafael Auler       }
234a34c753fSRafael Auler       Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
235a34c753fSRafael Auler       if (BC.MIB->isIndirectCall(Instr)) {
236a34c753fSRafael Auler         Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
237a34c753fSRafael Auler       } else if (const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr)) {
238a34c753fSRafael Auler         const BinaryFunction *BF = BC.getFunctionForSymbol(CallSymbol);
239a34c753fSRafael Auler         if (BF && BF->isPLTFunction()) {
240a34c753fSRafael Auler           Stats[DynoStats::PLT_CALLS] += CallFreq;
241a34c753fSRafael Auler 
242a34c753fSRafael Auler           // We don't process PLT functions and hence have to adjust relevant
243a34c753fSRafael Auler           // dynostats here for:
244a34c753fSRafael Auler           //
245a34c753fSRafael Auler           //   jmp *GOT_ENTRY(%rip)
246a34c753fSRafael Auler           //
247a34c753fSRafael Auler           // NOTE: this is arch-specific.
248a34c753fSRafael Auler           Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
249a34c753fSRafael Auler           Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
250a34c753fSRafael Auler           Stats[DynoStats::LOADS] += CallFreq;
251a34c753fSRafael Auler           Stats[DynoStats::INSTRUCTIONS] += CallFreq;
252a34c753fSRafael Auler         }
253a34c753fSRafael Auler       }
254a34c753fSRafael Auler     }
255a34c753fSRafael Auler 
256a34c753fSRafael Auler     Stats[DynoStats::INSTRUCTIONS] += BB->getNumNonPseudos() * BBExecutionCount;
257a34c753fSRafael Auler 
258a34c753fSRafael Auler     // Jump tables.
259a34c753fSRafael Auler     const MCInst *LastInstr = BB->getLastNonPseudoInstr();
260a34c753fSRafael Auler     if (BC.MIB->getJumpTable(*LastInstr)) {
261a34c753fSRafael Auler       Stats[DynoStats::JUMP_TABLE_BRANCHES] += BBExecutionCount;
262a34c753fSRafael Auler       LLVM_DEBUG(
263a34c753fSRafael Auler         static uint64_t MostFrequentJT;
264a34c753fSRafael Auler         if (BBExecutionCount > MostFrequentJT) {
265a34c753fSRafael Auler           MostFrequentJT = BBExecutionCount;
266a34c753fSRafael Auler           dbgs() << "BOLT-INFO: most frequently executed jump table is in "
267a34c753fSRafael Auler                  << "function " << BF << " in basic block " << BB->getName()
268a34c753fSRafael Auler                  << " executed totally " << BBExecutionCount << " times.\n";
269a34c753fSRafael Auler         }
270a34c753fSRafael Auler       );
271a34c753fSRafael Auler       continue;
272a34c753fSRafael Auler     }
273a34c753fSRafael Auler 
274a34c753fSRafael Auler     if (BC.MIB->isIndirectBranch(*LastInstr) && !BC.MIB->isCall(*LastInstr)) {
275a34c753fSRafael Auler       Stats[DynoStats::UNKNOWN_INDIRECT_BRANCHES] += BBExecutionCount;
276a34c753fSRafael Auler       continue;
277a34c753fSRafael Auler     }
278a34c753fSRafael Auler 
279a34c753fSRafael Auler     // Update stats for branches.
280a34c753fSRafael Auler     const MCSymbol *TBB = nullptr;
281a34c753fSRafael Auler     const MCSymbol *FBB = nullptr;
282a34c753fSRafael Auler     MCInst *CondBranch = nullptr;
283a34c753fSRafael Auler     MCInst *UncondBranch = nullptr;
2843652483cSRafael Auler     if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch))
285a34c753fSRafael Auler       continue;
286a34c753fSRafael Auler 
2873652483cSRafael Auler     if (!CondBranch && !UncondBranch)
288a34c753fSRafael Auler       continue;
289a34c753fSRafael Auler 
290a34c753fSRafael Auler     // Simple unconditional branch.
291a34c753fSRafael Auler     if (!CondBranch) {
292a34c753fSRafael Auler       Stats[DynoStats::UNCOND_BRANCHES] += BBExecutionCount;
293a34c753fSRafael Auler       continue;
294a34c753fSRafael Auler     }
295a34c753fSRafael Auler 
296a34c753fSRafael Auler     // CTCs: instruction annotations could be stripped, hence check the number
297a34c753fSRafael Auler     // of successors to identify conditional tail calls.
298a34c753fSRafael Auler     if (BB->succ_size() == 1) {
299a34c753fSRafael Auler       if (BB->branch_info_begin() != BB->branch_info_end())
300a34c753fSRafael Auler         Stats[DynoStats::UNCOND_BRANCHES] += BB->branch_info_begin()->Count;
301a34c753fSRafael Auler       continue;
302a34c753fSRafael Auler     }
303a34c753fSRafael Auler 
304a34c753fSRafael Auler     // Conditional branch that could be followed by an unconditional branch.
305a34c753fSRafael Auler     uint64_t TakenCount = BB->getTakenBranchInfo().Count;
306a34c753fSRafael Auler     if (TakenCount == BinaryBasicBlock::COUNT_NO_PROFILE)
307a34c753fSRafael Auler       TakenCount = 0;
308a34c753fSRafael Auler 
309a34c753fSRafael Auler     uint64_t NonTakenCount = BB->getFallthroughBranchInfo().Count;
310a34c753fSRafael Auler     if (NonTakenCount == BinaryBasicBlock::COUNT_NO_PROFILE)
311a34c753fSRafael Auler       NonTakenCount = 0;
312a34c753fSRafael Auler 
313a34c753fSRafael Auler     if (BF.isForwardBranch(BB, BB->getConditionalSuccessor(true))) {
314a34c753fSRafael Auler       Stats[DynoStats::FORWARD_COND_BRANCHES] += BBExecutionCount;
315a34c753fSRafael Auler       Stats[DynoStats::FORWARD_COND_BRANCHES_TAKEN] += TakenCount;
316a34c753fSRafael Auler     } else {
317a34c753fSRafael Auler       Stats[DynoStats::BACKWARD_COND_BRANCHES] += BBExecutionCount;
318a34c753fSRafael Auler       Stats[DynoStats::BACKWARD_COND_BRANCHES_TAKEN] += TakenCount;
319a34c753fSRafael Auler     }
320a34c753fSRafael Auler 
321a34c753fSRafael Auler     if (UncondBranch) {
322a34c753fSRafael Auler       Stats[DynoStats::UNCOND_BRANCHES] += NonTakenCount;
323a34c753fSRafael Auler     }
324a34c753fSRafael Auler   }
325a34c753fSRafael Auler 
326a34c753fSRafael Auler   return Stats;
327a34c753fSRafael Auler }
328a34c753fSRafael Auler 
329a34c753fSRafael Auler } // namespace bolt
330a34c753fSRafael Auler } // namespace llvm
331