1 //===- bolt/Passes/AsmDump.cpp - Dump BinaryFunction into assembly --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the AsmDumpPass class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/AsmDump.h"
14 #include "llvm/CodeGen/AsmPrinter.h"
15 #include "llvm/MC/TargetRegistry.h"
16 #include "llvm/Support/FileSystem.h"
17 #include "llvm/Support/Path.h"
18 #include "llvm/Target/TargetMachine.h"
19 #include <unordered_set>
20 
21 #define DEBUG_TYPE "asm-dump"
22 
23 using namespace llvm;
24 
25 namespace opts {
26 extern bool shouldPrint(const bolt::BinaryFunction &Function);
27 extern cl::OptionCategory BoltCategory;
28 extern cl::opt<unsigned> Verbosity;
29 
30 cl::opt<std::string> AsmDump("asm-dump",
31                              cl::desc("dump function into assembly"),
32                              cl::value_desc("dump folder"), cl::ValueOptional,
33                              cl::Hidden, cl::cat(BoltCategory));
34 } // end namespace opts
35 
36 namespace llvm {
37 namespace bolt {
38 
dumpCFI(const BinaryFunction & BF,const MCInst & Instr,AsmPrinter & MAP)39 void dumpCFI(const BinaryFunction &BF, const MCInst &Instr, AsmPrinter &MAP) {
40   const MCCFIInstruction *CFIInstr = BF.getCFIFor(Instr);
41   switch (CFIInstr->getOperation()) {
42   // Skip unsupported CFI instructions.
43   case MCCFIInstruction::OpRememberState:
44   case MCCFIInstruction::OpRestoreState:
45     if (opts::Verbosity >= 2)
46       errs()
47           << "BOLT-WARNING: AsmDump: skipping unsupported CFI instruction in "
48           << BF << ".\n";
49 
50     return;
51 
52   default:
53     // Emit regular CFI instructions.
54     MAP.emitCFIInstruction(*CFIInstr);
55   }
56 }
57 
dumpJumpTableFdata(raw_ostream & OS,const BinaryFunction & BF,const MCInst & Instr,const std::string & BranchLabel)58 void dumpJumpTableFdata(raw_ostream &OS, const BinaryFunction &BF,
59                         const MCInst &Instr, const std::string &BranchLabel) {
60   StringRef FunctionName = BF.getOneName();
61   const JumpTable *JT = BF.getJumpTable(Instr);
62   for (uint32_t i = 0; i < JT->Entries.size(); ++i) {
63     StringRef TargetName = JT->Entries[i]->getName();
64     const uint64_t Mispreds = JT->Counts[i].Mispreds;
65     const uint64_t Count = JT->Counts[i].Count;
66     OS << "# FDATA: 1 " << FunctionName << " #" << BranchLabel << "# "
67        << "1 " << FunctionName << " #" << TargetName << "# " << Mispreds << " "
68        << Count << '\n';
69   }
70 }
71 
dumpTailCallFdata(raw_ostream & OS,const BinaryFunction & BF,const MCInst & Instr,const std::string & BranchLabel)72 void dumpTailCallFdata(raw_ostream &OS, const BinaryFunction &BF,
73                        const MCInst &Instr, const std::string &BranchLabel) {
74   const BinaryContext &BC = BF.getBinaryContext();
75   StringRef FunctionName = BF.getOneName();
76   auto CallFreq = BC.MIB->getAnnotationWithDefault<uint64_t>(Instr, "Count");
77   const MCSymbol *Target = BC.MIB->getTargetSymbol(Instr);
78   const BinaryFunction *TargetBF = BC.getFunctionForSymbol(Target);
79   if (!TargetBF)
80     return;
81   OS << "# FDATA: 1 " << FunctionName << " #" << BranchLabel << "# "
82      << "1 " << TargetBF->getPrintName() << " 0 "
83      << "0 " << CallFreq << '\n';
84 }
85 
dumpTargetFunctionStub(raw_ostream & OS,const BinaryContext & BC,const MCSymbol * CalleeSymb,const BinarySection * & LastCS)86 void dumpTargetFunctionStub(raw_ostream &OS, const BinaryContext &BC,
87                             const MCSymbol *CalleeSymb,
88                             const BinarySection *&LastCS) {
89   const BinaryFunction *CalleeFunc = BC.getFunctionForSymbol(CalleeSymb);
90   if (!CalleeFunc || CalleeFunc->isPLTFunction())
91     return;
92 
93   if (CalleeFunc->getOriginSection() != LastCS) {
94     OS << ".section " << CalleeFunc->getOriginSectionName() << '\n';
95     LastCS = CalleeFunc->getOriginSection();
96   }
97   StringRef CalleeName = CalleeFunc->getOneName();
98   OS << ".set \"" << CalleeName << "\", 0\n";
99 }
100 
dumpJumpTableSymbols(raw_ostream & OS,const JumpTable * JT,AsmPrinter & MAP,const BinarySection * & LastBS)101 void dumpJumpTableSymbols(raw_ostream &OS, const JumpTable *JT, AsmPrinter &MAP,
102                           const BinarySection *&LastBS) {
103   if (&JT->getSection() != LastBS) {
104     OS << ".section " << JT->getSectionName() << '\n';
105     LastBS = &JT->getSection();
106   }
107   OS << "\"" << JT->getName() << "\":\n";
108   for (MCSymbol *JTEntry : JT->Entries)
109     MAP.OutStreamer->emitSymbolValue(JTEntry, JT->OutputEntrySize);
110   OS << '\n';
111 }
112 
dumpBinaryDataSymbols(raw_ostream & OS,const BinaryData * BD,const BinarySection * & LastBS)113 void dumpBinaryDataSymbols(raw_ostream &OS, const BinaryData *BD,
114                            const BinarySection *&LastBS) {
115   if (BD->isJumpTable())
116     return;
117   if (&BD->getSection() != LastBS) {
118     OS << ".section " << BD->getSectionName() << '\n';
119     LastBS = &BD->getSection();
120   }
121   OS << "\"" << BD->getName() << "\": ";
122   OS << '\n';
123 }
124 
dumpFunction(const BinaryFunction & BF)125 void dumpFunction(const BinaryFunction &BF) {
126   const BinaryContext &BC = BF.getBinaryContext();
127   if (!opts::shouldPrint(BF))
128     return;
129 
130   // Make sure the new directory exists, creating it if necessary.
131   if (!opts::AsmDump.empty()) {
132     if (std::error_code EC = sys::fs::create_directories(opts::AsmDump)) {
133       errs() << "BOLT-ERROR: could not create directory '" << opts::AsmDump
134              << "': " << EC.message() << '\n';
135       exit(1);
136     }
137   }
138 
139   std::string PrintName = BF.getPrintName();
140   std::replace(PrintName.begin(), PrintName.end(), '/', '-');
141   std::string Filename =
142       opts::AsmDump.empty()
143           ? (PrintName + ".s")
144           : (opts::AsmDump + sys::path::get_separator() + PrintName + ".s")
145                 .str();
146   outs() << "BOLT-INFO: Dumping function assembly to " << Filename << "\n";
147 
148   std::error_code EC;
149   raw_fd_ostream OS(Filename, EC, sys::fs::OF_None);
150   if (EC) {
151     errs() << "BOLT-ERROR: " << EC.message() << ", unable to open " << Filename
152            << " for output.\n";
153     exit(1);
154   }
155   OS.SetUnbuffered();
156 
157   // Create local MC context to isolate the effect of ephemeral assembly
158   // emission.
159   BinaryContext::IndependentCodeEmitter MCEInstance =
160       BC.createIndependentMCCodeEmitter();
161   MCContext *LocalCtx = MCEInstance.LocalCtx.get();
162   std::unique_ptr<MCAsmBackend> MAB(
163       BC.TheTarget->createMCAsmBackend(*BC.STI, *BC.MRI, MCTargetOptions()));
164   int AsmPrinterVariant = BC.AsmInfo->getAssemblerDialect();
165   MCInstPrinter *InstructionPrinter(BC.TheTarget->createMCInstPrinter(
166       *BC.TheTriple, AsmPrinterVariant, *BC.AsmInfo, *BC.MII, *BC.MRI));
167   auto FOut = std::make_unique<formatted_raw_ostream>(OS);
168   FOut->SetUnbuffered();
169   std::unique_ptr<MCStreamer> AsmStreamer(
170       createAsmStreamer(*LocalCtx, std::move(FOut),
171                         /*isVerboseAsm=*/true,
172                         /*useDwarfDirectory=*/false, InstructionPrinter,
173                         std::move(MCEInstance.MCE), std::move(MAB),
174                         /*ShowInst=*/false));
175   AsmStreamer->initSections(true, *BC.STI);
176   std::unique_ptr<TargetMachine> TM(BC.TheTarget->createTargetMachine(
177       BC.TripleName, "", "", TargetOptions(), None));
178   std::unique_ptr<AsmPrinter> MAP(
179       BC.TheTarget->createAsmPrinter(*TM, std::move(AsmStreamer)));
180 
181   StringRef FunctionName = BF.getOneName();
182   OS << "  .globl " << FunctionName << '\n';
183   OS << "  .type " << FunctionName << ", %function\n";
184   OS << FunctionName << ":\n";
185 
186   // FDATA for the entry point
187   if (uint64_t EntryExecCount = BF.getKnownExecutionCount())
188     OS << "# FDATA: 0 [unknown] 0 "
189        << "1 " << FunctionName << " 0 "
190        << "0 " << EntryExecCount << '\n';
191 
192   // Binary data references from the function.
193   std::unordered_set<const BinaryData *> BDReferences;
194   // Function references from the function (to avoid constructing call graph).
195   std::unordered_set<const MCSymbol *> CallReferences;
196 
197   MAP->OutStreamer->emitCFIStartProc(/*IsSimple=*/false);
198   for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
199     OS << BB->getName() << ": \n";
200 
201     const std::string BranchLabel = Twine(BB->getName(), "_br").str();
202     const MCInst *LastInst = BB->getLastNonPseudoInstr();
203 
204     for (const MCInst &Instr : *BB) {
205       // Dump pseudo instructions (CFI)
206       if (BC.MIB->isPseudo(Instr)) {
207         if (BC.MIB->isCFI(Instr))
208           dumpCFI(BF, Instr, *MAP.get());
209         continue;
210       }
211 
212       // Analyze symbol references (data, functions) from the instruction.
213       bool IsCall = BC.MIB->isCall(Instr);
214       for (const MCOperand &Operand : MCPlus::primeOperands(Instr)) {
215         if (Operand.isExpr() &&
216             Operand.getExpr()->getKind() == MCExpr::SymbolRef) {
217           std::pair<const MCSymbol *, uint64_t> TSI =
218               BC.MIB->getTargetSymbolInfo(Operand.getExpr());
219           const MCSymbol *Symbol = TSI.first;
220           if (IsCall)
221             CallReferences.insert(Symbol);
222           else if (const BinaryData *BD =
223                        BC.getBinaryDataByName(Symbol->getName()))
224             BDReferences.insert(BD);
225         }
226       }
227 
228       if (&Instr == LastInst && (BB->succ_size() || IsCall))
229         OS << BranchLabel << ":\n";
230 
231       BC.InstPrinter->printInst(&Instr, 0, "", *BC.STI, OS);
232       OS << '\n';
233 
234       // Dump profile data in FDATA format (as parsed by link_fdata).
235       if (BC.MIB->getJumpTable(Instr))
236         dumpJumpTableFdata(OS, BF, Instr, BranchLabel);
237       else if (BC.MIB->isTailCall(Instr))
238         dumpTailCallFdata(OS, BF, Instr, BranchLabel);
239     }
240 
241     // Dump profile data in FDATA format (as parsed by link_fdata).
242     for (const BinaryBasicBlock *Succ : BB->successors()) {
243       const BinaryBasicBlock::BinaryBranchInfo BI = BB->getBranchInfo(*Succ);
244       if (!BI.MispredictedCount && !BI.Count)
245         continue;
246 
247       OS << "# FDATA: 1 " << FunctionName << " #" << BranchLabel << "# "
248          << "1 " << FunctionName << " #" << Succ->getName() << "# "
249          << BI.MispredictedCount << " " << BI.Count << '\n';
250     }
251 
252     OS << '\n';
253   }
254   MAP->OutStreamer->emitCFIEndProc();
255 
256   OS << ".size " << FunctionName << ", .-" << FunctionName << '\n';
257 
258   const BinarySection *LastSection = BF.getOriginSection();
259   // Print stubs for all target functions.
260   for (const MCSymbol *CalleeSymb : CallReferences)
261     dumpTargetFunctionStub(OS, BC, CalleeSymb, LastSection);
262 
263   OS << "# Jump tables\n";
264   // Print all jump tables.
265   for (auto &JTI : BF.jumpTables())
266     dumpJumpTableSymbols(OS, JTI.second, *MAP.get(), LastSection);
267 
268   OS << "# BinaryData\n";
269   // Print data references.
270   for (const BinaryData *BD : BDReferences)
271     dumpBinaryDataSymbols(OS, BD, LastSection);
272 }
273 
runOnFunctions(BinaryContext & BC)274 void AsmDumpPass::runOnFunctions(BinaryContext &BC) {
275   for (const auto &BFIt : BC.getBinaryFunctions())
276     dumpFunction(BFIt.second);
277 }
278 
279 } // namespace bolt
280 } // namespace llvm
281