1 //===- bolt/RuntimeLibs/InstrumentationRuntimeLibrary.cpp -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the InstrumentationRuntimeLibrary class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
14 #include "bolt/Core/BinaryFunction.h"
15 #include "bolt/Core/JumpTable.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "llvm/ExecutionEngine/RuntimeDyld.h"
18 #include "llvm/MC/MCStreamer.h"
19 #include "llvm/Support/Alignment.h"
20 #include "llvm/Support/CommandLine.h"
21 
22 using namespace llvm;
23 using namespace bolt;
24 
25 namespace opts {
26 
27 cl::opt<std::string> RuntimeInstrumentationLib(
28     "runtime-instrumentation-lib",
29     cl::desc("specify file name of the runtime instrumentation library"),
30     cl::ZeroOrMore, cl::init("libbolt_rt_instr.a"), cl::cat(BoltOptCategory));
31 
32 extern cl::opt<bool> InstrumentationFileAppendPID;
33 extern cl::opt<bool> ConservativeInstrumentation;
34 extern cl::opt<std::string> InstrumentationFilename;
35 extern cl::opt<std::string> InstrumentationBinpath;
36 extern cl::opt<uint32_t> InstrumentationSleepTime;
37 extern cl::opt<bool> InstrumentationNoCountersClear;
38 extern cl::opt<bool> InstrumentationWaitForks;
39 extern cl::opt<JumpTableSupportLevel> JumpTables;
40 
41 } // namespace opts
42 
43 void InstrumentationRuntimeLibrary::adjustCommandLineOptions(
44     const BinaryContext &BC) const {
45   if (!BC.HasRelocations) {
46     errs() << "BOLT-ERROR: instrumentation runtime libraries require "
47               "relocations\n";
48     exit(1);
49   }
50   if (opts::JumpTables != JTS_MOVE) {
51     opts::JumpTables = JTS_MOVE;
52     outs() << "BOLT-INFO: forcing -jump-tables=move for instrumentation\n";
53   }
54   if (!BC.StartFunctionAddress) {
55     errs() << "BOLT-ERROR: instrumentation runtime libraries require a known "
56               "entry point of "
57               "the input binary\n";
58     exit(1);
59   }
60   if (!BC.FiniFunctionAddress && !BC.IsStaticExecutable) {
61     errs() << "BOLT-ERROR: input binary lacks DT_FINI entry in the dynamic "
62               "section but instrumentation currently relies on patching "
63               "DT_FINI to write the profile\n";
64     exit(1);
65   }
66 }
67 
68 void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC,
69                                                MCStreamer &Streamer) {
70   MCSection *Section = BC.isELF()
71                            ? static_cast<MCSection *>(BC.Ctx->getELFSection(
72                                  ".bolt.instr.counters", ELF::SHT_PROGBITS,
73                                  BinarySection::getFlags(/*IsReadOnly=*/false,
74                                                          /*IsText=*/false,
75                                                          /*IsAllocatable=*/true)
76 
77                                      ))
78                            : static_cast<MCSection *>(BC.Ctx->getMachOSection(
79                                  "__BOLT", "__counters", MachO::S_REGULAR,
80                                  SectionKind::getData()));
81 
82   if (BC.IsStaticExecutable && !opts::InstrumentationSleepTime) {
83     errs() << "BOLT-ERROR: instrumentation of static binary currently does not "
84               "support profile output on binary finalization, so it "
85               "requires -instrumentation-sleep-time=N (N>0) usage\n";
86     exit(1);
87   }
88 
89   Section->setAlignment(llvm::Align(BC.RegularPageSize));
90   Streamer.SwitchSection(Section);
91 
92   // EmitOffset is used to determine padding size for data alignment
93   uint64_t EmitOffset = 0;
94 
95   auto emitLabel = [&Streamer](MCSymbol *Symbol, bool IsGlobal = true) {
96     Streamer.emitLabel(Symbol);
97     if (IsGlobal)
98       Streamer.emitSymbolAttribute(Symbol, MCSymbolAttr::MCSA_Global);
99   };
100 
101   auto emitLabelByName = [&BC, emitLabel](StringRef Name,
102                                           bool IsGlobal = true) {
103     MCSymbol *Symbol = BC.Ctx->getOrCreateSymbol(Name);
104     emitLabel(Symbol, IsGlobal);
105   };
106 
107   auto emitPadding = [&Streamer, &EmitOffset](unsigned Size) {
108     const uint64_t Padding = alignTo(EmitOffset, Size) - EmitOffset;
109     if (Padding) {
110       Streamer.emitFill(Padding, 0);
111       EmitOffset += Padding;
112     }
113   };
114 
115   auto emitDataSize = [&EmitOffset](unsigned Size) { EmitOffset += Size; };
116 
117   auto emitDataPadding = [emitPadding, emitDataSize](unsigned Size) {
118     emitPadding(Size);
119     emitDataSize(Size);
120   };
121 
122   auto emitFill = [&Streamer, emitDataSize,
123                    emitLabel](unsigned Size, MCSymbol *Symbol = nullptr,
124                               uint8_t Byte = 0) {
125     emitDataSize(Size);
126     if (Symbol)
127       emitLabel(Symbol, /*IsGlobal*/ false);
128     Streamer.emitFill(Size, Byte);
129   };
130 
131   auto emitValue = [&BC, &Streamer, emitDataPadding,
132                     emitLabel](MCSymbol *Symbol, const MCExpr *Value) {
133     const unsigned Psize = BC.AsmInfo->getCodePointerSize();
134     emitDataPadding(Psize);
135     emitLabel(Symbol);
136     if (Value)
137       Streamer.emitValue(Value, Psize);
138     else
139       Streamer.emitFill(Psize, 0);
140   };
141 
142   auto emitIntValue = [&Streamer, emitDataPadding, emitLabelByName](
143                           StringRef Name, uint64_t Value, unsigned Size = 4) {
144     emitDataPadding(Size);
145     emitLabelByName(Name);
146     Streamer.emitIntValue(Value, Size);
147   };
148 
149   auto emitString = [&Streamer, emitDataSize, emitLabelByName,
150                      emitFill](StringRef Name, StringRef Contents) {
151     emitDataSize(Contents.size());
152     emitLabelByName(Name);
153     Streamer.emitBytes(Contents);
154     emitFill(1);
155   };
156 
157   // All of the following symbols will be exported as globals to be used by the
158   // instrumentation runtime library to dump the instrumentation data to disk.
159   // Label marking start of the memory region containing instrumentation
160   // counters, total vector size is Counters.size() 8-byte counters
161   emitLabelByName("__bolt_instr_locations");
162   for (MCSymbol *const &Label : Summary->Counters)
163     emitFill(sizeof(uint64_t), Label);
164 
165   emitPadding(BC.RegularPageSize);
166   emitIntValue("__bolt_instr_sleep_time", opts::InstrumentationSleepTime);
167   emitIntValue("__bolt_instr_no_counters_clear",
168                !!opts::InstrumentationNoCountersClear, 1);
169   emitIntValue("__bolt_instr_conservative", !!opts::ConservativeInstrumentation,
170                1);
171   emitIntValue("__bolt_instr_wait_forks", !!opts::InstrumentationWaitForks, 1);
172   emitIntValue("__bolt_num_counters", Summary->Counters.size());
173   emitValue(Summary->IndCallCounterFuncPtr, nullptr);
174   emitValue(Summary->IndTailCallCounterFuncPtr, nullptr);
175   emitIntValue("__bolt_instr_num_ind_calls",
176                Summary->IndCallDescriptions.size());
177   emitIntValue("__bolt_instr_num_ind_targets",
178                Summary->IndCallTargetDescriptions.size());
179   emitIntValue("__bolt_instr_num_funcs", Summary->FunctionDescriptions.size());
180   emitString("__bolt_instr_filename", opts::InstrumentationFilename);
181   emitString("__bolt_instr_binpath", opts::InstrumentationBinpath);
182   emitIntValue("__bolt_instr_use_pid", !!opts::InstrumentationFileAppendPID, 1);
183 
184   if (BC.isMachO()) {
185     MCSection *TablesSection = BC.Ctx->getMachOSection(
186         "__BOLT", "__tables", MachO::S_REGULAR, SectionKind::getData());
187     TablesSection->setAlignment(llvm::Align(BC.RegularPageSize));
188     Streamer.SwitchSection(TablesSection);
189     emitString("__bolt_instr_tables", buildTables(BC));
190   }
191 }
192 
193 void InstrumentationRuntimeLibrary::link(
194     BinaryContext &BC, StringRef ToolPath, RuntimeDyld &RTDyld,
195     std::function<void(RuntimeDyld &)> OnLoad) {
196   std::string LibPath = getLibPath(ToolPath, opts::RuntimeInstrumentationLib);
197   loadLibrary(LibPath, RTDyld);
198   OnLoad(RTDyld);
199   RTDyld.finalizeWithMemoryManagerLocking();
200   if (RTDyld.hasError()) {
201     outs() << "BOLT-ERROR: RTDyld failed: " << RTDyld.getErrorString() << "\n";
202     exit(1);
203   }
204 
205   if (BC.isMachO())
206     return;
207 
208   RuntimeFiniAddress = RTDyld.getSymbol("__bolt_instr_fini").getAddress();
209   if (!RuntimeFiniAddress) {
210     errs() << "BOLT-ERROR: instrumentation library does not define "
211               "__bolt_instr_fini: "
212            << LibPath << "\n";
213     exit(1);
214   }
215   RuntimeStartAddress = RTDyld.getSymbol("__bolt_instr_start").getAddress();
216   if (!RuntimeStartAddress) {
217     errs() << "BOLT-ERROR: instrumentation library does not define "
218               "__bolt_instr_start: "
219            << LibPath << "\n";
220     exit(1);
221   }
222   outs() << "BOLT-INFO: output linked against instrumentation runtime "
223             "library, lib entry point is 0x"
224          << Twine::utohexstr(RuntimeFiniAddress) << "\n";
225   outs() << "BOLT-INFO: clear procedure is 0x"
226          << Twine::utohexstr(
227                 RTDyld.getSymbol("__bolt_instr_clear_counters").getAddress())
228          << "\n";
229 
230   emitTablesAsELFNote(BC);
231 }
232 
233 std::string InstrumentationRuntimeLibrary::buildTables(BinaryContext &BC) {
234   std::string TablesStr;
235   raw_string_ostream OS(TablesStr);
236 
237   // This is sync'ed with runtime/instr.cpp:readDescriptions()
238   auto getOutputAddress = [](const BinaryFunction &Func,
239                              uint64_t Offset) -> uint64_t {
240     return Offset == 0
241                ? Func.getOutputAddress()
242                : Func.translateInputToOutputAddress(Func.getAddress() + Offset);
243   };
244 
245   // Indirect targets need to be sorted for fast lookup during runtime
246   std::sort(Summary->IndCallTargetDescriptions.begin(),
247             Summary->IndCallTargetDescriptions.end(),
248             [&](const IndCallTargetDescription &A,
249                 const IndCallTargetDescription &B) {
250               return getOutputAddress(*A.Target, A.ToLoc.Offset) <
251                      getOutputAddress(*B.Target, B.ToLoc.Offset);
252             });
253 
254   // Start of the vector with descriptions (one CounterDescription for each
255   // counter), vector size is Counters.size() CounterDescription-sized elmts
256   const size_t IDSize =
257       Summary->IndCallDescriptions.size() * sizeof(IndCallDescription);
258   OS.write(reinterpret_cast<const char *>(&IDSize), 4);
259   for (const IndCallDescription &Desc : Summary->IndCallDescriptions) {
260     OS.write(reinterpret_cast<const char *>(&Desc.FromLoc.FuncString), 4);
261     OS.write(reinterpret_cast<const char *>(&Desc.FromLoc.Offset), 4);
262   }
263 
264   const size_t ITDSize = Summary->IndCallTargetDescriptions.size() *
265                          sizeof(IndCallTargetDescription);
266   OS.write(reinterpret_cast<const char *>(&ITDSize), 4);
267   for (const IndCallTargetDescription &Desc :
268        Summary->IndCallTargetDescriptions) {
269     OS.write(reinterpret_cast<const char *>(&Desc.ToLoc.FuncString), 4);
270     OS.write(reinterpret_cast<const char *>(&Desc.ToLoc.Offset), 4);
271     uint64_t TargetFuncAddress =
272         getOutputAddress(*Desc.Target, Desc.ToLoc.Offset);
273     OS.write(reinterpret_cast<const char *>(&TargetFuncAddress), 8);
274   }
275 
276   uint32_t FuncDescSize = Summary->getFDSize();
277   OS.write(reinterpret_cast<const char *>(&FuncDescSize), 4);
278   for (const FunctionDescription &Desc : Summary->FunctionDescriptions) {
279     const size_t LeafNum = Desc.LeafNodes.size();
280     OS.write(reinterpret_cast<const char *>(&LeafNum), 4);
281     for (const InstrumentedNode &LeafNode : Desc.LeafNodes) {
282       OS.write(reinterpret_cast<const char *>(&LeafNode.Node), 4);
283       OS.write(reinterpret_cast<const char *>(&LeafNode.Counter), 4);
284     }
285     const size_t EdgesNum = Desc.Edges.size();
286     OS.write(reinterpret_cast<const char *>(&EdgesNum), 4);
287     for (const EdgeDescription &Edge : Desc.Edges) {
288       OS.write(reinterpret_cast<const char *>(&Edge.FromLoc.FuncString), 4);
289       OS.write(reinterpret_cast<const char *>(&Edge.FromLoc.Offset), 4);
290       OS.write(reinterpret_cast<const char *>(&Edge.FromNode), 4);
291       OS.write(reinterpret_cast<const char *>(&Edge.ToLoc.FuncString), 4);
292       OS.write(reinterpret_cast<const char *>(&Edge.ToLoc.Offset), 4);
293       OS.write(reinterpret_cast<const char *>(&Edge.ToNode), 4);
294       OS.write(reinterpret_cast<const char *>(&Edge.Counter), 4);
295     }
296     const size_t CallsNum = Desc.Calls.size();
297     OS.write(reinterpret_cast<const char *>(&CallsNum), 4);
298     for (const CallDescription &Call : Desc.Calls) {
299       OS.write(reinterpret_cast<const char *>(&Call.FromLoc.FuncString), 4);
300       OS.write(reinterpret_cast<const char *>(&Call.FromLoc.Offset), 4);
301       OS.write(reinterpret_cast<const char *>(&Call.FromNode), 4);
302       OS.write(reinterpret_cast<const char *>(&Call.ToLoc.FuncString), 4);
303       OS.write(reinterpret_cast<const char *>(&Call.ToLoc.Offset), 4);
304       OS.write(reinterpret_cast<const char *>(&Call.Counter), 4);
305       uint64_t TargetFuncAddress =
306           getOutputAddress(*Call.Target, Call.ToLoc.Offset);
307       OS.write(reinterpret_cast<const char *>(&TargetFuncAddress), 8);
308     }
309     const size_t EntryNum = Desc.EntryNodes.size();
310     OS.write(reinterpret_cast<const char *>(&EntryNum), 4);
311     for (const EntryNode &EntryNode : Desc.EntryNodes) {
312       OS.write(reinterpret_cast<const char *>(&EntryNode.Node), 8);
313       uint64_t TargetFuncAddress =
314           getOutputAddress(*Desc.Function, EntryNode.Address);
315       OS.write(reinterpret_cast<const char *>(&TargetFuncAddress), 8);
316     }
317   }
318   // Our string table lives immediately after descriptions vector
319   OS << Summary->StringTable;
320   OS.flush();
321 
322   return TablesStr;
323 }
324 
325 void InstrumentationRuntimeLibrary::emitTablesAsELFNote(BinaryContext &BC) {
326   std::string TablesStr = buildTables(BC);
327   const std::string BoltInfo = BinarySection::encodeELFNote(
328       "BOLT", TablesStr, BinarySection::NT_BOLT_INSTRUMENTATION_TABLES);
329   BC.registerOrUpdateNoteSection(".bolt.instr.tables", copyByteArray(BoltInfo),
330                                  BoltInfo.size(),
331                                  /*Alignment=*/1,
332                                  /*IsReadOnly=*/true, ELF::SHT_NOTE);
333 }
334