1 //===- bolt/RuntimeLibs/InstrumentationRuntimeLibrary.cpp -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the InstrumentationRuntimeLibrary class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
14 #include "bolt/Core/BinaryFunction.h"
15 #include "bolt/Core/JumpTable.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "llvm/ExecutionEngine/RuntimeDyld.h"
18 #include "llvm/MC/MCStreamer.h"
19 #include "llvm/Support/Alignment.h"
20 #include "llvm/Support/CommandLine.h"
21 
22 using namespace llvm;
23 using namespace bolt;
24 
25 namespace opts {
26 
27 cl::opt<std::string> RuntimeInstrumentationLib(
28     "runtime-instrumentation-lib",
29     cl::desc("specify file name of the runtime instrumentation library"),
30     cl::init("libbolt_rt_instr.a"), cl::cat(BoltOptCategory));
31 
32 extern cl::opt<bool> InstrumentationFileAppendPID;
33 extern cl::opt<bool> ConservativeInstrumentation;
34 extern cl::opt<std::string> InstrumentationFilename;
35 extern cl::opt<std::string> InstrumentationBinpath;
36 extern cl::opt<uint32_t> InstrumentationSleepTime;
37 extern cl::opt<bool> InstrumentationNoCountersClear;
38 extern cl::opt<bool> InstrumentationWaitForks;
39 extern cl::opt<JumpTableSupportLevel> JumpTables;
40 
41 } // namespace opts
42 
43 void InstrumentationRuntimeLibrary::adjustCommandLineOptions(
44     const BinaryContext &BC) const {
45   if (!BC.HasRelocations) {
46     errs() << "BOLT-ERROR: instrumentation runtime libraries require "
47               "relocations\n";
48     exit(1);
49   }
50   if (opts::JumpTables != JTS_MOVE) {
51     opts::JumpTables = JTS_MOVE;
52     outs() << "BOLT-INFO: forcing -jump-tables=move for instrumentation\n";
53   }
54   if (!BC.StartFunctionAddress) {
55     errs() << "BOLT-ERROR: instrumentation runtime libraries require a known "
56               "entry point of "
57               "the input binary\n";
58     exit(1);
59   }
60   if (!BC.FiniFunctionAddress && !BC.IsStaticExecutable) {
61     errs() << "BOLT-ERROR: input binary lacks DT_FINI entry in the dynamic "
62               "section but instrumentation currently relies on patching "
63               "DT_FINI to write the profile\n";
64     exit(1);
65   }
66 }
67 
68 void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC,
69                                                MCStreamer &Streamer) {
70   MCSection *Section = BC.isELF()
71                            ? static_cast<MCSection *>(BC.Ctx->getELFSection(
72                                  ".bolt.instr.counters", ELF::SHT_PROGBITS,
73                                  BinarySection::getFlags(/*IsReadOnly=*/false,
74                                                          /*IsText=*/false,
75                                                          /*IsAllocatable=*/true)
76 
77                                      ))
78                            : static_cast<MCSection *>(BC.Ctx->getMachOSection(
79                                  "__BOLT", "__counters", MachO::S_REGULAR,
80                                  SectionKind::getData()));
81 
82   if (BC.IsStaticExecutable && !opts::InstrumentationSleepTime) {
83     errs() << "BOLT-ERROR: instrumentation of static binary currently does not "
84               "support profile output on binary finalization, so it "
85               "requires -instrumentation-sleep-time=N (N>0) usage\n";
86     exit(1);
87   }
88 
89   Section->setAlignment(llvm::Align(BC.RegularPageSize));
90   Streamer.switchSection(Section);
91 
92   // EmitOffset is used to determine padding size for data alignment
93   uint64_t EmitOffset = 0;
94 
95   auto emitLabel = [&Streamer](MCSymbol *Symbol, bool IsGlobal = true) {
96     Streamer.emitLabel(Symbol);
97     if (IsGlobal)
98       Streamer.emitSymbolAttribute(Symbol, MCSymbolAttr::MCSA_Global);
99   };
100 
101   auto emitLabelByName = [&BC, emitLabel](StringRef Name,
102                                           bool IsGlobal = true) {
103     MCSymbol *Symbol = BC.Ctx->getOrCreateSymbol(Name);
104     emitLabel(Symbol, IsGlobal);
105   };
106 
107   auto emitPadding = [&Streamer, &EmitOffset](unsigned Size) {
108     const uint64_t Padding = alignTo(EmitOffset, Size) - EmitOffset;
109     if (Padding) {
110       Streamer.emitFill(Padding, 0);
111       EmitOffset += Padding;
112     }
113   };
114 
115   auto emitDataSize = [&EmitOffset](unsigned Size) { EmitOffset += Size; };
116 
117   auto emitDataPadding = [emitPadding, emitDataSize](unsigned Size) {
118     emitPadding(Size);
119     emitDataSize(Size);
120   };
121 
122   auto emitFill = [&Streamer, emitDataSize,
123                    emitLabel](unsigned Size, MCSymbol *Symbol = nullptr,
124                               uint8_t Byte = 0) {
125     emitDataSize(Size);
126     if (Symbol)
127       emitLabel(Symbol, /*IsGlobal*/ false);
128     Streamer.emitFill(Size, Byte);
129   };
130 
131   auto emitValue = [&BC, &Streamer, emitDataPadding,
132                     emitLabel](MCSymbol *Symbol, const MCExpr *Value) {
133     const unsigned Psize = BC.AsmInfo->getCodePointerSize();
134     emitDataPadding(Psize);
135     emitLabel(Symbol);
136     if (Value)
137       Streamer.emitValue(Value, Psize);
138     else
139       Streamer.emitFill(Psize, 0);
140   };
141 
142   auto emitIntValue = [&Streamer, emitDataPadding, emitLabelByName](
143                           StringRef Name, uint64_t Value, unsigned Size = 4) {
144     emitDataPadding(Size);
145     emitLabelByName(Name);
146     Streamer.emitIntValue(Value, Size);
147   };
148 
149   auto emitString = [&Streamer, emitDataSize, emitLabelByName,
150                      emitFill](StringRef Name, StringRef Contents) {
151     emitDataSize(Contents.size());
152     emitLabelByName(Name);
153     Streamer.emitBytes(Contents);
154     emitFill(1);
155   };
156 
157   // All of the following symbols will be exported as globals to be used by the
158   // instrumentation runtime library to dump the instrumentation data to disk.
159   // Label marking start of the memory region containing instrumentation
160   // counters, total vector size is Counters.size() 8-byte counters
161   emitLabelByName("__bolt_instr_locations");
162   for (MCSymbol *const &Label : Summary->Counters)
163     emitFill(sizeof(uint64_t), Label);
164 
165   emitPadding(BC.RegularPageSize);
166   emitIntValue("__bolt_instr_sleep_time", opts::InstrumentationSleepTime);
167   emitIntValue("__bolt_instr_no_counters_clear",
168                !!opts::InstrumentationNoCountersClear, 1);
169   emitIntValue("__bolt_instr_conservative", !!opts::ConservativeInstrumentation,
170                1);
171   emitIntValue("__bolt_instr_wait_forks", !!opts::InstrumentationWaitForks, 1);
172   emitIntValue("__bolt_num_counters", Summary->Counters.size());
173   emitValue(Summary->IndCallCounterFuncPtr, nullptr);
174   emitValue(Summary->IndTailCallCounterFuncPtr, nullptr);
175   emitIntValue("__bolt_instr_num_ind_calls",
176                Summary->IndCallDescriptions.size());
177   emitIntValue("__bolt_instr_num_ind_targets",
178                Summary->IndCallTargetDescriptions.size());
179   emitIntValue("__bolt_instr_num_funcs", Summary->FunctionDescriptions.size());
180   emitString("__bolt_instr_filename", opts::InstrumentationFilename);
181   emitString("__bolt_instr_binpath", opts::InstrumentationBinpath);
182   emitIntValue("__bolt_instr_use_pid", !!opts::InstrumentationFileAppendPID, 1);
183 
184   if (BC.isMachO()) {
185     MCSection *TablesSection = BC.Ctx->getMachOSection(
186         "__BOLT", "__tables", MachO::S_REGULAR, SectionKind::getData());
187     TablesSection->setAlignment(llvm::Align(BC.RegularPageSize));
188     Streamer.switchSection(TablesSection);
189     emitString("__bolt_instr_tables", buildTables(BC));
190   }
191 }
192 
193 void InstrumentationRuntimeLibrary::link(
194     BinaryContext &BC, StringRef ToolPath, RuntimeDyld &RTDyld,
195     std::function<void(RuntimeDyld &)> OnLoad) {
196   std::string LibPath = getLibPath(ToolPath, opts::RuntimeInstrumentationLib);
197   loadLibrary(LibPath, RTDyld);
198   OnLoad(RTDyld);
199   RTDyld.finalizeWithMemoryManagerLocking();
200   if (RTDyld.hasError()) {
201     outs() << "BOLT-ERROR: RTDyld failed: " << RTDyld.getErrorString() << "\n";
202     exit(1);
203   }
204 
205   if (BC.isMachO())
206     return;
207 
208   RuntimeFiniAddress = RTDyld.getSymbol("__bolt_instr_fini").getAddress();
209   if (!RuntimeFiniAddress) {
210     errs() << "BOLT-ERROR: instrumentation library does not define "
211               "__bolt_instr_fini: "
212            << LibPath << "\n";
213     exit(1);
214   }
215   RuntimeStartAddress = RTDyld.getSymbol("__bolt_instr_start").getAddress();
216   if (!RuntimeStartAddress) {
217     errs() << "BOLT-ERROR: instrumentation library does not define "
218               "__bolt_instr_start: "
219            << LibPath << "\n";
220     exit(1);
221   }
222   outs() << "BOLT-INFO: output linked against instrumentation runtime "
223             "library, lib entry point is 0x"
224          << Twine::utohexstr(RuntimeFiniAddress) << "\n";
225   outs() << "BOLT-INFO: clear procedure is 0x"
226          << Twine::utohexstr(
227                 RTDyld.getSymbol("__bolt_instr_clear_counters").getAddress())
228          << "\n";
229 
230   emitTablesAsELFNote(BC);
231 }
232 
233 std::string InstrumentationRuntimeLibrary::buildTables(BinaryContext &BC) {
234   std::string TablesStr;
235   raw_string_ostream OS(TablesStr);
236 
237   // This is sync'ed with runtime/instr.cpp:readDescriptions()
238   auto getOutputAddress = [](const BinaryFunction &Func,
239                              uint64_t Offset) -> uint64_t {
240     return Offset == 0
241                ? Func.getOutputAddress()
242                : Func.translateInputToOutputAddress(Func.getAddress() + Offset);
243   };
244 
245   // Indirect targets need to be sorted for fast lookup during runtime
246   llvm::sort(Summary->IndCallTargetDescriptions,
247              [&](const IndCallTargetDescription &A,
248                  const IndCallTargetDescription &B) {
249                return getOutputAddress(*A.Target, A.ToLoc.Offset) <
250                       getOutputAddress(*B.Target, B.ToLoc.Offset);
251              });
252 
253   // Start of the vector with descriptions (one CounterDescription for each
254   // counter), vector size is Counters.size() CounterDescription-sized elmts
255   const size_t IDSize =
256       Summary->IndCallDescriptions.size() * sizeof(IndCallDescription);
257   OS.write(reinterpret_cast<const char *>(&IDSize), 4);
258   for (const IndCallDescription &Desc : Summary->IndCallDescriptions) {
259     OS.write(reinterpret_cast<const char *>(&Desc.FromLoc.FuncString), 4);
260     OS.write(reinterpret_cast<const char *>(&Desc.FromLoc.Offset), 4);
261   }
262 
263   const size_t ITDSize = Summary->IndCallTargetDescriptions.size() *
264                          sizeof(IndCallTargetDescription);
265   OS.write(reinterpret_cast<const char *>(&ITDSize), 4);
266   for (const IndCallTargetDescription &Desc :
267        Summary->IndCallTargetDescriptions) {
268     OS.write(reinterpret_cast<const char *>(&Desc.ToLoc.FuncString), 4);
269     OS.write(reinterpret_cast<const char *>(&Desc.ToLoc.Offset), 4);
270     uint64_t TargetFuncAddress =
271         getOutputAddress(*Desc.Target, Desc.ToLoc.Offset);
272     OS.write(reinterpret_cast<const char *>(&TargetFuncAddress), 8);
273   }
274 
275   uint32_t FuncDescSize = Summary->getFDSize();
276   OS.write(reinterpret_cast<const char *>(&FuncDescSize), 4);
277   for (const FunctionDescription &Desc : Summary->FunctionDescriptions) {
278     const size_t LeafNum = Desc.LeafNodes.size();
279     OS.write(reinterpret_cast<const char *>(&LeafNum), 4);
280     for (const InstrumentedNode &LeafNode : Desc.LeafNodes) {
281       OS.write(reinterpret_cast<const char *>(&LeafNode.Node), 4);
282       OS.write(reinterpret_cast<const char *>(&LeafNode.Counter), 4);
283     }
284     const size_t EdgesNum = Desc.Edges.size();
285     OS.write(reinterpret_cast<const char *>(&EdgesNum), 4);
286     for (const EdgeDescription &Edge : Desc.Edges) {
287       OS.write(reinterpret_cast<const char *>(&Edge.FromLoc.FuncString), 4);
288       OS.write(reinterpret_cast<const char *>(&Edge.FromLoc.Offset), 4);
289       OS.write(reinterpret_cast<const char *>(&Edge.FromNode), 4);
290       OS.write(reinterpret_cast<const char *>(&Edge.ToLoc.FuncString), 4);
291       OS.write(reinterpret_cast<const char *>(&Edge.ToLoc.Offset), 4);
292       OS.write(reinterpret_cast<const char *>(&Edge.ToNode), 4);
293       OS.write(reinterpret_cast<const char *>(&Edge.Counter), 4);
294     }
295     const size_t CallsNum = Desc.Calls.size();
296     OS.write(reinterpret_cast<const char *>(&CallsNum), 4);
297     for (const CallDescription &Call : Desc.Calls) {
298       OS.write(reinterpret_cast<const char *>(&Call.FromLoc.FuncString), 4);
299       OS.write(reinterpret_cast<const char *>(&Call.FromLoc.Offset), 4);
300       OS.write(reinterpret_cast<const char *>(&Call.FromNode), 4);
301       OS.write(reinterpret_cast<const char *>(&Call.ToLoc.FuncString), 4);
302       OS.write(reinterpret_cast<const char *>(&Call.ToLoc.Offset), 4);
303       OS.write(reinterpret_cast<const char *>(&Call.Counter), 4);
304       uint64_t TargetFuncAddress =
305           getOutputAddress(*Call.Target, Call.ToLoc.Offset);
306       OS.write(reinterpret_cast<const char *>(&TargetFuncAddress), 8);
307     }
308     const size_t EntryNum = Desc.EntryNodes.size();
309     OS.write(reinterpret_cast<const char *>(&EntryNum), 4);
310     for (const EntryNode &EntryNode : Desc.EntryNodes) {
311       OS.write(reinterpret_cast<const char *>(&EntryNode.Node), 8);
312       uint64_t TargetFuncAddress =
313           getOutputAddress(*Desc.Function, EntryNode.Address);
314       OS.write(reinterpret_cast<const char *>(&TargetFuncAddress), 8);
315     }
316   }
317   // Our string table lives immediately after descriptions vector
318   OS << Summary->StringTable;
319   OS.flush();
320 
321   return TablesStr;
322 }
323 
324 void InstrumentationRuntimeLibrary::emitTablesAsELFNote(BinaryContext &BC) {
325   std::string TablesStr = buildTables(BC);
326   const std::string BoltInfo = BinarySection::encodeELFNote(
327       "BOLT", TablesStr, BinarySection::NT_BOLT_INSTRUMENTATION_TABLES);
328   BC.registerOrUpdateNoteSection(".bolt.instr.tables", copyByteArray(BoltInfo),
329                                  BoltInfo.size(),
330                                  /*Alignment=*/1,
331                                  /*IsReadOnly=*/true, ELF::SHT_NOTE);
332 }
333