1 //=  InstrumentationRuntimeLibrary.cpp - The Instrumentation Runtime Library =//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
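//
// This file implements the InstrumentationRuntimeLibrary class: validation of
// the options instrumentation depends on, emission of the counters and
// configuration data read by the runtime instrumentation library, linking of
// that library, and serialization of the instrumentation description tables.
//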
9 //===----------------------------------------------------------------------===//
10 
11 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
12 #include "bolt/Core/BinaryFunction.h"
13 #include "bolt/Core/JumpTable.h"
14 #include "bolt/Utils/CommandLineOpts.h"
15 #include "llvm/ExecutionEngine/RuntimeDyld.h"
16 #include "llvm/MC/MCStreamer.h"
17 #include "llvm/Support/Alignment.h"
18 #include "llvm/Support/CommandLine.h"
19 
20 using namespace llvm;
21 using namespace bolt;
22 
23 namespace opts {
24 
25 cl::opt<std::string> RuntimeInstrumentationLib(
26     "runtime-instrumentation-lib",
27     cl::desc("specify file name of the runtime instrumentation library"),
28     cl::ZeroOrMore, cl::init("libbolt_rt_instr.a"), cl::cat(BoltOptCategory));
29 
30 extern cl::opt<bool> InstrumentationFileAppendPID;
31 extern cl::opt<bool> ConservativeInstrumentation;
32 extern cl::opt<std::string> InstrumentationFilename;
33 extern cl::opt<std::string> InstrumentationBinpath;
34 extern cl::opt<uint32_t> InstrumentationSleepTime;
35 extern cl::opt<bool> InstrumentationNoCountersClear;
36 extern cl::opt<bool> InstrumentationWaitForks;
37 extern cl::opt<JumpTableSupportLevel> JumpTables;
38 
39 } // namespace opts
40 
41 void InstrumentationRuntimeLibrary::adjustCommandLineOptions(
42     const BinaryContext &BC) const {
43   if (!BC.HasRelocations) {
44     errs() << "BOLT-ERROR: instrumentation runtime libraries require "
45               "relocations\n";
46     exit(1);
47   }
48   if (opts::JumpTables != JTS_MOVE) {
49     opts::JumpTables = JTS_MOVE;
50     outs() << "BOLT-INFO: forcing -jump-tables=move for instrumentation\n";
51   }
52   if (!BC.StartFunctionAddress) {
53     errs() << "BOLT-ERROR: instrumentation runtime libraries require a known "
54               "entry point of "
55               "the input binary\n";
56     exit(1);
57   }
58   if (!BC.FiniFunctionAddress && !BC.IsStaticExecutable) {
59     errs() << "BOLT-ERROR: input binary lacks DT_FINI entry in the dynamic "
60               "section but instrumentation currently relies on patching "
61               "DT_FINI to write the profile\n";
62     exit(1);
63   }
64 }
65 
66 void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC,
67                                                MCStreamer &Streamer) {
68   MCSection *Section = BC.isELF()
69                            ? static_cast<MCSection *>(BC.Ctx->getELFSection(
70                                  ".bolt.instr.counters", ELF::SHT_PROGBITS,
71                                  BinarySection::getFlags(/*IsReadOnly=*/false,
72                                                          /*IsText=*/false,
73                                                          /*IsAllocatable=*/true)
74 
75                                      ))
76                            : static_cast<MCSection *>(BC.Ctx->getMachOSection(
77                                  "__BOLT", "__counters", MachO::S_REGULAR,
78                                  SectionKind::getData()));
79 
80   if (BC.IsStaticExecutable && !opts::InstrumentationSleepTime) {
81     errs() << "BOLT-ERROR: instrumentation of static binary currently does not "
82               "support profile output on binary finalization, so it "
83               "requires -instrumentation-sleep-time=N (N>0) usage\n";
84     exit(1);
85   }
86 
87   Section->setAlignment(llvm::Align(BC.RegularPageSize));
88   Streamer.SwitchSection(Section);
89 
90   // EmitOffset is used to determine padding size for data alignment
91   uint64_t EmitOffset = 0;
92 
93   auto emitLabel = [&Streamer](MCSymbol *Symbol, bool IsGlobal = true) {
94     Streamer.emitLabel(Symbol);
95     if (IsGlobal)
96       Streamer.emitSymbolAttribute(Symbol, MCSymbolAttr::MCSA_Global);
97   };
98 
99   auto emitLabelByName = [&BC, emitLabel](StringRef Name,
100                                           bool IsGlobal = true) {
101     MCSymbol *Symbol = BC.Ctx->getOrCreateSymbol(Name);
102     emitLabel(Symbol, IsGlobal);
103   };
104 
105   auto emitPadding = [&Streamer, &EmitOffset](unsigned Size) {
106     const uint64_t Padding = alignTo(EmitOffset, Size) - EmitOffset;
107     if (Padding) {
108       Streamer.emitFill(Padding, 0);
109       EmitOffset += Padding;
110     }
111   };
112 
113   auto emitDataSize = [&EmitOffset](unsigned Size) { EmitOffset += Size; };
114 
115   auto emitDataPadding = [emitPadding, emitDataSize](unsigned Size) {
116     emitPadding(Size);
117     emitDataSize(Size);
118   };
119 
120   auto emitFill = [&Streamer, emitDataSize,
121                    emitLabel](unsigned Size, MCSymbol *Symbol = nullptr,
122                               uint8_t Byte = 0) {
123     emitDataSize(Size);
124     if (Symbol)
125       emitLabel(Symbol, /*IsGlobal*/ false);
126     Streamer.emitFill(Size, Byte);
127   };
128 
129   auto emitValue = [&BC, &Streamer, emitDataPadding,
130                     emitLabel](MCSymbol *Symbol, const MCExpr *Value) {
131     const unsigned Psize = BC.AsmInfo->getCodePointerSize();
132     emitDataPadding(Psize);
133     emitLabel(Symbol);
134     if (Value)
135       Streamer.emitValue(Value, Psize);
136     else
137       Streamer.emitFill(Psize, 0);
138   };
139 
140   auto emitIntValue = [&Streamer, emitDataPadding, emitLabelByName](
141                           StringRef Name, uint64_t Value, unsigned Size = 4) {
142     emitDataPadding(Size);
143     emitLabelByName(Name);
144     Streamer.emitIntValue(Value, Size);
145   };
146 
147   auto emitString = [&Streamer, emitDataSize, emitLabelByName,
148                      emitFill](StringRef Name, StringRef Contents) {
149     emitDataSize(Contents.size());
150     emitLabelByName(Name);
151     Streamer.emitBytes(Contents);
152     emitFill(1);
153   };
154 
155   // All of the following symbols will be exported as globals to be used by the
156   // instrumentation runtime library to dump the instrumentation data to disk.
157   // Label marking start of the memory region containing instrumentation
158   // counters, total vector size is Counters.size() 8-byte counters
159   emitLabelByName("__bolt_instr_locations");
160   for (MCSymbol *const &Label : Summary->Counters)
161     emitFill(sizeof(uint64_t), Label);
162 
163   emitPadding(BC.RegularPageSize);
164   emitIntValue("__bolt_instr_sleep_time", opts::InstrumentationSleepTime);
165   emitIntValue("__bolt_instr_no_counters_clear",
166                !!opts::InstrumentationNoCountersClear, 1);
167   emitIntValue("__bolt_instr_conservative", !!opts::ConservativeInstrumentation,
168                1);
169   emitIntValue("__bolt_instr_wait_forks", !!opts::InstrumentationWaitForks, 1);
170   emitIntValue("__bolt_num_counters", Summary->Counters.size());
171   emitValue(Summary->IndCallCounterFuncPtr, nullptr);
172   emitValue(Summary->IndTailCallCounterFuncPtr, nullptr);
173   emitIntValue("__bolt_instr_num_ind_calls",
174                Summary->IndCallDescriptions.size());
175   emitIntValue("__bolt_instr_num_ind_targets",
176                Summary->IndCallTargetDescriptions.size());
177   emitIntValue("__bolt_instr_num_funcs", Summary->FunctionDescriptions.size());
178   emitString("__bolt_instr_filename", opts::InstrumentationFilename);
179   emitString("__bolt_instr_binpath", opts::InstrumentationBinpath);
180   emitIntValue("__bolt_instr_use_pid", !!opts::InstrumentationFileAppendPID, 1);
181 
182   if (BC.isMachO()) {
183     MCSection *TablesSection = BC.Ctx->getMachOSection(
184         "__BOLT", "__tables", MachO::S_REGULAR, SectionKind::getData());
185     TablesSection->setAlignment(llvm::Align(BC.RegularPageSize));
186     Streamer.SwitchSection(TablesSection);
187     emitString("__bolt_instr_tables", buildTables(BC));
188   }
189 }
190 
191 void InstrumentationRuntimeLibrary::link(
192     BinaryContext &BC, StringRef ToolPath, RuntimeDyld &RTDyld,
193     std::function<void(RuntimeDyld &)> OnLoad) {
194   std::string LibPath = getLibPath(ToolPath, opts::RuntimeInstrumentationLib);
195   loadLibrary(LibPath, RTDyld);
196   OnLoad(RTDyld);
197   RTDyld.finalizeWithMemoryManagerLocking();
198   if (RTDyld.hasError()) {
199     outs() << "BOLT-ERROR: RTDyld failed: " << RTDyld.getErrorString() << "\n";
200     exit(1);
201   }
202 
203   if (BC.isMachO())
204     return;
205 
206   RuntimeFiniAddress = RTDyld.getSymbol("__bolt_instr_fini").getAddress();
207   if (!RuntimeFiniAddress) {
208     errs() << "BOLT-ERROR: instrumentation library does not define "
209               "__bolt_instr_fini: "
210            << LibPath << "\n";
211     exit(1);
212   }
213   RuntimeStartAddress = RTDyld.getSymbol("__bolt_instr_start").getAddress();
214   if (!RuntimeStartAddress) {
215     errs() << "BOLT-ERROR: instrumentation library does not define "
216               "__bolt_instr_start: "
217            << LibPath << "\n";
218     exit(1);
219   }
220   outs() << "BOLT-INFO: output linked against instrumentation runtime "
221             "library, lib entry point is 0x"
222          << Twine::utohexstr(RuntimeFiniAddress) << "\n";
223   outs() << "BOLT-INFO: clear procedure is 0x"
224          << Twine::utohexstr(
225                 RTDyld.getSymbol("__bolt_instr_clear_counters").getAddress())
226          << "\n";
227 
228   emitTablesAsELFNote(BC);
229 }
230 
231 std::string InstrumentationRuntimeLibrary::buildTables(BinaryContext &BC) {
232   std::string TablesStr;
233   raw_string_ostream OS(TablesStr);
234 
235   // This is sync'ed with runtime/instr.cpp:readDescriptions()
236   auto getOutputAddress = [](const BinaryFunction &Func,
237                              uint64_t Offset) -> uint64_t {
238     return Offset == 0
239                ? Func.getOutputAddress()
240                : Func.translateInputToOutputAddress(Func.getAddress() + Offset);
241   };
242 
243   // Indirect targets need to be sorted for fast lookup during runtime
244   std::sort(Summary->IndCallTargetDescriptions.begin(),
245             Summary->IndCallTargetDescriptions.end(),
246             [&](const IndCallTargetDescription &A,
247                 const IndCallTargetDescription &B) {
248               return getOutputAddress(*A.Target, A.ToLoc.Offset) <
249                      getOutputAddress(*B.Target, B.ToLoc.Offset);
250             });
251 
252   // Start of the vector with descriptions (one CounterDescription for each
253   // counter), vector size is Counters.size() CounterDescription-sized elmts
254   const size_t IDSize =
255       Summary->IndCallDescriptions.size() * sizeof(IndCallDescription);
256   OS.write(reinterpret_cast<const char *>(&IDSize), 4);
257   for (const IndCallDescription &Desc : Summary->IndCallDescriptions) {
258     OS.write(reinterpret_cast<const char *>(&Desc.FromLoc.FuncString), 4);
259     OS.write(reinterpret_cast<const char *>(&Desc.FromLoc.Offset), 4);
260   }
261 
262   const size_t ITDSize = Summary->IndCallTargetDescriptions.size() *
263                          sizeof(IndCallTargetDescription);
264   OS.write(reinterpret_cast<const char *>(&ITDSize), 4);
265   for (const IndCallTargetDescription &Desc :
266        Summary->IndCallTargetDescriptions) {
267     OS.write(reinterpret_cast<const char *>(&Desc.ToLoc.FuncString), 4);
268     OS.write(reinterpret_cast<const char *>(&Desc.ToLoc.Offset), 4);
269     uint64_t TargetFuncAddress =
270         getOutputAddress(*Desc.Target, Desc.ToLoc.Offset);
271     OS.write(reinterpret_cast<const char *>(&TargetFuncAddress), 8);
272   }
273 
274   uint32_t FuncDescSize = Summary->getFDSize();
275   OS.write(reinterpret_cast<const char *>(&FuncDescSize), 4);
276   for (const FunctionDescription &Desc : Summary->FunctionDescriptions) {
277     const size_t LeafNum = Desc.LeafNodes.size();
278     OS.write(reinterpret_cast<const char *>(&LeafNum), 4);
279     for (const InstrumentedNode &LeafNode : Desc.LeafNodes) {
280       OS.write(reinterpret_cast<const char *>(&LeafNode.Node), 4);
281       OS.write(reinterpret_cast<const char *>(&LeafNode.Counter), 4);
282     }
283     const size_t EdgesNum = Desc.Edges.size();
284     OS.write(reinterpret_cast<const char *>(&EdgesNum), 4);
285     for (const EdgeDescription &Edge : Desc.Edges) {
286       OS.write(reinterpret_cast<const char *>(&Edge.FromLoc.FuncString), 4);
287       OS.write(reinterpret_cast<const char *>(&Edge.FromLoc.Offset), 4);
288       OS.write(reinterpret_cast<const char *>(&Edge.FromNode), 4);
289       OS.write(reinterpret_cast<const char *>(&Edge.ToLoc.FuncString), 4);
290       OS.write(reinterpret_cast<const char *>(&Edge.ToLoc.Offset), 4);
291       OS.write(reinterpret_cast<const char *>(&Edge.ToNode), 4);
292       OS.write(reinterpret_cast<const char *>(&Edge.Counter), 4);
293     }
294     const size_t CallsNum = Desc.Calls.size();
295     OS.write(reinterpret_cast<const char *>(&CallsNum), 4);
296     for (const CallDescription &Call : Desc.Calls) {
297       OS.write(reinterpret_cast<const char *>(&Call.FromLoc.FuncString), 4);
298       OS.write(reinterpret_cast<const char *>(&Call.FromLoc.Offset), 4);
299       OS.write(reinterpret_cast<const char *>(&Call.FromNode), 4);
300       OS.write(reinterpret_cast<const char *>(&Call.ToLoc.FuncString), 4);
301       OS.write(reinterpret_cast<const char *>(&Call.ToLoc.Offset), 4);
302       OS.write(reinterpret_cast<const char *>(&Call.Counter), 4);
303       uint64_t TargetFuncAddress =
304           getOutputAddress(*Call.Target, Call.ToLoc.Offset);
305       OS.write(reinterpret_cast<const char *>(&TargetFuncAddress), 8);
306     }
307     const size_t EntryNum = Desc.EntryNodes.size();
308     OS.write(reinterpret_cast<const char *>(&EntryNum), 4);
309     for (const EntryNode &EntryNode : Desc.EntryNodes) {
310       OS.write(reinterpret_cast<const char *>(&EntryNode.Node), 8);
311       uint64_t TargetFuncAddress =
312           getOutputAddress(*Desc.Function, EntryNode.Address);
313       OS.write(reinterpret_cast<const char *>(&TargetFuncAddress), 8);
314     }
315   }
316   // Our string table lives immediately after descriptions vector
317   OS << Summary->StringTable;
318   OS.flush();
319 
320   return TablesStr;
321 }
322 
323 void InstrumentationRuntimeLibrary::emitTablesAsELFNote(BinaryContext &BC) {
324   std::string TablesStr = buildTables(BC);
325   const std::string BoltInfo = BinarySection::encodeELFNote(
326       "BOLT", TablesStr, BinarySection::NT_BOLT_INSTRUMENTATION_TABLES);
327   BC.registerOrUpdateNoteSection(".bolt.instr.tables", copyByteArray(BoltInfo),
328                                  BoltInfo.size(),
329                                  /*Alignment=*/1,
330                                  /*IsReadOnly=*/true, ELF::SHT_NOTE);
331 }
332