1 //===- bolt/RuntimeLibs/InstrumentationRuntimeLibrary.cpp -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the InstrumentationRuntimeLibrary class.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
14 #include "bolt/Core/BinaryFunction.h"
15 #include "bolt/Core/JumpTable.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "llvm/ExecutionEngine/RuntimeDyld.h"
18 #include "llvm/MC/MCStreamer.h"
19 #include "llvm/Support/Alignment.h"
20 #include "llvm/Support/CommandLine.h"
21
22 using namespace llvm;
23 using namespace bolt;
24
25 namespace opts {
26
27 cl::opt<std::string> RuntimeInstrumentationLib(
28 "runtime-instrumentation-lib",
29 cl::desc("specify file name of the runtime instrumentation library"),
30 cl::init("libbolt_rt_instr.a"), cl::cat(BoltOptCategory));
31
32 extern cl::opt<bool> InstrumentationFileAppendPID;
33 extern cl::opt<bool> ConservativeInstrumentation;
34 extern cl::opt<std::string> InstrumentationFilename;
35 extern cl::opt<std::string> InstrumentationBinpath;
36 extern cl::opt<uint32_t> InstrumentationSleepTime;
37 extern cl::opt<bool> InstrumentationNoCountersClear;
38 extern cl::opt<bool> InstrumentationWaitForks;
39 extern cl::opt<JumpTableSupportLevel> JumpTables;
40
41 } // namespace opts
42
adjustCommandLineOptions(const BinaryContext & BC) const43 void InstrumentationRuntimeLibrary::adjustCommandLineOptions(
44 const BinaryContext &BC) const {
45 if (!BC.HasRelocations) {
46 errs() << "BOLT-ERROR: instrumentation runtime libraries require "
47 "relocations\n";
48 exit(1);
49 }
50 if (opts::JumpTables != JTS_MOVE) {
51 opts::JumpTables = JTS_MOVE;
52 outs() << "BOLT-INFO: forcing -jump-tables=move for instrumentation\n";
53 }
54 if (!BC.StartFunctionAddress) {
55 errs() << "BOLT-ERROR: instrumentation runtime libraries require a known "
56 "entry point of "
57 "the input binary\n";
58 exit(1);
59 }
60 if (!BC.FiniFunctionAddress && !BC.IsStaticExecutable) {
61 errs() << "BOLT-ERROR: input binary lacks DT_FINI entry in the dynamic "
62 "section but instrumentation currently relies on patching "
63 "DT_FINI to write the profile\n";
64 exit(1);
65 }
66 }
67
emitBinary(BinaryContext & BC,MCStreamer & Streamer)68 void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC,
69 MCStreamer &Streamer) {
70 MCSection *Section = BC.isELF()
71 ? static_cast<MCSection *>(BC.Ctx->getELFSection(
72 ".bolt.instr.counters", ELF::SHT_PROGBITS,
73 BinarySection::getFlags(/*IsReadOnly=*/false,
74 /*IsText=*/false,
75 /*IsAllocatable=*/true)
76
77 ))
78 : static_cast<MCSection *>(BC.Ctx->getMachOSection(
79 "__BOLT", "__counters", MachO::S_REGULAR,
80 SectionKind::getData()));
81
82 if (BC.IsStaticExecutable && !opts::InstrumentationSleepTime) {
83 errs() << "BOLT-ERROR: instrumentation of static binary currently does not "
84 "support profile output on binary finalization, so it "
85 "requires -instrumentation-sleep-time=N (N>0) usage\n";
86 exit(1);
87 }
88
89 Section->setAlignment(llvm::Align(BC.RegularPageSize));
90 Streamer.switchSection(Section);
91
92 // EmitOffset is used to determine padding size for data alignment
93 uint64_t EmitOffset = 0;
94
95 auto emitLabel = [&Streamer](MCSymbol *Symbol, bool IsGlobal = true) {
96 Streamer.emitLabel(Symbol);
97 if (IsGlobal)
98 Streamer.emitSymbolAttribute(Symbol, MCSymbolAttr::MCSA_Global);
99 };
100
101 auto emitLabelByName = [&BC, emitLabel](StringRef Name,
102 bool IsGlobal = true) {
103 MCSymbol *Symbol = BC.Ctx->getOrCreateSymbol(Name);
104 emitLabel(Symbol, IsGlobal);
105 };
106
107 auto emitPadding = [&Streamer, &EmitOffset](unsigned Size) {
108 const uint64_t Padding = alignTo(EmitOffset, Size) - EmitOffset;
109 if (Padding) {
110 Streamer.emitFill(Padding, 0);
111 EmitOffset += Padding;
112 }
113 };
114
115 auto emitDataSize = [&EmitOffset](unsigned Size) { EmitOffset += Size; };
116
117 auto emitDataPadding = [emitPadding, emitDataSize](unsigned Size) {
118 emitPadding(Size);
119 emitDataSize(Size);
120 };
121
122 auto emitFill = [&Streamer, emitDataSize,
123 emitLabel](unsigned Size, MCSymbol *Symbol = nullptr,
124 uint8_t Byte = 0) {
125 emitDataSize(Size);
126 if (Symbol)
127 emitLabel(Symbol, /*IsGlobal*/ false);
128 Streamer.emitFill(Size, Byte);
129 };
130
131 auto emitValue = [&BC, &Streamer, emitDataPadding,
132 emitLabel](MCSymbol *Symbol, const MCExpr *Value) {
133 const unsigned Psize = BC.AsmInfo->getCodePointerSize();
134 emitDataPadding(Psize);
135 emitLabel(Symbol);
136 if (Value)
137 Streamer.emitValue(Value, Psize);
138 else
139 Streamer.emitFill(Psize, 0);
140 };
141
142 auto emitIntValue = [&Streamer, emitDataPadding, emitLabelByName](
143 StringRef Name, uint64_t Value, unsigned Size = 4) {
144 emitDataPadding(Size);
145 emitLabelByName(Name);
146 Streamer.emitIntValue(Value, Size);
147 };
148
149 auto emitString = [&Streamer, emitDataSize, emitLabelByName,
150 emitFill](StringRef Name, StringRef Contents) {
151 emitDataSize(Contents.size());
152 emitLabelByName(Name);
153 Streamer.emitBytes(Contents);
154 emitFill(1);
155 };
156
157 // All of the following symbols will be exported as globals to be used by the
158 // instrumentation runtime library to dump the instrumentation data to disk.
159 // Label marking start of the memory region containing instrumentation
160 // counters, total vector size is Counters.size() 8-byte counters
161 emitLabelByName("__bolt_instr_locations");
162 for (MCSymbol *const &Label : Summary->Counters)
163 emitFill(sizeof(uint64_t), Label);
164
165 emitPadding(BC.RegularPageSize);
166 emitIntValue("__bolt_instr_sleep_time", opts::InstrumentationSleepTime);
167 emitIntValue("__bolt_instr_no_counters_clear",
168 !!opts::InstrumentationNoCountersClear, 1);
169 emitIntValue("__bolt_instr_conservative", !!opts::ConservativeInstrumentation,
170 1);
171 emitIntValue("__bolt_instr_wait_forks", !!opts::InstrumentationWaitForks, 1);
172 emitIntValue("__bolt_num_counters", Summary->Counters.size());
173 emitValue(Summary->IndCallCounterFuncPtr, nullptr);
174 emitValue(Summary->IndTailCallCounterFuncPtr, nullptr);
175 emitIntValue("__bolt_instr_num_ind_calls",
176 Summary->IndCallDescriptions.size());
177 emitIntValue("__bolt_instr_num_ind_targets",
178 Summary->IndCallTargetDescriptions.size());
179 emitIntValue("__bolt_instr_num_funcs", Summary->FunctionDescriptions.size());
180 emitString("__bolt_instr_filename", opts::InstrumentationFilename);
181 emitString("__bolt_instr_binpath", opts::InstrumentationBinpath);
182 emitIntValue("__bolt_instr_use_pid", !!opts::InstrumentationFileAppendPID, 1);
183
184 if (BC.isMachO()) {
185 MCSection *TablesSection = BC.Ctx->getMachOSection(
186 "__BOLT", "__tables", MachO::S_REGULAR, SectionKind::getData());
187 TablesSection->setAlignment(llvm::Align(BC.RegularPageSize));
188 Streamer.switchSection(TablesSection);
189 emitString("__bolt_instr_tables", buildTables(BC));
190 }
191 }
192
link(BinaryContext & BC,StringRef ToolPath,RuntimeDyld & RTDyld,std::function<void (RuntimeDyld &)> OnLoad)193 void InstrumentationRuntimeLibrary::link(
194 BinaryContext &BC, StringRef ToolPath, RuntimeDyld &RTDyld,
195 std::function<void(RuntimeDyld &)> OnLoad) {
196 std::string LibPath = getLibPath(ToolPath, opts::RuntimeInstrumentationLib);
197 loadLibrary(LibPath, RTDyld);
198 OnLoad(RTDyld);
199 RTDyld.finalizeWithMemoryManagerLocking();
200 if (RTDyld.hasError()) {
201 outs() << "BOLT-ERROR: RTDyld failed: " << RTDyld.getErrorString() << "\n";
202 exit(1);
203 }
204
205 if (BC.isMachO())
206 return;
207
208 RuntimeFiniAddress = RTDyld.getSymbol("__bolt_instr_fini").getAddress();
209 if (!RuntimeFiniAddress) {
210 errs() << "BOLT-ERROR: instrumentation library does not define "
211 "__bolt_instr_fini: "
212 << LibPath << "\n";
213 exit(1);
214 }
215 RuntimeStartAddress = RTDyld.getSymbol("__bolt_instr_start").getAddress();
216 if (!RuntimeStartAddress) {
217 errs() << "BOLT-ERROR: instrumentation library does not define "
218 "__bolt_instr_start: "
219 << LibPath << "\n";
220 exit(1);
221 }
222 outs() << "BOLT-INFO: output linked against instrumentation runtime "
223 "library, lib entry point is 0x"
224 << Twine::utohexstr(RuntimeFiniAddress) << "\n";
225 outs() << "BOLT-INFO: clear procedure is 0x"
226 << Twine::utohexstr(
227 RTDyld.getSymbol("__bolt_instr_clear_counters").getAddress())
228 << "\n";
229
230 emitTablesAsELFNote(BC);
231 }
232
buildTables(BinaryContext & BC)233 std::string InstrumentationRuntimeLibrary::buildTables(BinaryContext &BC) {
234 std::string TablesStr;
235 raw_string_ostream OS(TablesStr);
236
237 // This is sync'ed with runtime/instr.cpp:readDescriptions()
238 auto getOutputAddress = [](const BinaryFunction &Func,
239 uint64_t Offset) -> uint64_t {
240 return Offset == 0
241 ? Func.getOutputAddress()
242 : Func.translateInputToOutputAddress(Func.getAddress() + Offset);
243 };
244
245 // Indirect targets need to be sorted for fast lookup during runtime
246 llvm::sort(Summary->IndCallTargetDescriptions,
247 [&](const IndCallTargetDescription &A,
248 const IndCallTargetDescription &B) {
249 return getOutputAddress(*A.Target, A.ToLoc.Offset) <
250 getOutputAddress(*B.Target, B.ToLoc.Offset);
251 });
252
253 // Start of the vector with descriptions (one CounterDescription for each
254 // counter), vector size is Counters.size() CounterDescription-sized elmts
255 const size_t IDSize =
256 Summary->IndCallDescriptions.size() * sizeof(IndCallDescription);
257 OS.write(reinterpret_cast<const char *>(&IDSize), 4);
258 for (const IndCallDescription &Desc : Summary->IndCallDescriptions) {
259 OS.write(reinterpret_cast<const char *>(&Desc.FromLoc.FuncString), 4);
260 OS.write(reinterpret_cast<const char *>(&Desc.FromLoc.Offset), 4);
261 }
262
263 const size_t ITDSize = Summary->IndCallTargetDescriptions.size() *
264 sizeof(IndCallTargetDescription);
265 OS.write(reinterpret_cast<const char *>(&ITDSize), 4);
266 for (const IndCallTargetDescription &Desc :
267 Summary->IndCallTargetDescriptions) {
268 OS.write(reinterpret_cast<const char *>(&Desc.ToLoc.FuncString), 4);
269 OS.write(reinterpret_cast<const char *>(&Desc.ToLoc.Offset), 4);
270 uint64_t TargetFuncAddress =
271 getOutputAddress(*Desc.Target, Desc.ToLoc.Offset);
272 OS.write(reinterpret_cast<const char *>(&TargetFuncAddress), 8);
273 }
274
275 uint32_t FuncDescSize = Summary->getFDSize();
276 OS.write(reinterpret_cast<const char *>(&FuncDescSize), 4);
277 for (const FunctionDescription &Desc : Summary->FunctionDescriptions) {
278 const size_t LeafNum = Desc.LeafNodes.size();
279 OS.write(reinterpret_cast<const char *>(&LeafNum), 4);
280 for (const InstrumentedNode &LeafNode : Desc.LeafNodes) {
281 OS.write(reinterpret_cast<const char *>(&LeafNode.Node), 4);
282 OS.write(reinterpret_cast<const char *>(&LeafNode.Counter), 4);
283 }
284 const size_t EdgesNum = Desc.Edges.size();
285 OS.write(reinterpret_cast<const char *>(&EdgesNum), 4);
286 for (const EdgeDescription &Edge : Desc.Edges) {
287 OS.write(reinterpret_cast<const char *>(&Edge.FromLoc.FuncString), 4);
288 OS.write(reinterpret_cast<const char *>(&Edge.FromLoc.Offset), 4);
289 OS.write(reinterpret_cast<const char *>(&Edge.FromNode), 4);
290 OS.write(reinterpret_cast<const char *>(&Edge.ToLoc.FuncString), 4);
291 OS.write(reinterpret_cast<const char *>(&Edge.ToLoc.Offset), 4);
292 OS.write(reinterpret_cast<const char *>(&Edge.ToNode), 4);
293 OS.write(reinterpret_cast<const char *>(&Edge.Counter), 4);
294 }
295 const size_t CallsNum = Desc.Calls.size();
296 OS.write(reinterpret_cast<const char *>(&CallsNum), 4);
297 for (const CallDescription &Call : Desc.Calls) {
298 OS.write(reinterpret_cast<const char *>(&Call.FromLoc.FuncString), 4);
299 OS.write(reinterpret_cast<const char *>(&Call.FromLoc.Offset), 4);
300 OS.write(reinterpret_cast<const char *>(&Call.FromNode), 4);
301 OS.write(reinterpret_cast<const char *>(&Call.ToLoc.FuncString), 4);
302 OS.write(reinterpret_cast<const char *>(&Call.ToLoc.Offset), 4);
303 OS.write(reinterpret_cast<const char *>(&Call.Counter), 4);
304 uint64_t TargetFuncAddress =
305 getOutputAddress(*Call.Target, Call.ToLoc.Offset);
306 OS.write(reinterpret_cast<const char *>(&TargetFuncAddress), 8);
307 }
308 const size_t EntryNum = Desc.EntryNodes.size();
309 OS.write(reinterpret_cast<const char *>(&EntryNum), 4);
310 for (const EntryNode &EntryNode : Desc.EntryNodes) {
311 OS.write(reinterpret_cast<const char *>(&EntryNode.Node), 8);
312 uint64_t TargetFuncAddress =
313 getOutputAddress(*Desc.Function, EntryNode.Address);
314 OS.write(reinterpret_cast<const char *>(&TargetFuncAddress), 8);
315 }
316 }
317 // Our string table lives immediately after descriptions vector
318 OS << Summary->StringTable;
319 OS.flush();
320
321 return TablesStr;
322 }
323
emitTablesAsELFNote(BinaryContext & BC)324 void InstrumentationRuntimeLibrary::emitTablesAsELFNote(BinaryContext &BC) {
325 std::string TablesStr = buildTables(BC);
326 const std::string BoltInfo = BinarySection::encodeELFNote(
327 "BOLT", TablesStr, BinarySection::NT_BOLT_INSTRUMENTATION_TABLES);
328 BC.registerOrUpdateNoteSection(".bolt.instr.tables", copyByteArray(BoltInfo),
329 BoltInfo.size(),
330 /*Alignment=*/1,
331 /*IsReadOnly=*/true, ELF::SHT_NOTE);
332 }
333