1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryContext class.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/NameResolver.h"
18 #include "bolt/Utils/Utils.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23 #include "llvm/MC/MCAsmLayout.h"
24 #include "llvm/MC/MCAssembler.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
27 #include "llvm/MC/MCInstPrinter.h"
28 #include "llvm/MC/MCObjectStreamer.h"
29 #include "llvm/MC/MCObjectWriter.h"
30 #include "llvm/MC/MCRegisterInfo.h"
31 #include "llvm/MC/MCSectionELF.h"
32 #include "llvm/MC/MCStreamer.h"
33 #include "llvm/MC/MCSubtargetInfo.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/Error.h"
37 #include "llvm/Support/Regex.h"
38 #include <algorithm>
39 #include <functional>
40 #include <iterator>
41 #include <unordered_set>
42
43 using namespace llvm;
44
45 #undef DEBUG_TYPE
46 #define DEBUG_TYPE "bolt"
47
48 namespace opts {
49
50 cl::opt<bool> NoHugePages("no-huge-pages",
51 cl::desc("use regular size pages for code alignment"),
52 cl::Hidden, cl::cat(BoltCategory));
53
54 static cl::opt<bool>
55 PrintDebugInfo("print-debug-info",
56 cl::desc("print debug info when printing functions"),
57 cl::Hidden,
58 cl::ZeroOrMore,
59 cl::cat(BoltCategory));
60
61 cl::opt<bool> PrintRelocations(
62 "print-relocations",
63 cl::desc("print relocations when printing functions/objects"), cl::Hidden,
64 cl::cat(BoltCategory));
65
66 static cl::opt<bool>
67 PrintMemData("print-mem-data",
68 cl::desc("print memory data annotations when printing functions"),
69 cl::Hidden,
70 cl::ZeroOrMore,
71 cl::cat(BoltCategory));
72
73 } // namespace opts
74
75 namespace llvm {
76 namespace bolt {
77
BinaryContext(std::unique_ptr<MCContext> Ctx,std::unique_ptr<DWARFContext> DwCtx,std::unique_ptr<Triple> TheTriple,const Target * TheTarget,std::string TripleName,std::unique_ptr<MCCodeEmitter> MCE,std::unique_ptr<MCObjectFileInfo> MOFI,std::unique_ptr<const MCAsmInfo> AsmInfo,std::unique_ptr<const MCInstrInfo> MII,std::unique_ptr<const MCSubtargetInfo> STI,std::unique_ptr<MCInstPrinter> InstPrinter,std::unique_ptr<const MCInstrAnalysis> MIA,std::unique_ptr<MCPlusBuilder> MIB,std::unique_ptr<const MCRegisterInfo> MRI,std::unique_ptr<MCDisassembler> DisAsm)78 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
79 std::unique_ptr<DWARFContext> DwCtx,
80 std::unique_ptr<Triple> TheTriple,
81 const Target *TheTarget, std::string TripleName,
82 std::unique_ptr<MCCodeEmitter> MCE,
83 std::unique_ptr<MCObjectFileInfo> MOFI,
84 std::unique_ptr<const MCAsmInfo> AsmInfo,
85 std::unique_ptr<const MCInstrInfo> MII,
86 std::unique_ptr<const MCSubtargetInfo> STI,
87 std::unique_ptr<MCInstPrinter> InstPrinter,
88 std::unique_ptr<const MCInstrAnalysis> MIA,
89 std::unique_ptr<MCPlusBuilder> MIB,
90 std::unique_ptr<const MCRegisterInfo> MRI,
91 std::unique_ptr<MCDisassembler> DisAsm)
92 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
93 TheTriple(std::move(TheTriple)), TheTarget(TheTarget),
94 TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)),
95 AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)),
96 InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
97 MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) {
98 Relocation::Arch = this->TheTriple->getArch();
99 RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
100 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
101 }
102
~BinaryContext()103 BinaryContext::~BinaryContext() {
104 for (BinarySection *Section : Sections)
105 delete Section;
106 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
107 delete InjectedFunction;
108 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
109 delete JTI.second;
110 clearBinaryData();
111 }
112
113 /// Create BinaryContext for a given architecture \p ArchName and
114 /// triple \p TripleName.
115 Expected<std::unique_ptr<BinaryContext>>
createBinaryContext(const ObjectFile * File,bool IsPIC,std::unique_ptr<DWARFContext> DwCtx)116 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
117 std::unique_ptr<DWARFContext> DwCtx) {
118 StringRef ArchName = "";
119 StringRef FeaturesStr = "";
120 switch (File->getArch()) {
121 case llvm::Triple::x86_64:
122 ArchName = "x86-64";
123 FeaturesStr = "+nopl";
124 break;
125 case llvm::Triple::aarch64:
126 ArchName = "aarch64";
127 FeaturesStr = "+all";
128 break;
129 default:
130 return createStringError(std::errc::not_supported,
131 "BOLT-ERROR: Unrecognized machine in ELF file");
132 }
133
134 auto TheTriple = std::make_unique<Triple>(File->makeTriple());
135 const std::string TripleName = TheTriple->str();
136
137 std::string Error;
138 const Target *TheTarget =
139 TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error);
140 if (!TheTarget)
141 return createStringError(make_error_code(std::errc::not_supported),
142 Twine("BOLT-ERROR: ", Error));
143
144 std::unique_ptr<const MCRegisterInfo> MRI(
145 TheTarget->createMCRegInfo(TripleName));
146 if (!MRI)
147 return createStringError(
148 make_error_code(std::errc::not_supported),
149 Twine("BOLT-ERROR: no register info for target ", TripleName));
150
151 // Set up disassembler.
152 std::unique_ptr<MCAsmInfo> AsmInfo(
153 TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
154 if (!AsmInfo)
155 return createStringError(
156 make_error_code(std::errc::not_supported),
157 Twine("BOLT-ERROR: no assembly info for target ", TripleName));
158 // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
159 // we want to emit such names as using @PLT without double quotes to convey
160 // variant kind to the assembler. BOLT doesn't rely on the linker so we can
161 // override the default AsmInfo behavior to emit names the way we want.
162 AsmInfo->setAllowAtInName(true);
163
164 std::unique_ptr<const MCSubtargetInfo> STI(
165 TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
166 if (!STI)
167 return createStringError(
168 make_error_code(std::errc::not_supported),
169 Twine("BOLT-ERROR: no subtarget info for target ", TripleName));
170
171 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
172 if (!MII)
173 return createStringError(
174 make_error_code(std::errc::not_supported),
175 Twine("BOLT-ERROR: no instruction info for target ", TripleName));
176
177 std::unique_ptr<MCContext> Ctx(
178 new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
179 std::unique_ptr<MCObjectFileInfo> MOFI(
180 TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
181 Ctx->setObjectFileInfo(MOFI.get());
182 // We do not support X86 Large code model. Change this in the future.
183 bool Large = false;
184 if (TheTriple->getArch() == llvm::Triple::aarch64)
185 Large = true;
186 unsigned LSDAEncoding =
187 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
188 unsigned TTypeEncoding =
189 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
190 if (IsPIC) {
191 LSDAEncoding = dwarf::DW_EH_PE_pcrel |
192 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
193 TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
194 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
195 }
196
197 std::unique_ptr<MCDisassembler> DisAsm(
198 TheTarget->createMCDisassembler(*STI, *Ctx));
199
200 if (!DisAsm)
201 return createStringError(
202 make_error_code(std::errc::not_supported),
203 Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
204
205 std::unique_ptr<const MCInstrAnalysis> MIA(
206 TheTarget->createMCInstrAnalysis(MII.get()));
207 if (!MIA)
208 return createStringError(
209 make_error_code(std::errc::not_supported),
210 Twine("BOLT-ERROR: failed to create instruction analysis for target ",
211 TripleName));
212
213 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
214 std::unique_ptr<MCInstPrinter> InstructionPrinter(
215 TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo,
216 *MII, *MRI));
217 if (!InstructionPrinter)
218 return createStringError(
219 make_error_code(std::errc::not_supported),
220 Twine("BOLT-ERROR: no instruction printer for target ", TripleName));
221 InstructionPrinter->setPrintImmHex(true);
222
223 std::unique_ptr<MCCodeEmitter> MCE(
224 TheTarget->createMCCodeEmitter(*MII, *Ctx));
225
226 // Make sure we don't miss any output on core dumps.
227 outs().SetUnbuffered();
228 errs().SetUnbuffered();
229 dbgs().SetUnbuffered();
230
231 auto BC = std::make_unique<BinaryContext>(
232 std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget,
233 std::string(TripleName), std::move(MCE), std::move(MOFI),
234 std::move(AsmInfo), std::move(MII), std::move(STI),
235 std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
236 std::move(DisAsm));
237
238 BC->TTypeEncoding = TTypeEncoding;
239 BC->LSDAEncoding = LSDAEncoding;
240
241 BC->MAB = std::unique_ptr<MCAsmBackend>(
242 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
243
244 BC->setFilename(File->getFileName());
245
246 BC->HasFixedLoadAddress = !IsPIC;
247
248 BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>(
249 BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx));
250
251 if (!BC->SymbolicDisAsm)
252 return createStringError(
253 make_error_code(std::errc::not_supported),
254 Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
255
256 return std::move(BC);
257 }
258
forceSymbolRelocations(StringRef SymbolName) const259 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
260 if (opts::HotText &&
261 (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
262 return true;
263
264 if (opts::HotData &&
265 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
266 return true;
267
268 if (SymbolName == "_end")
269 return true;
270
271 return false;
272 }
273
274 std::unique_ptr<MCObjectWriter>
createObjectWriter(raw_pwrite_stream & OS)275 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
276 return MAB->createObjectWriter(OS);
277 }
278
validateObjectNesting() const279 bool BinaryContext::validateObjectNesting() const {
280 auto Itr = BinaryDataMap.begin();
281 auto End = BinaryDataMap.end();
282 bool Valid = true;
283 while (Itr != End) {
284 auto Next = std::next(Itr);
285 while (Next != End &&
286 Itr->second->getSection() == Next->second->getSection() &&
287 Itr->second->containsRange(Next->second->getAddress(),
288 Next->second->getSize())) {
289 if (Next->second->Parent != Itr->second) {
290 errs() << "BOLT-WARNING: object nesting incorrect for:\n"
291 << "BOLT-WARNING: " << *Itr->second << "\n"
292 << "BOLT-WARNING: " << *Next->second << "\n";
293 Valid = false;
294 }
295 ++Next;
296 }
297 Itr = Next;
298 }
299 return Valid;
300 }
301
validateHoles() const302 bool BinaryContext::validateHoles() const {
303 bool Valid = true;
304 for (BinarySection &Section : sections()) {
305 for (const Relocation &Rel : Section.relocations()) {
306 uint64_t RelAddr = Rel.Offset + Section.getAddress();
307 const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
308 if (!BD) {
309 errs() << "BOLT-WARNING: no BinaryData found for relocation at address"
310 << " 0x" << Twine::utohexstr(RelAddr) << " in "
311 << Section.getName() << "\n";
312 Valid = false;
313 } else if (!BD->getAtomicRoot()) {
314 errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at "
315 << "address 0x" << Twine::utohexstr(RelAddr) << " in "
316 << Section.getName() << "\n";
317 Valid = false;
318 }
319 }
320 }
321 return Valid;
322 }
323
updateObjectNesting(BinaryDataMapType::iterator GAI)324 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
325 const uint64_t Address = GAI->second->getAddress();
326 const uint64_t Size = GAI->second->getSize();
327
328 auto fixParents = [&](BinaryDataMapType::iterator Itr,
329 BinaryData *NewParent) {
330 BinaryData *OldParent = Itr->second->Parent;
331 Itr->second->Parent = NewParent;
332 ++Itr;
333 while (Itr != BinaryDataMap.end() && OldParent &&
334 Itr->second->Parent == OldParent) {
335 Itr->second->Parent = NewParent;
336 ++Itr;
337 }
338 };
339
340 // Check if the previous symbol contains the newly added symbol.
341 if (GAI != BinaryDataMap.begin()) {
342 BinaryData *Prev = std::prev(GAI)->second;
343 while (Prev) {
344 if (Prev->getSection() == GAI->second->getSection() &&
345 Prev->containsRange(Address, Size)) {
346 fixParents(GAI, Prev);
347 } else {
348 fixParents(GAI, nullptr);
349 }
350 Prev = Prev->Parent;
351 }
352 }
353
354 // Check if the newly added symbol contains any subsequent symbols.
355 if (Size != 0) {
356 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
357 auto Itr = std::next(GAI);
358 while (
359 Itr != BinaryDataMap.end() &&
360 BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
361 Itr->second->Parent = BD;
362 ++Itr;
363 }
364 }
365 }
366
367 iterator_range<BinaryContext::binary_data_iterator>
getSubBinaryData(BinaryData * BD)368 BinaryContext::getSubBinaryData(BinaryData *BD) {
369 auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
370 auto End = Start;
371 while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
372 ++End;
373 return make_range(Start, End);
374 }
375
376 std::pair<const MCSymbol *, uint64_t>
handleAddressRef(uint64_t Address,BinaryFunction & BF,bool IsPCRel)377 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
378 bool IsPCRel) {
379 uint64_t Addend = 0;
380
381 if (isAArch64()) {
382 // Check if this is an access to a constant island and create bookkeeping
383 // to keep track of it and emit it later as part of this function.
384 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
385 return std::make_pair(IslandSym, Addend);
386
387 // Detect custom code written in assembly that refers to arbitrary
388 // constant islands from other functions. Write this reference so we
389 // can pull this constant island and emit it as part of this function
390 // too.
391 auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
392 if (IslandIter != AddressToConstantIslandMap.end()) {
393 if (MCSymbol *IslandSym =
394 IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) {
395 BF.createIslandDependency(IslandSym, IslandIter->second);
396 return std::make_pair(IslandSym, Addend);
397 }
398 }
399 }
400
401 // Note that the address does not necessarily have to reside inside
402 // a section, it could be an absolute address too.
403 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
404 if (Section && Section->isText()) {
405 if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
406 if (Address != BF.getAddress()) {
407 // The address could potentially escape. Mark it as another entry
408 // point into the function.
409 if (opts::Verbosity >= 1) {
410 outs() << "BOLT-INFO: potentially escaped address 0x"
411 << Twine::utohexstr(Address) << " in function " << BF << '\n';
412 }
413 BF.HasInternalLabelReference = true;
414 return std::make_pair(
415 BF.addEntryPointAtOffset(Address - BF.getAddress()), Addend);
416 }
417 } else {
418 addInterproceduralReference(&BF, Address);
419 }
420 }
421
422 // With relocations, catch jump table references outside of the basic block
423 // containing the indirect jump.
424 if (HasRelocations) {
425 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
426 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
427 const MCSymbol *Symbol =
428 getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
429
430 return std::make_pair(Symbol, Addend);
431 }
432 }
433
434 if (BinaryData *BD = getBinaryDataContainingAddress(Address))
435 return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
436
437 // TODO: use DWARF info to get size/alignment here?
438 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
439 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
440 return std::make_pair(TargetSymbol, Addend);
441 }
442
analyzeMemoryAt(uint64_t Address,BinaryFunction & BF)443 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
444 BinaryFunction &BF) {
445 if (!isX86())
446 return MemoryContentsType::UNKNOWN;
447
448 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
449 if (!Section) {
450 // No section - possibly an absolute address. Since we don't allow
451 // internal function addresses to escape the function scope - we
452 // consider it a tail call.
453 if (opts::Verbosity > 1) {
454 errs() << "BOLT-WARNING: no section for address 0x"
455 << Twine::utohexstr(Address) << " referenced from function " << BF
456 << '\n';
457 }
458 return MemoryContentsType::UNKNOWN;
459 }
460
461 if (Section->isVirtual()) {
462 // The contents are filled at runtime.
463 return MemoryContentsType::UNKNOWN;
464 }
465
466 // No support for jump tables in code yet.
467 if (Section->isText())
468 return MemoryContentsType::UNKNOWN;
469
470 // Start with checking for PIC jump table. We expect non-PIC jump tables
471 // to have high 32 bits set to 0.
472 if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
473 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
474
475 if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
476 return MemoryContentsType::POSSIBLE_JUMP_TABLE;
477
478 return MemoryContentsType::UNKNOWN;
479 }
480
481 /// Check if <fragment restored name> == <parent restored name>.cold(.\d+)?
isPotentialFragmentByName(BinaryFunction & Fragment,BinaryFunction & Parent)482 bool isPotentialFragmentByName(BinaryFunction &Fragment,
483 BinaryFunction &Parent) {
484 for (StringRef Name : Parent.getNames()) {
485 std::string NamePrefix = Regex::escape(NameResolver::restore(Name));
486 std::string NameRegex = Twine(NamePrefix, "\\.cold(\\.[0-9]+)?").str();
487 if (Fragment.hasRestoredNameRegex(NameRegex))
488 return true;
489 }
490 return false;
491 }
492
analyzeJumpTable(const uint64_t Address,const JumpTable::JumpTableType Type,BinaryFunction & BF,const uint64_t NextJTAddress,JumpTable::AddressesType * EntriesAsAddress)493 bool BinaryContext::analyzeJumpTable(
494 const uint64_t Address, const JumpTable::JumpTableType Type,
495 BinaryFunction &BF, const uint64_t NextJTAddress,
496 JumpTable::AddressesType *EntriesAsAddress) {
497 // Is one of the targets __builtin_unreachable?
498 bool HasUnreachable = false;
499
500 // Number of targets other than __builtin_unreachable.
501 uint64_t NumRealEntries = 0;
502
503 auto addEntryAddress = [&](uint64_t EntryAddress) {
504 if (EntriesAsAddress)
505 EntriesAsAddress->emplace_back(EntryAddress);
506 };
507
508 auto doesBelongToFunction = [&](const uint64_t Addr,
509 BinaryFunction *TargetBF) -> bool {
510 if (BF.containsAddress(Addr))
511 return true;
512 // Nothing to do if we failed to identify the containing function.
513 if (!TargetBF)
514 return false;
515 // Case 1: check if BF is a fragment and TargetBF is its parent.
516 if (BF.isFragment()) {
517 // Parent function may or may not be already registered.
518 // Set parent link based on function name matching heuristic.
519 return registerFragment(BF, *TargetBF);
520 }
521 // Case 2: check if TargetBF is a fragment and BF is its parent.
522 return TargetBF->isFragment() && registerFragment(*TargetBF, BF);
523 };
524
525 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
526 if (!Section)
527 return false;
528
529 // The upper bound is defined by containing object, section limits, and
530 // the next jump table in memory.
531 uint64_t UpperBound = Section->getEndAddress();
532 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
533 if (JumpTableBD && JumpTableBD->getSize()) {
534 assert(JumpTableBD->getEndAddress() <= UpperBound &&
535 "data object cannot cross a section boundary");
536 UpperBound = JumpTableBD->getEndAddress();
537 }
538 if (NextJTAddress)
539 UpperBound = std::min(NextJTAddress, UpperBound);
540
541 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: analyzeJumpTable in " << BF.getPrintName()
542 << '\n');
543 const uint64_t EntrySize = getJumpTableEntrySize(Type);
544 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
545 EntryAddress += EntrySize) {
546 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress)
547 << " -> ");
548 // Check if there's a proper relocation against the jump table entry.
549 if (HasRelocations) {
550 if (Type == JumpTable::JTT_PIC &&
551 !DataPCRelocations.count(EntryAddress)) {
552 LLVM_DEBUG(
553 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
554 break;
555 }
556 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
557 LLVM_DEBUG(
558 dbgs()
559 << "FAIL: JTT_NORMAL table, no relocation for this address\n");
560 break;
561 }
562 }
563
564 const uint64_t Value =
565 (Type == JumpTable::JTT_PIC)
566 ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
567 : *getPointerAtAddress(EntryAddress);
568
569 // __builtin_unreachable() case.
570 if (Value == BF.getAddress() + BF.getSize()) {
571 addEntryAddress(Value);
572 HasUnreachable = true;
573 LLVM_DEBUG(dbgs() << "OK: __builtin_unreachable\n");
574 continue;
575 }
576
577 // Function or one of its fragments.
578 BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
579
580 // We assume that a jump table cannot have function start as an entry.
581 if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) {
582 LLVM_DEBUG({
583 if (!BF.containsAddress(Value)) {
584 dbgs() << "FAIL: function doesn't contain this address\n";
585 if (TargetBF) {
586 dbgs() << " ! function containing this address: "
587 << TargetBF->getPrintName() << '\n';
588 if (TargetBF->isFragment())
589 dbgs() << " ! is a fragment\n";
590 for (BinaryFunction *TargetParent : TargetBF->ParentFragments)
591 dbgs() << " ! its parent is "
592 << (TargetParent ? TargetParent->getPrintName() : "(none)")
593 << '\n';
594 }
595 }
596 if (Value == BF.getAddress())
597 dbgs() << "FAIL: jump table cannot have function start as an entry\n";
598 });
599 break;
600 }
601
602 // Check there's an instruction at this offset.
603 if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
604 !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
605 LLVM_DEBUG(dbgs() << "FAIL: no instruction at this offset\n");
606 break;
607 }
608
609 ++NumRealEntries;
610
611 if (TargetBF != &BF)
612 BF.setHasIndirectTargetToSplitFragment(true);
613 addEntryAddress(Value);
614 }
615
616 // It's a jump table if the number of real entries is more than 1, or there's
617 // one real entry and "unreachable" targets. If there are only multiple
618 // "unreachable" targets, then it's not a jump table.
619 return NumRealEntries + HasUnreachable >= 2;
620 }
621
populateJumpTables()622 void BinaryContext::populateJumpTables() {
623 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
624 << '\n');
625 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
626 ++JTI) {
627 JumpTable *JT = JTI->second;
628
629 bool NonSimpleParent = false;
630 for (BinaryFunction *BF : JT->Parents)
631 NonSimpleParent |= !BF->isSimple();
632 if (NonSimpleParent)
633 continue;
634
635 uint64_t NextJTAddress = 0;
636 auto NextJTI = std::next(JTI);
637 if (NextJTI != JTE)
638 NextJTAddress = NextJTI->second->getAddress();
639
640 const bool Success =
641 analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]),
642 NextJTAddress, &JT->EntriesAsAddress);
643 if (!Success) {
644 LLVM_DEBUG(ListSeparator LS;
645 dbgs() << "failed to analyze jump table in function ";
646 for (BinaryFunction *Frag
647 : JT->Parents) dbgs()
648 << LS << *Frag;
649 dbgs() << '\n';);
650 JT->print(dbgs());
651 if (NextJTI != JTE) {
652 LLVM_DEBUG(ListSeparator LS;
653 dbgs() << "next jump table at 0x"
654 << Twine::utohexstr(NextJTI->second->getAddress())
655 << " belongs to function ";
656 for (BinaryFunction *Frag
657 : NextJTI->second->Parents) dbgs()
658 << LS << *Frag;
659 dbgs() << "\n";);
660 NextJTI->second->print(dbgs());
661 }
662 llvm_unreachable("jump table heuristic failure");
663 }
664 for (BinaryFunction *Frag : JT->Parents) {
665 for (uint64_t EntryAddress : JT->EntriesAsAddress)
666 // if target is builtin_unreachable
667 if (EntryAddress == Frag->getAddress() + Frag->getSize()) {
668 Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(),
669 Frag->getSize());
670 } else if (EntryAddress >= Frag->getAddress() &&
671 EntryAddress < Frag->getAddress() + Frag->getSize()) {
672 Frag->registerReferencedOffset(EntryAddress - Frag->getAddress());
673 }
674 }
675
676 // In strict mode, erase PC-relative relocation record. Later we check that
677 // all such records are erased and thus have been accounted for.
678 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
679 for (uint64_t Address = JT->getAddress();
680 Address < JT->getAddress() + JT->getSize();
681 Address += JT->EntrySize) {
682 DataPCRelocations.erase(DataPCRelocations.find(Address));
683 }
684 }
685
686 // Mark to skip the function and all its fragments.
687 for (BinaryFunction *Frag : JT->Parents)
688 if (Frag->hasIndirectTargetToSplitFragment())
689 addFragmentsToSkip(Frag);
690 }
691
692 if (opts::StrictMode && DataPCRelocations.size()) {
693 LLVM_DEBUG({
694 dbgs() << DataPCRelocations.size()
695 << " unclaimed PC-relative relocations left in data:\n";
696 for (uint64_t Reloc : DataPCRelocations)
697 dbgs() << Twine::utohexstr(Reloc) << '\n';
698 });
699 assert(0 && "unclaimed PC-relative relocations left in data\n");
700 }
701 clearList(DataPCRelocations);
702 }
703
skipMarkedFragments()704 void BinaryContext::skipMarkedFragments() {
705 std::vector<BinaryFunction *> FragmentQueue;
706 // Copy the functions to FragmentQueue.
707 FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end());
708 auto addToWorklist = [&](BinaryFunction *Function) -> void {
709 if (FragmentsToSkip.count(Function))
710 return;
711 FragmentQueue.push_back(Function);
712 addFragmentsToSkip(Function);
713 };
714 // Functions containing split jump tables need to be skipped with all
715 // fragments (transitively).
716 for (size_t I = 0; I != FragmentQueue.size(); I++) {
717 BinaryFunction *BF = FragmentQueue[I];
718 assert(FragmentsToSkip.count(BF) &&
719 "internal error in traversing function fragments");
720 if (opts::Verbosity >= 1)
721 errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
722 BF->setSimple(false);
723 BF->setHasIndirectTargetToSplitFragment(true);
724
725 llvm::for_each(BF->Fragments, addToWorklist);
726 llvm::for_each(BF->ParentFragments, addToWorklist);
727 }
728 if (!FragmentsToSkip.empty())
729 errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() << " function"
730 << (FragmentsToSkip.size() == 1 ? "" : "s")
731 << " due to cold fragments\n";
732 }
733
getOrCreateGlobalSymbol(uint64_t Address,Twine Prefix,uint64_t Size,uint16_t Alignment,unsigned Flags)734 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
735 uint64_t Size,
736 uint16_t Alignment,
737 unsigned Flags) {
738 auto Itr = BinaryDataMap.find(Address);
739 if (Itr != BinaryDataMap.end()) {
740 assert(Itr->second->getSize() == Size || !Size);
741 return Itr->second->getSymbol();
742 }
743
744 std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
745 assert(!GlobalSymbols.count(Name) && "created name is not unique");
746 return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
747 }
748
getOrCreateUndefinedGlobalSymbol(StringRef Name)749 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
750 return Ctx->getOrCreateSymbol(Name);
751 }
752
createBinaryFunction(const std::string & Name,BinarySection & Section,uint64_t Address,uint64_t Size,uint64_t SymbolSize,uint16_t Alignment)753 BinaryFunction *BinaryContext::createBinaryFunction(
754 const std::string &Name, BinarySection &Section, uint64_t Address,
755 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
756 auto Result = BinaryFunctions.emplace(
757 Address, BinaryFunction(Name, Section, Address, Size, *this));
758 assert(Result.second == true && "unexpected duplicate function");
759 BinaryFunction *BF = &Result.first->second;
760 registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
761 Alignment);
762 setSymbolToFunctionMap(BF->getSymbol(), BF);
763 return BF;
764 }
765
766 const MCSymbol *
getOrCreateJumpTable(BinaryFunction & Function,uint64_t Address,JumpTable::JumpTableType Type)767 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
768 JumpTable::JumpTableType Type) {
769 auto isFragmentOf = [](BinaryFunction *Fragment, BinaryFunction *Parent) {
770 return (Fragment->isFragment() && Fragment->isParentFragment(Parent));
771 };
772
773 // Two fragments of same function access same jump table
774 if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
775 assert(JT->Type == Type && "jump table types have to match");
776 assert(Address == JT->getAddress() && "unexpected non-empty jump table");
777
778 // Prevent associating a jump table to a specific fragment twice.
779 // This simple check arises from the assumption: no more than 2 fragments.
780 if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) {
781 bool SameFunction = isFragmentOf(JT->Parents[0], &Function) ||
782 isFragmentOf(&Function, JT->Parents[0]);
783 assert(SameFunction &&
784 "cannot re-use jump table of a different function");
785 // Duplicate the entry for the parent function for easy access
786 JT->Parents.push_back(&Function);
787 if (opts::Verbosity > 2) {
788 outs() << "BOLT-INFO: Multiple fragments access same jump table: "
789 << JT->Parents[0]->getPrintName() << "; "
790 << Function.getPrintName() << "\n";
791 JT->print(outs());
792 }
793 Function.JumpTables.emplace(Address, JT);
794 JT->Parents[0]->setHasIndirectTargetToSplitFragment(true);
795 JT->Parents[1]->setHasIndirectTargetToSplitFragment(true);
796 }
797
798 bool IsJumpTableParent = false;
799 for (BinaryFunction *Frag : JT->Parents)
800 if (Frag == &Function)
801 IsJumpTableParent = true;
802 assert(IsJumpTableParent &&
803 "cannot re-use jump table of a different function");
804 return JT->getFirstLabel();
805 }
806
807 // Re-use the existing symbol if possible.
808 MCSymbol *JTLabel = nullptr;
809 if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
810 if (!isInternalSymbolName(Object->getSymbol()->getName()))
811 JTLabel = Object->getSymbol();
812 }
813
814 const uint64_t EntrySize = getJumpTableEntrySize(Type);
815 if (!JTLabel) {
816 const std::string JumpTableName = generateJumpTableName(Function, Address);
817 JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
818 }
819
820 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
821 << " in function " << Function << '\n');
822
823 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
824 JumpTable::LabelMapType{{0, JTLabel}},
825 *getSectionForAddress(Address));
826 JT->Parents.push_back(&Function);
827 if (opts::Verbosity > 2)
828 JT->print(outs());
829 JumpTables.emplace(Address, JT);
830
831 // Duplicate the entry for the parent function for easy access.
832 Function.JumpTables.emplace(Address, JT);
833 return JTLabel;
834 }
835
836 std::pair<uint64_t, const MCSymbol *>
duplicateJumpTable(BinaryFunction & Function,JumpTable * JT,const MCSymbol * OldLabel)837 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
838 const MCSymbol *OldLabel) {
839 auto L = scopeLock();
840 unsigned Offset = 0;
841 bool Found = false;
842 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
843 if (Elmt.second != OldLabel)
844 continue;
845 Offset = Elmt.first;
846 Found = true;
847 break;
848 }
849 assert(Found && "Label not found");
850 (void)Found;
851 MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
852 JumpTable *NewJT =
853 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
854 JumpTable::LabelMapType{{Offset, NewLabel}},
855 *getSectionForAddress(JT->getAddress()));
856 NewJT->Parents = JT->Parents;
857 NewJT->Entries = JT->Entries;
858 NewJT->Counts = JT->Counts;
859 uint64_t JumpTableID = ++DuplicatedJumpTables;
860 // Invert it to differentiate from regular jump tables whose IDs are their
861 // addresses in the input binary memory space
862 JumpTableID = ~JumpTableID;
863 JumpTables.emplace(JumpTableID, NewJT);
864 Function.JumpTables.emplace(JumpTableID, NewJT);
865 return std::make_pair(JumpTableID, NewLabel);
866 }
867
generateJumpTableName(const BinaryFunction & BF,uint64_t Address)868 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
869 uint64_t Address) {
870 size_t Id;
871 uint64_t Offset = 0;
872 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
873 Offset = Address - JT->getAddress();
874 auto Itr = JT->Labels.find(Offset);
875 if (Itr != JT->Labels.end())
876 return std::string(Itr->second->getName());
877 Id = JumpTableIds.at(JT->getAddress());
878 } else {
879 Id = JumpTableIds[Address] = BF.JumpTables.size();
880 }
881 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
882 (Offset ? ("." + std::to_string(Offset)) : ""));
883 }
884
hasValidCodePadding(const BinaryFunction & BF)885 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
886 // FIXME: aarch64 support is missing.
887 if (!isX86())
888 return true;
889
890 if (BF.getSize() == BF.getMaxSize())
891 return true;
892
893 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
894 assert(FunctionData && "cannot get function as data");
895
896 uint64_t Offset = BF.getSize();
897 MCInst Instr;
898 uint64_t InstrSize = 0;
899 uint64_t InstrAddress = BF.getAddress() + Offset;
900 using std::placeholders::_1;
901
902 // Skip instructions that satisfy the predicate condition.
903 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
904 const uint64_t StartOffset = Offset;
905 for (; Offset < BF.getMaxSize();
906 Offset += InstrSize, InstrAddress += InstrSize) {
907 if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
908 InstrAddress, nulls()))
909 break;
910 if (!Predicate(Instr))
911 break;
912 }
913
914 return Offset - StartOffset;
915 };
916
917 // Skip a sequence of zero bytes.
918 auto skipZeros = [&]() {
919 const uint64_t StartOffset = Offset;
920 for (; Offset < BF.getMaxSize(); ++Offset)
921 if ((*FunctionData)[Offset] != 0)
922 break;
923
924 return Offset - StartOffset;
925 };
926
927 // Accept the whole padding area filled with breakpoints.
928 auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
929 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
930 return true;
931
932 auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
933
934 // Some functions have a jump to the next function or to the padding area
935 // inserted after the body.
936 auto isSkipJump = [&](const MCInst &Instr) {
937 uint64_t TargetAddress = 0;
938 if (MIB->isUnconditionalBranch(Instr) &&
939 MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
940 if (TargetAddress >= InstrAddress + InstrSize &&
941 TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
942 return true;
943 }
944 }
945 return false;
946 };
947
948 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
949 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
950 skipZeros())
951 ;
952
953 if (Offset == BF.getMaxSize())
954 return true;
955
956 if (opts::Verbosity >= 1) {
957 errs() << "BOLT-WARNING: bad padding at address 0x"
958 << Twine::utohexstr(BF.getAddress() + BF.getSize())
959 << " starting at offset " << (Offset - BF.getSize())
960 << " in function " << BF << '\n'
961 << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize())
962 << '\n';
963 }
964
965 return false;
966 }
967
adjustCodePadding()968 void BinaryContext::adjustCodePadding() {
969 for (auto &BFI : BinaryFunctions) {
970 BinaryFunction &BF = BFI.second;
971 if (!shouldEmit(BF))
972 continue;
973
974 if (!hasValidCodePadding(BF)) {
975 if (HasRelocations) {
976 if (opts::Verbosity >= 1) {
977 outs() << "BOLT-INFO: function " << BF
978 << " has invalid padding. Ignoring the function.\n";
979 }
980 BF.setIgnored();
981 } else {
982 BF.setMaxSize(BF.getSize());
983 }
984 }
985 }
986 }
987
registerNameAtAddress(StringRef Name,uint64_t Address,uint64_t Size,uint16_t Alignment,unsigned Flags)988 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
989 uint64_t Size,
990 uint16_t Alignment,
991 unsigned Flags) {
992 // Register the name with MCContext.
993 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
994
995 auto GAI = BinaryDataMap.find(Address);
996 BinaryData *BD;
997 if (GAI == BinaryDataMap.end()) {
998 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
999 BinarySection &Section =
1000 SectionOrErr ? SectionOrErr.get() : absoluteSection();
1001 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
1002 Section, Flags);
1003 GAI = BinaryDataMap.emplace(Address, BD).first;
1004 GlobalSymbols[Name] = BD;
1005 updateObjectNesting(GAI);
1006 } else {
1007 BD = GAI->second;
1008 if (!BD->hasName(Name)) {
1009 GlobalSymbols[Name] = BD;
1010 BD->Symbols.push_back(Symbol);
1011 }
1012 }
1013
1014 return Symbol;
1015 }
1016
1017 const BinaryData *
getBinaryDataContainingAddressImpl(uint64_t Address) const1018 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
1019 auto NI = BinaryDataMap.lower_bound(Address);
1020 auto End = BinaryDataMap.end();
1021 if ((NI != End && Address == NI->first) ||
1022 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
1023 if (NI->second->containsAddress(Address))
1024 return NI->second;
1025
1026 // If this is a sub-symbol, see if a parent data contains the address.
1027 const BinaryData *BD = NI->second->getParent();
1028 while (BD) {
1029 if (BD->containsAddress(Address))
1030 return BD;
1031 BD = BD->getParent();
1032 }
1033 }
1034 return nullptr;
1035 }
1036
setBinaryDataSize(uint64_t Address,uint64_t Size)1037 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
1038 auto NI = BinaryDataMap.find(Address);
1039 assert(NI != BinaryDataMap.end());
1040 if (NI == BinaryDataMap.end())
1041 return false;
1042 // TODO: it's possible that a jump table starts at the same address
1043 // as a larger blob of private data. When we set the size of the
1044 // jump table, it might be smaller than the total blob size. In this
1045 // case we just leave the original size since (currently) it won't really
1046 // affect anything.
1047 assert((!NI->second->Size || NI->second->Size == Size ||
1048 (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1049 "can't change the size of a symbol that has already had its "
1050 "size set");
1051 if (!NI->second->Size) {
1052 NI->second->Size = Size;
1053 updateObjectNesting(NI);
1054 return true;
1055 }
1056 return false;
1057 }
1058
generateSymbolHashes()1059 void BinaryContext::generateSymbolHashes() {
1060 auto isPadding = [](const BinaryData &BD) {
1061 StringRef Contents = BD.getSection().getContents();
1062 StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
1063 return (BD.getName().startswith("HOLEat") ||
1064 SymData.find_first_not_of(0) == StringRef::npos);
1065 };
1066
1067 uint64_t NumCollisions = 0;
1068 for (auto &Entry : BinaryDataMap) {
1069 BinaryData &BD = *Entry.second;
1070 StringRef Name = BD.getName();
1071
1072 if (!isInternalSymbolName(Name))
1073 continue;
1074
1075 // First check if a non-anonymous alias exists and move it to the front.
1076 if (BD.getSymbols().size() > 1) {
1077 auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) {
1078 return !isInternalSymbolName(Symbol->getName());
1079 });
1080 if (Itr != BD.getSymbols().end()) {
1081 size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
1082 std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
1083 continue;
1084 }
1085 }
1086
1087 // We have to skip 0 size symbols since they will all collide.
1088 if (BD.getSize() == 0) {
1089 continue;
1090 }
1091
1092 const uint64_t Hash = BD.getSection().hash(BD);
1093 const size_t Idx = Name.find("0x");
1094 std::string NewName =
1095 (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
1096 if (getBinaryDataByName(NewName)) {
1097 // Ignore collisions for symbols that appear to be padding
1098 // (i.e. all zeros or a "hole")
1099 if (!isPadding(BD)) {
1100 if (opts::Verbosity) {
1101 errs() << "BOLT-WARNING: collision detected when hashing " << BD
1102 << " with new name (" << NewName << "), skipping.\n";
1103 }
1104 ++NumCollisions;
1105 }
1106 continue;
1107 }
1108 BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
1109 GlobalSymbols[NewName] = &BD;
1110 }
1111 if (NumCollisions) {
1112 errs() << "BOLT-WARNING: " << NumCollisions
1113 << " collisions detected while hashing binary objects";
1114 if (!opts::Verbosity)
1115 errs() << ". Use -v=1 to see the list.";
1116 errs() << '\n';
1117 }
1118 }
1119
registerFragment(BinaryFunction & TargetFunction,BinaryFunction & Function) const1120 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1121 BinaryFunction &Function) const {
1122 if (!isPotentialFragmentByName(TargetFunction, Function))
1123 return false;
1124 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1125 if (TargetFunction.isParentFragment(&Function))
1126 return true;
1127 TargetFunction.addParentFragment(Function);
1128 Function.addFragment(TargetFunction);
1129 if (!HasRelocations) {
1130 TargetFunction.setSimple(false);
1131 Function.setSimple(false);
1132 }
1133 if (opts::Verbosity >= 1) {
1134 outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of "
1135 << Function << '\n';
1136 }
1137 return true;
1138 }
1139
addAdrpAddRelocAArch64(BinaryFunction & BF,MCInst & LoadLowBits,MCInst & LoadHiBits,uint64_t Target)1140 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF,
1141 MCInst &LoadLowBits,
1142 MCInst &LoadHiBits,
1143 uint64_t Target) {
1144 const MCSymbol *TargetSymbol;
1145 uint64_t Addend = 0;
1146 std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF,
1147 /*IsPCRel*/ true);
1148 int64_t Val;
1149 MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val,
1150 ELF::R_AARCH64_ADR_PREL_PG_HI21);
1151 MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(),
1152 Val, ELF::R_AARCH64_ADD_ABS_LO12_NC);
1153 }
1154
handleAArch64Veneer(uint64_t Address,bool MatchOnly)1155 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) {
1156 BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address);
1157 if (TargetFunction)
1158 return false;
1159
1160 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1161 assert(Section && "cannot get section for referenced address");
1162 if (!Section->isText())
1163 return false;
1164
1165 bool Ret = false;
1166 StringRef SectionContents = Section->getContents();
1167 uint64_t Offset = Address - Section->getAddress();
1168 const uint64_t MaxSize = SectionContents.size() - Offset;
1169 const uint8_t *Bytes =
1170 reinterpret_cast<const uint8_t *>(SectionContents.data());
1171 ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize);
1172
1173 auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions,
1174 MCInst &Instruction, uint64_t Offset,
1175 uint64_t AbsoluteInstrAddr,
1176 uint64_t TotalSize) -> bool {
1177 MCInst *TargetHiBits, *TargetLowBits;
1178 uint64_t TargetAddress, Count;
1179 Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
1180 AbsoluteInstrAddr, Instruction, TargetHiBits,
1181 TargetLowBits, TargetAddress);
1182 if (!Count)
1183 return false;
1184
1185 if (MatchOnly)
1186 return true;
1187
1188 // NOTE The target symbol was created during disassemble's
1189 // handleExternalReference
1190 const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat");
1191 BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(),
1192 *Section, Address, TotalSize);
1193 addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits,
1194 TargetAddress);
1195 MIB->addAnnotation(Instruction, "AArch64Veneer", true);
1196 Veneer->addInstruction(Offset, std::move(Instruction));
1197 --Count;
1198 for (auto It = std::prev(Instructions.end()); Count != 0;
1199 It = std::prev(It), --Count) {
1200 MIB->addAnnotation(It->second, "AArch64Veneer", true);
1201 Veneer->addInstruction(It->first, std::move(It->second));
1202 }
1203
1204 Veneer->getOrCreateLocalLabel(Address);
1205 Veneer->setMaxSize(TotalSize);
1206 Veneer->updateState(BinaryFunction::State::Disassembled);
1207 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address
1208 << "\n");
1209 return true;
1210 };
1211
1212 uint64_t Size = 0, TotalSize = 0;
1213 BinaryFunction::InstrMapType VeneerInstructions;
1214 for (Offset = 0; Offset < MaxSize; Offset += Size) {
1215 MCInst Instruction;
1216 const uint64_t AbsoluteInstrAddr = Address + Offset;
1217 if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset),
1218 AbsoluteInstrAddr, nulls()))
1219 break;
1220
1221 TotalSize += Size;
1222 if (MIB->isBranch(Instruction)) {
1223 Ret = matchVeneer(VeneerInstructions, Instruction, Offset,
1224 AbsoluteInstrAddr, TotalSize);
1225 break;
1226 }
1227
1228 VeneerInstructions.emplace(Offset, std::move(Instruction));
1229 }
1230
1231 return Ret;
1232 }
1233
processInterproceduralReferences()1234 void BinaryContext::processInterproceduralReferences() {
1235 for (const std::pair<BinaryFunction *, uint64_t> &It :
1236 InterproceduralReferences) {
1237 BinaryFunction &Function = *It.first;
1238 uint64_t Address = It.second;
1239 if (!Address || Function.isIgnored())
1240 continue;
1241
1242 BinaryFunction *TargetFunction =
1243 getBinaryFunctionContainingAddress(Address);
1244 if (&Function == TargetFunction)
1245 continue;
1246
1247 if (TargetFunction) {
1248 if (TargetFunction->isFragment() &&
1249 !registerFragment(*TargetFunction, Function)) {
1250 errs() << "BOLT-WARNING: interprocedural reference between unrelated "
1251 "fragments: "
1252 << Function.getPrintName() << " and "
1253 << TargetFunction->getPrintName() << '\n';
1254 }
1255 if (uint64_t Offset = Address - TargetFunction->getAddress())
1256 TargetFunction->addEntryPointAtOffset(Offset);
1257
1258 continue;
1259 }
1260
1261 // Check if address falls in function padding space - this could be
1262 // unmarked data in code. In this case adjust the padding space size.
1263 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1264 assert(Section && "cannot get section for referenced address");
1265
1266 if (!Section->isText())
1267 continue;
1268
1269 // PLT requires special handling and could be ignored in this context.
1270 StringRef SectionName = Section->getName();
1271 if (SectionName == ".plt" || SectionName == ".plt.got")
1272 continue;
1273
1274 // Check if it is aarch64 veneer written at Address
1275 if (isAArch64() && handleAArch64Veneer(Address))
1276 continue;
1277
1278 if (opts::processAllFunctions()) {
1279 errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1280 << "object in code at address 0x" << Twine::utohexstr(Address)
1281 << " belonging to section " << SectionName << " in current mode\n";
1282 exit(1);
1283 }
1284
1285 TargetFunction = getBinaryFunctionContainingAddress(Address,
1286 /*CheckPastEnd=*/false,
1287 /*UseMaxSize=*/true);
1288 // We are not going to overwrite non-simple functions, but for simple
1289 // ones - adjust the padding size.
1290 if (TargetFunction && TargetFunction->isSimple()) {
1291 errs() << "BOLT-WARNING: function " << *TargetFunction
1292 << " has an object detected in a padding region at address 0x"
1293 << Twine::utohexstr(Address) << '\n';
1294 TargetFunction->setMaxSize(TargetFunction->getSize());
1295 }
1296 }
1297
1298 InterproceduralReferences.clear();
1299 }
1300
postProcessSymbolTable()1301 void BinaryContext::postProcessSymbolTable() {
1302 fixBinaryDataHoles();
1303 bool Valid = true;
1304 for (auto &Entry : BinaryDataMap) {
1305 BinaryData *BD = Entry.second;
1306 if ((BD->getName().startswith("SYMBOLat") ||
1307 BD->getName().startswith("DATAat")) &&
1308 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1309 BD->getSection()) {
1310 errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n";
1311 Valid = false;
1312 }
1313 }
1314 assert(Valid);
1315 (void)Valid;
1316 generateSymbolHashes();
1317 }
1318
foldFunction(BinaryFunction & ChildBF,BinaryFunction & ParentBF)1319 void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1320 BinaryFunction &ParentBF) {
1321 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1322 "cannot merge functions with multiple entry points");
1323
1324 std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex,
1325 std::defer_lock);
1326 std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock(
1327 SymbolToFunctionMapMutex, std::defer_lock);
1328
1329 const StringRef ChildName = ChildBF.getOneName();
1330
1331 // Move symbols over and update bookkeeping info.
1332 for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1333 ParentBF.getSymbols().push_back(Symbol);
1334 WriteSymbolMapLock.lock();
1335 SymbolToFunctionMap[Symbol] = &ParentBF;
1336 WriteSymbolMapLock.unlock();
1337 // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1338 }
1339 ChildBF.getSymbols().clear();
1340
1341 // Move other names the child function is known under.
1342 llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases));
1343 ChildBF.Aliases.clear();
1344
1345 if (HasRelocations) {
1346 // Merge execution counts of ChildBF into those of ParentBF.
1347 // Without relocations, we cannot reliably merge profiles as both functions
1348 // continue to exist and either one can be executed.
1349 ChildBF.mergeProfileDataInto(ParentBF);
1350
1351 std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex,
1352 std::defer_lock);
1353 std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex,
1354 std::defer_lock);
1355 // Remove ChildBF from the global set of functions in relocs mode.
1356 ReadBfsLock.lock();
1357 auto FI = BinaryFunctions.find(ChildBF.getAddress());
1358 ReadBfsLock.unlock();
1359
1360 assert(FI != BinaryFunctions.end() && "function not found");
1361 assert(&ChildBF == &FI->second && "function mismatch");
1362
1363 WriteBfsLock.lock();
1364 ChildBF.clearDisasmState();
1365 FI = BinaryFunctions.erase(FI);
1366 WriteBfsLock.unlock();
1367
1368 } else {
1369 // In non-relocation mode we keep the function, but rename it.
1370 std::string NewName = "__ICF_" + ChildName.str();
1371
1372 WriteCtxLock.lock();
1373 ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
1374 WriteCtxLock.unlock();
1375
1376 ChildBF.setFolded(&ParentBF);
1377 }
1378 }
1379
fixBinaryDataHoles()1380 void BinaryContext::fixBinaryDataHoles() {
1381 assert(validateObjectNesting() && "object nesting inconsitency detected");
1382
1383 for (BinarySection &Section : allocatableSections()) {
1384 std::vector<std::pair<uint64_t, uint64_t>> Holes;
1385
1386 auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1387 BinaryData *BD = Itr->second;
1388 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1389 (BD->getName().startswith("SYMBOLat0x") ||
1390 BD->getName().startswith("DATAat0x") ||
1391 BD->getName().startswith("ANONYMOUS")));
1392 return !isHole && BD->getSection() == Section && !BD->getParent();
1393 };
1394
1395 auto BDStart = BinaryDataMap.begin();
1396 auto BDEnd = BinaryDataMap.end();
1397 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1398 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1399
1400 uint64_t EndAddress = Section.getAddress();
1401
1402 while (Itr != End) {
1403 if (Itr->second->getAddress() > EndAddress) {
1404 uint64_t Gap = Itr->second->getAddress() - EndAddress;
1405 Holes.emplace_back(EndAddress, Gap);
1406 }
1407 EndAddress = Itr->second->getEndAddress();
1408 ++Itr;
1409 }
1410
1411 if (EndAddress < Section.getEndAddress())
1412 Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
1413
1414 // If there is already a symbol at the start of the hole, grow that symbol
1415 // to cover the rest. Otherwise, create a new symbol to cover the hole.
1416 for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1417 BinaryData *BD = getBinaryDataAtAddress(Hole.first);
1418 if (BD) {
1419 // BD->getSection() can be != Section if there are sections that
1420 // overlap. In this case it is probably safe to just skip the holes
1421 // since the overlapping section will not(?) have any symbols in it.
1422 if (BD->getSection() == Section)
1423 setBinaryDataSize(Hole.first, Hole.second);
1424 } else {
1425 getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
1426 }
1427 }
1428 }
1429
1430 assert(validateObjectNesting() && "object nesting inconsitency detected");
1431 assert(validateHoles() && "top level hole detected in object map");
1432 }
1433
printGlobalSymbols(raw_ostream & OS) const1434 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1435 const BinarySection *CurrentSection = nullptr;
1436 bool FirstSection = true;
1437
1438 for (auto &Entry : BinaryDataMap) {
1439 const BinaryData *BD = Entry.second;
1440 const BinarySection &Section = BD->getSection();
1441 if (FirstSection || Section != *CurrentSection) {
1442 uint64_t Address, Size;
1443 StringRef Name = Section.getName();
1444 if (Section) {
1445 Address = Section.getAddress();
1446 Size = Section.getSize();
1447 } else {
1448 Address = BD->getAddress();
1449 Size = BD->getSize();
1450 }
1451 OS << "BOLT-INFO: Section " << Name << ", "
1452 << "0x" + Twine::utohexstr(Address) << ":"
1453 << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
1454 CurrentSection = &Section;
1455 FirstSection = false;
1456 }
1457
1458 OS << "BOLT-INFO: ";
1459 const BinaryData *P = BD->getParent();
1460 while (P) {
1461 OS << " ";
1462 P = P->getParent();
1463 }
1464 OS << *BD << "\n";
1465 }
1466 }
1467
getDwarfFile(StringRef Directory,StringRef FileName,unsigned FileNumber,Optional<MD5::MD5Result> Checksum,Optional<StringRef> Source,unsigned CUID,unsigned DWARFVersion)1468 Expected<unsigned> BinaryContext::getDwarfFile(
1469 StringRef Directory, StringRef FileName, unsigned FileNumber,
1470 Optional<MD5::MD5Result> Checksum, Optional<StringRef> Source,
1471 unsigned CUID, unsigned DWARFVersion) {
1472 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1473 return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion,
1474 FileNumber);
1475 }
1476
addDebugFilenameToUnit(const uint32_t DestCUID,const uint32_t SrcCUID,unsigned FileIndex)1477 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1478 const uint32_t SrcCUID,
1479 unsigned FileIndex) {
1480 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1481 const DWARFDebugLine::LineTable *LineTable =
1482 DwCtx->getLineTableForUnit(SrcUnit);
1483 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1484 LineTable->Prologue.FileNames;
1485 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1486 // means empty dir.
1487 assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1488 "FileIndex out of range for the compilation unit.");
1489 StringRef Dir = "";
1490 if (FileNames[FileIndex - 1].DirIdx != 0) {
1491 if (Optional<const char *> DirName = dwarf::toString(
1492 LineTable->Prologue
1493 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1494 Dir = *DirName;
1495 }
1496 }
1497 StringRef FileName = "";
1498 if (Optional<const char *> FName =
1499 dwarf::toString(FileNames[FileIndex - 1].Name))
1500 FileName = *FName;
1501 assert(FileName != "");
1502 DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
1503 return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID,
1504 DstUnit->getVersion()));
1505 }
1506
getSortedFunctions()1507 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1508 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1509 llvm::transform(BinaryFunctions, SortedFunctions.begin(),
1510 [](std::pair<const uint64_t, BinaryFunction> &BFI) {
1511 return &BFI.second;
1512 });
1513
1514 llvm::stable_sort(SortedFunctions,
1515 [](const BinaryFunction *A, const BinaryFunction *B) {
1516 if (A->hasValidIndex() && B->hasValidIndex()) {
1517 return A->getIndex() < B->getIndex();
1518 }
1519 return A->hasValidIndex();
1520 });
1521 return SortedFunctions;
1522 }
1523
getAllBinaryFunctions()1524 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1525 std::vector<BinaryFunction *> AllFunctions;
1526 AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
1527 llvm::transform(BinaryFunctions, std::back_inserter(AllFunctions),
1528 [](std::pair<const uint64_t, BinaryFunction> &BFI) {
1529 return &BFI.second;
1530 });
1531 llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions));
1532
1533 return AllFunctions;
1534 }
1535
getDWOCU(uint64_t DWOId)1536 Optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1537 auto Iter = DWOCUs.find(DWOId);
1538 if (Iter == DWOCUs.end())
1539 return None;
1540
1541 return Iter->second;
1542 }
1543
getDWOContext() const1544 DWARFContext *BinaryContext::getDWOContext() const {
1545 if (DWOCUs.empty())
1546 return nullptr;
1547 return &DWOCUs.begin()->second->getContext();
1548 }
1549
1550 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
preprocessDWODebugInfo()1551 void BinaryContext::preprocessDWODebugInfo() {
1552 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1553 DWARFUnit *const DwarfUnit = CU.get();
1554 if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1555 DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit();
1556 if (!DWOCU->isDWOUnit()) {
1557 std::string DWOName = dwarf::toString(
1558 DwarfUnit->getUnitDIE().find(
1559 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1560 "");
1561 outs() << "BOLT-WARNING: Debug Fission: DWO debug information for "
1562 << DWOName
1563 << " was not retrieved and won't be updated. Please check "
1564 "relative path.\n";
1565 continue;
1566 }
1567 DWOCUs[*DWOId] = DWOCU;
1568 }
1569 }
1570 }
1571
preprocessDebugInfo()1572 void BinaryContext::preprocessDebugInfo() {
1573 struct CURange {
1574 uint64_t LowPC;
1575 uint64_t HighPC;
1576 DWARFUnit *Unit;
1577
1578 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1579 };
1580
1581 // Building a map of address ranges to CUs similar to .debug_aranges and use
1582 // it to assign CU to functions.
1583 std::vector<CURange> AllRanges;
1584 AllRanges.reserve(DwCtx->getNumCompileUnits());
1585 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1586 Expected<DWARFAddressRangesVector> RangesOrError =
1587 CU->getUnitDIE().getAddressRanges();
1588 if (!RangesOrError) {
1589 consumeError(RangesOrError.takeError());
1590 continue;
1591 }
1592 for (DWARFAddressRange &Range : *RangesOrError) {
1593 // Parts of the debug info could be invalidated due to corresponding code
1594 // being removed from the binary by the linker. Hence we check if the
1595 // address is a valid one.
1596 if (containsAddress(Range.LowPC))
1597 AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
1598 }
1599
1600 ContainsDwarf5 |= CU->getVersion() >= 5;
1601 ContainsDwarfLegacy |= CU->getVersion() < 5;
1602 }
1603
1604 llvm::sort(AllRanges);
1605 for (auto &KV : BinaryFunctions) {
1606 const uint64_t FunctionAddress = KV.first;
1607 BinaryFunction &Function = KV.second;
1608
1609 auto It = llvm::partition_point(
1610 AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; });
1611 if (It != AllRanges.end() && It->LowPC <= FunctionAddress)
1612 Function.setDWARFUnit(It->Unit);
1613 }
1614
1615 // Discover units with debug info that needs to be updated.
1616 for (const auto &KV : BinaryFunctions) {
1617 const BinaryFunction &BF = KV.second;
1618 if (shouldEmit(BF) && BF.getDWARFUnit())
1619 ProcessedCUs.insert(BF.getDWARFUnit());
1620 }
1621
1622 // Clear debug info for functions from units that we are not going to process.
1623 for (auto &KV : BinaryFunctions) {
1624 BinaryFunction &BF = KV.second;
1625 if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
1626 BF.setDWARFUnit(nullptr);
1627 }
1628
1629 if (opts::Verbosity >= 1) {
1630 outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1631 << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1632 }
1633
1634 preprocessDWODebugInfo();
1635
1636 // Populate MCContext with DWARF files from all units.
1637 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1638 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1639 const uint64_t CUID = CU->getOffset();
1640 DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID);
1641 BinaryLineTable.setLabel(Ctx->getOrCreateSymbol(
1642 GlobalPrefix + "line_table_start" + Twine(CUID)));
1643
1644 if (!ProcessedCUs.count(CU.get()))
1645 continue;
1646
1647 const DWARFDebugLine::LineTable *LineTable =
1648 DwCtx->getLineTableForUnit(CU.get());
1649 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1650 LineTable->Prologue.FileNames;
1651
1652 uint16_t DwarfVersion = LineTable->Prologue.getVersion();
1653 if (DwarfVersion >= 5) {
1654 Optional<MD5::MD5Result> Checksum = None;
1655 if (LineTable->Prologue.ContentTypes.HasMD5)
1656 Checksum = LineTable->Prologue.FileNames[0].Checksum;
1657 Optional<const char *> Name =
1658 dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1659 if (Optional<uint64_t> DWOID = CU->getDWOId()) {
1660 auto Iter = DWOCUs.find(*DWOID);
1661 assert(Iter != DWOCUs.end() && "DWO CU was not found.");
1662 Name = dwarf::toString(
1663 Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1664 }
1665 BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum,
1666 None);
1667 }
1668
1669 BinaryLineTable.setDwarfVersion(DwarfVersion);
1670
1671 // Assign a unique label to every line table, one per CU.
1672 // Make sure empty debug line tables are registered too.
1673 if (FileNames.empty()) {
1674 cantFail(
1675 getDwarfFile("", "<unknown>", 0, None, None, CUID, DwarfVersion));
1676 continue;
1677 }
1678 const uint32_t Offset = DwarfVersion < 5 ? 1 : 0;
1679 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1680 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1681 // means empty dir.
1682 StringRef Dir = "";
1683 if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5)
1684 if (Optional<const char *> DirName = dwarf::toString(
1685 LineTable->Prologue
1686 .IncludeDirectories[FileNames[I].DirIdx - Offset]))
1687 Dir = *DirName;
1688 StringRef FileName = "";
1689 if (Optional<const char *> FName = dwarf::toString(FileNames[I].Name))
1690 FileName = *FName;
1691 assert(FileName != "");
1692 Optional<MD5::MD5Result> Checksum = None;
1693 if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5)
1694 Checksum = LineTable->Prologue.FileNames[I].Checksum;
1695 cantFail(
1696 getDwarfFile(Dir, FileName, 0, Checksum, None, CUID, DwarfVersion));
1697 }
1698 }
1699 }
1700
shouldEmit(const BinaryFunction & Function) const1701 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1702 if (Function.isPseudo())
1703 return false;
1704
1705 if (opts::processAllFunctions())
1706 return true;
1707
1708 if (Function.isIgnored())
1709 return false;
1710
1711 // In relocation mode we will emit non-simple functions with CFG.
1712 // If the function does not have a CFG it should be marked as ignored.
1713 return HasRelocations || Function.isSimple();
1714 }
1715
printCFI(raw_ostream & OS,const MCCFIInstruction & Inst)1716 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1717 uint32_t Operation = Inst.getOperation();
1718 switch (Operation) {
1719 case MCCFIInstruction::OpSameValue:
1720 OS << "OpSameValue Reg" << Inst.getRegister();
1721 break;
1722 case MCCFIInstruction::OpRememberState:
1723 OS << "OpRememberState";
1724 break;
1725 case MCCFIInstruction::OpRestoreState:
1726 OS << "OpRestoreState";
1727 break;
1728 case MCCFIInstruction::OpOffset:
1729 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1730 break;
1731 case MCCFIInstruction::OpDefCfaRegister:
1732 OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1733 break;
1734 case MCCFIInstruction::OpDefCfaOffset:
1735 OS << "OpDefCfaOffset " << Inst.getOffset();
1736 break;
1737 case MCCFIInstruction::OpDefCfa:
1738 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1739 break;
1740 case MCCFIInstruction::OpRelOffset:
1741 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1742 break;
1743 case MCCFIInstruction::OpAdjustCfaOffset:
1744 OS << "OfAdjustCfaOffset " << Inst.getOffset();
1745 break;
1746 case MCCFIInstruction::OpEscape:
1747 OS << "OpEscape";
1748 break;
1749 case MCCFIInstruction::OpRestore:
1750 OS << "OpRestore Reg" << Inst.getRegister();
1751 break;
1752 case MCCFIInstruction::OpUndefined:
1753 OS << "OpUndefined Reg" << Inst.getRegister();
1754 break;
1755 case MCCFIInstruction::OpRegister:
1756 OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1757 << Inst.getRegister2();
1758 break;
1759 case MCCFIInstruction::OpWindowSave:
1760 OS << "OpWindowSave";
1761 break;
1762 case MCCFIInstruction::OpGnuArgsSize:
1763 OS << "OpGnuArgsSize";
1764 break;
1765 default:
1766 OS << "Op#" << Operation;
1767 break;
1768 }
1769 }
1770
getMarkerType(const SymbolRef & Symbol) const1771 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
1772 // For aarch64, the ABI defines mapping symbols so we identify data in the
1773 // code section (see IHI0056B). $x identifies a symbol starting code or the
1774 // end of a data chunk inside code, $d indentifies start of data.
1775 if (!isAArch64() || ELFSymbolRef(Symbol).getSize())
1776 return MarkerSymType::NONE;
1777
1778 Expected<StringRef> NameOrError = Symbol.getName();
1779 Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
1780
1781 if (!TypeOrError || !NameOrError)
1782 return MarkerSymType::NONE;
1783
1784 if (*TypeOrError != SymbolRef::ST_Unknown)
1785 return MarkerSymType::NONE;
1786
1787 if (*NameOrError == "$x" || NameOrError->startswith("$x."))
1788 return MarkerSymType::CODE;
1789
1790 if (*NameOrError == "$d" || NameOrError->startswith("$d."))
1791 return MarkerSymType::DATA;
1792
1793 return MarkerSymType::NONE;
1794 }
1795
isMarker(const SymbolRef & Symbol) const1796 bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
1797 return getMarkerType(Symbol) != MarkerSymType::NONE;
1798 }
1799
printDebugInfo(raw_ostream & OS,const MCInst & Instruction,const BinaryFunction * Function,DWARFContext * DwCtx)1800 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
1801 const BinaryFunction *Function,
1802 DWARFContext *DwCtx) {
1803 DebugLineTableRowRef RowRef =
1804 DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
1805 if (RowRef == DebugLineTableRowRef::NULL_ROW)
1806 return;
1807
1808 const DWARFDebugLine::LineTable *LineTable;
1809 if (Function && Function->getDWARFUnit() &&
1810 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1811 LineTable = Function->getDWARFLineTable();
1812 } else {
1813 LineTable = DwCtx->getLineTableForUnit(
1814 DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
1815 }
1816 assert(LineTable && "line table expected for instruction with debug info");
1817
1818 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1819 StringRef FileName = "";
1820 if (Optional<const char *> FName =
1821 dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
1822 FileName = *FName;
1823 OS << " # debug line " << FileName << ":" << Row.Line;
1824 if (Row.Column)
1825 OS << ":" << Row.Column;
1826 if (Row.Discriminator)
1827 OS << " discriminator:" << Row.Discriminator;
1828 }
1829
printInstruction(raw_ostream & OS,const MCInst & Instruction,uint64_t Offset,const BinaryFunction * Function,bool PrintMCInst,bool PrintMemData,bool PrintRelocations,StringRef Endl) const1830 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1831 uint64_t Offset,
1832 const BinaryFunction *Function,
1833 bool PrintMCInst, bool PrintMemData,
1834 bool PrintRelocations,
1835 StringRef Endl) const {
1836 if (MIB->isEHLabel(Instruction)) {
1837 OS << " EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << Endl;
1838 return;
1839 }
1840 OS << format(" %08" PRIx64 ": ", Offset);
1841 if (MIB->isCFI(Instruction)) {
1842 uint32_t Offset = Instruction.getOperand(0).getImm();
1843 OS << "\t!CFI\t$" << Offset << "\t; ";
1844 if (Function)
1845 printCFI(OS, *Function->getCFIFor(Instruction));
1846 OS << Endl;
1847 return;
1848 }
1849 InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
1850 if (MIB->isCall(Instruction)) {
1851 if (MIB->isTailCall(Instruction))
1852 OS << " # TAILCALL ";
1853 if (MIB->isInvoke(Instruction)) {
1854 const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instruction);
1855 OS << " # handler: ";
1856 if (EHInfo->first)
1857 OS << *EHInfo->first;
1858 else
1859 OS << '0';
1860 OS << "; action: " << EHInfo->second;
1861 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1862 if (GnuArgsSize >= 0)
1863 OS << "; GNU_args_size = " << GnuArgsSize;
1864 }
1865 } else if (MIB->isIndirectBranch(Instruction)) {
1866 if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1867 OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1868 } else {
1869 OS << " # UNKNOWN CONTROL FLOW";
1870 }
1871 }
1872 if (Optional<uint32_t> Offset = MIB->getOffset(Instruction))
1873 OS << " # Offset: " << *Offset;
1874
1875 MIB->printAnnotations(Instruction, OS);
1876
1877 if (opts::PrintDebugInfo)
1878 printDebugInfo(OS, Instruction, Function, DwCtx.get());
1879
1880 if ((opts::PrintRelocations || PrintRelocations) && Function) {
1881 const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
1882 Function->printRelocations(OS, Offset, Size);
1883 }
1884
1885 OS << Endl;
1886
1887 if (PrintMCInst) {
1888 Instruction.dump_pretty(OS, InstPrinter.get());
1889 OS << Endl;
1890 }
1891 }
1892
1893 Optional<uint64_t>
getBaseAddressForMapping(uint64_t MMapAddress,uint64_t FileOffset) const1894 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
1895 uint64_t FileOffset) const {
1896 // Find a segment with a matching file offset.
1897 for (auto &KV : SegmentMapInfo) {
1898 const SegmentInfo &SegInfo = KV.second;
1899 if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) {
1900 // Use segment's aligned memory offset to calculate the base address.
1901 const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment);
1902 return MMapAddress - MemOffset;
1903 }
1904 }
1905
1906 return NoneType();
1907 }
1908
getSectionForAddress(uint64_t Address)1909 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
1910 auto SI = AddressToSection.upper_bound(Address);
1911 if (SI != AddressToSection.begin()) {
1912 --SI;
1913 uint64_t UpperBound = SI->first + SI->second->getSize();
1914 if (!SI->second->getSize())
1915 UpperBound += 1;
1916 if (UpperBound > Address)
1917 return *SI->second;
1918 }
1919 return std::make_error_code(std::errc::bad_address);
1920 }
1921
1922 ErrorOr<StringRef>
getSectionNameForAddress(uint64_t Address) const1923 BinaryContext::getSectionNameForAddress(uint64_t Address) const {
1924 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
1925 return Section->getName();
1926 return std::make_error_code(std::errc::bad_address);
1927 }
1928
registerSection(BinarySection * Section)1929 BinarySection &BinaryContext::registerSection(BinarySection *Section) {
1930 auto Res = Sections.insert(Section);
1931 (void)Res;
1932 assert(Res.second && "can't register the same section twice.");
1933
1934 // Only register allocatable sections in the AddressToSection map.
1935 if (Section->isAllocatable() && Section->getAddress())
1936 AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
1937 NameToSection.insert(
1938 std::make_pair(std::string(Section->getName()), Section));
1939 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
1940 return *Section;
1941 }
1942
registerSection(SectionRef Section)1943 BinarySection &BinaryContext::registerSection(SectionRef Section) {
1944 return registerSection(new BinarySection(*this, Section));
1945 }
1946
1947 BinarySection &
registerSection(StringRef SectionName,const BinarySection & OriginalSection)1948 BinaryContext::registerSection(StringRef SectionName,
1949 const BinarySection &OriginalSection) {
1950 return registerSection(
1951 new BinarySection(*this, SectionName, OriginalSection));
1952 }
1953
1954 BinarySection &
registerOrUpdateSection(StringRef Name,unsigned ELFType,unsigned ELFFlags,uint8_t * Data,uint64_t Size,unsigned Alignment)1955 BinaryContext::registerOrUpdateSection(StringRef Name, unsigned ELFType,
1956 unsigned ELFFlags, uint8_t *Data,
1957 uint64_t Size, unsigned Alignment) {
1958 auto NamedSections = getSectionByName(Name);
1959 if (NamedSections.begin() != NamedSections.end()) {
1960 assert(std::next(NamedSections.begin()) == NamedSections.end() &&
1961 "can only update unique sections");
1962 BinarySection *Section = NamedSections.begin()->second;
1963
1964 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
1965 const bool Flag = Section->isAllocatable();
1966 (void)Flag;
1967 Section->update(Data, Size, Alignment, ELFType, ELFFlags);
1968 LLVM_DEBUG(dbgs() << *Section << "\n");
1969 // FIXME: Fix section flags/attributes for MachO.
1970 if (isELF())
1971 assert(Flag == Section->isAllocatable() &&
1972 "can't change section allocation status");
1973 return *Section;
1974 }
1975
1976 return registerSection(
1977 new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
1978 }
1979
deregisterSection(BinarySection & Section)1980 bool BinaryContext::deregisterSection(BinarySection &Section) {
1981 BinarySection *SectionPtr = &Section;
1982 auto Itr = Sections.find(SectionPtr);
1983 if (Itr != Sections.end()) {
1984 auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
1985 while (Range.first != Range.second) {
1986 if (Range.first->second == SectionPtr) {
1987 AddressToSection.erase(Range.first);
1988 break;
1989 }
1990 ++Range.first;
1991 }
1992
1993 auto NameRange =
1994 NameToSection.equal_range(std::string(SectionPtr->getName()));
1995 while (NameRange.first != NameRange.second) {
1996 if (NameRange.first->second == SectionPtr) {
1997 NameToSection.erase(NameRange.first);
1998 break;
1999 }
2000 ++NameRange.first;
2001 }
2002
2003 Sections.erase(Itr);
2004 delete SectionPtr;
2005 return true;
2006 }
2007 return false;
2008 }
2009
printSections(raw_ostream & OS) const2010 void BinaryContext::printSections(raw_ostream &OS) const {
2011 for (BinarySection *const &Section : Sections)
2012 OS << "BOLT-INFO: " << *Section << "\n";
2013 }
2014
absoluteSection()2015 BinarySection &BinaryContext::absoluteSection() {
2016 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
2017 return *Section;
2018 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
2019 }
2020
getUnsignedValueAtAddress(uint64_t Address,size_t Size) const2021 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
2022 size_t Size) const {
2023 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2024 if (!Section)
2025 return std::make_error_code(std::errc::bad_address);
2026
2027 if (Section->isVirtual())
2028 return 0;
2029
2030 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2031 AsmInfo->getCodePointerSize());
2032 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2033 return DE.getUnsigned(&ValueOffset, Size);
2034 }
2035
getSignedValueAtAddress(uint64_t Address,size_t Size) const2036 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
2037 size_t Size) const {
2038 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2039 if (!Section)
2040 return std::make_error_code(std::errc::bad_address);
2041
2042 if (Section->isVirtual())
2043 return 0;
2044
2045 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2046 AsmInfo->getCodePointerSize());
2047 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2048 return DE.getSigned(&ValueOffset, Size);
2049 }
2050
addRelocation(uint64_t Address,MCSymbol * Symbol,uint64_t Type,uint64_t Addend,uint64_t Value)2051 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
2052 uint64_t Type, uint64_t Addend,
2053 uint64_t Value) {
2054 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2055 assert(Section && "cannot find section for address");
2056 Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
2057 Value);
2058 }
2059
addDynamicRelocation(uint64_t Address,MCSymbol * Symbol,uint64_t Type,uint64_t Addend,uint64_t Value)2060 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
2061 uint64_t Type, uint64_t Addend,
2062 uint64_t Value) {
2063 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2064 assert(Section && "cannot find section for address");
2065 Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
2066 Addend, Value);
2067 }
2068
removeRelocationAt(uint64_t Address)2069 bool BinaryContext::removeRelocationAt(uint64_t Address) {
2070 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2071 assert(Section && "cannot find section for address");
2072 return Section->removeRelocationAt(Address - Section->getAddress());
2073 }
2074
getRelocationAt(uint64_t Address)2075 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) {
2076 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2077 if (!Section)
2078 return nullptr;
2079
2080 return Section->getRelocationAt(Address - Section->getAddress());
2081 }
2082
getDynamicRelocationAt(uint64_t Address)2083 const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) {
2084 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2085 if (!Section)
2086 return nullptr;
2087
2088 return Section->getDynamicRelocationAt(Address - Section->getAddress());
2089 }
2090
markAmbiguousRelocations(BinaryData & BD,const uint64_t Address)2091 void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
2092 const uint64_t Address) {
2093 auto setImmovable = [&](BinaryData &BD) {
2094 BinaryData *Root = BD.getAtomicRoot();
2095 LLVM_DEBUG(if (Root->isMoveable()) {
2096 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
2097 << "due to ambiguous relocation referencing 0x"
2098 << Twine::utohexstr(Address) << '\n';
2099 });
2100 Root->setIsMoveable(false);
2101 };
2102
2103 if (Address == BD.getAddress()) {
2104 setImmovable(BD);
2105
2106 // Set previous symbol as immovable
2107 BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
2108 if (Prev && Prev->getEndAddress() == BD.getAddress())
2109 setImmovable(*Prev);
2110 }
2111
2112 if (Address == BD.getEndAddress()) {
2113 setImmovable(BD);
2114
2115 // Set next symbol as immovable
2116 BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
2117 if (Next && Next->getAddress() == BD.getEndAddress())
2118 setImmovable(*Next);
2119 }
2120 }
2121
getFunctionForSymbol(const MCSymbol * Symbol,uint64_t * EntryDesc)2122 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
2123 uint64_t *EntryDesc) {
2124 std::shared_lock<std::shared_timed_mutex> Lock(SymbolToFunctionMapMutex);
2125 auto BFI = SymbolToFunctionMap.find(Symbol);
2126 if (BFI == SymbolToFunctionMap.end())
2127 return nullptr;
2128
2129 BinaryFunction *BF = BFI->second;
2130 if (EntryDesc)
2131 *EntryDesc = BF->getEntryIDForSymbol(Symbol);
2132
2133 return BF;
2134 }
2135
exitWithBugReport(StringRef Message,const BinaryFunction & Function) const2136 void BinaryContext::exitWithBugReport(StringRef Message,
2137 const BinaryFunction &Function) const {
2138 errs() << "=======================================\n";
2139 errs() << "BOLT is unable to proceed because it couldn't properly understand "
2140 "this function.\n";
2141 errs() << "If you are running the most recent version of BOLT, you may "
2142 "want to "
2143 "report this and paste this dump.\nPlease check that there is no "
2144 "sensitive contents being shared in this dump.\n";
2145 errs() << "\nOffending function: " << Function.getPrintName() << "\n\n";
2146 ScopedPrinter SP(errs());
2147 SP.printBinaryBlock("Function contents", *Function.getData());
2148 errs() << "\n";
2149 Function.dump();
2150 errs() << "ERROR: " << Message;
2151 errs() << "\n=======================================\n";
2152 exit(1);
2153 }
2154
2155 BinaryFunction *
createInjectedBinaryFunction(const std::string & Name,bool IsSimple)2156 BinaryContext::createInjectedBinaryFunction(const std::string &Name,
2157 bool IsSimple) {
2158 InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
2159 BinaryFunction *BF = InjectedBinaryFunctions.back();
2160 setSymbolToFunctionMap(BF->getSymbol(), BF);
2161 BF->CurrentState = BinaryFunction::State::CFG;
2162 return BF;
2163 }
2164
2165 std::pair<size_t, size_t>
calculateEmittedSize(BinaryFunction & BF,bool FixBranches)2166 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
2167 // Adjust branch instruction to match the current layout.
2168 if (FixBranches)
2169 BF.fixBranches();
2170
2171 // Create local MC context to isolate the effect of ephemeral code emission.
2172 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
2173 MCContext *LocalCtx = MCEInstance.LocalCtx.get();
2174 MCAsmBackend *MAB =
2175 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
2176
2177 SmallString<256> Code;
2178 raw_svector_ostream VecOS(Code);
2179
2180 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
2181 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
2182 *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
2183 std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
2184 /*RelaxAll=*/false,
2185 /*IncrementalLinkerCompatible=*/false,
2186 /*DWARFMustBeAtTheEnd=*/false));
2187
2188 Streamer->initSections(false, *STI);
2189
2190 MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
2191 Section->setHasInstructions(true);
2192
2193 // Create symbols in the LocalCtx so that they get destroyed with it.
2194 MCSymbol *StartLabel = LocalCtx->createTempSymbol();
2195 MCSymbol *EndLabel = LocalCtx->createTempSymbol();
2196 MCSymbol *ColdStartLabel = LocalCtx->createTempSymbol();
2197 MCSymbol *ColdEndLabel = LocalCtx->createTempSymbol();
2198
2199 Streamer->switchSection(Section);
2200 Streamer->emitLabel(StartLabel);
2201 emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/false,
2202 /*EmitCodeOnly=*/true);
2203 Streamer->emitLabel(EndLabel);
2204
2205 if (BF.isSplit()) {
2206 MCSectionELF *ColdSection =
2207 LocalCtx->getELFSection(BF.getColdCodeSectionName(), ELF::SHT_PROGBITS,
2208 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
2209 ColdSection->setHasInstructions(true);
2210
2211 Streamer->switchSection(ColdSection);
2212 Streamer->emitLabel(ColdStartLabel);
2213 emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/true,
2214 /*EmitCodeOnly=*/true);
2215 Streamer->emitLabel(ColdEndLabel);
2216 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private
2217 Streamer->emitBytes(StringRef(""));
2218 Streamer->switchSection(Section);
2219 }
2220
2221 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
2222 // MCStreamer::Finish(), which does more than we want
2223 Streamer->emitBytes(StringRef(""));
2224
2225 MCAssembler &Assembler =
2226 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
2227 MCAsmLayout Layout(Assembler);
2228 Assembler.layout(Layout);
2229
2230 const uint64_t HotSize =
2231 Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
2232 const uint64_t ColdSize = BF.isSplit()
2233 ? Layout.getSymbolOffset(*ColdEndLabel) -
2234 Layout.getSymbolOffset(*ColdStartLabel)
2235 : 0ULL;
2236
2237 // Clean-up the effect of the code emission.
2238 for (const MCSymbol &Symbol : Assembler.symbols()) {
2239 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
2240 MutableSymbol->setUndefined();
2241 MutableSymbol->setIsRegistered(false);
2242 }
2243
2244 return std::make_pair(HotSize, ColdSize);
2245 }
2246
validateEncoding(const MCInst & Inst,ArrayRef<uint8_t> InputEncoding) const2247 bool BinaryContext::validateEncoding(const MCInst &Inst,
2248 ArrayRef<uint8_t> InputEncoding) const {
2249 SmallString<256> Code;
2250 SmallVector<MCFixup, 4> Fixups;
2251 raw_svector_ostream VecOS(Code);
2252
2253 MCE->encodeInstruction(Inst, VecOS, Fixups, *STI);
2254 auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2255 if (InputEncoding != EncodedData) {
2256 if (opts::Verbosity > 1) {
2257 errs() << "BOLT-WARNING: mismatched encoding detected\n"
2258 << " input: " << InputEncoding << '\n'
2259 << " output: " << EncodedData << '\n';
2260 }
2261 return false;
2262 }
2263
2264 return true;
2265 }
2266
getHotThreshold() const2267 uint64_t BinaryContext::getHotThreshold() const {
2268 static uint64_t Threshold = 0;
2269 if (Threshold == 0) {
2270 Threshold = std::max(
2271 (uint64_t)opts::ExecutionCountThreshold,
2272 NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2273 }
2274 return Threshold;
2275 }
2276
getBinaryFunctionContainingAddress(uint64_t Address,bool CheckPastEnd,bool UseMaxSize)2277 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2278 uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2279 auto FI = BinaryFunctions.upper_bound(Address);
2280 if (FI == BinaryFunctions.begin())
2281 return nullptr;
2282 --FI;
2283
2284 const uint64_t UsedSize =
2285 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2286
2287 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2288 return nullptr;
2289
2290 return &FI->second;
2291 }
2292
getBinaryFunctionAtAddress(uint64_t Address)2293 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2294 // First, try to find a function starting at the given address. If the
2295 // function was folded, this will get us the original folded function if it
2296 // wasn't removed from the list, e.g. in non-relocation mode.
2297 auto BFI = BinaryFunctions.find(Address);
2298 if (BFI != BinaryFunctions.end())
2299 return &BFI->second;
2300
2301 // We might have folded the function matching the object at the given
2302 // address. In such case, we look for a function matching the symbol
2303 // registered at the original address. The new function (the one that the
2304 // original was folded into) will hold the symbol.
2305 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2306 uint64_t EntryID = 0;
2307 BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2308 if (BF && EntryID == 0)
2309 return BF;
2310 }
2311 return nullptr;
2312 }
2313
translateModuleAddressRanges(const DWARFAddressRangesVector & InputRanges) const2314 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2315 const DWARFAddressRangesVector &InputRanges) const {
2316 DebugAddressRangesVector OutputRanges;
2317
2318 for (const DWARFAddressRange Range : InputRanges) {
2319 auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2320 while (BFI != BinaryFunctions.end()) {
2321 const BinaryFunction &Function = BFI->second;
2322 if (Function.getAddress() >= Range.HighPC)
2323 break;
2324 const DebugAddressRangesVector FunctionRanges =
2325 Function.getOutputAddressRanges();
2326 llvm::move(FunctionRanges, std::back_inserter(OutputRanges));
2327 std::advance(BFI, 1);
2328 }
2329 }
2330
2331 return OutputRanges;
2332 }
2333
2334 } // namespace bolt
2335 } // namespace llvm
2336