1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryContext class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/NameResolver.h"
18 #include "bolt/Utils/Utils.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23 #include "llvm/MC/MCAsmLayout.h"
24 #include "llvm/MC/MCAssembler.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
27 #include "llvm/MC/MCInstPrinter.h"
28 #include "llvm/MC/MCObjectStreamer.h"
29 #include "llvm/MC/MCObjectWriter.h"
30 #include "llvm/MC/MCRegisterInfo.h"
31 #include "llvm/MC/MCSectionELF.h"
32 #include "llvm/MC/MCStreamer.h"
33 #include "llvm/MC/MCSubtargetInfo.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/Error.h"
37 #include "llvm/Support/Regex.h"
38 #include <algorithm>
39 #include <functional>
40 #include <iterator>
41 #include <unordered_set>
42 
43 using namespace llvm;
44 
45 #undef  DEBUG_TYPE
46 #define DEBUG_TYPE "bolt"
47 
48 namespace opts {
49 
50 cl::opt<bool> NoHugePages("no-huge-pages",
51                           cl::desc("use regular size pages for code alignment"),
52                           cl::Hidden, cl::cat(BoltCategory));
53 
54 static cl::opt<bool>
55 PrintDebugInfo("print-debug-info",
56   cl::desc("print debug info when printing functions"),
57   cl::Hidden,
58   cl::ZeroOrMore,
59   cl::cat(BoltCategory));
60 
61 cl::opt<bool> PrintRelocations(
62     "print-relocations",
63     cl::desc("print relocations when printing functions/objects"), cl::Hidden,
64     cl::cat(BoltCategory));
65 
66 static cl::opt<bool>
67 PrintMemData("print-mem-data",
68   cl::desc("print memory data annotations when printing functions"),
69   cl::Hidden,
70   cl::ZeroOrMore,
71   cl::cat(BoltCategory));
72 
73 } // namespace opts
74 
75 namespace llvm {
76 namespace bolt {
77 
78 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
79                              std::unique_ptr<DWARFContext> DwCtx,
80                              std::unique_ptr<Triple> TheTriple,
81                              const Target *TheTarget, std::string TripleName,
82                              std::unique_ptr<MCCodeEmitter> MCE,
83                              std::unique_ptr<MCObjectFileInfo> MOFI,
84                              std::unique_ptr<const MCAsmInfo> AsmInfo,
85                              std::unique_ptr<const MCInstrInfo> MII,
86                              std::unique_ptr<const MCSubtargetInfo> STI,
87                              std::unique_ptr<MCInstPrinter> InstPrinter,
88                              std::unique_ptr<const MCInstrAnalysis> MIA,
89                              std::unique_ptr<MCPlusBuilder> MIB,
90                              std::unique_ptr<const MCRegisterInfo> MRI,
91                              std::unique_ptr<MCDisassembler> DisAsm)
92     : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
93       TheTriple(std::move(TheTriple)), TheTarget(TheTarget),
94       TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)),
95       AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)),
96       InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
97       MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) {
98   Relocation::Arch = this->TheTriple->getArch();
99   RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
100   PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
101 }
102 
103 BinaryContext::~BinaryContext() {
104   for (BinarySection *Section : Sections)
105     delete Section;
106   for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
107     delete InjectedFunction;
108   for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
109     delete JTI.second;
110   clearBinaryData();
111 }
112 
113 /// Create BinaryContext for a given architecture \p ArchName and
114 /// triple \p TripleName.
115 Expected<std::unique_ptr<BinaryContext>>
116 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
117                                    std::unique_ptr<DWARFContext> DwCtx) {
118   StringRef ArchName = "";
119   StringRef FeaturesStr = "";
120   switch (File->getArch()) {
121   case llvm::Triple::x86_64:
122     ArchName = "x86-64";
123     FeaturesStr = "+nopl";
124     break;
125   case llvm::Triple::aarch64:
126     ArchName = "aarch64";
127     FeaturesStr = "+fp-armv8,+neon,+crypto,+dotprod,+crc,+lse,+ras,+rdm,"
128                   "+fullfp16,+spe,+fuse-aes,+rcpc";
129     break;
130   default:
131     return createStringError(std::errc::not_supported,
132                              "BOLT-ERROR: Unrecognized machine in ELF file");
133   }
134 
135   auto TheTriple = std::make_unique<Triple>(File->makeTriple());
136   const std::string TripleName = TheTriple->str();
137 
138   std::string Error;
139   const Target *TheTarget =
140       TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error);
141   if (!TheTarget)
142     return createStringError(make_error_code(std::errc::not_supported),
143                              Twine("BOLT-ERROR: ", Error));
144 
145   std::unique_ptr<const MCRegisterInfo> MRI(
146       TheTarget->createMCRegInfo(TripleName));
147   if (!MRI)
148     return createStringError(
149         make_error_code(std::errc::not_supported),
150         Twine("BOLT-ERROR: no register info for target ", TripleName));
151 
152   // Set up disassembler.
153   std::unique_ptr<MCAsmInfo> AsmInfo(
154       TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
155   if (!AsmInfo)
156     return createStringError(
157         make_error_code(std::errc::not_supported),
158         Twine("BOLT-ERROR: no assembly info for target ", TripleName));
159   // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
160   // we want to emit such names as using @PLT without double quotes to convey
161   // variant kind to the assembler. BOLT doesn't rely on the linker so we can
162   // override the default AsmInfo behavior to emit names the way we want.
163   AsmInfo->setAllowAtInName(true);
164 
165   std::unique_ptr<const MCSubtargetInfo> STI(
166       TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
167   if (!STI)
168     return createStringError(
169         make_error_code(std::errc::not_supported),
170         Twine("BOLT-ERROR: no subtarget info for target ", TripleName));
171 
172   std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
173   if (!MII)
174     return createStringError(
175         make_error_code(std::errc::not_supported),
176         Twine("BOLT-ERROR: no instruction info for target ", TripleName));
177 
178   std::unique_ptr<MCContext> Ctx(
179       new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
180   std::unique_ptr<MCObjectFileInfo> MOFI(
181       TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
182   Ctx->setObjectFileInfo(MOFI.get());
183   // We do not support X86 Large code model. Change this in the future.
184   bool Large = false;
185   if (TheTriple->getArch() == llvm::Triple::aarch64)
186     Large = true;
187   unsigned LSDAEncoding =
188       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
189   unsigned TTypeEncoding =
190       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
191   if (IsPIC) {
192     LSDAEncoding = dwarf::DW_EH_PE_pcrel |
193                    (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
194     TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
195                     (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
196   }
197 
198   std::unique_ptr<MCDisassembler> DisAsm(
199       TheTarget->createMCDisassembler(*STI, *Ctx));
200 
201   if (!DisAsm)
202     return createStringError(
203         make_error_code(std::errc::not_supported),
204         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
205 
206   std::unique_ptr<const MCInstrAnalysis> MIA(
207       TheTarget->createMCInstrAnalysis(MII.get()));
208   if (!MIA)
209     return createStringError(
210         make_error_code(std::errc::not_supported),
211         Twine("BOLT-ERROR: failed to create instruction analysis for target ",
212               TripleName));
213 
214   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
215   std::unique_ptr<MCInstPrinter> InstructionPrinter(
216       TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo,
217                                      *MII, *MRI));
218   if (!InstructionPrinter)
219     return createStringError(
220         make_error_code(std::errc::not_supported),
221         Twine("BOLT-ERROR: no instruction printer for target ", TripleName));
222   InstructionPrinter->setPrintImmHex(true);
223 
224   std::unique_ptr<MCCodeEmitter> MCE(
225       TheTarget->createMCCodeEmitter(*MII, *Ctx));
226 
227   // Make sure we don't miss any output on core dumps.
228   outs().SetUnbuffered();
229   errs().SetUnbuffered();
230   dbgs().SetUnbuffered();
231 
232   auto BC = std::make_unique<BinaryContext>(
233       std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget,
234       std::string(TripleName), std::move(MCE), std::move(MOFI),
235       std::move(AsmInfo), std::move(MII), std::move(STI),
236       std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
237       std::move(DisAsm));
238 
239   BC->TTypeEncoding = TTypeEncoding;
240   BC->LSDAEncoding = LSDAEncoding;
241 
242   BC->MAB = std::unique_ptr<MCAsmBackend>(
243       BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
244 
245   BC->setFilename(File->getFileName());
246 
247   BC->HasFixedLoadAddress = !IsPIC;
248 
249   BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>(
250       BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx));
251 
252   if (!BC->SymbolicDisAsm)
253     return createStringError(
254         make_error_code(std::errc::not_supported),
255         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
256 
257   return std::move(BC);
258 }
259 
260 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
261   if (opts::HotText &&
262       (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
263     return true;
264 
265   if (opts::HotData &&
266       (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
267     return true;
268 
269   if (SymbolName == "_end")
270     return true;
271 
272   return false;
273 }
274 
275 std::unique_ptr<MCObjectWriter>
276 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
277   return MAB->createObjectWriter(OS);
278 }
279 
280 bool BinaryContext::validateObjectNesting() const {
281   auto Itr = BinaryDataMap.begin();
282   auto End = BinaryDataMap.end();
283   bool Valid = true;
284   while (Itr != End) {
285     auto Next = std::next(Itr);
286     while (Next != End &&
287            Itr->second->getSection() == Next->second->getSection() &&
288            Itr->second->containsRange(Next->second->getAddress(),
289                                       Next->second->getSize())) {
290       if (Next->second->Parent != Itr->second) {
291         errs() << "BOLT-WARNING: object nesting incorrect for:\n"
292                << "BOLT-WARNING:  " << *Itr->second << "\n"
293                << "BOLT-WARNING:  " << *Next->second << "\n";
294         Valid = false;
295       }
296       ++Next;
297     }
298     Itr = Next;
299   }
300   return Valid;
301 }
302 
303 bool BinaryContext::validateHoles() const {
304   bool Valid = true;
305   for (BinarySection &Section : sections()) {
306     for (const Relocation &Rel : Section.relocations()) {
307       uint64_t RelAddr = Rel.Offset + Section.getAddress();
308       const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
309       if (!BD) {
310         errs() << "BOLT-WARNING: no BinaryData found for relocation at address"
311                << " 0x" << Twine::utohexstr(RelAddr) << " in "
312                << Section.getName() << "\n";
313         Valid = false;
314       } else if (!BD->getAtomicRoot()) {
315         errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at "
316                << "address 0x" << Twine::utohexstr(RelAddr) << " in "
317                << Section.getName() << "\n";
318         Valid = false;
319       }
320     }
321   }
322   return Valid;
323 }
324 
325 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
326   const uint64_t Address = GAI->second->getAddress();
327   const uint64_t Size = GAI->second->getSize();
328 
329   auto fixParents = [&](BinaryDataMapType::iterator Itr,
330                         BinaryData *NewParent) {
331     BinaryData *OldParent = Itr->second->Parent;
332     Itr->second->Parent = NewParent;
333     ++Itr;
334     while (Itr != BinaryDataMap.end() && OldParent &&
335            Itr->second->Parent == OldParent) {
336       Itr->second->Parent = NewParent;
337       ++Itr;
338     }
339   };
340 
341   // Check if the previous symbol contains the newly added symbol.
342   if (GAI != BinaryDataMap.begin()) {
343     BinaryData *Prev = std::prev(GAI)->second;
344     while (Prev) {
345       if (Prev->getSection() == GAI->second->getSection() &&
346           Prev->containsRange(Address, Size)) {
347         fixParents(GAI, Prev);
348       } else {
349         fixParents(GAI, nullptr);
350       }
351       Prev = Prev->Parent;
352     }
353   }
354 
355   // Check if the newly added symbol contains any subsequent symbols.
356   if (Size != 0) {
357     BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
358     auto Itr = std::next(GAI);
359     while (
360         Itr != BinaryDataMap.end() &&
361         BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
362       Itr->second->Parent = BD;
363       ++Itr;
364     }
365   }
366 }
367 
368 iterator_range<BinaryContext::binary_data_iterator>
369 BinaryContext::getSubBinaryData(BinaryData *BD) {
370   auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
371   auto End = Start;
372   while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
373     ++End;
374   return make_range(Start, End);
375 }
376 
377 std::pair<const MCSymbol *, uint64_t>
378 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
379                                 bool IsPCRel) {
380   uint64_t Addend = 0;
381 
382   if (isAArch64()) {
383     // Check if this is an access to a constant island and create bookkeeping
384     // to keep track of it and emit it later as part of this function.
385     if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
386       return std::make_pair(IslandSym, Addend);
387 
388     // Detect custom code written in assembly that refers to arbitrary
389     // constant islands from other functions. Write this reference so we
390     // can pull this constant island and emit it as part of this function
391     // too.
392     auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
393     if (IslandIter != AddressToConstantIslandMap.end()) {
394       if (MCSymbol *IslandSym =
395               IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) {
396         BF.createIslandDependency(IslandSym, IslandIter->second);
397         return std::make_pair(IslandSym, Addend);
398       }
399     }
400   }
401 
402   // Note that the address does not necessarily have to reside inside
403   // a section, it could be an absolute address too.
404   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
405   if (Section && Section->isText()) {
406     if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
407       if (Address != BF.getAddress()) {
408         // The address could potentially escape. Mark it as another entry
409         // point into the function.
410         if (opts::Verbosity >= 1) {
411           outs() << "BOLT-INFO: potentially escaped address 0x"
412                  << Twine::utohexstr(Address) << " in function " << BF << '\n';
413         }
414         BF.HasInternalLabelReference = true;
415         return std::make_pair(
416             BF.addEntryPointAtOffset(Address - BF.getAddress()), Addend);
417       }
418     } else {
419       BF.InterproceduralReferences.insert(Address);
420     }
421   }
422 
423   // With relocations, catch jump table references outside of the basic block
424   // containing the indirect jump.
425   if (HasRelocations) {
426     const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
427     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
428       const MCSymbol *Symbol =
429           getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
430 
431       return std::make_pair(Symbol, Addend);
432     }
433   }
434 
435   if (BinaryData *BD = getBinaryDataContainingAddress(Address))
436     return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
437 
438   // TODO: use DWARF info to get size/alignment here?
439   MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
440   LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
441   return std::make_pair(TargetSymbol, Addend);
442 }
443 
444 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
445                                                   BinaryFunction &BF) {
446   if (!isX86())
447     return MemoryContentsType::UNKNOWN;
448 
449   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
450   if (!Section) {
451     // No section - possibly an absolute address. Since we don't allow
452     // internal function addresses to escape the function scope - we
453     // consider it a tail call.
454     if (opts::Verbosity > 1) {
455       errs() << "BOLT-WARNING: no section for address 0x"
456              << Twine::utohexstr(Address) << " referenced from function " << BF
457              << '\n';
458     }
459     return MemoryContentsType::UNKNOWN;
460   }
461 
462   if (Section->isVirtual()) {
463     // The contents are filled at runtime.
464     return MemoryContentsType::UNKNOWN;
465   }
466 
467   // No support for jump tables in code yet.
468   if (Section->isText())
469     return MemoryContentsType::UNKNOWN;
470 
471   // Start with checking for PIC jump table. We expect non-PIC jump tables
472   // to have high 32 bits set to 0.
473   if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
474     return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
475 
476   if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
477     return MemoryContentsType::POSSIBLE_JUMP_TABLE;
478 
479   return MemoryContentsType::UNKNOWN;
480 }
481 
482 /// Check if <fragment restored name> == <parent restored name>.cold(.\d+)?
483 bool isPotentialFragmentByName(BinaryFunction &Fragment,
484                                BinaryFunction &Parent) {
485   for (StringRef Name : Parent.getNames()) {
486     std::string NamePrefix = Regex::escape(NameResolver::restore(Name));
487     std::string NameRegex = Twine(NamePrefix, "\\.cold(\\.[0-9]+)?").str();
488     if (Fragment.hasRestoredNameRegex(NameRegex))
489       return true;
490   }
491   return false;
492 }
493 
494 bool BinaryContext::analyzeJumpTable(const uint64_t Address,
495                                      const JumpTable::JumpTableType Type,
496                                      BinaryFunction &BF,
497                                      const uint64_t NextJTAddress,
498                                      JumpTable::OffsetsType *Offsets) {
499   // Is one of the targets __builtin_unreachable?
500   bool HasUnreachable = false;
501 
502   // Number of targets other than __builtin_unreachable.
503   uint64_t NumRealEntries = 0;
504 
505   constexpr uint64_t INVALID_OFFSET = std::numeric_limits<uint64_t>::max();
506   auto addOffset = [&](uint64_t Offset) {
507     if (Offsets)
508       Offsets->emplace_back(Offset);
509   };
510 
511   auto doesBelongToFunction = [&](const uint64_t Addr,
512                                   BinaryFunction *TargetBF) -> bool {
513     if (BF.containsAddress(Addr))
514       return true;
515     // Nothing to do if we failed to identify the containing function.
516     if (!TargetBF)
517       return false;
518     // Case 1: check if BF is a fragment and TargetBF is its parent.
519     if (BF.isFragment()) {
520       // Parent function may or may not be already registered.
521       // Set parent link based on function name matching heuristic.
522       return registerFragment(BF, *TargetBF);
523     }
524     // Case 2: check if TargetBF is a fragment and BF is its parent.
525     return TargetBF->isFragment() && registerFragment(*TargetBF, BF);
526   };
527 
528   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
529   if (!Section)
530     return false;
531 
532   // The upper bound is defined by containing object, section limits, and
533   // the next jump table in memory.
534   uint64_t UpperBound = Section->getEndAddress();
535   const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
536   if (JumpTableBD && JumpTableBD->getSize()) {
537     assert(JumpTableBD->getEndAddress() <= UpperBound &&
538            "data object cannot cross a section boundary");
539     UpperBound = JumpTableBD->getEndAddress();
540   }
541   if (NextJTAddress)
542     UpperBound = std::min(NextJTAddress, UpperBound);
543 
544   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: analyzeJumpTable in " << BF.getPrintName()
545                     << '\n');
546   const uint64_t EntrySize = getJumpTableEntrySize(Type);
547   for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
548        EntryAddress += EntrySize) {
549     LLVM_DEBUG(dbgs() << "  * Checking 0x" << Twine::utohexstr(EntryAddress)
550                       << " -> ");
551     // Check if there's a proper relocation against the jump table entry.
552     if (HasRelocations) {
553       if (Type == JumpTable::JTT_PIC &&
554           !DataPCRelocations.count(EntryAddress)) {
555         LLVM_DEBUG(
556             dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
557         break;
558       }
559       if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
560         LLVM_DEBUG(
561             dbgs()
562             << "FAIL: JTT_NORMAL table, no relocation for this address\n");
563         break;
564       }
565     }
566 
567     const uint64_t Value =
568         (Type == JumpTable::JTT_PIC)
569             ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
570             : *getPointerAtAddress(EntryAddress);
571 
572     // __builtin_unreachable() case.
573     if (Value == BF.getAddress() + BF.getSize()) {
574       addOffset(Value - BF.getAddress());
575       HasUnreachable = true;
576       LLVM_DEBUG(dbgs() << "OK: __builtin_unreachable\n");
577       continue;
578     }
579 
580     // Function or one of its fragments.
581     BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
582 
583     // We assume that a jump table cannot have function start as an entry.
584     if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) {
585       LLVM_DEBUG({
586         if (!BF.containsAddress(Value)) {
587           dbgs() << "FAIL: function doesn't contain this address\n";
588           if (TargetBF) {
589             dbgs() << "  ! function containing this address: "
590                    << TargetBF->getPrintName() << '\n';
591             if (TargetBF->isFragment())
592               dbgs() << "  ! is a fragment\n";
593             for (BinaryFunction *TargetParent : TargetBF->ParentFragments)
594               dbgs() << "  ! its parent is "
595                      << (TargetParent ? TargetParent->getPrintName() : "(none)")
596                      << '\n';
597           }
598         }
599         if (Value == BF.getAddress())
600           dbgs() << "FAIL: jump table cannot have function start as an entry\n";
601       });
602       break;
603     }
604 
605     // Check there's an instruction at this offset.
606     if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
607         !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
608       LLVM_DEBUG(dbgs() << "FAIL: no instruction at this offset\n");
609       break;
610     }
611 
612     ++NumRealEntries;
613 
614     if (TargetBF == &BF) {
615       // Address inside the function.
616       addOffset(Value - TargetBF->getAddress());
617       LLVM_DEBUG(dbgs() << "OK: real entry\n");
618     } else {
619       // Address in split fragment.
620       BF.setHasSplitJumpTable(true);
621       // Add invalid offset for proper identification of jump table size.
622       addOffset(INVALID_OFFSET);
623       LLVM_DEBUG(dbgs() << "OK: address in split fragment "
624                         << TargetBF->getPrintName() << '\n');
625     }
626   }
627 
628   // It's a jump table if the number of real entries is more than 1, or there's
629   // one real entry and "unreachable" targets. If there are only multiple
630   // "unreachable" targets, then it's not a jump table.
631   return NumRealEntries + HasUnreachable >= 2;
632 }
633 
634 void BinaryContext::populateJumpTables() {
635   LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
636                     << '\n');
637   for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
638        ++JTI) {
639     JumpTable *JT = JTI->second;
640     BinaryFunction &BF = *JT->Parent;
641 
642     if (!BF.isSimple())
643       continue;
644 
645     uint64_t NextJTAddress = 0;
646     auto NextJTI = std::next(JTI);
647     if (NextJTI != JTE)
648       NextJTAddress = NextJTI->second->getAddress();
649 
650     const bool Success = analyzeJumpTable(JT->getAddress(), JT->Type, BF,
651                                           NextJTAddress, &JT->OffsetEntries);
652     if (!Success) {
653       dbgs() << "failed to analyze jump table in function " << BF << '\n';
654       JT->print(dbgs());
655       if (NextJTI != JTE) {
656         dbgs() << "next jump table at 0x"
657                << Twine::utohexstr(NextJTI->second->getAddress())
658                << " belongs to function " << *NextJTI->second->Parent << '\n';
659         NextJTI->second->print(dbgs());
660       }
661       llvm_unreachable("jump table heuristic failure");
662     }
663 
664     for (uint64_t EntryOffset : JT->OffsetEntries) {
665       if (EntryOffset == BF.getSize())
666         BF.IgnoredBranches.emplace_back(EntryOffset, BF.getSize());
667       else
668         BF.registerReferencedOffset(EntryOffset);
669     }
670 
671     // In strict mode, erase PC-relative relocation record. Later we check that
672     // all such records are erased and thus have been accounted for.
673     if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
674       for (uint64_t Address = JT->getAddress();
675            Address < JT->getAddress() + JT->getSize();
676            Address += JT->EntrySize) {
677         DataPCRelocations.erase(DataPCRelocations.find(Address));
678       }
679     }
680 
681     // Mark to skip the function and all its fragments.
682     if (BF.hasSplitJumpTable())
683       FragmentsToSkip.push_back(&BF);
684   }
685 
686   if (opts::StrictMode && DataPCRelocations.size()) {
687     LLVM_DEBUG({
688       dbgs() << DataPCRelocations.size()
689              << " unclaimed PC-relative relocations left in data:\n";
690       for (uint64_t Reloc : DataPCRelocations)
691         dbgs() << Twine::utohexstr(Reloc) << '\n';
692     });
693     assert(0 && "unclaimed PC-relative relocations left in data\n");
694   }
695   clearList(DataPCRelocations);
696 }
697 
698 void BinaryContext::skipMarkedFragments() {
699   // Unique functions in the vector.
700   std::unordered_set<BinaryFunction *> UniqueFunctions(FragmentsToSkip.begin(),
701                                                        FragmentsToSkip.end());
702   // Copy the functions back to FragmentsToSkip.
703   FragmentsToSkip.assign(UniqueFunctions.begin(), UniqueFunctions.end());
704   auto addToWorklist = [&](BinaryFunction *Function) -> void {
705     if (UniqueFunctions.count(Function))
706       return;
707     FragmentsToSkip.push_back(Function);
708     UniqueFunctions.insert(Function);
709   };
710   // Functions containing split jump tables need to be skipped with all
711   // fragments (transitively).
712   for (size_t I = 0; I != FragmentsToSkip.size(); I++) {
713     BinaryFunction *BF = FragmentsToSkip[I];
714     assert(UniqueFunctions.count(BF) &&
715            "internal error in traversing function fragments");
716     if (opts::Verbosity >= 1)
717       errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
718     BF->setSimple(false);
719     BF->setHasSplitJumpTable(true);
720 
721     std::for_each(BF->Fragments.begin(), BF->Fragments.end(), addToWorklist);
722     std::for_each(BF->ParentFragments.begin(), BF->ParentFragments.end(),
723                   addToWorklist);
724   }
725   if (!FragmentsToSkip.empty())
726     errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() << " function"
727            << (FragmentsToSkip.size() == 1 ? "" : "s")
728            << " due to cold fragments\n";
729   FragmentsToSkip.clear();
730 }
731 
732 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
733                                                  uint64_t Size,
734                                                  uint16_t Alignment,
735                                                  unsigned Flags) {
736   auto Itr = BinaryDataMap.find(Address);
737   if (Itr != BinaryDataMap.end()) {
738     assert(Itr->second->getSize() == Size || !Size);
739     return Itr->second->getSymbol();
740   }
741 
742   std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
743   assert(!GlobalSymbols.count(Name) && "created name is not unique");
744   return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
745 }
746 
747 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
748   return Ctx->getOrCreateSymbol(Name);
749 }
750 
751 BinaryFunction *BinaryContext::createBinaryFunction(
752     const std::string &Name, BinarySection &Section, uint64_t Address,
753     uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
754   auto Result = BinaryFunctions.emplace(
755       Address, BinaryFunction(Name, Section, Address, Size, *this));
756   assert(Result.second == true && "unexpected duplicate function");
757   BinaryFunction *BF = &Result.first->second;
758   registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
759                         Alignment);
760   setSymbolToFunctionMap(BF->getSymbol(), BF);
761   return BF;
762 }
763 
764 const MCSymbol *
765 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
766                                     JumpTable::JumpTableType Type) {
767   auto isFragmentOf = [](BinaryFunction *Fragment, BinaryFunction *Parent) {
768     return (Fragment->isFragment() && Fragment->isParentFragment(Parent));
769   };
770 
771   if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
772     assert(JT->Type == Type && "jump table types have to match");
773     bool HasMultipleParents = isFragmentOf(JT->Parent, &Function) ||
774                               isFragmentOf(&Function, JT->Parent);
775     assert((JT->Parent == &Function || HasMultipleParents) &&
776            "cannot re-use jump table of a different function");
777     assert(Address == JT->getAddress() && "unexpected non-empty jump table");
778 
779     // Flush OffsetEntries with INVALID_OFFSET if multiple parents
780     // Duplicate the entry for the parent function for easy access
781     if (HasMultipleParents) {
782       if (opts::Verbosity > 2) {
783         outs() << "BOLT-WARNING: Multiple fragments access same jump table: "
784                << JT->Parent->getPrintName() << "; " << Function.getPrintName()
785                << "\n";
786       }
787       constexpr uint64_t INVALID_OFFSET = std::numeric_limits<uint64_t>::max();
788       for (unsigned I = 0; I < JT->OffsetEntries.size(); ++I)
789         JT->OffsetEntries[I] = INVALID_OFFSET;
790       Function.JumpTables.emplace(Address, JT);
791     }
792     return JT->getFirstLabel();
793   }
794 
795   // Re-use the existing symbol if possible.
796   MCSymbol *JTLabel = nullptr;
797   if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
798     if (!isInternalSymbolName(Object->getSymbol()->getName()))
799       JTLabel = Object->getSymbol();
800   }
801 
802   const uint64_t EntrySize = getJumpTableEntrySize(Type);
803   if (!JTLabel) {
804     const std::string JumpTableName = generateJumpTableName(Function, Address);
805     JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
806   }
807 
808   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
809                     << " in function " << Function << '\n');
810 
811   JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
812                                 JumpTable::LabelMapType{{0, JTLabel}}, Function,
813                                 *getSectionForAddress(Address));
814   JumpTables.emplace(Address, JT);
815 
816   // Duplicate the entry for the parent function for easy access.
817   Function.JumpTables.emplace(Address, JT);
818 
819   return JTLabel;
820 }
821 
822 std::pair<uint64_t, const MCSymbol *>
823 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
824                                   const MCSymbol *OldLabel) {
825   auto L = scopeLock();
826   unsigned Offset = 0;
827   bool Found = false;
828   for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
829     if (Elmt.second != OldLabel)
830       continue;
831     Offset = Elmt.first;
832     Found = true;
833     break;
834   }
835   assert(Found && "Label not found");
836   (void)Found;
837   MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
838   JumpTable *NewJT =
839       new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
840                     JumpTable::LabelMapType{{Offset, NewLabel}}, Function,
841                     *getSectionForAddress(JT->getAddress()));
842   NewJT->Entries = JT->Entries;
843   NewJT->Counts = JT->Counts;
844   uint64_t JumpTableID = ++DuplicatedJumpTables;
845   // Invert it to differentiate from regular jump tables whose IDs are their
846   // addresses in the input binary memory space
847   JumpTableID = ~JumpTableID;
848   JumpTables.emplace(JumpTableID, NewJT);
849   Function.JumpTables.emplace(JumpTableID, NewJT);
850   return std::make_pair(JumpTableID, NewLabel);
851 }
852 
853 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
854                                                  uint64_t Address) {
855   size_t Id;
856   uint64_t Offset = 0;
857   if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
858     Offset = Address - JT->getAddress();
859     auto Itr = JT->Labels.find(Offset);
860     if (Itr != JT->Labels.end())
861       return std::string(Itr->second->getName());
862     Id = JumpTableIds.at(JT->getAddress());
863   } else {
864     Id = JumpTableIds[Address] = BF.JumpTables.size();
865   }
866   return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
867           (Offset ? ("." + std::to_string(Offset)) : ""));
868 }
869 
870 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
871   // FIXME: aarch64 support is missing.
872   if (!isX86())
873     return true;
874 
875   if (BF.getSize() == BF.getMaxSize())
876     return true;
877 
878   ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
879   assert(FunctionData && "cannot get function as data");
880 
881   uint64_t Offset = BF.getSize();
882   MCInst Instr;
883   uint64_t InstrSize = 0;
884   uint64_t InstrAddress = BF.getAddress() + Offset;
885   using std::placeholders::_1;
886 
887   // Skip instructions that satisfy the predicate condition.
888   auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
889     const uint64_t StartOffset = Offset;
890     for (; Offset < BF.getMaxSize();
891          Offset += InstrSize, InstrAddress += InstrSize) {
892       if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
893                                   InstrAddress, nulls()))
894         break;
895       if (!Predicate(Instr))
896         break;
897     }
898 
899     return Offset - StartOffset;
900   };
901 
902   // Skip a sequence of zero bytes.
903   auto skipZeros = [&]() {
904     const uint64_t StartOffset = Offset;
905     for (; Offset < BF.getMaxSize(); ++Offset)
906       if ((*FunctionData)[Offset] != 0)
907         break;
908 
909     return Offset - StartOffset;
910   };
911 
912   // Accept the whole padding area filled with breakpoints.
913   auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
914   if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
915     return true;
916 
917   auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
918 
919   // Some functions have a jump to the next function or to the padding area
920   // inserted after the body.
921   auto isSkipJump = [&](const MCInst &Instr) {
922     uint64_t TargetAddress = 0;
923     if (MIB->isUnconditionalBranch(Instr) &&
924         MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
925       if (TargetAddress >= InstrAddress + InstrSize &&
926           TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
927         return true;
928       }
929     }
930     return false;
931   };
932 
933   // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
934   while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
935          skipZeros())
936     ;
937 
938   if (Offset == BF.getMaxSize())
939     return true;
940 
941   if (opts::Verbosity >= 1) {
942     errs() << "BOLT-WARNING: bad padding at address 0x"
943            << Twine::utohexstr(BF.getAddress() + BF.getSize())
944            << " starting at offset " << (Offset - BF.getSize())
945            << " in function " << BF << '\n'
946            << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize())
947            << '\n';
948   }
949 
950   return false;
951 }
952 
953 void BinaryContext::adjustCodePadding() {
954   for (auto &BFI : BinaryFunctions) {
955     BinaryFunction &BF = BFI.second;
956     if (!shouldEmit(BF))
957       continue;
958 
959     if (!hasValidCodePadding(BF)) {
960       if (HasRelocations) {
961         if (opts::Verbosity >= 1) {
962           outs() << "BOLT-INFO: function " << BF
963                  << " has invalid padding. Ignoring the function.\n";
964         }
965         BF.setIgnored();
966       } else {
967         BF.setMaxSize(BF.getSize());
968       }
969     }
970   }
971 }
972 
973 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
974                                                uint64_t Size,
975                                                uint16_t Alignment,
976                                                unsigned Flags) {
977   // Register the name with MCContext.
978   MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
979 
980   auto GAI = BinaryDataMap.find(Address);
981   BinaryData *BD;
982   if (GAI == BinaryDataMap.end()) {
983     ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
984     BinarySection &Section =
985         SectionOrErr ? SectionOrErr.get() : absoluteSection();
986     BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
987                         Section, Flags);
988     GAI = BinaryDataMap.emplace(Address, BD).first;
989     GlobalSymbols[Name] = BD;
990     updateObjectNesting(GAI);
991   } else {
992     BD = GAI->second;
993     if (!BD->hasName(Name)) {
994       GlobalSymbols[Name] = BD;
995       BD->Symbols.push_back(Symbol);
996     }
997   }
998 
999   return Symbol;
1000 }
1001 
1002 const BinaryData *
1003 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
1004   auto NI = BinaryDataMap.lower_bound(Address);
1005   auto End = BinaryDataMap.end();
1006   if ((NI != End && Address == NI->first) ||
1007       ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
1008     if (NI->second->containsAddress(Address))
1009       return NI->second;
1010 
1011     // If this is a sub-symbol, see if a parent data contains the address.
1012     const BinaryData *BD = NI->second->getParent();
1013     while (BD) {
1014       if (BD->containsAddress(Address))
1015         return BD;
1016       BD = BD->getParent();
1017     }
1018   }
1019   return nullptr;
1020 }
1021 
1022 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
1023   auto NI = BinaryDataMap.find(Address);
1024   assert(NI != BinaryDataMap.end());
1025   if (NI == BinaryDataMap.end())
1026     return false;
1027   // TODO: it's possible that a jump table starts at the same address
1028   // as a larger blob of private data.  When we set the size of the
1029   // jump table, it might be smaller than the total blob size.  In this
1030   // case we just leave the original size since (currently) it won't really
1031   // affect anything.
1032   assert((!NI->second->Size || NI->second->Size == Size ||
1033           (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1034          "can't change the size of a symbol that has already had its "
1035          "size set");
1036   if (!NI->second->Size) {
1037     NI->second->Size = Size;
1038     updateObjectNesting(NI);
1039     return true;
1040   }
1041   return false;
1042 }
1043 
1044 void BinaryContext::generateSymbolHashes() {
1045   auto isPadding = [](const BinaryData &BD) {
1046     StringRef Contents = BD.getSection().getContents();
1047     StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
1048     return (BD.getName().startswith("HOLEat") ||
1049             SymData.find_first_not_of(0) == StringRef::npos);
1050   };
1051 
1052   uint64_t NumCollisions = 0;
1053   for (auto &Entry : BinaryDataMap) {
1054     BinaryData &BD = *Entry.second;
1055     StringRef Name = BD.getName();
1056 
1057     if (!isInternalSymbolName(Name))
1058       continue;
1059 
1060     // First check if a non-anonymous alias exists and move it to the front.
1061     if (BD.getSymbols().size() > 1) {
1062       auto Itr = std::find_if(BD.getSymbols().begin(), BD.getSymbols().end(),
1063                               [&](const MCSymbol *Symbol) {
1064                                 return !isInternalSymbolName(Symbol->getName());
1065                               });
1066       if (Itr != BD.getSymbols().end()) {
1067         size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
1068         std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
1069         continue;
1070       }
1071     }
1072 
1073     // We have to skip 0 size symbols since they will all collide.
1074     if (BD.getSize() == 0) {
1075       continue;
1076     }
1077 
1078     const uint64_t Hash = BD.getSection().hash(BD);
1079     const size_t Idx = Name.find("0x");
1080     std::string NewName =
1081         (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
1082     if (getBinaryDataByName(NewName)) {
1083       // Ignore collisions for symbols that appear to be padding
1084       // (i.e. all zeros or a "hole")
1085       if (!isPadding(BD)) {
1086         if (opts::Verbosity) {
1087           errs() << "BOLT-WARNING: collision detected when hashing " << BD
1088                  << " with new name (" << NewName << "), skipping.\n";
1089         }
1090         ++NumCollisions;
1091       }
1092       continue;
1093     }
1094     BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
1095     GlobalSymbols[NewName] = &BD;
1096   }
1097   if (NumCollisions) {
1098     errs() << "BOLT-WARNING: " << NumCollisions
1099            << " collisions detected while hashing binary objects";
1100     if (!opts::Verbosity)
1101       errs() << ". Use -v=1 to see the list.";
1102     errs() << '\n';
1103   }
1104 }
1105 
1106 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1107                                      BinaryFunction &Function) const {
1108   if (!isPotentialFragmentByName(TargetFunction, Function))
1109     return false;
1110   assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1111   if (TargetFunction.isParentFragment(&Function))
1112     return true;
1113   TargetFunction.addParentFragment(Function);
1114   Function.addFragment(TargetFunction);
1115   if (!HasRelocations) {
1116     TargetFunction.setSimple(false);
1117     Function.setSimple(false);
1118   }
1119   if (opts::Verbosity >= 1) {
1120     outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of "
1121            << Function << '\n';
1122   }
1123   return true;
1124 }
1125 
1126 void BinaryContext::processInterproceduralReferences(BinaryFunction &Function) {
1127   for (uint64_t Address : Function.InterproceduralReferences) {
1128     if (!Address)
1129       continue;
1130 
1131     BinaryFunction *TargetFunction =
1132         getBinaryFunctionContainingAddress(Address);
1133     if (&Function == TargetFunction)
1134       continue;
1135 
1136     if (TargetFunction) {
1137       if (TargetFunction->IsFragment &&
1138           !registerFragment(*TargetFunction, Function)) {
1139         errs() << "BOLT-WARNING: interprocedural reference between unrelated "
1140                   "fragments: "
1141                << Function.getPrintName() << " and "
1142                << TargetFunction->getPrintName() << '\n';
1143       }
1144       if (uint64_t Offset = Address - TargetFunction->getAddress())
1145         TargetFunction->addEntryPointAtOffset(Offset);
1146 
1147       continue;
1148     }
1149 
1150     // Check if address falls in function padding space - this could be
1151     // unmarked data in code. In this case adjust the padding space size.
1152     ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1153     assert(Section && "cannot get section for referenced address");
1154 
1155     if (!Section->isText())
1156       continue;
1157 
1158     // PLT requires special handling and could be ignored in this context.
1159     StringRef SectionName = Section->getName();
1160     if (SectionName == ".plt" || SectionName == ".plt.got")
1161       continue;
1162 
1163     if (opts::processAllFunctions()) {
1164       errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1165              << "object in code at address 0x" << Twine::utohexstr(Address)
1166              << " belonging to section " << SectionName << " in current mode\n";
1167       exit(1);
1168     }
1169 
1170     TargetFunction = getBinaryFunctionContainingAddress(Address,
1171                                                         /*CheckPastEnd=*/false,
1172                                                         /*UseMaxSize=*/true);
1173     // We are not going to overwrite non-simple functions, but for simple
1174     // ones - adjust the padding size.
1175     if (TargetFunction && TargetFunction->isSimple()) {
1176       errs() << "BOLT-WARNING: function " << *TargetFunction
1177              << " has an object detected in a padding region at address 0x"
1178              << Twine::utohexstr(Address) << '\n';
1179       TargetFunction->setMaxSize(TargetFunction->getSize());
1180     }
1181   }
1182 
1183   clearList(Function.InterproceduralReferences);
1184 }
1185 
1186 void BinaryContext::postProcessSymbolTable() {
1187   fixBinaryDataHoles();
1188   bool Valid = true;
1189   for (auto &Entry : BinaryDataMap) {
1190     BinaryData *BD = Entry.second;
1191     if ((BD->getName().startswith("SYMBOLat") ||
1192          BD->getName().startswith("DATAat")) &&
1193         !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1194         BD->getSection()) {
1195       errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n";
1196       Valid = false;
1197     }
1198   }
1199   assert(Valid);
1200   (void)Valid;
1201   generateSymbolHashes();
1202 }
1203 
1204 void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1205                                  BinaryFunction &ParentBF) {
1206   assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1207          "cannot merge functions with multiple entry points");
1208 
1209   std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex,
1210                                                          std::defer_lock);
1211   std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock(
1212       SymbolToFunctionMapMutex, std::defer_lock);
1213 
1214   const StringRef ChildName = ChildBF.getOneName();
1215 
1216   // Move symbols over and update bookkeeping info.
1217   for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1218     ParentBF.getSymbols().push_back(Symbol);
1219     WriteSymbolMapLock.lock();
1220     SymbolToFunctionMap[Symbol] = &ParentBF;
1221     WriteSymbolMapLock.unlock();
1222     // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1223   }
1224   ChildBF.getSymbols().clear();
1225 
1226   // Move other names the child function is known under.
1227   std::move(ChildBF.Aliases.begin(), ChildBF.Aliases.end(),
1228             std::back_inserter(ParentBF.Aliases));
1229   ChildBF.Aliases.clear();
1230 
1231   if (HasRelocations) {
1232     // Merge execution counts of ChildBF into those of ParentBF.
1233     // Without relocations, we cannot reliably merge profiles as both functions
1234     // continue to exist and either one can be executed.
1235     ChildBF.mergeProfileDataInto(ParentBF);
1236 
1237     std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex,
1238                                                           std::defer_lock);
1239     std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex,
1240                                                            std::defer_lock);
1241     // Remove ChildBF from the global set of functions in relocs mode.
1242     ReadBfsLock.lock();
1243     auto FI = BinaryFunctions.find(ChildBF.getAddress());
1244     ReadBfsLock.unlock();
1245 
1246     assert(FI != BinaryFunctions.end() && "function not found");
1247     assert(&ChildBF == &FI->second && "function mismatch");
1248 
1249     WriteBfsLock.lock();
1250     ChildBF.clearDisasmState();
1251     FI = BinaryFunctions.erase(FI);
1252     WriteBfsLock.unlock();
1253 
1254   } else {
1255     // In non-relocation mode we keep the function, but rename it.
1256     std::string NewName = "__ICF_" + ChildName.str();
1257 
1258     WriteCtxLock.lock();
1259     ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
1260     WriteCtxLock.unlock();
1261 
1262     ChildBF.setFolded(&ParentBF);
1263   }
1264 }
1265 
1266 void BinaryContext::fixBinaryDataHoles() {
1267   assert(validateObjectNesting() && "object nesting inconsitency detected");
1268 
1269   for (BinarySection &Section : allocatableSections()) {
1270     std::vector<std::pair<uint64_t, uint64_t>> Holes;
1271 
1272     auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1273       BinaryData *BD = Itr->second;
1274       bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1275                      (BD->getName().startswith("SYMBOLat0x") ||
1276                       BD->getName().startswith("DATAat0x") ||
1277                       BD->getName().startswith("ANONYMOUS")));
1278       return !isHole && BD->getSection() == Section && !BD->getParent();
1279     };
1280 
1281     auto BDStart = BinaryDataMap.begin();
1282     auto BDEnd = BinaryDataMap.end();
1283     auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1284     auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1285 
1286     uint64_t EndAddress = Section.getAddress();
1287 
1288     while (Itr != End) {
1289       if (Itr->second->getAddress() > EndAddress) {
1290         uint64_t Gap = Itr->second->getAddress() - EndAddress;
1291         Holes.emplace_back(EndAddress, Gap);
1292       }
1293       EndAddress = Itr->second->getEndAddress();
1294       ++Itr;
1295     }
1296 
1297     if (EndAddress < Section.getEndAddress())
1298       Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
1299 
1300     // If there is already a symbol at the start of the hole, grow that symbol
1301     // to cover the rest.  Otherwise, create a new symbol to cover the hole.
1302     for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1303       BinaryData *BD = getBinaryDataAtAddress(Hole.first);
1304       if (BD) {
1305         // BD->getSection() can be != Section if there are sections that
1306         // overlap.  In this case it is probably safe to just skip the holes
1307         // since the overlapping section will not(?) have any symbols in it.
1308         if (BD->getSection() == Section)
1309           setBinaryDataSize(Hole.first, Hole.second);
1310       } else {
1311         getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
1312       }
1313     }
1314   }
1315 
1316   assert(validateObjectNesting() && "object nesting inconsitency detected");
1317   assert(validateHoles() && "top level hole detected in object map");
1318 }
1319 
1320 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1321   const BinarySection *CurrentSection = nullptr;
1322   bool FirstSection = true;
1323 
1324   for (auto &Entry : BinaryDataMap) {
1325     const BinaryData *BD = Entry.second;
1326     const BinarySection &Section = BD->getSection();
1327     if (FirstSection || Section != *CurrentSection) {
1328       uint64_t Address, Size;
1329       StringRef Name = Section.getName();
1330       if (Section) {
1331         Address = Section.getAddress();
1332         Size = Section.getSize();
1333       } else {
1334         Address = BD->getAddress();
1335         Size = BD->getSize();
1336       }
1337       OS << "BOLT-INFO: Section " << Name << ", "
1338          << "0x" + Twine::utohexstr(Address) << ":"
1339          << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
1340       CurrentSection = &Section;
1341       FirstSection = false;
1342     }
1343 
1344     OS << "BOLT-INFO: ";
1345     const BinaryData *P = BD->getParent();
1346     while (P) {
1347       OS << "  ";
1348       P = P->getParent();
1349     }
1350     OS << *BD << "\n";
1351   }
1352 }
1353 
1354 Expected<unsigned> BinaryContext::getDwarfFile(
1355     StringRef Directory, StringRef FileName, unsigned FileNumber,
1356     Optional<MD5::MD5Result> Checksum, Optional<StringRef> Source,
1357     unsigned CUID, unsigned DWARFVersion) {
1358   DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1359   return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion,
1360                           FileNumber);
1361 }
1362 
1363 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1364                                                const uint32_t SrcCUID,
1365                                                unsigned FileIndex) {
1366   DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1367   const DWARFDebugLine::LineTable *LineTable =
1368       DwCtx->getLineTableForUnit(SrcUnit);
1369   const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1370       LineTable->Prologue.FileNames;
1371   // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1372   // means empty dir.
1373   assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1374          "FileIndex out of range for the compilation unit.");
1375   StringRef Dir = "";
1376   if (FileNames[FileIndex - 1].DirIdx != 0) {
1377     if (Optional<const char *> DirName = dwarf::toString(
1378             LineTable->Prologue
1379                 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1380       Dir = *DirName;
1381     }
1382   }
1383   StringRef FileName = "";
1384   if (Optional<const char *> FName =
1385           dwarf::toString(FileNames[FileIndex - 1].Name))
1386     FileName = *FName;
1387   assert(FileName != "");
1388   DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
1389   return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID,
1390                                DstUnit->getVersion()));
1391 }
1392 
1393 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1394   std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1395   std::transform(BinaryFunctions.begin(), BinaryFunctions.end(),
1396                  SortedFunctions.begin(),
1397                  [](std::pair<const uint64_t, BinaryFunction> &BFI) {
1398                    return &BFI.second;
1399                  });
1400 
1401   std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(),
1402                    [](const BinaryFunction *A, const BinaryFunction *B) {
1403                      if (A->hasValidIndex() && B->hasValidIndex()) {
1404                        return A->getIndex() < B->getIndex();
1405                      }
1406                      return A->hasValidIndex();
1407                    });
1408   return SortedFunctions;
1409 }
1410 
1411 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1412   std::vector<BinaryFunction *> AllFunctions;
1413   AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
1414   std::transform(BinaryFunctions.begin(), BinaryFunctions.end(),
1415                  std::back_inserter(AllFunctions),
1416                  [](std::pair<const uint64_t, BinaryFunction> &BFI) {
1417                    return &BFI.second;
1418                  });
1419   std::copy(InjectedBinaryFunctions.begin(), InjectedBinaryFunctions.end(),
1420             std::back_inserter(AllFunctions));
1421 
1422   return AllFunctions;
1423 }
1424 
1425 Optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1426   auto Iter = DWOCUs.find(DWOId);
1427   if (Iter == DWOCUs.end())
1428     return None;
1429 
1430   return Iter->second;
1431 }
1432 
1433 DWARFContext *BinaryContext::getDWOContext() const {
1434   if (DWOCUs.empty())
1435     return nullptr;
1436   return &DWOCUs.begin()->second->getContext();
1437 }
1438 
1439 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1440 void BinaryContext::preprocessDWODebugInfo() {
1441   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1442     DWARFUnit *const DwarfUnit = CU.get();
1443     if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1444       DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit();
1445       if (!DWOCU->isDWOUnit()) {
1446         std::string DWOName = dwarf::toString(
1447             DwarfUnit->getUnitDIE().find(
1448                 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1449             "");
1450         outs() << "BOLT-WARNING: Debug Fission: DWO debug information for "
1451                << DWOName
1452                << " was not retrieved and won't be updated. Please check "
1453                   "relative path.\n";
1454         continue;
1455       }
1456       DWOCUs[*DWOId] = DWOCU;
1457     }
1458   }
1459 }
1460 
1461 void BinaryContext::preprocessDebugInfo() {
1462   struct CURange {
1463     uint64_t LowPC;
1464     uint64_t HighPC;
1465     DWARFUnit *Unit;
1466 
1467     bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1468   };
1469 
1470   // Building a map of address ranges to CUs similar to .debug_aranges and use
1471   // it to assign CU to functions.
1472   std::vector<CURange> AllRanges;
1473   AllRanges.reserve(DwCtx->getNumCompileUnits());
1474   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1475     Expected<DWARFAddressRangesVector> RangesOrError =
1476         CU->getUnitDIE().getAddressRanges();
1477     if (!RangesOrError) {
1478       consumeError(RangesOrError.takeError());
1479       continue;
1480     }
1481     for (DWARFAddressRange &Range : *RangesOrError) {
1482       // Parts of the debug info could be invalidated due to corresponding code
1483       // being removed from the binary by the linker. Hence we check if the
1484       // address is a valid one.
1485       if (containsAddress(Range.LowPC))
1486         AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
1487     }
1488 
1489     ContainsDwarf5 |= CU->getVersion() >= 5;
1490     ContainsDwarfLegacy |= CU->getVersion() < 5;
1491   }
1492 
1493   if (ContainsDwarf5 && ContainsDwarfLegacy)
1494     llvm::errs() << "BOLT-WARNING: BOLT does not support mix mode binary with "
1495                     "DWARF5 and DWARF{2,3,4}.\n";
1496 
1497   std::sort(AllRanges.begin(), AllRanges.end());
1498   for (auto &KV : BinaryFunctions) {
1499     const uint64_t FunctionAddress = KV.first;
1500     BinaryFunction &Function = KV.second;
1501 
1502     auto It = std::partition_point(
1503         AllRanges.begin(), AllRanges.end(),
1504         [=](CURange R) { return R.HighPC <= FunctionAddress; });
1505     if (It != AllRanges.end() && It->LowPC <= FunctionAddress) {
1506       Function.setDWARFUnit(It->Unit);
1507     }
1508   }
1509 
1510   // Discover units with debug info that needs to be updated.
1511   for (const auto &KV : BinaryFunctions) {
1512     const BinaryFunction &BF = KV.second;
1513     if (shouldEmit(BF) && BF.getDWARFUnit())
1514       ProcessedCUs.insert(BF.getDWARFUnit());
1515   }
1516 
1517   // Clear debug info for functions from units that we are not going to process.
1518   for (auto &KV : BinaryFunctions) {
1519     BinaryFunction &BF = KV.second;
1520     if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
1521       BF.setDWARFUnit(nullptr);
1522   }
1523 
1524   if (opts::Verbosity >= 1) {
1525     outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1526            << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1527   }
1528 
1529   preprocessDWODebugInfo();
1530 
1531   // Populate MCContext with DWARF files from all units.
1532   StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1533   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1534     const uint64_t CUID = CU->getOffset();
1535     DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID);
1536     BinaryLineTable.setLabel(Ctx->getOrCreateSymbol(
1537         GlobalPrefix + "line_table_start" + Twine(CUID)));
1538 
1539     if (!ProcessedCUs.count(CU.get()))
1540       continue;
1541 
1542     const DWARFDebugLine::LineTable *LineTable =
1543         DwCtx->getLineTableForUnit(CU.get());
1544     const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1545         LineTable->Prologue.FileNames;
1546 
1547     uint16_t DwarfVersion = LineTable->Prologue.getVersion();
1548     if (DwarfVersion >= 5) {
1549       Optional<MD5::MD5Result> Checksum = None;
1550       if (LineTable->Prologue.ContentTypes.HasMD5)
1551         Checksum = LineTable->Prologue.FileNames[0].Checksum;
1552       Optional<const char *> Name =
1553           dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1554       if (Optional<uint64_t> DWOID = CU->getDWOId()) {
1555         auto Iter = DWOCUs.find(*DWOID);
1556         assert(Iter != DWOCUs.end() && "DWO CU was not found.");
1557         Name = dwarf::toString(
1558             Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1559       }
1560       BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum,
1561                                   None);
1562     }
1563 
1564     BinaryLineTable.setDwarfVersion(DwarfVersion);
1565 
1566     // Assign a unique label to every line table, one per CU.
1567     // Make sure empty debug line tables are registered too.
1568     if (FileNames.empty()) {
1569       cantFail(
1570           getDwarfFile("", "<unknown>", 0, None, None, CUID, DwarfVersion));
1571       continue;
1572     }
1573     const uint32_t Offset = DwarfVersion < 5 ? 1 : 0;
1574     for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1575       // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1576       // means empty dir.
1577       StringRef Dir = "";
1578       if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5)
1579         if (Optional<const char *> DirName = dwarf::toString(
1580                 LineTable->Prologue
1581                     .IncludeDirectories[FileNames[I].DirIdx - Offset]))
1582           Dir = *DirName;
1583       StringRef FileName = "";
1584       if (Optional<const char *> FName = dwarf::toString(FileNames[I].Name))
1585         FileName = *FName;
1586       assert(FileName != "");
1587       Optional<MD5::MD5Result> Checksum = None;
1588       if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5)
1589         Checksum = LineTable->Prologue.FileNames[I].Checksum;
1590       cantFail(
1591           getDwarfFile(Dir, FileName, 0, Checksum, None, CUID, DwarfVersion));
1592     }
1593   }
1594 }
1595 
1596 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1597   if (Function.isPseudo())
1598     return false;
1599 
1600   if (opts::processAllFunctions())
1601     return true;
1602 
1603   if (Function.isIgnored())
1604     return false;
1605 
1606   // In relocation mode we will emit non-simple functions with CFG.
1607   // If the function does not have a CFG it should be marked as ignored.
1608   return HasRelocations || Function.isSimple();
1609 }
1610 
1611 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1612   uint32_t Operation = Inst.getOperation();
1613   switch (Operation) {
1614   case MCCFIInstruction::OpSameValue:
1615     OS << "OpSameValue Reg" << Inst.getRegister();
1616     break;
1617   case MCCFIInstruction::OpRememberState:
1618     OS << "OpRememberState";
1619     break;
1620   case MCCFIInstruction::OpRestoreState:
1621     OS << "OpRestoreState";
1622     break;
1623   case MCCFIInstruction::OpOffset:
1624     OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1625     break;
1626   case MCCFIInstruction::OpDefCfaRegister:
1627     OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1628     break;
1629   case MCCFIInstruction::OpDefCfaOffset:
1630     OS << "OpDefCfaOffset " << Inst.getOffset();
1631     break;
1632   case MCCFIInstruction::OpDefCfa:
1633     OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1634     break;
1635   case MCCFIInstruction::OpRelOffset:
1636     OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1637     break;
1638   case MCCFIInstruction::OpAdjustCfaOffset:
1639     OS << "OfAdjustCfaOffset " << Inst.getOffset();
1640     break;
1641   case MCCFIInstruction::OpEscape:
1642     OS << "OpEscape";
1643     break;
1644   case MCCFIInstruction::OpRestore:
1645     OS << "OpRestore Reg" << Inst.getRegister();
1646     break;
1647   case MCCFIInstruction::OpUndefined:
1648     OS << "OpUndefined Reg" << Inst.getRegister();
1649     break;
1650   case MCCFIInstruction::OpRegister:
1651     OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1652        << Inst.getRegister2();
1653     break;
1654   case MCCFIInstruction::OpWindowSave:
1655     OS << "OpWindowSave";
1656     break;
1657   case MCCFIInstruction::OpGnuArgsSize:
1658     OS << "OpGnuArgsSize";
1659     break;
1660   default:
1661     OS << "Op#" << Operation;
1662     break;
1663   }
1664 }
1665 
1666 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
1667   // For aarch64, the ABI defines mapping symbols so we identify data in the
1668   // code section (see IHI0056B). $x identifies a symbol starting code or the
1669   // end of a data chunk inside code, $d indentifies start of data.
1670   if (!isAArch64() || ELFSymbolRef(Symbol).getSize())
1671     return MarkerSymType::NONE;
1672 
1673   Expected<StringRef> NameOrError = Symbol.getName();
1674   Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
1675 
1676   if (!TypeOrError || !NameOrError)
1677     return MarkerSymType::NONE;
1678 
1679   if (*TypeOrError != SymbolRef::ST_Unknown)
1680     return MarkerSymType::NONE;
1681 
1682   if (*NameOrError == "$x" || NameOrError->startswith("$x."))
1683     return MarkerSymType::CODE;
1684 
1685   if (*NameOrError == "$d" || NameOrError->startswith("$d."))
1686     return MarkerSymType::DATA;
1687 
1688   return MarkerSymType::NONE;
1689 }
1690 
1691 bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
1692   return getMarkerType(Symbol) != MarkerSymType::NONE;
1693 }
1694 
1695 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
1696                            const BinaryFunction *Function,
1697                            DWARFContext *DwCtx) {
1698   DebugLineTableRowRef RowRef =
1699       DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
1700   if (RowRef == DebugLineTableRowRef::NULL_ROW)
1701     return;
1702 
1703   const DWARFDebugLine::LineTable *LineTable;
1704   if (Function && Function->getDWARFUnit() &&
1705       Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1706     LineTable = Function->getDWARFLineTable();
1707   } else {
1708     LineTable = DwCtx->getLineTableForUnit(
1709         DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
1710   }
1711   assert(LineTable && "line table expected for instruction with debug info");
1712 
1713   const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1714   StringRef FileName = "";
1715   if (Optional<const char *> FName =
1716           dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
1717     FileName = *FName;
1718   OS << " # debug line " << FileName << ":" << Row.Line;
1719   if (Row.Column)
1720     OS << ":" << Row.Column;
1721   if (Row.Discriminator)
1722     OS << " discriminator:" << Row.Discriminator;
1723 }
1724 
1725 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1726                                      uint64_t Offset,
1727                                      const BinaryFunction *Function,
1728                                      bool PrintMCInst, bool PrintMemData,
1729                                      bool PrintRelocations,
1730                                      StringRef Endl) const {
1731   if (MIB->isEHLabel(Instruction)) {
1732     OS << "  EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << Endl;
1733     return;
1734   }
1735   OS << format("    %08" PRIx64 ": ", Offset);
1736   if (MIB->isCFI(Instruction)) {
1737     uint32_t Offset = Instruction.getOperand(0).getImm();
1738     OS << "\t!CFI\t$" << Offset << "\t; ";
1739     if (Function)
1740       printCFI(OS, *Function->getCFIFor(Instruction));
1741     OS << Endl;
1742     return;
1743   }
1744   InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
1745   if (MIB->isCall(Instruction)) {
1746     if (MIB->isTailCall(Instruction))
1747       OS << " # TAILCALL ";
1748     if (MIB->isInvoke(Instruction)) {
1749       const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instruction);
1750       OS << " # handler: ";
1751       if (EHInfo->first)
1752         OS << *EHInfo->first;
1753       else
1754         OS << '0';
1755       OS << "; action: " << EHInfo->second;
1756       const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1757       if (GnuArgsSize >= 0)
1758         OS << "; GNU_args_size = " << GnuArgsSize;
1759     }
1760   } else if (MIB->isIndirectBranch(Instruction)) {
1761     if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1762       OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1763     } else {
1764       OS << " # UNKNOWN CONTROL FLOW";
1765     }
1766   }
1767   if (Optional<uint32_t> Offset = MIB->getOffset(Instruction))
1768     OS << " # Offset: " << *Offset;
1769 
1770   MIB->printAnnotations(Instruction, OS);
1771 
1772   if (opts::PrintDebugInfo)
1773     printDebugInfo(OS, Instruction, Function, DwCtx.get());
1774 
1775   if ((opts::PrintRelocations || PrintRelocations) && Function) {
1776     const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
1777     Function->printRelocations(OS, Offset, Size);
1778   }
1779 
1780   OS << Endl;
1781 
1782   if (PrintMCInst) {
1783     Instruction.dump_pretty(OS, InstPrinter.get());
1784     OS << Endl;
1785   }
1786 }
1787 
1788 Optional<uint64_t>
1789 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
1790                                         uint64_t FileOffset) const {
1791   // Find a segment with a matching file offset.
1792   for (auto &KV : SegmentMapInfo) {
1793     const SegmentInfo &SegInfo = KV.second;
1794     if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) {
1795       // Use segment's aligned memory offset to calculate the base address.
1796       const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment);
1797       return MMapAddress - MemOffset;
1798     }
1799   }
1800 
1801   return NoneType();
1802 }
1803 
1804 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
1805   auto SI = AddressToSection.upper_bound(Address);
1806   if (SI != AddressToSection.begin()) {
1807     --SI;
1808     uint64_t UpperBound = SI->first + SI->second->getSize();
1809     if (!SI->second->getSize())
1810       UpperBound += 1;
1811     if (UpperBound > Address)
1812       return *SI->second;
1813   }
1814   return std::make_error_code(std::errc::bad_address);
1815 }
1816 
1817 ErrorOr<StringRef>
1818 BinaryContext::getSectionNameForAddress(uint64_t Address) const {
1819   if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
1820     return Section->getName();
1821   return std::make_error_code(std::errc::bad_address);
1822 }
1823 
1824 BinarySection &BinaryContext::registerSection(BinarySection *Section) {
1825   auto Res = Sections.insert(Section);
1826   (void)Res;
1827   assert(Res.second && "can't register the same section twice.");
1828 
1829   // Only register allocatable sections in the AddressToSection map.
1830   if (Section->isAllocatable() && Section->getAddress())
1831     AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
1832   NameToSection.insert(
1833       std::make_pair(std::string(Section->getName()), Section));
1834   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
1835   return *Section;
1836 }
1837 
1838 BinarySection &BinaryContext::registerSection(SectionRef Section) {
1839   return registerSection(new BinarySection(*this, Section));
1840 }
1841 
1842 BinarySection &
1843 BinaryContext::registerSection(StringRef SectionName,
1844                                const BinarySection &OriginalSection) {
1845   return registerSection(
1846       new BinarySection(*this, SectionName, OriginalSection));
1847 }
1848 
1849 BinarySection &
1850 BinaryContext::registerOrUpdateSection(StringRef Name, unsigned ELFType,
1851                                        unsigned ELFFlags, uint8_t *Data,
1852                                        uint64_t Size, unsigned Alignment) {
1853   auto NamedSections = getSectionByName(Name);
1854   if (NamedSections.begin() != NamedSections.end()) {
1855     assert(std::next(NamedSections.begin()) == NamedSections.end() &&
1856            "can only update unique sections");
1857     BinarySection *Section = NamedSections.begin()->second;
1858 
1859     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
1860     const bool Flag = Section->isAllocatable();
1861     (void)Flag;
1862     Section->update(Data, Size, Alignment, ELFType, ELFFlags);
1863     LLVM_DEBUG(dbgs() << *Section << "\n");
1864     // FIXME: Fix section flags/attributes for MachO.
1865     if (isELF())
1866       assert(Flag == Section->isAllocatable() &&
1867              "can't change section allocation status");
1868     return *Section;
1869   }
1870 
1871   return registerSection(
1872       new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
1873 }
1874 
1875 bool BinaryContext::deregisterSection(BinarySection &Section) {
1876   BinarySection *SectionPtr = &Section;
1877   auto Itr = Sections.find(SectionPtr);
1878   if (Itr != Sections.end()) {
1879     auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
1880     while (Range.first != Range.second) {
1881       if (Range.first->second == SectionPtr) {
1882         AddressToSection.erase(Range.first);
1883         break;
1884       }
1885       ++Range.first;
1886     }
1887 
1888     auto NameRange =
1889         NameToSection.equal_range(std::string(SectionPtr->getName()));
1890     while (NameRange.first != NameRange.second) {
1891       if (NameRange.first->second == SectionPtr) {
1892         NameToSection.erase(NameRange.first);
1893         break;
1894       }
1895       ++NameRange.first;
1896     }
1897 
1898     Sections.erase(Itr);
1899     delete SectionPtr;
1900     return true;
1901   }
1902   return false;
1903 }
1904 
1905 void BinaryContext::printSections(raw_ostream &OS) const {
1906   for (BinarySection *const &Section : Sections)
1907     OS << "BOLT-INFO: " << *Section << "\n";
1908 }
1909 
1910 BinarySection &BinaryContext::absoluteSection() {
1911   if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
1912     return *Section;
1913   return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
1914 }
1915 
1916 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
1917                                                            size_t Size) const {
1918   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
1919   if (!Section)
1920     return std::make_error_code(std::errc::bad_address);
1921 
1922   if (Section->isVirtual())
1923     return 0;
1924 
1925   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
1926                    AsmInfo->getCodePointerSize());
1927   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
1928   return DE.getUnsigned(&ValueOffset, Size);
1929 }
1930 
1931 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
1932                                                          size_t Size) const {
1933   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
1934   if (!Section)
1935     return std::make_error_code(std::errc::bad_address);
1936 
1937   if (Section->isVirtual())
1938     return 0;
1939 
1940   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
1941                    AsmInfo->getCodePointerSize());
1942   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
1943   return DE.getSigned(&ValueOffset, Size);
1944 }
1945 
1946 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
1947                                   uint64_t Type, uint64_t Addend,
1948                                   uint64_t Value) {
1949   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1950   assert(Section && "cannot find section for address");
1951   Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
1952                          Value);
1953 }
1954 
1955 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
1956                                          uint64_t Type, uint64_t Addend,
1957                                          uint64_t Value) {
1958   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1959   assert(Section && "cannot find section for address");
1960   Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
1961                                 Addend, Value);
1962 }
1963 
1964 bool BinaryContext::removeRelocationAt(uint64_t Address) {
1965   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1966   assert(Section && "cannot find section for address");
1967   return Section->removeRelocationAt(Address - Section->getAddress());
1968 }
1969 
1970 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) {
1971   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1972   if (!Section)
1973     return nullptr;
1974 
1975   return Section->getRelocationAt(Address - Section->getAddress());
1976 }
1977 
1978 const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) {
1979   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1980   if (!Section)
1981     return nullptr;
1982 
1983   return Section->getDynamicRelocationAt(Address - Section->getAddress());
1984 }
1985 
1986 void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
1987                                              const uint64_t Address) {
1988   auto setImmovable = [&](BinaryData &BD) {
1989     BinaryData *Root = BD.getAtomicRoot();
1990     LLVM_DEBUG(if (Root->isMoveable()) {
1991       dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
1992              << "due to ambiguous relocation referencing 0x"
1993              << Twine::utohexstr(Address) << '\n';
1994     });
1995     Root->setIsMoveable(false);
1996   };
1997 
1998   if (Address == BD.getAddress()) {
1999     setImmovable(BD);
2000 
2001     // Set previous symbol as immovable
2002     BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
2003     if (Prev && Prev->getEndAddress() == BD.getAddress())
2004       setImmovable(*Prev);
2005   }
2006 
2007   if (Address == BD.getEndAddress()) {
2008     setImmovable(BD);
2009 
2010     // Set next symbol as immovable
2011     BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
2012     if (Next && Next->getAddress() == BD.getEndAddress())
2013       setImmovable(*Next);
2014   }
2015 }
2016 
2017 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
2018                                                     uint64_t *EntryDesc) {
2019   std::shared_lock<std::shared_timed_mutex> Lock(SymbolToFunctionMapMutex);
2020   auto BFI = SymbolToFunctionMap.find(Symbol);
2021   if (BFI == SymbolToFunctionMap.end())
2022     return nullptr;
2023 
2024   BinaryFunction *BF = BFI->second;
2025   if (EntryDesc)
2026     *EntryDesc = BF->getEntryIDForSymbol(Symbol);
2027 
2028   return BF;
2029 }
2030 
2031 void BinaryContext::exitWithBugReport(StringRef Message,
2032                                       const BinaryFunction &Function) const {
2033   errs() << "=======================================\n";
2034   errs() << "BOLT is unable to proceed because it couldn't properly understand "
2035             "this function.\n";
2036   errs() << "If you are running the most recent version of BOLT, you may "
2037             "want to "
2038             "report this and paste this dump.\nPlease check that there is no "
2039             "sensitive contents being shared in this dump.\n";
2040   errs() << "\nOffending function: " << Function.getPrintName() << "\n\n";
2041   ScopedPrinter SP(errs());
2042   SP.printBinaryBlock("Function contents", *Function.getData());
2043   errs() << "\n";
2044   Function.dump();
2045   errs() << "ERROR: " << Message;
2046   errs() << "\n=======================================\n";
2047   exit(1);
2048 }
2049 
2050 BinaryFunction *
2051 BinaryContext::createInjectedBinaryFunction(const std::string &Name,
2052                                             bool IsSimple) {
2053   InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
2054   BinaryFunction *BF = InjectedBinaryFunctions.back();
2055   setSymbolToFunctionMap(BF->getSymbol(), BF);
2056   BF->CurrentState = BinaryFunction::State::CFG;
2057   return BF;
2058 }
2059 
2060 std::pair<size_t, size_t>
2061 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
2062   // Adjust branch instruction to match the current layout.
2063   if (FixBranches)
2064     BF.fixBranches();
2065 
2066   // Create local MC context to isolate the effect of ephemeral code emission.
2067   IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
2068   MCContext *LocalCtx = MCEInstance.LocalCtx.get();
2069   MCAsmBackend *MAB =
2070       TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
2071 
2072   SmallString<256> Code;
2073   raw_svector_ostream VecOS(Code);
2074 
2075   std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
2076   std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
2077       *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
2078       std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
2079       /*RelaxAll=*/false,
2080       /*IncrementalLinkerCompatible=*/false,
2081       /*DWARFMustBeAtTheEnd=*/false));
2082 
2083   Streamer->initSections(false, *STI);
2084 
2085   MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
2086   Section->setHasInstructions(true);
2087 
2088   // Create symbols in the LocalCtx so that they get destroyed with it.
2089   MCSymbol *StartLabel = LocalCtx->createTempSymbol();
2090   MCSymbol *EndLabel = LocalCtx->createTempSymbol();
2091   MCSymbol *ColdStartLabel = LocalCtx->createTempSymbol();
2092   MCSymbol *ColdEndLabel = LocalCtx->createTempSymbol();
2093 
2094   Streamer->switchSection(Section);
2095   Streamer->emitLabel(StartLabel);
2096   emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/false,
2097                    /*EmitCodeOnly=*/true);
2098   Streamer->emitLabel(EndLabel);
2099 
2100   if (BF.isSplit()) {
2101     MCSectionELF *ColdSection =
2102         LocalCtx->getELFSection(BF.getColdCodeSectionName(), ELF::SHT_PROGBITS,
2103                                 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
2104     ColdSection->setHasInstructions(true);
2105 
2106     Streamer->switchSection(ColdSection);
2107     Streamer->emitLabel(ColdStartLabel);
2108     emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/true,
2109                      /*EmitCodeOnly=*/true);
2110     Streamer->emitLabel(ColdEndLabel);
2111     // To avoid calling MCObjectStreamer::flushPendingLabels() which is private
2112     Streamer->emitBytes(StringRef(""));
2113     Streamer->switchSection(Section);
2114   }
2115 
2116   // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
2117   // MCStreamer::Finish(), which does more than we want
2118   Streamer->emitBytes(StringRef(""));
2119 
2120   MCAssembler &Assembler =
2121       static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
2122   MCAsmLayout Layout(Assembler);
2123   Assembler.layout(Layout);
2124 
2125   const uint64_t HotSize =
2126       Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
2127   const uint64_t ColdSize = BF.isSplit()
2128                                 ? Layout.getSymbolOffset(*ColdEndLabel) -
2129                                       Layout.getSymbolOffset(*ColdStartLabel)
2130                                 : 0ULL;
2131 
2132   // Clean-up the effect of the code emission.
2133   for (const MCSymbol &Symbol : Assembler.symbols()) {
2134     MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
2135     MutableSymbol->setUndefined();
2136     MutableSymbol->setIsRegistered(false);
2137   }
2138 
2139   return std::make_pair(HotSize, ColdSize);
2140 }
2141 
2142 bool BinaryContext::validateEncoding(const MCInst &Inst,
2143                                      ArrayRef<uint8_t> InputEncoding) const {
2144   SmallString<256> Code;
2145   SmallVector<MCFixup, 4> Fixups;
2146   raw_svector_ostream VecOS(Code);
2147 
2148   MCE->encodeInstruction(Inst, VecOS, Fixups, *STI);
2149   auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2150   if (InputEncoding != EncodedData) {
2151     if (opts::Verbosity > 1) {
2152       errs() << "BOLT-WARNING: mismatched encoding detected\n"
2153              << "      input: " << InputEncoding << '\n'
2154              << "     output: " << EncodedData << '\n';
2155     }
2156     return false;
2157   }
2158 
2159   return true;
2160 }
2161 
2162 uint64_t BinaryContext::getHotThreshold() const {
2163   static uint64_t Threshold = 0;
2164   if (Threshold == 0) {
2165     Threshold = std::max(
2166         (uint64_t)opts::ExecutionCountThreshold,
2167         NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2168   }
2169   return Threshold;
2170 }
2171 
2172 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2173     uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2174   auto FI = BinaryFunctions.upper_bound(Address);
2175   if (FI == BinaryFunctions.begin())
2176     return nullptr;
2177   --FI;
2178 
2179   const uint64_t UsedSize =
2180       UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2181 
2182   if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2183     return nullptr;
2184 
2185   return &FI->second;
2186 }
2187 
2188 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2189   // First, try to find a function starting at the given address. If the
2190   // function was folded, this will get us the original folded function if it
2191   // wasn't removed from the list, e.g. in non-relocation mode.
2192   auto BFI = BinaryFunctions.find(Address);
2193   if (BFI != BinaryFunctions.end())
2194     return &BFI->second;
2195 
2196   // We might have folded the function matching the object at the given
2197   // address. In such case, we look for a function matching the symbol
2198   // registered at the original address. The new function (the one that the
2199   // original was folded into) will hold the symbol.
2200   if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2201     uint64_t EntryID = 0;
2202     BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2203     if (BF && EntryID == 0)
2204       return BF;
2205   }
2206   return nullptr;
2207 }
2208 
2209 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2210     const DWARFAddressRangesVector &InputRanges) const {
2211   DebugAddressRangesVector OutputRanges;
2212 
2213   for (const DWARFAddressRange Range : InputRanges) {
2214     auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2215     while (BFI != BinaryFunctions.end()) {
2216       const BinaryFunction &Function = BFI->second;
2217       if (Function.getAddress() >= Range.HighPC)
2218         break;
2219       const DebugAddressRangesVector FunctionRanges =
2220           Function.getOutputAddressRanges();
2221       std::move(std::begin(FunctionRanges), std::end(FunctionRanges),
2222                 std::back_inserter(OutputRanges));
2223       std::advance(BFI, 1);
2224     }
2225   }
2226 
2227   return OutputRanges;
2228 }
2229 
2230 } // namespace bolt
2231 } // namespace llvm
2232