1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryContext class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/NameResolver.h"
18 #include "bolt/Utils/Utils.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23 #include "llvm/MC/MCAsmLayout.h"
24 #include "llvm/MC/MCAssembler.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
27 #include "llvm/MC/MCInstPrinter.h"
28 #include "llvm/MC/MCObjectStreamer.h"
29 #include "llvm/MC/MCObjectWriter.h"
30 #include "llvm/MC/MCRegisterInfo.h"
31 #include "llvm/MC/MCSectionELF.h"
32 #include "llvm/MC/MCStreamer.h"
33 #include "llvm/MC/MCSubtargetInfo.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/Regex.h"
37 #include <algorithm>
38 #include <functional>
39 #include <iterator>
40 #include <unordered_set>
41 
42 using namespace llvm;
43 
44 #undef  DEBUG_TYPE
45 #define DEBUG_TYPE "bolt"
46 
47 namespace opts {
48 
49 cl::opt<bool>
50 NoHugePages("no-huge-pages",
51   cl::desc("use regular size pages for code alignment"),
52   cl::ZeroOrMore,
53   cl::Hidden,
54   cl::cat(BoltCategory));
55 
56 static cl::opt<bool>
57 PrintDebugInfo("print-debug-info",
58   cl::desc("print debug info when printing functions"),
59   cl::Hidden,
60   cl::ZeroOrMore,
61   cl::cat(BoltCategory));
62 
63 cl::opt<bool>
64 PrintRelocations("print-relocations",
65   cl::desc("print relocations when printing functions/objects"),
66   cl::Hidden,
67   cl::ZeroOrMore,
68   cl::cat(BoltCategory));
69 
70 static cl::opt<bool>
71 PrintMemData("print-mem-data",
72   cl::desc("print memory data annotations when printing functions"),
73   cl::Hidden,
74   cl::ZeroOrMore,
75   cl::cat(BoltCategory));
76 
77 } // namespace opts
78 
79 namespace llvm {
80 namespace bolt {
81 
82 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
83                              std::unique_ptr<DWARFContext> DwCtx,
84                              std::unique_ptr<Triple> TheTriple,
85                              const Target *TheTarget, std::string TripleName,
86                              std::unique_ptr<MCCodeEmitter> MCE,
87                              std::unique_ptr<MCObjectFileInfo> MOFI,
88                              std::unique_ptr<const MCAsmInfo> AsmInfo,
89                              std::unique_ptr<const MCInstrInfo> MII,
90                              std::unique_ptr<const MCSubtargetInfo> STI,
91                              std::unique_ptr<MCInstPrinter> InstPrinter,
92                              std::unique_ptr<const MCInstrAnalysis> MIA,
93                              std::unique_ptr<MCPlusBuilder> MIB,
94                              std::unique_ptr<const MCRegisterInfo> MRI,
95                              std::unique_ptr<MCDisassembler> DisAsm)
96     : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
97       TheTriple(std::move(TheTriple)), TheTarget(TheTarget),
98       TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)),
99       AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)),
100       InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
101       MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) {
102   Relocation::Arch = this->TheTriple->getArch();
103   PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
104 }
105 
106 BinaryContext::~BinaryContext() {
107   for (BinarySection *Section : Sections)
108     delete Section;
109   for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
110     delete InjectedFunction;
111   for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
112     delete JTI.second;
113   clearBinaryData();
114 }
115 
116 /// Create BinaryContext for a given architecture \p ArchName and
117 /// triple \p TripleName.
118 std::unique_ptr<BinaryContext>
119 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
120                                    std::unique_ptr<DWARFContext> DwCtx) {
121   StringRef ArchName = "";
122   StringRef FeaturesStr = "";
123   switch (File->getArch()) {
124   case llvm::Triple::x86_64:
125     ArchName = "x86-64";
126     FeaturesStr = "+nopl";
127     break;
128   case llvm::Triple::aarch64:
129     ArchName = "aarch64";
130     FeaturesStr = "+fp-armv8,+neon,+crypto,+dotprod,+crc,+lse,+ras,+rdm,"
131                   "+fullfp16,+spe,+fuse-aes,+rcpc";
132     break;
133   default:
134     errs() << "BOLT-ERROR: Unrecognized machine in ELF file.\n";
135     return nullptr;
136   }
137 
138   auto TheTriple = std::make_unique<Triple>(File->makeTriple());
139   const std::string TripleName = TheTriple->str();
140 
141   std::string Error;
142   const Target *TheTarget =
143       TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error);
144   if (!TheTarget) {
145     errs() << "BOLT-ERROR: " << Error;
146     return nullptr;
147   }
148 
149   std::unique_ptr<const MCRegisterInfo> MRI(
150       TheTarget->createMCRegInfo(TripleName));
151   if (!MRI) {
152     errs() << "BOLT-ERROR: no register info for target " << TripleName << "\n";
153     return nullptr;
154   }
155 
156   // Set up disassembler.
157   std::unique_ptr<const MCAsmInfo> AsmInfo(
158       TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
159   if (!AsmInfo) {
160     errs() << "BOLT-ERROR: no assembly info for target " << TripleName << "\n";
161     return nullptr;
162   }
163 
164   std::unique_ptr<const MCSubtargetInfo> STI(
165       TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
166   if (!STI) {
167     errs() << "BOLT-ERROR: no subtarget info for target " << TripleName << "\n";
168     return nullptr;
169   }
170 
171   std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
172   if (!MII) {
173     errs() << "BOLT-ERROR: no instruction info for target " << TripleName
174            << "\n";
175     return nullptr;
176   }
177 
178   std::unique_ptr<MCContext> Ctx(
179       new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
180   std::unique_ptr<MCObjectFileInfo> MOFI(
181       TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
182   Ctx->setObjectFileInfo(MOFI.get());
183   // We do not support X86 Large code model. Change this in the future.
184   bool Large = false;
185   if (TheTriple->getArch() == llvm::Triple::aarch64)
186     Large = true;
187   unsigned LSDAEncoding =
188       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
189   unsigned TTypeEncoding =
190       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
191   if (IsPIC) {
192     LSDAEncoding = dwarf::DW_EH_PE_pcrel |
193                    (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
194     TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
195                     (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
196   }
197 
198   std::unique_ptr<MCDisassembler> DisAsm(
199       TheTarget->createMCDisassembler(*STI, *Ctx));
200 
201   if (!DisAsm) {
202     errs() << "BOLT-ERROR: no disassembler for target " << TripleName << "\n";
203     return nullptr;
204   }
205 
206   std::unique_ptr<const MCInstrAnalysis> MIA(
207       TheTarget->createMCInstrAnalysis(MII.get()));
208   if (!MIA) {
209     errs() << "BOLT-ERROR: failed to create instruction analysis for target"
210            << TripleName << "\n";
211     return nullptr;
212   }
213 
214   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
215   std::unique_ptr<MCInstPrinter> InstructionPrinter(
216       TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo,
217                                      *MII, *MRI));
218   if (!InstructionPrinter) {
219     errs() << "BOLT-ERROR: no instruction printer for target " << TripleName
220            << '\n';
221     return nullptr;
222   }
223   InstructionPrinter->setPrintImmHex(true);
224 
225   std::unique_ptr<MCCodeEmitter> MCE(
226       TheTarget->createMCCodeEmitter(*MII, *Ctx));
227 
228   // Make sure we don't miss any output on core dumps.
229   outs().SetUnbuffered();
230   errs().SetUnbuffered();
231   dbgs().SetUnbuffered();
232 
233   auto BC = std::make_unique<BinaryContext>(
234       std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget,
235       std::string(TripleName), std::move(MCE), std::move(MOFI),
236       std::move(AsmInfo), std::move(MII), std::move(STI),
237       std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
238       std::move(DisAsm));
239 
240   BC->TTypeEncoding = TTypeEncoding;
241   BC->LSDAEncoding = LSDAEncoding;
242 
243   BC->MAB = std::unique_ptr<MCAsmBackend>(
244       BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
245 
246   BC->setFilename(File->getFileName());
247 
248   BC->HasFixedLoadAddress = !IsPIC;
249 
250   return BC;
251 }
252 
253 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
254   if (opts::HotText &&
255       (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
256     return true;
257 
258   if (opts::HotData &&
259       (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
260     return true;
261 
262   if (SymbolName == "_end")
263     return true;
264 
265   return false;
266 }
267 
268 std::unique_ptr<MCObjectWriter>
269 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
270   return MAB->createObjectWriter(OS);
271 }
272 
273 bool BinaryContext::validateObjectNesting() const {
274   auto Itr = BinaryDataMap.begin();
275   auto End = BinaryDataMap.end();
276   bool Valid = true;
277   while (Itr != End) {
278     auto Next = std::next(Itr);
279     while (Next != End &&
280            Itr->second->getSection() == Next->second->getSection() &&
281            Itr->second->containsRange(Next->second->getAddress(),
282                                       Next->second->getSize())) {
283       if (Next->second->Parent != Itr->second) {
284         errs() << "BOLT-WARNING: object nesting incorrect for:\n"
285                << "BOLT-WARNING:  " << *Itr->second << "\n"
286                << "BOLT-WARNING:  " << *Next->second << "\n";
287         Valid = false;
288       }
289       ++Next;
290     }
291     Itr = Next;
292   }
293   return Valid;
294 }
295 
296 bool BinaryContext::validateHoles() const {
297   bool Valid = true;
298   for (BinarySection &Section : sections()) {
299     for (const Relocation &Rel : Section.relocations()) {
300       uint64_t RelAddr = Rel.Offset + Section.getAddress();
301       const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
302       if (!BD) {
303         errs() << "BOLT-WARNING: no BinaryData found for relocation at address"
304                << " 0x" << Twine::utohexstr(RelAddr) << " in "
305                << Section.getName() << "\n";
306         Valid = false;
307       } else if (!BD->getAtomicRoot()) {
308         errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at "
309                << "address 0x" << Twine::utohexstr(RelAddr) << " in "
310                << Section.getName() << "\n";
311         Valid = false;
312       }
313     }
314   }
315   return Valid;
316 }
317 
318 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
319   const uint64_t Address = GAI->second->getAddress();
320   const uint64_t Size = GAI->second->getSize();
321 
322   auto fixParents = [&](BinaryDataMapType::iterator Itr,
323                         BinaryData *NewParent) {
324     BinaryData *OldParent = Itr->second->Parent;
325     Itr->second->Parent = NewParent;
326     ++Itr;
327     while (Itr != BinaryDataMap.end() && OldParent &&
328            Itr->second->Parent == OldParent) {
329       Itr->second->Parent = NewParent;
330       ++Itr;
331     }
332   };
333 
334   // Check if the previous symbol contains the newly added symbol.
335   if (GAI != BinaryDataMap.begin()) {
336     BinaryData *Prev = std::prev(GAI)->second;
337     while (Prev) {
338       if (Prev->getSection() == GAI->second->getSection() &&
339           Prev->containsRange(Address, Size)) {
340         fixParents(GAI, Prev);
341       } else {
342         fixParents(GAI, nullptr);
343       }
344       Prev = Prev->Parent;
345     }
346   }
347 
348   // Check if the newly added symbol contains any subsequent symbols.
349   if (Size != 0) {
350     BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
351     auto Itr = std::next(GAI);
352     while (
353         Itr != BinaryDataMap.end() &&
354         BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
355       Itr->second->Parent = BD;
356       ++Itr;
357     }
358   }
359 }
360 
361 iterator_range<BinaryContext::binary_data_iterator>
362 BinaryContext::getSubBinaryData(BinaryData *BD) {
363   auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
364   auto End = Start;
365   while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
366     ++End;
367   return make_range(Start, End);
368 }
369 
370 std::pair<const MCSymbol *, uint64_t>
371 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
372                                 bool IsPCRel) {
373   uint64_t Addend = 0;
374 
375   if (isAArch64()) {
376     // Check if this is an access to a constant island and create bookkeeping
377     // to keep track of it and emit it later as part of this function.
378     if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
379       return std::make_pair(IslandSym, Addend);
380 
381     // Detect custom code written in assembly that refers to arbitrary
382     // constant islands from other functions. Write this reference so we
383     // can pull this constant island and emit it as part of this function
384     // too.
385     auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
386     if (IslandIter != AddressToConstantIslandMap.end()) {
387       if (MCSymbol *IslandSym =
388               IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) {
389         BF.createIslandDependency(IslandSym, IslandIter->second);
390         return std::make_pair(IslandSym, Addend);
391       }
392     }
393   }
394 
395   // Note that the address does not necessarily have to reside inside
396   // a section, it could be an absolute address too.
397   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
398   if (Section && Section->isText()) {
399     if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
400       if (Address != BF.getAddress()) {
401         // The address could potentially escape. Mark it as another entry
402         // point into the function.
403         if (opts::Verbosity >= 1) {
404           outs() << "BOLT-INFO: potentially escaped address 0x"
405                  << Twine::utohexstr(Address) << " in function " << BF << '\n';
406         }
407         BF.HasInternalLabelReference = true;
408         return std::make_pair(
409             BF.addEntryPointAtOffset(Address - BF.getAddress()), Addend);
410       }
411     } else {
412       BF.InterproceduralReferences.insert(Address);
413     }
414   }
415 
416   // With relocations, catch jump table references outside of the basic block
417   // containing the indirect jump.
418   if (HasRelocations) {
419     const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
420     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
421       const MCSymbol *Symbol =
422           getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
423 
424       return std::make_pair(Symbol, Addend);
425     }
426   }
427 
428   if (BinaryData *BD = getBinaryDataContainingAddress(Address))
429     return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
430 
431   // TODO: use DWARF info to get size/alignment here?
432   MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
433   LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
434   return std::make_pair(TargetSymbol, Addend);
435 }
436 
437 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
438                                                   BinaryFunction &BF) {
439   if (!isX86())
440     return MemoryContentsType::UNKNOWN;
441 
442   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
443   if (!Section) {
444     // No section - possibly an absolute address. Since we don't allow
445     // internal function addresses to escape the function scope - we
446     // consider it a tail call.
447     if (opts::Verbosity > 1) {
448       errs() << "BOLT-WARNING: no section for address 0x"
449              << Twine::utohexstr(Address) << " referenced from function " << BF
450              << '\n';
451     }
452     return MemoryContentsType::UNKNOWN;
453   }
454 
455   if (Section->isVirtual()) {
456     // The contents are filled at runtime.
457     return MemoryContentsType::UNKNOWN;
458   }
459 
460   // No support for jump tables in code yet.
461   if (Section->isText())
462     return MemoryContentsType::UNKNOWN;
463 
464   // Start with checking for PIC jump table. We expect non-PIC jump tables
465   // to have high 32 bits set to 0.
466   if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
467     return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
468 
469   if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
470     return MemoryContentsType::POSSIBLE_JUMP_TABLE;
471 
472   return MemoryContentsType::UNKNOWN;
473 }
474 
475 /// Check if <fragment restored name> == <parent restored name>.cold(.\d+)?
476 bool isPotentialFragmentByName(BinaryFunction &Fragment,
477                                BinaryFunction &Parent) {
478   for (StringRef Name : Parent.getNames()) {
479     std::string NamePrefix = Regex::escape(NameResolver::restore(Name));
480     std::string NameRegex = Twine(NamePrefix, "\\.cold(\\.[0-9]+)?").str();
481     if (Fragment.hasRestoredNameRegex(NameRegex))
482       return true;
483   }
484   return false;
485 }
486 
487 bool BinaryContext::analyzeJumpTable(const uint64_t Address,
488                                      const JumpTable::JumpTableType Type,
489                                      BinaryFunction &BF,
490                                      const uint64_t NextJTAddress,
491                                      JumpTable::OffsetsType *Offsets) {
492   // Is one of the targets __builtin_unreachable?
493   bool HasUnreachable = false;
494 
495   // Number of targets other than __builtin_unreachable.
496   uint64_t NumRealEntries = 0;
497 
498   constexpr uint64_t INVALID_OFFSET = std::numeric_limits<uint64_t>::max();
499   auto addOffset = [&](uint64_t Offset) {
500     if (Offsets)
501       Offsets->emplace_back(Offset);
502   };
503 
504   auto doesBelongToFunction = [&](const uint64_t Addr,
505                                   BinaryFunction *TargetBF) -> bool {
506     if (BF.containsAddress(Addr))
507       return true;
508     // Nothing to do if we failed to identify the containing function.
509     if (!TargetBF)
510       return false;
511     // Case 1: check if BF is a fragment and TargetBF is its parent.
512     if (BF.isFragment()) {
513       // Parent function may or may not be already registered.
514       // Set parent link based on function name matching heuristic.
515       return registerFragment(BF, *TargetBF);
516     }
517     // Case 2: check if TargetBF is a fragment and BF is its parent.
518     return TargetBF->isFragment() && registerFragment(*TargetBF, BF);
519   };
520 
521   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
522   if (!Section)
523     return false;
524 
525   // The upper bound is defined by containing object, section limits, and
526   // the next jump table in memory.
527   uint64_t UpperBound = Section->getEndAddress();
528   const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
529   if (JumpTableBD && JumpTableBD->getSize()) {
530     assert(JumpTableBD->getEndAddress() <= UpperBound &&
531            "data object cannot cross a section boundary");
532     UpperBound = JumpTableBD->getEndAddress();
533   }
534   if (NextJTAddress)
535     UpperBound = std::min(NextJTAddress, UpperBound);
536 
537   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: analyzeJumpTable in " << BF.getPrintName()
538                     << '\n');
539   const uint64_t EntrySize = getJumpTableEntrySize(Type);
540   for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
541        EntryAddress += EntrySize) {
542     LLVM_DEBUG(dbgs() << "  * Checking 0x" << Twine::utohexstr(EntryAddress)
543                       << " -> ");
544     // Check if there's a proper relocation against the jump table entry.
545     if (HasRelocations) {
546       if (Type == JumpTable::JTT_PIC &&
547           !DataPCRelocations.count(EntryAddress)) {
548         LLVM_DEBUG(
549             dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
550         break;
551       }
552       if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
553         LLVM_DEBUG(
554             dbgs()
555             << "FAIL: JTT_NORMAL table, no relocation for this address\n");
556         break;
557       }
558     }
559 
560     const uint64_t Value =
561         (Type == JumpTable::JTT_PIC)
562             ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
563             : *getPointerAtAddress(EntryAddress);
564 
565     // __builtin_unreachable() case.
566     if (Value == BF.getAddress() + BF.getSize()) {
567       addOffset(Value - BF.getAddress());
568       HasUnreachable = true;
569       LLVM_DEBUG(dbgs() << "OK: __builtin_unreachable\n");
570       continue;
571     }
572 
573     // Function or one of its fragments.
574     BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
575 
576     // We assume that a jump table cannot have function start as an entry.
577     if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) {
578       LLVM_DEBUG({
579         if (!BF.containsAddress(Value)) {
580           dbgs() << "FAIL: function doesn't contain this address\n";
581           if (TargetBF) {
582             dbgs() << "  ! function containing this address: "
583                    << TargetBF->getPrintName() << '\n';
584             if (TargetBF->isFragment())
585               dbgs() << "  ! is a fragment\n";
586             for (BinaryFunction *TargetParent : TargetBF->ParentFragments)
587               dbgs() << "  ! its parent is "
588                      << (TargetParent ? TargetParent->getPrintName() : "(none)")
589                      << '\n';
590           }
591         }
592         if (Value == BF.getAddress())
593           dbgs() << "FAIL: jump table cannot have function start as an entry\n";
594       });
595       break;
596     }
597 
598     // Check there's an instruction at this offset.
599     if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
600         !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
601       LLVM_DEBUG(dbgs() << "FAIL: no instruction at this offset\n");
602       break;
603     }
604 
605     ++NumRealEntries;
606 
607     if (TargetBF == &BF) {
608       // Address inside the function.
609       addOffset(Value - TargetBF->getAddress());
610       LLVM_DEBUG(dbgs() << "OK: real entry\n");
611     } else {
612       // Address in split fragment.
613       BF.setHasSplitJumpTable(true);
614       // Add invalid offset for proper identification of jump table size.
615       addOffset(INVALID_OFFSET);
616       LLVM_DEBUG(dbgs() << "OK: address in split fragment "
617                         << TargetBF->getPrintName() << '\n');
618     }
619   }
620 
621   // It's a jump table if the number of real entries is more than 1, or there's
622   // one real entry and "unreachable" targets. If there are only multiple
623   // "unreachable" targets, then it's not a jump table.
624   return NumRealEntries + HasUnreachable >= 2;
625 }
626 
627 void BinaryContext::populateJumpTables() {
628   LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
629                     << '\n');
630   for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
631        ++JTI) {
632     JumpTable *JT = JTI->second;
633     BinaryFunction &BF = *JT->Parent;
634 
635     if (!BF.isSimple())
636       continue;
637 
638     uint64_t NextJTAddress = 0;
639     auto NextJTI = std::next(JTI);
640     if (NextJTI != JTE)
641       NextJTAddress = NextJTI->second->getAddress();
642 
643     const bool Success = analyzeJumpTable(JT->getAddress(), JT->Type, BF,
644                                           NextJTAddress, &JT->OffsetEntries);
645     if (!Success) {
646       dbgs() << "failed to analyze jump table in function " << BF << '\n';
647       JT->print(dbgs());
648       if (NextJTI != JTE) {
649         dbgs() << "next jump table at 0x"
650                << Twine::utohexstr(NextJTI->second->getAddress())
651                << " belongs to function " << *NextJTI->second->Parent << '\n';
652         NextJTI->second->print(dbgs());
653       }
654       llvm_unreachable("jump table heuristic failure");
655     }
656 
657     for (uint64_t EntryOffset : JT->OffsetEntries) {
658       if (EntryOffset == BF.getSize())
659         BF.IgnoredBranches.emplace_back(EntryOffset, BF.getSize());
660       else
661         BF.registerReferencedOffset(EntryOffset);
662     }
663 
664     // In strict mode, erase PC-relative relocation record. Later we check that
665     // all such records are erased and thus have been accounted for.
666     if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
667       for (uint64_t Address = JT->getAddress();
668            Address < JT->getAddress() + JT->getSize();
669            Address += JT->EntrySize) {
670         DataPCRelocations.erase(DataPCRelocations.find(Address));
671       }
672     }
673 
674     // Mark to skip the function and all its fragments.
675     if (BF.hasSplitJumpTable())
676       FragmentsToSkip.push_back(&BF);
677   }
678 
679   if (opts::StrictMode && DataPCRelocations.size()) {
680     LLVM_DEBUG({
681       dbgs() << DataPCRelocations.size()
682              << " unclaimed PC-relative relocations left in data:\n";
683       for (uint64_t Reloc : DataPCRelocations)
684         dbgs() << Twine::utohexstr(Reloc) << '\n';
685     });
686     assert(0 && "unclaimed PC-relative relocations left in data\n");
687   }
688   clearList(DataPCRelocations);
689 }
690 
691 void BinaryContext::skipMarkedFragments() {
692   // Unique functions in the vector.
693   std::unordered_set<BinaryFunction *> UniqueFunctions(FragmentsToSkip.begin(),
694                                                        FragmentsToSkip.end());
695   // Copy the functions back to FragmentsToSkip.
696   FragmentsToSkip.assign(UniqueFunctions.begin(), UniqueFunctions.end());
697   auto addToWorklist = [&](BinaryFunction *Function) -> void {
698     if (UniqueFunctions.count(Function))
699       return;
700     FragmentsToSkip.push_back(Function);
701     UniqueFunctions.insert(Function);
702   };
703   // Functions containing split jump tables need to be skipped with all
704   // fragments (transitively).
705   for (size_t I = 0; I != FragmentsToSkip.size(); I++) {
706     BinaryFunction *BF = FragmentsToSkip[I];
707     assert(UniqueFunctions.count(BF) &&
708            "internal error in traversing function fragments");
709     if (opts::Verbosity >= 1)
710       errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
711     BF->setIgnored();
712     std::for_each(BF->Fragments.begin(), BF->Fragments.end(), addToWorklist);
713     std::for_each(BF->ParentFragments.begin(), BF->ParentFragments.end(),
714                   addToWorklist);
715   }
716   if (!FragmentsToSkip.empty())
717     errs() << "BOLT-WARNING: ignored " << FragmentsToSkip.size() << " function"
718            << (FragmentsToSkip.size() == 1 ? "" : "s")
719            << " due to cold fragments\n";
720   FragmentsToSkip.clear();
721 }
722 
723 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
724                                                  uint64_t Size,
725                                                  uint16_t Alignment,
726                                                  unsigned Flags) {
727   auto Itr = BinaryDataMap.find(Address);
728   if (Itr != BinaryDataMap.end()) {
729     assert(Itr->second->getSize() == Size || !Size);
730     return Itr->second->getSymbol();
731   }
732 
733   std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
734   assert(!GlobalSymbols.count(Name) && "created name is not unique");
735   return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
736 }
737 
738 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
739   return Ctx->getOrCreateSymbol(Name);
740 }
741 
742 BinaryFunction *BinaryContext::createBinaryFunction(
743     const std::string &Name, BinarySection &Section, uint64_t Address,
744     uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
745   auto Result = BinaryFunctions.emplace(
746       Address, BinaryFunction(Name, Section, Address, Size, *this));
747   assert(Result.second == true && "unexpected duplicate function");
748   BinaryFunction *BF = &Result.first->second;
749   registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
750                         Alignment);
751   setSymbolToFunctionMap(BF->getSymbol(), BF);
752   return BF;
753 }
754 
755 const MCSymbol *
756 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
757                                     JumpTable::JumpTableType Type) {
758   if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
759     assert(JT->Type == Type && "jump table types have to match");
760     assert(JT->Parent == &Function &&
761            "cannot re-use jump table of a different function");
762     assert(Address == JT->getAddress() && "unexpected non-empty jump table");
763 
764     return JT->getFirstLabel();
765   }
766 
767   // Re-use the existing symbol if possible.
768   MCSymbol *JTLabel = nullptr;
769   if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
770     if (!isInternalSymbolName(Object->getSymbol()->getName()))
771       JTLabel = Object->getSymbol();
772   }
773 
774   const uint64_t EntrySize = getJumpTableEntrySize(Type);
775   if (!JTLabel) {
776     const std::string JumpTableName = generateJumpTableName(Function, Address);
777     JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
778   }
779 
780   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
781                     << " in function " << Function << '\n');
782 
783   JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
784                                 JumpTable::LabelMapType{{0, JTLabel}}, Function,
785                                 *getSectionForAddress(Address));
786   JumpTables.emplace(Address, JT);
787 
788   // Duplicate the entry for the parent function for easy access.
789   Function.JumpTables.emplace(Address, JT);
790 
791   return JTLabel;
792 }
793 
794 std::pair<uint64_t, const MCSymbol *>
795 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
796                                   const MCSymbol *OldLabel) {
797   auto L = scopeLock();
798   unsigned Offset = 0;
799   bool Found = false;
800   for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
801     if (Elmt.second != OldLabel)
802       continue;
803     Offset = Elmt.first;
804     Found = true;
805     break;
806   }
807   assert(Found && "Label not found");
808   MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
809   JumpTable *NewJT =
810       new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
811                     JumpTable::LabelMapType{{Offset, NewLabel}}, Function,
812                     *getSectionForAddress(JT->getAddress()));
813   NewJT->Entries = JT->Entries;
814   NewJT->Counts = JT->Counts;
815   uint64_t JumpTableID = ++DuplicatedJumpTables;
816   // Invert it to differentiate from regular jump tables whose IDs are their
817   // addresses in the input binary memory space
818   JumpTableID = ~JumpTableID;
819   JumpTables.emplace(JumpTableID, NewJT);
820   Function.JumpTables.emplace(JumpTableID, NewJT);
821   return std::make_pair(JumpTableID, NewLabel);
822 }
823 
824 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
825                                                  uint64_t Address) {
826   size_t Id;
827   uint64_t Offset = 0;
828   if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
829     Offset = Address - JT->getAddress();
830     auto Itr = JT->Labels.find(Offset);
831     if (Itr != JT->Labels.end())
832       return std::string(Itr->second->getName());
833     Id = JumpTableIds.at(JT->getAddress());
834   } else {
835     Id = JumpTableIds[Address] = BF.JumpTables.size();
836   }
837   return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
838           (Offset ? ("." + std::to_string(Offset)) : ""));
839 }
840 
841 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
842   // FIXME: aarch64 support is missing.
843   if (!isX86())
844     return true;
845 
846   if (BF.getSize() == BF.getMaxSize())
847     return true;
848 
849   ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
850   assert(FunctionData && "cannot get function as data");
851 
852   uint64_t Offset = BF.getSize();
853   MCInst Instr;
854   uint64_t InstrSize = 0;
855   uint64_t InstrAddress = BF.getAddress() + Offset;
856   using std::placeholders::_1;
857 
858   // Skip instructions that satisfy the predicate condition.
859   auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
860     const uint64_t StartOffset = Offset;
861     for (; Offset < BF.getMaxSize();
862          Offset += InstrSize, InstrAddress += InstrSize) {
863       if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
864                                   InstrAddress, nulls()))
865         break;
866       if (!Predicate(Instr))
867         break;
868     }
869 
870     return Offset - StartOffset;
871   };
872 
873   // Skip a sequence of zero bytes.
874   auto skipZeros = [&]() {
875     const uint64_t StartOffset = Offset;
876     for (; Offset < BF.getMaxSize(); ++Offset)
877       if ((*FunctionData)[Offset] != 0)
878         break;
879 
880     return Offset - StartOffset;
881   };
882 
883   // Accept the whole padding area filled with breakpoints.
884   auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
885   if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
886     return true;
887 
888   auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
889 
890   // Some functions have a jump to the next function or to the padding area
891   // inserted after the body.
892   auto isSkipJump = [&](const MCInst &Instr) {
893     uint64_t TargetAddress = 0;
894     if (MIB->isUnconditionalBranch(Instr) &&
895         MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
896       if (TargetAddress >= InstrAddress + InstrSize &&
897           TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
898         return true;
899       }
900     }
901     return false;
902   };
903 
904   // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
905   while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
906          skipZeros())
907     ;
908 
909   if (Offset == BF.getMaxSize())
910     return true;
911 
912   if (opts::Verbosity >= 1) {
913     errs() << "BOLT-WARNING: bad padding at address 0x"
914            << Twine::utohexstr(BF.getAddress() + BF.getSize())
915            << " starting at offset " << (Offset - BF.getSize())
916            << " in function " << BF << '\n'
917            << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize())
918            << '\n';
919   }
920 
921   return false;
922 }
923 
924 void BinaryContext::adjustCodePadding() {
925   for (auto &BFI : BinaryFunctions) {
926     BinaryFunction &BF = BFI.second;
927     if (!shouldEmit(BF))
928       continue;
929 
930     if (!hasValidCodePadding(BF)) {
931       if (HasRelocations) {
932         if (opts::Verbosity >= 1) {
933           outs() << "BOLT-INFO: function " << BF
934                  << " has invalid padding. Ignoring the function.\n";
935         }
936         BF.setIgnored();
937       } else {
938         BF.setMaxSize(BF.getSize());
939       }
940     }
941   }
942 }
943 
944 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
945                                                uint64_t Size,
946                                                uint16_t Alignment,
947                                                unsigned Flags) {
948   // Register the name with MCContext.
949   MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
950 
951   auto GAI = BinaryDataMap.find(Address);
952   BinaryData *BD;
953   if (GAI == BinaryDataMap.end()) {
954     ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
955     BinarySection &Section =
956         SectionOrErr ? SectionOrErr.get() : absoluteSection();
957     BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
958                         Section, Flags);
959     GAI = BinaryDataMap.emplace(Address, BD).first;
960     GlobalSymbols[Name] = BD;
961     updateObjectNesting(GAI);
962   } else {
963     BD = GAI->second;
964     if (!BD->hasName(Name)) {
965       GlobalSymbols[Name] = BD;
966       BD->Symbols.push_back(Symbol);
967     }
968   }
969 
970   return Symbol;
971 }
972 
973 const BinaryData *
974 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
975   auto NI = BinaryDataMap.lower_bound(Address);
976   auto End = BinaryDataMap.end();
977   if ((NI != End && Address == NI->first) ||
978       ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
979     if (NI->second->containsAddress(Address))
980       return NI->second;
981 
982     // If this is a sub-symbol, see if a parent data contains the address.
983     const BinaryData *BD = NI->second->getParent();
984     while (BD) {
985       if (BD->containsAddress(Address))
986         return BD;
987       BD = BD->getParent();
988     }
989   }
990   return nullptr;
991 }
992 
993 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
994   auto NI = BinaryDataMap.find(Address);
995   assert(NI != BinaryDataMap.end());
996   if (NI == BinaryDataMap.end())
997     return false;
998   // TODO: it's possible that a jump table starts at the same address
999   // as a larger blob of private data.  When we set the size of the
1000   // jump table, it might be smaller than the total blob size.  In this
1001   // case we just leave the original size since (currently) it won't really
1002   // affect anything.
1003   assert((!NI->second->Size || NI->second->Size == Size ||
1004           (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1005          "can't change the size of a symbol that has already had its "
1006          "size set");
1007   if (!NI->second->Size) {
1008     NI->second->Size = Size;
1009     updateObjectNesting(NI);
1010     return true;
1011   }
1012   return false;
1013 }
1014 
1015 void BinaryContext::generateSymbolHashes() {
1016   auto isPadding = [](const BinaryData &BD) {
1017     StringRef Contents = BD.getSection().getContents();
1018     StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
1019     return (BD.getName().startswith("HOLEat") ||
1020             SymData.find_first_not_of(0) == StringRef::npos);
1021   };
1022 
1023   uint64_t NumCollisions = 0;
1024   for (auto &Entry : BinaryDataMap) {
1025     BinaryData &BD = *Entry.second;
1026     StringRef Name = BD.getName();
1027 
1028     if (!isInternalSymbolName(Name))
1029       continue;
1030 
1031     // First check if a non-anonymous alias exists and move it to the front.
1032     if (BD.getSymbols().size() > 1) {
1033       auto Itr = std::find_if(BD.getSymbols().begin(), BD.getSymbols().end(),
1034                               [&](const MCSymbol *Symbol) {
1035                                 return !isInternalSymbolName(Symbol->getName());
1036                               });
1037       if (Itr != BD.getSymbols().end()) {
1038         size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
1039         std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
1040         continue;
1041       }
1042     }
1043 
1044     // We have to skip 0 size symbols since they will all collide.
1045     if (BD.getSize() == 0) {
1046       continue;
1047     }
1048 
1049     const uint64_t Hash = BD.getSection().hash(BD);
1050     const size_t Idx = Name.find("0x");
1051     std::string NewName =
1052         (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
1053     if (getBinaryDataByName(NewName)) {
1054       // Ignore collisions for symbols that appear to be padding
1055       // (i.e. all zeros or a "hole")
1056       if (!isPadding(BD)) {
1057         if (opts::Verbosity) {
1058           errs() << "BOLT-WARNING: collision detected when hashing " << BD
1059                  << " with new name (" << NewName << "), skipping.\n";
1060         }
1061         ++NumCollisions;
1062       }
1063       continue;
1064     }
1065     BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
1066     GlobalSymbols[NewName] = &BD;
1067   }
1068   if (NumCollisions) {
1069     errs() << "BOLT-WARNING: " << NumCollisions
1070            << " collisions detected while hashing binary objects";
1071     if (!opts::Verbosity)
1072       errs() << ". Use -v=1 to see the list.";
1073     errs() << '\n';
1074   }
1075 }
1076 
1077 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1078                                      BinaryFunction &Function) const {
1079   if (!isPotentialFragmentByName(TargetFunction, Function))
1080     return false;
1081   assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1082   if (TargetFunction.isParentFragment(&Function))
1083     return true;
1084   TargetFunction.addParentFragment(Function);
1085   Function.addFragment(TargetFunction);
1086   if (!HasRelocations) {
1087     TargetFunction.setSimple(false);
1088     Function.setSimple(false);
1089   }
1090   if (opts::Verbosity >= 1) {
1091     outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of "
1092            << Function << '\n';
1093   }
1094   return true;
1095 }
1096 
1097 void BinaryContext::processInterproceduralReferences(BinaryFunction &Function) {
1098   for (uint64_t Address : Function.InterproceduralReferences) {
1099     if (!Address)
1100       continue;
1101 
1102     BinaryFunction *TargetFunction =
1103         getBinaryFunctionContainingAddress(Address);
1104     if (&Function == TargetFunction)
1105       continue;
1106 
1107     if (TargetFunction) {
1108       if (TargetFunction->IsFragment &&
1109           !registerFragment(*TargetFunction, Function)) {
1110         errs() << "BOLT-WARNING: interprocedural reference between unrelated "
1111                   "fragments: "
1112                << Function.getPrintName() << " and "
1113                << TargetFunction->getPrintName() << '\n';
1114       }
1115       if (uint64_t Offset = Address - TargetFunction->getAddress())
1116         TargetFunction->addEntryPointAtOffset(Offset);
1117 
1118       continue;
1119     }
1120 
1121     // Check if address falls in function padding space - this could be
1122     // unmarked data in code. In this case adjust the padding space size.
1123     ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1124     assert(Section && "cannot get section for referenced address");
1125 
1126     if (!Section->isText())
1127       continue;
1128 
1129     // PLT requires special handling and could be ignored in this context.
1130     StringRef SectionName = Section->getName();
1131     if (SectionName == ".plt" || SectionName == ".plt.got")
1132       continue;
1133 
1134     if (opts::processAllFunctions()) {
1135       errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1136              << "object in code at address 0x" << Twine::utohexstr(Address)
1137              << " belonging to section " << SectionName << " in current mode\n";
1138       exit(1);
1139     }
1140 
1141     TargetFunction = getBinaryFunctionContainingAddress(Address,
1142                                                         /*CheckPastEnd=*/false,
1143                                                         /*UseMaxSize=*/true);
1144     // We are not going to overwrite non-simple functions, but for simple
1145     // ones - adjust the padding size.
1146     if (TargetFunction && TargetFunction->isSimple()) {
1147       errs() << "BOLT-WARNING: function " << *TargetFunction
1148              << " has an object detected in a padding region at address 0x"
1149              << Twine::utohexstr(Address) << '\n';
1150       TargetFunction->setMaxSize(TargetFunction->getSize());
1151     }
1152   }
1153 
1154   clearList(Function.InterproceduralReferences);
1155 }
1156 
1157 void BinaryContext::postProcessSymbolTable() {
1158   fixBinaryDataHoles();
1159   bool Valid = true;
1160   for (auto &Entry : BinaryDataMap) {
1161     BinaryData *BD = Entry.second;
1162     if ((BD->getName().startswith("SYMBOLat") ||
1163          BD->getName().startswith("DATAat")) &&
1164         !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1165         BD->getSection()) {
1166       errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n";
1167       Valid = false;
1168     }
1169   }
1170   assert(Valid);
1171   generateSymbolHashes();
1172 }
1173 
1174 void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1175                                  BinaryFunction &ParentBF) {
1176   assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1177          "cannot merge functions with multiple entry points");
1178 
1179   std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex,
1180                                                          std::defer_lock);
1181   std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock(
1182       SymbolToFunctionMapMutex, std::defer_lock);
1183 
1184   const StringRef ChildName = ChildBF.getOneName();
1185 
1186   // Move symbols over and update bookkeeping info.
1187   for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1188     ParentBF.getSymbols().push_back(Symbol);
1189     WriteSymbolMapLock.lock();
1190     SymbolToFunctionMap[Symbol] = &ParentBF;
1191     WriteSymbolMapLock.unlock();
1192     // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1193   }
1194   ChildBF.getSymbols().clear();
1195 
1196   // Move other names the child function is known under.
1197   std::move(ChildBF.Aliases.begin(), ChildBF.Aliases.end(),
1198             std::back_inserter(ParentBF.Aliases));
1199   ChildBF.Aliases.clear();
1200 
1201   if (HasRelocations) {
1202     // Merge execution counts of ChildBF into those of ParentBF.
1203     // Without relocations, we cannot reliably merge profiles as both functions
1204     // continue to exist and either one can be executed.
1205     ChildBF.mergeProfileDataInto(ParentBF);
1206 
1207     std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex,
1208                                                           std::defer_lock);
1209     std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex,
1210                                                            std::defer_lock);
1211     // Remove ChildBF from the global set of functions in relocs mode.
1212     ReadBfsLock.lock();
1213     auto FI = BinaryFunctions.find(ChildBF.getAddress());
1214     ReadBfsLock.unlock();
1215 
1216     assert(FI != BinaryFunctions.end() && "function not found");
1217     assert(&ChildBF == &FI->second && "function mismatch");
1218 
1219     WriteBfsLock.lock();
1220     ChildBF.clearDisasmState();
1221     FI = BinaryFunctions.erase(FI);
1222     WriteBfsLock.unlock();
1223 
1224   } else {
1225     // In non-relocation mode we keep the function, but rename it.
1226     std::string NewName = "__ICF_" + ChildName.str();
1227 
1228     WriteCtxLock.lock();
1229     ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
1230     WriteCtxLock.unlock();
1231 
1232     ChildBF.setFolded(&ParentBF);
1233   }
1234 }
1235 
1236 void BinaryContext::fixBinaryDataHoles() {
1237   assert(validateObjectNesting() && "object nesting inconsitency detected");
1238 
1239   for (BinarySection &Section : allocatableSections()) {
1240     std::vector<std::pair<uint64_t, uint64_t>> Holes;
1241 
1242     auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1243       BinaryData *BD = Itr->second;
1244       bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1245                      (BD->getName().startswith("SYMBOLat0x") ||
1246                       BD->getName().startswith("DATAat0x") ||
1247                       BD->getName().startswith("ANONYMOUS")));
1248       return !isHole && BD->getSection() == Section && !BD->getParent();
1249     };
1250 
1251     auto BDStart = BinaryDataMap.begin();
1252     auto BDEnd = BinaryDataMap.end();
1253     auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1254     auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1255 
1256     uint64_t EndAddress = Section.getAddress();
1257 
1258     while (Itr != End) {
1259       if (Itr->second->getAddress() > EndAddress) {
1260         uint64_t Gap = Itr->second->getAddress() - EndAddress;
1261         Holes.emplace_back(EndAddress, Gap);
1262       }
1263       EndAddress = Itr->second->getEndAddress();
1264       ++Itr;
1265     }
1266 
1267     if (EndAddress < Section.getEndAddress())
1268       Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
1269 
1270     // If there is already a symbol at the start of the hole, grow that symbol
1271     // to cover the rest.  Otherwise, create a new symbol to cover the hole.
1272     for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1273       BinaryData *BD = getBinaryDataAtAddress(Hole.first);
1274       if (BD) {
1275         // BD->getSection() can be != Section if there are sections that
1276         // overlap.  In this case it is probably safe to just skip the holes
1277         // since the overlapping section will not(?) have any symbols in it.
1278         if (BD->getSection() == Section)
1279           setBinaryDataSize(Hole.first, Hole.second);
1280       } else {
1281         getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
1282       }
1283     }
1284   }
1285 
1286   assert(validateObjectNesting() && "object nesting inconsitency detected");
1287   assert(validateHoles() && "top level hole detected in object map");
1288 }
1289 
1290 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1291   const BinarySection *CurrentSection = nullptr;
1292   bool FirstSection = true;
1293 
1294   for (auto &Entry : BinaryDataMap) {
1295     const BinaryData *BD = Entry.second;
1296     const BinarySection &Section = BD->getSection();
1297     if (FirstSection || Section != *CurrentSection) {
1298       uint64_t Address, Size;
1299       StringRef Name = Section.getName();
1300       if (Section) {
1301         Address = Section.getAddress();
1302         Size = Section.getSize();
1303       } else {
1304         Address = BD->getAddress();
1305         Size = BD->getSize();
1306       }
1307       OS << "BOLT-INFO: Section " << Name << ", "
1308          << "0x" + Twine::utohexstr(Address) << ":"
1309          << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
1310       CurrentSection = &Section;
1311       FirstSection = false;
1312     }
1313 
1314     OS << "BOLT-INFO: ";
1315     const BinaryData *P = BD->getParent();
1316     while (P) {
1317       OS << "  ";
1318       P = P->getParent();
1319     }
1320     OS << *BD << "\n";
1321   }
1322 }
1323 
1324 Expected<unsigned>
1325 BinaryContext::getDwarfFile(StringRef Directory, StringRef FileName,
1326                             unsigned FileNumber,
1327                             Optional<MD5::MD5Result> Checksum,
1328                             Optional<StringRef> Source, unsigned CUID) {
1329   DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1330   return Table.tryGetFile(Directory, FileName, Checksum, Source,
1331                           Ctx->getDwarfVersion(), FileNumber);
1332 }
1333 
1334 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1335                                                const uint32_t SrcCUID,
1336                                                unsigned FileIndex) {
1337   DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1338   const DWARFDebugLine::LineTable *LineTable =
1339       DwCtx->getLineTableForUnit(SrcUnit);
1340   const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1341       LineTable->Prologue.FileNames;
1342   // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1343   // means empty dir.
1344   assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1345          "FileIndex out of range for the compilation unit.");
1346   StringRef Dir = "";
1347   if (FileNames[FileIndex - 1].DirIdx != 0) {
1348     if (Optional<const char *> DirName = dwarf::toString(
1349             LineTable->Prologue
1350                 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1351       Dir = *DirName;
1352     }
1353   }
1354   StringRef FileName = "";
1355   if (Optional<const char *> FName =
1356           dwarf::toString(FileNames[FileIndex - 1].Name))
1357     FileName = *FName;
1358   assert(FileName != "");
1359   return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID));
1360 }
1361 
1362 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1363   std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1364   std::transform(BinaryFunctions.begin(), BinaryFunctions.end(),
1365                  SortedFunctions.begin(),
1366                  [](std::pair<const uint64_t, BinaryFunction> &BFI) {
1367                    return &BFI.second;
1368                  });
1369 
1370   std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(),
1371                    [](const BinaryFunction *A, const BinaryFunction *B) {
1372                      if (A->hasValidIndex() && B->hasValidIndex()) {
1373                        return A->getIndex() < B->getIndex();
1374                      }
1375                      return A->hasValidIndex();
1376                    });
1377   return SortedFunctions;
1378 }
1379 
1380 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1381   std::vector<BinaryFunction *> AllFunctions;
1382   AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
1383   std::transform(BinaryFunctions.begin(), BinaryFunctions.end(),
1384                  std::back_inserter(AllFunctions),
1385                  [](std::pair<const uint64_t, BinaryFunction> &BFI) {
1386                    return &BFI.second;
1387                  });
1388   std::copy(InjectedBinaryFunctions.begin(), InjectedBinaryFunctions.end(),
1389             std::back_inserter(AllFunctions));
1390 
1391   return AllFunctions;
1392 }
1393 
1394 Optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1395   auto Iter = DWOCUs.find(DWOId);
1396   if (Iter == DWOCUs.end())
1397     return None;
1398 
1399   return Iter->second;
1400 }
1401 
1402 DWARFContext *BinaryContext::getDWOContext() {
1403   if (DWOCUs.empty())
1404     return nullptr;
1405   return &DWOCUs.begin()->second->getContext();
1406 }
1407 
1408 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1409 void BinaryContext::preprocessDWODebugInfo() {
1410   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1411     DWARFUnit *const DwarfUnit = CU.get();
1412     if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1413       DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit();
1414       if (!DWOCU->isDWOUnit()) {
1415         std::string DWOName = dwarf::toString(
1416             DwarfUnit->getUnitDIE().find(
1417                 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1418             "");
1419         outs() << "BOLT-WARNING: Debug Fission: DWO debug information for "
1420                << DWOName
1421                << " was not retrieved and won't be updated. Please check "
1422                   "relative path.\n";
1423         continue;
1424       }
1425       DWOCUs[*DWOId] = DWOCU;
1426     }
1427   }
1428 }
1429 
1430 void BinaryContext::preprocessDebugInfo() {
1431   struct CURange {
1432     uint64_t LowPC;
1433     uint64_t HighPC;
1434     DWARFUnit *Unit;
1435 
1436     bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1437   };
1438 
1439   // Building a map of address ranges to CUs similar to .debug_aranges and use
1440   // it to assign CU to functions.
1441   std::vector<CURange> AllRanges;
1442   AllRanges.reserve(DwCtx->getNumCompileUnits());
1443   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1444     Expected<DWARFAddressRangesVector> RangesOrError =
1445         CU->getUnitDIE().getAddressRanges();
1446     if (!RangesOrError) {
1447       consumeError(RangesOrError.takeError());
1448       continue;
1449     }
1450     for (DWARFAddressRange &Range : *RangesOrError) {
1451       // Parts of the debug info could be invalidated due to corresponding code
1452       // being removed from the binary by the linker. Hence we check if the
1453       // address is a valid one.
1454       if (containsAddress(Range.LowPC))
1455         AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
1456     }
1457   }
1458 
1459   std::sort(AllRanges.begin(), AllRanges.end());
1460   for (auto &KV : BinaryFunctions) {
1461     const uint64_t FunctionAddress = KV.first;
1462     BinaryFunction &Function = KV.second;
1463 
1464     auto It = std::partition_point(
1465         AllRanges.begin(), AllRanges.end(),
1466         [=](CURange R) { return R.HighPC <= FunctionAddress; });
1467     if (It != AllRanges.end() && It->LowPC <= FunctionAddress) {
1468       Function.setDWARFUnit(It->Unit);
1469     }
1470   }
1471 
1472   // Discover units with debug info that needs to be updated.
1473   for (const auto &KV : BinaryFunctions) {
1474     const BinaryFunction &BF = KV.second;
1475     if (shouldEmit(BF) && BF.getDWARFUnit())
1476       ProcessedCUs.insert(BF.getDWARFUnit());
1477   }
1478 
1479   // Clear debug info for functions from units that we are not going to process.
1480   for (auto &KV : BinaryFunctions) {
1481     BinaryFunction &BF = KV.second;
1482     if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
1483       BF.setDWARFUnit(nullptr);
1484   }
1485 
1486   if (opts::Verbosity >= 1) {
1487     outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1488            << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1489   }
1490 
1491   // Populate MCContext with DWARF files from all units.
1492   StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1493   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1494     const uint64_t CUID = CU->getOffset();
1495     getDwarfLineTable(CUID).setLabel(Ctx->getOrCreateSymbol(
1496         GlobalPrefix + "line_table_start" + Twine(CUID)));
1497 
1498     if (!ProcessedCUs.count(CU.get()))
1499       continue;
1500 
1501     const DWARFDebugLine::LineTable *LineTable =
1502         DwCtx->getLineTableForUnit(CU.get());
1503     const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1504         LineTable->Prologue.FileNames;
1505 
1506     // Assign a unique label to every line table, one per CU.
1507     // Make sure empty debug line tables are registered too.
1508     if (FileNames.empty()) {
1509       cantFail(getDwarfFile("", "<unknown>", 0, None, None, CUID));
1510       continue;
1511     }
1512     for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1513       // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1514       // means empty dir.
1515       StringRef Dir = "";
1516       if (FileNames[I].DirIdx != 0)
1517         if (Optional<const char *> DirName = dwarf::toString(
1518                 LineTable->Prologue
1519                     .IncludeDirectories[FileNames[I].DirIdx - 1]))
1520           Dir = *DirName;
1521       StringRef FileName = "";
1522       if (Optional<const char *> FName = dwarf::toString(FileNames[I].Name))
1523         FileName = *FName;
1524       assert(FileName != "");
1525       cantFail(getDwarfFile(Dir, FileName, 0, None, None, CUID));
1526     }
1527   }
1528 
1529   preprocessDWODebugInfo();
1530 }
1531 
1532 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1533   if (opts::processAllFunctions())
1534     return true;
1535 
1536   if (Function.isIgnored())
1537     return false;
1538 
1539   // In relocation mode we will emit non-simple functions with CFG.
1540   // If the function does not have a CFG it should be marked as ignored.
1541   return HasRelocations || Function.isSimple();
1542 }
1543 
1544 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1545   uint32_t Operation = Inst.getOperation();
1546   switch (Operation) {
1547   case MCCFIInstruction::OpSameValue:
1548     OS << "OpSameValue Reg" << Inst.getRegister();
1549     break;
1550   case MCCFIInstruction::OpRememberState:
1551     OS << "OpRememberState";
1552     break;
1553   case MCCFIInstruction::OpRestoreState:
1554     OS << "OpRestoreState";
1555     break;
1556   case MCCFIInstruction::OpOffset:
1557     OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1558     break;
1559   case MCCFIInstruction::OpDefCfaRegister:
1560     OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1561     break;
1562   case MCCFIInstruction::OpDefCfaOffset:
1563     OS << "OpDefCfaOffset " << Inst.getOffset();
1564     break;
1565   case MCCFIInstruction::OpDefCfa:
1566     OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1567     break;
1568   case MCCFIInstruction::OpRelOffset:
1569     OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1570     break;
1571   case MCCFIInstruction::OpAdjustCfaOffset:
1572     OS << "OfAdjustCfaOffset " << Inst.getOffset();
1573     break;
1574   case MCCFIInstruction::OpEscape:
1575     OS << "OpEscape";
1576     break;
1577   case MCCFIInstruction::OpRestore:
1578     OS << "OpRestore Reg" << Inst.getRegister();
1579     break;
1580   case MCCFIInstruction::OpUndefined:
1581     OS << "OpUndefined Reg" << Inst.getRegister();
1582     break;
1583   case MCCFIInstruction::OpRegister:
1584     OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1585        << Inst.getRegister2();
1586     break;
1587   case MCCFIInstruction::OpWindowSave:
1588     OS << "OpWindowSave";
1589     break;
1590   case MCCFIInstruction::OpGnuArgsSize:
1591     OS << "OpGnuArgsSize";
1592     break;
1593   default:
1594     OS << "Op#" << Operation;
1595     break;
1596   }
1597 }
1598 
1599 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1600                                      uint64_t Offset,
1601                                      const BinaryFunction *Function,
1602                                      bool PrintMCInst, bool PrintMemData,
1603                                      bool PrintRelocations) const {
1604   if (MIB->isEHLabel(Instruction)) {
1605     OS << "  EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << '\n';
1606     return;
1607   }
1608   OS << format("    %08" PRIx64 ": ", Offset);
1609   if (MIB->isCFI(Instruction)) {
1610     uint32_t Offset = Instruction.getOperand(0).getImm();
1611     OS << "\t!CFI\t$" << Offset << "\t; ";
1612     if (Function)
1613       printCFI(OS, *Function->getCFIFor(Instruction));
1614     OS << "\n";
1615     return;
1616   }
1617   InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
1618   if (MIB->isCall(Instruction)) {
1619     if (MIB->isTailCall(Instruction))
1620       OS << " # TAILCALL ";
1621     if (MIB->isInvoke(Instruction)) {
1622       const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instruction);
1623       OS << " # handler: ";
1624       if (EHInfo->first)
1625         OS << *EHInfo->first;
1626       else
1627         OS << '0';
1628       OS << "; action: " << EHInfo->second;
1629       const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1630       if (GnuArgsSize >= 0)
1631         OS << "; GNU_args_size = " << GnuArgsSize;
1632     }
1633   } else if (MIB->isIndirectBranch(Instruction)) {
1634     if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1635       OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1636     } else {
1637       OS << " # UNKNOWN CONTROL FLOW";
1638     }
1639   }
1640   if (Optional<uint32_t> Offset = MIB->getOffset(Instruction))
1641     OS << " # Offset: " << *Offset;
1642 
1643   MIB->printAnnotations(Instruction, OS);
1644 
1645   if (opts::PrintDebugInfo) {
1646     DebugLineTableRowRef RowRef =
1647         DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
1648     if (RowRef != DebugLineTableRowRef::NULL_ROW) {
1649       const DWARFDebugLine::LineTable *LineTable;
1650       if (Function && Function->getDWARFUnit() &&
1651           Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1652         LineTable = Function->getDWARFLineTable();
1653       } else {
1654         LineTable = DwCtx->getLineTableForUnit(
1655             DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
1656       }
1657       assert(LineTable &&
1658              "line table expected for instruction with debug info");
1659 
1660       const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1661       StringRef FileName = "";
1662       if (Optional<const char *> FName =
1663               dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
1664         FileName = *FName;
1665       OS << " # debug line " << FileName << ":" << Row.Line;
1666       if (Row.Column)
1667         OS << ":" << Row.Column;
1668       if (Row.Discriminator)
1669         OS << " discriminator:" << Row.Discriminator;
1670     }
1671   }
1672 
1673   if ((opts::PrintRelocations || PrintRelocations) && Function) {
1674     const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
1675     Function->printRelocations(OS, Offset, Size);
1676   }
1677 
1678   OS << "\n";
1679 
1680   if (PrintMCInst) {
1681     Instruction.dump_pretty(OS, InstPrinter.get());
1682     OS << "\n";
1683   }
1684 }
1685 
1686 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
1687   auto SI = AddressToSection.upper_bound(Address);
1688   if (SI != AddressToSection.begin()) {
1689     --SI;
1690     uint64_t UpperBound = SI->first + SI->second->getSize();
1691     if (!SI->second->getSize())
1692       UpperBound += 1;
1693     if (UpperBound > Address)
1694       return *SI->second;
1695   }
1696   return std::make_error_code(std::errc::bad_address);
1697 }
1698 
1699 ErrorOr<StringRef>
1700 BinaryContext::getSectionNameForAddress(uint64_t Address) const {
1701   if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
1702     return Section->getName();
1703   return std::make_error_code(std::errc::bad_address);
1704 }
1705 
1706 BinarySection &BinaryContext::registerSection(BinarySection *Section) {
1707   auto Res = Sections.insert(Section);
1708   (void)Res;
1709   assert(Res.second && "can't register the same section twice.");
1710 
1711   // Only register allocatable sections in the AddressToSection map.
1712   if (Section->isAllocatable() && Section->getAddress())
1713     AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
1714   NameToSection.insert(
1715       std::make_pair(std::string(Section->getName()), Section));
1716   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
1717   return *Section;
1718 }
1719 
1720 BinarySection &BinaryContext::registerSection(SectionRef Section) {
1721   return registerSection(new BinarySection(*this, Section));
1722 }
1723 
1724 BinarySection &
1725 BinaryContext::registerSection(StringRef SectionName,
1726                                const BinarySection &OriginalSection) {
1727   return registerSection(
1728       new BinarySection(*this, SectionName, OriginalSection));
1729 }
1730 
1731 BinarySection &
1732 BinaryContext::registerOrUpdateSection(StringRef Name, unsigned ELFType,
1733                                        unsigned ELFFlags, uint8_t *Data,
1734                                        uint64_t Size, unsigned Alignment) {
1735   auto NamedSections = getSectionByName(Name);
1736   if (NamedSections.begin() != NamedSections.end()) {
1737     assert(std::next(NamedSections.begin()) == NamedSections.end() &&
1738            "can only update unique sections");
1739     BinarySection *Section = NamedSections.begin()->second;
1740 
1741     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
1742     const bool Flag = Section->isAllocatable();
1743     (void)Flag;
1744     Section->update(Data, Size, Alignment, ELFType, ELFFlags);
1745     LLVM_DEBUG(dbgs() << *Section << "\n");
1746     // FIXME: Fix section flags/attributes for MachO.
1747     if (isELF())
1748       assert(Flag == Section->isAllocatable() &&
1749              "can't change section allocation status");
1750     return *Section;
1751   }
1752 
1753   return registerSection(
1754       new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
1755 }
1756 
1757 bool BinaryContext::deregisterSection(BinarySection &Section) {
1758   BinarySection *SectionPtr = &Section;
1759   auto Itr = Sections.find(SectionPtr);
1760   if (Itr != Sections.end()) {
1761     auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
1762     while (Range.first != Range.second) {
1763       if (Range.first->second == SectionPtr) {
1764         AddressToSection.erase(Range.first);
1765         break;
1766       }
1767       ++Range.first;
1768     }
1769 
1770     auto NameRange =
1771         NameToSection.equal_range(std::string(SectionPtr->getName()));
1772     while (NameRange.first != NameRange.second) {
1773       if (NameRange.first->second == SectionPtr) {
1774         NameToSection.erase(NameRange.first);
1775         break;
1776       }
1777       ++NameRange.first;
1778     }
1779 
1780     Sections.erase(Itr);
1781     delete SectionPtr;
1782     return true;
1783   }
1784   return false;
1785 }
1786 
1787 void BinaryContext::printSections(raw_ostream &OS) const {
1788   for (BinarySection *const &Section : Sections)
1789     OS << "BOLT-INFO: " << *Section << "\n";
1790 }
1791 
1792 BinarySection &BinaryContext::absoluteSection() {
1793   if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
1794     return *Section;
1795   return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
1796 }
1797 
1798 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
1799                                                            size_t Size) const {
1800   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
1801   if (!Section)
1802     return std::make_error_code(std::errc::bad_address);
1803 
1804   if (Section->isVirtual())
1805     return 0;
1806 
1807   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
1808                    AsmInfo->getCodePointerSize());
1809   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
1810   return DE.getUnsigned(&ValueOffset, Size);
1811 }
1812 
1813 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
1814                                                          size_t Size) const {
1815   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
1816   if (!Section)
1817     return std::make_error_code(std::errc::bad_address);
1818 
1819   if (Section->isVirtual())
1820     return 0;
1821 
1822   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
1823                    AsmInfo->getCodePointerSize());
1824   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
1825   return DE.getSigned(&ValueOffset, Size);
1826 }
1827 
1828 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
1829                                   uint64_t Type, uint64_t Addend,
1830                                   uint64_t Value) {
1831   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1832   assert(Section && "cannot find section for address");
1833   Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
1834                          Value);
1835 }
1836 
1837 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
1838                                          uint64_t Type, uint64_t Addend,
1839                                          uint64_t Value) {
1840   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1841   assert(Section && "cannot find section for address");
1842   Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
1843                                 Addend, Value);
1844 }
1845 
1846 bool BinaryContext::removeRelocationAt(uint64_t Address) {
1847   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1848   assert(Section && "cannot find section for address");
1849   return Section->removeRelocationAt(Address - Section->getAddress());
1850 }
1851 
1852 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) {
1853   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1854   if (!Section)
1855     return nullptr;
1856 
1857   return Section->getRelocationAt(Address - Section->getAddress());
1858 }
1859 
1860 const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) {
1861   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1862   if (!Section)
1863     return nullptr;
1864 
1865   return Section->getDynamicRelocationAt(Address - Section->getAddress());
1866 }
1867 
1868 void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
1869                                              const uint64_t Address) {
1870   auto setImmovable = [&](BinaryData &BD) {
1871     BinaryData *Root = BD.getAtomicRoot();
1872     LLVM_DEBUG(if (Root->isMoveable()) {
1873       dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
1874              << "due to ambiguous relocation referencing 0x"
1875              << Twine::utohexstr(Address) << '\n';
1876     });
1877     Root->setIsMoveable(false);
1878   };
1879 
1880   if (Address == BD.getAddress()) {
1881     setImmovable(BD);
1882 
1883     // Set previous symbol as immovable
1884     BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
1885     if (Prev && Prev->getEndAddress() == BD.getAddress())
1886       setImmovable(*Prev);
1887   }
1888 
1889   if (Address == BD.getEndAddress()) {
1890     setImmovable(BD);
1891 
1892     // Set next symbol as immovable
1893     BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
1894     if (Next && Next->getAddress() == BD.getEndAddress())
1895       setImmovable(*Next);
1896   }
1897 }
1898 
1899 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
1900                                                     uint64_t *EntryDesc) {
1901   std::shared_lock<std::shared_timed_mutex> Lock(SymbolToFunctionMapMutex);
1902   auto BFI = SymbolToFunctionMap.find(Symbol);
1903   if (BFI == SymbolToFunctionMap.end())
1904     return nullptr;
1905 
1906   BinaryFunction *BF = BFI->second;
1907   if (EntryDesc)
1908     *EntryDesc = BF->getEntryIDForSymbol(Symbol);
1909 
1910   return BF;
1911 }
1912 
1913 void BinaryContext::exitWithBugReport(StringRef Message,
1914                                       const BinaryFunction &Function) const {
1915   errs() << "=======================================\n";
1916   errs() << "BOLT is unable to proceed because it couldn't properly understand "
1917             "this function.\n";
1918   errs() << "If you are running the most recent version of BOLT, you may "
1919             "want to "
1920             "report this and paste this dump.\nPlease check that there is no "
1921             "sensitive contents being shared in this dump.\n";
1922   errs() << "\nOffending function: " << Function.getPrintName() << "\n\n";
1923   ScopedPrinter SP(errs());
1924   SP.printBinaryBlock("Function contents", *Function.getData());
1925   errs() << "\n";
1926   Function.dump();
1927   errs() << "ERROR: " << Message;
1928   errs() << "\n=======================================\n";
1929   exit(1);
1930 }
1931 
1932 BinaryFunction *
1933 BinaryContext::createInjectedBinaryFunction(const std::string &Name,
1934                                             bool IsSimple) {
1935   InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
1936   BinaryFunction *BF = InjectedBinaryFunctions.back();
1937   setSymbolToFunctionMap(BF->getSymbol(), BF);
1938   BF->CurrentState = BinaryFunction::State::CFG;
1939   return BF;
1940 }
1941 
1942 std::pair<size_t, size_t>
1943 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
1944   // Adjust branch instruction to match the current layout.
1945   if (FixBranches)
1946     BF.fixBranches();
1947 
1948   // Create local MC context to isolate the effect of ephemeral code emission.
1949   IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
1950   MCContext *LocalCtx = MCEInstance.LocalCtx.get();
1951   MCAsmBackend *MAB =
1952       TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
1953 
1954   SmallString<256> Code;
1955   raw_svector_ostream VecOS(Code);
1956 
1957   std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
1958   std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
1959       *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
1960       std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
1961       /*RelaxAll=*/false,
1962       /*IncrementalLinkerCompatible=*/false,
1963       /*DWARFMustBeAtTheEnd=*/false));
1964 
1965   Streamer->initSections(false, *STI);
1966 
1967   MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
1968   Section->setHasInstructions(true);
1969 
1970   // Create symbols in the LocalCtx so that they get destroyed with it.
1971   MCSymbol *StartLabel = LocalCtx->createTempSymbol();
1972   MCSymbol *EndLabel = LocalCtx->createTempSymbol();
1973   MCSymbol *ColdStartLabel = LocalCtx->createTempSymbol();
1974   MCSymbol *ColdEndLabel = LocalCtx->createTempSymbol();
1975 
1976   Streamer->SwitchSection(Section);
1977   Streamer->emitLabel(StartLabel);
1978   emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/false,
1979                    /*EmitCodeOnly=*/true);
1980   Streamer->emitLabel(EndLabel);
1981 
1982   if (BF.isSplit()) {
1983     MCSectionELF *ColdSection =
1984         LocalCtx->getELFSection(BF.getColdCodeSectionName(), ELF::SHT_PROGBITS,
1985                                 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
1986     ColdSection->setHasInstructions(true);
1987 
1988     Streamer->SwitchSection(ColdSection);
1989     Streamer->emitLabel(ColdStartLabel);
1990     emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/true,
1991                      /*EmitCodeOnly=*/true);
1992     Streamer->emitLabel(ColdEndLabel);
1993     // To avoid calling MCObjectStreamer::flushPendingLabels() which is private
1994     Streamer->emitBytes(StringRef(""));
1995     Streamer->SwitchSection(Section);
1996   }
1997 
1998   // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
1999   // MCStreamer::Finish(), which does more than we want
2000   Streamer->emitBytes(StringRef(""));
2001 
2002   MCAssembler &Assembler =
2003       static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
2004   MCAsmLayout Layout(Assembler);
2005   Assembler.layout(Layout);
2006 
2007   const uint64_t HotSize =
2008       Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
2009   const uint64_t ColdSize = BF.isSplit()
2010                                 ? Layout.getSymbolOffset(*ColdEndLabel) -
2011                                       Layout.getSymbolOffset(*ColdStartLabel)
2012                                 : 0ULL;
2013 
2014   // Clean-up the effect of the code emission.
2015   for (const MCSymbol &Symbol : Assembler.symbols()) {
2016     MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
2017     MutableSymbol->setUndefined();
2018     MutableSymbol->setIsRegistered(false);
2019   }
2020 
2021   return std::make_pair(HotSize, ColdSize);
2022 }
2023 
2024 bool BinaryContext::validateEncoding(const MCInst &Inst,
2025                                      ArrayRef<uint8_t> InputEncoding) const {
2026   SmallString<256> Code;
2027   SmallVector<MCFixup, 4> Fixups;
2028   raw_svector_ostream VecOS(Code);
2029 
2030   MCE->encodeInstruction(Inst, VecOS, Fixups, *STI);
2031   auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2032   if (InputEncoding != EncodedData) {
2033     if (opts::Verbosity > 1) {
2034       errs() << "BOLT-WARNING: mismatched encoding detected\n"
2035              << "      input: " << InputEncoding << '\n'
2036              << "     output: " << EncodedData << '\n';
2037     }
2038     return false;
2039   }
2040 
2041   return true;
2042 }
2043 
2044 uint64_t BinaryContext::getHotThreshold() const {
2045   static uint64_t Threshold = 0;
2046   if (Threshold == 0) {
2047     Threshold = std::max(
2048         (uint64_t)opts::ExecutionCountThreshold,
2049         NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2050   }
2051   return Threshold;
2052 }
2053 
2054 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2055     uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2056   auto FI = BinaryFunctions.upper_bound(Address);
2057   if (FI == BinaryFunctions.begin())
2058     return nullptr;
2059   --FI;
2060 
2061   const uint64_t UsedSize =
2062       UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2063 
2064   if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2065     return nullptr;
2066 
2067   return &FI->second;
2068 }
2069 
2070 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2071   // First, try to find a function starting at the given address. If the
2072   // function was folded, this will get us the original folded function if it
2073   // wasn't removed from the list, e.g. in non-relocation mode.
2074   auto BFI = BinaryFunctions.find(Address);
2075   if (BFI != BinaryFunctions.end())
2076     return &BFI->second;
2077 
2078   // We might have folded the function matching the object at the given
2079   // address. In such case, we look for a function matching the symbol
2080   // registered at the original address. The new function (the one that the
2081   // original was folded into) will hold the symbol.
2082   if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2083     uint64_t EntryID = 0;
2084     BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2085     if (BF && EntryID == 0)
2086       return BF;
2087   }
2088   return nullptr;
2089 }
2090 
2091 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2092     const DWARFAddressRangesVector &InputRanges) const {
2093   DebugAddressRangesVector OutputRanges;
2094 
2095   for (const DWARFAddressRange Range : InputRanges) {
2096     auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2097     while (BFI != BinaryFunctions.end()) {
2098       const BinaryFunction &Function = BFI->second;
2099       if (Function.getAddress() >= Range.HighPC)
2100         break;
2101       const DebugAddressRangesVector FunctionRanges =
2102           Function.getOutputAddressRanges();
2103       std::move(std::begin(FunctionRanges), std::end(FunctionRanges),
2104                 std::back_inserter(OutputRanges));
2105       std::advance(BFI, 1);
2106     }
2107   }
2108 
2109   return OutputRanges;
2110 }
2111 
2112 } // namespace bolt
2113 } // namespace llvm
2114