1 //===- bolt/Core/BinaryContext.h - Low-level context ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Context for processing binary executable/library files.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef BOLT_CORE_BINARY_CONTEXT_H
14 #define BOLT_CORE_BINARY_CONTEXT_H
15 
16 #include "bolt/Core/BinaryData.h"
17 #include "bolt/Core/BinarySection.h"
18 #include "bolt/Core/DebugData.h"
19 #include "bolt/Core/JumpTable.h"
20 #include "bolt/Core/MCPlusBuilder.h"
21 #include "bolt/RuntimeLibs/RuntimeLibrary.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/ADT/StringMap.h"
24 #include "llvm/ADT/Triple.h"
25 #include "llvm/ADT/iterator.h"
26 #include "llvm/BinaryFormat/Dwarf.h"
27 #include "llvm/BinaryFormat/MachO.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCCodeEmitter.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCObjectFileInfo.h"
32 #include "llvm/MC/MCObjectWriter.h"
33 #include "llvm/MC/MCPseudoProbe.h"
34 #include "llvm/MC/MCSectionELF.h"
35 #include "llvm/MC/MCSectionMachO.h"
36 #include "llvm/MC/MCStreamer.h"
37 #include "llvm/MC/MCSymbol.h"
38 #include "llvm/MC/TargetRegistry.h"
39 #include "llvm/Support/ErrorOr.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include <functional>
42 #include <list>
43 #include <map>
44 #include <set>
45 #include <shared_mutex>
46 #include <string>
47 #include <system_error>
48 #include <type_traits>
49 #include <unordered_map>
50 #include <vector>
51 
52 namespace llvm {
53 class MCDisassembler;
54 class MCInstPrinter;
55 
56 using namespace object;
57 
58 namespace bolt {
59 
60 class BinaryFunction;
61 class ExecutableFileMemoryManager;
62 
63 /// Information on loadable part of the file.
64 struct SegmentInfo {
65   uint64_t Address;           /// Address of the segment in memory.
66   uint64_t Size;              /// Size of the segment in memory.
67   uint64_t FileOffset;        /// Offset in the file.
68   uint64_t FileSize;          /// Size in file.
69   uint64_t Alignment;         /// Alignment of the segment.
70 
printSegmentInfo71   void print(raw_ostream &OS) const {
72     OS << "SegmentInfo { Address: 0x"
73        << Twine::utohexstr(Address) << ", Size: 0x"
74        << Twine::utohexstr(Size) << ", FileOffset: 0x"
75        << Twine::utohexstr(FileOffset) << ", FileSize: 0x"
76        << Twine::utohexstr(FileSize) << ", Alignment: 0x"
77        << Twine::utohexstr(Alignment) << "}";
78   };
79 };
80 
81 inline raw_ostream &operator<<(raw_ostream &OS, const SegmentInfo &SegInfo) {
82   SegInfo.print(OS);
83   return OS;
84 }
85 
/// AArch64-specific symbol markers used to delimit code/data in .text.
enum class MarkerSymType : char {
  /// No marker type.
  NONE = 0,
  /// Marker delimiting code.
  CODE = 1,
  /// Marker delimiting data.
  DATA = 2,
};
92 
/// Classification of the contents found at a memory location.
enum class MemoryContentsType : char {
  /// Unknown contents.
  UNKNOWN = 0,
  /// Possibly a non-PIC jump table.
  POSSIBLE_JUMP_TABLE = 1,
  /// Possibly a PIC jump table.
  POSSIBLE_PIC_JUMP_TABLE = 2,
};
98 
/// Helper function to truncate a \p Value to given size in \p Bytes.
///
/// Keeps the low \p Bytes bytes of \p Value and clears every bit above them.
/// A size of zero keeps no bits and yields 0; a size of eight bytes or more
/// covers the whole value, which is then returned unchanged. Both cases are
/// handled explicitly because the mask computation below would otherwise
/// shift a 64-bit quantity by 64 bits or by a wrapped-around amount, which is
/// undefined behavior.
inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
  if (Bytes == 0)
    return 0;
  if (Bytes >= sizeof(Value))
    return Value;
  // Here 1 <= Bytes <= 7, so the shift amount lies in [8, 56].
  const uint64_t Mask = ~static_cast<uint64_t>(0) >> (64 - Bytes * 8);
  return static_cast<int64_t>(static_cast<uint64_t>(Value) & Mask);
}
103 
/// Filter iterator.
///
/// Bidirectional iterator adaptor over the range [Itr, End) that only stops
/// at positions accepted by a predicate. The predicate is invoked with the
/// underlying iterator itself (not with the element it refers to) and returns
/// true for positions that should be visited.
///
/// Two FilterIterators compare equal when their underlying iterators are
/// equal; the predicate and the end position do not take part in the
/// comparison.
template <typename ItrType,
          typename PredType = std::function<bool(const ItrType &)>>
class FilterIterator {
  using inner_traits = std::iterator_traits<ItrType>;
  using Iterator = FilterIterator;

  PredType Pred;
  ItrType Itr, End;

  /// Step backwards to the closest preceding position accepted by the
  /// predicate. No lower bound is tracked, so the caller has to guarantee
  /// that such a position exists.
  void prev() {
    while (!Pred(--Itr))
      ;
  }

  /// Advance by one position and then on to the next accepted position.
  void next() {
    ++Itr;
    nextMatching();
  }

  /// Skip forward until the current position is accepted or End is reached.
  /// Leaves the iterator untouched if it already sits on an accepted position.
  void nextMatching() {
    while (Itr != End && !Pred(Itr))
      ++Itr;
  }

public:
  // Iterator member types are spelled out here rather than inherited from
  // std::iterator, which is deprecated since C++17. reference and pointer
  // mirror the wrapped iterator so they agree with what operator* and
  // operator-> actually return.
  using iterator_category = std::bidirectional_iterator_tag;
  using value_type = typename inner_traits::value_type;
  using difference_type = typename inner_traits::difference_type;
  using pointer = typename inner_traits::pointer;
  using reference = typename inner_traits::reference;

  Iterator &operator++() { next(); return *this; }
  Iterator &operator--() { prev(); return *this; }

  /// Postfix forms return a copy of the whole filter iterator as it was
  /// before the step (not just the wrapped iterator).
  Iterator operator++(int) { Iterator Tmp(*this); next(); return Tmp; }
  Iterator operator--(int) { Iterator Tmp(*this); prev(); return Tmp; }

  bool operator==(const Iterator &Other) const { return Itr == Other.Itr; }
  bool operator!=(const Iterator &Other) const { return !operator==(Other); }
  reference operator*() { return *Itr; }
  pointer operator->() { return &operator*(); }

  /// Create an iterator over [\p Itr, \p End) filtered by \p Pred. The new
  /// iterator is positioned on the first accepted position at or after
  /// \p Itr, or on \p End if there is none.
  FilterIterator(PredType Pred, ItrType Itr, ItrType End)
      : Pred(Pred), Itr(Itr), End(End) {
    nextMatching();
  }
};
144 
145 class BinaryContext {
146   BinaryContext() = delete;
147 
148   /// Name of the binary file the context originated from.
149   std::string Filename;
150 
151   /// Unique build ID if available for the binary.
152   Optional<std::string> FileBuildID;
153 
154   /// Set of all sections.
155   struct CompareSections {
operatorCompareSections156     bool operator()(const BinarySection *A, const BinarySection *B) const {
157       return *A < *B;
158     }
159   };
160   using SectionSetType = std::set<BinarySection *, CompareSections>;
161   SectionSetType Sections;
162 
163   using SectionIterator = pointee_iterator<SectionSetType::iterator>;
164   using SectionConstIterator = pointee_iterator<SectionSetType::const_iterator>;
165 
166   using FilteredSectionIterator = FilterIterator<SectionIterator>;
167   using FilteredSectionConstIterator = FilterIterator<SectionConstIterator>;
168 
169   /// Map virtual address to a section.  It is possible to have more than one
170   /// section mapped to the same address, e.g. non-allocatable sections.
171   using AddressToSectionMapType = std::multimap<uint64_t, BinarySection *>;
172   AddressToSectionMapType AddressToSection;
173 
174   /// multimap of section name to BinarySection object.  Some binaries
175   /// have multiple sections with the same name.
176   using NameToSectionMapType = std::multimap<std::string, BinarySection *>;
177   NameToSectionMapType NameToSection;
178 
179   /// Low level section registration.
180   BinarySection &registerSection(BinarySection *Section);
181 
182   /// Store all functions in the binary, sorted by original address.
183   std::map<uint64_t, BinaryFunction> BinaryFunctions;
184 
185   /// A mutex that is used to control parallel accesses to BinaryFunctions
186   mutable std::shared_timed_mutex BinaryFunctionsMutex;
187 
188   /// Functions injected by BOLT
189   std::vector<BinaryFunction *> InjectedBinaryFunctions;
190 
191   /// Jump tables for all functions mapped by address.
192   std::map<uint64_t, JumpTable *> JumpTables;
193 
194   /// Locations of PC-relative relocations in data objects.
195   std::unordered_set<uint64_t> DataPCRelocations;
196 
197   /// Used in duplicateJumpTable() to uniquely identify a JT clone
198   /// Start our IDs with a high number so getJumpTableContainingAddress checks
199   /// with size won't overflow
200   uint32_t DuplicatedJumpTables{0x10000000};
201 
202   /// Function fragments to skip.
203   std::unordered_set<BinaryFunction *> FragmentsToSkip;
204 
205   /// The runtime library.
206   std::unique_ptr<RuntimeLibrary> RtLibrary;
207 
208   /// DWP Context.
209   std::shared_ptr<DWARFContext> DWPContext;
210 
211   /// A map of DWO Ids to CUs.
212   using DWOIdToCUMapType = std::unordered_map<uint64_t, DWARFUnit *>;
213   DWOIdToCUMapType DWOCUs;
214 
215   bool ContainsDwarf5{false};
216   bool ContainsDwarfLegacy{false};
217 
218   /// Preprocess DWO debug information.
219   void preprocessDWODebugInfo();
220 
221   /// DWARF line info for CUs.
222   std::map<unsigned, DwarfLineTable> DwarfLineTablesCUMap;
223 
224 public:
225   static Expected<std::unique_ptr<BinaryContext>>
226   createBinaryContext(const ObjectFile *File, bool IsPIC,
227                       std::unique_ptr<DWARFContext> DwCtx);
228 
229   /// Superset of compiler units that will contain overwritten code that needs
230   /// new debug info. In a few cases, functions may end up not being
231   /// overwritten, but it is okay to re-generate debug info for them.
232   std::set<const DWARFUnit *> ProcessedCUs;
233 
234   // Setup MCPlus target builder
initializeTarget(std::unique_ptr<MCPlusBuilder> TargetBuilder)235   void initializeTarget(std::unique_ptr<MCPlusBuilder> TargetBuilder) {
236     MIB = std::move(TargetBuilder);
237   }
238 
239   /// Return function fragments to skip.
getFragmentsToSkip()240   const std::unordered_set<BinaryFunction *> &getFragmentsToSkip() {
241     return FragmentsToSkip;
242   }
243 
244   /// Add function fragment to skip
addFragmentsToSkip(BinaryFunction * Function)245   void addFragmentsToSkip(BinaryFunction *Function) {
246     FragmentsToSkip.insert(Function);
247   }
248 
clearFragmentsToSkip()249   void clearFragmentsToSkip() { FragmentsToSkip.clear(); }
250 
251   /// Given DWOId returns CU if it exists in DWOCUs.
252   Optional<DWARFUnit *> getDWOCU(uint64_t DWOId);
253 
254   /// Returns DWOContext if it exists.
255   DWARFContext *getDWOContext() const;
256 
257   /// Get Number of DWOCUs in a map.
getNumDWOCUs()258   uint32_t getNumDWOCUs() { return DWOCUs.size(); }
259 
260   /// Returns true if DWARF5 is used.
isDWARF5Used()261   bool isDWARF5Used() const { return ContainsDwarf5; }
262 
263   /// Returns true if DWARF4 or lower is used.
isDWARFLegacyUsed()264   bool isDWARFLegacyUsed() const { return ContainsDwarfLegacy; }
265 
getDwarfLineTables()266   std::map<unsigned, DwarfLineTable> &getDwarfLineTables() {
267     return DwarfLineTablesCUMap;
268   }
269 
getDwarfLineTable(unsigned CUID)270   DwarfLineTable &getDwarfLineTable(unsigned CUID) {
271     return DwarfLineTablesCUMap[CUID];
272   }
273 
274   Expected<unsigned> getDwarfFile(StringRef Directory, StringRef FileName,
275                                   unsigned FileNumber,
276                                   Optional<MD5::MD5Result> Checksum,
277                                   Optional<StringRef> Source, unsigned CUID,
278                                   unsigned DWARFVersion);
279 
280   /// [start memory address] -> [segment info] mapping.
281   std::map<uint64_t, SegmentInfo> SegmentMapInfo;
282 
283   /// Symbols that are expected to be undefined in MCContext during emission.
284   std::unordered_set<MCSymbol *> UndefinedSymbols;
285 
286   /// [name] -> [BinaryData*] map used for global symbol resolution.
287   using SymbolMapType = StringMap<BinaryData *>;
288   SymbolMapType GlobalSymbols;
289 
290   /// [address] -> [BinaryData], ...
291   /// Addresses never change.
292   /// Note: it is important that clients do not hold on to instances of
293   /// BinaryData* while the map is still being modified during BinaryFunction
294   /// disassembly.  This is because of the possibility that a regular
295   /// BinaryData is later discovered to be a JumpTable.
296   using BinaryDataMapType = std::map<uint64_t, BinaryData *>;
297   using binary_data_iterator = BinaryDataMapType::iterator;
298   using binary_data_const_iterator = BinaryDataMapType::const_iterator;
299   BinaryDataMapType BinaryDataMap;
300 
301   using FilteredBinaryDataConstIterator =
302       FilterIterator<binary_data_const_iterator>;
303   using FilteredBinaryDataIterator = FilterIterator<binary_data_iterator>;
304 
305   /// Memory manager for sections and segments. Used to communicate with ORC
306   /// among other things.
307   std::shared_ptr<ExecutableFileMemoryManager> EFMM;
308 
getFilename()309   StringRef getFilename() const { return Filename; }
setFilename(StringRef Name)310   void setFilename(StringRef Name) { Filename = std::string(Name); }
311 
getFileBuildID()312   Optional<StringRef> getFileBuildID() const {
313     if (FileBuildID)
314       return StringRef(*FileBuildID);
315 
316     return NoneType();
317   }
setFileBuildID(StringRef ID)318   void setFileBuildID(StringRef ID) { FileBuildID = std::string(ID); }
319 
hasSymbolsWithFileName()320   bool hasSymbolsWithFileName() const { return HasSymbolsWithFileName; }
setHasSymbolsWithFileName(bool Value)321   void setHasSymbolsWithFileName(bool Value) { HasSymbolsWithFileName = true; }
322 
323   /// Return true if relocations against symbol with a given name
324   /// must be created.
325   bool forceSymbolRelocations(StringRef SymbolName) const;
326 
getNumUnusedProfiledObjects()327   uint64_t getNumUnusedProfiledObjects() const {
328     return NumUnusedProfiledObjects;
329   }
setNumUnusedProfiledObjects(uint64_t N)330   void setNumUnusedProfiledObjects(uint64_t N) { NumUnusedProfiledObjects = N; }
331 
getRuntimeLibrary()332   RuntimeLibrary *getRuntimeLibrary() { return RtLibrary.get(); }
setRuntimeLibrary(std::unique_ptr<RuntimeLibrary> Lib)333   void setRuntimeLibrary(std::unique_ptr<RuntimeLibrary> Lib) {
334     assert(!RtLibrary && "Cannot set runtime library twice.");
335     RtLibrary = std::move(Lib);
336   }
337 
338   /// Return BinaryFunction containing a given \p Address or nullptr if
339   /// no registered function contains the \p Address.
340   ///
  /// In a binary, a function has somewhat vague boundaries. E.g. a function can
  /// refer to the first byte past the end of the function, and it will still be
  /// referring to this function, not the function following it in the address
  /// space. Thus we have the following flags that let a caller with more
  /// context refine the lookup.
346   ///
347   /// If \p CheckPastEnd is true and the \p Address falls on a byte
348   /// immediately following the last byte of some function and there's no other
349   /// function that starts there, then return the function as the one containing
350   /// the \p Address. This is useful when we need to locate functions for
351   /// references pointing immediately past a function body.
352   ///
353   /// If \p UseMaxSize is true, then include the space between this function
354   /// body and the next object in address ranges that we check.
355   BinaryFunction *getBinaryFunctionContainingAddress(uint64_t Address,
356                                                      bool CheckPastEnd = false,
357                                                      bool UseMaxSize = false);
358 
359   /// Return a BinaryFunction that starts at a given \p Address.
360   BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address);
361 
getBinaryFunctionAtAddress(uint64_t Address)362   const BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address) const {
363     return const_cast<BinaryContext *>(this)->getBinaryFunctionAtAddress(
364         Address);
365   }
366 
367   /// Return size of an entry for the given jump table \p Type.
getJumpTableEntrySize(JumpTable::JumpTableType Type)368   uint64_t getJumpTableEntrySize(JumpTable::JumpTableType Type) const {
369     return Type == JumpTable::JTT_PIC ? 4 : AsmInfo->getCodePointerSize();
370   }
371 
372   /// Return JumpTable containing a given \p Address.
getJumpTableContainingAddress(uint64_t Address)373   JumpTable *getJumpTableContainingAddress(uint64_t Address) {
374     auto JTI = JumpTables.upper_bound(Address);
375     if (JTI == JumpTables.begin())
376       return nullptr;
377     --JTI;
378     if (JTI->first + JTI->second->getSize() > Address)
379       return JTI->second;
380     if (JTI->second->getSize() == 0 && JTI->first == Address)
381       return JTI->second;
382     return nullptr;
383   }
384 
getDWARFEncodingSize(unsigned Encoding)385   unsigned getDWARFEncodingSize(unsigned Encoding) {
386     switch (Encoding & 0x0f) {
387     default:
388       llvm_unreachable("unknown encoding");
389     case dwarf::DW_EH_PE_absptr:
390     case dwarf::DW_EH_PE_signed:
391       return AsmInfo->getCodePointerSize();
392     case dwarf::DW_EH_PE_udata2:
393     case dwarf::DW_EH_PE_sdata2:
394       return 2;
395     case dwarf::DW_EH_PE_udata4:
396     case dwarf::DW_EH_PE_sdata4:
397       return 4;
398     case dwarf::DW_EH_PE_udata8:
399     case dwarf::DW_EH_PE_sdata8:
400       return 8;
401     }
402   }
403 
404   /// [MCSymbol] -> [BinaryFunction]
405   ///
406   /// As we fold identical functions, multiple symbols can point
407   /// to the same BinaryFunction.
408   std::unordered_map<const MCSymbol *, BinaryFunction *> SymbolToFunctionMap;
409 
410   /// A mutex that is used to control parallel accesses to SymbolToFunctionMap
411   mutable std::shared_timed_mutex SymbolToFunctionMapMutex;
412 
413   /// Look up the symbol entry that contains the given \p Address (based on
414   /// the start address and size for each symbol).  Returns a pointer to
415   /// the BinaryData for that symbol.  If no data is found, nullptr is returned.
416   const BinaryData *getBinaryDataContainingAddressImpl(uint64_t Address) const;
417 
418   /// Update the Parent fields in BinaryDatas after adding a new entry into
419   /// \p BinaryDataMap.
420   void updateObjectNesting(BinaryDataMapType::iterator GAI);
421 
422   /// Validate that if object address ranges overlap that the object with
423   /// the larger range is a parent of the object with the smaller range.
424   bool validateObjectNesting() const;
425 
426   /// Validate that there are no top level "holes" in each section
427   /// and that all relocations with a section are mapped to a valid
428   /// top level BinaryData.
429   bool validateHoles() const;
430 
431   /// Produce output address ranges based on input ranges for some module.
432   DebugAddressRangesVector translateModuleAddressRanges(
433       const DWARFAddressRangesVector &InputRanges) const;
434 
435   /// Get a bogus "absolute" section that will be associated with all
436   /// absolute BinaryDatas.
437   BinarySection &absoluteSection();
438 
439   /// Process "holes" in between known BinaryData objects.  For now,
440   /// symbols are padded with the space before the next BinaryData object.
441   void fixBinaryDataHoles();
442 
443   /// Generate names based on data hashes for unknown symbols.
444   void generateSymbolHashes();
445 
446   /// Construct BinaryFunction object and add it to internal maps.
447   BinaryFunction *createBinaryFunction(const std::string &Name,
448                                        BinarySection &Section, uint64_t Address,
449                                        uint64_t Size, uint64_t SymbolSize = 0,
450                                        uint16_t Alignment = 0);
451 
452   /// Return all functions for this rewrite instance.
getBinaryFunctions()453   std::map<uint64_t, BinaryFunction> &getBinaryFunctions() {
454     return BinaryFunctions;
455   }
456 
457   /// Return all functions for this rewrite instance.
getBinaryFunctions()458   const std::map<uint64_t, BinaryFunction> &getBinaryFunctions() const {
459     return BinaryFunctions;
460   }
461 
462   /// Create BOLT-injected function
463   BinaryFunction *createInjectedBinaryFunction(const std::string &Name,
464                                                bool IsSimple = true);
465 
getInjectedBinaryFunctions()466   std::vector<BinaryFunction *> &getInjectedBinaryFunctions() {
467     return InjectedBinaryFunctions;
468   }
469 
470   /// Return vector with all functions, i.e. include functions from the input
471   /// binary and functions created by BOLT.
472   std::vector<BinaryFunction *> getAllBinaryFunctions();
473 
474   /// Construct a jump table for \p Function at \p Address or return an existing
475   /// one at that location.
476   ///
  /// May create an embedded jump table and return its label.
479   const MCSymbol *getOrCreateJumpTable(BinaryFunction &Function,
480                                        uint64_t Address,
481                                        JumpTable::JumpTableType Type);
482 
483   /// Analyze a possible jump table of type \p Type at a given \p Address.
484   /// \p BF is a function referencing the jump table.
485   /// Return true if the jump table was detected at \p Address, and false
486   /// otherwise.
487   ///
488   /// If \p NextJTAddress is different from zero, it is used as an upper
489   /// bound for jump table memory layout.
490   ///
  /// Optionally, populate \p EntriesAsAddress from jump table entries. The
  /// entries could be partially populated if the jump table detection fails.
493   bool analyzeJumpTable(const uint64_t Address,
494                         const JumpTable::JumpTableType Type, BinaryFunction &BF,
495                         const uint64_t NextJTAddress = 0,
496                         JumpTable::AddressesType *EntriesAsAddress = nullptr);
497 
498   /// After jump table locations are established, this function will populate
499   /// their EntriesAsAddress based on memory contents.
500   void populateJumpTables();
501 
502   /// Returns a jump table ID and label pointing to the duplicated jump table.
503   /// Ordinarily, jump tables are identified by their address in the input
504   /// binary. We return an ID with the high bit set to differentiate it from
505   /// regular addresses, avoiding conflicts with standard jump tables.
506   std::pair<uint64_t, const MCSymbol *>
507   duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
508                      const MCSymbol *OldLabel);
509 
510   /// Generate a unique name for jump table at a given \p Address belonging
511   /// to function \p BF.
512   std::string generateJumpTableName(const BinaryFunction &BF, uint64_t Address);
513 
514   /// Free memory used by JumpTable's EntriesAsAddress
clearJumpTableTempData()515   void clearJumpTableTempData() {
516     for (auto &JTI : JumpTables) {
517       JumpTable &JT = *JTI.second;
518       JumpTable::AddressesType Temp;
519       Temp.swap(JT.EntriesAsAddress);
520     }
521   }
522   /// Return true if the array of bytes represents a valid code padding.
523   bool hasValidCodePadding(const BinaryFunction &BF);
524 
525   /// Verify padding area between functions, and adjust max function size
526   /// accordingly.
527   void adjustCodePadding();
528 
529   /// Regular page size.
530   unsigned RegularPageSize{0x1000};
531   static constexpr unsigned RegularPageSizeX86 = 0x1000;
532   static constexpr unsigned RegularPageSizeAArch64 = 0x10000;
533 
534   /// Huge page size to use.
535   static constexpr unsigned HugePageSize = 0x200000;
536 
537   /// Map address to a constant island owner (constant data in code section)
538   std::map<uint64_t, BinaryFunction *> AddressToConstantIslandMap;
539 
540   /// A map from jump table address to insertion order.  Used for generating
541   /// jump table names.
542   std::map<uint64_t, size_t> JumpTableIds;
543 
544   std::unique_ptr<MCContext> Ctx;
545 
546   /// A mutex that is used to control parallel accesses to Ctx
547   mutable std::shared_timed_mutex CtxMutex;
scopeLock()548   std::unique_lock<std::shared_timed_mutex> scopeLock() const {
549     return std::unique_lock<std::shared_timed_mutex>(CtxMutex);
550   }
551 
552   std::unique_ptr<DWARFContext> DwCtx;
553 
554   std::unique_ptr<Triple> TheTriple;
555 
556   const Target *TheTarget;
557 
558   std::string TripleName;
559 
560   std::unique_ptr<MCCodeEmitter> MCE;
561 
562   std::unique_ptr<MCObjectFileInfo> MOFI;
563 
564   std::unique_ptr<const MCAsmInfo> AsmInfo;
565 
566   std::unique_ptr<const MCInstrInfo> MII;
567 
568   std::unique_ptr<const MCSubtargetInfo> STI;
569 
570   std::unique_ptr<MCInstPrinter> InstPrinter;
571 
572   std::unique_ptr<const MCInstrAnalysis> MIA;
573 
574   std::unique_ptr<MCPlusBuilder> MIB;
575 
576   std::unique_ptr<const MCRegisterInfo> MRI;
577 
578   std::unique_ptr<MCDisassembler> DisAsm;
579 
580   /// Symbolic disassembler.
581   std::unique_ptr<MCDisassembler> SymbolicDisAsm;
582 
583   std::unique_ptr<MCAsmBackend> MAB;
584 
585   /// Indicates if relocations are available for usage.
586   bool HasRelocations{false};
587 
588   /// Is the binary always loaded at a fixed address. Shared objects and
589   /// position-independent executables (PIEs) are examples of binaries that
590   /// will have HasFixedLoadAddress set to false.
591   bool HasFixedLoadAddress{true};
592 
593   /// True if the binary has no dynamic dependencies, i.e., if it was statically
594   /// linked.
595   bool IsStaticExecutable{false};
596 
597   /// Set to true if the binary contains PT_INTERP header.
598   bool HasInterpHeader{false};
599 
600   /// Indicates if any of local symbols used for functions or data objects
601   /// have an origin file name available.
602   bool HasSymbolsWithFileName{false};
603 
604   /// Sum of execution count of all functions
605   uint64_t SumExecutionCount{0};
606 
607   /// Number of functions with profile information
608   uint64_t NumProfiledFuncs{0};
609 
610   /// Number of objects in profile whose profile was ignored.
611   uint64_t NumUnusedProfiledObjects{0};
612 
613   /// Total hotness score according to profiling data for this binary.
614   uint64_t TotalScore{0};
615 
616   /// Binary-wide stats for macro-fusion.
617   uint64_t MissedMacroFusionPairs{0};
618   uint64_t MissedMacroFusionExecCount{0};
619 
620   // Address of the first allocated segment.
621   uint64_t FirstAllocAddress{std::numeric_limits<uint64_t>::max()};
622 
623   /// Track next available address for new allocatable sections. RewriteInstance
624   /// sets this prior to running BOLT passes, so layout passes are aware of the
625   /// final addresses functions will have.
626   uint64_t LayoutStartAddress{0};
627 
628   /// Old .text info.
629   uint64_t OldTextSectionAddress{0};
630   uint64_t OldTextSectionOffset{0};
631   uint64_t OldTextSectionSize{0};
632 
633   /// Address of the code/function that is executed before any other code in
634   /// the binary.
635   Optional<uint64_t> StartFunctionAddress;
636 
637   /// Address of the code/function that is going to be executed right before
638   /// the execution of the binary is completed.
639   Optional<uint64_t> FiniFunctionAddress;
640 
641   /// Page alignment used for code layout.
642   uint64_t PageAlign{HugePageSize};
643 
644   /// True if the binary requires immediate relocation processing.
645   bool RequiresZNow{false};
646 
647   /// List of functions that always trap.
648   std::vector<const BinaryFunction *> TrappedFunctions;
649 
650   /// Map SDT locations to SDT markers info
651   std::unordered_map<uint64_t, SDTMarkerInfo> SDTMarkers;
652 
653   /// Map linux kernel program locations/instructions to their pointers in
654   /// special linux kernel sections
655   std::unordered_map<uint64_t, std::vector<LKInstructionMarkerInfo>> LKMarkers;
656 
657   /// List of external addresses in the code that are not a function start
658   /// and are referenced from BinaryFunction.
659   std::list<std::pair<BinaryFunction *, uint64_t>> InterproceduralReferences;
660 
661   /// PseudoProbe decoder
662   MCPseudoProbeDecoder ProbeDecoder;
663 
664   /// DWARF encoding. Available encoding types defined in BinaryFormat/Dwarf.h
665   /// enum Constants, e.g. DW_EH_PE_omit.
666   unsigned TTypeEncoding = dwarf::DW_EH_PE_omit;
667   unsigned LSDAEncoding = dwarf::DW_EH_PE_omit;
668 
669   BinaryContext(std::unique_ptr<MCContext> Ctx,
670                 std::unique_ptr<DWARFContext> DwCtx,
671                 std::unique_ptr<Triple> TheTriple, const Target *TheTarget,
672                 std::string TripleName, std::unique_ptr<MCCodeEmitter> MCE,
673                 std::unique_ptr<MCObjectFileInfo> MOFI,
674                 std::unique_ptr<const MCAsmInfo> AsmInfo,
675                 std::unique_ptr<const MCInstrInfo> MII,
676                 std::unique_ptr<const MCSubtargetInfo> STI,
677                 std::unique_ptr<MCInstPrinter> InstPrinter,
678                 std::unique_ptr<const MCInstrAnalysis> MIA,
679                 std::unique_ptr<MCPlusBuilder> MIB,
680                 std::unique_ptr<const MCRegisterInfo> MRI,
681                 std::unique_ptr<MCDisassembler> DisAsm);
682 
683   ~BinaryContext();
684 
685   std::unique_ptr<MCObjectWriter> createObjectWriter(raw_pwrite_stream &OS);
686 
isELF()687   bool isELF() const { return TheTriple->isOSBinFormatELF(); }
688 
isMachO()689   bool isMachO() const { return TheTriple->isOSBinFormatMachO(); }
690 
isAArch64()691   bool isAArch64() const {
692     return TheTriple->getArch() == llvm::Triple::aarch64;
693   }
694 
isX86()695   bool isX86() const {
696     return TheTriple->getArch() == llvm::Triple::x86 ||
697            TheTriple->getArch() == llvm::Triple::x86_64;
698   }
699 
700   // AArch64-specific functions to check if symbol is used to delimit
701   // code/data in .text. Code is marked by $x, data by $d.
702   MarkerSymType getMarkerType(const SymbolRef &Symbol) const;
703   bool isMarker(const SymbolRef &Symbol) const;
704 
705   /// Iterate over all BinaryData.
getBinaryData()706   iterator_range<binary_data_const_iterator> getBinaryData() const {
707     return make_range(BinaryDataMap.begin(), BinaryDataMap.end());
708   }
709 
710   /// Iterate over all BinaryData.
getBinaryData()711   iterator_range<binary_data_iterator> getBinaryData() {
712     return make_range(BinaryDataMap.begin(), BinaryDataMap.end());
713   }
714 
715   /// Iterate over all BinaryData associated with the given \p Section.
716   iterator_range<FilteredBinaryDataConstIterator>
getBinaryDataForSection(const BinarySection & Section)717   getBinaryDataForSection(const BinarySection &Section) const {
718     auto Begin = BinaryDataMap.lower_bound(Section.getAddress());
719     if (Begin != BinaryDataMap.begin())
720       --Begin;
721     auto End = BinaryDataMap.upper_bound(Section.getEndAddress());
722     auto pred = [&Section](const binary_data_const_iterator &Itr) -> bool {
723       return Itr->second->getSection() == Section;
724     };
725     return make_range(FilteredBinaryDataConstIterator(pred, Begin, End),
726                       FilteredBinaryDataConstIterator(pred, End, End));
727   }
728 
729   /// Iterate over all BinaryData associated with the given \p Section.
730   iterator_range<FilteredBinaryDataIterator>
getBinaryDataForSection(BinarySection & Section)731   getBinaryDataForSection(BinarySection &Section) {
732     auto Begin = BinaryDataMap.lower_bound(Section.getAddress());
733     if (Begin != BinaryDataMap.begin())
734       --Begin;
735     auto End = BinaryDataMap.upper_bound(Section.getEndAddress());
736     auto pred = [&Section](const binary_data_iterator &Itr) -> bool {
737       return Itr->second->getSection() == Section;
738     };
739     return make_range(FilteredBinaryDataIterator(pred, Begin, End),
740                       FilteredBinaryDataIterator(pred, End, End));
741   }
742 
  /// Iterate over all the sub-symbols of \p BD (if any).
744   iterator_range<binary_data_iterator> getSubBinaryData(BinaryData *BD);
745 
746   /// Clear the global symbol address -> name(s) map.
clearBinaryData()747   void clearBinaryData() {
748     GlobalSymbols.clear();
749     for (auto &Entry : BinaryDataMap)
750       delete Entry.second;
751     BinaryDataMap.clear();
752   }
753 
  /// Process \p Address reference from code in function \p BF.
755   /// \p IsPCRel indicates if the reference is PC-relative.
756   /// Return <Symbol, Addend> pair corresponding to the \p Address.
757   std::pair<const MCSymbol *, uint64_t>
758   handleAddressRef(uint64_t Address, BinaryFunction &BF, bool IsPCRel);
759 
760   /// Analyze memory contents at the given \p Address and return the type of
761   /// memory contents (such as a possible jump table).
762   MemoryContentsType analyzeMemoryAt(uint64_t Address, BinaryFunction &BF);
763 
764   /// Return a value of the global \p Symbol or an error if the value
765   /// was not set.
getSymbolValue(const MCSymbol & Symbol)766   ErrorOr<uint64_t> getSymbolValue(const MCSymbol &Symbol) const {
767     const BinaryData *BD = getBinaryDataByName(Symbol.getName());
768     if (!BD)
769       return std::make_error_code(std::errc::bad_address);
770     return BD->getAddress();
771   }
772 
773   /// Return a global symbol registered at a given \p Address and \p Size.
774   /// If no symbol exists, create one with unique name using \p Prefix.
775   /// If there are multiple symbols registered at the \p Address, then
776   /// return the first one.
777   MCSymbol *getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
778                                     uint64_t Size = 0, uint16_t Alignment = 0,
779                                     unsigned Flags = 0);
780 
781   /// Create a global symbol without registering an address.
782   MCSymbol *getOrCreateUndefinedGlobalSymbol(StringRef Name);
783 
784   /// Register a symbol with \p Name at a given \p Address using \p Size,
785   /// \p Alignment, and \p Flags. See llvm::SymbolRef::Flags for the definition
786   /// of \p Flags.
787   MCSymbol *registerNameAtAddress(StringRef Name, uint64_t Address,
788                                   uint64_t Size, uint16_t Alignment,
789                                   unsigned Flags = 0);
790 
791   /// Return BinaryData registered at a given \p Address or nullptr if no
792   /// global symbol was registered at the location.
getBinaryDataAtAddress(uint64_t Address)793   const BinaryData *getBinaryDataAtAddress(uint64_t Address) const {
794     auto NI = BinaryDataMap.find(Address);
795     return NI != BinaryDataMap.end() ? NI->second : nullptr;
796   }
797 
getBinaryDataAtAddress(uint64_t Address)798   BinaryData *getBinaryDataAtAddress(uint64_t Address) {
799     auto NI = BinaryDataMap.find(Address);
800     return NI != BinaryDataMap.end() ? NI->second : nullptr;
801   }
802 
803   /// Look up the symbol entry that contains the given \p Address (based on
804   /// the start address and size for each symbol).  Returns a pointer to
805   /// the BinaryData for that symbol.  If no data is found, nullptr is returned.
getBinaryDataContainingAddress(uint64_t Address)806   const BinaryData *getBinaryDataContainingAddress(uint64_t Address) const {
807     return getBinaryDataContainingAddressImpl(Address);
808   }
809 
getBinaryDataContainingAddress(uint64_t Address)810   BinaryData *getBinaryDataContainingAddress(uint64_t Address) {
811     return const_cast<BinaryData *>(
812         getBinaryDataContainingAddressImpl(Address));
813   }
814 
815   /// Return BinaryData for the given \p Name or nullptr if no
816   /// global symbol with that name exists.
getBinaryDataByName(StringRef Name)817   const BinaryData *getBinaryDataByName(StringRef Name) const {
818     auto Itr = GlobalSymbols.find(Name);
819     return Itr != GlobalSymbols.end() ? Itr->second : nullptr;
820   }
821 
getBinaryDataByName(StringRef Name)822   BinaryData *getBinaryDataByName(StringRef Name) {
823     auto Itr = GlobalSymbols.find(Name);
824     return Itr != GlobalSymbols.end() ? Itr->second : nullptr;
825   }
826 
827   /// Return registered PLT entry BinaryData with the given \p Name
828   /// or nullptr if no global PLT symbol with that name exists.
getPLTBinaryDataByName(StringRef Name)829   const BinaryData *getPLTBinaryDataByName(StringRef Name) const {
830     if (const BinaryData *Data = getBinaryDataByName(Name.str() + "@PLT"))
831       return Data;
832 
833     // The symbol name might contain versioning information e.g
834     // memcpy@@GLIBC_2.17. Remove it and try to locate binary data
835     // without it.
836     size_t At = Name.find("@");
837     if (At != std::string::npos)
838       return getBinaryDataByName(Name.str().substr(0, At) + "@PLT");
839 
840     return nullptr;
841   }
842 
843   /// Return true if \p SymbolName was generated internally and was not present
844   /// in the input binary.
isInternalSymbolName(const StringRef Name)845   bool isInternalSymbolName(const StringRef Name) {
846     return Name.startswith("SYMBOLat") || Name.startswith("DATAat") ||
847            Name.startswith("HOLEat");
848   }
849 
getHotTextStartSymbol()850   MCSymbol *getHotTextStartSymbol() const {
851     return Ctx->getOrCreateSymbol("__hot_start");
852   }
853 
getHotTextEndSymbol()854   MCSymbol *getHotTextEndSymbol() const {
855     return Ctx->getOrCreateSymbol("__hot_end");
856   }
857 
getTextSection()858   MCSection *getTextSection() const { return MOFI->getTextSection(); }
859 
860   /// Return code section with a given name.
getCodeSection(StringRef SectionName)861   MCSection *getCodeSection(StringRef SectionName) const {
862     if (isELF())
863       return Ctx->getELFSection(SectionName, ELF::SHT_PROGBITS,
864                                 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
865     else
866       return Ctx->getMachOSection("__TEXT", SectionName,
867                                   MachO::S_ATTR_PURE_INSTRUCTIONS,
868                                   SectionKind::getText());
869   }
870 
871   /// Return data section with a given name.
getDataSection(StringRef SectionName)872   MCSection *getDataSection(StringRef SectionName) const {
873     return Ctx->getELFSection(SectionName, ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
874   }
875 
876   /// \name Pre-assigned Section Names
877   /// @{
878 
getMainCodeSectionName()879   const char *getMainCodeSectionName() const { return ".text"; }
880 
getColdCodeSectionName()881   const char *getColdCodeSectionName() const { return ".text.cold"; }
882 
getHotTextMoverSectionName()883   const char *getHotTextMoverSectionName() const { return ".text.mover"; }
884 
getInjectedCodeSectionName()885   const char *getInjectedCodeSectionName() const { return ".text.injected"; }
886 
getInjectedColdCodeSectionName()887   const char *getInjectedColdCodeSectionName() const {
888     return ".text.injected.cold";
889   }
890 
getGdbIndexSection()891   ErrorOr<BinarySection &> getGdbIndexSection() const {
892     return getUniqueSectionByName(".gdb_index");
893   }
894 
895   /// @}
896 
897   /// Register \p TargetFunction as a fragment of \p Function if checks pass:
898   /// - if \p TargetFunction name matches \p Function name with a suffix:
899   ///   fragment_name == parent_name.cold(.\d+)?
900   /// True if the Function is registered, false if the check failed.
901   bool registerFragment(BinaryFunction &TargetFunction,
902                         BinaryFunction &Function) const;
903 
  /// Add an interprocedural reference for \p Function to \p Address.
  /// The (function, address) pair is appended to InterproceduralReferences;
  /// nothing is resolved at this point.
  void addInterproceduralReference(BinaryFunction *Function, uint64_t Address) {
    InterproceduralReferences.push_back({Function, Address});
  }
908 
909   /// Used to fix the target of linker-generated AArch64 adrp + add
910   /// sequence with no relocation info.
911   void addAdrpAddRelocAArch64(BinaryFunction &BF, MCInst &LoadLowBits,
912                               MCInst &LoadHiBits, uint64_t Target);
913 
  /// Return true if AArch64 veneer was successfully matched at a given
915   /// \p Address and register veneer binary function if \p MatchOnly
916   /// argument is false.
917   bool handleAArch64Veneer(uint64_t Address, bool MatchOnly = false);
918 
  /// Resolve inter-procedural dependencies from the references collected so
  /// far (presumably those added via addInterproceduralReference()).
920   void processInterproceduralReferences();
921 
922   /// Skip functions with all parent and child fragments transitively.
923   void skipMarkedFragments();
924 
925   /// Perform any necessary post processing on the symbol table after
926   /// function disassembly is complete.  This processing fixes top
927   /// level data holes and makes sure the symbol table is valid.
928   /// It also assigns all memory profiling info to the appropriate
929   /// BinaryData objects.
930   void postProcessSymbolTable();
931 
932   /// Set the size of the global symbol located at \p Address.  Return
933   /// false if no symbol exists, true otherwise.
934   bool setBinaryDataSize(uint64_t Address, uint64_t Size);
935 
936   /// Print the global symbol table.
937   void printGlobalSymbols(raw_ostream &OS) const;
938 
939   /// Register information about the given \p Section so we can look up
940   /// sections by address.
941   BinarySection &registerSection(SectionRef Section);
942 
  /// Register a copy of \p OriginalSection under a different name.
944   BinarySection &registerSection(StringRef SectionName,
945                                  const BinarySection &OriginalSection);
946 
  /// Register or update the information for the section with the given
  /// \p Name.  If the section already exists, the information in the
  /// section will be updated with the new data.
950   BinarySection &registerOrUpdateSection(StringRef Name, unsigned ELFType,
951                                          unsigned ELFFlags,
952                                          uint8_t *Data = nullptr,
953                                          uint64_t Size = 0,
954                                          unsigned Alignment = 1);
955 
956   /// Register the information for the note (non-allocatable) section
957   /// with the given /p Name.  If the section already exists, the
958   /// information in the section will be updated with the new data.
959   BinarySection &
960   registerOrUpdateNoteSection(StringRef Name, uint8_t *Data = nullptr,
961                               uint64_t Size = 0, unsigned Alignment = 1,
962                               bool IsReadOnly = true,
963                               unsigned ELFType = ELF::SHT_PROGBITS) {
964     return registerOrUpdateSection(Name, ELFType,
965                                    BinarySection::getFlags(IsReadOnly), Data,
966                                    Size, Alignment);
967   }
968 
  /// Remove the given \p Section from the set of all sections.  Return
970   /// true if the section was removed (and deleted), otherwise false.
971   bool deregisterSection(BinarySection &Section);
972 
973   /// Iterate over all registered sections.
sections()974   iterator_range<FilteredSectionIterator> sections() {
975     auto notNull = [](const SectionIterator &Itr) { return (bool)*Itr; };
976     return make_range(
977         FilteredSectionIterator(notNull, Sections.begin(), Sections.end()),
978         FilteredSectionIterator(notNull, Sections.end(), Sections.end()));
979   }
980 
981   /// Iterate over all registered sections.
sections()982   iterator_range<FilteredSectionConstIterator> sections() const {
983     return const_cast<BinaryContext *>(this)->sections();
984   }
985 
986   /// Iterate over all registered allocatable sections.
allocatableSections()987   iterator_range<FilteredSectionIterator> allocatableSections() {
988     auto isAllocatable = [](const SectionIterator &Itr) {
989       return *Itr && Itr->isAllocatable();
990     };
991     return make_range(
992         FilteredSectionIterator(isAllocatable, Sections.begin(),
993                                 Sections.end()),
994         FilteredSectionIterator(isAllocatable, Sections.end(), Sections.end()));
995   }
996 
997   /// Iterate over all registered code sections.
textSections()998   iterator_range<FilteredSectionIterator> textSections() {
999     auto isText = [](const SectionIterator &Itr) {
1000       return *Itr && Itr->isAllocatable() && Itr->isText();
1001     };
1002     return make_range(
1003         FilteredSectionIterator(isText, Sections.begin(), Sections.end()),
1004         FilteredSectionIterator(isText, Sections.end(), Sections.end()));
1005   }
1006 
1007   /// Iterate over all registered allocatable sections.
allocatableSections()1008   iterator_range<FilteredSectionConstIterator> allocatableSections() const {
1009     return const_cast<BinaryContext *>(this)->allocatableSections();
1010   }
1011 
1012   /// Iterate over all registered non-allocatable sections.
nonAllocatableSections()1013   iterator_range<FilteredSectionIterator> nonAllocatableSections() {
1014     auto notAllocated = [](const SectionIterator &Itr) {
1015       return *Itr && !Itr->isAllocatable();
1016     };
1017     return make_range(
1018         FilteredSectionIterator(notAllocated, Sections.begin(), Sections.end()),
1019         FilteredSectionIterator(notAllocated, Sections.end(), Sections.end()));
1020   }
1021 
1022   /// Iterate over all registered non-allocatable sections.
nonAllocatableSections()1023   iterator_range<FilteredSectionConstIterator> nonAllocatableSections() const {
1024     return const_cast<BinaryContext *>(this)->nonAllocatableSections();
1025   }
1026 
1027   /// Iterate over all allocatable relocation sections.
allocatableRelaSections()1028   iterator_range<FilteredSectionIterator> allocatableRelaSections() {
1029     auto isAllocatableRela = [](const SectionIterator &Itr) {
1030       return *Itr && Itr->isAllocatable() && Itr->isRela();
1031     };
1032     return make_range(FilteredSectionIterator(isAllocatableRela,
1033                                               Sections.begin(), Sections.end()),
1034                       FilteredSectionIterator(isAllocatableRela, Sections.end(),
1035                                               Sections.end()));
1036   }
1037 
1038   /// Return base address for the shared object or PIE based on the segment
1039   /// mapping information. \p MMapAddress is an address where one of the
1040   /// segments was mapped. \p FileOffset is the offset in the file of the
1041   /// mapping. Note that \p FileOffset should be page-aligned and could be
1042   /// different from the file offset of the segment which could be unaligned.
1043   /// If no segment is found that matches \p FileOffset, return NoneType().
1044   Optional<uint64_t> getBaseAddressForMapping(uint64_t MMapAddress,
1045                                               uint64_t FileOffset) const;
1046 
1047   /// Check if the address belongs to this binary's static allocation space.
containsAddress(uint64_t Address)1048   bool containsAddress(uint64_t Address) const {
1049     return Address >= FirstAllocAddress && Address < LayoutStartAddress;
1050   }
1051 
1052   /// Return section name containing the given \p Address.
1053   ErrorOr<StringRef> getSectionNameForAddress(uint64_t Address) const;
1054 
1055   /// Print all sections.
1056   void printSections(raw_ostream &OS) const;
1057 
1058   /// Return largest section containing the given \p Address.  These
1059   /// functions only work for allocatable sections, i.e. ones with non-zero
1060   /// addresses.
1061   ErrorOr<BinarySection &> getSectionForAddress(uint64_t Address);
getSectionForAddress(uint64_t Address)1062   ErrorOr<const BinarySection &> getSectionForAddress(uint64_t Address) const {
1063     return const_cast<BinaryContext *>(this)->getSectionForAddress(Address);
1064   }
1065 
1066   /// Return section(s) associated with given \p Name.
1067   iterator_range<NameToSectionMapType::iterator>
getSectionByName(StringRef Name)1068   getSectionByName(StringRef Name) {
1069     return make_range(NameToSection.equal_range(std::string(Name)));
1070   }
1071   iterator_range<NameToSectionMapType::const_iterator>
getSectionByName(StringRef Name)1072   getSectionByName(StringRef Name) const {
1073     return make_range(NameToSection.equal_range(std::string(Name)));
1074   }
1075 
1076   /// Return the unique section associated with given \p Name.
1077   /// If there is more than one section with the same name, return an error
1078   /// object.
getUniqueSectionByName(StringRef SectionName)1079   ErrorOr<BinarySection &> getUniqueSectionByName(StringRef SectionName) const {
1080     auto Sections = getSectionByName(SectionName);
1081     if (Sections.begin() != Sections.end() &&
1082         std::next(Sections.begin()) == Sections.end())
1083       return *Sections.begin()->second;
1084     return std::make_error_code(std::errc::bad_address);
1085   }
1086 
1087   /// Return an unsigned value of \p Size stored at \p Address. The address has
1088   /// to be a valid statically allocated address for the binary.
1089   ErrorOr<uint64_t> getUnsignedValueAtAddress(uint64_t Address,
1090                                               size_t Size) const;
1091 
1092   /// Return a signed value of \p Size stored at \p Address. The address has
1093   /// to be a valid statically allocated address for the binary.
1094   ErrorOr<uint64_t> getSignedValueAtAddress(uint64_t Address,
1095                                             size_t Size) const;
1096 
1097   /// Special case of getUnsignedValueAtAddress() that uses a pointer size.
getPointerAtAddress(uint64_t Address)1098   ErrorOr<uint64_t> getPointerAtAddress(uint64_t Address) const {
1099     return getUnsignedValueAtAddress(Address, AsmInfo->getCodePointerSize());
1100   }
1101 
1102   /// Replaces all references to \p ChildBF with \p ParentBF. \p ChildBF is then
1103   /// removed from the list of functions \p BFs. The profile data of \p ChildBF
1104   /// is merged into that of \p ParentBF. This function is thread safe.
1105   void foldFunction(BinaryFunction &ChildBF, BinaryFunction &ParentBF);
1106 
1107   /// Add a Section relocation at a given \p Address.
1108   void addRelocation(uint64_t Address, MCSymbol *Symbol, uint64_t Type,
1109                      uint64_t Addend = 0, uint64_t Value = 0);
1110 
1111   /// Return a relocation registered at a given \p Address, or nullptr if there
1112   /// is no relocation at such address.
1113   const Relocation *getRelocationAt(uint64_t Address);
1114 
  /// Register a presence of PC-relative relocation at the given \p Address.
  /// The address is recorded in DataPCRelocations.
  void addPCRelativeDataRelocation(uint64_t Address) {
    DataPCRelocations.emplace(Address);
  }
1119 
1120   /// Register dynamic relocation at \p Address.
1121   void addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, uint64_t Type,
1122                             uint64_t Addend, uint64_t Value = 0);
1123 
1124   /// Return a dynamic relocation registered at a given \p Address, or nullptr
1125   /// if there is no dynamic relocation at such address.
1126   const Relocation *getDynamicRelocationAt(uint64_t Address);
1127 
1128   /// Remove registered relocation at a given \p Address.
1129   bool removeRelocationAt(uint64_t Address);
1130 
1131   /// This function makes sure that symbols referenced by ambiguous relocations
1132   /// are marked as immovable. For now, if a section relocation points at the
1133   /// boundary between two symbols then those symbols are marked as immovable.
1134   void markAmbiguousRelocations(BinaryData &BD, const uint64_t Address);
1135 
1136   /// Return BinaryFunction corresponding to \p Symbol. If \p EntryDesc is not
  /// nullptr, set it to entry discriminator corresponding to \p Symbol
1138   /// (0 for single-entry functions). This function is thread safe.
1139   BinaryFunction *getFunctionForSymbol(const MCSymbol *Symbol,
1140                                        uint64_t *EntryDesc = nullptr);
1141 
1142   const BinaryFunction *
1143   getFunctionForSymbol(const MCSymbol *Symbol,
1144                        uint64_t *EntryDesc = nullptr) const {
1145     return const_cast<BinaryContext *>(this)->getFunctionForSymbol(Symbol,
1146                                                                    EntryDesc);
1147   }
1148 
  /// Associate the symbol \p Sym with the function \p BF for lookups with
  /// getFunctionForSymbol(). An existing association for \p Sym, if any, is
  /// overwritten.
  /// NOTE(review): no locking is visible here, while getFunctionForSymbol()
  /// is documented as thread safe -- confirm callers do not race with
  /// lookups.
  void setSymbolToFunctionMap(const MCSymbol *Sym, BinaryFunction *BF) {
    SymbolToFunctionMap[Sym] = BF;
  }
1154 
1155   /// Populate some internal data structures with debug info.
1156   void preprocessDebugInfo();
1157 
1158   /// Add a filename entry from SrcCUID to DestCUID.
1159   unsigned addDebugFilenameToUnit(const uint32_t DestCUID,
1160                                   const uint32_t SrcCUID, unsigned FileIndex);
1161 
1162   /// Return functions in output layout order
1163   std::vector<BinaryFunction *> getSortedFunctions();
1164 
1165   /// Do the best effort to calculate the size of the function by emitting
1166   /// its code, and relaxing branch instructions. By default, branch
1167   /// instructions are updated to match the layout. Pass \p FixBranches set to
1168   /// false if the branches are known to be up to date with the code layout.
1169   ///
1170   /// Return the pair where the first size is for the main part, and the second
1171   /// size is for the cold one.
1172   std::pair<size_t, size_t> calculateEmittedSize(BinaryFunction &BF,
1173                                                  bool FixBranches = true);
1174 
1175   /// Calculate the size of the instruction \p Inst optionally using a
1176   /// user-supplied emitter for lock-free multi-thread work. MCCodeEmitter is
1177   /// not thread safe and each thread should operate with its own copy of it.
1178   uint64_t
1179   computeInstructionSize(const MCInst &Inst,
1180                          const MCCodeEmitter *Emitter = nullptr) const {
1181     if (auto Size = MIB->getAnnotationWithDefault<uint32_t>(Inst, "Size"))
1182       return Size;
1183 
1184     if (!Emitter)
1185       Emitter = this->MCE.get();
1186     SmallString<256> Code;
1187     SmallVector<MCFixup, 4> Fixups;
1188     raw_svector_ostream VecOS(Code);
1189     Emitter->encodeInstruction(Inst, VecOS, Fixups, *STI);
1190     return Code.size();
1191   }
1192 
1193   /// Compute the native code size for a range of instructions.
1194   /// Note: this can be imprecise wrt the final binary since happening prior to
1195   /// relaxation, as well as wrt the original binary because of opcode
1196   /// shortening.MCCodeEmitter is not thread safe and each thread should operate
1197   /// with its own copy of it.
1198   template <typename Itr>
1199   uint64_t computeCodeSize(Itr Beg, Itr End,
1200                            const MCCodeEmitter *Emitter = nullptr) const {
1201     uint64_t Size = 0;
1202     while (Beg != End) {
1203       if (!MIB->isPseudo(*Beg))
1204         Size += computeInstructionSize(*Beg, Emitter);
1205       ++Beg;
1206     }
1207     return Size;
1208   }
1209 
  /// Verify that assembling instruction \p Instruction results in the same
  /// sequence of bytes as \p Encoding.
1212   bool validateEncoding(const MCInst &Instruction,
1213                         ArrayRef<uint8_t> Encoding) const;
1214 
1215   /// Return a function execution count threshold for determining whether
1216   /// the function is 'hot'. Consider it hot if count is above the average exec
1217   /// count of profiled functions.
1218   uint64_t getHotThreshold() const;
1219 
1220   /// Return true if instruction \p Inst requires an offset for further
1221   /// processing (e.g. assigning a profile).
keepOffsetForInstruction(const MCInst & Inst)1222   bool keepOffsetForInstruction(const MCInst &Inst) const {
1223     if (MIB->isCall(Inst) || MIB->isBranch(Inst) || MIB->isReturn(Inst) ||
1224         MIB->isPrefix(Inst) || MIB->isIndirectBranch(Inst)) {
1225       return true;
1226     }
1227     return false;
1228   }
1229 
1230   /// Return true if the function should be emitted to the output file.
1231   bool shouldEmit(const BinaryFunction &Function) const;
1232 
1233   /// Print the string name for a CFI operation.
1234   static void printCFI(raw_ostream &OS, const MCCFIInstruction &Inst);
1235 
1236   /// Print a single MCInst in native format.  If Function is non-null,
1237   /// the instruction will be annotated with CFI and possibly DWARF line table
1238   /// info.
1239   /// If printMCInst is true, the instruction is also printed in the
1240   /// architecture independent format.
1241   void printInstruction(raw_ostream &OS, const MCInst &Instruction,
1242                         uint64_t Offset = 0,
1243                         const BinaryFunction *Function = nullptr,
1244                         bool PrintMCInst = false, bool PrintMemData = false,
1245                         bool PrintRelocations = false,
1246                         StringRef Endl = "\n") const;
1247 
1248   /// Print a range of instructions.
1249   template <typename Itr>
1250   uint64_t
1251   printInstructions(raw_ostream &OS, Itr Begin, Itr End, uint64_t Offset = 0,
1252                     const BinaryFunction *Function = nullptr,
1253                     bool PrintMCInst = false, bool PrintMemData = false,
1254                     bool PrintRelocations = false,
1255                     StringRef Endl = "\n") const {
1256     while (Begin != End) {
1257       printInstruction(OS, *Begin, Offset, Function, PrintMCInst, PrintMemData,
1258                        PrintRelocations, Endl);
1259       Offset += computeCodeSize(Begin, Begin + 1);
1260       ++Begin;
1261     }
1262     return Offset;
1263   }
1264 
1265   void exitWithBugReport(StringRef Message,
1266                          const BinaryFunction &Function) const;
1267 
  /// Bundle of an MCCodeEmitter together with the MC objects it is created
  /// from, as produced by createIndependentMCCodeEmitter().
  ///
  /// The declaration order matters: members are destroyed in reverse order,
  /// so the emitter goes away before the context it was created with, and
  /// the context goes away before the object file info registered with it.
  struct IndependentCodeEmitter {
    std::unique_ptr<MCObjectFileInfo> LocalMOFI;
    std::unique_ptr<MCContext> LocalCtx;
    std::unique_ptr<MCCodeEmitter> MCE;
  };
1273 
1274   /// Encapsulates an independent MCCodeEmitter that doesn't share resources
1275   /// with the main one available through BinaryContext::MCE, managed by
1276   /// BinaryContext.
1277   /// This is intended to create a lock-free environment for an auxiliary thread
1278   /// that needs to perform work with an MCCodeEmitter that can be transient or
1279   /// won't be used in the main code emitter.
createIndependentMCCodeEmitter()1280   IndependentCodeEmitter createIndependentMCCodeEmitter() const {
1281     IndependentCodeEmitter MCEInstance;
1282     MCEInstance.LocalCtx.reset(
1283         new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
1284     MCEInstance.LocalMOFI.reset(
1285         TheTarget->createMCObjectFileInfo(*MCEInstance.LocalCtx.get(),
1286                                           /*PIC=*/!HasFixedLoadAddress));
1287     MCEInstance.LocalCtx->setObjectFileInfo(MCEInstance.LocalMOFI.get());
1288     MCEInstance.MCE.reset(
1289         TheTarget->createMCCodeEmitter(*MII, *MCEInstance.LocalCtx));
1290     return MCEInstance;
1291   }
1292 
1293   /// Creating MCStreamer instance.
1294   std::unique_ptr<MCStreamer>
createStreamer(llvm::raw_pwrite_stream & OS)1295   createStreamer(llvm::raw_pwrite_stream &OS) const {
1296     MCCodeEmitter *MCE = TheTarget->createMCCodeEmitter(*MII, *Ctx);
1297     MCAsmBackend *MAB =
1298         TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
1299     std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(OS);
1300     std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
1301         *TheTriple, *Ctx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
1302         std::unique_ptr<MCCodeEmitter>(MCE), *STI,
1303         /* RelaxAll */ false,
1304         /* IncrementalLinkerCompatible */ false,
1305         /* DWARFMustBeAtTheEnd */ false));
1306     return Streamer;
1307   }
1308 };
1309 
1310 template <typename T, typename = std::enable_if_t<sizeof(T) == 1>>
1311 inline raw_ostream &operator<<(raw_ostream &OS, const ArrayRef<T> &ByteArray) {
1312   const char *Sep = "";
1313   for (const auto Byte : ByteArray) {
1314     OS << Sep << format("%.2x", Byte);
1315     Sep = " ";
1316   }
1317   return OS;
1318 }
1319 
1320 } // namespace bolt
1321 } // namespace llvm
1322 
1323 #endif
1324