1 //===- bolt/Core/BinaryContext.h - Low-level context ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Context for processing binary executable/library files.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #ifndef BOLT_CORE_BINARY_CONTEXT_H
14 #define BOLT_CORE_BINARY_CONTEXT_H
15
16 #include "bolt/Core/BinaryData.h"
17 #include "bolt/Core/BinarySection.h"
18 #include "bolt/Core/DebugData.h"
19 #include "bolt/Core/JumpTable.h"
20 #include "bolt/Core/MCPlusBuilder.h"
21 #include "bolt/RuntimeLibs/RuntimeLibrary.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/ADT/StringMap.h"
24 #include "llvm/ADT/Triple.h"
25 #include "llvm/ADT/iterator.h"
26 #include "llvm/BinaryFormat/Dwarf.h"
27 #include "llvm/BinaryFormat/MachO.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCCodeEmitter.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCObjectFileInfo.h"
32 #include "llvm/MC/MCObjectWriter.h"
33 #include "llvm/MC/MCPseudoProbe.h"
34 #include "llvm/MC/MCSectionELF.h"
35 #include "llvm/MC/MCSectionMachO.h"
36 #include "llvm/MC/MCStreamer.h"
37 #include "llvm/MC/MCSymbol.h"
38 #include "llvm/MC/TargetRegistry.h"
39 #include "llvm/Support/ErrorOr.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include <functional>
42 #include <list>
43 #include <map>
44 #include <set>
45 #include <shared_mutex>
46 #include <string>
47 #include <system_error>
48 #include <type_traits>
49 #include <unordered_map>
50 #include <vector>
51
52 namespace llvm {
53 class MCDisassembler;
54 class MCInstPrinter;
55
56 using namespace object;
57
58 namespace bolt {
59
60 class BinaryFunction;
61 class ExecutableFileMemoryManager;
62
63 /// Information on loadable part of the file.
64 struct SegmentInfo {
65 uint64_t Address; /// Address of the segment in memory.
66 uint64_t Size; /// Size of the segment in memory.
67 uint64_t FileOffset; /// Offset in the file.
68 uint64_t FileSize; /// Size in file.
69 uint64_t Alignment; /// Alignment of the segment.
70
printSegmentInfo71 void print(raw_ostream &OS) const {
72 OS << "SegmentInfo { Address: 0x"
73 << Twine::utohexstr(Address) << ", Size: 0x"
74 << Twine::utohexstr(Size) << ", FileOffset: 0x"
75 << Twine::utohexstr(FileOffset) << ", FileSize: 0x"
76 << Twine::utohexstr(FileSize) << ", Alignment: 0x"
77 << Twine::utohexstr(Alignment) << "}";
78 };
79 };
80
81 inline raw_ostream &operator<<(raw_ostream &OS, const SegmentInfo &SegInfo) {
82 SegInfo.print(OS);
83 return OS;
84 }
85
/// AArch64-specific symbol markers used to delimit code/data in .text.
enum class MarkerSymType : char {
  /// No marker kind (the zero value).
  NONE = 0,
  /// Marker delimiting code.
  CODE = 1,
  /// Marker delimiting data.
  DATA = 2,
};
92
/// Classification of the contents found at a memory location.
enum class MemoryContentsType : char {
  /// Unknown contents.
  UNKNOWN = 0,
  /// Possibly a non-PIC jump table.
  POSSIBLE_JUMP_TABLE = 1,
  /// Possibly a PIC jump table.
  POSSIBLE_PIC_JUMP_TABLE = 2,
};
98
/// Helper function to truncate a \p Value to given size in \p Bytes, i.e. keep
/// the low \p Bytes bytes and clear every higher bit.
///
/// A width of zero yields 0, and a width of eight or more returns \p Value
/// unchanged. These cases are handled explicitly because the mask computation
/// would otherwise shift a 64-bit value by 64 bits (Bytes == 0) or by a
/// wrapped-around unsigned amount (Bytes > 8), both of which are undefined
/// behavior.
inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
  if (Bytes == 0)
    return 0;
  if (Bytes >= sizeof(uint64_t))
    return Value;
  // 1 <= Bytes <= 7 here, so the shift amount is within [8, 56].
  const uint64_t Mask = ~static_cast<uint64_t>(0) >> (64 - Bytes * 8);
  return static_cast<int64_t>(static_cast<uint64_t>(Value) & Mask);
}
103
/// Filter iterator: a bidirectional iterator adaptor that only stops at
/// positions of an underlying range for which a predicate returns true.
///
/// The predicate is invoked with the underlying iterator itself, not with the
/// element it refers to. On construction the adaptor advances to the first
/// accepted position at or after the start iterator, stopping at \p End.
///
/// Decrementing is not bounded: operator-- keeps stepping backwards until the
/// predicate accepts a position, so the caller must guarantee that an accepted
/// position precedes the current one.
template <typename ItrType,
          typename PredType = std::function<bool(const ItrType &)>>
class FilterIterator {
  using InnerTraits = std::iterator_traits<ItrType>;
  using Iterator = FilterIterator;

  PredType Pred;
  ItrType Itr, End;

  /// Step backwards until the predicate accepts the current position.
  void prev() {
    while (!Pred(--Itr))
      ;
  }

  /// Advance by one position and then skip everything the predicate rejects.
  void next() {
    ++Itr;
    nextMatching();
  }

  /// Skip forward (possibly by zero steps) to the first accepted position, or
  /// to End if there is none.
  void nextMatching() {
    while (Itr != End && !Pred(Itr))
      ++Itr;
  }

public:
  // Iterator traits are spelled out as member types instead of inheriting
  // from std::iterator, which is deprecated since C++17.
  using iterator_category = std::bidirectional_iterator_tag;
  using value_type = typename InnerTraits::value_type;
  using difference_type = typename InnerTraits::difference_type;
  using pointer = typename InnerTraits::pointer;
  using reference = typename InnerTraits::reference;

  FilterIterator(PredType Pred, ItrType Itr, ItrType End)
      : Pred(Pred), Itr(Itr), End(End) {
    nextMatching();
  }

  Iterator &operator++() {
    next();
    return *this;
  }

  Iterator &operator--() {
    prev();
    return *this;
  }

  /// Post-increment: returns a copy of the whole adaptor as it was before the
  /// step (copying just the inner iterator cannot be converted back into a
  /// FilterIterator).
  Iterator operator++(int) {
    Iterator Tmp(*this);
    next();
    return Tmp;
  }

  /// Post-decrement: returns a copy of the whole adaptor as it was before the
  /// step.
  Iterator operator--(int) {
    Iterator Tmp(*this);
    prev();
    return Tmp;
  }

  /// Two adaptors are equal when their current inner positions are equal; the
  /// predicate and the end position do not take part in the comparison.
  bool operator==(const Iterator &Other) const { return Itr == Other.Itr; }
  bool operator!=(const Iterator &Other) const { return !operator==(Other); }

  reference operator*() const { return *Itr; }
  pointer operator->() const { return &operator*(); }
};
144
145 class BinaryContext {
146 BinaryContext() = delete;
147
148 /// Name of the binary file the context originated from.
149 std::string Filename;
150
151 /// Unique build ID if available for the binary.
152 Optional<std::string> FileBuildID;
153
154 /// Set of all sections.
155 struct CompareSections {
operatorCompareSections156 bool operator()(const BinarySection *A, const BinarySection *B) const {
157 return *A < *B;
158 }
159 };
160 using SectionSetType = std::set<BinarySection *, CompareSections>;
161 SectionSetType Sections;
162
163 using SectionIterator = pointee_iterator<SectionSetType::iterator>;
164 using SectionConstIterator = pointee_iterator<SectionSetType::const_iterator>;
165
166 using FilteredSectionIterator = FilterIterator<SectionIterator>;
167 using FilteredSectionConstIterator = FilterIterator<SectionConstIterator>;
168
169 /// Map virtual address to a section. It is possible to have more than one
170 /// section mapped to the same address, e.g. non-allocatable sections.
171 using AddressToSectionMapType = std::multimap<uint64_t, BinarySection *>;
172 AddressToSectionMapType AddressToSection;
173
174 /// multimap of section name to BinarySection object. Some binaries
175 /// have multiple sections with the same name.
176 using NameToSectionMapType = std::multimap<std::string, BinarySection *>;
177 NameToSectionMapType NameToSection;
178
179 /// Low level section registration.
180 BinarySection ®isterSection(BinarySection *Section);
181
182 /// Store all functions in the binary, sorted by original address.
183 std::map<uint64_t, BinaryFunction> BinaryFunctions;
184
185 /// A mutex that is used to control parallel accesses to BinaryFunctions
186 mutable std::shared_timed_mutex BinaryFunctionsMutex;
187
188 /// Functions injected by BOLT
189 std::vector<BinaryFunction *> InjectedBinaryFunctions;
190
191 /// Jump tables for all functions mapped by address.
192 std::map<uint64_t, JumpTable *> JumpTables;
193
194 /// Locations of PC-relative relocations in data objects.
195 std::unordered_set<uint64_t> DataPCRelocations;
196
197 /// Used in duplicateJumpTable() to uniquely identify a JT clone
198 /// Start our IDs with a high number so getJumpTableContainingAddress checks
199 /// with size won't overflow
200 uint32_t DuplicatedJumpTables{0x10000000};
201
202 /// Function fragments to skip.
203 std::unordered_set<BinaryFunction *> FragmentsToSkip;
204
205 /// The runtime library.
206 std::unique_ptr<RuntimeLibrary> RtLibrary;
207
208 /// DWP Context.
209 std::shared_ptr<DWARFContext> DWPContext;
210
211 /// A map of DWO Ids to CUs.
212 using DWOIdToCUMapType = std::unordered_map<uint64_t, DWARFUnit *>;
213 DWOIdToCUMapType DWOCUs;
214
215 bool ContainsDwarf5{false};
216 bool ContainsDwarfLegacy{false};
217
218 /// Preprocess DWO debug information.
219 void preprocessDWODebugInfo();
220
221 /// DWARF line info for CUs.
222 std::map<unsigned, DwarfLineTable> DwarfLineTablesCUMap;
223
224 public:
225 static Expected<std::unique_ptr<BinaryContext>>
226 createBinaryContext(const ObjectFile *File, bool IsPIC,
227 std::unique_ptr<DWARFContext> DwCtx);
228
229 /// Superset of compiler units that will contain overwritten code that needs
230 /// new debug info. In a few cases, functions may end up not being
231 /// overwritten, but it is okay to re-generate debug info for them.
232 std::set<const DWARFUnit *> ProcessedCUs;
233
234 // Setup MCPlus target builder
initializeTarget(std::unique_ptr<MCPlusBuilder> TargetBuilder)235 void initializeTarget(std::unique_ptr<MCPlusBuilder> TargetBuilder) {
236 MIB = std::move(TargetBuilder);
237 }
238
239 /// Return function fragments to skip.
getFragmentsToSkip()240 const std::unordered_set<BinaryFunction *> &getFragmentsToSkip() {
241 return FragmentsToSkip;
242 }
243
244 /// Add function fragment to skip
addFragmentsToSkip(BinaryFunction * Function)245 void addFragmentsToSkip(BinaryFunction *Function) {
246 FragmentsToSkip.insert(Function);
247 }
248
clearFragmentsToSkip()249 void clearFragmentsToSkip() { FragmentsToSkip.clear(); }
250
251 /// Given DWOId returns CU if it exists in DWOCUs.
252 Optional<DWARFUnit *> getDWOCU(uint64_t DWOId);
253
254 /// Returns DWOContext if it exists.
255 DWARFContext *getDWOContext() const;
256
257 /// Get Number of DWOCUs in a map.
getNumDWOCUs()258 uint32_t getNumDWOCUs() { return DWOCUs.size(); }
259
260 /// Returns true if DWARF5 is used.
isDWARF5Used()261 bool isDWARF5Used() const { return ContainsDwarf5; }
262
263 /// Returns true if DWARF4 or lower is used.
isDWARFLegacyUsed()264 bool isDWARFLegacyUsed() const { return ContainsDwarfLegacy; }
265
getDwarfLineTables()266 std::map<unsigned, DwarfLineTable> &getDwarfLineTables() {
267 return DwarfLineTablesCUMap;
268 }
269
getDwarfLineTable(unsigned CUID)270 DwarfLineTable &getDwarfLineTable(unsigned CUID) {
271 return DwarfLineTablesCUMap[CUID];
272 }
273
274 Expected<unsigned> getDwarfFile(StringRef Directory, StringRef FileName,
275 unsigned FileNumber,
276 Optional<MD5::MD5Result> Checksum,
277 Optional<StringRef> Source, unsigned CUID,
278 unsigned DWARFVersion);
279
280 /// [start memory address] -> [segment info] mapping.
281 std::map<uint64_t, SegmentInfo> SegmentMapInfo;
282
283 /// Symbols that are expected to be undefined in MCContext during emission.
284 std::unordered_set<MCSymbol *> UndefinedSymbols;
285
286 /// [name] -> [BinaryData*] map used for global symbol resolution.
287 using SymbolMapType = StringMap<BinaryData *>;
288 SymbolMapType GlobalSymbols;
289
290 /// [address] -> [BinaryData], ...
291 /// Addresses never change.
292 /// Note: it is important that clients do not hold on to instances of
293 /// BinaryData* while the map is still being modified during BinaryFunction
294 /// disassembly. This is because of the possibility that a regular
295 /// BinaryData is later discovered to be a JumpTable.
296 using BinaryDataMapType = std::map<uint64_t, BinaryData *>;
297 using binary_data_iterator = BinaryDataMapType::iterator;
298 using binary_data_const_iterator = BinaryDataMapType::const_iterator;
299 BinaryDataMapType BinaryDataMap;
300
301 using FilteredBinaryDataConstIterator =
302 FilterIterator<binary_data_const_iterator>;
303 using FilteredBinaryDataIterator = FilterIterator<binary_data_iterator>;
304
305 /// Memory manager for sections and segments. Used to communicate with ORC
306 /// among other things.
307 std::shared_ptr<ExecutableFileMemoryManager> EFMM;
308
getFilename()309 StringRef getFilename() const { return Filename; }
setFilename(StringRef Name)310 void setFilename(StringRef Name) { Filename = std::string(Name); }
311
getFileBuildID()312 Optional<StringRef> getFileBuildID() const {
313 if (FileBuildID)
314 return StringRef(*FileBuildID);
315
316 return NoneType();
317 }
setFileBuildID(StringRef ID)318 void setFileBuildID(StringRef ID) { FileBuildID = std::string(ID); }
319
hasSymbolsWithFileName()320 bool hasSymbolsWithFileName() const { return HasSymbolsWithFileName; }
setHasSymbolsWithFileName(bool Value)321 void setHasSymbolsWithFileName(bool Value) { HasSymbolsWithFileName = true; }
322
323 /// Return true if relocations against symbol with a given name
324 /// must be created.
325 bool forceSymbolRelocations(StringRef SymbolName) const;
326
getNumUnusedProfiledObjects()327 uint64_t getNumUnusedProfiledObjects() const {
328 return NumUnusedProfiledObjects;
329 }
setNumUnusedProfiledObjects(uint64_t N)330 void setNumUnusedProfiledObjects(uint64_t N) { NumUnusedProfiledObjects = N; }
331
getRuntimeLibrary()332 RuntimeLibrary *getRuntimeLibrary() { return RtLibrary.get(); }
setRuntimeLibrary(std::unique_ptr<RuntimeLibrary> Lib)333 void setRuntimeLibrary(std::unique_ptr<RuntimeLibrary> Lib) {
334 assert(!RtLibrary && "Cannot set runtime library twice.");
335 RtLibrary = std::move(Lib);
336 }
337
338 /// Return BinaryFunction containing a given \p Address or nullptr if
339 /// no registered function contains the \p Address.
340 ///
341 /// In a binary a function has somewhat vague boundaries. E.g. a function can
342 /// refer to the first byte past the end of the function, and it will still be
343 /// referring to this function, not the function following it in the address
344 /// space. Thus we have the following flags that allow to lookup for
345 /// a function where a caller has more context for the search.
346 ///
347 /// If \p CheckPastEnd is true and the \p Address falls on a byte
348 /// immediately following the last byte of some function and there's no other
349 /// function that starts there, then return the function as the one containing
350 /// the \p Address. This is useful when we need to locate functions for
351 /// references pointing immediately past a function body.
352 ///
353 /// If \p UseMaxSize is true, then include the space between this function
354 /// body and the next object in address ranges that we check.
355 BinaryFunction *getBinaryFunctionContainingAddress(uint64_t Address,
356 bool CheckPastEnd = false,
357 bool UseMaxSize = false);
358
359 /// Return a BinaryFunction that starts at a given \p Address.
360 BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address);
361
getBinaryFunctionAtAddress(uint64_t Address)362 const BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address) const {
363 return const_cast<BinaryContext *>(this)->getBinaryFunctionAtAddress(
364 Address);
365 }
366
367 /// Return size of an entry for the given jump table \p Type.
getJumpTableEntrySize(JumpTable::JumpTableType Type)368 uint64_t getJumpTableEntrySize(JumpTable::JumpTableType Type) const {
369 return Type == JumpTable::JTT_PIC ? 4 : AsmInfo->getCodePointerSize();
370 }
371
372 /// Return JumpTable containing a given \p Address.
getJumpTableContainingAddress(uint64_t Address)373 JumpTable *getJumpTableContainingAddress(uint64_t Address) {
374 auto JTI = JumpTables.upper_bound(Address);
375 if (JTI == JumpTables.begin())
376 return nullptr;
377 --JTI;
378 if (JTI->first + JTI->second->getSize() > Address)
379 return JTI->second;
380 if (JTI->second->getSize() == 0 && JTI->first == Address)
381 return JTI->second;
382 return nullptr;
383 }
384
getDWARFEncodingSize(unsigned Encoding)385 unsigned getDWARFEncodingSize(unsigned Encoding) {
386 switch (Encoding & 0x0f) {
387 default:
388 llvm_unreachable("unknown encoding");
389 case dwarf::DW_EH_PE_absptr:
390 case dwarf::DW_EH_PE_signed:
391 return AsmInfo->getCodePointerSize();
392 case dwarf::DW_EH_PE_udata2:
393 case dwarf::DW_EH_PE_sdata2:
394 return 2;
395 case dwarf::DW_EH_PE_udata4:
396 case dwarf::DW_EH_PE_sdata4:
397 return 4;
398 case dwarf::DW_EH_PE_udata8:
399 case dwarf::DW_EH_PE_sdata8:
400 return 8;
401 }
402 }
403
404 /// [MCSymbol] -> [BinaryFunction]
405 ///
406 /// As we fold identical functions, multiple symbols can point
407 /// to the same BinaryFunction.
408 std::unordered_map<const MCSymbol *, BinaryFunction *> SymbolToFunctionMap;
409
410 /// A mutex that is used to control parallel accesses to SymbolToFunctionMap
411 mutable std::shared_timed_mutex SymbolToFunctionMapMutex;
412
413 /// Look up the symbol entry that contains the given \p Address (based on
414 /// the start address and size for each symbol). Returns a pointer to
415 /// the BinaryData for that symbol. If no data is found, nullptr is returned.
416 const BinaryData *getBinaryDataContainingAddressImpl(uint64_t Address) const;
417
418 /// Update the Parent fields in BinaryDatas after adding a new entry into
419 /// \p BinaryDataMap.
420 void updateObjectNesting(BinaryDataMapType::iterator GAI);
421
422 /// Validate that if object address ranges overlap that the object with
423 /// the larger range is a parent of the object with the smaller range.
424 bool validateObjectNesting() const;
425
426 /// Validate that there are no top level "holes" in each section
427 /// and that all relocations with a section are mapped to a valid
428 /// top level BinaryData.
429 bool validateHoles() const;
430
431 /// Produce output address ranges based on input ranges for some module.
432 DebugAddressRangesVector translateModuleAddressRanges(
433 const DWARFAddressRangesVector &InputRanges) const;
434
435 /// Get a bogus "absolute" section that will be associated with all
436 /// absolute BinaryDatas.
437 BinarySection &absoluteSection();
438
439 /// Process "holes" in between known BinaryData objects. For now,
440 /// symbols are padded with the space before the next BinaryData object.
441 void fixBinaryDataHoles();
442
443 /// Generate names based on data hashes for unknown symbols.
444 void generateSymbolHashes();
445
446 /// Construct BinaryFunction object and add it to internal maps.
447 BinaryFunction *createBinaryFunction(const std::string &Name,
448 BinarySection &Section, uint64_t Address,
449 uint64_t Size, uint64_t SymbolSize = 0,
450 uint16_t Alignment = 0);
451
452 /// Return all functions for this rewrite instance.
getBinaryFunctions()453 std::map<uint64_t, BinaryFunction> &getBinaryFunctions() {
454 return BinaryFunctions;
455 }
456
457 /// Return all functions for this rewrite instance.
getBinaryFunctions()458 const std::map<uint64_t, BinaryFunction> &getBinaryFunctions() const {
459 return BinaryFunctions;
460 }
461
462 /// Create BOLT-injected function
463 BinaryFunction *createInjectedBinaryFunction(const std::string &Name,
464 bool IsSimple = true);
465
getInjectedBinaryFunctions()466 std::vector<BinaryFunction *> &getInjectedBinaryFunctions() {
467 return InjectedBinaryFunctions;
468 }
469
470 /// Return vector with all functions, i.e. include functions from the input
471 /// binary and functions created by BOLT.
472 std::vector<BinaryFunction *> getAllBinaryFunctions();
473
474 /// Construct a jump table for \p Function at \p Address or return an existing
475 /// one at that location.
476 ///
/// May create an embedded jump table and return its label.
479 const MCSymbol *getOrCreateJumpTable(BinaryFunction &Function,
480 uint64_t Address,
481 JumpTable::JumpTableType Type);
482
483 /// Analyze a possible jump table of type \p Type at a given \p Address.
484 /// \p BF is a function referencing the jump table.
485 /// Return true if the jump table was detected at \p Address, and false
486 /// otherwise.
487 ///
488 /// If \p NextJTAddress is different from zero, it is used as an upper
489 /// bound for jump table memory layout.
490 ///
/// Optionally, populate \p EntriesAsAddress from jump table entries. The
/// entries could be partially populated if the jump table detection fails.
493 bool analyzeJumpTable(const uint64_t Address,
494 const JumpTable::JumpTableType Type, BinaryFunction &BF,
495 const uint64_t NextJTAddress = 0,
496 JumpTable::AddressesType *EntriesAsAddress = nullptr);
497
498 /// After jump table locations are established, this function will populate
499 /// their EntriesAsAddress based on memory contents.
500 void populateJumpTables();
501
502 /// Returns a jump table ID and label pointing to the duplicated jump table.
503 /// Ordinarily, jump tables are identified by their address in the input
504 /// binary. We return an ID with the high bit set to differentiate it from
505 /// regular addresses, avoiding conflicts with standard jump tables.
506 std::pair<uint64_t, const MCSymbol *>
507 duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
508 const MCSymbol *OldLabel);
509
510 /// Generate a unique name for jump table at a given \p Address belonging
511 /// to function \p BF.
512 std::string generateJumpTableName(const BinaryFunction &BF, uint64_t Address);
513
514 /// Free memory used by JumpTable's EntriesAsAddress
clearJumpTableTempData()515 void clearJumpTableTempData() {
516 for (auto &JTI : JumpTables) {
517 JumpTable &JT = *JTI.second;
518 JumpTable::AddressesType Temp;
519 Temp.swap(JT.EntriesAsAddress);
520 }
521 }
522 /// Return true if the array of bytes represents a valid code padding.
523 bool hasValidCodePadding(const BinaryFunction &BF);
524
525 /// Verify padding area between functions, and adjust max function size
526 /// accordingly.
527 void adjustCodePadding();
528
529 /// Regular page size.
530 unsigned RegularPageSize{0x1000};
531 static constexpr unsigned RegularPageSizeX86 = 0x1000;
532 static constexpr unsigned RegularPageSizeAArch64 = 0x10000;
533
534 /// Huge page size to use.
535 static constexpr unsigned HugePageSize = 0x200000;
536
537 /// Map address to a constant island owner (constant data in code section)
538 std::map<uint64_t, BinaryFunction *> AddressToConstantIslandMap;
539
540 /// A map from jump table address to insertion order. Used for generating
541 /// jump table names.
542 std::map<uint64_t, size_t> JumpTableIds;
543
544 std::unique_ptr<MCContext> Ctx;
545
546 /// A mutex that is used to control parallel accesses to Ctx
547 mutable std::shared_timed_mutex CtxMutex;
scopeLock()548 std::unique_lock<std::shared_timed_mutex> scopeLock() const {
549 return std::unique_lock<std::shared_timed_mutex>(CtxMutex);
550 }
551
552 std::unique_ptr<DWARFContext> DwCtx;
553
554 std::unique_ptr<Triple> TheTriple;
555
556 const Target *TheTarget;
557
558 std::string TripleName;
559
560 std::unique_ptr<MCCodeEmitter> MCE;
561
562 std::unique_ptr<MCObjectFileInfo> MOFI;
563
564 std::unique_ptr<const MCAsmInfo> AsmInfo;
565
566 std::unique_ptr<const MCInstrInfo> MII;
567
568 std::unique_ptr<const MCSubtargetInfo> STI;
569
570 std::unique_ptr<MCInstPrinter> InstPrinter;
571
572 std::unique_ptr<const MCInstrAnalysis> MIA;
573
574 std::unique_ptr<MCPlusBuilder> MIB;
575
576 std::unique_ptr<const MCRegisterInfo> MRI;
577
578 std::unique_ptr<MCDisassembler> DisAsm;
579
580 /// Symbolic disassembler.
581 std::unique_ptr<MCDisassembler> SymbolicDisAsm;
582
583 std::unique_ptr<MCAsmBackend> MAB;
584
585 /// Indicates if relocations are available for usage.
586 bool HasRelocations{false};
587
588 /// Is the binary always loaded at a fixed address. Shared objects and
589 /// position-independent executables (PIEs) are examples of binaries that
590 /// will have HasFixedLoadAddress set to false.
591 bool HasFixedLoadAddress{true};
592
593 /// True if the binary has no dynamic dependencies, i.e., if it was statically
594 /// linked.
595 bool IsStaticExecutable{false};
596
597 /// Set to true if the binary contains PT_INTERP header.
598 bool HasInterpHeader{false};
599
600 /// Indicates if any of local symbols used for functions or data objects
601 /// have an origin file name available.
602 bool HasSymbolsWithFileName{false};
603
604 /// Sum of execution count of all functions
605 uint64_t SumExecutionCount{0};
606
607 /// Number of functions with profile information
608 uint64_t NumProfiledFuncs{0};
609
610 /// Number of objects in profile whose profile was ignored.
611 uint64_t NumUnusedProfiledObjects{0};
612
613 /// Total hotness score according to profiling data for this binary.
614 uint64_t TotalScore{0};
615
616 /// Binary-wide stats for macro-fusion.
617 uint64_t MissedMacroFusionPairs{0};
618 uint64_t MissedMacroFusionExecCount{0};
619
620 // Address of the first allocated segment.
621 uint64_t FirstAllocAddress{std::numeric_limits<uint64_t>::max()};
622
623 /// Track next available address for new allocatable sections. RewriteInstance
624 /// sets this prior to running BOLT passes, so layout passes are aware of the
625 /// final addresses functions will have.
626 uint64_t LayoutStartAddress{0};
627
628 /// Old .text info.
629 uint64_t OldTextSectionAddress{0};
630 uint64_t OldTextSectionOffset{0};
631 uint64_t OldTextSectionSize{0};
632
633 /// Address of the code/function that is executed before any other code in
634 /// the binary.
635 Optional<uint64_t> StartFunctionAddress;
636
637 /// Address of the code/function that is going to be executed right before
638 /// the execution of the binary is completed.
639 Optional<uint64_t> FiniFunctionAddress;
640
641 /// Page alignment used for code layout.
642 uint64_t PageAlign{HugePageSize};
643
644 /// True if the binary requires immediate relocation processing.
645 bool RequiresZNow{false};
646
647 /// List of functions that always trap.
648 std::vector<const BinaryFunction *> TrappedFunctions;
649
650 /// Map SDT locations to SDT markers info
651 std::unordered_map<uint64_t, SDTMarkerInfo> SDTMarkers;
652
653 /// Map linux kernel program locations/instructions to their pointers in
654 /// special linux kernel sections
655 std::unordered_map<uint64_t, std::vector<LKInstructionMarkerInfo>> LKMarkers;
656
657 /// List of external addresses in the code that are not a function start
658 /// and are referenced from BinaryFunction.
659 std::list<std::pair<BinaryFunction *, uint64_t>> InterproceduralReferences;
660
661 /// PseudoProbe decoder
662 MCPseudoProbeDecoder ProbeDecoder;
663
664 /// DWARF encoding. Available encoding types defined in BinaryFormat/Dwarf.h
665 /// enum Constants, e.g. DW_EH_PE_omit.
666 unsigned TTypeEncoding = dwarf::DW_EH_PE_omit;
667 unsigned LSDAEncoding = dwarf::DW_EH_PE_omit;
668
669 BinaryContext(std::unique_ptr<MCContext> Ctx,
670 std::unique_ptr<DWARFContext> DwCtx,
671 std::unique_ptr<Triple> TheTriple, const Target *TheTarget,
672 std::string TripleName, std::unique_ptr<MCCodeEmitter> MCE,
673 std::unique_ptr<MCObjectFileInfo> MOFI,
674 std::unique_ptr<const MCAsmInfo> AsmInfo,
675 std::unique_ptr<const MCInstrInfo> MII,
676 std::unique_ptr<const MCSubtargetInfo> STI,
677 std::unique_ptr<MCInstPrinter> InstPrinter,
678 std::unique_ptr<const MCInstrAnalysis> MIA,
679 std::unique_ptr<MCPlusBuilder> MIB,
680 std::unique_ptr<const MCRegisterInfo> MRI,
681 std::unique_ptr<MCDisassembler> DisAsm);
682
683 ~BinaryContext();
684
685 std::unique_ptr<MCObjectWriter> createObjectWriter(raw_pwrite_stream &OS);
686
isELF()687 bool isELF() const { return TheTriple->isOSBinFormatELF(); }
688
isMachO()689 bool isMachO() const { return TheTriple->isOSBinFormatMachO(); }
690
isAArch64()691 bool isAArch64() const {
692 return TheTriple->getArch() == llvm::Triple::aarch64;
693 }
694
isX86()695 bool isX86() const {
696 return TheTriple->getArch() == llvm::Triple::x86 ||
697 TheTriple->getArch() == llvm::Triple::x86_64;
698 }
699
700 // AArch64-specific functions to check if symbol is used to delimit
701 // code/data in .text. Code is marked by $x, data by $d.
702 MarkerSymType getMarkerType(const SymbolRef &Symbol) const;
703 bool isMarker(const SymbolRef &Symbol) const;
704
705 /// Iterate over all BinaryData.
getBinaryData()706 iterator_range<binary_data_const_iterator> getBinaryData() const {
707 return make_range(BinaryDataMap.begin(), BinaryDataMap.end());
708 }
709
710 /// Iterate over all BinaryData.
getBinaryData()711 iterator_range<binary_data_iterator> getBinaryData() {
712 return make_range(BinaryDataMap.begin(), BinaryDataMap.end());
713 }
714
715 /// Iterate over all BinaryData associated with the given \p Section.
716 iterator_range<FilteredBinaryDataConstIterator>
getBinaryDataForSection(const BinarySection & Section)717 getBinaryDataForSection(const BinarySection &Section) const {
718 auto Begin = BinaryDataMap.lower_bound(Section.getAddress());
719 if (Begin != BinaryDataMap.begin())
720 --Begin;
721 auto End = BinaryDataMap.upper_bound(Section.getEndAddress());
722 auto pred = [&Section](const binary_data_const_iterator &Itr) -> bool {
723 return Itr->second->getSection() == Section;
724 };
725 return make_range(FilteredBinaryDataConstIterator(pred, Begin, End),
726 FilteredBinaryDataConstIterator(pred, End, End));
727 }
728
729 /// Iterate over all BinaryData associated with the given \p Section.
730 iterator_range<FilteredBinaryDataIterator>
getBinaryDataForSection(BinarySection & Section)731 getBinaryDataForSection(BinarySection &Section) {
732 auto Begin = BinaryDataMap.lower_bound(Section.getAddress());
733 if (Begin != BinaryDataMap.begin())
734 --Begin;
735 auto End = BinaryDataMap.upper_bound(Section.getEndAddress());
736 auto pred = [&Section](const binary_data_iterator &Itr) -> bool {
737 return Itr->second->getSection() == Section;
738 };
739 return make_range(FilteredBinaryDataIterator(pred, Begin, End),
740 FilteredBinaryDataIterator(pred, End, End));
741 }
742
/// Iterate over all the sub-symbols of \p BD (if any).
744 iterator_range<binary_data_iterator> getSubBinaryData(BinaryData *BD);
745
746 /// Clear the global symbol address -> name(s) map.
clearBinaryData()747 void clearBinaryData() {
748 GlobalSymbols.clear();
749 for (auto &Entry : BinaryDataMap)
750 delete Entry.second;
751 BinaryDataMap.clear();
752 }
753
/// Process \p Address reference from code in function \p BF.
755 /// \p IsPCRel indicates if the reference is PC-relative.
756 /// Return <Symbol, Addend> pair corresponding to the \p Address.
757 std::pair<const MCSymbol *, uint64_t>
758 handleAddressRef(uint64_t Address, BinaryFunction &BF, bool IsPCRel);
759
760 /// Analyze memory contents at the given \p Address and return the type of
761 /// memory contents (such as a possible jump table).
762 MemoryContentsType analyzeMemoryAt(uint64_t Address, BinaryFunction &BF);
763
764 /// Return a value of the global \p Symbol or an error if the value
765 /// was not set.
getSymbolValue(const MCSymbol & Symbol)766 ErrorOr<uint64_t> getSymbolValue(const MCSymbol &Symbol) const {
767 const BinaryData *BD = getBinaryDataByName(Symbol.getName());
768 if (!BD)
769 return std::make_error_code(std::errc::bad_address);
770 return BD->getAddress();
771 }
772
773 /// Return a global symbol registered at a given \p Address and \p Size.
774 /// If no symbol exists, create one with unique name using \p Prefix.
775 /// If there are multiple symbols registered at the \p Address, then
776 /// return the first one.
777 MCSymbol *getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
778 uint64_t Size = 0, uint16_t Alignment = 0,
779 unsigned Flags = 0);
780
781 /// Create a global symbol without registering an address.
782 MCSymbol *getOrCreateUndefinedGlobalSymbol(StringRef Name);
783
784 /// Register a symbol with \p Name at a given \p Address using \p Size,
785 /// \p Alignment, and \p Flags. See llvm::SymbolRef::Flags for the definition
786 /// of \p Flags.
787 MCSymbol *registerNameAtAddress(StringRef Name, uint64_t Address,
788 uint64_t Size, uint16_t Alignment,
789 unsigned Flags = 0);
790
791 /// Return BinaryData registered at a given \p Address or nullptr if no
792 /// global symbol was registered at the location.
getBinaryDataAtAddress(uint64_t Address)793 const BinaryData *getBinaryDataAtAddress(uint64_t Address) const {
794 auto NI = BinaryDataMap.find(Address);
795 return NI != BinaryDataMap.end() ? NI->second : nullptr;
796 }
797
getBinaryDataAtAddress(uint64_t Address)798 BinaryData *getBinaryDataAtAddress(uint64_t Address) {
799 auto NI = BinaryDataMap.find(Address);
800 return NI != BinaryDataMap.end() ? NI->second : nullptr;
801 }
802
803 /// Look up the symbol entry that contains the given \p Address (based on
804 /// the start address and size for each symbol). Returns a pointer to
805 /// the BinaryData for that symbol. If no data is found, nullptr is returned.
getBinaryDataContainingAddress(uint64_t Address)806 const BinaryData *getBinaryDataContainingAddress(uint64_t Address) const {
807 return getBinaryDataContainingAddressImpl(Address);
808 }
809
getBinaryDataContainingAddress(uint64_t Address)810 BinaryData *getBinaryDataContainingAddress(uint64_t Address) {
811 return const_cast<BinaryData *>(
812 getBinaryDataContainingAddressImpl(Address));
813 }
814
815 /// Return BinaryData for the given \p Name or nullptr if no
816 /// global symbol with that name exists.
getBinaryDataByName(StringRef Name)817 const BinaryData *getBinaryDataByName(StringRef Name) const {
818 auto Itr = GlobalSymbols.find(Name);
819 return Itr != GlobalSymbols.end() ? Itr->second : nullptr;
820 }
821
getBinaryDataByName(StringRef Name)822 BinaryData *getBinaryDataByName(StringRef Name) {
823 auto Itr = GlobalSymbols.find(Name);
824 return Itr != GlobalSymbols.end() ? Itr->second : nullptr;
825 }
826
827 /// Return registered PLT entry BinaryData with the given \p Name
828 /// or nullptr if no global PLT symbol with that name exists.
getPLTBinaryDataByName(StringRef Name)829 const BinaryData *getPLTBinaryDataByName(StringRef Name) const {
830 if (const BinaryData *Data = getBinaryDataByName(Name.str() + "@PLT"))
831 return Data;
832
833 // The symbol name might contain versioning information e.g
834 // memcpy@@GLIBC_2.17. Remove it and try to locate binary data
835 // without it.
836 size_t At = Name.find("@");
837 if (At != std::string::npos)
838 return getBinaryDataByName(Name.str().substr(0, At) + "@PLT");
839
840 return nullptr;
841 }
842
843 /// Return true if \p SymbolName was generated internally and was not present
844 /// in the input binary.
isInternalSymbolName(const StringRef Name)845 bool isInternalSymbolName(const StringRef Name) {
846 return Name.startswith("SYMBOLat") || Name.startswith("DATAat") ||
847 Name.startswith("HOLEat");
848 }
849
getHotTextStartSymbol()850 MCSymbol *getHotTextStartSymbol() const {
851 return Ctx->getOrCreateSymbol("__hot_start");
852 }
853
getHotTextEndSymbol()854 MCSymbol *getHotTextEndSymbol() const {
855 return Ctx->getOrCreateSymbol("__hot_end");
856 }
857
getTextSection()858 MCSection *getTextSection() const { return MOFI->getTextSection(); }
859
860 /// Return code section with a given name.
getCodeSection(StringRef SectionName)861 MCSection *getCodeSection(StringRef SectionName) const {
862 if (isELF())
863 return Ctx->getELFSection(SectionName, ELF::SHT_PROGBITS,
864 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
865 else
866 return Ctx->getMachOSection("__TEXT", SectionName,
867 MachO::S_ATTR_PURE_INSTRUCTIONS,
868 SectionKind::getText());
869 }
870
871 /// Return data section with a given name.
getDataSection(StringRef SectionName)872 MCSection *getDataSection(StringRef SectionName) const {
873 return Ctx->getELFSection(SectionName, ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
874 }
875
876 /// \name Pre-assigned Section Names
877 /// @{
878
getMainCodeSectionName()879 const char *getMainCodeSectionName() const { return ".text"; }
880
getColdCodeSectionName()881 const char *getColdCodeSectionName() const { return ".text.cold"; }
882
getHotTextMoverSectionName()883 const char *getHotTextMoverSectionName() const { return ".text.mover"; }
884
getInjectedCodeSectionName()885 const char *getInjectedCodeSectionName() const { return ".text.injected"; }
886
getInjectedColdCodeSectionName()887 const char *getInjectedColdCodeSectionName() const {
888 return ".text.injected.cold";
889 }
890
getGdbIndexSection()891 ErrorOr<BinarySection &> getGdbIndexSection() const {
892 return getUniqueSectionByName(".gdb_index");
893 }
894
895 /// @}
896
897 /// Register \p TargetFunction as a fragment of \p Function if checks pass:
898 /// - if \p TargetFunction name matches \p Function name with a suffix:
899 /// fragment_name == parent_name.cold(.\d+)?
900 /// True if the Function is registered, false if the check failed.
901 bool registerFragment(BinaryFunction &TargetFunction,
902 BinaryFunction &Function) const;
903
904 /// Add unterprocedural reference for \p Function to \p Address
addInterproceduralReference(BinaryFunction * Function,uint64_t Address)905 void addInterproceduralReference(BinaryFunction *Function, uint64_t Address) {
906 InterproceduralReferences.push_back({Function, Address});
907 }
908
909 /// Used to fix the target of linker-generated AArch64 adrp + add
910 /// sequence with no relocation info.
911 void addAdrpAddRelocAArch64(BinaryFunction &BF, MCInst &LoadLowBits,
912 MCInst &LoadHiBits, uint64_t Target);
913
914 /// Return true if AArch64 veneer was successfully matched at a given
915 /// \p Address and register veneer binary function if \p MatchOnly
916 /// argument is false.
917 bool handleAArch64Veneer(uint64_t Address, bool MatchOnly = false);
918
919 /// Resolve inter-procedural dependencies from the recorded inter-procedural references.
920 void processInterproceduralReferences();
921
922 /// Skip functions with all parent and child fragments transitively.
923 void skipMarkedFragments();
924
925 /// Perform any necessary post processing on the symbol table after
926 /// function disassembly is complete. This processing fixes top
927 /// level data holes and makes sure the symbol table is valid.
928 /// It also assigns all memory profiling info to the appropriate
929 /// BinaryData objects.
930 void postProcessSymbolTable();
931
932 /// Set the size of the global symbol located at \p Address. Return
933 /// false if no symbol exists, true otherwise.
934 bool setBinaryDataSize(uint64_t Address, uint64_t Size);
935
936 /// Print the global symbol table.
937 void printGlobalSymbols(raw_ostream &OS) const;
938
939 /// Register information about the given \p Section so we can look up
940 /// sections by address.
941 BinarySection ®isterSection(SectionRef Section);
942
943 /// Register a copy of /p OriginalSection under a different name.
944 BinarySection ®isterSection(StringRef SectionName,
945 const BinarySection &OriginalSection);
946
947 /// Register or update the information for the section with the given
948 /// /p Name. If the section already exists, the information in the
949 /// section will be updated with the new data.
950 BinarySection ®isterOrUpdateSection(StringRef Name, unsigned ELFType,
951 unsigned ELFFlags,
952 uint8_t *Data = nullptr,
953 uint64_t Size = 0,
954 unsigned Alignment = 1);
955
956 /// Register the information for the note (non-allocatable) section
957 /// with the given /p Name. If the section already exists, the
958 /// information in the section will be updated with the new data.
959 BinarySection &
960 registerOrUpdateNoteSection(StringRef Name, uint8_t *Data = nullptr,
961 uint64_t Size = 0, unsigned Alignment = 1,
962 bool IsReadOnly = true,
963 unsigned ELFType = ELF::SHT_PROGBITS) {
964 return registerOrUpdateSection(Name, ELFType,
965 BinarySection::getFlags(IsReadOnly), Data,
966 Size, Alignment);
967 }
968
969 /// Remove the given \p Section from the set of all sections. Return
970 /// true if the section was removed (and deleted), otherwise false.
971 bool deregisterSection(BinarySection &Section);
972
973 /// Iterate over all registered sections.
sections()974 iterator_range<FilteredSectionIterator> sections() {
975 auto notNull = [](const SectionIterator &Itr) { return (bool)*Itr; };
976 return make_range(
977 FilteredSectionIterator(notNull, Sections.begin(), Sections.end()),
978 FilteredSectionIterator(notNull, Sections.end(), Sections.end()));
979 }
980
981 /// Iterate over all registered sections.
sections()982 iterator_range<FilteredSectionConstIterator> sections() const {
983 return const_cast<BinaryContext *>(this)->sections();
984 }
985
986 /// Iterate over all registered allocatable sections.
allocatableSections()987 iterator_range<FilteredSectionIterator> allocatableSections() {
988 auto isAllocatable = [](const SectionIterator &Itr) {
989 return *Itr && Itr->isAllocatable();
990 };
991 return make_range(
992 FilteredSectionIterator(isAllocatable, Sections.begin(),
993 Sections.end()),
994 FilteredSectionIterator(isAllocatable, Sections.end(), Sections.end()));
995 }
996
997 /// Iterate over all registered code sections.
textSections()998 iterator_range<FilteredSectionIterator> textSections() {
999 auto isText = [](const SectionIterator &Itr) {
1000 return *Itr && Itr->isAllocatable() && Itr->isText();
1001 };
1002 return make_range(
1003 FilteredSectionIterator(isText, Sections.begin(), Sections.end()),
1004 FilteredSectionIterator(isText, Sections.end(), Sections.end()));
1005 }
1006
1007 /// Iterate over all registered allocatable sections.
allocatableSections()1008 iterator_range<FilteredSectionConstIterator> allocatableSections() const {
1009 return const_cast<BinaryContext *>(this)->allocatableSections();
1010 }
1011
1012 /// Iterate over all registered non-allocatable sections.
nonAllocatableSections()1013 iterator_range<FilteredSectionIterator> nonAllocatableSections() {
1014 auto notAllocated = [](const SectionIterator &Itr) {
1015 return *Itr && !Itr->isAllocatable();
1016 };
1017 return make_range(
1018 FilteredSectionIterator(notAllocated, Sections.begin(), Sections.end()),
1019 FilteredSectionIterator(notAllocated, Sections.end(), Sections.end()));
1020 }
1021
1022 /// Iterate over all registered non-allocatable sections.
nonAllocatableSections()1023 iterator_range<FilteredSectionConstIterator> nonAllocatableSections() const {
1024 return const_cast<BinaryContext *>(this)->nonAllocatableSections();
1025 }
1026
1027 /// Iterate over all allocatable relocation sections.
allocatableRelaSections()1028 iterator_range<FilteredSectionIterator> allocatableRelaSections() {
1029 auto isAllocatableRela = [](const SectionIterator &Itr) {
1030 return *Itr && Itr->isAllocatable() && Itr->isRela();
1031 };
1032 return make_range(FilteredSectionIterator(isAllocatableRela,
1033 Sections.begin(), Sections.end()),
1034 FilteredSectionIterator(isAllocatableRela, Sections.end(),
1035 Sections.end()));
1036 }
1037
1038 /// Return base address for the shared object or PIE based on the segment
1039 /// mapping information. \p MMapAddress is an address where one of the
1040 /// segments was mapped. \p FileOffset is the offset in the file of the
1041 /// mapping. Note that \p FileOffset should be page-aligned and could be
1042 /// different from the file offset of the segment which could be unaligned.
1043 /// If no segment is found that matches \p FileOffset, return NoneType().
1044 Optional<uint64_t> getBaseAddressForMapping(uint64_t MMapAddress,
1045 uint64_t FileOffset) const;
1046
1047 /// Check if the address belongs to this binary's static allocation space.
containsAddress(uint64_t Address)1048 bool containsAddress(uint64_t Address) const {
1049 return Address >= FirstAllocAddress && Address < LayoutStartAddress;
1050 }
1051
1052 /// Return section name containing the given \p Address.
1053 ErrorOr<StringRef> getSectionNameForAddress(uint64_t Address) const;
1054
1055 /// Print all sections.
1056 void printSections(raw_ostream &OS) const;
1057
1058 /// Return largest section containing the given \p Address. These
1059 /// functions only work for allocatable sections, i.e. ones with non-zero
1060 /// addresses.
1061 ErrorOr<BinarySection &> getSectionForAddress(uint64_t Address);
getSectionForAddress(uint64_t Address)1062 ErrorOr<const BinarySection &> getSectionForAddress(uint64_t Address) const {
1063 return const_cast<BinaryContext *>(this)->getSectionForAddress(Address);
1064 }
1065
1066 /// Return section(s) associated with given \p Name.
1067 iterator_range<NameToSectionMapType::iterator>
getSectionByName(StringRef Name)1068 getSectionByName(StringRef Name) {
1069 return make_range(NameToSection.equal_range(std::string(Name)));
1070 }
1071 iterator_range<NameToSectionMapType::const_iterator>
getSectionByName(StringRef Name)1072 getSectionByName(StringRef Name) const {
1073 return make_range(NameToSection.equal_range(std::string(Name)));
1074 }
1075
1076 /// Return the unique section associated with given \p Name.
1077 /// If there is more than one section with the same name, return an error
1078 /// object.
getUniqueSectionByName(StringRef SectionName)1079 ErrorOr<BinarySection &> getUniqueSectionByName(StringRef SectionName) const {
1080 auto Sections = getSectionByName(SectionName);
1081 if (Sections.begin() != Sections.end() &&
1082 std::next(Sections.begin()) == Sections.end())
1083 return *Sections.begin()->second;
1084 return std::make_error_code(std::errc::bad_address);
1085 }
1086
1087 /// Return an unsigned value of \p Size stored at \p Address. The address has
1088 /// to be a valid statically allocated address for the binary.
1089 ErrorOr<uint64_t> getUnsignedValueAtAddress(uint64_t Address,
1090 size_t Size) const;
1091
1092 /// Return a signed value of \p Size stored at \p Address. The address has
1093 /// to be a valid statically allocated address for the binary.
1094 ErrorOr<uint64_t> getSignedValueAtAddress(uint64_t Address,
1095 size_t Size) const;
1096
1097 /// Special case of getUnsignedValueAtAddress() that uses a pointer size.
getPointerAtAddress(uint64_t Address)1098 ErrorOr<uint64_t> getPointerAtAddress(uint64_t Address) const {
1099 return getUnsignedValueAtAddress(Address, AsmInfo->getCodePointerSize());
1100 }
1101
1102 /// Replaces all references to \p ChildBF with \p ParentBF. \p ChildBF is then
1103 /// removed from the list of functions \p BFs. The profile data of \p ChildBF
1104 /// is merged into that of \p ParentBF. This function is thread safe.
1105 void foldFunction(BinaryFunction &ChildBF, BinaryFunction &ParentBF);
1106
1107 /// Add a Section relocation at a given \p Address.
1108 void addRelocation(uint64_t Address, MCSymbol *Symbol, uint64_t Type,
1109 uint64_t Addend = 0, uint64_t Value = 0);
1110
1111 /// Return a relocation registered at a given \p Address, or nullptr if there
1112 /// is no relocation at such address.
1113 const Relocation *getRelocationAt(uint64_t Address);
1114
1115 /// Register a presence of PC-relative relocation at the given \p Address.
addPCRelativeDataRelocation(uint64_t Address)1116 void addPCRelativeDataRelocation(uint64_t Address) {
1117 DataPCRelocations.emplace(Address);
1118 }
1119
1120 /// Register dynamic relocation at \p Address.
1121 void addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, uint64_t Type,
1122 uint64_t Addend, uint64_t Value = 0);
1123
1124 /// Return a dynamic relocation registered at a given \p Address, or nullptr
1125 /// if there is no dynamic relocation at such address.
1126 const Relocation *getDynamicRelocationAt(uint64_t Address);
1127
1128 /// Remove registered relocation at a given \p Address.
1129 bool removeRelocationAt(uint64_t Address);
1130
1131 /// This function makes sure that symbols referenced by ambiguous relocations
1132 /// are marked as immovable. For now, if a section relocation points at the
1133 /// boundary between two symbols then those symbols are marked as immovable.
1134 void markAmbiguousRelocations(BinaryData &BD, const uint64_t Address);
1135
1136 /// Return BinaryFunction corresponding to \p Symbol. If \p EntryDesc is not
1137 /// nullptr, set it to entry discriminator corresponding to \p Symbol
1138 /// (0 for single-entry functions). This function is thread safe.
1139 BinaryFunction *getFunctionForSymbol(const MCSymbol *Symbol,
1140 uint64_t *EntryDesc = nullptr);
1141
1142 const BinaryFunction *
1143 getFunctionForSymbol(const MCSymbol *Symbol,
1144 uint64_t *EntryDesc = nullptr) const {
1145 return const_cast<BinaryContext *>(this)->getFunctionForSymbol(Symbol,
1146 EntryDesc);
1147 }
1148
1149 /// Associate the symbol \p Sym with the function \p BF for lookups with
1150 /// getFunctionForSymbol().
setSymbolToFunctionMap(const MCSymbol * Sym,BinaryFunction * BF)1151 void setSymbolToFunctionMap(const MCSymbol *Sym, BinaryFunction *BF) {
1152 SymbolToFunctionMap[Sym] = BF;
1153 }
1154
1155 /// Populate some internal data structures with debug info.
1156 void preprocessDebugInfo();
1157
1158 /// Add a filename entry from SrcCUID to DestCUID.
1159 unsigned addDebugFilenameToUnit(const uint32_t DestCUID,
1160 const uint32_t SrcCUID, unsigned FileIndex);
1161
1162 /// Return functions in output layout order
1163 std::vector<BinaryFunction *> getSortedFunctions();
1164
1165 /// Do the best effort to calculate the size of the function by emitting
1166 /// its code, and relaxing branch instructions. By default, branch
1167 /// instructions are updated to match the layout. Pass \p FixBranches set to
1168 /// false if the branches are known to be up to date with the code layout.
1169 ///
1170 /// Return the pair where the first size is for the main part, and the second
1171 /// size is for the cold one.
1172 std::pair<size_t, size_t> calculateEmittedSize(BinaryFunction &BF,
1173 bool FixBranches = true);
1174
1175 /// Calculate the size of the instruction \p Inst optionally using a
1176 /// user-supplied emitter for lock-free multi-thread work. MCCodeEmitter is
1177 /// not thread safe and each thread should operate with its own copy of it.
1178 uint64_t
1179 computeInstructionSize(const MCInst &Inst,
1180 const MCCodeEmitter *Emitter = nullptr) const {
1181 if (auto Size = MIB->getAnnotationWithDefault<uint32_t>(Inst, "Size"))
1182 return Size;
1183
1184 if (!Emitter)
1185 Emitter = this->MCE.get();
1186 SmallString<256> Code;
1187 SmallVector<MCFixup, 4> Fixups;
1188 raw_svector_ostream VecOS(Code);
1189 Emitter->encodeInstruction(Inst, VecOS, Fixups, *STI);
1190 return Code.size();
1191 }
1192
1193 /// Compute the native code size for a range of instructions.
1194 /// Note: this can be imprecise wrt the final binary since happening prior to
1195 /// relaxation, as well as wrt the original binary because of opcode
1196 /// shortening.MCCodeEmitter is not thread safe and each thread should operate
1197 /// with its own copy of it.
1198 template <typename Itr>
1199 uint64_t computeCodeSize(Itr Beg, Itr End,
1200 const MCCodeEmitter *Emitter = nullptr) const {
1201 uint64_t Size = 0;
1202 while (Beg != End) {
1203 if (!MIB->isPseudo(*Beg))
1204 Size += computeInstructionSize(*Beg, Emitter);
1205 ++Beg;
1206 }
1207 return Size;
1208 }
1209
1210 /// Verify that assembling instruction \p Instruction results in the same sequence of
1211 /// bytes as \p Encoding.
1212 bool validateEncoding(const MCInst &Instruction,
1213 ArrayRef<uint8_t> Encoding) const;
1214
1215 /// Return a function execution count threshold for determining whether
1216 /// the function is 'hot'. Consider it hot if count is above the average exec
1217 /// count of profiled functions.
1218 uint64_t getHotThreshold() const;
1219
1220 /// Return true if instruction \p Inst requires an offset for further
1221 /// processing (e.g. assigning a profile).
keepOffsetForInstruction(const MCInst & Inst)1222 bool keepOffsetForInstruction(const MCInst &Inst) const {
1223 if (MIB->isCall(Inst) || MIB->isBranch(Inst) || MIB->isReturn(Inst) ||
1224 MIB->isPrefix(Inst) || MIB->isIndirectBranch(Inst)) {
1225 return true;
1226 }
1227 return false;
1228 }
1229
1230 /// Return true if the function should be emitted to the output file.
1231 bool shouldEmit(const BinaryFunction &Function) const;
1232
1233 /// Print the string name for a CFI operation.
1234 static void printCFI(raw_ostream &OS, const MCCFIInstruction &Inst);
1235
1236 /// Print a single MCInst in native format. If Function is non-null,
1237 /// the instruction will be annotated with CFI and possibly DWARF line table
1238 /// info.
1239 /// If printMCInst is true, the instruction is also printed in the
1240 /// architecture independent format.
1241 void printInstruction(raw_ostream &OS, const MCInst &Instruction,
1242 uint64_t Offset = 0,
1243 const BinaryFunction *Function = nullptr,
1244 bool PrintMCInst = false, bool PrintMemData = false,
1245 bool PrintRelocations = false,
1246 StringRef Endl = "\n") const;
1247
1248 /// Print a range of instructions.
1249 template <typename Itr>
1250 uint64_t
1251 printInstructions(raw_ostream &OS, Itr Begin, Itr End, uint64_t Offset = 0,
1252 const BinaryFunction *Function = nullptr,
1253 bool PrintMCInst = false, bool PrintMemData = false,
1254 bool PrintRelocations = false,
1255 StringRef Endl = "\n") const {
1256 while (Begin != End) {
1257 printInstruction(OS, *Begin, Offset, Function, PrintMCInst, PrintMemData,
1258 PrintRelocations, Endl);
1259 Offset += computeCodeSize(Begin, Begin + 1);
1260 ++Begin;
1261 }
1262 return Offset;
1263 }
1264
1265 void exitWithBugReport(StringRef Message,
1266 const BinaryFunction &Function) const;
1267
1268 struct IndependentCodeEmitter {
1269 std::unique_ptr<MCObjectFileInfo> LocalMOFI;
1270 std::unique_ptr<MCContext> LocalCtx;
1271 std::unique_ptr<MCCodeEmitter> MCE;
1272 };
1273
1274 /// Encapsulates an independent MCCodeEmitter that doesn't share resources
1275 /// with the main one available through BinaryContext::MCE, managed by
1276 /// BinaryContext.
1277 /// This is intended to create a lock-free environment for an auxiliary thread
1278 /// that needs to perform work with an MCCodeEmitter that can be transient or
1279 /// won't be used in the main code emitter.
createIndependentMCCodeEmitter()1280 IndependentCodeEmitter createIndependentMCCodeEmitter() const {
1281 IndependentCodeEmitter MCEInstance;
1282 MCEInstance.LocalCtx.reset(
1283 new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
1284 MCEInstance.LocalMOFI.reset(
1285 TheTarget->createMCObjectFileInfo(*MCEInstance.LocalCtx.get(),
1286 /*PIC=*/!HasFixedLoadAddress));
1287 MCEInstance.LocalCtx->setObjectFileInfo(MCEInstance.LocalMOFI.get());
1288 MCEInstance.MCE.reset(
1289 TheTarget->createMCCodeEmitter(*MII, *MCEInstance.LocalCtx));
1290 return MCEInstance;
1291 }
1292
1293 /// Creating MCStreamer instance.
1294 std::unique_ptr<MCStreamer>
createStreamer(llvm::raw_pwrite_stream & OS)1295 createStreamer(llvm::raw_pwrite_stream &OS) const {
1296 MCCodeEmitter *MCE = TheTarget->createMCCodeEmitter(*MII, *Ctx);
1297 MCAsmBackend *MAB =
1298 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
1299 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(OS);
1300 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
1301 *TheTriple, *Ctx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
1302 std::unique_ptr<MCCodeEmitter>(MCE), *STI,
1303 /* RelaxAll */ false,
1304 /* IncrementalLinkerCompatible */ false,
1305 /* DWARFMustBeAtTheEnd */ false));
1306 return Streamer;
1307 }
1308 };
1309
1310 template <typename T, typename = std::enable_if_t<sizeof(T) == 1>>
1311 inline raw_ostream &operator<<(raw_ostream &OS, const ArrayRef<T> &ByteArray) {
1312 const char *Sep = "";
1313 for (const auto Byte : ByteArray) {
1314 OS << Sep << format("%.2x", Byte);
1315 Sep = " ";
1316 }
1317 return OS;
1318 }
1319
1320 } // namespace bolt
1321 } // namespace llvm
1322
1323 #endif
1324