1 //===- bolt/Core/BinarySection.h - Section in a binary file -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the declaration of the BinarySection class, which
10 // represents a section in an executable file and contains its properties,
11 // flags, contents, and relocations.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef BOLT_CORE_BINARY_SECTION_H
16 #define BOLT_CORE_BINARY_SECTION_H
17 
18 #include "bolt/Core/DebugData.h"
19 #include "bolt/Core/Relocation.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/BinaryFormat/ELF.h"
23 #include "llvm/Object/ELFObjectFile.h"
24 #include "llvm/Object/MachO.h"
25 #include "llvm/Object/ObjectFile.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <map>
28 #include <memory>
29 #include <set>
30 
31 namespace llvm {
32 class MCStreamer;
33 class MCSymbol;
34 
35 using namespace object;
36 
37 namespace bolt {
38 
39 class BinaryContext;
40 class BinaryData;
41 
42 /// A class to manage binary sections that also manages related relocations.
43 class BinarySection {
44   friend class BinaryContext;
45 
46   BinaryContext &BC;           // Owning BinaryContext
47   std::string Name;            // Section name
48   const SectionRef Section;    // SectionRef (may be null)
49   StringRef Contents;          // Input section contents
50   const uint64_t Address;      // Address of section in input binary (may be 0)
51   const uint64_t Size;         // Input section size
52   uint64_t InputFileOffset{0}; // Offset in the input binary
53   unsigned Alignment;          // alignment in bytes (must be > 0)
54   unsigned ELFType;            // ELF section type
55   unsigned ELFFlags;           // ELF section flags
56 
  // Relocations associated with this section. Relocation offsets are
  // with respect to the original section address and size.
59   using RelocationSetType = std::set<Relocation, std::less<>>;
60   RelocationSetType Relocations;
61 
62   // Dynamic relocations associated with this section. Relocation offsets are
63   // from the original section address.
64   RelocationSetType DynamicRelocations;
65 
66   // Pending relocations for this section.
67   std::vector<Relocation> PendingRelocations;
68 
69   struct BinaryPatch {
70     uint64_t Offset;
71     SmallString<8> Bytes;
72 
BinaryPatchBinaryPatch73     BinaryPatch(uint64_t Offset, const SmallVectorImpl<char> &Bytes)
74         : Offset(Offset), Bytes(Bytes.begin(), Bytes.end()) {}
75   };
76   std::vector<BinaryPatch> Patches;
77   /// Patcher used to apply simple changes to sections of the input binary.
78   std::unique_ptr<BinaryPatcher> Patcher;
79 
80   // Output info
81   bool IsFinalized{false};         // Has this section had output information
82                                    // finalized?
83   std::string OutputName;          // Output section name (if the section has
84                                    // been renamed)
85   uint64_t OutputAddress{0};       // Section address for the rewritten binary.
86   uint64_t OutputSize{0};          // Section size in the rewritten binary.
87   uint64_t OutputFileOffset{0};    // File offset in the rewritten binary file.
88   StringRef OutputContents;        // Rewritten section contents.
89   unsigned SectionID{-1u};         // Unique ID used for address mapping.
90                                    // Set by ExecutableFileMemoryManager.
91   uint32_t Index{0};               // Section index in the output file.
92   mutable bool IsReordered{false}; // Have the contents been reordered?
93   bool IsAnonymous{false};         // True if the name should not be included
94                                    // in the output file.
95 
96   uint64_t hash(const BinaryData &BD,
97                 std::map<const BinaryData *, uint64_t> &Cache) const;
98 
99   // non-copyable
100   BinarySection(const BinarySection &) = delete;
101   BinarySection(BinarySection &&) = delete;
102   BinarySection &operator=(const BinarySection &) = delete;
103   BinarySection &operator=(BinarySection &&) = delete;
104 
getName(SectionRef Section)105   static StringRef getName(SectionRef Section) {
106     return cantFail(Section.getName());
107   }
getContents(SectionRef Section)108   static StringRef getContents(SectionRef Section) {
109     if (Section.getObject()->isELF() &&
110         ELFSectionRef(Section).getType() == ELF::SHT_NOBITS)
111       return StringRef();
112 
113     Expected<StringRef> ContentsOrErr = Section.getContents();
114     if (!ContentsOrErr) {
115       Error E = ContentsOrErr.takeError();
116       errs() << "BOLT-ERROR: cannot get section contents for "
117              << getName(Section) << ": " << E << ".\n";
118       exit(1);
119     }
120     return *ContentsOrErr;
121   }
122 
  /// Get the set of relocations referring to data in this section that
  /// has been reordered.  The relocation offsets will be modified to
  /// reflect the new data locations.
126   RelocationSetType reorderRelocations(bool Inplace) const;
127 
128   /// Set output info for this section.
update(uint8_t * NewData,uint64_t NewSize,unsigned NewAlignment,unsigned NewELFType,unsigned NewELFFlags)129   void update(uint8_t *NewData, uint64_t NewSize, unsigned NewAlignment,
130               unsigned NewELFType, unsigned NewELFFlags) {
131     assert(NewAlignment > 0 && "section alignment must be > 0");
132     Alignment = NewAlignment;
133     ELFType = NewELFType;
134     ELFFlags = NewELFFlags;
135     OutputSize = NewSize;
136     OutputContents = StringRef(reinterpret_cast<const char *>(NewData),
137                                NewData ? NewSize : 0);
138     IsFinalized = true;
139   }
140 
141 public:
  /// Copy a section under a new \p Name.
  ///
  /// The section reference, input contents, address, size, alignment, ELF
  /// type/flags, and the regular and pending relocations are taken from
  /// \p Section. The output name starts out equal to \p Name.
  /// NOTE(review): InputFileOffset, DynamicRelocations, Patches, and Patcher
  /// are not copied and keep their defaults -- confirm this is intentional.
  explicit BinarySection(BinaryContext &BC, StringRef Name,
                         const BinarySection &Section)
      : BC(BC), Name(Name), Section(Section.getSectionRef()),
        Contents(Section.getContents()), Address(Section.getAddress()),
        Size(Section.getSize()), Alignment(Section.getAlignment()),
        ELFType(Section.getELFType()), ELFFlags(Section.getELFFlags()),
        Relocations(Section.Relocations),
        PendingRelocations(Section.PendingRelocations), OutputName(Name) {}
151 
  /// Create a section describing the input-file section \p Section.
  ///
  /// Name, contents, address, size, and alignment are read from \p Section.
  /// For ELF inputs the section type, flags, and file offset are read as
  /// well; for Mach-O inputs only the file offset is read.
  /// NOTE(review): ELFType and ELFFlags are not assigned on the non-ELF path
  /// of this constructor -- confirm they are never read for such sections.
  BinarySection(BinaryContext &BC, SectionRef Section)
      : BC(BC), Name(getName(Section)), Section(Section),
        Contents(getContents(Section)), Address(Section.getAddress()),
        Size(Section.getSize()), Alignment(Section.getAlignment()),
        OutputName(Name) {
    if (isELF()) {
      ELFType = ELFSectionRef(Section).getType();
      ELFFlags = ELFSectionRef(Section).getFlags();
      InputFileOffset = ELFSectionRef(Section).getOffset();
    } else if (isMachO()) {
      auto *O = cast<MachOObjectFile>(Section.getObject());
      // Read the offset from the 64-bit or 32-bit section record, depending
      // on the object file.
      InputFileOffset =
          O->is64Bit() ? O->getSection64(Section.getRawDataRefImpl()).offset
                       : O->getSection(Section.getRawDataRefImpl()).offset;
    }
  }
168 
  /// Create a section named \p Name from an in-memory buffer.
  ///
  /// The section gets input address 0, is marked finalized immediately, and
  /// uses the same bytes for both its input and output contents. If \p Data
  /// is null the contents are empty, although Size and OutputSize are still
  /// set to \p Size. \p Alignment must be greater than zero.
  // TODO: pass Data as StringRef/ArrayRef? use StringRef::copy method.
  BinarySection(BinaryContext &BC, StringRef Name, uint8_t *Data, uint64_t Size,
                unsigned Alignment, unsigned ELFType, unsigned ELFFlags)
      : BC(BC), Name(Name),
        Contents(reinterpret_cast<const char *>(Data), Data ? Size : 0),
        Address(0), Size(Size), Alignment(Alignment), ELFType(ELFType),
        ELFFlags(ELFFlags), IsFinalized(true), OutputName(Name),
        OutputSize(Size), OutputContents(Contents) {
    assert(Alignment > 0 && "section alignment must be > 0");
  }
179 
180   ~BinarySection();
181 
182   /// Helper function to generate the proper ELF flags from section properties.
183   static unsigned getFlags(bool IsReadOnly = true, bool IsText = false,
184                            bool IsAllocatable = false) {
185     unsigned Flags = 0;
186     if (IsAllocatable)
187       Flags |= ELF::SHF_ALLOC;
188     if (!IsReadOnly)
189       Flags |= ELF::SHF_WRITE;
190     if (IsText)
191       Flags |= ELF::SHF_EXECINSTR;
192     return Flags;
193   }
194 
  /// A section converts to true unless its ELF type is SHT_NULL.
  /// NOTE(review): the conversion is implicit; consider making it explicit
  /// if no caller relies on implicit conversion.
  operator bool() const { return ELFType != ELF::SHT_NULL; }
196 
197   bool operator==(const BinarySection &Other) const {
198     return (Name == Other.Name && Address == Other.Address &&
199             Size == Other.Size && getData() == Other.getData() &&
200             Alignment == Other.Alignment && ELFType == Other.ELFType &&
201             ELFFlags == Other.ELFFlags);
202   }
203 
204   bool operator!=(const BinarySection &Other) const {
205     return !operator==(Other);
206   }
207 
208   // Order sections by their immutable properties.
209   bool operator<(const BinarySection &Other) const {
210     return (getAddress() < Other.getAddress() ||
211             (getAddress() == Other.getAddress() &&
212              (getSize() < Other.getSize() ||
213               (getSize() == Other.getSize() && getName() < Other.getName()))));
214   }
215 
  ///
  /// Basic property access.
  ///

  /// The BinaryContext this section was created with.
  BinaryContext &getBinaryContext() { return BC; }
  bool isELF() const;
  bool isMachO() const;
  /// Section name.
  StringRef getName() const { return Name; }
  /// Address of the section in the input binary (may be 0).
  uint64_t getAddress() const { return Address; }
  /// One past the last input address: getAddress() + getSize().
  uint64_t getEndAddress() const { return Address + Size; }
  /// Input section size.
  uint64_t getSize() const { return Size; }
  /// Offset of the section in the input binary.
  uint64_t getInputFileOffset() const { return InputFileOffset; }
  /// Alignment in bytes, widened from the stored unsigned value.
  uint64_t getAlignment() const { return Alignment; }
isText()228   bool isText() const {
229     if (isELF())
230       return (ELFFlags & ELF::SHF_EXECINSTR);
231     return getSectionRef().isText();
232   }
isData()233   bool isData() const {
234     if (isELF())
235       return (ELFType == ELF::SHT_PROGBITS &&
236               (ELFFlags & (ELF::SHF_ALLOC | ELF::SHF_WRITE)));
237     return getSectionRef().isData();
238   }
isBSS()239   bool isBSS() const {
240     return (ELFType == ELF::SHT_NOBITS &&
241             (ELFFlags & (ELF::SHF_ALLOC | ELF::SHF_WRITE)));
242   }
  /// Is the SHF_TLS flag set?
  bool isTLS() const { return (ELFFlags & ELF::SHF_TLS); }
  /// Is this both a BSS section and a TLS section?
  bool isTBSS() const { return isBSS() && isTLS(); }
  /// Is the section type SHT_NOBITS?
  bool isVirtual() const { return ELFType == ELF::SHT_NOBITS; }
  /// Is the section type SHT_RELA?
  bool isRela() const { return ELFType == ELF::SHT_RELA; }
isReadOnly()247   bool isReadOnly() const {
248     return ((ELFFlags & ELF::SHF_ALLOC) && !(ELFFlags & ELF::SHF_WRITE) &&
249             ELFType == ELF::SHT_PROGBITS);
250   }
isAllocatable()251   bool isAllocatable() const {
252     if (isELF()) {
253       return (ELFFlags & ELF::SHF_ALLOC) && !isTBSS();
254     } else {
255       // On non-ELF assume all sections are allocatable.
256       return true;
257     }
258   }
  /// Have the contents been reordered?
  bool isReordered() const { return IsReordered; }
  /// True if the name should not be included in the output file.
  bool isAnonymous() const { return IsAnonymous; }
  /// ELF section type.
  unsigned getELFType() const { return ELFType; }
  /// ELF section flags.
  unsigned getELFFlags() const { return ELFFlags; }
263 
  /// Mutable pointer to the first byte of the input contents. The pointer
  /// aliases the buffer referenced by Contents; constness is cast away.
  uint8_t *getData() {
    return reinterpret_cast<uint8_t *>(
        const_cast<char *>(getContents().data()));
  }
  /// Read-only pointer to the first byte of the input contents.
  const uint8_t *getData() const {
    return reinterpret_cast<const uint8_t *>(getContents().data());
  }
  /// Input section contents.
  StringRef getContents() const { return Contents; }
  /// Reset the input contents to an empty StringRef.
  void clearContents() { Contents = {}; }
  /// Does this section refer to something other than a default-constructed
  /// SectionRef?
  bool hasSectionRef() const { return Section != SectionRef(); }
  /// The underlying SectionRef (may be null).
  SectionRef getSectionRef() const { return Section; }
275 
276   /// Does this section contain the given \p Address?
277   /// Note: this is in terms of the original mapped binary addresses.
containsAddress(uint64_t Address)278   bool containsAddress(uint64_t Address) const {
279     return (getAddress() <= Address && Address < getEndAddress()) ||
280            (getSize() == 0 && getAddress() == Address);
281   }
282 
283   /// Does this section contain the range [\p Address, \p Address + \p Size)?
284   /// Note: this is in terms of the original mapped binary addresses.
containsRange(uint64_t Address,uint64_t Size)285   bool containsRange(uint64_t Address, uint64_t Size) const {
286     return containsAddress(Address) && Address + Size <= getEndAddress();
287   }
288 
289   /// Iterate over all non-pending relocations for this section.
relocations()290   iterator_range<RelocationSetType::iterator> relocations() {
291     return make_range(Relocations.begin(), Relocations.end());
292   }
293 
294   /// Iterate over all non-pending relocations for this section.
relocations()295   iterator_range<RelocationSetType::const_iterator> relocations() const {
296     return make_range(Relocations.begin(), Relocations.end());
297   }
298 
299   /// Iterate over all dynamic relocations for this section.
dynamicRelocations()300   iterator_range<RelocationSetType::iterator> dynamicRelocations() {
301     return make_range(DynamicRelocations.begin(), DynamicRelocations.end());
302   }
303 
304   /// Iterate over all dynamic relocations for this section.
dynamicRelocations()305   iterator_range<RelocationSetType::const_iterator> dynamicRelocations() const {
306     return make_range(DynamicRelocations.begin(), DynamicRelocations.end());
307   }
308 
  /// Does this section have any non-pending relocations, i.e. is the
  /// Relocations set non-empty?
  bool hasRelocations() const { return !Relocations.empty(); }

  /// Does this section have any pending relocations, i.e. is the
  /// PendingRelocations list non-empty?
  bool hasPendingRelocations() const { return !PendingRelocations.empty(); }
314 
315   /// Remove non-pending relocation with the given /p Offset.
removeRelocationAt(uint64_t Offset)316   bool removeRelocationAt(uint64_t Offset) {
317     auto Itr = Relocations.find(Offset);
318     if (Itr != Relocations.end()) {
319       Relocations.erase(Itr);
320       return true;
321     }
322     return false;
323   }
324 
325   void clearRelocations();
326 
327   /// Add a new relocation at the given /p Offset.
328   void addRelocation(uint64_t Offset, MCSymbol *Symbol, uint64_t Type,
329                      uint64_t Addend, uint64_t Value = 0,
330                      bool Pending = false) {
331     assert(Offset < getSize() && "offset not within section bounds");
332     if (!Pending) {
333       Relocations.emplace(Relocation{Offset, Symbol, Type, Addend, Value});
334     } else {
335       PendingRelocations.emplace_back(
336           Relocation{Offset, Symbol, Type, Addend, Value});
337     }
338   }
339 
340   /// Add a dynamic relocation at the given /p Offset.
341   void addDynamicRelocation(uint64_t Offset, MCSymbol *Symbol, uint64_t Type,
342                             uint64_t Addend, uint64_t Value = 0) {
343     assert(Offset < getSize() && "offset not within section bounds");
344     DynamicRelocations.emplace(Relocation{Offset, Symbol, Type, Addend, Value});
345   }
346 
347   /// Add relocation against the original contents of this section.
addPendingRelocation(const Relocation & Rel)348   void addPendingRelocation(const Relocation &Rel) {
349     PendingRelocations.push_back(Rel);
350   }
351 
352   /// Add patch to the input contents of this section.
addPatch(uint64_t Offset,const SmallVectorImpl<char> & Bytes)353   void addPatch(uint64_t Offset, const SmallVectorImpl<char> &Bytes) {
354     Patches.emplace_back(BinaryPatch(Offset, Bytes));
355   }
356 
  /// Register patcher for this section. Ownership of \p BPatcher is taken
  /// over; any previously registered patcher is replaced.
  void registerPatcher(std::unique_ptr<BinaryPatcher> BPatcher) {
    Patcher = std::move(BPatcher);
  }

  /// Returns the registered patcher, or nullptr if none has been registered.
  /// The section keeps ownership.
  BinaryPatcher *getPatcher() { return Patcher.get(); }
364 
365   /// Lookup the relocation (if any) at the given /p Offset.
getRelocationAt(uint64_t Offset)366   const Relocation *getRelocationAt(uint64_t Offset) const {
367     auto Itr = Relocations.find(Offset);
368     return Itr != Relocations.end() ? &*Itr : nullptr;
369   }
370 
371   /// Lookup the relocation (if any) at the given /p Offset.
getDynamicRelocationAt(uint64_t Offset)372   const Relocation *getDynamicRelocationAt(uint64_t Offset) const {
373     Relocation Key{Offset, 0, 0, 0, 0};
374     auto Itr = DynamicRelocations.find(Key);
375     return Itr != DynamicRelocations.end() ? &*Itr : nullptr;
376   }
377 
hash(const BinaryData & BD)378   uint64_t hash(const BinaryData &BD) const {
379     std::map<const BinaryData *, uint64_t> Cache;
380     return hash(BD, Cache);
381   }
382 
  ///
  /// Property accessors related to output data.
  ///

  /// Has this section had its output information finalized?
  bool isFinalized() const { return IsFinalized; }
  /// Mark the output information as finalized.
  void setIsFinalized() { IsFinalized = true; }
  /// Output section name.
  StringRef getOutputName() const { return OutputName; }
  /// Section size in the rewritten binary.
  uint64_t getOutputSize() const { return OutputSize; }
  /// Mutable pointer to the first byte of the output contents. The pointer
  /// aliases the buffer referenced by OutputContents; constness is cast away.
  uint8_t *getOutputData() {
    return reinterpret_cast<uint8_t *>(
        const_cast<char *>(getOutputContents().data()));
  }
  /// Read-only pointer to the first byte of the output contents.
  const uint8_t *getOutputData() const {
    return reinterpret_cast<const uint8_t *>(getOutputContents().data());
  }
  /// Rewritten section contents.
  StringRef getOutputContents() const { return OutputContents; }
  /// The output data pointer reinterpreted as an integer.
  uint64_t getAllocAddress() const {
    return reinterpret_cast<uint64_t>(getOutputData());
  }
  /// Section address for the rewritten binary.
  uint64_t getOutputAddress() const { return OutputAddress; }
  /// File offset in the rewritten binary file.
  uint64_t getOutputFileOffset() const { return OutputFileOffset; }
  /// Unique section ID; asserts that an ID has been set.
  unsigned getSectionID() const {
    assert(hasValidSectionID() && "trying to use uninitialized section id");
    return SectionID;
  }
  /// Has a section ID been assigned (anything other than the -1u sentinel)?
  bool hasValidSectionID() const { return SectionID != -1u; }
  /// Section index in the output file.
  uint32_t getIndex() const { return Index; }
410 
  // mutation

  /// Set the section address for the rewritten binary.
  void setOutputAddress(uint64_t Address) { OutputAddress = Address; }
  /// Set the file offset in the rewritten binary file.
  void setOutputFileOffset(uint64_t Offset) { OutputFileOffset = Offset; }
  /// Assign the section ID; asserts that no ID has been set before.
  void setSectionID(unsigned ID) {
    assert(!hasValidSectionID() && "trying to set section id twice");
    SectionID = ID;
  }
  /// Set the section index in the output file.
  void setIndex(uint32_t I) { Index = I; }
  /// Set the output section name; \p Name is copied into owned storage.
  void setOutputName(StringRef Name) { OutputName = std::string(Name); }
  /// Set whether the name should be left out of the output file.
  void setAnonymous(bool Flag) { IsAnonymous = Flag; }
421 
  /// Emit the section as data, possibly with relocations. Use name \p NewName
  /// for the section during emission if non-empty.
424   void emitAsData(MCStreamer &Streamer, StringRef NewName = StringRef()) const;
425 
426   using SymbolResolverFuncTy = llvm::function_ref<uint64_t(const MCSymbol *)>;
427 
428   /// Flush all pending relocations to patch original contents of sections
429   /// that were not emitted via MCStreamer.
430   void flushPendingRelocations(raw_pwrite_stream &OS,
431                                SymbolResolverFuncTy Resolver);
432 
  /// Reorder the contents of this section according to \p Order.  If
  /// \p Inplace is true, the entire contents of the section is reordered,
  /// otherwise the new contents contain only the reordered data.
436   void reorderContents(const std::vector<BinaryData *> &Order, bool Inplace);
437 
438   void print(raw_ostream &OS) const;
439 
440   /// Write the contents of an ELF note section given the name of the producer,
441   /// a number identifying the type of note and the contents of the note in
442   /// \p DescStr.
443   static std::string encodeELFNote(StringRef NameStr, StringRef DescStr,
444                                    uint32_t Type);
445 
446   /// Code for ELF notes written by producer 'BOLT'
447   enum { NT_BOLT_BAT = 1, NT_BOLT_INSTRUMENTATION_TABLES = 2 };
448 };
449 
/// Return a newly allocated copy of the \p Size bytes starting at \p Data.
///
/// The array is allocated with new[]; the caller is responsible for releasing
/// it with delete[]. \p Data may be null when \p Size is zero, in which case
/// a valid zero-length array is still returned.
inline uint8_t *copyByteArray(const uint8_t *Data, uint64_t Size) {
  auto *Array = new uint8_t[Size];
  // memcpy requires valid pointers even for a zero-length copy, so skip the
  // call entirely when there is nothing to copy.
  if (Size != 0)
    memcpy(Array, Data, Size);
  return Array;
}
455 
copyByteArray(StringRef Buffer)456 inline uint8_t *copyByteArray(StringRef Buffer) {
457   return copyByteArray(reinterpret_cast<const uint8_t *>(Buffer.data()),
458                        Buffer.size());
459 }
460 
copyByteArray(ArrayRef<char> Buffer)461 inline uint8_t *copyByteArray(ArrayRef<char> Buffer) {
462   return copyByteArray(reinterpret_cast<const uint8_t *>(Buffer.data()),
463                        Buffer.size());
464 }
465 
/// Stream \p Section to \p OS by delegating to BinarySection::print().
/// \returns \p OS so that insertions can be chained.
inline raw_ostream &operator<<(raw_ostream &OS, const BinarySection &Section) {
  Section.print(OS);
  return OS;
}
470 
/// Plain record describing a single SDT marker (presumably a
/// statically-defined tracing probe read from a note section -- TODO
/// confirm). The members have no default initializers, so every field must
/// be filled in by whoever creates the record.
struct SDTMarkerInfo {
  uint64_t PC;
  uint64_t Base;
  uint64_t Semaphore;
  StringRef Provider;
  StringRef Name;
  StringRef Args;

  /// The offset of PC within the note section
  unsigned PCOffset;
};
482 
/// Linux Kernel special sections point to a specific instruction in many cases.
/// Unlike SDTMarkerInfo, these markers can come from different sections.
/// The members have no default initializers, so every field must be filled in
/// by whoever creates the record.
struct LKInstructionMarkerInfo {
  uint64_t SectionOffset;
  int32_t PCRelativeOffset;
  bool IsPCRelative;
  // Name of the section the marker comes from (presumably -- TODO confirm).
  StringRef SectionName;
};
491 
492 } // namespace bolt
493 } // namespace llvm
494 
495 #endif
496