1 //===- bolt/Core/BinarySection.h - Section in a binary file -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the declaration of the BinarySection class, which
10 // represents a section in an executable file and contains its properties,
11 // flags, contents, and relocations.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #ifndef BOLT_CORE_BINARY_SECTION_H
16 #define BOLT_CORE_BINARY_SECTION_H
17
18 #include "bolt/Core/DebugData.h"
19 #include "bolt/Core/Relocation.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/BinaryFormat/ELF.h"
23 #include "llvm/Object/ELFObjectFile.h"
24 #include "llvm/Object/MachO.h"
25 #include "llvm/Object/ObjectFile.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <map>
28 #include <memory>
29 #include <set>
30
31 namespace llvm {
32 class MCStreamer;
33 class MCSymbol;
34
35 using namespace object;
36
37 namespace bolt {
38
39 class BinaryContext;
40 class BinaryData;
41
42 /// A class to manage binary sections that also manages related relocations.
43 class BinarySection {
44 friend class BinaryContext;
45
46 BinaryContext &BC; // Owning BinaryContext
47 std::string Name; // Section name
48 const SectionRef Section; // SectionRef (may be null)
49 StringRef Contents; // Input section contents
50 const uint64_t Address; // Address of section in input binary (may be 0)
51 const uint64_t Size; // Input section size
52 uint64_t InputFileOffset{0}; // Offset in the input binary
53 unsigned Alignment; // alignment in bytes (must be > 0)
54 unsigned ELFType; // ELF section type
55 unsigned ELFFlags; // ELF section flags
56
57 // Relocations associated with this section. Relocation offsets are
58 // wrt. to the original section address and size.
59 using RelocationSetType = std::set<Relocation, std::less<>>;
60 RelocationSetType Relocations;
61
62 // Dynamic relocations associated with this section. Relocation offsets are
63 // from the original section address.
64 RelocationSetType DynamicRelocations;
65
66 // Pending relocations for this section.
67 std::vector<Relocation> PendingRelocations;
68
69 struct BinaryPatch {
70 uint64_t Offset;
71 SmallString<8> Bytes;
72
BinaryPatchBinaryPatch73 BinaryPatch(uint64_t Offset, const SmallVectorImpl<char> &Bytes)
74 : Offset(Offset), Bytes(Bytes.begin(), Bytes.end()) {}
75 };
76 std::vector<BinaryPatch> Patches;
77 /// Patcher used to apply simple changes to sections of the input binary.
78 std::unique_ptr<BinaryPatcher> Patcher;
79
80 // Output info
81 bool IsFinalized{false}; // Has this section had output information
82 // finalized?
83 std::string OutputName; // Output section name (if the section has
84 // been renamed)
85 uint64_t OutputAddress{0}; // Section address for the rewritten binary.
86 uint64_t OutputSize{0}; // Section size in the rewritten binary.
87 uint64_t OutputFileOffset{0}; // File offset in the rewritten binary file.
88 StringRef OutputContents; // Rewritten section contents.
89 unsigned SectionID{-1u}; // Unique ID used for address mapping.
90 // Set by ExecutableFileMemoryManager.
91 uint32_t Index{0}; // Section index in the output file.
92 mutable bool IsReordered{false}; // Have the contents been reordered?
93 bool IsAnonymous{false}; // True if the name should not be included
94 // in the output file.
95
96 uint64_t hash(const BinaryData &BD,
97 std::map<const BinaryData *, uint64_t> &Cache) const;
98
99 // non-copyable
100 BinarySection(const BinarySection &) = delete;
101 BinarySection(BinarySection &&) = delete;
102 BinarySection &operator=(const BinarySection &) = delete;
103 BinarySection &operator=(BinarySection &&) = delete;
104
getName(SectionRef Section)105 static StringRef getName(SectionRef Section) {
106 return cantFail(Section.getName());
107 }
getContents(SectionRef Section)108 static StringRef getContents(SectionRef Section) {
109 if (Section.getObject()->isELF() &&
110 ELFSectionRef(Section).getType() == ELF::SHT_NOBITS)
111 return StringRef();
112
113 Expected<StringRef> ContentsOrErr = Section.getContents();
114 if (!ContentsOrErr) {
115 Error E = ContentsOrErr.takeError();
116 errs() << "BOLT-ERROR: cannot get section contents for "
117 << getName(Section) << ": " << E << ".\n";
118 exit(1);
119 }
120 return *ContentsOrErr;
121 }
122
123 /// Get the set of relocations refering to data in this section that
124 /// has been reordered. The relocation offsets will be modified to
125 /// reflect the new data locations.
126 RelocationSetType reorderRelocations(bool Inplace) const;
127
128 /// Set output info for this section.
update(uint8_t * NewData,uint64_t NewSize,unsigned NewAlignment,unsigned NewELFType,unsigned NewELFFlags)129 void update(uint8_t *NewData, uint64_t NewSize, unsigned NewAlignment,
130 unsigned NewELFType, unsigned NewELFFlags) {
131 assert(NewAlignment > 0 && "section alignment must be > 0");
132 Alignment = NewAlignment;
133 ELFType = NewELFType;
134 ELFFlags = NewELFFlags;
135 OutputSize = NewSize;
136 OutputContents = StringRef(reinterpret_cast<const char *>(NewData),
137 NewData ? NewSize : 0);
138 IsFinalized = true;
139 }
140
141 public:
142 /// Copy a section.
BinarySection(BinaryContext & BC,StringRef Name,const BinarySection & Section)143 explicit BinarySection(BinaryContext &BC, StringRef Name,
144 const BinarySection &Section)
145 : BC(BC), Name(Name), Section(Section.getSectionRef()),
146 Contents(Section.getContents()), Address(Section.getAddress()),
147 Size(Section.getSize()), Alignment(Section.getAlignment()),
148 ELFType(Section.getELFType()), ELFFlags(Section.getELFFlags()),
149 Relocations(Section.Relocations),
150 PendingRelocations(Section.PendingRelocations), OutputName(Name) {}
151
BinarySection(BinaryContext & BC,SectionRef Section)152 BinarySection(BinaryContext &BC, SectionRef Section)
153 : BC(BC), Name(getName(Section)), Section(Section),
154 Contents(getContents(Section)), Address(Section.getAddress()),
155 Size(Section.getSize()), Alignment(Section.getAlignment()),
156 OutputName(Name) {
157 if (isELF()) {
158 ELFType = ELFSectionRef(Section).getType();
159 ELFFlags = ELFSectionRef(Section).getFlags();
160 InputFileOffset = ELFSectionRef(Section).getOffset();
161 } else if (isMachO()) {
162 auto *O = cast<MachOObjectFile>(Section.getObject());
163 InputFileOffset =
164 O->is64Bit() ? O->getSection64(Section.getRawDataRefImpl()).offset
165 : O->getSection(Section.getRawDataRefImpl()).offset;
166 }
167 }
168
169 // TODO: pass Data as StringRef/ArrayRef? use StringRef::copy method.
BinarySection(BinaryContext & BC,StringRef Name,uint8_t * Data,uint64_t Size,unsigned Alignment,unsigned ELFType,unsigned ELFFlags)170 BinarySection(BinaryContext &BC, StringRef Name, uint8_t *Data, uint64_t Size,
171 unsigned Alignment, unsigned ELFType, unsigned ELFFlags)
172 : BC(BC), Name(Name),
173 Contents(reinterpret_cast<const char *>(Data), Data ? Size : 0),
174 Address(0), Size(Size), Alignment(Alignment), ELFType(ELFType),
175 ELFFlags(ELFFlags), IsFinalized(true), OutputName(Name),
176 OutputSize(Size), OutputContents(Contents) {
177 assert(Alignment > 0 && "section alignment must be > 0");
178 }
179
180 ~BinarySection();
181
182 /// Helper function to generate the proper ELF flags from section properties.
183 static unsigned getFlags(bool IsReadOnly = true, bool IsText = false,
184 bool IsAllocatable = false) {
185 unsigned Flags = 0;
186 if (IsAllocatable)
187 Flags |= ELF::SHF_ALLOC;
188 if (!IsReadOnly)
189 Flags |= ELF::SHF_WRITE;
190 if (IsText)
191 Flags |= ELF::SHF_EXECINSTR;
192 return Flags;
193 }
194
195 operator bool() const { return ELFType != ELF::SHT_NULL; }
196
197 bool operator==(const BinarySection &Other) const {
198 return (Name == Other.Name && Address == Other.Address &&
199 Size == Other.Size && getData() == Other.getData() &&
200 Alignment == Other.Alignment && ELFType == Other.ELFType &&
201 ELFFlags == Other.ELFFlags);
202 }
203
204 bool operator!=(const BinarySection &Other) const {
205 return !operator==(Other);
206 }
207
208 // Order sections by their immutable properties.
209 bool operator<(const BinarySection &Other) const {
210 return (getAddress() < Other.getAddress() ||
211 (getAddress() == Other.getAddress() &&
212 (getSize() < Other.getSize() ||
213 (getSize() == Other.getSize() && getName() < Other.getName()))));
214 }
215
216 ///
217 /// Basic property access.
218 ///
getBinaryContext()219 BinaryContext &getBinaryContext() { return BC; }
220 bool isELF() const;
221 bool isMachO() const;
getName()222 StringRef getName() const { return Name; }
getAddress()223 uint64_t getAddress() const { return Address; }
getEndAddress()224 uint64_t getEndAddress() const { return Address + Size; }
getSize()225 uint64_t getSize() const { return Size; }
getInputFileOffset()226 uint64_t getInputFileOffset() const { return InputFileOffset; }
getAlignment()227 uint64_t getAlignment() const { return Alignment; }
isText()228 bool isText() const {
229 if (isELF())
230 return (ELFFlags & ELF::SHF_EXECINSTR);
231 return getSectionRef().isText();
232 }
isData()233 bool isData() const {
234 if (isELF())
235 return (ELFType == ELF::SHT_PROGBITS &&
236 (ELFFlags & (ELF::SHF_ALLOC | ELF::SHF_WRITE)));
237 return getSectionRef().isData();
238 }
isBSS()239 bool isBSS() const {
240 return (ELFType == ELF::SHT_NOBITS &&
241 (ELFFlags & (ELF::SHF_ALLOC | ELF::SHF_WRITE)));
242 }
isTLS()243 bool isTLS() const { return (ELFFlags & ELF::SHF_TLS); }
isTBSS()244 bool isTBSS() const { return isBSS() && isTLS(); }
isVirtual()245 bool isVirtual() const { return ELFType == ELF::SHT_NOBITS; }
isRela()246 bool isRela() const { return ELFType == ELF::SHT_RELA; }
isReadOnly()247 bool isReadOnly() const {
248 return ((ELFFlags & ELF::SHF_ALLOC) && !(ELFFlags & ELF::SHF_WRITE) &&
249 ELFType == ELF::SHT_PROGBITS);
250 }
isAllocatable()251 bool isAllocatable() const {
252 if (isELF()) {
253 return (ELFFlags & ELF::SHF_ALLOC) && !isTBSS();
254 } else {
255 // On non-ELF assume all sections are allocatable.
256 return true;
257 }
258 }
isReordered()259 bool isReordered() const { return IsReordered; }
isAnonymous()260 bool isAnonymous() const { return IsAnonymous; }
getELFType()261 unsigned getELFType() const { return ELFType; }
getELFFlags()262 unsigned getELFFlags() const { return ELFFlags; }
263
getData()264 uint8_t *getData() {
265 return reinterpret_cast<uint8_t *>(
266 const_cast<char *>(getContents().data()));
267 }
getData()268 const uint8_t *getData() const {
269 return reinterpret_cast<const uint8_t *>(getContents().data());
270 }
getContents()271 StringRef getContents() const { return Contents; }
clearContents()272 void clearContents() { Contents = {}; }
hasSectionRef()273 bool hasSectionRef() const { return Section != SectionRef(); }
getSectionRef()274 SectionRef getSectionRef() const { return Section; }
275
276 /// Does this section contain the given \p Address?
277 /// Note: this is in terms of the original mapped binary addresses.
containsAddress(uint64_t Address)278 bool containsAddress(uint64_t Address) const {
279 return (getAddress() <= Address && Address < getEndAddress()) ||
280 (getSize() == 0 && getAddress() == Address);
281 }
282
283 /// Does this section contain the range [\p Address, \p Address + \p Size)?
284 /// Note: this is in terms of the original mapped binary addresses.
containsRange(uint64_t Address,uint64_t Size)285 bool containsRange(uint64_t Address, uint64_t Size) const {
286 return containsAddress(Address) && Address + Size <= getEndAddress();
287 }
288
289 /// Iterate over all non-pending relocations for this section.
relocations()290 iterator_range<RelocationSetType::iterator> relocations() {
291 return make_range(Relocations.begin(), Relocations.end());
292 }
293
294 /// Iterate over all non-pending relocations for this section.
relocations()295 iterator_range<RelocationSetType::const_iterator> relocations() const {
296 return make_range(Relocations.begin(), Relocations.end());
297 }
298
299 /// Iterate over all dynamic relocations for this section.
dynamicRelocations()300 iterator_range<RelocationSetType::iterator> dynamicRelocations() {
301 return make_range(DynamicRelocations.begin(), DynamicRelocations.end());
302 }
303
304 /// Iterate over all dynamic relocations for this section.
dynamicRelocations()305 iterator_range<RelocationSetType::const_iterator> dynamicRelocations() const {
306 return make_range(DynamicRelocations.begin(), DynamicRelocations.end());
307 }
308
309 /// Does this section have any non-pending relocations?
hasRelocations()310 bool hasRelocations() const { return !Relocations.empty(); }
311
312 /// Does this section have any pending relocations?
hasPendingRelocations()313 bool hasPendingRelocations() const { return !PendingRelocations.empty(); }
314
315 /// Remove non-pending relocation with the given /p Offset.
removeRelocationAt(uint64_t Offset)316 bool removeRelocationAt(uint64_t Offset) {
317 auto Itr = Relocations.find(Offset);
318 if (Itr != Relocations.end()) {
319 Relocations.erase(Itr);
320 return true;
321 }
322 return false;
323 }
324
325 void clearRelocations();
326
327 /// Add a new relocation at the given /p Offset.
328 void addRelocation(uint64_t Offset, MCSymbol *Symbol, uint64_t Type,
329 uint64_t Addend, uint64_t Value = 0,
330 bool Pending = false) {
331 assert(Offset < getSize() && "offset not within section bounds");
332 if (!Pending) {
333 Relocations.emplace(Relocation{Offset, Symbol, Type, Addend, Value});
334 } else {
335 PendingRelocations.emplace_back(
336 Relocation{Offset, Symbol, Type, Addend, Value});
337 }
338 }
339
340 /// Add a dynamic relocation at the given /p Offset.
341 void addDynamicRelocation(uint64_t Offset, MCSymbol *Symbol, uint64_t Type,
342 uint64_t Addend, uint64_t Value = 0) {
343 assert(Offset < getSize() && "offset not within section bounds");
344 DynamicRelocations.emplace(Relocation{Offset, Symbol, Type, Addend, Value});
345 }
346
347 /// Add relocation against the original contents of this section.
addPendingRelocation(const Relocation & Rel)348 void addPendingRelocation(const Relocation &Rel) {
349 PendingRelocations.push_back(Rel);
350 }
351
352 /// Add patch to the input contents of this section.
addPatch(uint64_t Offset,const SmallVectorImpl<char> & Bytes)353 void addPatch(uint64_t Offset, const SmallVectorImpl<char> &Bytes) {
354 Patches.emplace_back(BinaryPatch(Offset, Bytes));
355 }
356
357 /// Register patcher for this section.
registerPatcher(std::unique_ptr<BinaryPatcher> BPatcher)358 void registerPatcher(std::unique_ptr<BinaryPatcher> BPatcher) {
359 Patcher = std::move(BPatcher);
360 }
361
362 /// Returns the patcher
getPatcher()363 BinaryPatcher *getPatcher() { return Patcher.get(); }
364
365 /// Lookup the relocation (if any) at the given /p Offset.
getRelocationAt(uint64_t Offset)366 const Relocation *getRelocationAt(uint64_t Offset) const {
367 auto Itr = Relocations.find(Offset);
368 return Itr != Relocations.end() ? &*Itr : nullptr;
369 }
370
371 /// Lookup the relocation (if any) at the given /p Offset.
getDynamicRelocationAt(uint64_t Offset)372 const Relocation *getDynamicRelocationAt(uint64_t Offset) const {
373 Relocation Key{Offset, 0, 0, 0, 0};
374 auto Itr = DynamicRelocations.find(Key);
375 return Itr != DynamicRelocations.end() ? &*Itr : nullptr;
376 }
377
hash(const BinaryData & BD)378 uint64_t hash(const BinaryData &BD) const {
379 std::map<const BinaryData *, uint64_t> Cache;
380 return hash(BD, Cache);
381 }
382
383 ///
384 /// Property accessors related to output data.
385 ///
386
isFinalized()387 bool isFinalized() const { return IsFinalized; }
setIsFinalized()388 void setIsFinalized() { IsFinalized = true; }
getOutputName()389 StringRef getOutputName() const { return OutputName; }
getOutputSize()390 uint64_t getOutputSize() const { return OutputSize; }
getOutputData()391 uint8_t *getOutputData() {
392 return reinterpret_cast<uint8_t *>(
393 const_cast<char *>(getOutputContents().data()));
394 }
getOutputData()395 const uint8_t *getOutputData() const {
396 return reinterpret_cast<const uint8_t *>(getOutputContents().data());
397 }
getOutputContents()398 StringRef getOutputContents() const { return OutputContents; }
getAllocAddress()399 uint64_t getAllocAddress() const {
400 return reinterpret_cast<uint64_t>(getOutputData());
401 }
getOutputAddress()402 uint64_t getOutputAddress() const { return OutputAddress; }
getOutputFileOffset()403 uint64_t getOutputFileOffset() const { return OutputFileOffset; }
getSectionID()404 unsigned getSectionID() const {
405 assert(hasValidSectionID() && "trying to use uninitialized section id");
406 return SectionID;
407 }
hasValidSectionID()408 bool hasValidSectionID() const { return SectionID != -1u; }
getIndex()409 uint32_t getIndex() const { return Index; }
410
411 // mutation
setOutputAddress(uint64_t Address)412 void setOutputAddress(uint64_t Address) { OutputAddress = Address; }
setOutputFileOffset(uint64_t Offset)413 void setOutputFileOffset(uint64_t Offset) { OutputFileOffset = Offset; }
setSectionID(unsigned ID)414 void setSectionID(unsigned ID) {
415 assert(!hasValidSectionID() && "trying to set section id twice");
416 SectionID = ID;
417 }
setIndex(uint32_t I)418 void setIndex(uint32_t I) { Index = I; }
setOutputName(StringRef Name)419 void setOutputName(StringRef Name) { OutputName = std::string(Name); }
setAnonymous(bool Flag)420 void setAnonymous(bool Flag) { IsAnonymous = Flag; }
421
422 /// Emit the section as data, possibly with relocations. Use name \p NewName
423 // for the section during emission if non-empty.
424 void emitAsData(MCStreamer &Streamer, StringRef NewName = StringRef()) const;
425
426 using SymbolResolverFuncTy = llvm::function_ref<uint64_t(const MCSymbol *)>;
427
428 /// Flush all pending relocations to patch original contents of sections
429 /// that were not emitted via MCStreamer.
430 void flushPendingRelocations(raw_pwrite_stream &OS,
431 SymbolResolverFuncTy Resolver);
432
433 /// Reorder the contents of this section according to /p Order. If
434 /// /p Inplace is true, the entire contents of the section is reordered,
435 /// otherwise the new contents contain only the reordered data.
436 void reorderContents(const std::vector<BinaryData *> &Order, bool Inplace);
437
438 void print(raw_ostream &OS) const;
439
440 /// Write the contents of an ELF note section given the name of the producer,
441 /// a number identifying the type of note and the contents of the note in
442 /// \p DescStr.
443 static std::string encodeELFNote(StringRef NameStr, StringRef DescStr,
444 uint32_t Type);
445
446 /// Code for ELF notes written by producer 'BOLT'
447 enum { NT_BOLT_BAT = 1, NT_BOLT_INSTRUMENTATION_TABLES = 2 };
448 };
449
/// Return a newly allocated copy of the first \p Size bytes at \p Data.
///
/// The buffer is allocated with new[] and returned as a raw pointer. A
/// \p Size of 0 yields a valid zero-length allocation, and \p Data is not
/// touched in that case, so it may be null.
inline uint8_t *copyByteArray(const uint8_t *Data, uint64_t Size) {
  auto *Array = new uint8_t[Size];
  // memcpy requires valid pointers even when asked to copy zero bytes, so
  // skip the call for an empty (possibly null) source buffer.
  if (Size != 0)
    memcpy(Array, Data, Size);
  return Array;
}
455
copyByteArray(StringRef Buffer)456 inline uint8_t *copyByteArray(StringRef Buffer) {
457 return copyByteArray(reinterpret_cast<const uint8_t *>(Buffer.data()),
458 Buffer.size());
459 }
460
copyByteArray(ArrayRef<char> Buffer)461 inline uint8_t *copyByteArray(ArrayRef<char> Buffer) {
462 return copyByteArray(reinterpret_cast<const uint8_t *>(Buffer.data()),
463 Buffer.size());
464 }
465
466 inline raw_ostream &operator<<(raw_ostream &OS, const BinarySection &Section) {
467 Section.print(OS);
468 return OS;
469 }
470
471 struct SDTMarkerInfo {
472 uint64_t PC;
473 uint64_t Base;
474 uint64_t Semaphore;
475 StringRef Provider;
476 StringRef Name;
477 StringRef Args;
478
479 /// The offset of PC within the note section
480 unsigned PCOffset;
481 };
482
483 /// Linux Kernel special sections point to a specific instruction in many cases.
484 /// Unlike SDTMarkerInfo, these markers can come from different sections.
485 struct LKInstructionMarkerInfo {
486 uint64_t SectionOffset;
487 int32_t PCRelativeOffset;
488 bool IsPCRelative;
489 StringRef SectionName;
490 };
491
492 } // namespace bolt
493 } // namespace llvm
494
495 #endif
496