1 //===- bolt/Core/BinaryData.h - Objects in a binary file --------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the declaration of the BinaryData class, which represents
10 // an allocatable entity in a binary file, such as a data object, a jump table,
11 // or a function.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef BOLT_CORE_BINARY_DATA_H
16 #define BOLT_CORE_BINARY_DATA_H
17 
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/MC/MCSymbol.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include <algorithm>
22 #include <string>
23 #include <vector>
24 
25 namespace llvm {
26 namespace bolt {
27 
28 class BinarySection;
29 
30 /// \p BinaryData represents an indivisible part of a data section section.
31 /// BinaryData's may contain sub-components, e.g. jump tables but they are
32 /// considered to be part of the parent symbol in terms of divisibility and
33 /// reordering.
34 class BinaryData {
35   friend class BinaryContext;
36   /// Non-null if this BinaryData is contained in a larger BinaryData object,
37   /// i.e. the start and end addresses are contained within another object.
38   BinaryData *Parent{nullptr};
39 
40   // non-copyable
41   BinaryData() = delete;
42   BinaryData(const BinaryData &) = delete;
43   BinaryData &operator=(const BinaryData &) = delete;
44 
45 protected:
46   /// All symbols associated with this data.
47   std::vector<MCSymbol *> Symbols;
48 
49   /// Section this data belongs to.
50   BinarySection *Section{nullptr};
51 
52   /// Start address of this symbol.
53   uint64_t Address{0};
54   /// Size of this data (can be 0).
55   uint64_t Size{0};
56   /// Alignment of this data.
57   uint16_t Alignment{1};
58 
59   bool IsMoveable{true};
60 
61   /// Symbol flags (same as llvm::SymbolRef::Flags)
62   unsigned Flags{0};
63 
64   /// Output section for this data if it has been moved from the original
65   /// section.
66   BinarySection *OutputSection{nullptr};
67 
68   /// The offset of this symbol in the output section.  This is different
69   /// from \p Address - Section.getAddress() when the data has been reordered.
70   uint64_t OutputOffset{0};
71 
getRootData()72   BinaryData *getRootData() {
73     BinaryData *BD = this;
74     while (BD->Parent)
75       BD = BD->Parent;
76     return BD;
77   }
78 
79 public:
80   BinaryData(BinaryData &&) = default;
81   BinaryData(MCSymbol &Symbol, uint64_t Address, uint64_t Size,
82              uint16_t Alignment, BinarySection &Section, unsigned Flags = 0);
~BinaryData()83   virtual ~BinaryData() {}
84 
isJumpTable()85   virtual bool isJumpTable() const { return false; }
isObject()86   virtual bool isObject() const { return !isJumpTable(); }
87   virtual void merge(const BinaryData *Other);
88 
isTopLevelJumpTable()89   bool isTopLevelJumpTable() const {
90     return (isJumpTable() &&
91             (!Parent || (!Parent->Parent && Parent->isObject())));
92   }
93 
94   // BinaryData that is considered atomic and potentially moveable.  All
95   // MemInfo data and relocations should be wrt. to atomic data.
isAtomic()96   bool isAtomic() const { return isTopLevelJumpTable() || !Parent; }
97 
symbols()98   iterator_range<std::vector<MCSymbol *>::const_iterator> symbols() const {
99     return make_range(Symbols.begin(), Symbols.end());
100   }
101 
getName()102   StringRef getName() const { return getSymbol()->getName(); }
103 
getSymbol()104   MCSymbol *getSymbol() { return Symbols.front(); }
getSymbol()105   const MCSymbol *getSymbol() const { return Symbols.front(); }
106 
getSymbols()107   const std::vector<MCSymbol *> &getSymbols() const { return Symbols; }
getSymbols()108   std::vector<MCSymbol *> &getSymbols() { return Symbols; }
109 
110   bool hasName(StringRef Name) const;
111   bool hasNameRegex(StringRef Name) const;
112   bool nameStartsWith(StringRef Prefix) const;
113 
hasSymbol(const MCSymbol * Symbol)114   bool hasSymbol(const MCSymbol *Symbol) const {
115     return llvm::is_contained(Symbols, Symbol);
116   }
117 
118   bool isAbsolute() const;
119   bool isMoveable() const;
120 
getAddress()121   uint64_t getAddress() const { return Address; }
getEndAddress()122   uint64_t getEndAddress() const { return Address + Size; }
123   uint64_t getOffset() const;
getSize()124   uint64_t getSize() const { return Size; }
getAlignment()125   uint16_t getAlignment() const { return Alignment; }
126 
getSection()127   BinarySection &getSection() { return *Section; }
getSection()128   const BinarySection &getSection() const { return *Section; }
129   StringRef getSectionName() const;
130 
getOutputSection()131   BinarySection &getOutputSection() { return *OutputSection; }
getOutputSection()132   const BinarySection &getOutputSection() const { return *OutputSection; }
133   StringRef getOutputSectionName() const;
134   uint64_t getOutputAddress() const;
getOutputOffset()135   uint64_t getOutputOffset() const { return OutputOffset; }
getOutputSize()136   uint64_t getOutputSize() const { return Size; }
137 
138   bool isMoved() const;
containsAddress(uint64_t Address)139   bool containsAddress(uint64_t Address) const {
140     return ((getAddress() <= Address && Address < getEndAddress()) ||
141             (getAddress() == Address && !getSize()));
142   }
containsRange(uint64_t Address,uint64_t Size)143   bool containsRange(uint64_t Address, uint64_t Size) const {
144     return containsAddress(Address) && Address + Size <= getEndAddress();
145   }
146 
getParent()147   const BinaryData *getParent() const { return Parent; }
148 
getRootData()149   const BinaryData *getRootData() const {
150     const BinaryData *BD = this;
151     while (BD->Parent)
152       BD = BD->Parent;
153     return BD;
154   }
155 
getAtomicRoot()156   BinaryData *getAtomicRoot() {
157     BinaryData *BD = this;
158     while (!BD->isAtomic() && BD->Parent)
159       BD = BD->Parent;
160     return BD;
161   }
162 
getAtomicRoot()163   const BinaryData *getAtomicRoot() const {
164     const BinaryData *BD = this;
165     while (!BD->isAtomic() && BD->Parent)
166       BD = BD->Parent;
167     return BD;
168   }
169 
isAncestorOf(const BinaryData * BD)170   bool isAncestorOf(const BinaryData *BD) const {
171     return Parent && (Parent == BD || Parent->isAncestorOf(BD));
172   }
173 
setIsMoveable(bool Flag)174   void setIsMoveable(bool Flag) { IsMoveable = Flag; }
175   void setSection(BinarySection &NewSection);
setOutputSection(BinarySection & NewSection)176   void setOutputSection(BinarySection &NewSection) {
177     OutputSection = &NewSection;
178   }
setOutputOffset(uint64_t Offset)179   void setOutputOffset(uint64_t Offset) { OutputOffset = Offset; }
setOutputLocation(BinarySection & NewSection,uint64_t NewOffset)180   void setOutputLocation(BinarySection &NewSection, uint64_t NewOffset) {
181     setOutputSection(NewSection);
182     setOutputOffset(NewOffset);
183   }
184 
185   virtual void printBrief(raw_ostream &OS) const;
186   virtual void print(raw_ostream &OS) const;
187 };
188 
189 inline raw_ostream &operator<<(raw_ostream &OS, const BinaryData &BD) {
190   BD.printBrief(OS);
191   return OS;
192 }
193 
194 /// Address access info used for memory profiling.
195 struct AddressAccess {
196   BinaryData *MemoryObject; /// Object accessed or nullptr
197   uint64_t Offset;          /// Offset within the object or absolute address
198   uint64_t Count;           /// Number of accesses
199   bool operator==(const AddressAccess &Other) const {
200     return MemoryObject == Other.MemoryObject && Offset == Other.Offset &&
201            Count == Other.Count;
202   }
203 };
204 
205 /// Aggregated memory access info per instruction.
206 struct MemoryAccessProfile {
207   uint64_t NextInstrOffset;
208   SmallVector<AddressAccess, 4> AddressAccessInfo;
209   bool operator==(const MemoryAccessProfile &Other) const {
210     return NextInstrOffset == Other.NextInstrOffset &&
211            AddressAccessInfo == Other.AddressAccessInfo;
212   }
213 };
214 
215 inline raw_ostream &operator<<(raw_ostream &OS,
216                                const bolt::MemoryAccessProfile &MAP) {
217   std::string TempString;
218   raw_string_ostream SS(TempString);
219 
220   const char *Sep = "\n        ";
221   uint64_t TotalCount = 0;
222   for (const AddressAccess &AccessInfo : MAP.AddressAccessInfo) {
223     SS << Sep << "{ ";
224     if (AccessInfo.MemoryObject)
225       SS << AccessInfo.MemoryObject->getName() << " + ";
226     SS << "0x" << Twine::utohexstr(AccessInfo.Offset) << ": "
227        << AccessInfo.Count << " }";
228     Sep = ",\n        ";
229     TotalCount += AccessInfo.Count;
230   }
231   SS.flush();
232 
233   OS << TotalCount << " total counts : " << TempString;
234   return OS;
235 }
236 
237 } // namespace bolt
238 } // namespace llvm
239 
240 #endif
241