1 //===- bolt/Core/BinaryData.h - Objects in a binary file --------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the declaration of the BinaryData class, which represents 10 // an allocatable entity in a binary file, such as a data object, a jump table, 11 // or a function. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef BOLT_CORE_BINARY_DATA_H 16 #define BOLT_CORE_BINARY_DATA_H 17 18 #include "llvm/ADT/Twine.h" 19 #include "llvm/MC/MCSymbol.h" 20 #include "llvm/Support/raw_ostream.h" 21 #include <algorithm> 22 #include <string> 23 #include <vector> 24 25 namespace llvm { 26 namespace bolt { 27 28 class BinarySection; 29 30 /// \p BinaryData represents an indivisible part of a data section section. 31 /// BinaryData's may contain sub-components, e.g. jump tables but they are 32 /// considered to be part of the parent symbol in terms of divisibility and 33 /// reordering. 34 class BinaryData { 35 friend class BinaryContext; 36 /// Non-null if this BinaryData is contained in a larger BinaryData object, 37 /// i.e. the start and end addresses are contained within another object. 38 BinaryData *Parent{nullptr}; 39 40 // non-copyable 41 BinaryData() = delete; 42 BinaryData(const BinaryData &) = delete; 43 BinaryData &operator=(const BinaryData &) = delete; 44 45 protected: 46 /// All symbols associated with this data. 47 std::vector<MCSymbol *> Symbols; 48 49 /// Section this data belongs to. 50 BinarySection *Section{nullptr}; 51 52 /// Start address of this symbol. 53 uint64_t Address{0}; 54 /// Size of this data (can be 0). 55 uint64_t Size{0}; 56 /// Alignment of this data. 57 uint16_t Alignment{1}; 58 59 bool IsMoveable{true}; 60 61 /// Symbol flags (same as llvm::SymbolRef::Flags) 62 unsigned Flags{0}; 63 64 /// Output section for this data if it has been moved from the original 65 /// section. 66 BinarySection *OutputSection{nullptr}; 67 68 /// The offset of this symbol in the output section. This is different 69 /// from \p Address - Section.getAddress() when the data has been reordered. 70 uint64_t OutputOffset{0}; 71 getRootData()72 BinaryData *getRootData() { 73 BinaryData *BD = this; 74 while (BD->Parent) 75 BD = BD->Parent; 76 return BD; 77 } 78 79 public: 80 BinaryData(BinaryData &&) = default; 81 BinaryData(MCSymbol &Symbol, uint64_t Address, uint64_t Size, 82 uint16_t Alignment, BinarySection &Section, unsigned Flags = 0); ~BinaryData()83 virtual ~BinaryData() {} 84 isJumpTable()85 virtual bool isJumpTable() const { return false; } isObject()86 virtual bool isObject() const { return !isJumpTable(); } 87 virtual void merge(const BinaryData *Other); 88 isTopLevelJumpTable()89 bool isTopLevelJumpTable() const { 90 return (isJumpTable() && 91 (!Parent || (!Parent->Parent && Parent->isObject()))); 92 } 93 94 // BinaryData that is considered atomic and potentially moveable. All 95 // MemInfo data and relocations should be wrt. to atomic data. isAtomic()96 bool isAtomic() const { return isTopLevelJumpTable() || !Parent; } 97 symbols()98 iterator_range<std::vector<MCSymbol *>::const_iterator> symbols() const { 99 return make_range(Symbols.begin(), Symbols.end()); 100 } 101 getName()102 StringRef getName() const { return getSymbol()->getName(); } 103 getSymbol()104 MCSymbol *getSymbol() { return Symbols.front(); } getSymbol()105 const MCSymbol *getSymbol() const { return Symbols.front(); } 106 getSymbols()107 const std::vector<MCSymbol *> &getSymbols() const { return Symbols; } getSymbols()108 std::vector<MCSymbol *> &getSymbols() { return Symbols; } 109 110 bool hasName(StringRef Name) const; 111 bool hasNameRegex(StringRef Name) const; 112 bool nameStartsWith(StringRef Prefix) const; 113 hasSymbol(const MCSymbol * Symbol)114 bool hasSymbol(const MCSymbol *Symbol) const { 115 return llvm::is_contained(Symbols, Symbol); 116 } 117 118 bool isAbsolute() const; 119 bool isMoveable() const; 120 getAddress()121 uint64_t getAddress() const { return Address; } getEndAddress()122 uint64_t getEndAddress() const { return Address + Size; } 123 uint64_t getOffset() const; getSize()124 uint64_t getSize() const { return Size; } getAlignment()125 uint16_t getAlignment() const { return Alignment; } 126 getSection()127 BinarySection &getSection() { return *Section; } getSection()128 const BinarySection &getSection() const { return *Section; } 129 StringRef getSectionName() const; 130 getOutputSection()131 BinarySection &getOutputSection() { return *OutputSection; } getOutputSection()132 const BinarySection &getOutputSection() const { return *OutputSection; } 133 StringRef getOutputSectionName() const; 134 uint64_t getOutputAddress() const; getOutputOffset()135 uint64_t getOutputOffset() const { return OutputOffset; } getOutputSize()136 uint64_t getOutputSize() const { return Size; } 137 138 bool isMoved() const; containsAddress(uint64_t Address)139 bool containsAddress(uint64_t Address) const { 140 return ((getAddress() <= Address && Address < getEndAddress()) || 141 (getAddress() == Address && !getSize())); 142 } containsRange(uint64_t Address,uint64_t Size)143 bool containsRange(uint64_t Address, uint64_t Size) const { 144 return containsAddress(Address) && Address + Size <= getEndAddress(); 145 } 146 getParent()147 const BinaryData *getParent() const { return Parent; } 148 getRootData()149 const BinaryData *getRootData() const { 150 const BinaryData *BD = this; 151 while (BD->Parent) 152 BD = BD->Parent; 153 return BD; 154 } 155 getAtomicRoot()156 BinaryData *getAtomicRoot() { 157 BinaryData *BD = this; 158 while (!BD->isAtomic() && BD->Parent) 159 BD = BD->Parent; 160 return BD; 161 } 162 getAtomicRoot()163 const BinaryData *getAtomicRoot() const { 164 const BinaryData *BD = this; 165 while (!BD->isAtomic() && BD->Parent) 166 BD = BD->Parent; 167 return BD; 168 } 169 isAncestorOf(const BinaryData * BD)170 bool isAncestorOf(const BinaryData *BD) const { 171 return Parent && (Parent == BD || Parent->isAncestorOf(BD)); 172 } 173 setIsMoveable(bool Flag)174 void setIsMoveable(bool Flag) { IsMoveable = Flag; } 175 void setSection(BinarySection &NewSection); setOutputSection(BinarySection & NewSection)176 void setOutputSection(BinarySection &NewSection) { 177 OutputSection = &NewSection; 178 } setOutputOffset(uint64_t Offset)179 void setOutputOffset(uint64_t Offset) { OutputOffset = Offset; } setOutputLocation(BinarySection & NewSection,uint64_t NewOffset)180 void setOutputLocation(BinarySection &NewSection, uint64_t NewOffset) { 181 setOutputSection(NewSection); 182 setOutputOffset(NewOffset); 183 } 184 185 virtual void printBrief(raw_ostream &OS) const; 186 virtual void print(raw_ostream &OS) const; 187 }; 188 189 inline raw_ostream &operator<<(raw_ostream &OS, const BinaryData &BD) { 190 BD.printBrief(OS); 191 return OS; 192 } 193 194 /// Address access info used for memory profiling. 195 struct AddressAccess { 196 BinaryData *MemoryObject; /// Object accessed or nullptr 197 uint64_t Offset; /// Offset within the object or absolute address 198 uint64_t Count; /// Number of accesses 199 bool operator==(const AddressAccess &Other) const { 200 return MemoryObject == Other.MemoryObject && Offset == Other.Offset && 201 Count == Other.Count; 202 } 203 }; 204 205 /// Aggregated memory access info per instruction. 206 struct MemoryAccessProfile { 207 uint64_t NextInstrOffset; 208 SmallVector<AddressAccess, 4> AddressAccessInfo; 209 bool operator==(const MemoryAccessProfile &Other) const { 210 return NextInstrOffset == Other.NextInstrOffset && 211 AddressAccessInfo == Other.AddressAccessInfo; 212 } 213 }; 214 215 inline raw_ostream &operator<<(raw_ostream &OS, 216 const bolt::MemoryAccessProfile &MAP) { 217 std::string TempString; 218 raw_string_ostream SS(TempString); 219 220 const char *Sep = "\n "; 221 uint64_t TotalCount = 0; 222 for (const AddressAccess &AccessInfo : MAP.AddressAccessInfo) { 223 SS << Sep << "{ "; 224 if (AccessInfo.MemoryObject) 225 SS << AccessInfo.MemoryObject->getName() << " + "; 226 SS << "0x" << Twine::utohexstr(AccessInfo.Offset) << ": " 227 << AccessInfo.Count << " }"; 228 Sep = ",\n "; 229 TotalCount += AccessInfo.Count; 230 } 231 SS.flush(); 232 233 OS << TotalCount << " total counts : " << TempString; 234 return OS; 235 } 236 237 } // namespace bolt 238 } // namespace llvm 239 240 #endif 241