1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_MACHO_INPUT_FILES_H 10 #define LLD_MACHO_INPUT_FILES_H 11 12 #include "MachOStructs.h" 13 #include "Target.h" 14 15 #include "lld/Common/LLVM.h" 16 #include "lld/Common/Memory.h" 17 #include "llvm/ADT/CachedHashString.h" 18 #include "llvm/ADT/DenseSet.h" 19 #include "llvm/ADT/SetVector.h" 20 #include "llvm/BinaryFormat/MachO.h" 21 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 22 #include "llvm/Object/Archive.h" 23 #include "llvm/Support/MemoryBuffer.h" 24 #include "llvm/TextAPI/TextAPIReader.h" 25 26 #include <vector> 27 28 namespace llvm { 29 namespace lto { 30 class InputFile; 31 } // namespace lto 32 namespace MachO { 33 class InterfaceFile; 34 } // namespace MachO 35 class TarWriter; 36 } // namespace llvm 37 38 namespace lld { 39 namespace macho { 40 41 struct PlatformInfo; 42 class ConcatInputSection; 43 class Symbol; 44 class Defined; 45 struct Reloc; 46 enum class RefState : uint8_t; 47 48 // If --reproduce option is given, all input files are written 49 // to this tar archive. 50 extern std::unique_ptr<llvm::TarWriter> tar; 51 52 // If .subsections_via_symbols is set, each InputSection will be split along 53 // symbol boundaries. The field offset represents the offset of the subsection 54 // from the start of the original pre-split InputSection. 55 struct Subsection { 56 uint64_t offset = 0; 57 InputSection *isec = nullptr; 58 }; 59 60 using Subsections = std::vector<Subsection>; 61 62 struct Section { 63 uint64_t address = 0; 64 Subsections subsections; 65 Section(uint64_t addr) : address(addr){}; 66 }; 67 68 // Represents a call graph profile edge. 69 struct CallGraphEntry { 70 // The index of the caller in the symbol table. 71 uint32_t fromIndex; 72 // The index of the callee in the symbol table. 73 uint32_t toIndex; 74 // Number of calls from callee to caller in the profile. 75 uint64_t count; 76 }; 77 78 class InputFile { 79 public: 80 enum Kind { 81 ObjKind, 82 OpaqueKind, 83 DylibKind, 84 ArchiveKind, 85 BitcodeKind, 86 }; 87 88 virtual ~InputFile() = default; 89 Kind kind() const { return fileKind; } 90 StringRef getName() const { return name; } 91 static void resetIdCount() { idCount = 0; } 92 93 MemoryBufferRef mb; 94 95 std::vector<Symbol *> symbols; 96 std::vector<Section> sections; 97 // Provides an easy way to sort InputFiles deterministically. 98 const int id; 99 100 // If not empty, this stores the name of the archive containing this file. 101 // We use this string for creating error messages. 102 std::string archiveName; 103 104 protected: 105 InputFile(Kind kind, MemoryBufferRef mb) 106 : mb(mb), id(idCount++), fileKind(kind), name(mb.getBufferIdentifier()) {} 107 108 InputFile(Kind, const llvm::MachO::InterfaceFile &); 109 110 private: 111 const Kind fileKind; 112 const StringRef name; 113 114 static int idCount; 115 }; 116 117 // .o file 118 class ObjFile final : public InputFile { 119 public: 120 ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName); 121 ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const; 122 123 static bool classof(const InputFile *f) { return f->kind() == ObjKind; } 124 125 llvm::DWARFUnit *compileUnit = nullptr; 126 const uint32_t modTime; 127 std::vector<ConcatInputSection *> debugSections; 128 std::vector<CallGraphEntry> callGraph; 129 130 private: 131 Section *compactUnwindSection = nullptr; 132 133 template <class LP> void parse(); 134 template <class SectionHeader> void parseSections(ArrayRef<SectionHeader>); 135 template <class LP> 136 void parseSymbols(ArrayRef<typename LP::section> sectionHeaders, 137 ArrayRef<typename LP::nlist> nList, const char *strtab, 138 bool subsectionsViaSymbols); 139 template <class NList> 140 Symbol *parseNonSectionSymbol(const NList &sym, StringRef name); 141 template <class SectionHeader> 142 void parseRelocations(ArrayRef<SectionHeader> sectionHeaders, 143 const SectionHeader &, Subsections &); 144 void parseDebugInfo(); 145 void registerCompactUnwind(); 146 }; 147 148 // command-line -sectcreate file 149 class OpaqueFile final : public InputFile { 150 public: 151 OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName); 152 static bool classof(const InputFile *f) { return f->kind() == OpaqueKind; } 153 }; 154 155 // .dylib or .tbd file 156 class DylibFile final : public InputFile { 157 public: 158 // Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the 159 // symbols in those sub-libraries will be available under the umbrella 160 // library's namespace. Those sub-libraries can also have their own 161 // re-exports. When loading a re-exported dylib, `umbrella` should be set to 162 // the root dylib to ensure symbols in the child library are correctly bound 163 // to the root. On the other hand, if a dylib is being directly loaded 164 // (through an -lfoo flag), then `umbrella` should be a nullptr. 165 explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella, 166 bool isBundleLoader = false); 167 explicit DylibFile(const llvm::MachO::InterfaceFile &interface, 168 DylibFile *umbrella = nullptr, 169 bool isBundleLoader = false); 170 171 void parseLoadCommands(MemoryBufferRef mb); 172 void parseReexports(const llvm::MachO::InterfaceFile &interface); 173 174 static bool classof(const InputFile *f) { return f->kind() == DylibKind; } 175 176 StringRef installName; 177 DylibFile *exportingFile = nullptr; 178 DylibFile *umbrella; 179 SmallVector<StringRef, 2> rpaths; 180 uint32_t compatibilityVersion = 0; 181 uint32_t currentVersion = 0; 182 int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel 183 RefState refState; 184 bool reexport = false; 185 bool forceNeeded = false; 186 bool forceWeakImport = false; 187 bool deadStrippable = false; 188 bool explicitlyLinked = false; 189 190 unsigned numReferencedSymbols = 0; 191 192 bool isReferenced() const { return numReferencedSymbols > 0; } 193 194 // An executable can be used as a bundle loader that will load the output 195 // file being linked, and that contains symbols referenced, but not 196 // implemented in the bundle. When used like this, it is very similar 197 // to a Dylib, so we re-used the same class to represent it. 198 bool isBundleLoader; 199 200 private: 201 bool handleLDSymbol(StringRef originalName); 202 void handleLDPreviousSymbol(StringRef name, StringRef originalName); 203 void handleLDInstallNameSymbol(StringRef name, StringRef originalName); 204 void handleLDHideSymbol(StringRef name, StringRef originalName); 205 void checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const; 206 207 llvm::DenseSet<llvm::CachedHashStringRef> hiddenSymbols; 208 }; 209 210 // .a file 211 class ArchiveFile final : public InputFile { 212 public: 213 explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file); 214 void addLazySymbols(); 215 void fetch(const llvm::object::Archive::Symbol &); 216 // LLD normally doesn't use Error for error-handling, but the underlying 217 // Archive library does, so this is the cleanest way to wrap it. 218 Error fetch(const llvm::object::Archive::Child &, StringRef reason); 219 const llvm::object::Archive &getArchive() const { return *file; }; 220 static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } 221 222 private: 223 std::unique_ptr<llvm::object::Archive> file; 224 // Keep track of children fetched from the archive by tracking 225 // which address offsets have been fetched already. 226 llvm::DenseSet<uint64_t> seen; 227 }; 228 229 class BitcodeFile final : public InputFile { 230 public: 231 explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName, 232 uint64_t offsetInArchive); 233 static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } 234 235 std::unique_ptr<llvm::lto::InputFile> obj; 236 }; 237 238 extern llvm::SetVector<InputFile *> inputFiles; 239 extern llvm::DenseMap<llvm::CachedHashStringRef, MemoryBufferRef> cachedReads; 240 241 llvm::Optional<MemoryBufferRef> readFile(StringRef path); 242 243 namespace detail { 244 245 template <class CommandType, class... Types> 246 std::vector<const CommandType *> 247 findCommands(const void *anyHdr, size_t maxCommands, Types... types) { 248 std::vector<const CommandType *> cmds; 249 std::initializer_list<uint32_t> typesList{types...}; 250 const auto *hdr = reinterpret_cast<const llvm::MachO::mach_header *>(anyHdr); 251 const uint8_t *p = 252 reinterpret_cast<const uint8_t *>(hdr) + target->headerSize; 253 for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) { 254 auto *cmd = reinterpret_cast<const CommandType *>(p); 255 if (llvm::is_contained(typesList, cmd->cmd)) { 256 cmds.push_back(cmd); 257 if (cmds.size() == maxCommands) 258 return cmds; 259 } 260 p += cmd->cmdsize; 261 } 262 return cmds; 263 } 264 265 } // namespace detail 266 267 // anyHdr should be a pointer to either mach_header or mach_header_64 268 template <class CommandType = llvm::MachO::load_command, class... Types> 269 const CommandType *findCommand(const void *anyHdr, Types... types) { 270 std::vector<const CommandType *> cmds = 271 detail::findCommands<CommandType>(anyHdr, 1, types...); 272 return cmds.size() ? cmds[0] : nullptr; 273 } 274 275 template <class CommandType = llvm::MachO::load_command, class... Types> 276 std::vector<const CommandType *> findCommands(const void *anyHdr, 277 Types... types) { 278 return detail::findCommands<CommandType>(anyHdr, 0, types...); 279 } 280 281 } // namespace macho 282 283 std::string toString(const macho::InputFile *file); 284 } // namespace lld 285 286 #endif 287