1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_MACHO_INPUT_FILES_H 10 #define LLD_MACHO_INPUT_FILES_H 11 12 #include "MachOStructs.h" 13 #include "Target.h" 14 15 #include "lld/Common/LLVM.h" 16 #include "lld/Common/Memory.h" 17 #include "llvm/ADT/CachedHashString.h" 18 #include "llvm/ADT/DenseSet.h" 19 #include "llvm/ADT/SetVector.h" 20 #include "llvm/BinaryFormat/MachO.h" 21 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 22 #include "llvm/Object/Archive.h" 23 #include "llvm/Support/MemoryBuffer.h" 24 #include "llvm/TextAPI/TextAPIReader.h" 25 26 #include <vector> 27 28 namespace llvm { 29 namespace lto { 30 class InputFile; 31 } // namespace lto 32 namespace MachO { 33 class InterfaceFile; 34 } // namespace MachO 35 class TarWriter; 36 } // namespace llvm 37 38 namespace lld { 39 namespace macho { 40 41 struct PlatformInfo; 42 class ConcatInputSection; 43 class Symbol; 44 class Defined; 45 struct Reloc; 46 enum class RefState : uint8_t; 47 48 // If --reproduce option is given, all input files are written 49 // to this tar archive. 50 extern std::unique_ptr<llvm::TarWriter> tar; 51 52 // If .subsections_via_symbols is set, each InputSection will be split along 53 // symbol boundaries. The field offset represents the offset of the subsection 54 // from the start of the original pre-split InputSection. 55 struct Subsection { 56 uint64_t offset = 0; 57 InputSection *isec = nullptr; 58 }; 59 60 using Subsections = std::vector<Subsection>; 61 class InputFile; 62 63 struct Section { 64 InputFile *file; 65 StringRef segname; 66 StringRef name; 67 uint32_t flags; 68 uint64_t addr; 69 Subsections subsections; 70 71 Section(InputFile *file, StringRef segname, StringRef name, uint32_t flags, 72 uint64_t addr) 73 : file(file), segname(segname), name(name), flags(flags), addr(addr) {} 74 // Ensure pointers to Sections are never invalidated. 75 Section(const Section &) = delete; 76 Section &operator=(const Section &) = delete; 77 Section(Section &&) = delete; 78 Section &operator=(Section &&) = delete; 79 }; 80 81 // Represents a call graph profile edge. 82 struct CallGraphEntry { 83 // The index of the caller in the symbol table. 84 uint32_t fromIndex; 85 // The index of the callee in the symbol table. 86 uint32_t toIndex; 87 // Number of calls from callee to caller in the profile. 88 uint64_t count; 89 90 CallGraphEntry(uint32_t fromIndex, uint32_t toIndex, uint64_t count) 91 : fromIndex(fromIndex), toIndex(toIndex), count(count) {} 92 }; 93 94 class InputFile { 95 public: 96 enum Kind { 97 ObjKind, 98 OpaqueKind, 99 DylibKind, 100 ArchiveKind, 101 BitcodeKind, 102 }; 103 104 virtual ~InputFile() = default; 105 Kind kind() const { return fileKind; } 106 StringRef getName() const { return name; } 107 static void resetIdCount() { idCount = 0; } 108 109 MemoryBufferRef mb; 110 111 std::vector<Symbol *> symbols; 112 std::vector<Section *> sections; 113 114 // If not empty, this stores the name of the archive containing this file. 115 // We use this string for creating error messages. 116 std::string archiveName; 117 118 // Provides an easy way to sort InputFiles deterministically. 119 const int id; 120 121 // True if this is a lazy ObjFile or BitcodeFile. 122 bool lazy = false; 123 124 protected: 125 InputFile(Kind kind, MemoryBufferRef mb, bool lazy = false) 126 : mb(mb), id(idCount++), lazy(lazy), fileKind(kind), 127 name(mb.getBufferIdentifier()) {} 128 129 InputFile(Kind, const llvm::MachO::InterfaceFile &); 130 131 private: 132 const Kind fileKind; 133 const StringRef name; 134 135 static int idCount; 136 }; 137 138 // .o file 139 class ObjFile final : public InputFile { 140 public: 141 ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName, 142 bool lazy = false); 143 ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const; 144 template <class LP> void parse(); 145 146 static bool classof(const InputFile *f) { return f->kind() == ObjKind; } 147 148 llvm::DWARFUnit *compileUnit = nullptr; 149 const uint32_t modTime; 150 std::vector<ConcatInputSection *> debugSections; 151 std::vector<CallGraphEntry> callGraph; 152 153 private: 154 template <class LP> void parseLazy(); 155 template <class SectionHeader> void parseSections(ArrayRef<SectionHeader>); 156 template <class LP> 157 void parseSymbols(ArrayRef<typename LP::section> sectionHeaders, 158 ArrayRef<typename LP::nlist> nList, const char *strtab, 159 bool subsectionsViaSymbols); 160 template <class NList> 161 Symbol *parseNonSectionSymbol(const NList &sym, StringRef name); 162 template <class SectionHeader> 163 void parseRelocations(ArrayRef<SectionHeader> sectionHeaders, 164 const SectionHeader &, Section &); 165 void parseDebugInfo(); 166 void registerCompactUnwind(Section &compactUnwindSection); 167 }; 168 169 // command-line -sectcreate file 170 class OpaqueFile final : public InputFile { 171 public: 172 OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName); 173 static bool classof(const InputFile *f) { return f->kind() == OpaqueKind; } 174 }; 175 176 // .dylib or .tbd file 177 class DylibFile final : public InputFile { 178 public: 179 // Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the 180 // symbols in those sub-libraries will be available under the umbrella 181 // library's namespace. Those sub-libraries can also have their own 182 // re-exports. When loading a re-exported dylib, `umbrella` should be set to 183 // the root dylib to ensure symbols in the child library are correctly bound 184 // to the root. On the other hand, if a dylib is being directly loaded 185 // (through an -lfoo flag), then `umbrella` should be a nullptr. 186 explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella, 187 bool isBundleLoader, bool explicitlyLinked); 188 explicit DylibFile(const llvm::MachO::InterfaceFile &interface, 189 DylibFile *umbrella, bool isBundleLoader, 190 bool explicitlyLinked); 191 192 void parseLoadCommands(MemoryBufferRef mb); 193 void parseReexports(const llvm::MachO::InterfaceFile &interface); 194 bool isReferenced() const { return numReferencedSymbols > 0; } 195 196 static bool classof(const InputFile *f) { return f->kind() == DylibKind; } 197 198 StringRef installName; 199 DylibFile *exportingFile = nullptr; 200 DylibFile *umbrella; 201 SmallVector<StringRef, 2> rpaths; 202 uint32_t compatibilityVersion = 0; 203 uint32_t currentVersion = 0; 204 int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel 205 unsigned numReferencedSymbols = 0; 206 RefState refState; 207 bool reexport = false; 208 bool forceNeeded = false; 209 bool forceWeakImport = false; 210 bool deadStrippable = false; 211 bool explicitlyLinked = false; 212 // An executable can be used as a bundle loader that will load the output 213 // file being linked, and that contains symbols referenced, but not 214 // implemented in the bundle. When used like this, it is very similar 215 // to a dylib, so we've used the same class to represent it. 216 bool isBundleLoader; 217 218 private: 219 bool handleLDSymbol(StringRef originalName); 220 void handleLDPreviousSymbol(StringRef name, StringRef originalName); 221 void handleLDInstallNameSymbol(StringRef name, StringRef originalName); 222 void handleLDHideSymbol(StringRef name, StringRef originalName); 223 void checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const; 224 225 llvm::DenseSet<llvm::CachedHashStringRef> hiddenSymbols; 226 }; 227 228 // .a file 229 class ArchiveFile final : public InputFile { 230 public: 231 explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file); 232 void addLazySymbols(); 233 void fetch(const llvm::object::Archive::Symbol &); 234 // LLD normally doesn't use Error for error-handling, but the underlying 235 // Archive library does, so this is the cleanest way to wrap it. 236 Error fetch(const llvm::object::Archive::Child &, StringRef reason); 237 const llvm::object::Archive &getArchive() const { return *file; }; 238 static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } 239 240 private: 241 std::unique_ptr<llvm::object::Archive> file; 242 // Keep track of children fetched from the archive by tracking 243 // which address offsets have been fetched already. 244 llvm::DenseSet<uint64_t> seen; 245 }; 246 247 class BitcodeFile final : public InputFile { 248 public: 249 explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName, 250 uint64_t offsetInArchive, bool lazy = false); 251 static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } 252 void parse(); 253 254 std::unique_ptr<llvm::lto::InputFile> obj; 255 256 private: 257 void parseLazy(); 258 }; 259 260 extern llvm::SetVector<InputFile *> inputFiles; 261 extern llvm::DenseMap<llvm::CachedHashStringRef, MemoryBufferRef> cachedReads; 262 263 llvm::Optional<MemoryBufferRef> readFile(StringRef path); 264 265 void extract(InputFile &file, StringRef reason); 266 267 namespace detail { 268 269 template <class CommandType, class... Types> 270 std::vector<const CommandType *> 271 findCommands(const void *anyHdr, size_t maxCommands, Types... types) { 272 std::vector<const CommandType *> cmds; 273 std::initializer_list<uint32_t> typesList{types...}; 274 const auto *hdr = reinterpret_cast<const llvm::MachO::mach_header *>(anyHdr); 275 const uint8_t *p = 276 reinterpret_cast<const uint8_t *>(hdr) + target->headerSize; 277 for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) { 278 auto *cmd = reinterpret_cast<const CommandType *>(p); 279 if (llvm::is_contained(typesList, cmd->cmd)) { 280 cmds.push_back(cmd); 281 if (cmds.size() == maxCommands) 282 return cmds; 283 } 284 p += cmd->cmdsize; 285 } 286 return cmds; 287 } 288 289 } // namespace detail 290 291 // anyHdr should be a pointer to either mach_header or mach_header_64 292 template <class CommandType = llvm::MachO::load_command, class... Types> 293 const CommandType *findCommand(const void *anyHdr, Types... types) { 294 std::vector<const CommandType *> cmds = 295 detail::findCommands<CommandType>(anyHdr, 1, types...); 296 return cmds.size() ? cmds[0] : nullptr; 297 } 298 299 template <class CommandType = llvm::MachO::load_command, class... Types> 300 std::vector<const CommandType *> findCommands(const void *anyHdr, 301 Types... types) { 302 return detail::findCommands<CommandType>(anyHdr, 0, types...); 303 } 304 305 } // namespace macho 306 307 std::string toString(const macho::InputFile *file); 308 std::string toString(const macho::Section &); 309 } // namespace lld 310 311 #endif 312