1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_MACHO_INPUT_FILES_H 10 #define LLD_MACHO_INPUT_FILES_H 11 12 #include "MachOStructs.h" 13 #include "Target.h" 14 15 #include "lld/Common/DWARF.h" 16 #include "lld/Common/LLVM.h" 17 #include "lld/Common/Memory.h" 18 #include "llvm/ADT/CachedHashString.h" 19 #include "llvm/ADT/DenseSet.h" 20 #include "llvm/ADT/SetVector.h" 21 #include "llvm/BinaryFormat/MachO.h" 22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 23 #include "llvm/Object/Archive.h" 24 #include "llvm/Support/MemoryBuffer.h" 25 #include "llvm/Support/Threading.h" 26 #include "llvm/TextAPI/TextAPIReader.h" 27 28 #include <vector> 29 30 namespace llvm { 31 namespace lto { 32 class InputFile; 33 } // namespace lto 34 namespace MachO { 35 class InterfaceFile; 36 } // namespace MachO 37 class TarWriter; 38 } // namespace llvm 39 40 namespace lld { 41 namespace macho { 42 43 struct PlatformInfo; 44 class ConcatInputSection; 45 class Symbol; 46 class Defined; 47 struct Reloc; 48 enum class RefState : uint8_t; 49 50 // If --reproduce option is given, all input files are written 51 // to this tar archive. 52 extern std::unique_ptr<llvm::TarWriter> tar; 53 54 // If .subsections_via_symbols is set, each InputSection will be split along 55 // symbol boundaries. The field offset represents the offset of the subsection 56 // from the start of the original pre-split InputSection. 57 struct Subsection { 58 uint64_t offset = 0; 59 InputSection *isec = nullptr; 60 }; 61 62 using Subsections = std::vector<Subsection>; 63 class InputFile; 64 65 class Section { 66 public: 67 InputFile *file; 68 StringRef segname; 69 StringRef name; 70 uint32_t flags; 71 uint64_t addr; 72 Subsections subsections; 73 74 Section(InputFile *file, StringRef segname, StringRef name, uint32_t flags, 75 uint64_t addr) 76 : file(file), segname(segname), name(name), flags(flags), addr(addr) {} 77 // Ensure pointers to Sections are never invalidated. 78 Section(const Section &) = delete; 79 Section &operator=(const Section &) = delete; 80 Section(Section &&) = delete; 81 Section &operator=(Section &&) = delete; 82 83 private: 84 // Whether we have already split this section into individual subsections. 85 // For sections that cannot be split (e.g. literal sections), this is always 86 // false. 87 bool doneSplitting = false; 88 friend class ObjFile; 89 }; 90 91 // Represents a call graph profile edge. 92 struct CallGraphEntry { 93 // The index of the caller in the symbol table. 94 uint32_t fromIndex; 95 // The index of the callee in the symbol table. 96 uint32_t toIndex; 97 // Number of calls from callee to caller in the profile. 98 uint64_t count; 99 100 CallGraphEntry(uint32_t fromIndex, uint32_t toIndex, uint64_t count) 101 : fromIndex(fromIndex), toIndex(toIndex), count(count) {} 102 }; 103 104 class InputFile { 105 public: 106 enum Kind { 107 ObjKind, 108 OpaqueKind, 109 DylibKind, 110 ArchiveKind, 111 BitcodeKind, 112 }; 113 114 virtual ~InputFile() = default; 115 Kind kind() const { return fileKind; } 116 StringRef getName() const { return name; } 117 static void resetIdCount() { idCount = 0; } 118 119 MemoryBufferRef mb; 120 121 std::vector<Symbol *> symbols; 122 std::vector<Section *> sections; 123 124 // If not empty, this stores the name of the archive containing this file. 125 // We use this string for creating error messages. 126 std::string archiveName; 127 128 // Provides an easy way to sort InputFiles deterministically. 129 const int id; 130 131 // True if this is a lazy ObjFile or BitcodeFile. 132 bool lazy = false; 133 134 protected: 135 InputFile(Kind kind, MemoryBufferRef mb, bool lazy = false) 136 : mb(mb), id(idCount++), lazy(lazy), fileKind(kind), 137 name(mb.getBufferIdentifier()) {} 138 139 InputFile(Kind, const llvm::MachO::InterfaceFile &); 140 141 private: 142 const Kind fileKind; 143 const StringRef name; 144 145 static int idCount; 146 }; 147 148 struct FDE { 149 uint32_t funcLength; 150 Symbol *personality; 151 InputSection *lsda; 152 }; 153 154 // .o file 155 class ObjFile final : public InputFile { 156 public: 157 ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName, 158 bool lazy = false); 159 ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const; 160 template <class LP> void parse(); 161 162 static bool classof(const InputFile *f) { return f->kind() == ObjKind; } 163 164 std::string sourceFile() const; 165 // Parses line table information for diagnostics. compileUnit should be used 166 // for other purposes. 167 lld::DWARFCache *getDwarf(); 168 169 llvm::DWARFUnit *compileUnit = nullptr; 170 std::unique_ptr<lld::DWARFCache> dwarfCache; 171 Section *addrSigSection = nullptr; 172 const uint32_t modTime; 173 std::vector<ConcatInputSection *> debugSections; 174 std::vector<CallGraphEntry> callGraph; 175 llvm::DenseMap<ConcatInputSection *, FDE> fdes; 176 std::vector<OptimizationHint> optimizationHints; 177 178 private: 179 llvm::once_flag initDwarf; 180 template <class LP> void parseLazy(); 181 template <class SectionHeader> void parseSections(ArrayRef<SectionHeader>); 182 template <class LP> 183 void parseSymbols(ArrayRef<typename LP::section> sectionHeaders, 184 ArrayRef<typename LP::nlist> nList, const char *strtab, 185 bool subsectionsViaSymbols); 186 template <class NList> 187 Symbol *parseNonSectionSymbol(const NList &sym, StringRef name); 188 template <class SectionHeader> 189 void parseRelocations(ArrayRef<SectionHeader> sectionHeaders, 190 const SectionHeader &, Section &); 191 void parseDebugInfo(); 192 void parseOptimizationHints(ArrayRef<uint8_t> data); 193 void splitEhFrames(ArrayRef<uint8_t> dataArr, Section &ehFrameSection); 194 void registerCompactUnwind(Section &compactUnwindSection); 195 void registerEhFrames(Section &ehFrameSection); 196 }; 197 198 // command-line -sectcreate file 199 class OpaqueFile final : public InputFile { 200 public: 201 OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName); 202 static bool classof(const InputFile *f) { return f->kind() == OpaqueKind; } 203 }; 204 205 // .dylib or .tbd file 206 class DylibFile final : public InputFile { 207 public: 208 // Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the 209 // symbols in those sub-libraries will be available under the umbrella 210 // library's namespace. Those sub-libraries can also have their own 211 // re-exports. When loading a re-exported dylib, `umbrella` should be set to 212 // the root dylib to ensure symbols in the child library are correctly bound 213 // to the root. On the other hand, if a dylib is being directly loaded 214 // (through an -lfoo flag), then `umbrella` should be a nullptr. 215 explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella, 216 bool isBundleLoader, bool explicitlyLinked); 217 explicit DylibFile(const llvm::MachO::InterfaceFile &interface, 218 DylibFile *umbrella, bool isBundleLoader, 219 bool explicitlyLinked); 220 221 void parseLoadCommands(MemoryBufferRef mb); 222 void parseReexports(const llvm::MachO::InterfaceFile &interface); 223 bool isReferenced() const { return numReferencedSymbols > 0; } 224 225 static bool classof(const InputFile *f) { return f->kind() == DylibKind; } 226 227 StringRef installName; 228 DylibFile *exportingFile = nullptr; 229 DylibFile *umbrella; 230 SmallVector<StringRef, 2> rpaths; 231 uint32_t compatibilityVersion = 0; 232 uint32_t currentVersion = 0; 233 int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel 234 unsigned numReferencedSymbols = 0; 235 RefState refState; 236 bool reexport = false; 237 bool forceNeeded = false; 238 bool forceWeakImport = false; 239 bool deadStrippable = false; 240 bool explicitlyLinked = false; 241 // An executable can be used as a bundle loader that will load the output 242 // file being linked, and that contains symbols referenced, but not 243 // implemented in the bundle. When used like this, it is very similar 244 // to a dylib, so we've used the same class to represent it. 245 bool isBundleLoader; 246 247 private: 248 bool handleLDSymbol(StringRef originalName); 249 void handleLDPreviousSymbol(StringRef name, StringRef originalName); 250 void handleLDInstallNameSymbol(StringRef name, StringRef originalName); 251 void handleLDHideSymbol(StringRef name, StringRef originalName); 252 void checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const; 253 254 llvm::DenseSet<llvm::CachedHashStringRef> hiddenSymbols; 255 }; 256 257 // .a file 258 class ArchiveFile final : public InputFile { 259 public: 260 explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file); 261 void addLazySymbols(); 262 void fetch(const llvm::object::Archive::Symbol &); 263 // LLD normally doesn't use Error for error-handling, but the underlying 264 // Archive library does, so this is the cleanest way to wrap it. 265 Error fetch(const llvm::object::Archive::Child &, StringRef reason); 266 const llvm::object::Archive &getArchive() const { return *file; }; 267 static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } 268 269 private: 270 std::unique_ptr<llvm::object::Archive> file; 271 // Keep track of children fetched from the archive by tracking 272 // which address offsets have been fetched already. 273 llvm::DenseSet<uint64_t> seen; 274 }; 275 276 class BitcodeFile final : public InputFile { 277 public: 278 explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName, 279 uint64_t offsetInArchive, bool lazy = false); 280 static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } 281 void parse(); 282 283 std::unique_ptr<llvm::lto::InputFile> obj; 284 285 private: 286 void parseLazy(); 287 }; 288 289 extern llvm::SetVector<InputFile *> inputFiles; 290 extern llvm::DenseMap<llvm::CachedHashStringRef, MemoryBufferRef> cachedReads; 291 292 llvm::Optional<MemoryBufferRef> readFile(StringRef path); 293 294 void extract(InputFile &file, StringRef reason); 295 296 namespace detail { 297 298 template <class CommandType, class... Types> 299 std::vector<const CommandType *> 300 findCommands(const void *anyHdr, size_t maxCommands, Types... types) { 301 std::vector<const CommandType *> cmds; 302 std::initializer_list<uint32_t> typesList{types...}; 303 const auto *hdr = reinterpret_cast<const llvm::MachO::mach_header *>(anyHdr); 304 const uint8_t *p = 305 reinterpret_cast<const uint8_t *>(hdr) + target->headerSize; 306 for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) { 307 auto *cmd = reinterpret_cast<const CommandType *>(p); 308 if (llvm::is_contained(typesList, cmd->cmd)) { 309 cmds.push_back(cmd); 310 if (cmds.size() == maxCommands) 311 return cmds; 312 } 313 p += cmd->cmdsize; 314 } 315 return cmds; 316 } 317 318 } // namespace detail 319 320 // anyHdr should be a pointer to either mach_header or mach_header_64 321 template <class CommandType = llvm::MachO::load_command, class... Types> 322 const CommandType *findCommand(const void *anyHdr, Types... types) { 323 std::vector<const CommandType *> cmds = 324 detail::findCommands<CommandType>(anyHdr, 1, types...); 325 return cmds.size() ? cmds[0] : nullptr; 326 } 327 328 template <class CommandType = llvm::MachO::load_command, class... Types> 329 std::vector<const CommandType *> findCommands(const void *anyHdr, 330 Types... types) { 331 return detail::findCommands<CommandType>(anyHdr, 0, types...); 332 } 333 334 } // namespace macho 335 336 std::string toString(const macho::InputFile *file); 337 std::string toString(const macho::Section &); 338 } // namespace lld 339 340 #endif 341