1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_MACHO_INPUT_FILES_H
10 #define LLD_MACHO_INPUT_FILES_H
11 
12 #include "MachOStructs.h"
13 #include "Target.h"
14 
15 #include "lld/Common/LLVM.h"
16 #include "lld/Common/Memory.h"
17 #include "llvm/ADT/CachedHashString.h"
18 #include "llvm/ADT/DenseSet.h"
19 #include "llvm/ADT/SetVector.h"
20 #include "llvm/BinaryFormat/MachO.h"
21 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
22 #include "llvm/Object/Archive.h"
23 #include "llvm/Support/MemoryBuffer.h"
24 #include "llvm/TextAPI/TextAPIReader.h"
25 
26 #include <vector>
27 
// Forward declarations of LLVM types that this header only names through
// references or std::unique_ptr. Note that llvm::lto::InputFile is a different
// type from lld::macho::InputFile declared further down.
namespace llvm {
namespace lto {
class InputFile;
} // namespace lto
namespace MachO {
class InterfaceFile;
} // namespace MachO
class TarWriter;
} // namespace llvm
37 
38 namespace lld {
39 namespace macho {
40 
// Forward declarations; the definitions are not part of this header.
struct PlatformInfo;
class ConcatInputSection;
class Symbol;
class Defined;
struct Reloc;
// Declared with its underlying type only; DylibFile stores a RefState by
// value.
enum class RefState : uint8_t;

// If --reproduce option is given, all input files are written
// to this tar archive.
// Only declared here; the definition lives in another translation unit.
extern std::unique_ptr<llvm::TarWriter> tar;
51 
// If .subsections_via_symbols is set, each InputSection will be split along
// symbol boundaries. The field offset represents the offset of the subsection
// from the start of the original pre-split InputSection.
struct Subsection {
  // Offset from the start of the pre-split InputSection; defaults to 0.
  uint64_t offset = 0;
  // Input section for this subsection (presumably the piece produced by the
  // split -- confirm against the parser); null until assigned.
  InputSection *isec = nullptr;
};
59 
// A list of Subsection records, stored by value in a std::vector.
using Subsections = std::vector<Subsection>;
// Forward declaration: Section stores an InputFile pointer before the class
// itself is defined.
class InputFile;
62 
// Describes one section of an input file together with the Subsections that
// belong to it. Instances can be neither copied nor moved, so a pointer to a
// Section stays valid for as long as the object itself exists.
struct Section {
  // File passed to the constructor (presumably the file that contains the
  // section).
  InputFile *file;
  // Presumably the segment name and section name, respectively; both are
  // non-owning StringRefs, so the referenced text must outlive the Section.
  StringRef segname;
  StringRef name;
  // Flags and address exactly as handed to the constructor.
  uint32_t flags;
  uint64_t addr;
  // Starts out empty; the constructor does not populate it.
  Subsections subsections;

  // Stores every argument in the member of the same name.
  Section(InputFile *file, StringRef segname, StringRef name, uint32_t flags,
          uint64_t addr)
      : file(file), segname(segname), name(name), flags(flags), addr(addr) {}
  // Ensure pointers to Sections are never invalidated.
  Section(const Section &) = delete;
  Section &operator=(const Section &) = delete;
  Section(Section &&) = delete;
  Section &operator=(Section &&) = delete;
};
80 
// Represents a call graph profile edge: a caller, a callee, and how often the
// profile saw the caller invoke the callee.
struct CallGraphEntry {
  // The index of the caller in the symbol table.
  uint32_t fromIndex;
  // The index of the callee in the symbol table.
  uint32_t toIndex;
  // Number of calls from caller to callee in the profile.
  uint64_t count;

  // Stores each argument in the member of the same name.
  CallGraphEntry(uint32_t fromIndex, uint32_t toIndex, uint64_t count)
      : fromIndex(fromIndex), toIndex(toIndex), count(count) {}
};
93 
// Common base class of the input file types declared in this header. Each
// object carries a Kind tag, which the subclasses' classof() functions compare
// against.
class InputFile {
public:
  // Tag identifying the concrete subclass.
  enum Kind {
    ObjKind,
    OpaqueKind,
    DylibKind,
    ArchiveKind,
    BitcodeKind,
  };

  virtual ~InputFile() = default;
  // Returns the Kind tag fixed at construction.
  Kind kind() const { return fileKind; }
  // Returns the stored name. The MemoryBufferRef constructor sets it to the
  // buffer identifier; the InterfaceFile constructor is defined elsewhere.
  StringRef getName() const { return name; }
  // Restarts the counter from which `id` values are drawn at zero.
  static void resetIdCount() { idCount = 0; }

  // Set from the MemoryBufferRef constructor argument.
  MemoryBufferRef mb;

  // Symbols and sections associated with this file. Nothing in this header
  // fills them in; they are populated elsewhere.
  std::vector<Symbol *> symbols;
  std::vector<Section *> sections;

  // If not empty, this stores the name of the archive containing this file.
  // We use this string for creating error messages.
  std::string archiveName;

  // Provides an easy way to sort InputFiles deterministically.
  // The MemoryBufferRef constructor takes it from idCount, post-incrementing
  // the counter.
  const int id;

  // True if this is a lazy ObjFile or BitcodeFile.
  bool lazy = false;

protected:
  // Only subclasses construct an InputFile. The initializer for `name` reads
  // the constructor parameter `mb`, which shadows the member of the same
  // name.
  InputFile(Kind kind, MemoryBufferRef mb, bool lazy = false)
      : mb(mb), id(idCount++), lazy(lazy), fileKind(kind),
        name(mb.getBufferIdentifier()) {}

  // Constructs from a TextAPI interface file; defined out of line.
  InputFile(Kind, const llvm::MachO::InterfaceFile &);

private:
  const Kind fileKind;
  // Non-owning view; the text it refers to must outlive this object.
  const StringRef name;

  // Source of `id` values; see resetIdCount().
  static int idCount;
};
137 
// .o file
class ObjFile final : public InputFile {
public:
  // Defined out of line. `lazy` defaults to false.
  ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
          bool lazy = false);
  // Returns a view of data_in_code_entry records (presumably those recorded
  // in this object file -- confirm against the definition).
  ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const;
  // Parses the file; LP is a template parameter that supplies the
  // `LP::section` and `LP::nlist` types used by the parsing helpers below.
  template <class LP> void parse();

  // Kind-tag check with the `classof` signature used by LLVM's
  // isa<>/dyn_cast<> helpers.
  static bool classof(const InputFile *f) { return f->kind() == ObjKind; }

  // Null until assigned; nothing in this header sets it.
  llvm::DWARFUnit *compileUnit = nullptr;
  // Presumably the modification time handed to the constructor -- confirm
  // against the constructor definition.
  const uint32_t modTime;
  std::vector<ConcatInputSection *> debugSections;
  // Call graph profile edges; see CallGraphEntry.
  std::vector<CallGraphEntry> callGraph;

private:
  // Parsing helpers, all defined out of line.
  template <class LP> void parseLazy();
  template <class SectionHeader> void parseSections(ArrayRef<SectionHeader>);
  template <class LP>
  void parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
                    ArrayRef<typename LP::nlist> nList, const char *strtab,
                    bool subsectionsViaSymbols);
  template <class NList>
  Symbol *parseNonSectionSymbol(const NList &sym, StringRef name);
  template <class SectionHeader>
  void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
                        const SectionHeader &, Section &);
  void parseDebugInfo();
  void registerCompactUnwind(Section &compactUnwindSection);
};
168 
// command-line -sectcreate file
class OpaqueFile final : public InputFile {
public:
  // Defined out of line; takes the buffer plus a segment name and a section
  // name.
  OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName);
  // Kind-tag check with the `classof` signature used by LLVM's
  // isa<>/dyn_cast<> helpers.
  static bool classof(const InputFile *f) { return f->kind() == OpaqueKind; }
};
175 
// .dylib or .tbd file
class DylibFile final : public InputFile {
public:
  // Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the
  // symbols in those sub-libraries will be available under the umbrella
  // library's namespace. Those sub-libraries can also have their own
  // re-exports. When loading a re-exported dylib, `umbrella` should be set to
  // the root dylib to ensure symbols in the child library are correctly bound
  // to the root. On the other hand, if a dylib is being directly loaded
  // (through an -lfoo flag), then `umbrella` should be a nullptr.
  //
  // There is one constructor per input form: a memory buffer, or an
  // already-parsed TextAPI InterfaceFile. Both are defined out of line.
  explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
                     bool isBundleLoader, bool explicitlyLinked);
  explicit DylibFile(const llvm::MachO::InterfaceFile &interface,
                     DylibFile *umbrella, bool isBundleLoader,
                     bool explicitlyLinked);

  void parseLoadCommands(MemoryBufferRef mb);
  void parseReexports(const llvm::MachO::InterfaceFile &interface);
  // True when numReferencedSymbols is non-zero.
  bool isReferenced() const { return numReferencedSymbols > 0; }

  // Kind-tag check with the `classof` signature used by LLVM's
  // isa<>/dyn_cast<> helpers.
  static bool classof(const InputFile *f) { return f->kind() == DylibKind; }

  StringRef installName;
  DylibFile *exportingFile = nullptr;
  // No in-class initializer; presumably set by the constructors from their
  // `umbrella` argument -- confirm against the definitions.
  DylibFile *umbrella;
  SmallVector<StringRef, 2> rpaths;
  uint32_t compatibilityVersion = 0;
  uint32_t currentVersion = 0;
  int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel
  // Counter read by isReferenced(); maintained outside this header.
  unsigned numReferencedSymbols = 0;
  // No in-class initializer (RefState is only forward-declared here), so the
  // constructors are responsible for giving it a value.
  RefState refState;
  bool reexport = false;
  bool forceNeeded = false;
  bool forceWeakImport = false;
  bool deadStrippable = false;
  bool explicitlyLinked = false;
  // An executable can be used as a bundle loader that will load the output
  // file being linked, and that contains symbols referenced, but not
  // implemented in the bundle. When used like this, it is very similar
  // to a dylib, so we've used the same class to represent it.
  bool isBundleLoader;

private:
  // Helpers for special linker-directive symbols, judging by their names
  // (handleLD*); all are defined out of line -- confirm details there.
  bool handleLDSymbol(StringRef originalName);
  void handleLDPreviousSymbol(StringRef name, StringRef originalName);
  void handleLDInstallNameSymbol(StringRef name, StringRef originalName);
  void handleLDHideSymbol(StringRef name, StringRef originalName);
  void checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const;

  // Set of symbol names keyed by cached-hash string references.
  llvm::DenseSet<llvm::CachedHashStringRef> hiddenSymbols;
};
227 
228 // .a file
229 class ArchiveFile final : public InputFile {
230 public:
231   explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file);
232   void addLazySymbols();
233   void fetch(const llvm::object::Archive::Symbol &);
234   // LLD normally doesn't use Error for error-handling, but the underlying
235   // Archive library does, so this is the cleanest way to wrap it.
236   Error fetch(const llvm::object::Archive::Child &, StringRef reason);
237   const llvm::object::Archive &getArchive() const { return *file; };
238   static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
239 
240 private:
241   std::unique_ptr<llvm::object::Archive> file;
242   // Keep track of children fetched from the archive by tracking
243   // which address offsets have been fetched already.
244   llvm::DenseSet<uint64_t> seen;
245 };
246 
// Input file whose kind tag is BitcodeKind; `obj` holds an
// llvm::lto::InputFile (presumably the LTO view of the same buffer -- confirm
// against the constructor definition).
class BitcodeFile final : public InputFile {
public:
  // Defined out of line. `lazy` defaults to false.
  explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
                       uint64_t offsetInArchive, bool lazy = false);
  // Kind-tag check with the `classof` signature used by LLVM's
  // isa<>/dyn_cast<> helpers.
  static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
  void parse();

  std::unique_ptr<llvm::lto::InputFile> obj;

private:
  void parseLazy();
};
259 
// Insertion-ordered set of InputFile pointers; defined in another translation
// unit.
extern llvm::SetVector<InputFile *> inputFiles;
// Map from a hashed string key to a buffer reference (presumably a cache of
// readFile() results keyed by path -- confirm against the definition).
extern llvm::DenseMap<llvm::CachedHashStringRef, MemoryBufferRef> cachedReads;

// Returns a buffer reference for `path`, or an empty Optional (presumably
// when the file cannot be read -- confirm against the definition).
llvm::Optional<MemoryBufferRef> readFile(StringRef path);

// Defined elsewhere; takes the file to act on and a textual `reason`.
void extract(InputFile &file, StringRef reason);
266 
267 namespace detail {
268 
269 template <class CommandType, class... Types>
270 std::vector<const CommandType *>
271 findCommands(const void *anyHdr, size_t maxCommands, Types... types) {
272   std::vector<const CommandType *> cmds;
273   std::initializer_list<uint32_t> typesList{types...};
274   const auto *hdr = reinterpret_cast<const llvm::MachO::mach_header *>(anyHdr);
275   const uint8_t *p =
276       reinterpret_cast<const uint8_t *>(hdr) + target->headerSize;
277   for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
278     auto *cmd = reinterpret_cast<const CommandType *>(p);
279     if (llvm::is_contained(typesList, cmd->cmd)) {
280       cmds.push_back(cmd);
281       if (cmds.size() == maxCommands)
282         return cmds;
283     }
284     p += cmd->cmdsize;
285   }
286   return cmds;
287 }
288 
289 } // namespace detail
290 
291 // anyHdr should be a pointer to either mach_header or mach_header_64
292 template <class CommandType = llvm::MachO::load_command, class... Types>
293 const CommandType *findCommand(const void *anyHdr, Types... types) {
294   std::vector<const CommandType *> cmds =
295       detail::findCommands<CommandType>(anyHdr, 1, types...);
296   return cmds.size() ? cmds[0] : nullptr;
297 }
298 
299 template <class CommandType = llvm::MachO::load_command, class... Types>
300 std::vector<const CommandType *> findCommands(const void *anyHdr,
301                                               Types... types) {
302   return detail::findCommands<CommandType>(anyHdr, 0, types...);
303 }
304 
305 } // namespace macho
306 
// String renderings of an input file and of a section. Both overloads are
// declared in namespace lld and defined elsewhere.
std::string toString(const macho::InputFile *file);
std::string toString(const macho::Section &);
309 } // namespace lld
310 
311 #endif
312