1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_MACHO_INPUT_FILES_H
10 #define LLD_MACHO_INPUT_FILES_H
11 
12 #include "MachOStructs.h"
13 #include "Target.h"
14 
15 #include "lld/Common/DWARF.h"
16 #include "lld/Common/LLVM.h"
17 #include "lld/Common/Memory.h"
18 #include "llvm/ADT/CachedHashString.h"
19 #include "llvm/ADT/DenseSet.h"
20 #include "llvm/ADT/SetVector.h"
21 #include "llvm/BinaryFormat/MachO.h"
22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23 #include "llvm/Object/Archive.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/Threading.h"
26 #include "llvm/TextAPI/TextAPIReader.h"
27 
28 #include <vector>
29 
30 namespace llvm {
31 namespace lto {
32 class InputFile;
33 } // namespace lto
34 namespace MachO {
35 class InterfaceFile;
36 } // namespace MachO
37 class TarWriter;
38 } // namespace llvm
39 
40 namespace lld {
41 namespace macho {
42 
43 struct PlatformInfo;
44 class ConcatInputSection;
45 class Symbol;
46 class Defined;
47 struct Reloc;
48 enum class RefState : uint8_t;
49 
50 // If --reproduce option is given, all input files are written
51 // to this tar archive.
52 extern std::unique_ptr<llvm::TarWriter> tar;
53 
54 // If .subsections_via_symbols is set, each InputSection will be split along
55 // symbol boundaries. The field offset represents the offset of the subsection
56 // from the start of the original pre-split InputSection.
57 struct Subsection {
58   uint64_t offset = 0;
59   InputSection *isec = nullptr;
60 };
61 
62 using Subsections = std::vector<Subsection>;
63 class InputFile;
64 
65 class Section {
66 public:
67   InputFile *file;
68   StringRef segname;
69   StringRef name;
70   uint32_t flags;
71   uint64_t addr;
72   Subsections subsections;
73 
74   Section(InputFile *file, StringRef segname, StringRef name, uint32_t flags,
75           uint64_t addr)
76       : file(file), segname(segname), name(name), flags(flags), addr(addr) {}
77   // Ensure pointers to Sections are never invalidated.
78   Section(const Section &) = delete;
79   Section &operator=(const Section &) = delete;
80   Section(Section &&) = delete;
81   Section &operator=(Section &&) = delete;
82 
83 private:
84   // Whether we have already split this section into individual subsections.
85   // For sections that cannot be split (e.g. literal sections), this is always
86   // false.
87   bool doneSplitting = false;
88   friend class ObjFile;
89 };
90 
// Represents a call graph profile edge: `count` calls made by the symbol at
// `fromIndex` (the caller) to the symbol at `toIndex` (the callee).
struct CallGraphEntry {
  // The index of the caller in the symbol table.
  uint32_t fromIndex;
  // The index of the callee in the symbol table.
  uint32_t toIndex;
  // Number of calls from caller to callee in the profile.
  uint64_t count;

  // Stores the three values verbatim.
  CallGraphEntry(uint32_t fromIndex, uint32_t toIndex, uint64_t count)
      : fromIndex(fromIndex), toIndex(toIndex), count(count) {}
};
103 
104 class InputFile {
105 public:
106   enum Kind {
107     ObjKind,
108     OpaqueKind,
109     DylibKind,
110     ArchiveKind,
111     BitcodeKind,
112   };
113 
114   virtual ~InputFile() = default;
115   Kind kind() const { return fileKind; }
116   StringRef getName() const { return name; }
117   static void resetIdCount() { idCount = 0; }
118 
119   MemoryBufferRef mb;
120 
121   std::vector<Symbol *> symbols;
122   std::vector<Section *> sections;
123 
124   // If not empty, this stores the name of the archive containing this file.
125   // We use this string for creating error messages.
126   std::string archiveName;
127 
128   // Provides an easy way to sort InputFiles deterministically.
129   const int id;
130 
131   // True if this is a lazy ObjFile or BitcodeFile.
132   bool lazy = false;
133 
134 protected:
135   InputFile(Kind kind, MemoryBufferRef mb, bool lazy = false)
136       : mb(mb), id(idCount++), lazy(lazy), fileKind(kind),
137         name(mb.getBufferIdentifier()) {}
138 
139   InputFile(Kind, const llvm::MachO::InterfaceFile &);
140 
141 private:
142   const Kind fileKind;
143   const StringRef name;
144 
145   static int idCount;
146 };
147 
148 struct FDE {
149   uint32_t funcLength;
150   Symbol *personality;
151   InputSection *lsda;
152 };
153 
154 // .o file
155 class ObjFile final : public InputFile {
156 public:
157   ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
158           bool lazy = false);
159   ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const;
160   template <class LP> void parse();
161 
162   static bool classof(const InputFile *f) { return f->kind() == ObjKind; }
163 
164   std::string sourceFile() const;
165   // Parses line table information for diagnostics. compileUnit should be used
166   // for other purposes.
167   lld::DWARFCache *getDwarf();
168 
169   llvm::DWARFUnit *compileUnit = nullptr;
170   std::unique_ptr<lld::DWARFCache> dwarfCache;
171   Section *addrSigSection = nullptr;
172   const uint32_t modTime;
173   std::vector<ConcatInputSection *> debugSections;
174   std::vector<CallGraphEntry> callGraph;
175   llvm::DenseMap<ConcatInputSection *, FDE> fdes;
176   std::vector<OptimizationHint> optimizationHints;
177 
178 private:
179   llvm::once_flag initDwarf;
180   template <class LP> void parseLazy();
181   template <class SectionHeader> void parseSections(ArrayRef<SectionHeader>);
182   template <class LP>
183   void parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
184                     ArrayRef<typename LP::nlist> nList, const char *strtab,
185                     bool subsectionsViaSymbols);
186   template <class NList>
187   Symbol *parseNonSectionSymbol(const NList &sym, StringRef name);
188   template <class SectionHeader>
189   void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
190                         const SectionHeader &, Section &);
191   void parseDebugInfo();
192   void parseOptimizationHints(ArrayRef<uint8_t> data);
193   void splitEhFrames(ArrayRef<uint8_t> dataArr, Section &ehFrameSection);
194   void registerCompactUnwind(Section &compactUnwindSection);
195   void registerEhFrames(Section &ehFrameSection);
196 };
197 
198 // command-line -sectcreate file
199 class OpaqueFile final : public InputFile {
200 public:
201   OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName);
202   static bool classof(const InputFile *f) { return f->kind() == OpaqueKind; }
203 };
204 
205 // .dylib or .tbd file
206 class DylibFile final : public InputFile {
207 public:
208   // Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the
209   // symbols in those sub-libraries will be available under the umbrella
210   // library's namespace. Those sub-libraries can also have their own
211   // re-exports. When loading a re-exported dylib, `umbrella` should be set to
212   // the root dylib to ensure symbols in the child library are correctly bound
213   // to the root. On the other hand, if a dylib is being directly loaded
214   // (through an -lfoo flag), then `umbrella` should be a nullptr.
215   explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
216                      bool isBundleLoader, bool explicitlyLinked);
217   explicit DylibFile(const llvm::MachO::InterfaceFile &interface,
218                      DylibFile *umbrella, bool isBundleLoader,
219                      bool explicitlyLinked);
220 
221   void parseLoadCommands(MemoryBufferRef mb);
222   void parseReexports(const llvm::MachO::InterfaceFile &interface);
223   bool isReferenced() const { return numReferencedSymbols > 0; }
224 
225   static bool classof(const InputFile *f) { return f->kind() == DylibKind; }
226 
227   StringRef installName;
228   DylibFile *exportingFile = nullptr;
229   DylibFile *umbrella;
230   SmallVector<StringRef, 2> rpaths;
231   uint32_t compatibilityVersion = 0;
232   uint32_t currentVersion = 0;
233   int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel
234   unsigned numReferencedSymbols = 0;
235   RefState refState;
236   bool reexport = false;
237   bool forceNeeded = false;
238   bool forceWeakImport = false;
239   bool deadStrippable = false;
240   bool explicitlyLinked = false;
241   // An executable can be used as a bundle loader that will load the output
242   // file being linked, and that contains symbols referenced, but not
243   // implemented in the bundle. When used like this, it is very similar
244   // to a dylib, so we've used the same class to represent it.
245   bool isBundleLoader;
246 
247 private:
248   bool handleLDSymbol(StringRef originalName);
249   void handleLDPreviousSymbol(StringRef name, StringRef originalName);
250   void handleLDInstallNameSymbol(StringRef name, StringRef originalName);
251   void handleLDHideSymbol(StringRef name, StringRef originalName);
252   void checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const;
253 
254   llvm::DenseSet<llvm::CachedHashStringRef> hiddenSymbols;
255 };
256 
257 // .a file
258 class ArchiveFile final : public InputFile {
259 public:
260   explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file);
261   void addLazySymbols();
262   void fetch(const llvm::object::Archive::Symbol &);
263   // LLD normally doesn't use Error for error-handling, but the underlying
264   // Archive library does, so this is the cleanest way to wrap it.
265   Error fetch(const llvm::object::Archive::Child &, StringRef reason);
266   const llvm::object::Archive &getArchive() const { return *file; };
267   static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
268 
269 private:
270   std::unique_ptr<llvm::object::Archive> file;
271   // Keep track of children fetched from the archive by tracking
272   // which address offsets have been fetched already.
273   llvm::DenseSet<uint64_t> seen;
274 };
275 
276 class BitcodeFile final : public InputFile {
277 public:
278   explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
279                        uint64_t offsetInArchive, bool lazy = false);
280   static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
281   void parse();
282 
283   std::unique_ptr<llvm::lto::InputFile> obj;
284 
285 private:
286   void parseLazy();
287 };
288 
289 extern llvm::SetVector<InputFile *> inputFiles;
290 extern llvm::DenseMap<llvm::CachedHashStringRef, MemoryBufferRef> cachedReads;
291 
292 llvm::Optional<MemoryBufferRef> readFile(StringRef path);
293 
294 void extract(InputFile &file, StringRef reason);
295 
296 namespace detail {
297 
298 template <class CommandType, class... Types>
299 std::vector<const CommandType *>
300 findCommands(const void *anyHdr, size_t maxCommands, Types... types) {
301   std::vector<const CommandType *> cmds;
302   std::initializer_list<uint32_t> typesList{types...};
303   const auto *hdr = reinterpret_cast<const llvm::MachO::mach_header *>(anyHdr);
304   const uint8_t *p =
305       reinterpret_cast<const uint8_t *>(hdr) + target->headerSize;
306   for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
307     auto *cmd = reinterpret_cast<const CommandType *>(p);
308     if (llvm::is_contained(typesList, cmd->cmd)) {
309       cmds.push_back(cmd);
310       if (cmds.size() == maxCommands)
311         return cmds;
312     }
313     p += cmd->cmdsize;
314   }
315   return cmds;
316 }
317 
318 } // namespace detail
319 
320 // anyHdr should be a pointer to either mach_header or mach_header_64
321 template <class CommandType = llvm::MachO::load_command, class... Types>
322 const CommandType *findCommand(const void *anyHdr, Types... types) {
323   std::vector<const CommandType *> cmds =
324       detail::findCommands<CommandType>(anyHdr, 1, types...);
325   return cmds.size() ? cmds[0] : nullptr;
326 }
327 
328 template <class CommandType = llvm::MachO::load_command, class... Types>
329 std::vector<const CommandType *> findCommands(const void *anyHdr,
330                                               Types... types) {
331   return detail::findCommands<CommandType>(anyHdr, 0, types...);
332 }
333 
334 } // namespace macho
335 
336 std::string toString(const macho::InputFile *file);
337 std::string toString(const macho::Section &);
338 } // namespace lld
339 
340 #endif
341