1 //===- InputFiles.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains functions to parse Mach-O object files. In this comment,
10 // we describe the Mach-O file structure and how we parse it.
11 //
12 // Mach-O is not very different from ELF or COFF. The notion of symbols,
13 // sections and relocations exists in Mach-O as it does in ELF and COFF.
14 //
15 // Perhaps the notion that is new to those who know ELF/COFF is "subsections".
16 // In ELF/COFF, sections are an atomic unit of data copied from input files to
17 // output files. When we merge or garbage-collect sections, we treat each
18 // section as an atomic unit. In Mach-O, that's not the case. Sections can
19 // consist of multiple subsections, and subsections are a unit of merging and
20 // garbage-collecting. Therefore, Mach-O's subsections are more similar to
21 // ELF/COFF's sections than Mach-O's sections are.
22 //
23 // A section can have multiple symbols. A symbol that does not have the
24 // N_ALT_ENTRY attribute indicates a beginning of a subsection. Therefore, by
25 // definition, a symbol is always present at the beginning of each subsection. A
26 // symbol with N_ALT_ENTRY attribute does not start a new subsection and can
27 // point to a middle of a subsection.
28 //
29 // The notion of subsections also affects how relocations are represented in
30 // Mach-O. All references within a section need to be explicitly represented as
31 // relocations if they refer to different subsections, because we obviously need
32 // to fix up addresses if subsections are laid out in an output file differently
33 // than they were in object files. To represent that, Mach-O relocations can
34 // refer to an unnamed location via its address. Scattered relocations (those
35 // with the R_SCATTERED bit set) always refer to unnamed locations.
36 // Non-scattered relocations refer to an unnamed location if r_extern is not set
37 // and r_symbolnum is zero.
38 //
// Apart from the differences described above, you can apply your knowledge of
// ELF and COFF to Mach-O.
41 //
42 //===----------------------------------------------------------------------===//
43 
44 #include "InputFiles.h"
45 #include "Config.h"
46 #include "Driver.h"
47 #include "Dwarf.h"
48 #include "ExportTrie.h"
49 #include "InputSection.h"
50 #include "MachOStructs.h"
51 #include "ObjC.h"
52 #include "OutputSection.h"
53 #include "OutputSegment.h"
54 #include "SymbolTable.h"
55 #include "Symbols.h"
56 #include "SyntheticSections.h"
57 #include "Target.h"
58 
59 #include "lld/Common/DWARF.h"
60 #include "lld/Common/ErrorHandler.h"
61 #include "lld/Common/Memory.h"
62 #include "lld/Common/Reproduce.h"
63 #include "llvm/ADT/iterator.h"
64 #include "llvm/BinaryFormat/MachO.h"
65 #include "llvm/LTO/LTO.h"
66 #include "llvm/Support/Endian.h"
67 #include "llvm/Support/MemoryBuffer.h"
68 #include "llvm/Support/Path.h"
69 #include "llvm/Support/TarWriter.h"
70 #include "llvm/TextAPI/Architecture.h"
71 #include "llvm/TextAPI/InterfaceFile.h"
72 
73 using namespace llvm;
74 using namespace llvm::MachO;
75 using namespace llvm::support::endian;
76 using namespace llvm::sys;
77 using namespace lld;
78 using namespace lld::macho;
79 
80 // Returns "<internal>", "foo.a(bar.o)", or "baz.o".
81 std::string lld::toString(const InputFile *f) {
82   if (!f)
83     return "<internal>";
84 
85   // Multiple dylibs can be defined in one .tbd file.
86   if (auto dylibFile = dyn_cast<DylibFile>(f))
87     if (f->getName().endswith(".tbd"))
88       return (f->getName() + "(" + dylibFile->installName + ")").str();
89 
90   if (f->archiveName.empty())
91     return std::string(f->getName());
92   return (f->archiveName + "(" + path::filename(f->getName()) + ")").str();
93 }
94 
// Global collection of input files, stored in a SetVector.
SetVector<InputFile *> macho::inputFiles;
// When non-null, readFile() appends the contents of every file it loads to
// this tar writer.
std::unique_ptr<TarWriter> macho::tar;
// Source of the sequential `id` handed out by the InputFile constructor.
int InputFile::idCount = 0;
98 
99 static VersionTuple decodeVersion(uint32_t version) {
100   unsigned major = version >> 16;
101   unsigned minor = (version >> 8) & 0xffu;
102   unsigned subMinor = version & 0xffu;
103   return VersionTuple(major, minor, subMinor);
104 }
105 
106 static std::vector<PlatformInfo> getPlatformInfos(const InputFile *input) {
107   if (!isa<ObjFile>(input) && !isa<DylibFile>(input))
108     return {};
109 
110   const char *hdr = input->mb.getBufferStart();
111 
112   std::vector<PlatformInfo> platformInfos;
113   for (auto *cmd : findCommands<build_version_command>(hdr, LC_BUILD_VERSION)) {
114     PlatformInfo info;
115     info.target.Platform = static_cast<PlatformKind>(cmd->platform);
116     info.minimum = decodeVersion(cmd->minos);
117     platformInfos.emplace_back(std::move(info));
118   }
119   for (auto *cmd : findCommands<version_min_command>(
120            hdr, LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS,
121            LC_VERSION_MIN_TVOS, LC_VERSION_MIN_WATCHOS)) {
122     PlatformInfo info;
123     switch (cmd->cmd) {
124     case LC_VERSION_MIN_MACOSX:
125       info.target.Platform = PlatformKind::macOS;
126       break;
127     case LC_VERSION_MIN_IPHONEOS:
128       info.target.Platform = PlatformKind::iOS;
129       break;
130     case LC_VERSION_MIN_TVOS:
131       info.target.Platform = PlatformKind::tvOS;
132       break;
133     case LC_VERSION_MIN_WATCHOS:
134       info.target.Platform = PlatformKind::watchOS;
135       break;
136     }
137     info.minimum = decodeVersion(cmd->version);
138     platformInfos.emplace_back(std::move(info));
139   }
140 
141   return platformInfos;
142 }
143 
144 static bool checkCompatibility(const InputFile *input) {
145   std::vector<PlatformInfo> platformInfos = getPlatformInfos(input);
146   if (platformInfos.empty())
147     return true;
148 
149   auto it = find_if(platformInfos, [&](const PlatformInfo &info) {
150     return removeSimulator(info.target.Platform) ==
151            removeSimulator(config->platform());
152   });
153   if (it == platformInfos.end()) {
154     std::string platformNames;
155     raw_string_ostream os(platformNames);
156     interleave(
157         platformInfos, os,
158         [&](const PlatformInfo &info) {
159           os << getPlatformName(info.target.Platform);
160         },
161         "/");
162     error(toString(input) + " has platform " + platformNames +
163           Twine(", which is different from target platform ") +
164           getPlatformName(config->platform()));
165     return false;
166   }
167 
168   if (it->minimum > config->platformInfo.minimum)
169     warn(toString(input) + " has version " + it->minimum.getAsString() +
170          ", which is newer than target minimum of " +
171          config->platformInfo.minimum.getAsString());
172 
173   return true;
174 }
175 
176 // Open a given file path and return it as a memory-mapped file.
177 Optional<MemoryBufferRef> macho::readFile(StringRef path) {
178   ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = MemoryBuffer::getFile(path);
179   if (std::error_code ec = mbOrErr.getError()) {
180     error("cannot open " + path + ": " + ec.message());
181     return None;
182   }
183 
184   std::unique_ptr<MemoryBuffer> &mb = *mbOrErr;
185   MemoryBufferRef mbref = mb->getMemBufferRef();
186   make<std::unique_ptr<MemoryBuffer>>(std::move(mb)); // take mb ownership
187 
188   // If this is a regular non-fat file, return it.
189   const char *buf = mbref.getBufferStart();
190   const auto *hdr = reinterpret_cast<const fat_header *>(buf);
191   if (mbref.getBufferSize() < sizeof(uint32_t) ||
192       read32be(&hdr->magic) != FAT_MAGIC) {
193     if (tar)
194       tar->append(relativeToRoot(path), mbref.getBuffer());
195     return mbref;
196   }
197 
198   // Object files and archive files may be fat files, which contain multiple
199   // real files for different CPU ISAs. Here, we search for a file that matches
200   // with the current link target and returns it as a MemoryBufferRef.
201   const auto *arch = reinterpret_cast<const fat_arch *>(buf + sizeof(*hdr));
202 
203   for (uint32_t i = 0, n = read32be(&hdr->nfat_arch); i < n; ++i) {
204     if (reinterpret_cast<const char *>(arch + i + 1) >
205         buf + mbref.getBufferSize()) {
206       error(path + ": fat_arch struct extends beyond end of file");
207       return None;
208     }
209 
210     if (read32be(&arch[i].cputype) != static_cast<uint32_t>(target->cpuType) ||
211         read32be(&arch[i].cpusubtype) != target->cpuSubtype)
212       continue;
213 
214     uint32_t offset = read32be(&arch[i].offset);
215     uint32_t size = read32be(&arch[i].size);
216     if (offset + size > mbref.getBufferSize())
217       error(path + ": slice extends beyond end of file");
218     if (tar)
219       tar->append(relativeToRoot(path), mbref.getBuffer());
220     return MemoryBufferRef(StringRef(buf + offset, size), path.copy(bAlloc));
221   }
222 
223   error("unable to find matching architecture in " + path);
224   return None;
225 }
226 
// Constructs an InputFile of the given kind from a TextAPI InterfaceFile.
// The file receives the next value of the static idCount counter as its id,
// and its name is the interface's path, stored through `saver`.
InputFile::InputFile(Kind kind, const InterfaceFile &interface)
    : id(idCount++), fileKind(kind), name(saver.save(interface.getPath())) {}
229 
230 template <class Section>
231 void ObjFile::parseSections(ArrayRef<Section> sections) {
232   subsections.reserve(sections.size());
233   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
234 
235   for (const Section &sec : sections) {
236     StringRef name =
237         StringRef(sec.sectname, strnlen(sec.sectname, sizeof(sec.sectname)));
238     StringRef segname =
239         StringRef(sec.segname, strnlen(sec.segname, sizeof(sec.segname)));
240     ArrayRef<uint8_t> data = {isZeroFill(sec.flags) ? nullptr
241                                                     : buf + sec.offset,
242                               static_cast<size_t>(sec.size)};
243     if (sec.align >= 32) {
244       error("alignment " + std::to_string(sec.align) + " of section " + name +
245             " is too large");
246       subsections.push_back({});
247       continue;
248     }
249     uint32_t align = 1 << sec.align;
250     uint32_t flags = sec.flags;
251 
252     if (sectionType(sec.flags) == S_CSTRING_LITERALS ||
253         (config->dedupLiterals && isWordLiteralSection(sec.flags))) {
254       if (sec.nreloc && config->dedupLiterals)
255         fatal(toString(this) + " contains relocations in " + sec.segname + "," +
256               sec.sectname +
257               ", so LLD cannot deduplicate literals. Try re-running without "
258               "--deduplicate-literals.");
259 
260       InputSection *isec;
261       if (sectionType(sec.flags) == S_CSTRING_LITERALS) {
262         isec =
263             make<CStringInputSection>(segname, name, this, data, align, flags);
264         // FIXME: parallelize this?
265         cast<CStringInputSection>(isec)->splitIntoPieces();
266       } else {
267         isec = make<WordLiteralInputSection>(segname, name, this, data, align,
268                                              flags);
269       }
270       subsections.push_back({{0, isec}});
271     } else if (config->icfLevel != ICFLevel::none &&
272                (name == section_names::cfString &&
273                 segname == segment_names::data)) {
274       uint64_t literalSize = target->wordSize == 8 ? 32 : 16;
275       subsections.push_back({});
276       SubsectionMap &subsecMap = subsections.back();
277       for (uint64_t off = 0; off < data.size(); off += literalSize)
278         subsecMap.push_back(
279             {off, make<ConcatInputSection>(segname, name, this,
280                                            data.slice(off, literalSize), align,
281                                            flags)});
282     } else if (segname == segment_names::llvm) {
283       // ld64 does not appear to emit contents from sections within the __LLVM
284       // segment. Symbols within those sections point to bitcode metadata
285       // instead of actual symbols. Global symbols within those sections could
286       // have the same name without causing duplicate symbol errors. Push an
287       // empty map to ensure indices line up for the remaining sections.
288       // TODO: Evaluate whether the bitcode metadata is needed.
289       subsections.push_back({});
290     } else {
291       auto *isec =
292           make<ConcatInputSection>(segname, name, this, data, align, flags);
293       if (isDebugSection(isec->getFlags()) &&
294           isec->getSegName() == segment_names::dwarf) {
295         // Instead of emitting DWARF sections, we emit STABS symbols to the
296         // object files that contain them. We filter them out early to avoid
297         // parsing their relocations unnecessarily. But we must still push an
298         // empty map to ensure the indices line up for the remaining sections.
299         subsections.push_back({});
300         debugSections.push_back(isec);
301       } else {
302         subsections.push_back({{0, isec}});
303       }
304     }
305   }
306 }
307 
308 // Find the subsection corresponding to the greatest section offset that is <=
309 // that of the given offset.
310 //
311 // offset: an offset relative to the start of the original InputSection (before
312 // any subsection splitting has occurred). It will be updated to represent the
313 // same location as an offset relative to the start of the containing
314 // subsection.
315 static InputSection *findContainingSubsection(SubsectionMap &map,
316                                               uint64_t *offset) {
317   auto it = std::prev(llvm::upper_bound(
318       map, *offset, [](uint64_t value, SubsectionEntry subsecEntry) {
319         return value < subsecEntry.offset;
320       }));
321   *offset -= it->offset;
322   return it->isec;
323 }
324 
325 template <class Section>
326 static bool validateRelocationInfo(InputFile *file, const Section &sec,
327                                    relocation_info rel) {
328   const RelocAttrs &relocAttrs = target->getRelocAttrs(rel.r_type);
329   bool valid = true;
330   auto message = [relocAttrs, file, sec, rel, &valid](const Twine &diagnostic) {
331     valid = false;
332     return (relocAttrs.name + " relocation " + diagnostic + " at offset " +
333             std::to_string(rel.r_address) + " of " + sec.segname + "," +
334             sec.sectname + " in " + toString(file))
335         .str();
336   };
337 
338   if (!relocAttrs.hasAttr(RelocAttrBits::LOCAL) && !rel.r_extern)
339     error(message("must be extern"));
340   if (relocAttrs.hasAttr(RelocAttrBits::PCREL) != rel.r_pcrel)
341     error(message(Twine("must ") + (rel.r_pcrel ? "not " : "") +
342                   "be PC-relative"));
343   if (isThreadLocalVariables(sec.flags) &&
344       !relocAttrs.hasAttr(RelocAttrBits::UNSIGNED))
345     error(message("not allowed in thread-local section, must be UNSIGNED"));
346   if (rel.r_length < 2 || rel.r_length > 3 ||
347       !relocAttrs.hasAttr(static_cast<RelocAttrBits>(1 << rel.r_length))) {
348     static SmallVector<StringRef, 4> widths{"0", "4", "8", "4 or 8"};
349     error(message("has width " + std::to_string(1 << rel.r_length) +
350                   " bytes, but must be " +
351                   widths[(static_cast<int>(relocAttrs.bits) >> 2) & 3] +
352                   " bytes"));
353   }
354   return valid;
355 }
356 
// Parses the relocation records of section `sec` and attaches the resulting
// Reloc objects to the subsections in `subsecMap` that contain them.
//
// sectionHeaders: all section headers of this file; used to look up the
//                 address of the section a non-extern relocation refers to.
// sec:            header of the section whose relocations are parsed.
// subsecMap:      the subsections of `sec`.
//
// Records that fail validateRelocationInfo() are skipped. Scattered
// relocations are not supported and abort via fatal().
template <class Section>
void ObjFile::parseRelocations(ArrayRef<Section> sectionHeaders,
                               const Section &sec, SubsectionMap &subsecMap) {
  auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
  ArrayRef<relocation_info> relInfos(
      reinterpret_cast<const relocation_info *>(buf + sec.reloff), sec.nreloc);

  // Walks subsecMap from its last entry towards its first; see the "fast
  // path" comment further down.
  auto subsecIt = subsecMap.rbegin();
  for (size_t i = 0; i < relInfos.size(); i++) {
    // Paired relocations serve as Mach-O's method for attaching a
    // supplemental datum to a primary relocation record. ELF does not
    // need them because the *_RELOC_RELA records contain the extra
    // addend field, vs. *_RELOC_REL which omit the addend.
    //
    // The {X86_64,ARM64}_RELOC_SUBTRACTOR record holds the subtrahend,
    // and the paired *_RELOC_UNSIGNED record holds the minuend. The
    // datum for each is a symbolic address. The result is the offset
    // between two addresses.
    //
    // The ARM64_RELOC_ADDEND record holds the addend, and the paired
    // ARM64_RELOC_BRANCH26 or ARM64_RELOC_PAGE21/PAGEOFF12 holds the
    // base symbolic address.
    //
    // Note: X86 does not use *_RELOC_ADDEND because it can embed an
    // addend into the instruction stream. On X86, a relocatable address
    // field always occupies an entire contiguous sequence of byte(s),
    // so there is no need to merge opcode bits with address
    // bits. Therefore, it's easy and convenient to store addends in the
    // instruction-stream bytes that would otherwise contain zeroes. By
    // contrast, RISC ISAs such as ARM64 mix opcode bits with
    // address bits so that bitwise arithmetic is necessary to extract
    // and insert them. Storing addends in the instruction stream is
    // possible, but inconvenient and more costly at link time.

    int64_t pairedAddend = 0;
    relocation_info relInfo = relInfos[i];
    if (target->hasAttr(relInfo.r_type, RelocAttrBits::ADDEND)) {
      // The addend is stored sign-extended from the 24-bit r_symbolnum field;
      // the record that follows is the relocation it applies to.
      pairedAddend = SignExtend64<24>(relInfo.r_symbolnum);
      relInfo = relInfos[++i];
    }
    // NOTE(review): relInfos[++i] above is read before this assert checks the
    // index, so a trailing ADDEND record is only caught by whatever bounds
    // checking ArrayRef itself performs -- confirm.
    assert(i < relInfos.size());
    if (!validateRelocationInfo(this, sec, relInfo))
      continue;
    if (relInfo.r_address & R_SCATTERED)
      fatal("TODO: Scattered relocations not supported");

    bool isSubtrahend =
        target->hasAttr(relInfo.r_type, RelocAttrBits::SUBTRAHEND);
    int64_t embeddedAddend = target->getEmbeddedAddend(mb, sec.offset, relInfo);
    // An addend comes either from a paired record or from the relocated
    // bytes, never from both.
    assert(!(embeddedAddend && pairedAddend));
    int64_t totalAddend = pairedAddend + embeddedAddend;
    Reloc r;
    r.type = relInfo.r_type;
    r.pcrel = relInfo.r_pcrel;
    r.length = relInfo.r_length;
    r.offset = relInfo.r_address;
    if (relInfo.r_extern) {
      // Extern relocation: r_symbolnum indexes the symbol table. For a
      // subtrahend, the addend is attached to the paired minuend below.
      r.referent = symbols[relInfo.r_symbolnum];
      r.addend = isSubtrahend ? 0 : totalAddend;
    } else {
      // Non-extern relocation: r_symbolnum is a 1-based section index.
      assert(!isSubtrahend);
      const Section &referentSec = sectionHeaders[relInfo.r_symbolnum - 1];
      uint64_t referentOffset;
      if (relInfo.r_pcrel) {
        // The implicit addend for pcrel section relocations is the pcrel offset
        // in terms of the addresses in the input file. Here we adjust it so
        // that it describes the offset from the start of the referent section.
        // FIXME This logic was written around x86_64 behavior -- ARM64 doesn't
        // have pcrel section relocations. We may want to factor this out into
        // the arch-specific .cpp file.
        assert(target->hasAttr(r.type, RelocAttrBits::BYTE4));
        referentOffset =
            sec.addr + relInfo.r_address + 4 + totalAddend - referentSec.addr;
      } else {
        // The addend for a non-pcrel relocation is its absolute address.
        referentOffset = totalAddend - referentSec.addr;
      }
      SubsectionMap &referentSubsecMap = subsections[relInfo.r_symbolnum - 1];
      // Rewrites referentOffset to be relative to the subsection found.
      r.referent = findContainingSubsection(referentSubsecMap, &referentOffset);
      r.addend = referentOffset;
    }

    // Find the subsection that this relocation belongs to.
    // Though not required by the Mach-O format, clang and gcc seem to emit
    // relocations in order, so let's take advantage of it. However, ld64 emits
    // unsorted relocations (in `-r` mode), so we have a fallback for that
    // uncommon case.
    InputSection *subsec;
    while (subsecIt != subsecMap.rend() && subsecIt->offset > r.offset)
      ++subsecIt;
    if (subsecIt == subsecMap.rend() ||
        subsecIt->offset + subsecIt->isec->getSize() <= r.offset) {
      subsec = findContainingSubsection(subsecMap, &r.offset);
      // Now that we know the relocs are unsorted, avoid trying the 'fast path'
      // for the other relocations.
      subsecIt = subsecMap.rend();
    } else {
      subsec = subsecIt->isec;
      r.offset -= subsecIt->offset;
    }
    subsec->relocs.push_back(r);

    if (isSubtrahend) {
      // Consume the paired record now so the loop does not process it again.
      relocation_info minuendInfo = relInfos[++i];
      // SUBTRACTOR relocations should always be followed by an UNSIGNED one
      // attached to the same address.
      assert(target->hasAttr(minuendInfo.r_type, RelocAttrBits::UNSIGNED) &&
             relInfo.r_address == minuendInfo.r_address);
      Reloc p;
      p.type = minuendInfo.r_type;
      if (minuendInfo.r_extern) {
        p.referent = symbols[minuendInfo.r_symbolnum];
        p.addend = totalAddend;
      } else {
        uint64_t referentOffset =
            totalAddend - sectionHeaders[minuendInfo.r_symbolnum - 1].addr;
        SubsectionMap &referentSubsecMap =
            subsections[minuendInfo.r_symbolnum - 1];
        p.referent =
            findContainingSubsection(referentSubsecMap, &referentOffset);
        p.addend = referentOffset;
      }
      // The minuend is stored right after its subtrahend, in the same
      // subsection.
      subsec->relocs.push_back(p);
    }
  }
}
483 
484 template <class NList>
485 static macho::Symbol *createDefined(const NList &sym, StringRef name,
486                                     InputSection *isec, uint64_t value,
487                                     uint64_t size) {
488   // Symbol scope is determined by sym.n_type & (N_EXT | N_PEXT):
489   // N_EXT: Global symbols. These go in the symbol table during the link,
490   //        and also in the export table of the output so that the dynamic
491   //        linker sees them.
492   // N_EXT | N_PEXT: Linkage unit (think: dylib) scoped. These go in the
493   //                 symbol table during the link so that duplicates are
494   //                 either reported (for non-weak symbols) or merged
495   //                 (for weak symbols), but they do not go in the export
496   //                 table of the output.
497   // N_PEXT: llvm-mc does not emit these, but `ld -r` (wherein ld64 emits
498   //         object files) may produce them. LLD does not yet support -r.
499   //         These are translation-unit scoped, identical to the `0` case.
500   // 0: Translation-unit scoped. These are not in the symbol table during
501   //    link, and not in the export table of the output either.
502   bool isWeakDefCanBeHidden =
503       (sym.n_desc & (N_WEAK_DEF | N_WEAK_REF)) == (N_WEAK_DEF | N_WEAK_REF);
504 
505   if (sym.n_type & N_EXT) {
506     bool isPrivateExtern = sym.n_type & N_PEXT;
507     // lld's behavior for merging symbols is slightly different from ld64:
508     // ld64 picks the winning symbol based on several criteria (see
509     // pickBetweenRegularAtoms() in ld64's SymbolTable.cpp), while lld
510     // just merges metadata and keeps the contents of the first symbol
511     // with that name (see SymbolTable::addDefined). For:
512     // * inline function F in a TU built with -fvisibility-inlines-hidden
513     // * and inline function F in another TU built without that flag
514     // ld64 will pick the one from the file built without
515     // -fvisibility-inlines-hidden.
516     // lld will instead pick the one listed first on the link command line and
517     // give it visibility as if the function was built without
518     // -fvisibility-inlines-hidden.
519     // If both functions have the same contents, this will have the same
520     // behavior. If not, it won't, but the input had an ODR violation in
521     // that case.
522     //
523     // Similarly, merging a symbol
524     // that's isPrivateExtern and not isWeakDefCanBeHidden with one
525     // that's not isPrivateExtern but isWeakDefCanBeHidden technically
526     // should produce one
527     // that's not isPrivateExtern but isWeakDefCanBeHidden. That matters
528     // with ld64's semantics, because it means the non-private-extern
529     // definition will continue to take priority if more private extern
530     // definitions are encountered. With lld's semantics there's no observable
531     // difference between a symbol that's isWeakDefCanBeHidden or one that's
532     // privateExtern -- neither makes it into the dynamic symbol table. So just
533     // promote isWeakDefCanBeHidden to isPrivateExtern here.
534     if (isWeakDefCanBeHidden)
535       isPrivateExtern = true;
536 
537     return symtab->addDefined(
538         name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF,
539         isPrivateExtern, sym.n_desc & N_ARM_THUMB_DEF,
540         sym.n_desc & REFERENCED_DYNAMICALLY, sym.n_desc & N_NO_DEAD_STRIP);
541   }
542 
543   assert(!isWeakDefCanBeHidden &&
544          "weak_def_can_be_hidden on already-hidden symbol?");
545   return make<Defined>(
546       name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF,
547       /*isExternal=*/false, /*isPrivateExtern=*/false,
548       sym.n_desc & N_ARM_THUMB_DEF, sym.n_desc & REFERENCED_DYNAMICALLY,
549       sym.n_desc & N_NO_DEAD_STRIP);
550 }
551 
552 // Absolute symbols are defined symbols that do not have an associated
553 // InputSection. They cannot be weak.
554 template <class NList>
555 static macho::Symbol *createAbsolute(const NList &sym, InputFile *file,
556                                      StringRef name) {
557   if (sym.n_type & N_EXT) {
558     return symtab->addDefined(
559         name, file, nullptr, sym.n_value, /*size=*/0,
560         /*isWeakDef=*/false, sym.n_type & N_PEXT, sym.n_desc & N_ARM_THUMB_DEF,
561         /*isReferencedDynamically=*/false, sym.n_desc & N_NO_DEAD_STRIP);
562   }
563   return make<Defined>(name, file, nullptr, sym.n_value, /*size=*/0,
564                        /*isWeakDef=*/false,
565                        /*isExternal=*/false, /*isPrivateExtern=*/false,
566                        sym.n_desc & N_ARM_THUMB_DEF,
567                        /*isReferencedDynamically=*/false,
568                        sym.n_desc & N_NO_DEAD_STRIP);
569 }
570 
571 template <class NList>
572 macho::Symbol *ObjFile::parseNonSectionSymbol(const NList &sym,
573                                               StringRef name) {
574   uint8_t type = sym.n_type & N_TYPE;
575   switch (type) {
576   case N_UNDF:
577     return sym.n_value == 0
578                ? symtab->addUndefined(name, this, sym.n_desc & N_WEAK_REF)
579                : symtab->addCommon(name, this, sym.n_value,
580                                    1 << GET_COMM_ALIGN(sym.n_desc),
581                                    sym.n_type & N_PEXT);
582   case N_ABS:
583     return createAbsolute(sym, this, name);
584   case N_PBUD:
585   case N_INDR:
586     error("TODO: support symbols of type " + std::to_string(type));
587     return nullptr;
588   case N_SECT:
589     llvm_unreachable(
590         "N_SECT symbols should not be passed to parseNonSectionSymbol");
591   default:
592     llvm_unreachable("invalid symbol type");
593   }
594 }
595 
596 template <class NList>
597 static bool isUndef(const NList &sym) {
598   return (sym.n_type & N_TYPE) == N_UNDF && sym.n_value == 0;
599 }
600 
601 template <class LP>
602 void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
603                            ArrayRef<typename LP::nlist> nList,
604                            const char *strtab, bool subsectionsViaSymbols) {
605   using NList = typename LP::nlist;
606 
607   // Groups indices of the symbols by the sections that contain them.
608   std::vector<std::vector<uint32_t>> symbolsBySection(subsections.size());
609   symbols.resize(nList.size());
610   SmallVector<unsigned, 32> undefineds;
611   for (uint32_t i = 0; i < nList.size(); ++i) {
612     const NList &sym = nList[i];
613 
614     // Ignore debug symbols for now.
615     // FIXME: may need special handling.
616     if (sym.n_type & N_STAB)
617       continue;
618 
619     StringRef name = strtab + sym.n_strx;
620     if ((sym.n_type & N_TYPE) == N_SECT) {
621       SubsectionMap &subsecMap = subsections[sym.n_sect - 1];
622       // parseSections() may have chosen not to parse this section.
623       if (subsecMap.empty())
624         continue;
625       symbolsBySection[sym.n_sect - 1].push_back(i);
626     } else if (isUndef(sym)) {
627       undefineds.push_back(i);
628     } else {
629       symbols[i] = parseNonSectionSymbol(sym, name);
630     }
631   }
632 
633   for (size_t i = 0; i < subsections.size(); ++i) {
634     SubsectionMap &subsecMap = subsections[i];
635     if (subsecMap.empty())
636       continue;
637 
638     std::vector<uint32_t> &symbolIndices = symbolsBySection[i];
639     uint64_t sectionAddr = sectionHeaders[i].addr;
640     uint32_t sectionAlign = 1u << sectionHeaders[i].align;
641 
642     InputSection *isec = subsecMap.back().isec;
643     // __cfstring has already been split into subsections during
644     // parseSections(), so we simply need to match Symbols to the corresponding
645     // subsection here.
646     if (config->icfLevel != ICFLevel::none && isCfStringSection(isec)) {
647       for (size_t j = 0; j < symbolIndices.size(); ++j) {
648         uint32_t symIndex = symbolIndices[j];
649         const NList &sym = nList[symIndex];
650         StringRef name = strtab + sym.n_strx;
651         uint64_t symbolOffset = sym.n_value - sectionAddr;
652         InputSection *isec = findContainingSubsection(subsecMap, &symbolOffset);
653         if (symbolOffset != 0) {
654           error(toString(this) + ": __cfstring contains symbol " + name +
655                 " at misaligned offset");
656           continue;
657         }
658         symbols[symIndex] = createDefined(sym, name, isec, 0, isec->getSize());
659       }
660       continue;
661     }
662 
663     // Calculate symbol sizes and create subsections by splitting the sections
664     // along symbol boundaries.
665     // We populate subsecMap by repeatedly splitting the last (highest address)
666     // subsection.
667     llvm::stable_sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) {
668       return nList[lhs].n_value < nList[rhs].n_value;
669     });
670     SubsectionEntry subsecEntry = subsecMap.back();
671     for (size_t j = 0; j < symbolIndices.size(); ++j) {
672       uint32_t symIndex = symbolIndices[j];
673       const NList &sym = nList[symIndex];
674       StringRef name = strtab + sym.n_strx;
675       InputSection *isec = subsecEntry.isec;
676 
677       uint64_t subsecAddr = sectionAddr + subsecEntry.offset;
678       size_t symbolOffset = sym.n_value - subsecAddr;
679       uint64_t symbolSize =
680           j + 1 < symbolIndices.size()
681               ? nList[symbolIndices[j + 1]].n_value - sym.n_value
682               : isec->data.size() - symbolOffset;
683       // There are 4 cases where we do not need to create a new subsection:
684       //   1. If the input file does not use subsections-via-symbols.
685       //   2. Multiple symbols at the same address only induce one subsection.
686       //      (The symbolOffset == 0 check covers both this case as well as
687       //      the first loop iteration.)
688       //   3. Alternative entry points do not induce new subsections.
689       //   4. If we have a literal section (e.g. __cstring and __literal4).
690       if (!subsectionsViaSymbols || symbolOffset == 0 ||
691           sym.n_desc & N_ALT_ENTRY || !isa<ConcatInputSection>(isec)) {
692         symbols[symIndex] =
693             createDefined(sym, name, isec, symbolOffset, symbolSize);
694         continue;
695       }
696       auto *concatIsec = cast<ConcatInputSection>(isec);
697 
698       auto *nextIsec = make<ConcatInputSection>(*concatIsec);
699       nextIsec->numRefs = 0;
700       nextIsec->wasCoalesced = false;
701       if (isZeroFill(isec->getFlags())) {
702         // Zero-fill sections have NULL data.data() non-zero data.size()
703         nextIsec->data = {nullptr, isec->data.size() - symbolOffset};
704         isec->data = {nullptr, symbolOffset};
705       } else {
706         nextIsec->data = isec->data.slice(symbolOffset);
707         isec->data = isec->data.slice(0, symbolOffset);
708       }
709 
710       // By construction, the symbol will be at offset zero in the new
711       // subsection.
712       symbols[symIndex] =
713           createDefined(sym, name, nextIsec, /*value=*/0, symbolSize);
714       // TODO: ld64 appears to preserve the original alignment as well as each
715       // subsection's offset from the last aligned address. We should consider
716       // emulating that behavior.
717       nextIsec->align = MinAlign(sectionAlign, sym.n_value);
718       subsecMap.push_back({sym.n_value - sectionAddr, nextIsec});
719       subsecEntry = subsecMap.back();
720     }
721   }
722 
723   // Undefined symbols can trigger recursive fetch from Archives due to
724   // LazySymbols. Process defined symbols first so that the relative order
725   // between a defined symbol and an undefined symbol does not change the
726   // symbol resolution behavior. In addition, a set of interconnected symbols
727   // will all be resolved to the same file, instead of being resolved to
728   // different files.
729   for (unsigned i : undefineds) {
730     const NList &sym = nList[i];
731     StringRef name = strtab + sym.n_strx;
732     symbols[i] = parseNonSectionSymbol(sym, name);
733   }
734 }
735 
736 OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,
737                        StringRef sectName)
738     : InputFile(OpaqueKind, mb) {
739   const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
740   ArrayRef<uint8_t> data = {buf, mb.getBufferSize()};
741   ConcatInputSection *isec =
742       make<ConcatInputSection>(segName.take_front(16), sectName.take_front(16),
743                                /*file=*/this, data);
744   isec->live = true;
745   subsections.push_back({{0, isec}});
746 }
747 
748 ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName)
749     : InputFile(ObjKind, mb), modTime(modTime) {
750   this->archiveName = std::string(archiveName);
751   if (target->wordSize == 8)
752     parse<LP64>();
753   else
754     parse<ILP32>();
755 }
756 
757 template <class LP> void ObjFile::parse() {
758   using Header = typename LP::mach_header;
759   using SegmentCommand = typename LP::segment_command;
760   using Section = typename LP::section;
761   using NList = typename LP::nlist;
762 
763   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
764   auto *hdr = reinterpret_cast<const Header *>(mb.getBufferStart());
765 
766   Architecture arch = getArchitectureFromCpuType(hdr->cputype, hdr->cpusubtype);
767   if (arch != config->arch()) {
768     error(toString(this) + " has architecture " + getArchitectureName(arch) +
769           " which is incompatible with target architecture " +
770           getArchitectureName(config->arch()));
771     return;
772   }
773 
774   if (!checkCompatibility(this))
775     return;
776 
777   for (auto *cmd : findCommands<linker_option_command>(hdr, LC_LINKER_OPTION)) {
778     StringRef data{reinterpret_cast<const char *>(cmd + 1),
779                    cmd->cmdsize - sizeof(linker_option_command)};
780     parseLCLinkerOption(this, cmd->count, data);
781   }
782 
783   ArrayRef<Section> sectionHeaders;
784   if (const load_command *cmd = findCommand(hdr, LP::segmentLCType)) {
785     auto *c = reinterpret_cast<const SegmentCommand *>(cmd);
786     sectionHeaders =
787         ArrayRef<Section>{reinterpret_cast<const Section *>(c + 1), c->nsects};
788     parseSections(sectionHeaders);
789   }
790 
791   // TODO: Error on missing LC_SYMTAB?
792   if (const load_command *cmd = findCommand(hdr, LC_SYMTAB)) {
793     auto *c = reinterpret_cast<const symtab_command *>(cmd);
794     ArrayRef<NList> nList(reinterpret_cast<const NList *>(buf + c->symoff),
795                           c->nsyms);
796     const char *strtab = reinterpret_cast<const char *>(buf) + c->stroff;
797     bool subsectionsViaSymbols = hdr->flags & MH_SUBSECTIONS_VIA_SYMBOLS;
798     parseSymbols<LP>(sectionHeaders, nList, strtab, subsectionsViaSymbols);
799   }
800 
801   // The relocations may refer to the symbols, so we parse them after we have
802   // parsed all the symbols.
803   for (size_t i = 0, n = subsections.size(); i < n; ++i)
804     if (!subsections[i].empty())
805       parseRelocations(sectionHeaders, sectionHeaders[i], subsections[i]);
806 
807   parseDebugInfo();
808   if (config->emitDataInCodeInfo)
809     parseDataInCode();
810 }
811 
812 void ObjFile::parseDebugInfo() {
813   std::unique_ptr<DwarfObject> dObj = DwarfObject::create(this);
814   if (!dObj)
815     return;
816 
817   auto *ctx = make<DWARFContext>(
818       std::move(dObj), "",
819       [&](Error err) {
820         warn(toString(this) + ": " + toString(std::move(err)));
821       },
822       [&](Error warning) {
823         warn(toString(this) + ": " + toString(std::move(warning)));
824       });
825 
826   // TODO: Since object files can contain a lot of DWARF info, we should verify
827   // that we are parsing just the info we need
828   const DWARFContext::compile_unit_range &units = ctx->compile_units();
829   // FIXME: There can be more than one compile unit per object file. See
830   // PR48637.
831   auto it = units.begin();
832   compileUnit = it->get();
833 }
834 
835 void ObjFile::parseDataInCode() {
836   const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
837   const load_command *cmd = findCommand(buf, LC_DATA_IN_CODE);
838   if (!cmd)
839     return;
840   const auto *c = reinterpret_cast<const linkedit_data_command *>(cmd);
841   dataInCodeEntries = {
842       reinterpret_cast<const data_in_code_entry *>(buf + c->dataoff),
843       c->datasize / sizeof(data_in_code_entry)};
844   assert(is_sorted(dataInCodeEntries, [](const data_in_code_entry &lhs,
845                                          const data_in_code_entry &rhs) {
846     return lhs.offset < rhs.offset;
847   }));
848 }
849 
850 // The path can point to either a dylib or a .tbd file.
851 static DylibFile *loadDylib(StringRef path, DylibFile *umbrella) {
852   Optional<MemoryBufferRef> mbref = readFile(path);
853   if (!mbref) {
854     error("could not read dylib file at " + path);
855     return nullptr;
856   }
857   return loadDylib(*mbref, umbrella);
858 }
859 
// TBD files are parsed into a series of TAPI documents (InterfaceFiles), with
// the first document storing child pointers to the rest of them. When we are
// processing a given TBD file, we store that top-level document in
// currentTopLevelTapi. When processing re-exports, we search its children for
// potentially matching documents in the same TBD file. Note that the children
// themselves don't point to further documents, i.e. this is a two-level tree.
//
// Re-exports can either refer to on-disk files, or to documents within .tbd
// files.
//
// Locates and loads the dylib with install name `path` on behalf of
// `umbrella`. The first candidate that resolves wins: the result is whatever
// loadDylib() returns for it, or a new DylibFile built from a matching child
// document of currentTopLevelTapi (which may be null). Returns nullptr when no
// candidate is found.
static DylibFile *findDylib(StringRef path, DylibFile *umbrella,
                            const InterfaceFile *currentTopLevelTapi) {
  // Search order:
  // 1. Install name basename in -F / -L directories.
  {
    StringRef stem = path::stem(path);
    // frameworkName is "<stem>.framework/<stem>"; an install name ending in
    // that suffix is treated as a framework.
    SmallString<128> frameworkName;
    path::append(frameworkName, path::Style::posix, stem + ".framework", stem);
    bool isFramework = path.endswith(frameworkName);
    if (isFramework) {
      for (StringRef dir : config->frameworkSearchPaths) {
        SmallString<128> candidate = dir;
        path::append(candidate, frameworkName);
        if (Optional<std::string> dylibPath = resolveDylibPath(candidate))
          return loadDylib(*dylibPath, umbrella);
      }
    } else if (Optional<StringRef> dylibPath = findPathCombination(
                   stem, config->librarySearchPaths, {".tbd", ".dylib"}))
      return loadDylib(*dylibPath, umbrella);
  }

  // 2. As absolute path.
  // Each configured system library root is tried as a prefix of the path.
  if (path::is_absolute(path, path::Style::posix))
    for (StringRef root : config->systemLibraryRoots)
      if (Optional<std::string> dylibPath =
              resolveDylibPath((root + path).str()))
        return loadDylib(*dylibPath, umbrella);

  // 3. As relative path.

  // TODO: Handle -dylib_file

  // Replace @executable_path, @loader_path, @rpath prefixes in install name.
  // In the first two branches `path` is re-pointed at `newPath`, so it refers
  // to newPath's storage for the rest of the function.
  SmallString<128> newPath;
  if (config->outputType == MH_EXECUTE &&
      path.consume_front("@executable_path/")) {
    // ld64 allows overriding this with the undocumented flag -executable_path.
    // lld doesn't currently implement that flag.
    // FIXME: Consider using finalOutput instead of outputFile.
    path::append(newPath, path::parent_path(config->outputFile), path);
    path = newPath;
  } else if (path.consume_front("@loader_path/")) {
    // NOTE(review): the result of fs::real_path() is not checked here.
    fs::real_path(umbrella->getName(), newPath);
    path::remove_filename(newPath);
    path::append(newPath, path);
    path = newPath;
  } else if (path.startswith("@rpath/")) {
    // Try each of the umbrella's rpaths in turn. `rpath` is a by-value copy,
    // so consume_front() below does not modify the entry stored in
    // umbrella->rpaths.
    for (StringRef rpath : umbrella->rpaths) {
      newPath.clear();
      if (rpath.consume_front("@loader_path/")) {
        // NOTE(review): the result of fs::real_path() is not checked here.
        fs::real_path(umbrella->getName(), newPath);
        path::remove_filename(newPath);
      }
      path::append(newPath, rpath, path.drop_front(strlen("@rpath/")));
      if (Optional<std::string> dylibPath = resolveDylibPath(newPath))
        return loadDylib(*dylibPath, umbrella);
    }
  }

  // Look for a document with a matching install name among the children of
  // the TBD file currently being processed.
  // FIXME: Should this be further up?
  if (currentTopLevelTapi) {
    for (InterfaceFile &child :
         make_pointee_range(currentTopLevelTapi->documents())) {
      assert(child.documents().empty());
      if (path == child.getInstallName()) {
        auto file = make<DylibFile>(child, umbrella);
        file->parseReexports(child);
        return file;
      }
    }
  }

  // Finally, try the (possibly rewritten) path as-is.
  if (Optional<std::string> dylibPath = resolveDylibPath(path))
    return loadDylib(*dylibPath, umbrella);

  return nullptr;
}
946 
947 // If a re-exported dylib is public (lives in /usr/lib or
948 // /System/Library/Frameworks), then it is considered implicitly linked: we
949 // should bind to its symbols directly instead of via the re-exporting umbrella
950 // library.
951 static bool isImplicitlyLinked(StringRef path) {
952   if (!config->implicitDylibs)
953     return false;
954 
955   if (path::parent_path(path) == "/usr/lib")
956     return true;
957 
958   // Match /System/Library/Frameworks/$FOO.framework/**/$FOO
959   if (path.consume_front("/System/Library/Frameworks/")) {
960     StringRef frameworkName = path.take_until([](char c) { return c == '.'; });
961     return path::filename(path) == frameworkName;
962   }
963 
964   return false;
965 }
966 
967 static void loadReexport(StringRef path, DylibFile *umbrella,
968                          const InterfaceFile *currentTopLevelTapi) {
969   DylibFile *reexport = findDylib(path, umbrella, currentTopLevelTapi);
970   if (!reexport)
971     error("unable to locate re-export with install name " + path);
972 }
973 
974 DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
975                      bool isBundleLoader)
976     : InputFile(DylibKind, mb), refState(RefState::Unreferenced),
977       isBundleLoader(isBundleLoader) {
978   assert(!isBundleLoader || !umbrella);
979   if (umbrella == nullptr)
980     umbrella = this;
981   this->umbrella = umbrella;
982 
983   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
984   auto *hdr = reinterpret_cast<const mach_header *>(mb.getBufferStart());
985 
986   // Initialize installName.
987   if (const load_command *cmd = findCommand(hdr, LC_ID_DYLIB)) {
988     auto *c = reinterpret_cast<const dylib_command *>(cmd);
989     currentVersion = read32le(&c->dylib.current_version);
990     compatibilityVersion = read32le(&c->dylib.compatibility_version);
991     installName =
992         reinterpret_cast<const char *>(cmd) + read32le(&c->dylib.name);
993   } else if (!isBundleLoader) {
994     // macho_executable and macho_bundle don't have LC_ID_DYLIB,
995     // so it's OK.
996     error("dylib " + toString(this) + " missing LC_ID_DYLIB load command");
997     return;
998   }
999 
1000   if (config->printEachFile)
1001     message(toString(this));
1002   inputFiles.insert(this);
1003 
1004   deadStrippable = hdr->flags & MH_DEAD_STRIPPABLE_DYLIB;
1005 
1006   if (!checkCompatibility(this))
1007     return;
1008 
1009   checkAppExtensionSafety(hdr->flags & MH_APP_EXTENSION_SAFE);
1010 
1011   for (auto *cmd : findCommands<rpath_command>(hdr, LC_RPATH)) {
1012     StringRef rpath{reinterpret_cast<const char *>(cmd) + cmd->path};
1013     rpaths.push_back(rpath);
1014   }
1015 
1016   // Initialize symbols.
1017   exportingFile = isImplicitlyLinked(installName) ? this : this->umbrella;
1018   if (const load_command *cmd = findCommand(hdr, LC_DYLD_INFO_ONLY)) {
1019     auto *c = reinterpret_cast<const dyld_info_command *>(cmd);
1020     parseTrie(buf + c->export_off, c->export_size,
1021               [&](const Twine &name, uint64_t flags) {
1022                 StringRef savedName = saver.save(name);
1023                 if (handleLDSymbol(savedName))
1024                   return;
1025                 bool isWeakDef = flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
1026                 bool isTlv = flags & EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL;
1027                 symbols.push_back(symtab->addDylib(savedName, exportingFile,
1028                                                    isWeakDef, isTlv));
1029               });
1030   } else {
1031     error("LC_DYLD_INFO_ONLY not found in " + toString(this));
1032     return;
1033   }
1034 }
1035 
1036 void DylibFile::parseLoadCommands(MemoryBufferRef mb) {
1037   auto *hdr = reinterpret_cast<const mach_header *>(mb.getBufferStart());
1038   const uint8_t *p = reinterpret_cast<const uint8_t *>(mb.getBufferStart()) +
1039                      target->headerSize;
1040   for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
1041     auto *cmd = reinterpret_cast<const load_command *>(p);
1042     p += cmd->cmdsize;
1043 
1044     if (!(hdr->flags & MH_NO_REEXPORTED_DYLIBS) &&
1045         cmd->cmd == LC_REEXPORT_DYLIB) {
1046       const auto *c = reinterpret_cast<const dylib_command *>(cmd);
1047       StringRef reexportPath =
1048           reinterpret_cast<const char *>(c) + read32le(&c->dylib.name);
1049       loadReexport(reexportPath, exportingFile, nullptr);
1050     }
1051 
1052     // FIXME: What about LC_LOAD_UPWARD_DYLIB, LC_LAZY_LOAD_DYLIB,
1053     // LC_LOAD_WEAK_DYLIB, LC_REEXPORT_DYLIB (..are reexports from dylibs with
1054     // MH_NO_REEXPORTED_DYLIBS loaded for -flat_namespace)?
1055     if (config->namespaceKind == NamespaceKind::flat &&
1056         cmd->cmd == LC_LOAD_DYLIB) {
1057       const auto *c = reinterpret_cast<const dylib_command *>(cmd);
1058       StringRef dylibPath =
1059           reinterpret_cast<const char *>(c) + read32le(&c->dylib.name);
1060       DylibFile *dylib = findDylib(dylibPath, umbrella, nullptr);
1061       if (!dylib)
1062         error(Twine("unable to locate library '") + dylibPath +
1063               "' loaded from '" + toString(this) + "' for -flat_namespace");
1064     }
1065   }
1066 }
1067 
// Some versions of Xcode ship with .tbd files that don't have the right
// platform settings. Dylibs whose install name appears in this list are
// exempt from the target-platform checks applied to TBD documents and to
// their re-exported libraries.
static constexpr std::array<StringRef, 3> skipPlatformChecks{
    "/usr/lib/system/libsystem_kernel.dylib",
    "/usr/lib/system/libsystem_platform.dylib",
    "/usr/lib/system/libsystem_pthread.dylib"};
1074 
1075 DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella,
1076                      bool isBundleLoader)
1077     : InputFile(DylibKind, interface), refState(RefState::Unreferenced),
1078       isBundleLoader(isBundleLoader) {
1079   // FIXME: Add test for the missing TBD code path.
1080 
1081   if (umbrella == nullptr)
1082     umbrella = this;
1083   this->umbrella = umbrella;
1084 
1085   installName = saver.save(interface.getInstallName());
1086   compatibilityVersion = interface.getCompatibilityVersion().rawValue();
1087   currentVersion = interface.getCurrentVersion().rawValue();
1088 
1089   if (config->printEachFile)
1090     message(toString(this));
1091   inputFiles.insert(this);
1092 
1093   if (!is_contained(skipPlatformChecks, installName) &&
1094       !is_contained(interface.targets(), config->platformInfo.target)) {
1095     error(toString(this) + " is incompatible with " +
1096           std::string(config->platformInfo.target));
1097     return;
1098   }
1099 
1100   checkAppExtensionSafety(interface.isApplicationExtensionSafe());
1101 
1102   exportingFile = isImplicitlyLinked(installName) ? this : umbrella;
1103   auto addSymbol = [&](const Twine &name) -> void {
1104     symbols.push_back(symtab->addDylib(saver.save(name), exportingFile,
1105                                        /*isWeakDef=*/false,
1106                                        /*isTlv=*/false));
1107   };
1108   // TODO(compnerd) filter out symbols based on the target platform
1109   // TODO: handle weak defs, thread locals
1110   for (const auto *symbol : interface.symbols()) {
1111     if (!symbol->getArchitectures().has(config->arch()))
1112       continue;
1113 
1114     if (handleLDSymbol(symbol->getName()))
1115       continue;
1116 
1117     switch (symbol->getKind()) {
1118     case SymbolKind::GlobalSymbol:
1119       addSymbol(symbol->getName());
1120       break;
1121     case SymbolKind::ObjectiveCClass:
1122       // XXX ld64 only creates these symbols when -ObjC is passed in. We may
1123       // want to emulate that.
1124       addSymbol(objc::klass + symbol->getName());
1125       addSymbol(objc::metaclass + symbol->getName());
1126       break;
1127     case SymbolKind::ObjectiveCClassEHType:
1128       addSymbol(objc::ehtype + symbol->getName());
1129       break;
1130     case SymbolKind::ObjectiveCInstanceVariable:
1131       addSymbol(objc::ivar + symbol->getName());
1132       break;
1133     }
1134   }
1135 }
1136 
1137 void DylibFile::parseReexports(const InterfaceFile &interface) {
1138   const InterfaceFile *topLevel =
1139       interface.getParent() == nullptr ? &interface : interface.getParent();
1140   for (InterfaceFileRef intfRef : interface.reexportedLibraries()) {
1141     InterfaceFile::const_target_range targets = intfRef.targets();
1142     if (is_contained(skipPlatformChecks, intfRef.getInstallName()) ||
1143         is_contained(targets, config->platformInfo.target))
1144       loadReexport(intfRef.getInstallName(), exportingFile, topLevel);
1145   }
1146 }
1147 
1148 // $ld$ symbols modify the properties/behavior of the library (e.g. its install
1149 // name, compatibility version or hide/add symbols) for specific target
1150 // versions.
1151 bool DylibFile::handleLDSymbol(StringRef originalName) {
1152   if (!originalName.startswith("$ld$"))
1153     return false;
1154 
1155   StringRef action;
1156   StringRef name;
1157   std::tie(action, name) = originalName.drop_front(strlen("$ld$")).split('$');
1158   if (action == "previous")
1159     handleLDPreviousSymbol(name, originalName);
1160   else if (action == "install_name")
1161     handleLDInstallNameSymbol(name, originalName);
1162   return true;
1163 }
1164 
1165 void DylibFile::handleLDPreviousSymbol(StringRef name, StringRef originalName) {
1166   // originalName: $ld$ previous $ <installname> $ <compatversion> $
1167   // <platformstr> $ <startversion> $ <endversion> $ <symbol-name> $
1168   StringRef installName;
1169   StringRef compatVersion;
1170   StringRef platformStr;
1171   StringRef startVersion;
1172   StringRef endVersion;
1173   StringRef symbolName;
1174   StringRef rest;
1175 
1176   std::tie(installName, name) = name.split('$');
1177   std::tie(compatVersion, name) = name.split('$');
1178   std::tie(platformStr, name) = name.split('$');
1179   std::tie(startVersion, name) = name.split('$');
1180   std::tie(endVersion, name) = name.split('$');
1181   std::tie(symbolName, rest) = name.split('$');
1182   // TODO: ld64 contains some logic for non-empty symbolName as well.
1183   if (!symbolName.empty())
1184     return;
1185   unsigned platform;
1186   if (platformStr.getAsInteger(10, platform) ||
1187       platform != static_cast<unsigned>(config->platform()))
1188     return;
1189 
1190   VersionTuple start;
1191   if (start.tryParse(startVersion)) {
1192     warn("failed to parse start version, symbol '" + originalName +
1193          "' ignored");
1194     return;
1195   }
1196   VersionTuple end;
1197   if (end.tryParse(endVersion)) {
1198     warn("failed to parse end version, symbol '" + originalName + "' ignored");
1199     return;
1200   }
1201   if (config->platformInfo.minimum < start ||
1202       config->platformInfo.minimum >= end)
1203     return;
1204 
1205   this->installName = saver.save(installName);
1206 
1207   if (!compatVersion.empty()) {
1208     VersionTuple cVersion;
1209     if (cVersion.tryParse(compatVersion)) {
1210       warn("failed to parse compatibility version, symbol '" + originalName +
1211            "' ignored");
1212       return;
1213     }
1214     compatibilityVersion = encodeVersion(cVersion);
1215   }
1216 }
1217 
1218 void DylibFile::handleLDInstallNameSymbol(StringRef name,
1219                                           StringRef originalName) {
1220   // originalName: $ld$ install_name $ os<version> $ install_name
1221   StringRef condition, installName;
1222   std::tie(condition, installName) = name.split('$');
1223   VersionTuple version;
1224   if (!condition.consume_front("os") || version.tryParse(condition))
1225     warn("failed to parse os version, symbol '" + originalName + "' ignored");
1226   else if (version == config->platformInfo.minimum)
1227     this->installName = saver.save(installName);
1228 }
1229 
1230 void DylibFile::checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const {
1231   if (config->applicationExtension && !dylibIsAppExtensionSafe)
1232     warn("using '-application_extension' with unsafe dylib: " + toString(this));
1233 }
1234 
// Takes ownership of the archive `f`. The InputFile base is initialized from
// f's memory buffer reference before `file` is move-constructed from f (base
// classes are initialized before members), so reading through `f` in the
// first initializer is safe.
ArchiveFile::ArchiveFile(std::unique_ptr<object::Archive> &&f)
    : InputFile(ArchiveKind, f->getMemoryBufferRef()), file(std::move(f)) {}
1237 
1238 void ArchiveFile::addLazySymbols() {
1239   for (const object::Archive::Symbol &sym : file->symbols())
1240     symtab->addLazy(sym.getName(), this, sym);
1241 }
1242 
1243 static Expected<InputFile *> loadArchiveMember(MemoryBufferRef mb,
1244                                                uint32_t modTime,
1245                                                StringRef archiveName,
1246                                                uint64_t offsetInArchive) {
1247   if (config->zeroModTime)
1248     modTime = 0;
1249 
1250   switch (identify_magic(mb.getBuffer())) {
1251   case file_magic::macho_object:
1252     return make<ObjFile>(mb, modTime, archiveName);
1253   case file_magic::bitcode:
1254     return make<BitcodeFile>(mb, archiveName, offsetInArchive);
1255   default:
1256     return createStringError(inconvertibleErrorCode(),
1257                              mb.getBufferIdentifier() +
1258                                  " has unhandled file type");
1259   }
1260 }
1261 
1262 Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason) {
1263   if (!seen.insert(c.getChildOffset()).second)
1264     return Error::success();
1265 
1266   Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
1267   if (!mb)
1268     return mb.takeError();
1269 
1270   // Thin archives refer to .o files, so --reproduce needs the .o files too.
1271   if (tar && c.getParent()->isThin())
1272     tar->append(relativeToRoot(CHECK(c.getFullName(), this)), mb->getBuffer());
1273 
1274   Expected<TimePoint<std::chrono::seconds>> modTime = c.getLastModified();
1275   if (!modTime)
1276     return modTime.takeError();
1277 
1278   Expected<InputFile *> file =
1279       loadArchiveMember(*mb, toTimeT(*modTime), getName(), c.getChildOffset());
1280 
1281   if (!file)
1282     return file.takeError();
1283 
1284   inputFiles.insert(*file);
1285   printArchiveMemberLoad(reason, *file);
1286   return Error::success();
1287 }
1288 
1289 void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
1290   object::Archive::Child c =
1291       CHECK(sym.getMember(), toString(this) +
1292                                  ": could not get the member defining symbol " +
1293                                  toMachOString(sym));
1294 
1295   // `sym` is owned by a LazySym, which will be replace<>()d by make<ObjFile>
1296   // and become invalid after that call. Copy it to the stack so we can refer
1297   // to it later.
1298   const object::Archive::Symbol symCopy = sym;
1299 
1300   // ld64 doesn't demangle sym here even with -demangle.
1301   // Match that: intentionally don't call toMachOString().
1302   if (Error e = fetch(c, symCopy.getName()))
1303     error(toString(this) + ": could not get the member defining symbol " +
1304           toMachOString(symCopy) + ": " + toString(std::move(e)));
1305 }
1306 
1307 static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym,
1308                                           BitcodeFile &file) {
1309   StringRef name = saver.save(objSym.getName());
1310 
1311   // TODO: support weak references
1312   if (objSym.isUndefined())
1313     return symtab->addUndefined(name, &file, /*isWeakRef=*/false);
1314 
1315   // TODO: Write a test demonstrating why computing isPrivateExtern before
1316   // LTO compilation is important.
1317   bool isPrivateExtern = false;
1318   switch (objSym.getVisibility()) {
1319   case GlobalValue::HiddenVisibility:
1320     isPrivateExtern = true;
1321     break;
1322   case GlobalValue::ProtectedVisibility:
1323     error(name + " has protected visibility, which is not supported by Mach-O");
1324     break;
1325   case GlobalValue::DefaultVisibility:
1326     break;
1327   }
1328 
1329   if (objSym.isCommon())
1330     return symtab->addCommon(name, &file, objSym.getCommonSize(),
1331                              objSym.getCommonAlignment(), isPrivateExtern);
1332 
1333   return symtab->addDefined(name, &file, /*isec=*/nullptr, /*value=*/0,
1334                             /*size=*/0, objSym.isWeak(), isPrivateExtern,
1335                             /*isThumb=*/false,
1336                             /*isReferencedDynamically=*/false,
1337                             /*noDeadStrip=*/false);
1338 }
1339 
1340 BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
1341                          uint64_t offsetInArchive)
1342     : InputFile(BitcodeKind, mb) {
1343   std::string path = mb.getBufferIdentifier().str();
1344   // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
1345   // name. If two members with the same name are provided, this causes a
1346   // collision and ThinLTO can't proceed.
1347   // So, we append the archive name to disambiguate two members with the same
1348   // name from multiple different archives, and offset within the archive to
1349   // disambiguate two members of the same name from a single archive.
1350   MemoryBufferRef mbref(
1351       mb.getBuffer(),
1352       saver.save(archiveName.empty() ? path
1353                                      : archiveName + sys::path::filename(path) +
1354                                            utostr(offsetInArchive)));
1355 
1356   obj = check(lto::InputFile::create(mbref));
1357 
1358   // Convert LTO Symbols to LLD Symbols in order to perform resolution. The
1359   // "winning" symbol will then be marked as Prevailing at LTO compilation
1360   // time.
1361   for (const lto::InputFile::Symbol &objSym : obj->symbols())
1362     symbols.push_back(createBitcodeSymbol(objSym, *this));
1363 }
1364 
// Explicitly instantiate the LP64 variant of ObjFile::parse.
template void ObjFile::parse<LP64>();
1366