16acd3003SFangrui Song //===- InputFiles.cpp -----------------------------------------------------===//
26acd3003SFangrui Song //
36acd3003SFangrui Song // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
46acd3003SFangrui Song // See https://llvm.org/LICENSE.txt for license information.
56acd3003SFangrui Song // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66acd3003SFangrui Song //
76acd3003SFangrui Song //===----------------------------------------------------------------------===//
86acd3003SFangrui Song //
96acd3003SFangrui Song // This file contains functions to parse Mach-O object files. In this comment,
106acd3003SFangrui Song // we describe the Mach-O file structure and how we parse it.
116acd3003SFangrui Song //
126acd3003SFangrui Song // Mach-O is not very different from ELF or COFF. The notion of symbols,
136acd3003SFangrui Song // sections and relocations exists in Mach-O as it does in ELF and COFF.
146acd3003SFangrui Song //
156acd3003SFangrui Song // Perhaps the notion that is new to those who know ELF/COFF is "subsections".
166acd3003SFangrui Song // In ELF/COFF, sections are an atomic unit of data copied from input files to
176acd3003SFangrui Song // output files. When we merge or garbage-collect sections, we treat each
186acd3003SFangrui Song // section as an atomic unit. In Mach-O, that's not the case. Sections can
196acd3003SFangrui Song // consist of multiple subsections, and subsections are a unit of merging and
206acd3003SFangrui Song // garbage-collecting. Therefore, Mach-O's subsections are more similar to
216acd3003SFangrui Song // ELF/COFF's sections than Mach-O's sections are.
226acd3003SFangrui Song //
236acd3003SFangrui Song // A section can have multiple symbols. A symbol that does not have the
246acd3003SFangrui Song // N_ALT_ENTRY attribute indicates a beginning of a subsection. Therefore, by
256acd3003SFangrui Song // definition, a symbol is always present at the beginning of each subsection. A
266acd3003SFangrui Song // symbol with N_ALT_ENTRY attribute does not start a new subsection and can
276acd3003SFangrui Song // point to a middle of a subsection.
286acd3003SFangrui Song //
296acd3003SFangrui Song // The notion of subsections also affects how relocations are represented in
306acd3003SFangrui Song // Mach-O. All references within a section need to be explicitly represented as
316acd3003SFangrui Song // relocations if they refer to different subsections, because we obviously need
326acd3003SFangrui Song // to fix up addresses if subsections are laid out in an output file differently
336acd3003SFangrui Song // than they were in object files. To represent that, Mach-O relocations can
346acd3003SFangrui Song // refer to an unnamed location via its address. Scattered relocations (those
356acd3003SFangrui Song // with the R_SCATTERED bit set) always refer to unnamed locations.
366acd3003SFangrui Song // Non-scattered relocations refer to an unnamed location if r_extern is not set
376acd3003SFangrui Song // and r_symbolnum is zero.
386acd3003SFangrui Song //
396acd3003SFangrui Song // Without the above differences, I think you can use your knowledge about ELF
406acd3003SFangrui Song // and COFF for Mach-O.
416acd3003SFangrui Song //
426acd3003SFangrui Song //===----------------------------------------------------------------------===//
436acd3003SFangrui Song 
446acd3003SFangrui Song #include "InputFiles.h"
4587b6fd3eSJez Ng #include "Config.h"
46c519bc7eSNico Weber #include "Driver.h"
473fcb0eebSJez Ng #include "Dwarf.h"
487bbdbacdSJez Ng #include "ExportTrie.h"
496acd3003SFangrui Song #include "InputSection.h"
501e1a3f67SJez Ng #include "MachOStructs.h"
51cf918c80SJez Ng #include "ObjC.h"
526cb07313SKellie Medlin #include "OutputSection.h"
53cf918c80SJez Ng #include "OutputSegment.h"
546acd3003SFangrui Song #include "SymbolTable.h"
556acd3003SFangrui Song #include "Symbols.h"
566acd3003SFangrui Song #include "Target.h"
576acd3003SFangrui Song 
583fcb0eebSJez Ng #include "lld/Common/DWARF.h"
596acd3003SFangrui Song #include "lld/Common/ErrorHandler.h"
606acd3003SFangrui Song #include "lld/Common/Memory.h"
6183e60f5aSNico Weber #include "lld/Common/Reproduce.h"
627394460dSJez Ng #include "llvm/ADT/iterator.h"
636acd3003SFangrui Song #include "llvm/BinaryFormat/MachO.h"
6421f83113SJez Ng #include "llvm/LTO/LTO.h"
656acd3003SFangrui Song #include "llvm/Support/Endian.h"
666acd3003SFangrui Song #include "llvm/Support/MemoryBuffer.h"
6787b6fd3eSJez Ng #include "llvm/Support/Path.h"
6883e60f5aSNico Weber #include "llvm/Support/TarWriter.h"
696acd3003SFangrui Song 
706acd3003SFangrui Song using namespace llvm;
716acd3003SFangrui Song using namespace llvm::MachO;
726acd3003SFangrui Song using namespace llvm::support::endian;
7387b6fd3eSJez Ng using namespace llvm::sys;
746acd3003SFangrui Song using namespace lld;
756acd3003SFangrui Song using namespace lld::macho;
766acd3003SFangrui Song 
77b2f00f24SNico Weber // Returns "<internal>", "foo.a(bar.o)", or "baz.o".
78b2f00f24SNico Weber std::string lld::toString(const InputFile *f) {
79b2f00f24SNico Weber   if (!f)
80b2f00f24SNico Weber     return "<internal>";
81b2f00f24SNico Weber   if (f->archiveName.empty())
82b2f00f24SNico Weber     return std::string(f->getName());
83b2f00f24SNico Weber   return (path::filename(f->archiveName) + "(" + path::filename(f->getName()) +
84b2f00f24SNico Weber           ")")
85b2f00f24SNico Weber       .str();
86b2f00f24SNico Weber }
87b2f00f24SNico Weber 
// Definitions of the file-level state declared for the Mach-O input-file code.

// Set-like, insertion-ordered collection of input file pointers.
88544148aeSJez Ng SetVector<InputFile *> macho::inputFiles;
// When non-null, readFile() appends a copy of every file it reads to this
// tar writer.
8983e60f5aSNico Weber std::unique_ptr<TarWriter> macho::tar;
// Counter starting at zero.
// NOTE(review): presumably used to hand out per-file ids; the users are not
// visible in this part of the file -- confirm.
9078f6498cSJez Ng int InputFile::idCount = 0;
916acd3003SFangrui Song 
926acd3003SFangrui Song // Open a given file path and return it as a memory-mapped file.
936acd3003SFangrui Song Optional<MemoryBufferRef> macho::readFile(StringRef path) {
946acd3003SFangrui Song   // Open a file.
956acd3003SFangrui Song   auto mbOrErr = MemoryBuffer::getFile(path);
966acd3003SFangrui Song   if (auto ec = mbOrErr.getError()) {
976acd3003SFangrui Song     error("cannot open " + path + ": " + ec.message());
986acd3003SFangrui Song     return None;
996acd3003SFangrui Song   }
1006acd3003SFangrui Song 
1016acd3003SFangrui Song   std::unique_ptr<MemoryBuffer> &mb = *mbOrErr;
1026acd3003SFangrui Song   MemoryBufferRef mbref = mb->getMemBufferRef();
1036acd3003SFangrui Song   make<std::unique_ptr<MemoryBuffer>>(std::move(mb)); // take mb ownership
104060efd24SJez Ng 
105060efd24SJez Ng   // If this is a regular non-fat file, return it.
106060efd24SJez Ng   const char *buf = mbref.getBufferStart();
107060efd24SJez Ng   auto *hdr = reinterpret_cast<const MachO::fat_header *>(buf);
10883e60f5aSNico Weber   if (read32be(&hdr->magic) != MachO::FAT_MAGIC) {
10983e60f5aSNico Weber     if (tar)
11083e60f5aSNico Weber       tar->append(relativeToRoot(path), mbref.getBuffer());
1116acd3003SFangrui Song     return mbref;
11283e60f5aSNico Weber   }
113060efd24SJez Ng 
114918948dbSJez Ng   // Object files and archive files may be fat files, which contains
115918948dbSJez Ng   // multiple real files for different CPU ISAs. Here, we search for a
116918948dbSJez Ng   // file that matches with the current link target and returns it as
117918948dbSJez Ng   // a MemoryBufferRef.
118918948dbSJez Ng   auto *arch = reinterpret_cast<const MachO::fat_arch *>(buf + sizeof(*hdr));
119918948dbSJez Ng 
120918948dbSJez Ng   for (uint32_t i = 0, n = read32be(&hdr->nfat_arch); i < n; ++i) {
121918948dbSJez Ng     if (reinterpret_cast<const char *>(arch + i + 1) >
122918948dbSJez Ng         buf + mbref.getBufferSize()) {
123918948dbSJez Ng       error(path + ": fat_arch struct extends beyond end of file");
124918948dbSJez Ng       return None;
125918948dbSJez Ng     }
126918948dbSJez Ng 
127918948dbSJez Ng     if (read32be(&arch[i].cputype) != target->cpuType ||
128918948dbSJez Ng         read32be(&arch[i].cpusubtype) != target->cpuSubtype)
129918948dbSJez Ng       continue;
130918948dbSJez Ng 
131918948dbSJez Ng     uint32_t offset = read32be(&arch[i].offset);
132918948dbSJez Ng     uint32_t size = read32be(&arch[i].size);
133918948dbSJez Ng     if (offset + size > mbref.getBufferSize())
134918948dbSJez Ng       error(path + ": slice extends beyond end of file");
13583e60f5aSNico Weber     if (tar)
13683e60f5aSNico Weber       tar->append(relativeToRoot(path), mbref.getBuffer());
137918948dbSJez Ng     return MemoryBufferRef(StringRef(buf + offset, size), path.copy(bAlloc));
138918948dbSJez Ng   }
139918948dbSJez Ng 
140918948dbSJez Ng   error("unable to find matching architecture in " + path);
141060efd24SJez Ng   return None;
1426acd3003SFangrui Song }
1436acd3003SFangrui Song 
144cf918c80SJez Ng const load_command *macho::findCommand(const mach_header_64 *hdr,
1456acd3003SFangrui Song                                        uint32_t type) {
1466acd3003SFangrui Song   const uint8_t *p =
1476acd3003SFangrui Song       reinterpret_cast<const uint8_t *>(hdr) + sizeof(mach_header_64);
1486acd3003SFangrui Song 
1496acd3003SFangrui Song   for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
1506acd3003SFangrui Song     auto *cmd = reinterpret_cast<const load_command *>(p);
1516acd3003SFangrui Song     if (cmd->cmd == type)
1526acd3003SFangrui Song       return cmd;
1536acd3003SFangrui Song     p += cmd->cmdsize;
1546acd3003SFangrui Song   }
1556acd3003SFangrui Song   return nullptr;
1566acd3003SFangrui Song }
1576acd3003SFangrui Song 
1587b007ac0SJez Ng void ObjFile::parseSections(ArrayRef<section_64> sections) {
1594eb6f485SJez Ng   subsections.reserve(sections.size());
1606acd3003SFangrui Song   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
1616acd3003SFangrui Song 
1626acd3003SFangrui Song   for (const section_64 &sec : sections) {
1636acd3003SFangrui Song     InputSection *isec = make<InputSection>();
1646acd3003SFangrui Song     isec->file = this;
165cf918c80SJez Ng     isec->name =
166cf918c80SJez Ng         StringRef(sec.sectname, strnlen(sec.sectname, sizeof(sec.sectname)));
167cf918c80SJez Ng     isec->segname =
168cf918c80SJez Ng         StringRef(sec.segname, strnlen(sec.segname, sizeof(sec.segname)));
16974871cdaSJez Ng     isec->data = {isZeroFill(sec.flags) ? nullptr : buf + sec.offset,
17074871cdaSJez Ng                   static_cast<size_t>(sec.size)};
1716acd3003SFangrui Song     if (sec.align >= 32)
1726acd3003SFangrui Song       error("alignment " + std::to_string(sec.align) + " of section " +
1736acd3003SFangrui Song             isec->name + " is too large");
1746acd3003SFangrui Song     else
1756acd3003SFangrui Song       isec->align = 1 << sec.align;
1766acd3003SFangrui Song     isec->flags = sec.flags;
177863f7a74SJez Ng 
178863f7a74SJez Ng     if (!(isDebugSection(isec->flags) &&
179863f7a74SJez Ng           isec->segname == segment_names::dwarf)) {
1804eb6f485SJez Ng       subsections.push_back({{0, isec}});
181863f7a74SJez Ng     } else {
182863f7a74SJez Ng       // Instead of emitting DWARF sections, we emit STABS symbols to the
183863f7a74SJez Ng       // object files that contain them. We filter them out early to avoid
184863f7a74SJez Ng       // parsing their relocations unnecessarily. But we must still push an
185863f7a74SJez Ng       // empty map to ensure the indices line up for the remaining sections.
186863f7a74SJez Ng       subsections.push_back({});
187863f7a74SJez Ng       debugSections.push_back(isec);
188863f7a74SJez Ng     }
1894eb6f485SJez Ng   }
1906acd3003SFangrui Song }
1916acd3003SFangrui Song 
1924eb6f485SJez Ng // Find the subsection corresponding to the greatest section offset that is <=
1934eb6f485SJez Ng // that of the given offset.
1944eb6f485SJez Ng //
1954eb6f485SJez Ng // offset: an offset relative to the start of the original InputSection (before
1964eb6f485SJez Ng // any subsection splitting has occurred). It will be updated to represent the
1974eb6f485SJez Ng // same location as an offset relative to the start of the containing
1984eb6f485SJez Ng // subsection.
1994eb6f485SJez Ng static InputSection *findContainingSubsection(SubsectionMap &map,
2004eb6f485SJez Ng                                               uint32_t *offset) {
2014eb6f485SJez Ng   auto it = std::prev(map.upper_bound(*offset));
2024eb6f485SJez Ng   *offset -= it->first;
2034eb6f485SJez Ng   return it->second;
2046acd3003SFangrui Song }
2056acd3003SFangrui Song 
2063a9d2f14SGreg McGary static bool validateRelocationInfo(MemoryBufferRef mb, const section_64 &sec,
2073a9d2f14SGreg McGary                                    relocation_info rel) {
2083a9d2f14SGreg McGary   const TargetInfo::RelocAttrs &relocAttrs = target->getRelocAttrs(rel.r_type);
2093a9d2f14SGreg McGary   bool valid = true;
2103a9d2f14SGreg McGary   auto message = [relocAttrs, mb, sec, rel, &valid](const Twine &diagnostic) {
2113a9d2f14SGreg McGary     valid = false;
2123a9d2f14SGreg McGary     return (relocAttrs.name + " relocation " + diagnostic + " at offset " +
2133a9d2f14SGreg McGary             std::to_string(rel.r_address) + " of " + sec.segname + "," +
2143a9d2f14SGreg McGary             sec.sectname + " in " + mb.getBufferIdentifier())
2153a9d2f14SGreg McGary         .str();
2163a9d2f14SGreg McGary   };
2173a9d2f14SGreg McGary 
2183a9d2f14SGreg McGary   if (!relocAttrs.hasAttr(RelocAttrBits::LOCAL) && !rel.r_extern)
2193a9d2f14SGreg McGary     error(message("must be extern"));
2203a9d2f14SGreg McGary   if (relocAttrs.hasAttr(RelocAttrBits::PCREL) != rel.r_pcrel)
2213a9d2f14SGreg McGary     error(message(Twine("must ") + (rel.r_pcrel ? "not " : "") +
2223a9d2f14SGreg McGary                   "be PC-relative"));
2233a9d2f14SGreg McGary   if (isThreadLocalVariables(sec.flags) &&
224*87104faaSGreg McGary       !relocAttrs.hasAttr(RelocAttrBits::TLV | RelocAttrBits::BYTE8))
2253a9d2f14SGreg McGary     error(message("not allowed in thread-local section, must be UNSIGNED"));
2263a9d2f14SGreg McGary   if (rel.r_length < 2 || rel.r_length > 3 ||
2273a9d2f14SGreg McGary       !relocAttrs.hasAttr(static_cast<RelocAttrBits>(1 << rel.r_length))) {
228*87104faaSGreg McGary     static SmallVector<StringRef, 4> widths{"0", "4", "8", "4 or 8"};
2293a9d2f14SGreg McGary     error(message("has width " + std::to_string(1 << rel.r_length) +
2303a9d2f14SGreg McGary                   " bytes, but must be " +
2313a9d2f14SGreg McGary                   widths[(static_cast<int>(relocAttrs.bits) >> 2) & 3] +
2323a9d2f14SGreg McGary                   " bytes"));
2333a9d2f14SGreg McGary   }
2343a9d2f14SGreg McGary   return valid;
2353a9d2f14SGreg McGary }
2363a9d2f14SGreg McGary 
2377b007ac0SJez Ng void ObjFile::parseRelocations(const section_64 &sec,
2384eb6f485SJez Ng                                SubsectionMap &subsecMap) {
2396acd3003SFangrui Song   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
240d4ec3346SGreg McGary   ArrayRef<relocation_info> relInfos(
241d4ec3346SGreg McGary       reinterpret_cast<const relocation_info *>(buf + sec.reloff), sec.nreloc);
2426acd3003SFangrui Song 
243d4ec3346SGreg McGary   for (size_t i = 0; i < relInfos.size(); i++) {
244d4ec3346SGreg McGary     // Paired relocations serve as Mach-O's method for attaching a
245d4ec3346SGreg McGary     // supplemental datum to a primary relocation record. ELF does not
246d4ec3346SGreg McGary     // need them because the *_RELOC_RELA records contain the extra
247d4ec3346SGreg McGary     // addend field, vs. *_RELOC_REL which omit the addend.
248d4ec3346SGreg McGary     //
249d4ec3346SGreg McGary     // The {X86_64,ARM64}_RELOC_SUBTRACTOR record holds the subtrahend,
250d4ec3346SGreg McGary     // and the paired *_RELOC_UNSIGNED record holds the minuend. The
2513a9d2f14SGreg McGary     // datum for each is a symbolic address. The result is the offset
2523a9d2f14SGreg McGary     // between two addresses.
253d4ec3346SGreg McGary     //
254d4ec3346SGreg McGary     // The ARM64_RELOC_ADDEND record holds the addend, and the paired
255d4ec3346SGreg McGary     // ARM64_RELOC_BRANCH26 or ARM64_RELOC_PAGE21/PAGEOFF12 holds the
256d4ec3346SGreg McGary     // base symbolic address.
257d4ec3346SGreg McGary     //
258d4ec3346SGreg McGary     // Note: X86 does not use *_RELOC_ADDEND because it can embed an
259d4ec3346SGreg McGary     // addend into the instruction stream. On X86, a relocatable address
260d4ec3346SGreg McGary     // field always occupies an entire contiguous sequence of byte(s),
261d4ec3346SGreg McGary     // so there is no need to merge opcode bits with address
262d4ec3346SGreg McGary     // bits. Therefore, it's easy and convenient to store addends in the
263d4ec3346SGreg McGary     // instruction-stream bytes that would otherwise contain zeroes. By
264d4ec3346SGreg McGary     // contrast, RISC ISAs such as ARM64 mix opcode bits with with
265d4ec3346SGreg McGary     // address bits so that bitwise arithmetic is necessary to extract
266d4ec3346SGreg McGary     // and insert them. Storing addends in the instruction stream is
267d4ec3346SGreg McGary     // possible, but inconvenient and more costly at link time.
268d4ec3346SGreg McGary 
2693a9d2f14SGreg McGary     uint64_t pairedAddend = 0;
2703a9d2f14SGreg McGary     relocation_info relInfo = relInfos[i];
2713a9d2f14SGreg McGary     if (target->hasAttr(relInfo.r_type, RelocAttrBits::ADDEND)) {
2723a9d2f14SGreg McGary       pairedAddend = SignExtend64<24>(relInfo.r_symbolnum);
2733a9d2f14SGreg McGary       relInfo = relInfos[++i];
2743a9d2f14SGreg McGary     }
275d4ec3346SGreg McGary     assert(i < relInfos.size());
2763a9d2f14SGreg McGary     if (!validateRelocationInfo(mb, sec, relInfo))
2773a9d2f14SGreg McGary       continue;
278d4ec3346SGreg McGary     if (relInfo.r_address & R_SCATTERED)
2794eb6f485SJez Ng       fatal("TODO: Scattered relocations not supported");
2803a9d2f14SGreg McGary     uint64_t embeddedAddend = target->getEmbeddedAddend(mb, sec, relInfo);
2813a9d2f14SGreg McGary     assert(!(embeddedAddend && pairedAddend));
2823a9d2f14SGreg McGary     uint64_t totalAddend = pairedAddend + embeddedAddend;
2834eb6f485SJez Ng 
2843a9d2f14SGreg McGary     Reloc p;
2853a9d2f14SGreg McGary     if (target->hasAttr(relInfo.r_type, RelocAttrBits::SUBTRAHEND)) {
2863a9d2f14SGreg McGary       p.type = relInfo.r_type;
2873a9d2f14SGreg McGary       p.referent = symbols[relInfo.r_symbolnum];
2883a9d2f14SGreg McGary       relInfo = relInfos[++i];
2893a9d2f14SGreg McGary     }
2904eb6f485SJez Ng     Reloc r;
2911a3ef041SGreg McGary     r.type = relInfo.r_type;
2921a3ef041SGreg McGary     r.pcrel = relInfo.r_pcrel;
2931a3ef041SGreg McGary     r.length = relInfo.r_length;
294d4ec3346SGreg McGary     r.offset = relInfo.r_address;
2951a3ef041SGreg McGary     if (relInfo.r_extern) {
2961a3ef041SGreg McGary       r.referent = symbols[relInfo.r_symbolnum];
2973a9d2f14SGreg McGary       r.addend = totalAddend;
298198b0c57SJez Ng     } else {
2991a3ef041SGreg McGary       SubsectionMap &referentSubsecMap = subsections[relInfo.r_symbolnum - 1];
3001a3ef041SGreg McGary       const section_64 &referentSec = sectionHeaders[relInfo.r_symbolnum - 1];
3011a3ef041SGreg McGary       uint32_t referentOffset;
3021a3ef041SGreg McGary       if (relInfo.r_pcrel) {
3034eb6f485SJez Ng         // The implicit addend for pcrel section relocations is the pcrel offset
304fcde378dSJez Ng         // in terms of the addresses in the input file. Here we adjust it so
3051a3ef041SGreg McGary         // that it describes the offset from the start of the referent section.
3064eb6f485SJez Ng         // TODO: The offset of 4 is probably not right for ARM64, nor for
3074eb6f485SJez Ng         //       relocations with r_length != 2.
3081a3ef041SGreg McGary         referentOffset =
3093a9d2f14SGreg McGary             sec.addr + relInfo.r_address + 4 + totalAddend - referentSec.addr;
310fcde378dSJez Ng       } else {
311fcde378dSJez Ng         // The addend for a non-pcrel relocation is its absolute address.
3123a9d2f14SGreg McGary         referentOffset = totalAddend - referentSec.addr;
313fcde378dSJez Ng       }
3141a3ef041SGreg McGary       r.referent = findContainingSubsection(referentSubsecMap, &referentOffset);
3151a3ef041SGreg McGary       r.addend = referentOffset;
3164eb6f485SJez Ng     }
3174eb6f485SJez Ng 
3186f6d9186SJez Ng     InputSection *subsec = findContainingSubsection(subsecMap, &r.offset);
3193a9d2f14SGreg McGary     if (p.type != GENERIC_RELOC_INVALID &&
3203a9d2f14SGreg McGary         target->hasAttr(p.type, RelocAttrBits::SUBTRAHEND))
3213a9d2f14SGreg McGary       subsec->relocs.push_back(p);
3224eb6f485SJez Ng     subsec->relocs.push_back(r);
3236acd3003SFangrui Song   }
3246acd3003SFangrui Song }
3254eb6f485SJez Ng 
32662a3f0c9SJez Ng static macho::Symbol *createDefined(const structs::nlist_64 &sym,
32762a3f0c9SJez Ng                                     StringRef name, InputSection *isec,
32862a3f0c9SJez Ng                                     uint32_t value) {
32913f439a1SNico Weber   // Symbol scope is determined by sym.n_type & (N_EXT | N_PEXT):
33013f439a1SNico Weber   // N_EXT: Global symbols
33113f439a1SNico Weber   // N_EXT | N_PEXT: Linkage unit (think: dylib) scoped
33213f439a1SNico Weber   // N_PEXT: Does not occur in input files in practice,
33313f439a1SNico Weber   //         a private extern must be external.
33413f439a1SNico Weber   // 0: Translation-unit scoped. These are not in the symbol table.
33513f439a1SNico Weber 
33613f439a1SNico Weber   if (sym.n_type & (N_EXT | N_PEXT)) {
33713f439a1SNico Weber     assert((sym.n_type & N_EXT) && "invalid input");
338163dcd85SJez Ng     return symtab->addDefined(name, isec->file, isec, value,
339163dcd85SJez Ng                               sym.n_desc & N_WEAK_DEF, sym.n_type & N_PEXT);
34013f439a1SNico Weber   }
341163dcd85SJez Ng   return make<Defined>(name, isec->file, isec, value, sym.n_desc & N_WEAK_DEF,
34213f439a1SNico Weber                        /*isExternal=*/false, /*isPrivateExtern=*/false);
34362a3f0c9SJez Ng }
34462a3f0c9SJez Ng 
34562a3f0c9SJez Ng // Absolute symbols are defined symbols that do not have an associated
34662a3f0c9SJez Ng // InputSection. They cannot be weak.
34762a3f0c9SJez Ng static macho::Symbol *createAbsolute(const structs::nlist_64 &sym,
348163dcd85SJez Ng                                      InputFile *file, StringRef name) {
34913f439a1SNico Weber   if (sym.n_type & (N_EXT | N_PEXT)) {
35013f439a1SNico Weber     assert((sym.n_type & N_EXT) && "invalid input");
351163dcd85SJez Ng     return symtab->addDefined(name, file, nullptr, sym.n_value,
352163dcd85SJez Ng                               /*isWeakDef=*/false, sym.n_type & N_PEXT);
35313f439a1SNico Weber   }
354163dcd85SJez Ng   return make<Defined>(name, file, nullptr, sym.n_value, /*isWeakDef=*/false,
35513f439a1SNico Weber                        /*isExternal=*/false, /*isPrivateExtern=*/false);
35662a3f0c9SJez Ng }
35762a3f0c9SJez Ng 
3587b007ac0SJez Ng macho::Symbol *ObjFile::parseNonSectionSymbol(const structs::nlist_64 &sym,
35962a3f0c9SJez Ng                                               StringRef name) {
36062a3f0c9SJez Ng   uint8_t type = sym.n_type & N_TYPE;
36162a3f0c9SJez Ng   switch (type) {
36262a3f0c9SJez Ng   case N_UNDF:
36362a3f0c9SJez Ng     return sym.n_value == 0
364163dcd85SJez Ng                ? symtab->addUndefined(name, this, sym.n_desc & N_WEAK_REF)
36562a3f0c9SJez Ng                : symtab->addCommon(name, this, sym.n_value,
36613f439a1SNico Weber                                    1 << GET_COMM_ALIGN(sym.n_desc),
36713f439a1SNico Weber                                    sym.n_type & N_PEXT);
36862a3f0c9SJez Ng   case N_ABS:
369163dcd85SJez Ng     return createAbsolute(sym, this, name);
37062a3f0c9SJez Ng   case N_PBUD:
37162a3f0c9SJez Ng   case N_INDR:
37262a3f0c9SJez Ng     error("TODO: support symbols of type " + std::to_string(type));
37362a3f0c9SJez Ng     return nullptr;
37462a3f0c9SJez Ng   case N_SECT:
37562a3f0c9SJez Ng     llvm_unreachable(
37662a3f0c9SJez Ng         "N_SECT symbols should not be passed to parseNonSectionSymbol");
37762a3f0c9SJez Ng   default:
37862a3f0c9SJez Ng     llvm_unreachable("invalid symbol type");
37962a3f0c9SJez Ng   }
38062a3f0c9SJez Ng }
38162a3f0c9SJez Ng 
3827b007ac0SJez Ng void ObjFile::parseSymbols(ArrayRef<structs::nlist_64> nList,
3831e1a3f67SJez Ng                            const char *strtab, bool subsectionsViaSymbols) {
3844eb6f485SJez Ng   // resize(), not reserve(), because we are going to create N_ALT_ENTRY symbols
3854eb6f485SJez Ng   // out-of-sequence.
3864eb6f485SJez Ng   symbols.resize(nList.size());
3874eb6f485SJez Ng   std::vector<size_t> altEntrySymIdxs;
3884eb6f485SJez Ng 
3894eb6f485SJez Ng   for (size_t i = 0, n = nList.size(); i < n; ++i) {
3901e1a3f67SJez Ng     const structs::nlist_64 &sym = nList[i];
3914eb6f485SJez Ng     StringRef name = strtab + sym.n_strx;
39262a3f0c9SJez Ng 
39362a3f0c9SJez Ng     if ((sym.n_type & N_TYPE) != N_SECT) {
39462a3f0c9SJez Ng       symbols[i] = parseNonSectionSymbol(sym, name);
3954eb6f485SJez Ng       continue;
3964eb6f485SJez Ng     }
3974eb6f485SJez Ng 
3984eb6f485SJez Ng     const section_64 &sec = sectionHeaders[sym.n_sect - 1];
3994eb6f485SJez Ng     SubsectionMap &subsecMap = subsections[sym.n_sect - 1];
400863f7a74SJez Ng     assert(!subsecMap.empty());
4014eb6f485SJez Ng     uint64_t offset = sym.n_value - sec.addr;
4024eb6f485SJez Ng 
4034eb6f485SJez Ng     // If the input file does not use subsections-via-symbols, all symbols can
4044eb6f485SJez Ng     // use the same subsection. Otherwise, we must split the sections along
4054eb6f485SJez Ng     // symbol boundaries.
4064eb6f485SJez Ng     if (!subsectionsViaSymbols) {
40762a3f0c9SJez Ng       symbols[i] = createDefined(sym, name, subsecMap[0], offset);
4084eb6f485SJez Ng       continue;
4094eb6f485SJez Ng     }
4104eb6f485SJez Ng 
4114eb6f485SJez Ng     // nList entries aren't necessarily arranged in address order. Therefore,
4124eb6f485SJez Ng     // we can't create alt-entry symbols at this point because a later symbol
4134eb6f485SJez Ng     // may split its section, which may affect which subsection the alt-entry
4144eb6f485SJez Ng     // symbol is assigned to. So we need to handle them in a second pass below.
4154eb6f485SJez Ng     if (sym.n_desc & N_ALT_ENTRY) {
4164eb6f485SJez Ng       altEntrySymIdxs.push_back(i);
4174eb6f485SJez Ng       continue;
4184eb6f485SJez Ng     }
4194eb6f485SJez Ng 
4204eb6f485SJez Ng     // Find the subsection corresponding to the greatest section offset that is
4214eb6f485SJez Ng     // <= that of the current symbol. The subsection that we find either needs
4224eb6f485SJez Ng     // to be used directly or split in two.
4234eb6f485SJez Ng     uint32_t firstSize = offset;
4244eb6f485SJez Ng     InputSection *firstIsec = findContainingSubsection(subsecMap, &firstSize);
4254eb6f485SJez Ng 
4264eb6f485SJez Ng     if (firstSize == 0) {
4274eb6f485SJez Ng       // Alias of an existing symbol, or the first symbol in the section. These
4284eb6f485SJez Ng       // are handled by reusing the existing section.
42962a3f0c9SJez Ng       symbols[i] = createDefined(sym, name, firstIsec, 0);
4304eb6f485SJez Ng       continue;
4314eb6f485SJez Ng     }
4324eb6f485SJez Ng 
4334eb6f485SJez Ng     // We saw a symbol definition at a new offset. Split the section into two
4344eb6f485SJez Ng     // subsections. The new symbol uses the second subsection.
4354eb6f485SJez Ng     auto *secondIsec = make<InputSection>(*firstIsec);
4364eb6f485SJez Ng     secondIsec->data = firstIsec->data.slice(firstSize);
4374eb6f485SJez Ng     firstIsec->data = firstIsec->data.slice(0, firstSize);
4384eb6f485SJez Ng     // TODO: ld64 appears to preserve the original alignment as well as each
4394eb6f485SJez Ng     // subsection's offset from the last aligned address. We should consider
4404eb6f485SJez Ng     // emulating that behavior.
4414eb6f485SJez Ng     secondIsec->align = MinAlign(firstIsec->align, offset);
4424eb6f485SJez Ng 
4434eb6f485SJez Ng     subsecMap[offset] = secondIsec;
4444eb6f485SJez Ng     // By construction, the symbol will be at offset zero in the new section.
44562a3f0c9SJez Ng     symbols[i] = createDefined(sym, name, secondIsec, 0);
4464eb6f485SJez Ng   }
4474eb6f485SJez Ng 
4484eb6f485SJez Ng   for (size_t idx : altEntrySymIdxs) {
4491e1a3f67SJez Ng     const structs::nlist_64 &sym = nList[idx];
45062a3f0c9SJez Ng     StringRef name = strtab + sym.n_strx;
4514eb6f485SJez Ng     SubsectionMap &subsecMap = subsections[sym.n_sect - 1];
4524eb6f485SJez Ng     uint32_t off = sym.n_value - sectionHeaders[sym.n_sect - 1].addr;
4534eb6f485SJez Ng     InputSection *subsec = findContainingSubsection(subsecMap, &off);
45462a3f0c9SJez Ng     symbols[idx] = createDefined(sym, name, subsec, off);
4556acd3003SFangrui Song   }
4566acd3003SFangrui Song }
4576acd3003SFangrui Song 
458a379f2c2SGreg McGary OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,
459a379f2c2SGreg McGary                        StringRef sectName)
460a379f2c2SGreg McGary     : InputFile(OpaqueKind, mb) {
461a379f2c2SGreg McGary   InputSection *isec = make<InputSection>();
462a379f2c2SGreg McGary   isec->file = this;
463a379f2c2SGreg McGary   isec->name = sectName.take_front(16);
464a379f2c2SGreg McGary   isec->segname = segName.take_front(16);
465a379f2c2SGreg McGary   const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
466a379f2c2SGreg McGary   isec->data = {buf, mb.getBufferSize()};
467a379f2c2SGreg McGary   subsections.push_back({{0, isec}});
468a379f2c2SGreg McGary }
469a379f2c2SGreg McGary 
470b2f00f24SNico Weber ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName)
471b768d57bSJez Ng     : InputFile(ObjKind, mb), modTime(modTime) {
472b2f00f24SNico Weber   this->archiveName = std::string(archiveName);
473b2f00f24SNico Weber 
4746acd3003SFangrui Song   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
4756acd3003SFangrui Song   auto *hdr = reinterpret_cast<const mach_header_64 *>(mb.getBufferStart());
4766acd3003SFangrui Song 
47716b1f6e3SNico Weber   if (const load_command *cmd = findCommand(hdr, LC_LINKER_OPTION)) {
47816b1f6e3SNico Weber     auto *c = reinterpret_cast<const linker_option_command *>(cmd);
47916b1f6e3SNico Weber     StringRef data{reinterpret_cast<const char *>(c + 1),
48016b1f6e3SNico Weber                    c->cmdsize - sizeof(linker_option_command)};
48116b1f6e3SNico Weber     parseLCLinkerOption(this, c->count, data);
48216b1f6e3SNico Weber   }
48316b1f6e3SNico Weber 
4846acd3003SFangrui Song   if (const load_command *cmd = findCommand(hdr, LC_SEGMENT_64)) {
4856acd3003SFangrui Song     auto *c = reinterpret_cast<const segment_command_64 *>(cmd);
4864eb6f485SJez Ng     sectionHeaders = ArrayRef<section_64>{
4876acd3003SFangrui Song         reinterpret_cast<const section_64 *>(c + 1), c->nsects};
4884eb6f485SJez Ng     parseSections(sectionHeaders);
4896acd3003SFangrui Song   }
4906acd3003SFangrui Song 
491060efd24SJez Ng   // TODO: Error on missing LC_SYMTAB?
4926acd3003SFangrui Song   if (const load_command *cmd = findCommand(hdr, LC_SYMTAB)) {
4936acd3003SFangrui Song     auto *c = reinterpret_cast<const symtab_command *>(cmd);
4941e1a3f67SJez Ng     ArrayRef<structs::nlist_64> nList(
4951e1a3f67SJez Ng         reinterpret_cast<const structs::nlist_64 *>(buf + c->symoff), c->nsyms);
4964eb6f485SJez Ng     const char *strtab = reinterpret_cast<const char *>(buf) + c->stroff;
4974eb6f485SJez Ng     bool subsectionsViaSymbols = hdr->flags & MH_SUBSECTIONS_VIA_SYMBOLS;
4984eb6f485SJez Ng     parseSymbols(nList, strtab, subsectionsViaSymbols);
4996acd3003SFangrui Song   }
5006acd3003SFangrui Song 
5016acd3003SFangrui Song   // The relocations may refer to the symbols, so we parse them after we have
5024eb6f485SJez Ng   // parsed all the symbols.
5034eb6f485SJez Ng   for (size_t i = 0, n = subsections.size(); i < n; ++i)
504863f7a74SJez Ng     if (!subsections[i].empty())
5054eb6f485SJez Ng       parseRelocations(sectionHeaders[i], subsections[i]);
5063fcb0eebSJez Ng 
5073fcb0eebSJez Ng   parseDebugInfo();
5083fcb0eebSJez Ng }
5093fcb0eebSJez Ng 
5103fcb0eebSJez Ng void ObjFile::parseDebugInfo() {
5113fcb0eebSJez Ng   std::unique_ptr<DwarfObject> dObj = DwarfObject::create(this);
5123fcb0eebSJez Ng   if (!dObj)
5133fcb0eebSJez Ng     return;
5143fcb0eebSJez Ng 
5153fcb0eebSJez Ng   auto *ctx = make<DWARFContext>(
5163fcb0eebSJez Ng       std::move(dObj), "",
517b2f00f24SNico Weber       [&](Error err) {
518b2f00f24SNico Weber         warn(toString(this) + ": " + toString(std::move(err)));
519b2f00f24SNico Weber       },
5203fcb0eebSJez Ng       [&](Error warning) {
521b2f00f24SNico Weber         warn(toString(this) + ": " + toString(std::move(warning)));
5223fcb0eebSJez Ng       });
5233fcb0eebSJez Ng 
5243fcb0eebSJez Ng   // TODO: Since object files can contain a lot of DWARF info, we should verify
5253fcb0eebSJez Ng   // that we are parsing just the info we need
5263fcb0eebSJez Ng   const DWARFContext::compile_unit_range &units = ctx->compile_units();
5273fcb0eebSJez Ng   auto it = units.begin();
5283fcb0eebSJez Ng   compileUnit = it->get();
5293fcb0eebSJez Ng   assert(std::next(it) == units.end());
5306acd3003SFangrui Song }
5316acd3003SFangrui Song 
5327394460dSJez Ng // The path can point to either a dylib or a .tbd file.
5337394460dSJez Ng static Optional<DylibFile *> loadDylib(StringRef path, DylibFile *umbrella) {
5347394460dSJez Ng   Optional<MemoryBufferRef> mbref = readFile(path);
5357394460dSJez Ng   if (!mbref) {
5367394460dSJez Ng     error("could not read dylib file at " + path);
5377394460dSJez Ng     return {};
5387394460dSJez Ng   }
53976c36c11SJez Ng   return loadDylib(*mbref, umbrella);
5407394460dSJez Ng }
5417394460dSJez Ng 
// TBD files are parsed into a series of TAPI documents (InterfaceFiles), with
// the first document storing child pointers to the rest of them. When we are
// processing a given TBD file, we store that top-level document here. When
// processing re-exports, we search its children for potentially matching
// documents in the same TBD file. Note that the children themselves don't
// point to further documents, i.e. this is a two-level tree.
//
// ld64 allows a TAPI re-export to reference documents nested within other TBD
// files, but that seems like a strange design, so this is an intentional
// deviation.
//
// The pointer is null whenever no top-level TBD document is being processed:
// the InterfaceFile-based DylibFile constructor sets it before loading its
// re-exports and resets it to nullptr afterwards, and loadReexportHelper()
// reads it to search the sibling documents.
const InterfaceFile *currentTopLevelTapi = nullptr;
5537394460dSJez Ng 
5547394460dSJez Ng // Re-exports can either refer to on-disk files, or to documents within .tbd
5557394460dSJez Ng // files.
5566a348f61SJez Ng static Optional<DylibFile *> loadReexportHelper(StringRef path,
5576a348f61SJez Ng                                                 DylibFile *umbrella) {
5587394460dSJez Ng   if (path::is_absolute(path, path::Style::posix))
5597394460dSJez Ng     for (StringRef root : config->systemLibraryRoots)
5607394460dSJez Ng       if (Optional<std::string> dylibPath =
5617394460dSJez Ng               resolveDylibPath((root + path).str()))
5627394460dSJez Ng         return loadDylib(*dylibPath, umbrella);
5637394460dSJez Ng 
5647394460dSJez Ng   // TODO: Expand @loader_path, @executable_path etc
5657394460dSJez Ng 
5669c702814SJez Ng   if (currentTopLevelTapi) {
5677394460dSJez Ng     for (InterfaceFile &child :
5687394460dSJez Ng          make_pointee_range(currentTopLevelTapi->documents())) {
5697394460dSJez Ng       if (path == child.getInstallName())
5707394460dSJez Ng         return make<DylibFile>(child, umbrella);
5717394460dSJez Ng       assert(child.documents().empty());
5727394460dSJez Ng     }
5737394460dSJez Ng   }
5747394460dSJez Ng 
5757394460dSJez Ng   if (Optional<std::string> dylibPath = resolveDylibPath(path))
5767394460dSJez Ng     return loadDylib(*dylibPath, umbrella);
5777394460dSJez Ng 
5787394460dSJez Ng   error("unable to locate re-export with install name " + path);
5797394460dSJez Ng   return {};
5807394460dSJez Ng }
5817394460dSJez Ng 
5826a348f61SJez Ng // If a re-exported dylib is public (lives in /usr/lib or
5836a348f61SJez Ng // /System/Library/Frameworks), then it is considered implicitly linked: we
5846a348f61SJez Ng // should bind to its symbols directly instead of via the re-exporting umbrella
5856a348f61SJez Ng // library.
5866a348f61SJez Ng static bool isImplicitlyLinked(StringRef path) {
5876a348f61SJez Ng   if (!config->implicitDylibs)
5886a348f61SJez Ng     return false;
5896a348f61SJez Ng 
5903aa8e071SJez Ng   if (path::parent_path(path) == "/usr/lib")
5913aa8e071SJez Ng     return true;
5923aa8e071SJez Ng 
5933aa8e071SJez Ng   // Match /System/Library/Frameworks/$FOO.framework/**/$FOO
5943aa8e071SJez Ng   if (path.consume_front("/System/Library/Frameworks/")) {
5953aa8e071SJez Ng     StringRef frameworkName = path.take_until([](char c) { return c == '.'; });
5963aa8e071SJez Ng     return path::filename(path) == frameworkName;
5973aa8e071SJez Ng   }
5983aa8e071SJez Ng 
5993aa8e071SJez Ng   return false;
6006a348f61SJez Ng }
6016a348f61SJez Ng 
60276c36c11SJez Ng void loadReexport(StringRef path, DylibFile *umbrella) {
6036a348f61SJez Ng   Optional<DylibFile *> reexport = loadReexportHelper(path, umbrella);
6046a348f61SJez Ng   if (reexport && isImplicitlyLinked(path))
605544148aeSJez Ng     inputFiles.insert(*reexport);
6066a348f61SJez Ng }
6076a348f61SJez Ng 
60887b6fd3eSJez Ng DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella)
6094c8276cdSJez Ng     : InputFile(DylibKind, mb), refState(RefState::Unreferenced) {
61087b6fd3eSJez Ng   if (umbrella == nullptr)
61187b6fd3eSJez Ng     umbrella = this;
61287b6fd3eSJez Ng 
613060efd24SJez Ng   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
614060efd24SJez Ng   auto *hdr = reinterpret_cast<const mach_header_64 *>(mb.getBufferStart());
615060efd24SJez Ng 
616060efd24SJez Ng   // Initialize dylibName.
617060efd24SJez Ng   if (const load_command *cmd = findCommand(hdr, LC_ID_DYLIB)) {
618060efd24SJez Ng     auto *c = reinterpret_cast<const dylib_command *>(cmd);
619ec88746aSNico Weber     currentVersion = read32le(&c->dylib.current_version);
620ec88746aSNico Weber     compatibilityVersion = read32le(&c->dylib.compatibility_version);
621060efd24SJez Ng     dylibName = reinterpret_cast<const char *>(cmd) + read32le(&c->dylib.name);
622060efd24SJez Ng   } else {
623b2f00f24SNico Weber     error("dylib " + toString(this) + " missing LC_ID_DYLIB load command");
624060efd24SJez Ng     return;
625060efd24SJez Ng   }
626060efd24SJez Ng 
627060efd24SJez Ng   // Initialize symbols.
6286a348f61SJez Ng   DylibFile *exportingFile = isImplicitlyLinked(dylibName) ? this : umbrella;
6297bbdbacdSJez Ng   if (const load_command *cmd = findCommand(hdr, LC_DYLD_INFO_ONLY)) {
6307bbdbacdSJez Ng     auto *c = reinterpret_cast<const dyld_info_command *>(cmd);
6317bbdbacdSJez Ng     parseTrie(buf + c->export_off, c->export_size,
6327bbdbacdSJez Ng               [&](const Twine &name, uint64_t flags) {
63331d58858SJez Ng                 bool isWeakDef = flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
6343c9100fbSJez Ng                 bool isTlv = flags & EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL;
6356a348f61SJez Ng                 symbols.push_back(symtab->addDylib(
6366a348f61SJez Ng                     saver.save(name), exportingFile, isWeakDef, isTlv));
6377bbdbacdSJez Ng               });
6387bbdbacdSJez Ng   } else {
639b2f00f24SNico Weber     error("LC_DYLD_INFO_ONLY not found in " + toString(this));
64087b6fd3eSJez Ng     return;
64187b6fd3eSJez Ng   }
64287b6fd3eSJez Ng 
64387b6fd3eSJez Ng   if (hdr->flags & MH_NO_REEXPORTED_DYLIBS)
64487b6fd3eSJez Ng     return;
64587b6fd3eSJez Ng 
64687b6fd3eSJez Ng   const uint8_t *p =
64787b6fd3eSJez Ng       reinterpret_cast<const uint8_t *>(hdr) + sizeof(mach_header_64);
64887b6fd3eSJez Ng   for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
64987b6fd3eSJez Ng     auto *cmd = reinterpret_cast<const load_command *>(p);
65087b6fd3eSJez Ng     p += cmd->cmdsize;
65187b6fd3eSJez Ng     if (cmd->cmd != LC_REEXPORT_DYLIB)
65287b6fd3eSJez Ng       continue;
65387b6fd3eSJez Ng 
65487b6fd3eSJez Ng     auto *c = reinterpret_cast<const dylib_command *>(cmd);
65587b6fd3eSJez Ng     StringRef reexportPath =
65687b6fd3eSJez Ng         reinterpret_cast<const char *>(c) + read32le(&c->dylib.name);
65776c36c11SJez Ng     loadReexport(reexportPath, umbrella);
658060efd24SJez Ng   }
659060efd24SJez Ng }
660060efd24SJez Ng 
6617e6d6754SJez Ng DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella)
6624c8276cdSJez Ng     : InputFile(DylibKind, interface), refState(RefState::Unreferenced) {
6636fe27b5fSSaleem Abdulrasool   if (umbrella == nullptr)
6646fe27b5fSSaleem Abdulrasool     umbrella = this;
6656fe27b5fSSaleem Abdulrasool 
6667e6d6754SJez Ng   dylibName = saver.save(interface.getInstallName());
667ec88746aSNico Weber   compatibilityVersion = interface.getCompatibilityVersion().rawValue();
668ec88746aSNico Weber   currentVersion = interface.getCurrentVersion().rawValue();
6696a348f61SJez Ng   DylibFile *exportingFile = isImplicitlyLinked(dylibName) ? this : umbrella;
670a499898eSJez Ng   auto addSymbol = [&](const Twine &name) -> void {
6716a348f61SJez Ng     symbols.push_back(symtab->addDylib(saver.save(name), exportingFile,
672a499898eSJez Ng                                        /*isWeakDef=*/false,
673a499898eSJez Ng                                        /*isTlv=*/false));
674a499898eSJez Ng   };
6756fe27b5fSSaleem Abdulrasool   // TODO(compnerd) filter out symbols based on the target platform
6763c9100fbSJez Ng   // TODO: handle weak defs, thread locals
6777e6d6754SJez Ng   for (const auto symbol : interface.symbols()) {
678a499898eSJez Ng     if (!symbol->getArchitectures().has(config->arch))
679a499898eSJez Ng       continue;
680a499898eSJez Ng 
681a499898eSJez Ng     switch (symbol->getKind()) {
682a499898eSJez Ng     case SymbolKind::GlobalSymbol:
683a499898eSJez Ng       addSymbol(symbol->getName());
684a499898eSJez Ng       break;
685a499898eSJez Ng     case SymbolKind::ObjectiveCClass:
686a499898eSJez Ng       // XXX ld64 only creates these symbols when -ObjC is passed in. We may
687a499898eSJez Ng       // want to emulate that.
688cf918c80SJez Ng       addSymbol(objc::klass + symbol->getName());
689cf918c80SJez Ng       addSymbol(objc::metaclass + symbol->getName());
690a499898eSJez Ng       break;
691a499898eSJez Ng     case SymbolKind::ObjectiveCClassEHType:
692cf918c80SJez Ng       addSymbol(objc::ehtype + symbol->getName());
693a499898eSJez Ng       break;
694a499898eSJez Ng     case SymbolKind::ObjectiveCInstanceVariable:
695cf918c80SJez Ng       addSymbol(objc::ivar + symbol->getName());
696a499898eSJez Ng       break;
697a499898eSJez Ng     }
698a499898eSJez Ng   }
6997394460dSJez Ng 
7007394460dSJez Ng   bool isTopLevelTapi = false;
7017394460dSJez Ng   if (currentTopLevelTapi == nullptr) {
7027394460dSJez Ng     currentTopLevelTapi = &interface;
7037394460dSJez Ng     isTopLevelTapi = true;
7047394460dSJez Ng   }
7057394460dSJez Ng 
7067394460dSJez Ng   for (InterfaceFileRef intfRef : interface.reexportedLibraries())
70776c36c11SJez Ng     loadReexport(intfRef.getInstallName(), umbrella);
7087394460dSJez Ng 
7097394460dSJez Ng   if (isTopLevelTapi)
7107394460dSJez Ng     currentTopLevelTapi = nullptr;
7116fe27b5fSSaleem Abdulrasool }
7126fe27b5fSSaleem Abdulrasool 
713e98b441aSJez Ng ArchiveFile::ArchiveFile(std::unique_ptr<object::Archive> &&f)
7142b920ae7SKellie Medlin     : InputFile(ArchiveKind, f->getMemoryBufferRef()), file(std::move(f)) {
7152b920ae7SKellie Medlin   for (const object::Archive::Symbol &sym : file->symbols())
7162b920ae7SKellie Medlin     symtab->addLazy(sym.getName(), this, sym);
7172b920ae7SKellie Medlin }
7182b920ae7SKellie Medlin 
7192b920ae7SKellie Medlin void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
7202b920ae7SKellie Medlin   object::Archive::Child c =
7212b920ae7SKellie Medlin       CHECK(sym.getMember(), toString(this) +
7222b920ae7SKellie Medlin                                  ": could not get the member for symbol " +
72307ab597bSNico Weber                                  toMachOString(sym));
7242b920ae7SKellie Medlin 
7252b920ae7SKellie Medlin   if (!seen.insert(c.getChildOffset()).second)
7262b920ae7SKellie Medlin     return;
7272b920ae7SKellie Medlin 
7282b920ae7SKellie Medlin   MemoryBufferRef mb =
7292b920ae7SKellie Medlin       CHECK(c.getMemoryBufferRef(),
7302b920ae7SKellie Medlin             toString(this) +
7312b920ae7SKellie Medlin                 ": could not get the buffer for the member defining symbol " +
73207ab597bSNico Weber                 toMachOString(sym));
733b768d57bSJez Ng 
734ca634393SNico Weber   if (tar && c.getParent()->isThin())
735ca634393SNico Weber     tar->append(relativeToRoot(CHECK(c.getFullName(), this)), mb.getBuffer());
736ca634393SNico Weber 
737b768d57bSJez Ng   uint32_t modTime = toTimeT(
738b768d57bSJez Ng       CHECK(c.getLastModified(), toString(this) +
739b768d57bSJez Ng                                      ": could not get the modification time "
740b768d57bSJez Ng                                      "for the member defining symbol " +
74107ab597bSNico Weber                                      toMachOString(sym)));
742b768d57bSJez Ng 
7433422f3ccSNico Weber   // `sym` is owned by a LazySym, which will be replace<>() by make<ObjFile>
7443422f3ccSNico Weber   // and become invalid after that call. Copy it to the stack so we can refer
7453422f3ccSNico Weber   // to it later.
7463422f3ccSNico Weber   const object::Archive::Symbol sym_copy = sym;
7473422f3ccSNico Weber 
748f843bb82SJez Ng   if (Optional<InputFile *> file =
749f843bb82SJez Ng           loadArchiveMember(mb, modTime, getName(), /*objCOnly=*/false)) {
750f843bb82SJez Ng     inputFiles.insert(*file);
7513422f3ccSNico Weber     // ld64 doesn't demangle sym here even with -demangle. Match that, so
7523422f3ccSNico Weber     // intentionally no call to toMachOString() here.
753f843bb82SJez Ng     printArchiveMemberLoad(sym_copy.getName(), *file);
754f843bb82SJez Ng   }
7552b920ae7SKellie Medlin }
7562b920ae7SKellie Medlin 
75721f83113SJez Ng BitcodeFile::BitcodeFile(MemoryBufferRef mbref)
75821f83113SJez Ng     : InputFile(BitcodeKind, mbref) {
75921f83113SJez Ng   obj = check(lto::InputFile::create(mbref));
76021f83113SJez Ng }
761