xref: /llvm-project-15.0.7/lld/MachO/Writer.cpp (revision 2ea7fb7b)
1 //===- Writer.cpp ---------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Writer.h"
10 #include "Config.h"
11 #include "InputFiles.h"
12 #include "InputSection.h"
13 #include "MapFile.h"
14 #include "MergedOutputSection.h"
15 #include "OutputSection.h"
16 #include "OutputSegment.h"
17 #include "SymbolTable.h"
18 #include "Symbols.h"
19 #include "SyntheticSections.h"
20 #include "Target.h"
21 #include "UnwindInfoSection.h"
22 
23 #include "lld/Common/Arrays.h"
24 #include "lld/Common/ErrorHandler.h"
25 #include "lld/Common/Memory.h"
26 #include "llvm/BinaryFormat/MachO.h"
27 #include "llvm/Config/llvm-config.h"
28 #include "llvm/Support/LEB128.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Support/Parallel.h"
31 #include "llvm/Support/Path.h"
32 #include "llvm/Support/TimeProfiler.h"
33 #include "llvm/Support/xxhash.h"
34 
35 #include <algorithm>
36 
37 using namespace llvm;
38 using namespace llvm::MachO;
39 using namespace llvm::sys;
40 using namespace lld;
41 using namespace lld::macho;
42 
43 namespace {
44 class LCUuid;
45 
46 class Writer {
47 public:
48   Writer() : buffer(errorHandler().outputBuffer) {}
49 
50   void scanRelocations();
51   void scanSymbols();
52   template <class LP> void createOutputSections();
53   template <class LP> void createLoadCommands();
54   void finalizeAddresses();
55   void finalizeLinkEditSegment();
56   void assignAddresses(OutputSegment *);
57 
58   void openFile();
59   void writeSections();
60   void writeUuid();
61   void writeCodeSignature();
62   void writeOutputFile();
63 
64   template <class LP> void run();
65 
66   std::unique_ptr<FileOutputBuffer> &buffer;
67   uint64_t addr = 0;
68   uint64_t fileOff = 0;
69   MachHeaderSection *header = nullptr;
70   StringTableSection *stringTableSection = nullptr;
71   SymtabSection *symtabSection = nullptr;
72   IndirectSymtabSection *indirectSymtabSection = nullptr;
73   CodeSignatureSection *codeSignatureSection = nullptr;
74   FunctionStartsSection *functionStartsSection = nullptr;
75 
76   LCUuid *uuidCommand = nullptr;
77   OutputSegment *linkEditSegment = nullptr;
78 };
79 
80 // LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
81 class LCDyldInfo : public LoadCommand {
82 public:
83   LCDyldInfo(RebaseSection *rebaseSection, BindingSection *bindingSection,
84              WeakBindingSection *weakBindingSection,
85              LazyBindingSection *lazyBindingSection,
86              ExportSection *exportSection)
87       : rebaseSection(rebaseSection), bindingSection(bindingSection),
88         weakBindingSection(weakBindingSection),
89         lazyBindingSection(lazyBindingSection), exportSection(exportSection) {}
90 
91   uint32_t getSize() const override { return sizeof(dyld_info_command); }
92 
93   void writeTo(uint8_t *buf) const override {
94     auto *c = reinterpret_cast<dyld_info_command *>(buf);
95     c->cmd = LC_DYLD_INFO_ONLY;
96     c->cmdsize = getSize();
97     if (rebaseSection->isNeeded()) {
98       c->rebase_off = rebaseSection->fileOff;
99       c->rebase_size = rebaseSection->getFileSize();
100     }
101     if (bindingSection->isNeeded()) {
102       c->bind_off = bindingSection->fileOff;
103       c->bind_size = bindingSection->getFileSize();
104     }
105     if (weakBindingSection->isNeeded()) {
106       c->weak_bind_off = weakBindingSection->fileOff;
107       c->weak_bind_size = weakBindingSection->getFileSize();
108     }
109     if (lazyBindingSection->isNeeded()) {
110       c->lazy_bind_off = lazyBindingSection->fileOff;
111       c->lazy_bind_size = lazyBindingSection->getFileSize();
112     }
113     if (exportSection->isNeeded()) {
114       c->export_off = exportSection->fileOff;
115       c->export_size = exportSection->getFileSize();
116     }
117   }
118 
119   RebaseSection *rebaseSection;
120   BindingSection *bindingSection;
121   WeakBindingSection *weakBindingSection;
122   LazyBindingSection *lazyBindingSection;
123   ExportSection *exportSection;
124 };
125 
126 class LCFunctionStarts : public LoadCommand {
127 public:
128   explicit LCFunctionStarts(FunctionStartsSection *functionStartsSection)
129       : functionStartsSection(functionStartsSection) {}
130 
131   uint32_t getSize() const override { return sizeof(linkedit_data_command); }
132 
133   void writeTo(uint8_t *buf) const override {
134     auto *c = reinterpret_cast<linkedit_data_command *>(buf);
135     c->cmd = LC_FUNCTION_STARTS;
136     c->cmdsize = getSize();
137     c->dataoff = functionStartsSection->fileOff;
138     c->datasize = functionStartsSection->getFileSize();
139   }
140 
141 private:
142   FunctionStartsSection *functionStartsSection;
143 };
144 
145 class LCDysymtab : public LoadCommand {
146 public:
147   LCDysymtab(SymtabSection *symtabSection,
148              IndirectSymtabSection *indirectSymtabSection)
149       : symtabSection(symtabSection),
150         indirectSymtabSection(indirectSymtabSection) {}
151 
152   uint32_t getSize() const override { return sizeof(dysymtab_command); }
153 
154   void writeTo(uint8_t *buf) const override {
155     auto *c = reinterpret_cast<dysymtab_command *>(buf);
156     c->cmd = LC_DYSYMTAB;
157     c->cmdsize = getSize();
158 
159     c->ilocalsym = 0;
160     c->iextdefsym = c->nlocalsym = symtabSection->getNumLocalSymbols();
161     c->nextdefsym = symtabSection->getNumExternalSymbols();
162     c->iundefsym = c->iextdefsym + c->nextdefsym;
163     c->nundefsym = symtabSection->getNumUndefinedSymbols();
164 
165     c->indirectsymoff = indirectSymtabSection->fileOff;
166     c->nindirectsyms = indirectSymtabSection->getNumSymbols();
167   }
168 
169   SymtabSection *symtabSection;
170   IndirectSymtabSection *indirectSymtabSection;
171 };
172 
173 template <class LP> class LCSegment : public LoadCommand {
174 public:
175   LCSegment(StringRef name, OutputSegment *seg) : name(name), seg(seg) {}
176 
177   uint32_t getSize() const override {
178     return sizeof(typename LP::segment_command) +
179            seg->numNonHiddenSections() * sizeof(typename LP::section);
180   }
181 
182   void writeTo(uint8_t *buf) const override {
183     using SegmentCommand = typename LP::segment_command;
184     using Section = typename LP::section;
185 
186     auto *c = reinterpret_cast<SegmentCommand *>(buf);
187     buf += sizeof(SegmentCommand);
188 
189     c->cmd = LP::segmentLCType;
190     c->cmdsize = getSize();
191     memcpy(c->segname, name.data(), name.size());
192     c->fileoff = seg->fileOff;
193     c->maxprot = seg->maxProt;
194     c->initprot = seg->initProt;
195 
196     if (seg->getSections().empty())
197       return;
198 
199     c->vmaddr = seg->firstSection()->addr;
200     c->vmsize = seg->vmSize;
201     c->filesize = seg->fileSize;
202     c->nsects = seg->numNonHiddenSections();
203 
204     for (const OutputSection *osec : seg->getSections()) {
205       if (osec->isHidden())
206         continue;
207 
208       auto *sectHdr = reinterpret_cast<Section *>(buf);
209       buf += sizeof(Section);
210 
211       memcpy(sectHdr->sectname, osec->name.data(), osec->name.size());
212       memcpy(sectHdr->segname, name.data(), name.size());
213 
214       sectHdr->addr = osec->addr;
215       sectHdr->offset = osec->fileOff;
216       sectHdr->align = Log2_32(osec->align);
217       sectHdr->flags = osec->flags;
218       sectHdr->size = osec->getSize();
219       sectHdr->reserved1 = osec->reserved1;
220       sectHdr->reserved2 = osec->reserved2;
221     }
222   }
223 
224 private:
225   StringRef name;
226   OutputSegment *seg;
227 };
228 
229 class LCMain : public LoadCommand {
230   uint32_t getSize() const override {
231     return sizeof(structs::entry_point_command);
232   }
233 
234   void writeTo(uint8_t *buf) const override {
235     auto *c = reinterpret_cast<structs::entry_point_command *>(buf);
236     c->cmd = LC_MAIN;
237     c->cmdsize = getSize();
238 
239     if (config->entry->isInStubs())
240       c->entryoff =
241           in.stubs->fileOff + config->entry->stubsIndex * target->stubSize;
242     else
243       c->entryoff = config->entry->getFileOffset();
244 
245     c->stacksize = 0;
246   }
247 };
248 
249 class LCSymtab : public LoadCommand {
250 public:
251   LCSymtab(SymtabSection *symtabSection, StringTableSection *stringTableSection)
252       : symtabSection(symtabSection), stringTableSection(stringTableSection) {}
253 
254   uint32_t getSize() const override { return sizeof(symtab_command); }
255 
256   void writeTo(uint8_t *buf) const override {
257     auto *c = reinterpret_cast<symtab_command *>(buf);
258     c->cmd = LC_SYMTAB;
259     c->cmdsize = getSize();
260     c->symoff = symtabSection->fileOff;
261     c->nsyms = symtabSection->getNumSymbols();
262     c->stroff = stringTableSection->fileOff;
263     c->strsize = stringTableSection->getFileSize();
264   }
265 
266   SymtabSection *symtabSection = nullptr;
267   StringTableSection *stringTableSection = nullptr;
268 };
269 
270 // There are several dylib load commands that share the same structure:
271 //   * LC_LOAD_DYLIB
272 //   * LC_ID_DYLIB
273 //   * LC_REEXPORT_DYLIB
274 class LCDylib : public LoadCommand {
275 public:
276   LCDylib(LoadCommandType type, StringRef path,
277           uint32_t compatibilityVersion = 0, uint32_t currentVersion = 0)
278       : type(type), path(path), compatibilityVersion(compatibilityVersion),
279         currentVersion(currentVersion) {
280     instanceCount++;
281   }
282 
283   uint32_t getSize() const override {
284     return alignTo(sizeof(dylib_command) + path.size() + 1, 8);
285   }
286 
287   void writeTo(uint8_t *buf) const override {
288     auto *c = reinterpret_cast<dylib_command *>(buf);
289     buf += sizeof(dylib_command);
290 
291     c->cmd = type;
292     c->cmdsize = getSize();
293     c->dylib.name = sizeof(dylib_command);
294     c->dylib.timestamp = 0;
295     c->dylib.compatibility_version = compatibilityVersion;
296     c->dylib.current_version = currentVersion;
297 
298     memcpy(buf, path.data(), path.size());
299     buf[path.size()] = '\0';
300   }
301 
302   static uint32_t getInstanceCount() { return instanceCount; }
303 
304 private:
305   LoadCommandType type;
306   StringRef path;
307   uint32_t compatibilityVersion;
308   uint32_t currentVersion;
309   static uint32_t instanceCount;
310 };
311 
312 uint32_t LCDylib::instanceCount = 0;
313 
314 class LCLoadDylinker : public LoadCommand {
315 public:
316   uint32_t getSize() const override {
317     return alignTo(sizeof(dylinker_command) + path.size() + 1, 8);
318   }
319 
320   void writeTo(uint8_t *buf) const override {
321     auto *c = reinterpret_cast<dylinker_command *>(buf);
322     buf += sizeof(dylinker_command);
323 
324     c->cmd = LC_LOAD_DYLINKER;
325     c->cmdsize = getSize();
326     c->name = sizeof(dylinker_command);
327 
328     memcpy(buf, path.data(), path.size());
329     buf[path.size()] = '\0';
330   }
331 
332 private:
333   // Recent versions of Darwin won't run any binary that has dyld at a
334   // different location.
335   const StringRef path = "/usr/lib/dyld";
336 };
337 
338 class LCRPath : public LoadCommand {
339 public:
340   explicit LCRPath(StringRef path) : path(path) {}
341 
342   uint32_t getSize() const override {
343     return alignTo(sizeof(rpath_command) + path.size() + 1, target->wordSize);
344   }
345 
346   void writeTo(uint8_t *buf) const override {
347     auto *c = reinterpret_cast<rpath_command *>(buf);
348     buf += sizeof(rpath_command);
349 
350     c->cmd = LC_RPATH;
351     c->cmdsize = getSize();
352     c->path = sizeof(rpath_command);
353 
354     memcpy(buf, path.data(), path.size());
355     buf[path.size()] = '\0';
356   }
357 
358 private:
359   StringRef path;
360 };
361 
362 static uint32_t encodeVersion(const VersionTuple &version) {
363   return ((version.getMajor() << 020) |
364           (version.getMinor().getValueOr(0) << 010) |
365           version.getSubminor().getValueOr(0));
366 }
367 
368 class LCMinVersion : public LoadCommand {
369 public:
370   explicit LCMinVersion(const PlatformInfo &platformInfo)
371       : platformInfo(platformInfo) {}
372 
373   uint32_t getSize() const override { return sizeof(version_min_command); }
374 
375   void writeTo(uint8_t *buf) const override {
376     auto *c = reinterpret_cast<version_min_command *>(buf);
377     switch (platformInfo.target.Platform) {
378     case PlatformKind::macOS:
379       c->cmd = LC_VERSION_MIN_MACOSX;
380       break;
381     case PlatformKind::iOS:
382     case PlatformKind::iOSSimulator:
383       c->cmd = LC_VERSION_MIN_IPHONEOS;
384       break;
385     case PlatformKind::tvOS:
386     case PlatformKind::tvOSSimulator:
387       c->cmd = LC_VERSION_MIN_TVOS;
388       break;
389     case PlatformKind::watchOS:
390     case PlatformKind::watchOSSimulator:
391       c->cmd = LC_VERSION_MIN_WATCHOS;
392       break;
393     default:
394       llvm_unreachable("invalid platform");
395       break;
396     }
397     c->cmdsize = getSize();
398     c->version = encodeVersion(platformInfo.minimum);
399     c->sdk = encodeVersion(platformInfo.sdk);
400   }
401 
402 private:
403   const PlatformInfo &platformInfo;
404 };
405 
406 class LCBuildVersion : public LoadCommand {
407 public:
408   explicit LCBuildVersion(const PlatformInfo &platformInfo)
409       : platformInfo(platformInfo) {}
410 
411   const int ntools = 1;
412 
413   uint32_t getSize() const override {
414     return sizeof(build_version_command) + ntools * sizeof(build_tool_version);
415   }
416 
417   void writeTo(uint8_t *buf) const override {
418     auto *c = reinterpret_cast<build_version_command *>(buf);
419     c->cmd = LC_BUILD_VERSION;
420     c->cmdsize = getSize();
421     c->platform = static_cast<uint32_t>(platformInfo.target.Platform);
422     c->minos = encodeVersion(platformInfo.minimum);
423     c->sdk = encodeVersion(platformInfo.sdk);
424     c->ntools = ntools;
425     auto *t = reinterpret_cast<build_tool_version *>(&c[1]);
426     t->tool = TOOL_LD;
427     t->version = encodeVersion(llvm::VersionTuple(
428         LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERSION_PATCH));
429   }
430 
431 private:
432   const PlatformInfo &platformInfo;
433 };
434 
435 // Stores a unique identifier for the output file based on an MD5 hash of its
436 // contents. In order to hash the contents, we must first write them, but
437 // LC_UUID itself must be part of the written contents in order for all the
438 // offsets to be calculated correctly. We resolve this circular paradox by
439 // first writing an LC_UUID with an all-zero UUID, then updating the UUID with
440 // its real value later.
441 class LCUuid : public LoadCommand {
442 public:
443   uint32_t getSize() const override { return sizeof(uuid_command); }
444 
445   void writeTo(uint8_t *buf) const override {
446     auto *c = reinterpret_cast<uuid_command *>(buf);
447     c->cmd = LC_UUID;
448     c->cmdsize = getSize();
449     uuidBuf = c->uuid;
450   }
451 
452   void writeUuid(uint64_t digest) const {
453     // xxhash only gives us 8 bytes, so put some fixed data in the other half.
454     static_assert(sizeof(uuid_command::uuid) == 16, "unexpected uuid size");
455     memcpy(uuidBuf, "LLD\xa1UU1D", 8);
456     memcpy(uuidBuf + 8, &digest, 8);
457 
458     // RFC 4122 conformance. We need to fix 4 bits in byte 6 and 2 bits in
459     // byte 8. Byte 6 is already fine due to the fixed data we put in. We don't
460     // want to lose bits of the digest in byte 8, so swap that with a byte of
461     // fixed data that happens to have the right bits set.
462     std::swap(uuidBuf[3], uuidBuf[8]);
463 
464     // Claim that this is an MD5-based hash. It isn't, but this signals that
465     // this is not a time-based and not a random hash. MD5 seems like the least
466     // bad lie we can put here.
467     assert((uuidBuf[6] & 0xf0) == 0x30 && "See RFC 4122 Sections 4.2.2, 4.1.3");
468     assert((uuidBuf[8] & 0xc0) == 0x80 && "See RFC 4122 Section 4.2.2");
469   }
470 
471   mutable uint8_t *uuidBuf;
472 };
473 
474 template <class LP> class LCEncryptionInfo : public LoadCommand {
475 public:
476   uint32_t getSize() const override {
477     return sizeof(typename LP::encryption_info_command);
478   }
479 
480   void writeTo(uint8_t *buf) const override {
481     using EncryptionInfo = typename LP::encryption_info_command;
482     auto *c = reinterpret_cast<EncryptionInfo *>(buf);
483     buf += sizeof(EncryptionInfo);
484     c->cmd = LP::encryptionInfoLCType;
485     c->cmdsize = getSize();
486     c->cryptoff = in.header->getSize();
487     auto it = find_if(outputSegments, [](const OutputSegment *seg) {
488       return seg->name == segment_names::text;
489     });
490     assert(it != outputSegments.end());
491     c->cryptsize = (*it)->fileSize - c->cryptoff;
492   }
493 };
494 
495 class LCCodeSignature : public LoadCommand {
496 public:
497   LCCodeSignature(CodeSignatureSection *section) : section(section) {}
498 
499   uint32_t getSize() const override { return sizeof(linkedit_data_command); }
500 
501   void writeTo(uint8_t *buf) const override {
502     auto *c = reinterpret_cast<linkedit_data_command *>(buf);
503     c->cmd = LC_CODE_SIGNATURE;
504     c->cmdsize = getSize();
505     c->dataoff = static_cast<uint32_t>(section->fileOff);
506     c->datasize = section->getSize();
507   }
508 
509   CodeSignatureSection *section;
510 };
511 
512 } // namespace
513 
514 // Adds stubs and bindings where necessary (e.g. if the symbol is a
515 // DylibSymbol.)
516 static void prepareBranchTarget(Symbol *sym) {
517   if (auto *dysym = dyn_cast<DylibSymbol>(sym)) {
518     if (in.stubs->addEntry(dysym)) {
519       if (sym->isWeakDef()) {
520         in.binding->addEntry(dysym, in.lazyPointers->isec,
521                              sym->stubsIndex * target->wordSize);
522         in.weakBinding->addEntry(sym, in.lazyPointers->isec,
523                                  sym->stubsIndex * target->wordSize);
524       } else {
525         in.lazyBinding->addEntry(dysym);
526       }
527     }
528   } else if (auto *defined = dyn_cast<Defined>(sym)) {
529     if (defined->isExternalWeakDef()) {
530       if (in.stubs->addEntry(sym)) {
531         in.rebase->addEntry(in.lazyPointers->isec,
532                             sym->stubsIndex * target->wordSize);
533         in.weakBinding->addEntry(sym, in.lazyPointers->isec,
534                                  sym->stubsIndex * target->wordSize);
535       }
536     }
537   }
538 }
539 
540 // Can a symbol's address can only be resolved at runtime?
541 static bool needsBinding(const Symbol *sym) {
542   if (isa<DylibSymbol>(sym))
543     return true;
544   if (const auto *defined = dyn_cast<Defined>(sym))
545     return defined->isExternalWeakDef();
546   return false;
547 }
548 
549 static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec,
550                                     const Reloc &r) {
551   const RelocAttrs &relocAttrs = target->getRelocAttrs(r.type);
552 
553   if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) {
554     prepareBranchTarget(sym);
555   } else if (relocAttrs.hasAttr(RelocAttrBits::GOT)) {
556     if (relocAttrs.hasAttr(RelocAttrBits::POINTER) || needsBinding(sym))
557       in.got->addEntry(sym);
558   } else if (relocAttrs.hasAttr(RelocAttrBits::TLV)) {
559     if (needsBinding(sym))
560       in.tlvPointers->addEntry(sym);
561   } else if (relocAttrs.hasAttr(RelocAttrBits::UNSIGNED)) {
562     // References from thread-local variable sections are treated as offsets
563     // relative to the start of the referent section, and therefore have no
564     // need of rebase opcodes.
565     if (!(isThreadLocalVariables(isec->flags) && isa<Defined>(sym)))
566       addNonLazyBindingEntries(sym, isec, r.offset, r.addend);
567   }
568 }
569 
570 void Writer::scanRelocations() {
571   TimeTraceScope timeScope("Scan relocations");
572   for (InputSection *isec : inputSections) {
573     if (isec->segname == segment_names::ld) {
574       in.unwindInfo->prepareRelocations(isec);
575       continue;
576     }
577 
578     for (auto it = isec->relocs.begin(); it != isec->relocs.end(); ++it) {
579       Reloc &r = *it;
580       if (target->hasAttr(r.type, RelocAttrBits::SUBTRAHEND)) {
581         // Skip over the following UNSIGNED relocation -- it's just there as the
582         // minuend, and doesn't have the usual UNSIGNED semantics. We don't want
583         // to emit rebase opcodes for it.
584         it = std::next(it);
585         continue;
586       }
587       if (auto *sym = r.referent.dyn_cast<Symbol *>()) {
588         if (auto *undefined = dyn_cast<Undefined>(sym))
589           treatUndefinedSymbol(*undefined);
590         // treatUndefinedSymbol() can replace sym with a DylibSymbol; re-check.
591         if (!isa<Undefined>(sym) && validateSymbolRelocation(sym, isec, r))
592           prepareSymbolRelocation(sym, isec, r);
593       } else {
594         assert(r.referent.is<InputSection *>());
595         if (!r.pcrel)
596           in.rebase->addEntry(isec, r.offset);
597       }
598     }
599   }
600 }
601 
602 void Writer::scanSymbols() {
603   TimeTraceScope timeScope("Scan symbols");
604   for (const Symbol *sym : symtab->getSymbols()) {
605     if (const auto *defined = dyn_cast<Defined>(sym)) {
606       if (defined->overridesWeakDef)
607         in.weakBinding->addNonWeakDefinition(defined);
608     } else if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
609       if (dysym->isDynamicLookup())
610         continue;
611       dysym->getFile()->refState =
612           std::max(dysym->getFile()->refState, dysym->refState);
613     }
614   }
615 }
616 
617 // TODO: ld64 enforces the old load commands in a few other cases.
618 static bool useLCBuildVersion(const PlatformInfo &platformInfo) {
619   static const std::map<PlatformKind, llvm::VersionTuple> minVersion = {
620       {PlatformKind::macOS, llvm::VersionTuple(10, 14)},
621       {PlatformKind::iOS, llvm::VersionTuple(12, 0)},
622       {PlatformKind::iOSSimulator, llvm::VersionTuple(13, 0)},
623       {PlatformKind::tvOS, llvm::VersionTuple(12, 0)},
624       {PlatformKind::tvOSSimulator, llvm::VersionTuple(13, 0)},
625       {PlatformKind::watchOS, llvm::VersionTuple(5, 0)},
626       {PlatformKind::watchOSSimulator, llvm::VersionTuple(6, 0)}};
627   auto it = minVersion.find(platformInfo.target.Platform);
628   return it == minVersion.end() ? true : platformInfo.minimum >= it->second;
629 }
630 
631 template <class LP> void Writer::createLoadCommands() {
632   uint8_t segIndex = 0;
633   for (OutputSegment *seg : outputSegments) {
634     in.header->addLoadCommand(make<LCSegment<LP>>(seg->name, seg));
635     seg->index = segIndex++;
636   }
637 
638   in.header->addLoadCommand(make<LCDyldInfo>(
639       in.rebase, in.binding, in.weakBinding, in.lazyBinding, in.exports));
640   in.header->addLoadCommand(make<LCSymtab>(symtabSection, stringTableSection));
641   in.header->addLoadCommand(
642       make<LCDysymtab>(symtabSection, indirectSymtabSection));
643   if (functionStartsSection)
644     in.header->addLoadCommand(make<LCFunctionStarts>(functionStartsSection));
645   if (config->emitEncryptionInfo)
646     in.header->addLoadCommand(make<LCEncryptionInfo<LP>>());
647   for (StringRef path : config->runtimePaths)
648     in.header->addLoadCommand(make<LCRPath>(path));
649 
650   switch (config->outputType) {
651   case MH_EXECUTE:
652     in.header->addLoadCommand(make<LCLoadDylinker>());
653     in.header->addLoadCommand(make<LCMain>());
654     break;
655   case MH_DYLIB:
656     in.header->addLoadCommand(make<LCDylib>(LC_ID_DYLIB, config->installName,
657                                             config->dylibCompatibilityVersion,
658                                             config->dylibCurrentVersion));
659     break;
660   case MH_BUNDLE:
661     break;
662   default:
663     llvm_unreachable("unhandled output file type");
664   }
665 
666   uuidCommand = make<LCUuid>();
667   in.header->addLoadCommand(uuidCommand);
668 
669   if (useLCBuildVersion(config->platformInfo))
670     in.header->addLoadCommand(make<LCBuildVersion>(config->platformInfo));
671   else
672     in.header->addLoadCommand(make<LCMinVersion>(config->platformInfo));
673 
674   int64_t dylibOrdinal = 1;
675   for (InputFile *file : inputFiles) {
676     if (auto *dylibFile = dyn_cast<DylibFile>(file)) {
677       if (dylibFile->isBundleLoader) {
678         dylibFile->ordinal = BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE;
679         // Shortcut since bundle-loader does not re-export the symbols.
680 
681         dylibFile->reexport = false;
682         continue;
683       }
684 
685       dylibFile->ordinal = dylibOrdinal++;
686       LoadCommandType lcType =
687           dylibFile->forceWeakImport || dylibFile->refState == RefState::Weak
688               ? LC_LOAD_WEAK_DYLIB
689               : LC_LOAD_DYLIB;
690       in.header->addLoadCommand(make<LCDylib>(lcType, dylibFile->dylibName,
691                                               dylibFile->compatibilityVersion,
692                                               dylibFile->currentVersion));
693 
694       if (dylibFile->reexport)
695         in.header->addLoadCommand(
696             make<LCDylib>(LC_REEXPORT_DYLIB, dylibFile->dylibName));
697     }
698   }
699 
700   if (codeSignatureSection)
701     in.header->addLoadCommand(make<LCCodeSignature>(codeSignatureSection));
702 
703   const uint32_t MACOS_MAXPATHLEN = 1024;
704   config->headerPad = std::max(
705       config->headerPad, (config->headerPadMaxInstallNames
706                               ? LCDylib::getInstanceCount() * MACOS_MAXPATHLEN
707                               : 0));
708 }
709 
710 static size_t getSymbolPriority(const SymbolPriorityEntry &entry,
711                                 const InputFile *f) {
712   // We don't use toString(InputFile *) here because it returns the full path
713   // for object files, and we only want the basename.
714   StringRef filename;
715   if (f->archiveName.empty())
716     filename = path::filename(f->getName());
717   else
718     filename = saver.save(path::filename(f->archiveName) + "(" +
719                           path::filename(f->getName()) + ")");
720   return std::max(entry.objectFiles.lookup(filename), entry.anyObjectFile);
721 }
722 
723 // Each section gets assigned the priority of the highest-priority symbol it
724 // contains.
725 static DenseMap<const InputSection *, size_t> buildInputSectionPriorities() {
726   DenseMap<const InputSection *, size_t> sectionPriorities;
727 
728   if (config->priorities.empty())
729     return sectionPriorities;
730 
731   auto addSym = [&](Defined &sym) {
732     auto it = config->priorities.find(sym.getName());
733     if (it == config->priorities.end())
734       return;
735 
736     SymbolPriorityEntry &entry = it->second;
737     size_t &priority = sectionPriorities[sym.isec];
738     priority = std::max(priority, getSymbolPriority(entry, sym.isec->file));
739   };
740 
741   // TODO: Make sure this handles weak symbols correctly.
742   for (const InputFile *file : inputFiles) {
743     if (isa<ObjFile>(file))
744       for (Symbol *sym : file->symbols)
745         if (auto *d = dyn_cast<Defined>(sym))
746           addSym(*d);
747   }
748 
749   return sectionPriorities;
750 }
751 
752 static int segmentOrder(OutputSegment *seg) {
753   return StringSwitch<int>(seg->name)
754       .Case(segment_names::pageZero, -4)
755       .Case(segment_names::text, -3)
756       .Case(segment_names::dataConst, -2)
757       .Case(segment_names::data, -1)
758       .Case(segment_names::llvm, std::numeric_limits<int>::max() - 1)
759       // Make sure __LINKEDIT is the last segment (i.e. all its hidden
760       // sections must be ordered after other sections).
761       .Case(segment_names::linkEdit, std::numeric_limits<int>::max())
762       .Default(0);
763 }
764 
765 static int sectionOrder(OutputSection *osec) {
766   StringRef segname = osec->parent->name;
767   // Sections are uniquely identified by their segment + section name.
768   if (segname == segment_names::text) {
769     return StringSwitch<int>(osec->name)
770         .Case(section_names::header, -4)
771         .Case(section_names::text, -3)
772         .Case(section_names::stubs, -2)
773         .Case(section_names::stubHelper, -1)
774         .Case(section_names::unwindInfo, std::numeric_limits<int>::max() - 1)
775         .Case(section_names::ehFrame, std::numeric_limits<int>::max())
776         .Default(0);
777   } else if (segname == segment_names::data) {
778     // For each thread spawned, dyld will initialize its TLVs by copying the
779     // address range from the start of the first thread-local data section to
780     // the end of the last one. We therefore arrange these sections contiguously
781     // to minimize the amount of memory used. Additionally, since zerofill
782     // sections must be at the end of their segments, and since TLV data
783     // sections can be zerofills, we end up putting all TLV data sections at the
784     // end of the segment.
785     switch (sectionType(osec->flags)) {
786     case S_THREAD_LOCAL_REGULAR:
787       return std::numeric_limits<int>::max() - 2;
788     case S_THREAD_LOCAL_ZEROFILL:
789       return std::numeric_limits<int>::max() - 1;
790     case S_ZEROFILL:
791       return std::numeric_limits<int>::max();
792     default:
793       return StringSwitch<int>(osec->name)
794           .Case(section_names::lazySymbolPtr, -2)
795           .Case(section_names::data, -1)
796           .Default(0);
797     }
798   } else if (segname == segment_names::linkEdit) {
799     return StringSwitch<int>(osec->name)
800         .Case(section_names::rebase, -9)
801         .Case(section_names::binding, -8)
802         .Case(section_names::weakBinding, -7)
803         .Case(section_names::lazyBinding, -6)
804         .Case(section_names::export_, -5)
805         .Case(section_names::functionStarts, -4)
806         .Case(section_names::symbolTable, -3)
807         .Case(section_names::indirectSymbolTable, -2)
808         .Case(section_names::stringTable, -1)
809         .Case(section_names::codeSignature, std::numeric_limits<int>::max())
810         .Default(0);
811   }
812   // ZeroFill sections must always be the at the end of their segments,
813   // otherwise subsequent sections may get overwritten with zeroes at runtime.
814   if (sectionType(osec->flags) == S_ZEROFILL)
815     return std::numeric_limits<int>::max();
816   return 0;
817 }
818 
// Turns a key function into a strict "less than" comparator: the returned
// callable reports whether ord(lhs) < ord(rhs). The key function is copied
// into the comparator.
template <typename T, typename F>
static std::function<bool(T, T)> compareByOrder(F ord) {
  return [ord](T lhs, T rhs) -> bool { return ord(lhs) < ord(rhs); };
}
823 
824 // Sorting only can happen once all outputs have been collected. Here we sort
825 // segments, output sections within each segment, and input sections within each
826 // output segment.
827 static void sortSegmentsAndSections() {
828   TimeTraceScope timeScope("Sort segments and sections");
829 
830   llvm::stable_sort(outputSegments,
831                     compareByOrder<OutputSegment *>(segmentOrder));
832 
833   DenseMap<const InputSection *, size_t> isecPriorities =
834       buildInputSectionPriorities();
835 
836   uint32_t sectionIndex = 0;
837   for (OutputSegment *seg : outputSegments) {
838     seg->sortOutputSections(compareByOrder<OutputSection *>(sectionOrder));
839     for (OutputSection *osec : seg->getSections()) {
840       // Now that the output sections are sorted, assign the final
841       // output section indices.
842       if (!osec->isHidden())
843         osec->index = ++sectionIndex;
844       if (!firstTLVDataSection && isThreadLocalData(osec->flags))
845         firstTLVDataSection = osec;
846 
847       if (!isecPriorities.empty()) {
848         if (auto *merged = dyn_cast<MergedOutputSection>(osec)) {
849           llvm::stable_sort(merged->inputs,
850                             [&](InputSection *a, InputSection *b) {
851                               return isecPriorities[a] > isecPriorities[b];
852                             });
853         }
854       }
855     }
856   }
857 }
858 
859 static NamePair maybeRenameSection(NamePair key) {
860   auto newNames = config->sectionRenameMap.find(key);
861   if (newNames != config->sectionRenameMap.end())
862     return newNames->second;
863   auto newName = config->segmentRenameMap.find(key.first);
864   if (newName != config->segmentRenameMap.end())
865     return std::make_pair(newName->second, key.second);
866   return key;
867 }
868 
869 template <class LP> void Writer::createOutputSections() {
870   TimeTraceScope timeScope("Create output sections");
871   // First, create hidden sections
872   stringTableSection = make<StringTableSection>();
873   symtabSection = makeSymtabSection<LP>(*stringTableSection);
874   indirectSymtabSection = make<IndirectSymtabSection>();
875   if (config->adhocCodesign)
876     codeSignatureSection = make<CodeSignatureSection>();
877   if (config->emitFunctionStarts)
878     functionStartsSection = make<FunctionStartsSection>();
879   if (config->emitBitcodeBundle)
880     make<BitcodeBundleSection>();
881 
882   switch (config->outputType) {
883   case MH_EXECUTE:
884     make<PageZeroSection>();
885     break;
886   case MH_DYLIB:
887   case MH_BUNDLE:
888     break;
889   default:
890     llvm_unreachable("unhandled output file type");
891   }
892 
893   // Then merge input sections into output sections.
894   MapVector<NamePair, MergedOutputSection *> mergedOutputSections;
895   for (InputSection *isec : inputSections) {
896     NamePair names = maybeRenameSection({isec->segname, isec->name});
897     MergedOutputSection *&osec = mergedOutputSections[names];
898     if (osec == nullptr)
899       osec = make<MergedOutputSection>(names.second);
900     osec->mergeInput(isec);
901   }
902 
903   for (const auto &it : mergedOutputSections) {
904     StringRef segname = it.first.first;
905     MergedOutputSection *osec = it.second;
906     if (segname == segment_names::ld) {
907       assert(osec->name == section_names::compactUnwind);
908       in.unwindInfo->setCompactUnwindSection(osec);
909     } else {
910       getOrCreateOutputSegment(segname)->addOutputSection(osec);
911     }
912   }
913 
914   for (SyntheticSection *ssec : syntheticSections) {
915     auto it = mergedOutputSections.find({ssec->segname, ssec->name});
916     if (it == mergedOutputSections.end()) {
917       if (ssec->isNeeded())
918         getOrCreateOutputSegment(ssec->segname)->addOutputSection(ssec);
919     } else {
920       error("section from " + toString(it->second->firstSection()->file) +
921             " conflicts with synthetic section " + ssec->segname + "," +
922             ssec->name);
923     }
924   }
925 
926   // dyld requires __LINKEDIT segment to always exist (even if empty).
927   linkEditSegment = getOrCreateOutputSegment(segment_names::linkEdit);
928 }
929 
930 void Writer::finalizeAddresses() {
931   TimeTraceScope timeScope("Finalize addresses");
932   uint64_t pageSize = target->getPageSize();
933   // Ensure that segments (and the sections they contain) are allocated
934   // addresses in ascending order, which dyld requires.
935   //
936   // Note that at this point, __LINKEDIT sections are empty, but we need to
937   // determine addresses of other segments/sections before generating its
938   // contents.
939   for (OutputSegment *seg : outputSegments) {
940     if (seg == linkEditSegment)
941       continue;
942     assignAddresses(seg);
943     // codesign / libstuff checks for segment ordering by verifying that
944     // `fileOff + fileSize == next segment fileOff`. So we call alignTo() before
945     // (instead of after) computing fileSize to ensure that the segments are
946     // contiguous. We handle addr / vmSize similarly for the same reason.
947     fileOff = alignTo(fileOff, pageSize);
948     addr = alignTo(addr, pageSize);
949     seg->vmSize = addr - seg->firstSection()->addr;
950     seg->fileSize = fileOff - seg->fileOff;
951   }
952 
953   // FIXME(gkm): create branch-extension thunks here, then adjust addresses
954 }
955 
956 void Writer::finalizeLinkEditSegment() {
957   TimeTraceScope timeScope("Finalize __LINKEDIT segment");
958   // Fill __LINKEDIT contents.
959   std::vector<LinkEditSection *> linkEditSections{
960       in.rebase,  in.binding,    in.weakBinding,        in.lazyBinding,
961       in.exports, symtabSection, indirectSymtabSection, functionStartsSection,
962   };
963   parallelForEach(linkEditSections, [](LinkEditSection *osec) {
964     if (osec)
965       osec->finalizeContents();
966   });
967 
968   // Now that __LINKEDIT is filled out, do a proper calculation of its
969   // addresses and offsets.
970   assignAddresses(linkEditSegment);
971   // No need to page-align fileOff / addr here since this is the last segment.
972   linkEditSegment->vmSize = addr - linkEditSegment->firstSection()->addr;
973   linkEditSegment->fileSize = fileOff - linkEditSegment->fileOff;
974 }
975 
976 void Writer::assignAddresses(OutputSegment *seg) {
977   seg->fileOff = fileOff;
978 
979   for (OutputSection *osec : seg->getSections()) {
980     if (!osec->isNeeded())
981       continue;
982     addr = alignTo(addr, osec->align);
983     fileOff = alignTo(fileOff, osec->align);
984     osec->addr = addr;
985     osec->fileOff = isZeroFill(osec->flags) ? 0 : fileOff;
986     osec->finalize();
987 
988     addr += osec->getSize();
989     fileOff += osec->getFileSize();
990   }
991 }
992 
993 void Writer::openFile() {
994   Expected<std::unique_ptr<FileOutputBuffer>> bufferOrErr =
995       FileOutputBuffer::create(config->outputFile, fileOff,
996                                FileOutputBuffer::F_executable);
997 
998   if (!bufferOrErr)
999     error("failed to open " + config->outputFile + ": " +
1000           llvm::toString(bufferOrErr.takeError()));
1001   else
1002     buffer = std::move(*bufferOrErr);
1003 }
1004 
1005 void Writer::writeSections() {
1006   uint8_t *buf = buffer->getBufferStart();
1007   for (const OutputSegment *seg : outputSegments)
1008     for (const OutputSection *osec : seg->getSections())
1009       osec->writeTo(buf + osec->fileOff);
1010 }
1011 
1012 // In order to utilize multiple cores, we first split the buffer into chunks,
1013 // compute a hash for each chunk, and then compute a hash value of the hash
1014 // values.
1015 void Writer::writeUuid() {
1016   TimeTraceScope timeScope("Computing UUID");
1017   ArrayRef<uint8_t> data{buffer->getBufferStart(), buffer->getBufferEnd()};
1018   unsigned chunkCount = parallel::strategy.compute_thread_count() * 10;
1019   // Round-up integer division
1020   size_t chunkSize = (data.size() + chunkCount - 1) / chunkCount;
1021   std::vector<ArrayRef<uint8_t>> chunks = split(data, chunkSize);
1022   std::vector<uint64_t> hashes(chunks.size());
1023   parallelForEachN(0, chunks.size(),
1024                    [&](size_t i) { hashes[i] = xxHash64(chunks[i]); });
1025   uint64_t digest = xxHash64({reinterpret_cast<uint8_t *>(hashes.data()),
1026                               hashes.size() * sizeof(uint64_t)});
1027   uuidCommand->writeUuid(digest);
1028 }
1029 
1030 void Writer::writeCodeSignature() {
1031   if (codeSignatureSection)
1032     codeSignatureSection->writeHashes(buffer->getBufferStart());
1033 }
1034 
1035 void Writer::writeOutputFile() {
1036   TimeTraceScope timeScope("Write output file");
1037   openFile();
1038   if (errorCount())
1039     return;
1040   writeSections();
1041   writeUuid();
1042   writeCodeSignature();
1043 
1044   if (auto e = buffer->commit())
1045     error("failed to write to the output file: " + toString(std::move(e)));
1046 }
1047 
1048 template <class LP> void Writer::run() {
1049   prepareBranchTarget(config->entry);
1050   scanRelocations();
1051   if (in.stubHelper->isNeeded())
1052     in.stubHelper->setup();
1053   scanSymbols();
1054   createOutputSections<LP>();
1055   // No more sections nor segments are created beyond this point.
1056   sortSegmentsAndSections();
1057   createLoadCommands<LP>();
1058   finalizeAddresses();
1059   finalizeLinkEditSegment();
1060   writeMapFile();
1061   writeOutputFile();
1062 }
1063 
1064 template <class LP> void macho::writeResult() { Writer().run<LP>(); }
1065 
// Instantiates the synthetic sections that are reachable through the global
// `in` struct. Only the Mach-O header section and the unwind info section are
// created through dedicated factory functions; the header's factory is the
// only call here that depends on LP.
//
// NOTE(review): the sections are constructed in a fixed order; if their
// constructors register themselves in a shared list, that order may be
// significant -- confirm before reordering.
template <class LP> void macho::createSyntheticSections() {
  in.header = makeMachHeaderSection<LP>();
  in.rebase = make<RebaseSection>();
  in.binding = make<BindingSection>();
  in.weakBinding = make<WeakBindingSection>();
  in.lazyBinding = make<LazyBindingSection>();
  in.exports = make<ExportSection>();
  in.got = make<GotSection>();
  in.tlvPointers = make<TlvPointerSection>();
  in.lazyPointers = make<LazyPointerSection>();
  in.stubs = make<StubsSection>();
  in.stubHelper = make<StubHelperSection>();
  in.imageLoaderCache = make<ImageLoaderCacheSection>();
  in.unwindInfo = makeUnwindInfoSection();
}
1081 
// The first output section, in sorted order, whose flags mark it as
// thread-local data. Set by sortSegmentsAndSections(); null until then (and
// remains null if no such section exists).
OutputSection *macho::firstTLVDataSection = nullptr;

// Explicit instantiations for the LP64 and ILP32 configurations, so that the
// template definitions in this file are emitted for both.
template void macho::writeResult<LP64>();
template void macho::writeResult<ILP32>();
template void macho::createSyntheticSections<LP64>();
template void macho::createSyntheticSections<ILP32>();
1088