1 //===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/ADT/DenseMap.h"
10 #include "llvm/ADT/Twine.h"
11 #include "llvm/ADT/iterator_range.h"
12 #include "llvm/BinaryFormat/MachO.h"
13 #include "llvm/MC/MCAsmBackend.h"
14 #include "llvm/MC/MCAsmLayout.h"
15 #include "llvm/MC/MCAssembler.h"
16 #include "llvm/MC/MCDirectives.h"
17 #include "llvm/MC/MCExpr.h"
18 #include "llvm/MC/MCFixupKindInfo.h"
19 #include "llvm/MC/MCFragment.h"
20 #include "llvm/MC/MCMachObjectWriter.h"
21 #include "llvm/MC/MCObjectWriter.h"
22 #include "llvm/MC/MCSection.h"
23 #include "llvm/MC/MCSectionMachO.h"
24 #include "llvm/MC/MCSymbol.h"
25 #include "llvm/MC/MCSymbolMachO.h"
26 #include "llvm/MC/MCValue.h"
27 #include "llvm/Support/Casting.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/MathExtras.h"
31 #include "llvm/Support/raw_ostream.h"
32 #include <algorithm>
33 #include <cassert>
34 #include <cstdint>
35 #include <string>
36 #include <utility>
37 #include <vector>
38 
39 using namespace llvm;
40 
41 #define DEBUG_TYPE "mc"
42 
43 void MachObjectWriter::reset() {
44   Relocations.clear();
45   IndirectSymBase.clear();
46   StringTable.clear();
47   LocalSymbolData.clear();
48   ExternalSymbolData.clear();
49   UndefinedSymbolData.clear();
50   MCObjectWriter::reset();
51 }
52 
53 bool MachObjectWriter::doesSymbolRequireExternRelocation(const MCSymbol &S) {
54   // Undefined symbols are always extern.
55   if (S.isUndefined())
56     return true;
57 
58   // References to weak definitions require external relocation entries; the
59   // definition may not always be the one in the same object file.
60   if (cast<MCSymbolMachO>(S).isWeakDefinition())
61     return true;
62 
63   // Otherwise, we can use an internal relocation.
64   return false;
65 }
66 
67 bool MachObjectWriter::
68 MachSymbolData::operator<(const MachSymbolData &RHS) const {
69   return Symbol->getName() < RHS.Symbol->getName();
70 }
71 
72 bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
73   const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo(
74     (MCFixupKind) Kind);
75 
76   return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
77 }
78 
79 uint64_t MachObjectWriter::getFragmentAddress(const MCFragment *Fragment,
80                                               const MCAsmLayout &Layout) const {
81   return getSectionAddress(Fragment->getParent()) +
82          Layout.getFragmentOffset(Fragment);
83 }
84 
85 uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S,
86                                             const MCAsmLayout &Layout) const {
87   // If this is a variable, then recursively evaluate now.
88   if (S.isVariable()) {
89     if (const MCConstantExpr *C =
90           dyn_cast<const MCConstantExpr>(S.getVariableValue()))
91       return C->getValue();
92 
93     MCValue Target;
94     if (!S.getVariableValue()->evaluateAsRelocatable(Target, &Layout, nullptr))
95       report_fatal_error("unable to evaluate offset for variable '" +
96                          S.getName() + "'");
97 
98     // Verify that any used symbols are defined.
99     if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
100       report_fatal_error("unable to evaluate offset to undefined symbol '" +
101                          Target.getSymA()->getSymbol().getName() + "'");
102     if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
103       report_fatal_error("unable to evaluate offset to undefined symbol '" +
104                          Target.getSymB()->getSymbol().getName() + "'");
105 
106     uint64_t Address = Target.getConstant();
107     if (Target.getSymA())
108       Address += getSymbolAddress(Target.getSymA()->getSymbol(), Layout);
109     if (Target.getSymB())
110       Address += getSymbolAddress(Target.getSymB()->getSymbol(), Layout);
111     return Address;
112   }
113 
114   return getSectionAddress(S.getFragment()->getParent()) +
115          Layout.getSymbolOffset(S);
116 }
117 
118 uint64_t MachObjectWriter::getPaddingSize(const MCSection *Sec,
119                                           const MCAsmLayout &Layout) const {
120   uint64_t EndAddr = getSectionAddress(Sec) + Layout.getSectionAddressSize(Sec);
121   unsigned Next = Sec->getLayoutOrder() + 1;
122   if (Next >= Layout.getSectionOrder().size())
123     return 0;
124 
125   const MCSection &NextSec = *Layout.getSectionOrder()[Next];
126   if (NextSec.isVirtualSection())
127     return 0;
128   return OffsetToAlignment(EndAddr, NextSec.getAlignment());
129 }
130 
131 void MachObjectWriter::writeHeader(MachO::HeaderFileType Type,
132                                    unsigned NumLoadCommands,
133                                    unsigned LoadCommandsSize,
134                                    bool SubsectionsViaSymbols) {
135   uint32_t Flags = 0;
136 
137   if (SubsectionsViaSymbols)
138     Flags |= MachO::MH_SUBSECTIONS_VIA_SYMBOLS;
139 
140   // struct mach_header (28 bytes) or
141   // struct mach_header_64 (32 bytes)
142 
143   uint64_t Start = W.OS.tell();
144   (void) Start;
145 
146   W.write<uint32_t>(is64Bit() ? MachO::MH_MAGIC_64 : MachO::MH_MAGIC);
147 
148   W.write<uint32_t>(TargetObjectWriter->getCPUType());
149   W.write<uint32_t>(TargetObjectWriter->getCPUSubtype());
150 
151   W.write<uint32_t>(Type);
152   W.write<uint32_t>(NumLoadCommands);
153   W.write<uint32_t>(LoadCommandsSize);
154   W.write<uint32_t>(Flags);
155   if (is64Bit())
156     W.write<uint32_t>(0); // reserved
157 
158   assert(W.OS.tell() - Start == (is64Bit() ? sizeof(MachO::mach_header_64)
159                                            : sizeof(MachO::mach_header)));
160 }
161 
162 void MachObjectWriter::writeWithPadding(StringRef Str, uint64_t Size) {
163   assert(Size >= Str.size());
164   W.OS << Str;
165   W.OS.write_zeros(Size - Str.size());
166 }
167 
168 /// writeSegmentLoadCommand - Write a segment load command.
169 ///
170 /// \param NumSections The number of sections in this segment.
171 /// \param SectionDataSize The total size of the sections.
172 void MachObjectWriter::writeSegmentLoadCommand(
173     StringRef Name, unsigned NumSections, uint64_t VMAddr, uint64_t VMSize,
174     uint64_t SectionDataStartOffset, uint64_t SectionDataSize, uint32_t MaxProt,
175     uint32_t InitProt) {
176   // struct segment_command (56 bytes) or
177   // struct segment_command_64 (72 bytes)
178 
179   uint64_t Start = W.OS.tell();
180   (void) Start;
181 
182   unsigned SegmentLoadCommandSize =
183     is64Bit() ? sizeof(MachO::segment_command_64):
184     sizeof(MachO::segment_command);
185   W.write<uint32_t>(is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT);
186   W.write<uint32_t>(SegmentLoadCommandSize +
187           NumSections * (is64Bit() ? sizeof(MachO::section_64) :
188                          sizeof(MachO::section)));
189 
190   writeWithPadding(Name, 16);
191   if (is64Bit()) {
192     W.write<uint64_t>(VMAddr);                 // vmaddr
193     W.write<uint64_t>(VMSize); // vmsize
194     W.write<uint64_t>(SectionDataStartOffset); // file offset
195     W.write<uint64_t>(SectionDataSize); // file size
196   } else {
197     W.write<uint32_t>(VMAddr);                 // vmaddr
198     W.write<uint32_t>(VMSize); // vmsize
199     W.write<uint32_t>(SectionDataStartOffset); // file offset
200     W.write<uint32_t>(SectionDataSize); // file size
201   }
202   // maxprot
203   W.write<uint32_t>(MaxProt);
204   // initprot
205   W.write<uint32_t>(InitProt);
206   W.write<uint32_t>(NumSections);
207   W.write<uint32_t>(0); // flags
208 
209   assert(W.OS.tell() - Start == SegmentLoadCommandSize);
210 }
211 
212 void MachObjectWriter::writeSection(const MCAsmLayout &Layout,
213                                     const MCSection &Sec, uint64_t VMAddr,
214                                     uint64_t FileOffset, unsigned Flags,
215                                     uint64_t RelocationsStart,
216                                     unsigned NumRelocations) {
217   uint64_t SectionSize = Layout.getSectionAddressSize(&Sec);
218   const MCSectionMachO &Section = cast<MCSectionMachO>(Sec);
219 
220   // The offset is unused for virtual sections.
221   if (Section.isVirtualSection()) {
222     assert(Layout.getSectionFileSize(&Sec) == 0 && "Invalid file size!");
223     FileOffset = 0;
224   }
225 
226   // struct section (68 bytes) or
227   // struct section_64 (80 bytes)
228 
229   uint64_t Start = W.OS.tell();
230   (void) Start;
231 
232   writeWithPadding(Section.getSectionName(), 16);
233   writeWithPadding(Section.getSegmentName(), 16);
234   if (is64Bit()) {
235     W.write<uint64_t>(VMAddr);      // address
236     W.write<uint64_t>(SectionSize); // size
237   } else {
238     W.write<uint32_t>(VMAddr);      // address
239     W.write<uint32_t>(SectionSize); // size
240   }
241   W.write<uint32_t>(FileOffset);
242 
243   assert(isPowerOf2_32(Section.getAlignment()) && "Invalid alignment!");
244   W.write<uint32_t>(Log2_32(Section.getAlignment()));
245   W.write<uint32_t>(NumRelocations ? RelocationsStart : 0);
246   W.write<uint32_t>(NumRelocations);
247   W.write<uint32_t>(Flags);
248   W.write<uint32_t>(IndirectSymBase.lookup(&Sec)); // reserved1
249   W.write<uint32_t>(Section.getStubSize()); // reserved2
250   if (is64Bit())
251     W.write<uint32_t>(0); // reserved3
252 
253   assert(W.OS.tell() - Start ==
254          (is64Bit() ? sizeof(MachO::section_64) : sizeof(MachO::section)));
255 }
256 
257 void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset,
258                                               uint32_t NumSymbols,
259                                               uint32_t StringTableOffset,
260                                               uint32_t StringTableSize) {
261   // struct symtab_command (24 bytes)
262 
263   uint64_t Start = W.OS.tell();
264   (void) Start;
265 
266   W.write<uint32_t>(MachO::LC_SYMTAB);
267   W.write<uint32_t>(sizeof(MachO::symtab_command));
268   W.write<uint32_t>(SymbolOffset);
269   W.write<uint32_t>(NumSymbols);
270   W.write<uint32_t>(StringTableOffset);
271   W.write<uint32_t>(StringTableSize);
272 
273   assert(W.OS.tell() - Start == sizeof(MachO::symtab_command));
274 }
275 
276 void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol,
277                                                 uint32_t NumLocalSymbols,
278                                                 uint32_t FirstExternalSymbol,
279                                                 uint32_t NumExternalSymbols,
280                                                 uint32_t FirstUndefinedSymbol,
281                                                 uint32_t NumUndefinedSymbols,
282                                                 uint32_t IndirectSymbolOffset,
283                                                 uint32_t NumIndirectSymbols) {
284   // struct dysymtab_command (80 bytes)
285 
286   uint64_t Start = W.OS.tell();
287   (void) Start;
288 
289   W.write<uint32_t>(MachO::LC_DYSYMTAB);
290   W.write<uint32_t>(sizeof(MachO::dysymtab_command));
291   W.write<uint32_t>(FirstLocalSymbol);
292   W.write<uint32_t>(NumLocalSymbols);
293   W.write<uint32_t>(FirstExternalSymbol);
294   W.write<uint32_t>(NumExternalSymbols);
295   W.write<uint32_t>(FirstUndefinedSymbol);
296   W.write<uint32_t>(NumUndefinedSymbols);
297   W.write<uint32_t>(0); // tocoff
298   W.write<uint32_t>(0); // ntoc
299   W.write<uint32_t>(0); // modtaboff
300   W.write<uint32_t>(0); // nmodtab
301   W.write<uint32_t>(0); // extrefsymoff
302   W.write<uint32_t>(0); // nextrefsyms
303   W.write<uint32_t>(IndirectSymbolOffset);
304   W.write<uint32_t>(NumIndirectSymbols);
305   W.write<uint32_t>(0); // extreloff
306   W.write<uint32_t>(0); // nextrel
307   W.write<uint32_t>(0); // locreloff
308   W.write<uint32_t>(0); // nlocrel
309 
310   assert(W.OS.tell() - Start == sizeof(MachO::dysymtab_command));
311 }
312 
313 MachObjectWriter::MachSymbolData *
314 MachObjectWriter::findSymbolData(const MCSymbol &Sym) {
315   for (auto *SymbolData :
316        {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
317     for (MachSymbolData &Entry : *SymbolData)
318       if (Entry.Symbol == &Sym)
319         return &Entry;
320 
321   return nullptr;
322 }
323 
324 const MCSymbol &MachObjectWriter::findAliasedSymbol(const MCSymbol &Sym) const {
325   const MCSymbol *S = &Sym;
326   while (S->isVariable()) {
327     const MCExpr *Value = S->getVariableValue();
328     const auto *Ref = dyn_cast<MCSymbolRefExpr>(Value);
329     if (!Ref)
330       return *S;
331     S = &Ref->getSymbol();
332   }
333   return *S;
334 }
335 
336 void MachObjectWriter::writeNlist(MachSymbolData &MSD,
337                                   const MCAsmLayout &Layout) {
338   const MCSymbol *Symbol = MSD.Symbol;
339   const MCSymbol &Data = *Symbol;
340   const MCSymbol *AliasedSymbol = &findAliasedSymbol(*Symbol);
341   uint8_t SectionIndex = MSD.SectionIndex;
342   uint8_t Type = 0;
343   uint64_t Address = 0;
344   bool IsAlias = Symbol != AliasedSymbol;
345 
346   const MCSymbol &OrigSymbol = *Symbol;
347   MachSymbolData *AliaseeInfo;
348   if (IsAlias) {
349     AliaseeInfo = findSymbolData(*AliasedSymbol);
350     if (AliaseeInfo)
351       SectionIndex = AliaseeInfo->SectionIndex;
352     Symbol = AliasedSymbol;
353     // FIXME: Should this update Data as well?
354   }
355 
356   // Set the N_TYPE bits. See <mach-o/nlist.h>.
357   //
358   // FIXME: Are the prebound or indirect fields possible here?
359   if (IsAlias && Symbol->isUndefined())
360     Type = MachO::N_INDR;
361   else if (Symbol->isUndefined())
362     Type = MachO::N_UNDF;
363   else if (Symbol->isAbsolute())
364     Type = MachO::N_ABS;
365   else
366     Type = MachO::N_SECT;
367 
368   // FIXME: Set STAB bits.
369 
370   if (Data.isPrivateExtern())
371     Type |= MachO::N_PEXT;
372 
373   // Set external bit.
374   if (Data.isExternal() || (!IsAlias && Symbol->isUndefined()))
375     Type |= MachO::N_EXT;
376 
377   // Compute the symbol address.
378   if (IsAlias && Symbol->isUndefined())
379     Address = AliaseeInfo->StringIndex;
380   else if (Symbol->isDefined())
381     Address = getSymbolAddress(OrigSymbol, Layout);
382   else if (Symbol->isCommon()) {
383     // Common symbols are encoded with the size in the address
384     // field, and their alignment in the flags.
385     Address = Symbol->getCommonSize();
386   }
387 
388   // struct nlist (12 bytes)
389 
390   W.write<uint32_t>(MSD.StringIndex);
391   W.OS << char(Type);
392   W.OS << char(SectionIndex);
393 
394   // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
395   // value.
396   bool EncodeAsAltEntry =
397     IsAlias && cast<MCSymbolMachO>(OrigSymbol).isAltEntry();
398   W.write<uint16_t>(cast<MCSymbolMachO>(Symbol)->getEncodedFlags(EncodeAsAltEntry));
399   if (is64Bit())
400     W.write<uint64_t>(Address);
401   else
402     W.write<uint32_t>(Address);
403 }
404 
405 void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type,
406                                                 uint32_t DataOffset,
407                                                 uint32_t DataSize) {
408   uint64_t Start = W.OS.tell();
409   (void) Start;
410 
411   W.write<uint32_t>(Type);
412   W.write<uint32_t>(sizeof(MachO::linkedit_data_command));
413   W.write<uint32_t>(DataOffset);
414   W.write<uint32_t>(DataSize);
415 
416   assert(W.OS.tell() - Start == sizeof(MachO::linkedit_data_command));
417 }
418 
419 static unsigned ComputeLinkerOptionsLoadCommandSize(
420   const std::vector<std::string> &Options, bool is64Bit)
421 {
422   unsigned Size = sizeof(MachO::linker_option_command);
423   for (const std::string &Option : Options)
424     Size += Option.size() + 1;
425   return alignTo(Size, is64Bit ? 8 : 4);
426 }
427 
428 void MachObjectWriter::writeLinkerOptionsLoadCommand(
429   const std::vector<std::string> &Options)
430 {
431   unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit());
432   uint64_t Start = W.OS.tell();
433   (void) Start;
434 
435   W.write<uint32_t>(MachO::LC_LINKER_OPTION);
436   W.write<uint32_t>(Size);
437   W.write<uint32_t>(Options.size());
438   uint64_t BytesWritten = sizeof(MachO::linker_option_command);
439   for (const std::string &Option : Options) {
440     // Write each string, including the null byte.
441     W.OS << Option << '\0';
442     BytesWritten += Option.size() + 1;
443   }
444 
445   // Pad to a multiple of the pointer size.
446   W.OS.write_zeros(OffsetToAlignment(BytesWritten, is64Bit() ? 8 : 4));
447 
448   assert(W.OS.tell() - Start == Size);
449 }
450 
451 void MachObjectWriter::recordRelocation(MCAssembler &Asm,
452                                         const MCAsmLayout &Layout,
453                                         const MCFragment *Fragment,
454                                         const MCFixup &Fixup, MCValue Target,
455                                         uint64_t &FixedValue) {
456   TargetObjectWriter->recordRelocation(this, Asm, Layout, Fragment, Fixup,
457                                        Target, FixedValue);
458 }
459 
460 void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) {
461   // This is the point where 'as' creates actual symbols for indirect symbols
462   // (in the following two passes). It would be easier for us to do this sooner
463   // when we see the attribute, but that makes getting the order in the symbol
464   // table much more complicated than it is worth.
465   //
466   // FIXME: Revisit this when the dust settles.
467 
468   // Report errors for use of .indirect_symbol not in a symbol pointer section
469   // or stub section.
470   for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
471          ie = Asm.indirect_symbol_end(); it != ie; ++it) {
472     const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
473 
474     if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
475         Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
476         Section.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS &&
477         Section.getType() != MachO::S_SYMBOL_STUBS) {
478       MCSymbol &Symbol = *it->Symbol;
479       report_fatal_error("indirect symbol '" + Symbol.getName() +
480                          "' not in a symbol pointer or stub section");
481     }
482   }
483 
484   // Bind non-lazy symbol pointers first.
485   unsigned IndirectIndex = 0;
486   for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
487          ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
488     const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
489 
490     if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
491         Section.getType() !=  MachO::S_THREAD_LOCAL_VARIABLE_POINTERS)
492       continue;
493 
494     // Initialize the section indirect symbol base, if necessary.
495     IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex));
496 
497     Asm.registerSymbol(*it->Symbol);
498   }
499 
500   // Then lazy symbol pointers and symbol stubs.
501   IndirectIndex = 0;
502   for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
503          ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
504     const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
505 
506     if (Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
507         Section.getType() != MachO::S_SYMBOL_STUBS)
508       continue;
509 
510     // Initialize the section indirect symbol base, if necessary.
511     IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex));
512 
513     // Set the symbol type to undefined lazy, but only on construction.
514     //
515     // FIXME: Do not hardcode.
516     bool Created;
517     Asm.registerSymbol(*it->Symbol, &Created);
518     if (Created)
519       cast<MCSymbolMachO>(it->Symbol)->setReferenceTypeUndefinedLazy(true);
520   }
521 }
522 
523 /// computeSymbolTable - Compute the symbol table data
524 void MachObjectWriter::computeSymbolTable(
525     MCAssembler &Asm, std::vector<MachSymbolData> &LocalSymbolData,
526     std::vector<MachSymbolData> &ExternalSymbolData,
527     std::vector<MachSymbolData> &UndefinedSymbolData) {
528   // Build section lookup table.
529   DenseMap<const MCSection*, uint8_t> SectionIndexMap;
530   unsigned Index = 1;
531   for (MCAssembler::iterator it = Asm.begin(),
532          ie = Asm.end(); it != ie; ++it, ++Index)
533     SectionIndexMap[&*it] = Index;
534   assert(Index <= 256 && "Too many sections!");
535 
536   // Build the string table.
537   for (const MCSymbol &Symbol : Asm.symbols()) {
538     if (!Asm.isSymbolLinkerVisible(Symbol))
539       continue;
540 
541     StringTable.add(Symbol.getName());
542   }
543   StringTable.finalize();
544 
545   // Build the symbol arrays but only for non-local symbols.
546   //
547   // The particular order that we collect and then sort the symbols is chosen to
548   // match 'as'. Even though it doesn't matter for correctness, this is
549   // important for letting us diff .o files.
550   for (const MCSymbol &Symbol : Asm.symbols()) {
551     // Ignore non-linker visible symbols.
552     if (!Asm.isSymbolLinkerVisible(Symbol))
553       continue;
554 
555     if (!Symbol.isExternal() && !Symbol.isUndefined())
556       continue;
557 
558     MachSymbolData MSD;
559     MSD.Symbol = &Symbol;
560     MSD.StringIndex = StringTable.getOffset(Symbol.getName());
561 
562     if (Symbol.isUndefined()) {
563       MSD.SectionIndex = 0;
564       UndefinedSymbolData.push_back(MSD);
565     } else if (Symbol.isAbsolute()) {
566       MSD.SectionIndex = 0;
567       ExternalSymbolData.push_back(MSD);
568     } else {
569       MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
570       assert(MSD.SectionIndex && "Invalid section index!");
571       ExternalSymbolData.push_back(MSD);
572     }
573   }
574 
575   // Now add the data for local symbols.
576   for (const MCSymbol &Symbol : Asm.symbols()) {
577     // Ignore non-linker visible symbols.
578     if (!Asm.isSymbolLinkerVisible(Symbol))
579       continue;
580 
581     if (Symbol.isExternal() || Symbol.isUndefined())
582       continue;
583 
584     MachSymbolData MSD;
585     MSD.Symbol = &Symbol;
586     MSD.StringIndex = StringTable.getOffset(Symbol.getName());
587 
588     if (Symbol.isAbsolute()) {
589       MSD.SectionIndex = 0;
590       LocalSymbolData.push_back(MSD);
591     } else {
592       MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
593       assert(MSD.SectionIndex && "Invalid section index!");
594       LocalSymbolData.push_back(MSD);
595     }
596   }
597 
598   // External and undefined symbols are required to be in lexicographic order.
599   llvm::sort(ExternalSymbolData);
600   llvm::sort(UndefinedSymbolData);
601 
602   // Set the symbol indices.
603   Index = 0;
604   for (auto *SymbolData :
605        {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
606     for (MachSymbolData &Entry : *SymbolData)
607       Entry.Symbol->setIndex(Index++);
608 
609   for (const MCSection &Section : Asm) {
610     for (RelAndSymbol &Rel : Relocations[&Section]) {
611       if (!Rel.Sym)
612         continue;
613 
614       // Set the Index and the IsExtern bit.
615       unsigned Index = Rel.Sym->getIndex();
616       assert(isInt<24>(Index));
617       if (W.Endian == support::little)
618         Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (~0U << 24)) | Index | (1 << 27);
619       else
620         Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4);
621     }
622   }
623 }
624 
625 void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
626                                                const MCAsmLayout &Layout) {
627   uint64_t StartAddress = 0;
628   for (const MCSection *Sec : Layout.getSectionOrder()) {
629     StartAddress = alignTo(StartAddress, Sec->getAlignment());
630     SectionAddress[Sec] = StartAddress;
631     StartAddress += Layout.getSectionAddressSize(Sec);
632 
633     // Explicitly pad the section to match the alignment requirements of the
634     // following one. This is for 'gas' compatibility, it shouldn't
635     /// strictly be necessary.
636     StartAddress += getPaddingSize(Sec, Layout);
637   }
638 }
639 
640 void MachObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
641                                                 const MCAsmLayout &Layout) {
642   computeSectionAddresses(Asm, Layout);
643 
644   // Create symbol data for any indirect symbols.
645   bindIndirectSymbols(Asm);
646 }
647 
648 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
649     const MCAssembler &Asm, const MCSymbol &A, const MCSymbol &B,
650     bool InSet) const {
651   // FIXME: We don't handle things like
652   // foo = .
653   // creating atoms.
654   if (A.isVariable() || B.isVariable())
655     return false;
656   return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, A, B,
657                                                                 InSet);
658 }
659 
660 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
661     const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
662     bool InSet, bool IsPCRel) const {
663   if (InSet)
664     return true;
665 
666   // The effective address is
667   //     addr(atom(A)) + offset(A)
668   //   - addr(atom(B)) - offset(B)
669   // and the offsets are not relocatable, so the fixup is fully resolved when
670   //  addr(atom(A)) - addr(atom(B)) == 0.
671   const MCSymbol &SA = findAliasedSymbol(SymA);
672   const MCSection &SecA = SA.getSection();
673   const MCSection &SecB = *FB.getParent();
674 
675   if (IsPCRel) {
676     // The simple (Darwin, except on x86_64) way of dealing with this was to
677     // assume that any reference to a temporary symbol *must* be a temporary
678     // symbol in the same atom, unless the sections differ. Therefore, any PCrel
679     // relocation to a temporary symbol (in the same section) is fully
680     // resolved. This also works in conjunction with absolutized .set, which
681     // requires the compiler to use .set to absolutize the differences between
682     // symbols which the compiler knows to be assembly time constants, so we
683     // don't need to worry about considering symbol differences fully resolved.
684     //
685     // If the file isn't using sub-sections-via-symbols, we can make the
686     // same assumptions about any symbol that we normally make about
687     // assembler locals.
688 
689     bool hasReliableSymbolDifference = isX86_64();
690     if (!hasReliableSymbolDifference) {
691       if (!SA.isInSection() || &SecA != &SecB ||
692           (!SA.isTemporary() && FB.getAtom() != SA.getFragment()->getAtom() &&
693            Asm.getSubsectionsViaSymbols()))
694         return false;
695       return true;
696     }
697     // For Darwin x86_64, there is one special case when the reference IsPCRel.
698     // If the fragment with the reference does not have a base symbol but meets
699     // the simple way of dealing with this, in that it is a temporary symbol in
700     // the same atom then it is assumed to be fully resolved.  This is needed so
701     // a relocation entry is not created and so the static linker does not
702     // mess up the reference later.
703     else if(!FB.getAtom() &&
704             SA.isTemporary() && SA.isInSection() && &SecA == &SecB){
705       return true;
706     }
707   }
708 
709   // If they are not in the same section, we can't compute the diff.
710   if (&SecA != &SecB)
711     return false;
712 
713   const MCFragment *FA = SA.getFragment();
714 
715   // Bail if the symbol has no fragment.
716   if (!FA)
717     return false;
718 
719   // If the atoms are the same, they are guaranteed to have the same address.
720   if (FA->getAtom() == FB.getAtom())
721     return true;
722 
723   // Otherwise, we can't prove this is fully resolved.
724   return false;
725 }
726 
727 static MachO::LoadCommandType getLCFromMCVM(MCVersionMinType Type) {
728   switch (Type) {
729   case MCVM_OSXVersionMin:     return MachO::LC_VERSION_MIN_MACOSX;
730   case MCVM_IOSVersionMin:     return MachO::LC_VERSION_MIN_IPHONEOS;
731   case MCVM_TvOSVersionMin:    return MachO::LC_VERSION_MIN_TVOS;
732   case MCVM_WatchOSVersionMin: return MachO::LC_VERSION_MIN_WATCHOS;
733   }
734   llvm_unreachable("Invalid mc version min type");
735 }
736 
737 uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
738                                        const MCAsmLayout &Layout) {
739   uint64_t StartOffset = W.OS.tell();
740 
741   // Compute symbol table information and bind symbol indices.
742   computeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
743                      UndefinedSymbolData);
744 
745   unsigned NumSections = Asm.size();
746   const MCAssembler::VersionInfoType &VersionInfo =
747     Layout.getAssembler().getVersionInfo();
748 
749   // The section data starts after the header, the segment load command (and
750   // section headers) and the symbol table.
751   unsigned NumLoadCommands = 1;
752   uint64_t LoadCommandsSize = is64Bit() ?
753     sizeof(MachO::segment_command_64) + NumSections * sizeof(MachO::section_64):
754     sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section);
755 
756   // Add the deployment target version info load command size, if used.
757   if (VersionInfo.Major != 0) {
758     ++NumLoadCommands;
759     if (VersionInfo.EmitBuildVersion)
760       LoadCommandsSize += sizeof(MachO::build_version_command);
761     else
762       LoadCommandsSize += sizeof(MachO::version_min_command);
763   }
764 
765   // Add the data-in-code load command size, if used.
766   unsigned NumDataRegions = Asm.getDataRegions().size();
767   if (NumDataRegions) {
768     ++NumLoadCommands;
769     LoadCommandsSize += sizeof(MachO::linkedit_data_command);
770   }
771 
772   // Add the loh load command size, if used.
773   uint64_t LOHRawSize = Asm.getLOHContainer().getEmitSize(*this, Layout);
774   uint64_t LOHSize = alignTo(LOHRawSize, is64Bit() ? 8 : 4);
775   if (LOHSize) {
776     ++NumLoadCommands;
777     LoadCommandsSize += sizeof(MachO::linkedit_data_command);
778   }
779 
780   // Add the symbol table load command sizes, if used.
781   unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
782     UndefinedSymbolData.size();
783   if (NumSymbols) {
784     NumLoadCommands += 2;
785     LoadCommandsSize += (sizeof(MachO::symtab_command) +
786                          sizeof(MachO::dysymtab_command));
787   }
788 
789   // Add the linker option load commands sizes.
790   for (const auto &Option : Asm.getLinkerOptions()) {
791     ++NumLoadCommands;
792     LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(Option, is64Bit());
793   }
794 
795   // Compute the total size of the section data, as well as its file size and vm
796   // size.
797   uint64_t SectionDataStart = (is64Bit() ? sizeof(MachO::mach_header_64) :
798                                sizeof(MachO::mach_header)) + LoadCommandsSize;
799   uint64_t SectionDataSize = 0;
800   uint64_t SectionDataFileSize = 0;
801   uint64_t VMSize = 0;
802   for (const MCSection &Sec : Asm) {
803     uint64_t Address = getSectionAddress(&Sec);
804     uint64_t Size = Layout.getSectionAddressSize(&Sec);
805     uint64_t FileSize = Layout.getSectionFileSize(&Sec);
806     FileSize += getPaddingSize(&Sec, Layout);
807 
808     VMSize = std::max(VMSize, Address + Size);
809 
810     if (Sec.isVirtualSection())
811       continue;
812 
813     SectionDataSize = std::max(SectionDataSize, Address + Size);
814     SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
815   }
816 
817   // The section data is padded to 4 bytes.
818   //
819   // FIXME: Is this machine dependent?
820   unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4);
821   SectionDataFileSize += SectionDataPadding;
822 
823   // Write the prolog, starting with the header and load command...
824   writeHeader(MachO::MH_OBJECT, NumLoadCommands, LoadCommandsSize,
825               Asm.getSubsectionsViaSymbols());
826   uint32_t Prot =
827       MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE;
828   writeSegmentLoadCommand("", NumSections, 0, VMSize, SectionDataStart,
829                           SectionDataSize, Prot, Prot);
830 
831   // ... and then the section headers.
832   uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
833   for (const MCSection &Section : Asm) {
834     const auto &Sec = cast<MCSectionMachO>(Section);
835     std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
836     unsigned NumRelocs = Relocs.size();
837     uint64_t SectionStart = SectionDataStart + getSectionAddress(&Sec);
838     unsigned Flags = Sec.getTypeAndAttributes();
839     if (Sec.hasInstructions())
840       Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS;
841     writeSection(Layout, Sec, getSectionAddress(&Sec), SectionStart, Flags,
842                  RelocTableEnd, NumRelocs);
843     RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info);
844   }
845 
846   // Write out the deployment target information, if it's available.
847   if (VersionInfo.Major != 0) {
848     auto EncodeVersion = [](VersionTuple V) -> uint32_t {
849       assert(!V.empty() && "empty version");
850       unsigned Update = V.getSubminor() ? *V.getSubminor() : 0;
851       unsigned Minor = V.getMinor() ? *V.getMinor() : 0;
852       assert(Update < 256 && "unencodable update target version");
853       assert(Minor < 256 && "unencodable minor target version");
854       assert(V.getMajor() < 65536 && "unencodable major target version");
855       return Update | (Minor << 8) | (V.getMajor() << 16);
856     };
857     uint32_t EncodedVersion = EncodeVersion(
858         VersionTuple(VersionInfo.Major, VersionInfo.Minor, VersionInfo.Update));
859     uint32_t SDKVersion = !VersionInfo.SDKVersion.empty()
860                               ? EncodeVersion(VersionInfo.SDKVersion)
861                               : 0;
862     if (VersionInfo.EmitBuildVersion) {
863       // FIXME: Currently empty tools. Add clang version in the future.
864       W.write<uint32_t>(MachO::LC_BUILD_VERSION);
865       W.write<uint32_t>(sizeof(MachO::build_version_command));
866       W.write<uint32_t>(VersionInfo.TypeOrPlatform.Platform);
867       W.write<uint32_t>(EncodedVersion);
868       W.write<uint32_t>(SDKVersion);
869       W.write<uint32_t>(0);         // Empty tools list.
870     } else {
871       MachO::LoadCommandType LCType
872         = getLCFromMCVM(VersionInfo.TypeOrPlatform.Type);
873       W.write<uint32_t>(LCType);
874       W.write<uint32_t>(sizeof(MachO::version_min_command));
875       W.write<uint32_t>(EncodedVersion);
876       W.write<uint32_t>(SDKVersion);
877     }
878   }
879 
880   // Write the data-in-code load command, if used.
881   uint64_t DataInCodeTableEnd = RelocTableEnd + NumDataRegions * 8;
882   if (NumDataRegions) {
883     uint64_t DataRegionsOffset = RelocTableEnd;
884     uint64_t DataRegionsSize = NumDataRegions * 8;
885     writeLinkeditLoadCommand(MachO::LC_DATA_IN_CODE, DataRegionsOffset,
886                              DataRegionsSize);
887   }
888 
889   // Write the loh load command, if used.
890   uint64_t LOHTableEnd = DataInCodeTableEnd + LOHSize;
891   if (LOHSize)
892     writeLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT,
893                              DataInCodeTableEnd, LOHSize);
894 
895   // Write the symbol table load command, if used.
896   if (NumSymbols) {
897     unsigned FirstLocalSymbol = 0;
898     unsigned NumLocalSymbols = LocalSymbolData.size();
899     unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
900     unsigned NumExternalSymbols = ExternalSymbolData.size();
901     unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
902     unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
903     unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
904     unsigned NumSymTabSymbols =
905       NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
906     uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
907     uint64_t IndirectSymbolOffset = 0;
908 
    // If used, the indirect symbols are written after the relocation,
    // data-in-code and LOH tables that follow the section data.
910     if (NumIndirectSymbols)
911       IndirectSymbolOffset = LOHTableEnd;
912 
913     // The symbol table is written after the indirect symbol data.
914     uint64_t SymbolTableOffset = LOHTableEnd + IndirectSymbolSize;
915 
916     // The string table is written after symbol table.
917     uint64_t StringTableOffset =
918       SymbolTableOffset + NumSymTabSymbols * (is64Bit() ?
919                                               sizeof(MachO::nlist_64) :
920                                               sizeof(MachO::nlist));
921     writeSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
922                            StringTableOffset, StringTable.getSize());
923 
924     writeDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
925                              FirstExternalSymbol, NumExternalSymbols,
926                              FirstUndefinedSymbol, NumUndefinedSymbols,
927                              IndirectSymbolOffset, NumIndirectSymbols);
928   }
929 
930   // Write the linker options load commands.
931   for (const auto &Option : Asm.getLinkerOptions())
932     writeLinkerOptionsLoadCommand(Option);
933 
934   // Write the actual section data.
935   for (const MCSection &Sec : Asm) {
936     Asm.writeSectionData(W.OS, &Sec, Layout);
937 
938     uint64_t Pad = getPaddingSize(&Sec, Layout);
939     W.OS.write_zeros(Pad);
940   }
941 
942   // Write the extra padding.
943   W.OS.write_zeros(SectionDataPadding);
944 
945   // Write the relocation entries.
946   for (const MCSection &Sec : Asm) {
947     // Write the section relocation entries, in reverse order to match 'as'
948     // (approximately, the exact algorithm is more complicated than this).
949     std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
950     for (const RelAndSymbol &Rel : make_range(Relocs.rbegin(), Relocs.rend())) {
951       W.write<uint32_t>(Rel.MRE.r_word0);
952       W.write<uint32_t>(Rel.MRE.r_word1);
953     }
954   }
955 
956   // Write out the data-in-code region payload, if there is one.
957   for (MCAssembler::const_data_region_iterator
958          it = Asm.data_region_begin(), ie = Asm.data_region_end();
959          it != ie; ++it) {
960     const DataRegionData *Data = &(*it);
961     uint64_t Start = getSymbolAddress(*Data->Start, Layout);
962     uint64_t End;
963     if (Data->End)
964       End = getSymbolAddress(*Data->End, Layout);
965     else
966       report_fatal_error("Data region not terminated");
967 
968     LLVM_DEBUG(dbgs() << "data in code region-- kind: " << Data->Kind
969                       << "  start: " << Start << "(" << Data->Start->getName()
970                       << ")"
971                       << "  end: " << End << "(" << Data->End->getName() << ")"
972                       << "  size: " << End - Start << "\n");
973     W.write<uint32_t>(Start);
974     W.write<uint16_t>(End - Start);
975     W.write<uint16_t>(Data->Kind);
976   }
977 
978   // Write out the loh commands, if there is one.
979   if (LOHSize) {
980 #ifndef NDEBUG
981     unsigned Start = W.OS.tell();
982 #endif
983     Asm.getLOHContainer().emit(*this, Layout);
984     // Pad to a multiple of the pointer size.
985     W.OS.write_zeros(OffsetToAlignment(LOHRawSize, is64Bit() ? 8 : 4));
986     assert(W.OS.tell() - Start == LOHSize);
987   }
988 
989   // Write the symbol table data, if used.
990   if (NumSymbols) {
991     // Write the indirect symbol entries.
992     for (MCAssembler::const_indirect_symbol_iterator
993            it = Asm.indirect_symbol_begin(),
994            ie = Asm.indirect_symbol_end(); it != ie; ++it) {
995       // Indirect symbols in the non-lazy symbol pointer section have some
996       // special handling.
997       const MCSectionMachO &Section =
998           static_cast<const MCSectionMachO &>(*it->Section);
999       if (Section.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS) {
1000         // If this symbol is defined and internal, mark it as such.
1001         if (it->Symbol->isDefined() && !it->Symbol->isExternal()) {
1002           uint32_t Flags = MachO::INDIRECT_SYMBOL_LOCAL;
1003           if (it->Symbol->isAbsolute())
1004             Flags |= MachO::INDIRECT_SYMBOL_ABS;
1005           W.write<uint32_t>(Flags);
1006           continue;
1007         }
1008       }
1009 
1010       W.write<uint32_t>(it->Symbol->getIndex());
1011     }
1012 
1013     // FIXME: Check that offsets match computed ones.
1014 
1015     // Write the symbol table entries.
1016     for (auto *SymbolData :
1017          {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
1018       for (MachSymbolData &Entry : *SymbolData)
1019         writeNlist(Entry, Layout);
1020 
1021     // Write the string table.
1022     StringTable.write(W.OS);
1023   }
1024 
1025   return W.OS.tell() - StartOffset;
1026 }
1027 
1028 std::unique_ptr<MCObjectWriter>
1029 llvm::createMachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW,
1030                              raw_pwrite_stream &OS, bool IsLittleEndian) {
1031   return llvm::make_unique<MachObjectWriter>(std::move(MOTW), OS,
1032                                              IsLittleEndian);
1033 }
1034