xref: /llvm-project-15.0.7/lld/ELF/Writer.cpp (revision 2fa83cb7)
1 //===- Writer.cpp ---------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Writer.h"
10 #include "AArch64ErrataFix.h"
11 #include "CallGraphSort.h"
12 #include "Config.h"
13 #include "LinkerScript.h"
14 #include "MapFile.h"
15 #include "OutputSections.h"
16 #include "Relocations.h"
17 #include "SymbolTable.h"
18 #include "Symbols.h"
19 #include "SyntheticSections.h"
20 #include "Target.h"
21 #include "lld/Common/Filesystem.h"
22 #include "lld/Common/Memory.h"
23 #include "lld/Common/Strings.h"
24 #include "lld/Common/Threads.h"
25 #include "llvm/ADT/StringMap.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/Support/RandomNumberGenerator.h"
28 #include "llvm/Support/SHA1.h"
29 #include "llvm/Support/xxhash.h"
30 #include <climits>
31 
32 using namespace llvm;
33 using namespace llvm::ELF;
34 using namespace llvm::object;
35 using namespace llvm::support;
36 using namespace llvm::support::endian;
37 
38 using namespace lld;
39 using namespace lld::elf;
40 
41 namespace {
42 // The writer writes a SymbolTable result to a file.
43 template <class ELFT> class Writer {
44 public:
45   Writer() : Buffer(errorHandler().OutputBuffer) {}
46   using Elf_Shdr = typename ELFT::Shdr;
47   using Elf_Ehdr = typename ELFT::Ehdr;
48   using Elf_Phdr = typename ELFT::Phdr;
49 
50   void run();
51 
52 private:
53   void copyLocalSymbols();
54   void addSectionSymbols();
55   void forEachRelSec(llvm::function_ref<void(InputSectionBase &)> Fn);
56   void sortSections();
57   void resolveShfLinkOrder();
58   void finalizeAddressDependentContent();
59   void sortInputSections();
60   void finalizeSections();
61   void checkExecuteOnly();
62   void setReservedSymbolSections();
63 
64   std::vector<PhdrEntry *> createPhdrs(Partition &Part);
65   void removeEmptyPTLoad(std::vector<PhdrEntry *> &PhdrEntry);
66   void addPhdrForSection(Partition &Part, unsigned ShType, unsigned PType,
67                          unsigned PFlags);
68   void assignFileOffsets();
69   void assignFileOffsetsBinary();
70   void setPhdrs(Partition &Part);
71   void checkSections();
72   void fixSectionAlignments();
73   void openFile();
74   void writeTrapInstr();
75   void writeHeader();
76   void writeSections();
77   void writeSectionsBinary();
78   void writeBuildId();
79 
80   std::unique_ptr<FileOutputBuffer> &Buffer;
81 
82   void addRelIpltSymbols();
83   void addStartEndSymbols();
84   void addStartStopSymbols(OutputSection *Sec);
85 
86   uint64_t FileSize;
87   uint64_t SectionHeaderOff;
88 };
89 } // anonymous namespace
90 
91 static bool isSectionPrefix(StringRef Prefix, StringRef Name) {
92   return Name.startswith(Prefix) || Name == Prefix.drop_back();
93 }
94 
95 StringRef elf::getOutputSectionName(const InputSectionBase *S) {
96   if (Config->Relocatable)
97     return S->Name;
98 
99   // This is for --emit-relocs. If .text.foo is emitted as .text.bar, we want
100   // to emit .rela.text.foo as .rela.text.bar for consistency (this is not
101   // technically required, but not doing it is odd). This code guarantees that.
102   if (auto *IS = dyn_cast<InputSection>(S)) {
103     if (InputSectionBase *Rel = IS->getRelocatedSection()) {
104       OutputSection *Out = Rel->getOutputSection();
105       if (S->Type == SHT_RELA)
106         return Saver.save(".rela" + Out->Name);
107       return Saver.save(".rel" + Out->Name);
108     }
109   }
110 
111   // This check is for -z keep-text-section-prefix.  This option separates text
112   // sections with prefix ".text.hot", ".text.unlikely", ".text.startup" or
113   // ".text.exit".
114   // When enabled, this allows identifying the hot code region (.text.hot) in
115   // the final binary which can be selectively mapped to huge pages or mlocked,
116   // for instance.
117   if (Config->ZKeepTextSectionPrefix)
118     for (StringRef V :
119          {".text.hot.", ".text.unlikely.", ".text.startup.", ".text.exit."})
120       if (isSectionPrefix(V, S->Name))
121         return V.drop_back();
122 
123   for (StringRef V :
124        {".text.", ".rodata.", ".data.rel.ro.", ".data.", ".bss.rel.ro.",
125         ".bss.", ".init_array.", ".fini_array.", ".ctors.", ".dtors.", ".tbss.",
126         ".gcc_except_table.", ".tdata.", ".ARM.exidx.", ".ARM.extab."})
127     if (isSectionPrefix(V, S->Name))
128       return V.drop_back();
129 
130   // CommonSection is identified as "COMMON" in linker scripts.
131   // By default, it should go to .bss section.
132   if (S->Name == "COMMON")
133     return ".bss";
134 
135   return S->Name;
136 }
137 
138 static bool needsInterpSection() {
139   return !SharedFiles.empty() && !Config->DynamicLinker.empty() &&
140          Script->needsInterpSection();
141 }
142 
143 template <class ELFT> void elf::writeResult() { Writer<ELFT>().run(); }
144 
145 template <class ELFT>
146 void Writer<ELFT>::removeEmptyPTLoad(std::vector<PhdrEntry *> &Phdrs) {
147   llvm::erase_if(Phdrs, [&](const PhdrEntry *P) {
148     if (P->p_type != PT_LOAD)
149       return false;
150     if (!P->FirstSec)
151       return true;
152     uint64_t Size = P->LastSec->Addr + P->LastSec->Size - P->FirstSec->Addr;
153     return Size == 0;
154   });
155 }
156 
157 template <class ELFT> static void copySectionsIntoPartitions() {
158   std::vector<InputSectionBase *> NewSections;
159   for (unsigned Part = 2; Part != Partitions.size() + 1; ++Part) {
160     for (InputSectionBase *S : InputSections) {
161       if (!(S->Flags & SHF_ALLOC) || !S->isLive())
162         continue;
163       InputSectionBase *Copy;
164       if (S->Type == SHT_NOTE)
165         Copy = make<InputSection>(cast<InputSection>(*S));
166       else if (auto *ES = dyn_cast<EhInputSection>(S))
167         Copy = make<EhInputSection>(*ES);
168       else
169         continue;
170       Copy->Partition = Part;
171       NewSections.push_back(Copy);
172     }
173   }
174 
175   InputSections.insert(InputSections.end(), NewSections.begin(),
176                        NewSections.end());
177 }
178 
179 template <class ELFT> static void combineEhSections() {
180   for (InputSectionBase *&S : InputSections) {
181     // Ignore dead sections and the partition end marker (.part.end),
182     // whose partition number is out of bounds.
183     if (!S->isLive() || S->Partition == 255)
184       continue;
185 
186     Partition &Part = S->getPartition();
187     if (auto *ES = dyn_cast<EhInputSection>(S)) {
188       Part.EhFrame->addSection<ELFT>(ES);
189       S = nullptr;
190     } else if (S->kind() == SectionBase::Regular && Part.ARMExidx &&
191                Part.ARMExidx->addSection(cast<InputSection>(S))) {
192       S = nullptr;
193     }
194   }
195 
196   std::vector<InputSectionBase *> &V = InputSections;
197   V.erase(std::remove(V.begin(), V.end(), nullptr), V.end());
198 }
199 
200 static Defined *addOptionalRegular(StringRef Name, SectionBase *Sec,
201                                    uint64_t Val, uint8_t StOther = STV_HIDDEN,
202                                    uint8_t Binding = STB_GLOBAL) {
203   Symbol *S = Symtab->find(Name);
204   if (!S || S->isDefined())
205     return nullptr;
206 
207   S->resolve(Defined{/*File=*/nullptr, Name, Binding, StOther, STT_NOTYPE, Val,
208                      /*Size=*/0, Sec});
209   return cast<Defined>(S);
210 }
211 
212 static Defined *addAbsolute(StringRef Name) {
213   Symbol *Sym = Symtab->addSymbol(Defined{nullptr, Name, STB_GLOBAL, STV_HIDDEN,
214                                           STT_NOTYPE, 0, 0, nullptr});
215   return cast<Defined>(Sym);
216 }
217 
218 // The linker is expected to define some symbols depending on
219 // the linking result. This function defines such symbols.
220 void elf::addReservedSymbols() {
221   if (Config->EMachine == EM_MIPS) {
222     // Define _gp for MIPS. st_value of _gp symbol will be updated by Writer
223     // so that it points to an absolute address which by default is relative
224     // to GOT. Default offset is 0x7ff0.
225     // See "Global Data Symbols" in Chapter 6 in the following document:
226     // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
227     ElfSym::MipsGp = addAbsolute("_gp");
228 
229     // On MIPS O32 ABI, _gp_disp is a magic symbol designates offset between
230     // start of function and 'gp' pointer into GOT.
231     if (Symtab->find("_gp_disp"))
232       ElfSym::MipsGpDisp = addAbsolute("_gp_disp");
233 
234     // The __gnu_local_gp is a magic symbol equal to the current value of 'gp'
235     // pointer. This symbol is used in the code generated by .cpload pseudo-op
236     // in case of using -mno-shared option.
237     // https://sourceware.org/ml/binutils/2004-12/msg00094.html
238     if (Symtab->find("__gnu_local_gp"))
239       ElfSym::MipsLocalGp = addAbsolute("__gnu_local_gp");
240   } else if (Config->EMachine == EM_PPC) {
241     // glibc *crt1.o has a undefined reference to _SDA_BASE_. Since we don't
242     // support Small Data Area, define it arbitrarily as 0.
243     addOptionalRegular("_SDA_BASE_", nullptr, 0, STV_HIDDEN);
244   }
245 
246   // The Power Architecture 64-bit v2 ABI defines a TableOfContents (TOC) which
247   // combines the typical ELF GOT with the small data sections. It commonly
248   // includes .got .toc .sdata .sbss. The .TOC. symbol replaces both
249   // _GLOBAL_OFFSET_TABLE_ and _SDA_BASE_ from the 32-bit ABI. It is used to
250   // represent the TOC base which is offset by 0x8000 bytes from the start of
251   // the .got section.
252   // We do not allow _GLOBAL_OFFSET_TABLE_ to be defined by input objects as the
253   // correctness of some relocations depends on its value.
254   StringRef GotSymName =
255       (Config->EMachine == EM_PPC64) ? ".TOC." : "_GLOBAL_OFFSET_TABLE_";
256 
257   if (Symbol *S = Symtab->find(GotSymName)) {
258     if (S->isDefined()) {
259       error(toString(S->File) + " cannot redefine linker defined symbol '" +
260             GotSymName + "'");
261       return;
262     }
263 
264     uint64_t GotOff = 0;
265     if (Config->EMachine == EM_PPC64)
266       GotOff = 0x8000;
267 
268     S->resolve(Defined{/*File=*/nullptr, GotSymName, STB_GLOBAL, STV_HIDDEN,
269                        STT_NOTYPE, GotOff, /*Size=*/0, Out::ElfHeader});
270     ElfSym::GlobalOffsetTable = cast<Defined>(S);
271   }
272 
273   // __ehdr_start is the location of ELF file headers. Note that we define
274   // this symbol unconditionally even when using a linker script, which
275   // differs from the behavior implemented by GNU linker which only define
276   // this symbol if ELF headers are in the memory mapped segment.
277   addOptionalRegular("__ehdr_start", Out::ElfHeader, 0, STV_HIDDEN);
278 
279   // __executable_start is not documented, but the expectation of at
280   // least the Android libc is that it points to the ELF header.
281   addOptionalRegular("__executable_start", Out::ElfHeader, 0, STV_HIDDEN);
282 
283   // __dso_handle symbol is passed to cxa_finalize as a marker to identify
284   // each DSO. The address of the symbol doesn't matter as long as they are
285   // different in different DSOs, so we chose the start address of the DSO.
286   addOptionalRegular("__dso_handle", Out::ElfHeader, 0, STV_HIDDEN);
287 
288   // If linker script do layout we do not need to create any standart symbols.
289   if (Script->HasSectionsCommand)
290     return;
291 
292   auto Add = [](StringRef S, int64_t Pos) {
293     return addOptionalRegular(S, Out::ElfHeader, Pos, STV_DEFAULT);
294   };
295 
296   ElfSym::Bss = Add("__bss_start", 0);
297   ElfSym::End1 = Add("end", -1);
298   ElfSym::End2 = Add("_end", -1);
299   ElfSym::Etext1 = Add("etext", -1);
300   ElfSym::Etext2 = Add("_etext", -1);
301   ElfSym::Edata1 = Add("edata", -1);
302   ElfSym::Edata2 = Add("_edata", -1);
303 }
304 
305 static OutputSection *findSection(StringRef Name, unsigned Partition = 1) {
306   for (BaseCommand *Base : Script->SectionCommands)
307     if (auto *Sec = dyn_cast<OutputSection>(Base))
308       if (Sec->Name == Name && Sec->Partition == Partition)
309         return Sec;
310   return nullptr;
311 }
312 
313 // Initialize Out members.
314 template <class ELFT> static void createSyntheticSections() {
315   // Initialize all pointers with NULL. This is needed because
316   // you can call lld::elf::main more than once as a library.
317   memset(&Out::First, 0, sizeof(Out));
318 
319   auto Add = [](InputSectionBase *Sec) { InputSections.push_back(Sec); };
320 
321   In.ShStrTab = make<StringTableSection>(".shstrtab", false);
322 
323   Out::ProgramHeaders = make<OutputSection>("", 0, SHF_ALLOC);
324   Out::ProgramHeaders->Alignment = Config->Wordsize;
325 
326   if (Config->Strip != StripPolicy::All) {
327     In.StrTab = make<StringTableSection>(".strtab", false);
328     In.SymTab = make<SymbolTableSection<ELFT>>(*In.StrTab);
329     In.SymTabShndx = make<SymtabShndxSection>();
330   }
331 
332   In.Bss = make<BssSection>(".bss", 0, 1);
333   Add(In.Bss);
334 
335   // If there is a SECTIONS command and a .data.rel.ro section name use name
336   // .data.rel.ro.bss so that we match in the .data.rel.ro output section.
337   // This makes sure our relro is contiguous.
338   bool HasDataRelRo =
339       Script->HasSectionsCommand && findSection(".data.rel.ro", 0);
340   In.BssRelRo =
341       make<BssSection>(HasDataRelRo ? ".data.rel.ro.bss" : ".bss.rel.ro", 0, 1);
342   Add(In.BssRelRo);
343 
344   // Add MIPS-specific sections.
345   if (Config->EMachine == EM_MIPS) {
346     if (!Config->Shared && Config->HasDynSymTab) {
347       In.MipsRldMap = make<MipsRldMapSection>();
348       Add(In.MipsRldMap);
349     }
350     if (auto *Sec = MipsAbiFlagsSection<ELFT>::create())
351       Add(Sec);
352     if (auto *Sec = MipsOptionsSection<ELFT>::create())
353       Add(Sec);
354     if (auto *Sec = MipsReginfoSection<ELFT>::create())
355       Add(Sec);
356   }
357 
358   for (Partition &Part : Partitions) {
359     auto Add = [&](InputSectionBase *Sec) {
360       Sec->Partition = Part.getNumber();
361       InputSections.push_back(Sec);
362     };
363 
364     if (!Part.Name.empty()) {
365       Part.ElfHeader = make<PartitionElfHeaderSection<ELFT>>();
366       Part.ElfHeader->Name = Part.Name;
367       Add(Part.ElfHeader);
368 
369       Part.ProgramHeaders = make<PartitionProgramHeadersSection<ELFT>>();
370       Add(Part.ProgramHeaders);
371     }
372 
373     if (Config->BuildId != BuildIdKind::None) {
374       Part.BuildId = make<BuildIdSection>();
375       Add(Part.BuildId);
376     }
377 
378     Part.DynStrTab = make<StringTableSection>(".dynstr", true);
379     Part.DynSymTab = make<SymbolTableSection<ELFT>>(*Part.DynStrTab);
380     Part.Dynamic = make<DynamicSection<ELFT>>();
381     if (Config->AndroidPackDynRelocs) {
382       Part.RelaDyn = make<AndroidPackedRelocationSection<ELFT>>(
383           Config->IsRela ? ".rela.dyn" : ".rel.dyn");
384     } else {
385       Part.RelaDyn = make<RelocationSection<ELFT>>(
386           Config->IsRela ? ".rela.dyn" : ".rel.dyn", Config->ZCombreloc);
387     }
388 
389     if (needsInterpSection())
390       Add(createInterpSection());
391 
392     if (Config->HasDynSymTab) {
393       Part.DynSymTab = make<SymbolTableSection<ELFT>>(*Part.DynStrTab);
394       Add(Part.DynSymTab);
395 
396       Part.VerSym = make<VersionTableSection>();
397       Add(Part.VerSym);
398 
399       if (!Config->VersionDefinitions.empty()) {
400         Part.VerDef = make<VersionDefinitionSection>();
401         Add(Part.VerDef);
402       }
403 
404       Part.VerNeed = make<VersionNeedSection<ELFT>>();
405       Add(Part.VerNeed);
406 
407       if (Config->GnuHash) {
408         Part.GnuHashTab = make<GnuHashTableSection>();
409         Add(Part.GnuHashTab);
410       }
411 
412       if (Config->SysvHash) {
413         Part.HashTab = make<HashTableSection>();
414         Add(Part.HashTab);
415       }
416 
417       Add(Part.Dynamic);
418       Add(Part.DynStrTab);
419       Add(Part.RelaDyn);
420     }
421 
422     if (Config->RelrPackDynRelocs) {
423       Part.RelrDyn = make<RelrSection<ELFT>>();
424       Add(Part.RelrDyn);
425     }
426 
427     if (!Config->Relocatable) {
428       if (Config->EhFrameHdr) {
429         Part.EhFrameHdr = make<EhFrameHeader>();
430         Add(Part.EhFrameHdr);
431       }
432       Part.EhFrame = make<EhFrameSection>();
433       Add(Part.EhFrame);
434     }
435 
436     if (Config->EMachine == EM_ARM && !Config->Relocatable) {
437       // The ARMExidxsyntheticsection replaces all the individual .ARM.exidx
438       // InputSections.
439       Part.ARMExidx = make<ARMExidxSyntheticSection>();
440       Add(Part.ARMExidx);
441     }
442   }
443 
444   if (Partitions.size() != 1) {
445     // Create the partition end marker. This needs to be in partition number 255
446     // so that it is sorted after all other partitions. It also has other
447     // special handling (see createPhdrs() and combineEhSections()).
448     In.PartEnd = make<BssSection>(".part.end", Config->MaxPageSize, 1);
449     In.PartEnd->Partition = 255;
450     Add(In.PartEnd);
451 
452     In.PartIndex = make<PartitionIndexSection>();
453     addOptionalRegular("__part_index_begin", In.PartIndex, 0);
454     addOptionalRegular("__part_index_end", In.PartIndex,
455                        In.PartIndex->getSize());
456     Add(In.PartIndex);
457   }
458 
459   // Add .got. MIPS' .got is so different from the other archs,
460   // it has its own class.
461   if (Config->EMachine == EM_MIPS) {
462     In.MipsGot = make<MipsGotSection>();
463     Add(In.MipsGot);
464   } else {
465     In.Got = make<GotSection>();
466     Add(In.Got);
467   }
468 
469   if (Config->EMachine == EM_PPC) {
470     In.PPC32Got2 = make<PPC32Got2Section>();
471     Add(In.PPC32Got2);
472   }
473 
474   if (Config->EMachine == EM_PPC64) {
475     In.PPC64LongBranchTarget = make<PPC64LongBranchTargetSection>();
476     Add(In.PPC64LongBranchTarget);
477   }
478 
479   In.GotPlt = make<GotPltSection>();
480   Add(In.GotPlt);
481   In.IgotPlt = make<IgotPltSection>();
482   Add(In.IgotPlt);
483 
484   // _GLOBAL_OFFSET_TABLE_ is defined relative to either .got.plt or .got. Treat
485   // it as a relocation and ensure the referenced section is created.
486   if (ElfSym::GlobalOffsetTable && Config->EMachine != EM_MIPS) {
487     if (Target->GotBaseSymInGotPlt)
488       In.GotPlt->HasGotPltOffRel = true;
489     else
490       In.Got->HasGotOffRel = true;
491   }
492 
493   if (Config->GdbIndex)
494     Add(GdbIndexSection::create<ELFT>());
495 
496   // We always need to add rel[a].plt to output if it has entries.
497   // Even for static linking it can contain R_[*]_IRELATIVE relocations.
498   In.RelaPlt = make<RelocationSection<ELFT>>(
499       Config->IsRela ? ".rela.plt" : ".rel.plt", false /*Sort*/);
500   Add(In.RelaPlt);
501 
502   // The RelaIplt immediately follows .rel.plt (.rel.dyn for ARM) to ensure
503   // that the IRelative relocations are processed last by the dynamic loader.
504   // We cannot place the iplt section in .rel.dyn when Android relocation
505   // packing is enabled because that would cause a section type mismatch.
506   // However, because the Android dynamic loader reads .rel.plt after .rel.dyn,
507   // we can get the desired behaviour by placing the iplt section in .rel.plt.
508   In.RelaIplt = make<RelocationSection<ELFT>>(
509       (Config->EMachine == EM_ARM && !Config->AndroidPackDynRelocs)
510           ? ".rel.dyn"
511           : In.RelaPlt->Name,
512       false /*Sort*/);
513   Add(In.RelaIplt);
514 
515   In.Plt = make<PltSection>(false);
516   Add(In.Plt);
517   In.Iplt = make<PltSection>(true);
518   Add(In.Iplt);
519 
520   if (Config->AndFeatures)
521     Add(make<GnuPropertySection>());
522 
523   // .note.GNU-stack is always added when we are creating a re-linkable
524   // object file. Other linkers are using the presence of this marker
525   // section to control the executable-ness of the stack area, but that
526   // is irrelevant these days. Stack area should always be non-executable
527   // by default. So we emit this section unconditionally.
528   if (Config->Relocatable)
529     Add(make<GnuStackSection>());
530 
531   if (In.SymTab)
532     Add(In.SymTab);
533   if (In.SymTabShndx)
534     Add(In.SymTabShndx);
535   Add(In.ShStrTab);
536   if (In.StrTab)
537     Add(In.StrTab);
538 }
539 
540 // The main function of the writer.
541 template <class ELFT> void Writer<ELFT>::run() {
542   // Make copies of any input sections that need to be copied into each
543   // partition.
544   copySectionsIntoPartitions<ELFT>();
545 
546   // Create linker-synthesized sections such as .got or .plt.
547   // Such sections are of type input section.
548   createSyntheticSections<ELFT>();
549 
550   // Some input sections that are used for exception handling need to be moved
551   // into synthetic sections. Do that now so that they aren't assigned to
552   // output sections in the usual way.
553   if (!Config->Relocatable)
554     combineEhSections<ELFT>();
555 
556   // We want to process linker script commands. When SECTIONS command
557   // is given we let it create sections.
558   Script->processSectionCommands();
559 
560   // Linker scripts controls how input sections are assigned to output sections.
561   // Input sections that were not handled by scripts are called "orphans", and
562   // they are assigned to output sections by the default rule. Process that.
563   Script->addOrphanSections();
564 
565   if (Config->Discard != DiscardPolicy::All)
566     copyLocalSymbols();
567 
568   if (Config->CopyRelocs)
569     addSectionSymbols();
570 
571   // Now that we have a complete set of output sections. This function
572   // completes section contents. For example, we need to add strings
573   // to the string table, and add entries to .got and .plt.
574   // finalizeSections does that.
575   finalizeSections();
576   checkExecuteOnly();
577   if (errorCount())
578     return;
579 
580   Script->assignAddresses();
581 
582   // If -compressed-debug-sections is specified, we need to compress
583   // .debug_* sections. Do it right now because it changes the size of
584   // output sections.
585   for (OutputSection *Sec : OutputSections)
586     Sec->maybeCompress<ELFT>();
587 
588   Script->allocateHeaders(Main->Phdrs);
589 
590   // Remove empty PT_LOAD to avoid causing the dynamic linker to try to mmap a
591   // 0 sized region. This has to be done late since only after assignAddresses
592   // we know the size of the sections.
593   for (Partition &Part : Partitions)
594     removeEmptyPTLoad(Part.Phdrs);
595 
596   if (!Config->OFormatBinary)
597     assignFileOffsets();
598   else
599     assignFileOffsetsBinary();
600 
601   for (Partition &Part : Partitions)
602     setPhdrs(Part);
603 
604   if (Config->Relocatable)
605     for (OutputSection *Sec : OutputSections)
606       Sec->Addr = 0;
607 
608   if (Config->CheckSections)
609     checkSections();
610 
611   // It does not make sense try to open the file if we have error already.
612   if (errorCount())
613     return;
614   // Write the result down to a file.
615   openFile();
616   if (errorCount())
617     return;
618 
619   if (!Config->OFormatBinary) {
620     writeTrapInstr();
621     writeHeader();
622     writeSections();
623   } else {
624     writeSectionsBinary();
625   }
626 
627   // Backfill .note.gnu.build-id section content. This is done at last
628   // because the content is usually a hash value of the entire output file.
629   writeBuildId();
630   if (errorCount())
631     return;
632 
633   // Handle -Map and -cref options.
634   writeMapFile();
635   writeCrossReferenceTable();
636   if (errorCount())
637     return;
638 
639   if (auto E = Buffer->commit())
640     error("failed to write to the output file: " + toString(std::move(E)));
641 }
642 
643 static bool shouldKeepInSymtab(const Defined &Sym) {
644   if (Sym.isSection())
645     return false;
646 
647   if (Config->Discard == DiscardPolicy::None)
648     return true;
649 
650   // If -emit-reloc is given, all symbols including local ones need to be
651   // copied because they may be referenced by relocations.
652   if (Config->EmitRelocs)
653     return true;
654 
655   // In ELF assembly .L symbols are normally discarded by the assembler.
656   // If the assembler fails to do so, the linker discards them if
657   // * --discard-locals is used.
658   // * The symbol is in a SHF_MERGE section, which is normally the reason for
659   //   the assembler keeping the .L symbol.
660   StringRef Name = Sym.getName();
661   bool IsLocal = Name.startswith(".L") || Name.empty();
662   if (!IsLocal)
663     return true;
664 
665   if (Config->Discard == DiscardPolicy::Locals)
666     return false;
667 
668   SectionBase *Sec = Sym.Section;
669   return !Sec || !(Sec->Flags & SHF_MERGE);
670 }
671 
672 static bool includeInSymtab(const Symbol &B) {
673   if (!B.isLocal() && !B.IsUsedInRegularObj)
674     return false;
675 
676   if (auto *D = dyn_cast<Defined>(&B)) {
677     // Always include absolute symbols.
678     SectionBase *Sec = D->Section;
679     if (!Sec)
680       return true;
681     Sec = Sec->Repl;
682 
683     // Exclude symbols pointing to garbage-collected sections.
684     if (isa<InputSectionBase>(Sec) && !Sec->isLive())
685       return false;
686 
687     if (auto *S = dyn_cast<MergeInputSection>(Sec))
688       if (!S->getSectionPiece(D->Value)->Live)
689         return false;
690     return true;
691   }
692   return B.Used;
693 }
694 
695 // Local symbols are not in the linker's symbol table. This function scans
696 // each object file's symbol table to copy local symbols to the output.
697 template <class ELFT> void Writer<ELFT>::copyLocalSymbols() {
698   if (!In.SymTab)
699     return;
700   for (InputFile *File : ObjectFiles) {
701     ObjFile<ELFT> *F = cast<ObjFile<ELFT>>(File);
702     for (Symbol *B : F->getLocalSymbols()) {
703       if (!B->isLocal())
704         fatal(toString(F) +
705               ": broken object: getLocalSymbols returns a non-local symbol");
706       auto *DR = dyn_cast<Defined>(B);
707 
708       // No reason to keep local undefined symbol in symtab.
709       if (!DR)
710         continue;
711       if (!includeInSymtab(*B))
712         continue;
713       if (!shouldKeepInSymtab(*DR))
714         continue;
715       In.SymTab->addSymbol(B);
716     }
717   }
718 }
719 
720 // Create a section symbol for each output section so that we can represent
721 // relocations that point to the section. If we know that no relocation is
722 // referring to a section (that happens if the section is a synthetic one), we
723 // don't create a section symbol for that section.
724 template <class ELFT> void Writer<ELFT>::addSectionSymbols() {
725   for (BaseCommand *Base : Script->SectionCommands) {
726     auto *Sec = dyn_cast<OutputSection>(Base);
727     if (!Sec)
728       continue;
729     auto I = llvm::find_if(Sec->SectionCommands, [](BaseCommand *Base) {
730       if (auto *ISD = dyn_cast<InputSectionDescription>(Base))
731         return !ISD->Sections.empty();
732       return false;
733     });
734     if (I == Sec->SectionCommands.end())
735       continue;
736     InputSection *IS = cast<InputSectionDescription>(*I)->Sections[0];
737 
738     // Relocations are not using REL[A] section symbols.
739     if (IS->Type == SHT_REL || IS->Type == SHT_RELA)
740       continue;
741 
742     // Unlike other synthetic sections, mergeable output sections contain data
743     // copied from input sections, and there may be a relocation pointing to its
744     // contents if -r or -emit-reloc are given.
745     if (isa<SyntheticSection>(IS) && !(IS->Flags & SHF_MERGE))
746       continue;
747 
748     auto *Sym =
749         make<Defined>(IS->File, "", STB_LOCAL, /*StOther=*/0, STT_SECTION,
750                       /*Value=*/0, /*Size=*/0, IS);
751     In.SymTab->addSymbol(Sym);
752   }
753 }
754 
755 // Today's loaders have a feature to make segments read-only after
756 // processing dynamic relocations to enhance security. PT_GNU_RELRO
757 // is defined for that.
758 //
759 // This function returns true if a section needs to be put into a
760 // PT_GNU_RELRO segment.
761 static bool isRelroSection(const OutputSection *Sec) {
762   if (!Config->ZRelro)
763     return false;
764 
765   uint64_t Flags = Sec->Flags;
766 
767   // Non-allocatable or non-writable sections don't need RELRO because
768   // they are not writable or not even mapped to memory in the first place.
769   // RELRO is for sections that are essentially read-only but need to
770   // be writable only at process startup to allow dynamic linker to
771   // apply relocations.
772   if (!(Flags & SHF_ALLOC) || !(Flags & SHF_WRITE))
773     return false;
774 
775   // Once initialized, TLS data segments are used as data templates
776   // for a thread-local storage. For each new thread, runtime
777   // allocates memory for a TLS and copy templates there. No thread
778   // are supposed to use templates directly. Thus, it can be in RELRO.
779   if (Flags & SHF_TLS)
780     return true;
781 
782   // .init_array, .preinit_array and .fini_array contain pointers to
783   // functions that are executed on process startup or exit. These
784   // pointers are set by the static linker, and they are not expected
785   // to change at runtime. But if you are an attacker, you could do
786   // interesting things by manipulating pointers in .fini_array, for
787   // example. So they are put into RELRO.
788   uint32_t Type = Sec->Type;
789   if (Type == SHT_INIT_ARRAY || Type == SHT_FINI_ARRAY ||
790       Type == SHT_PREINIT_ARRAY)
791     return true;
792 
793   // .got contains pointers to external symbols. They are resolved by
794   // the dynamic linker when a module is loaded into memory, and after
795   // that they are not expected to change. So, it can be in RELRO.
796   if (In.Got && Sec == In.Got->getParent())
797     return true;
798 
799   // .toc is a GOT-ish section for PowerPC64. Their contents are accessed
800   // through r2 register, which is reserved for that purpose. Since r2 is used
801   // for accessing .got as well, .got and .toc need to be close enough in the
802   // virtual address space. Usually, .toc comes just after .got. Since we place
803   // .got into RELRO, .toc needs to be placed into RELRO too.
804   if (Sec->Name.equals(".toc"))
805     return true;
806 
807   // .got.plt contains pointers to external function symbols. They are
808   // by default resolved lazily, so we usually cannot put it into RELRO.
809   // However, if "-z now" is given, the lazy symbol resolution is
810   // disabled, which enables us to put it into RELRO.
811   if (Sec == In.GotPlt->getParent())
812     return Config->ZNow;
813 
814   // .dynamic section contains data for the dynamic linker, and
815   // there's no need to write to it at runtime, so it's better to put
816   // it into RELRO.
817   if (Sec->Name == ".dynamic")
818     return true;
819 
820   // Sections with some special names are put into RELRO. This is a
821   // bit unfortunate because section names shouldn't be significant in
822   // ELF in spirit. But in reality many linker features depend on
823   // magic section names.
824   StringRef S = Sec->Name;
825   return S == ".data.rel.ro" || S == ".bss.rel.ro" || S == ".ctors" ||
826          S == ".dtors" || S == ".jcr" || S == ".eh_frame" ||
827          S == ".openbsd.randomdata";
828 }
829 
830 // We compute a rank for each section. The rank indicates where the
831 // section should be placed in the file.  Instead of using simple
832 // numbers (0,1,2...), we use a series of flags. One for each decision
833 // point when placing the section.
834 // Using flags has two key properties:
835 // * It is easy to check if a give branch was taken.
836 // * It is easy two see how similar two ranks are (see getRankProximity).
837 enum RankFlags {
838   RF_NOT_ADDR_SET = 1 << 27,
839   RF_NOT_ALLOC = 1 << 26,
840   RF_PARTITION = 1 << 18, // Partition number (8 bits)
841   RF_NOT_PART_EHDR = 1 << 17,
842   RF_NOT_PART_PHDR = 1 << 16,
843   RF_NOT_INTERP = 1 << 15,
844   RF_NOT_NOTE = 1 << 14,
845   RF_WRITE = 1 << 13,
846   RF_EXEC_WRITE = 1 << 12,
847   RF_EXEC = 1 << 11,
848   RF_RODATA = 1 << 10,
849   RF_NOT_RELRO = 1 << 9,
850   RF_NOT_TLS = 1 << 8,
851   RF_BSS = 1 << 7,
852   RF_PPC_NOT_TOCBSS = 1 << 6,
853   RF_PPC_TOCL = 1 << 5,
854   RF_PPC_TOC = 1 << 4,
855   RF_PPC_GOT = 1 << 3,
856   RF_PPC_BRANCH_LT = 1 << 2,
857   RF_MIPS_GPREL = 1 << 1,
858   RF_MIPS_NOT_GOT = 1 << 0
859 };
860 
861 static unsigned getSectionRank(const OutputSection *Sec) {
862   unsigned Rank = Sec->Partition * RF_PARTITION;
863 
864   // We want to put section specified by -T option first, so we
865   // can start assigning VA starting from them later.
866   if (Config->SectionStartMap.count(Sec->Name))
867     return Rank;
868   Rank |= RF_NOT_ADDR_SET;
869 
870   // Allocatable sections go first to reduce the total PT_LOAD size and
871   // so debug info doesn't change addresses in actual code.
872   if (!(Sec->Flags & SHF_ALLOC))
873     return Rank | RF_NOT_ALLOC;
874 
875   if (Sec->Type == SHT_LLVM_PART_EHDR)
876     return Rank;
877   Rank |= RF_NOT_PART_EHDR;
878 
879   if (Sec->Type == SHT_LLVM_PART_PHDR)
880     return Rank;
881   Rank |= RF_NOT_PART_PHDR;
882 
883   // Put .interp first because some loaders want to see that section
884   // on the first page of the executable file when loaded into memory.
885   if (Sec->Name == ".interp")
886     return Rank;
887   Rank |= RF_NOT_INTERP;
888 
889   // Put .note sections (which make up one PT_NOTE) at the beginning so that
890   // they are likely to be included in a core file even if core file size is
891   // limited. In particular, we want a .note.gnu.build-id and a .note.tag to be
892   // included in a core to match core files with executables.
893   if (Sec->Type == SHT_NOTE)
894     return Rank;
895   Rank |= RF_NOT_NOTE;
896 
897   // Sort sections based on their access permission in the following
898   // order: R, RX, RWX, RW.  This order is based on the following
899   // considerations:
900   // * Read-only sections come first such that they go in the
901   //   PT_LOAD covering the program headers at the start of the file.
902   // * Read-only, executable sections come next.
903   // * Writable, executable sections follow such that .plt on
904   //   architectures where it needs to be writable will be placed
905   //   between .text and .data.
906   // * Writable sections come last, such that .bss lands at the very
907   //   end of the last PT_LOAD.
908   bool IsExec = Sec->Flags & SHF_EXECINSTR;
909   bool IsWrite = Sec->Flags & SHF_WRITE;
910 
911   if (IsExec) {
912     if (IsWrite)
913       Rank |= RF_EXEC_WRITE;
914     else
915       Rank |= RF_EXEC;
916   } else if (IsWrite) {
917     Rank |= RF_WRITE;
918   } else if (Sec->Type == SHT_PROGBITS) {
919     // Make non-executable and non-writable PROGBITS sections (e.g .rodata
920     // .eh_frame) closer to .text. They likely contain PC or GOT relative
921     // relocations and there could be relocation overflow if other huge sections
922     // (.dynstr .dynsym) were placed in between.
923     Rank |= RF_RODATA;
924   }
925 
926   // Place RelRo sections first. After considering SHT_NOBITS below, the
927   // ordering is PT_LOAD(PT_GNU_RELRO(.data.rel.ro .bss.rel.ro) | .data .bss),
928   // where | marks where page alignment happens. An alternative ordering is
929   // PT_LOAD(.data | PT_GNU_RELRO( .data.rel.ro .bss.rel.ro) | .bss), but it may
930   // waste more bytes due to 2 alignment places.
931   if (!isRelroSection(Sec))
932     Rank |= RF_NOT_RELRO;
933 
934   // If we got here we know that both A and B are in the same PT_LOAD.
935 
936   // The TLS initialization block needs to be a single contiguous block in a R/W
937   // PT_LOAD, so stick TLS sections directly before the other RelRo R/W
938   // sections. Since p_filesz can be less than p_memsz, place NOBITS sections
939   // after PROGBITS.
940   if (!(Sec->Flags & SHF_TLS))
941     Rank |= RF_NOT_TLS;
942 
943   // Within TLS sections, or within other RelRo sections, or within non-RelRo
944   // sections, place non-NOBITS sections first.
945   if (Sec->Type == SHT_NOBITS)
946     Rank |= RF_BSS;
947 
948   // Some architectures have additional ordering restrictions for sections
949   // within the same PT_LOAD.
950   if (Config->EMachine == EM_PPC64) {
951     // PPC64 has a number of special SHT_PROGBITS+SHF_ALLOC+SHF_WRITE sections
952     // that we would like to make sure appear is a specific order to maximize
953     // their coverage by a single signed 16-bit offset from the TOC base
954     // pointer. Conversely, the special .tocbss section should be first among
955     // all SHT_NOBITS sections. This will put it next to the loaded special
956     // PPC64 sections (and, thus, within reach of the TOC base pointer).
957     StringRef Name = Sec->Name;
958     if (Name != ".tocbss")
959       Rank |= RF_PPC_NOT_TOCBSS;
960 
961     if (Name == ".toc1")
962       Rank |= RF_PPC_TOCL;
963 
964     if (Name == ".toc")
965       Rank |= RF_PPC_TOC;
966 
967     if (Name == ".got")
968       Rank |= RF_PPC_GOT;
969 
970     if (Name == ".branch_lt")
971       Rank |= RF_PPC_BRANCH_LT;
972   }
973 
974   if (Config->EMachine == EM_MIPS) {
975     // All sections with SHF_MIPS_GPREL flag should be grouped together
976     // because data in these sections is addressable with a gp relative address.
977     if (Sec->Flags & SHF_MIPS_GPREL)
978       Rank |= RF_MIPS_GPREL;
979 
980     if (Sec->Name != ".got")
981       Rank |= RF_MIPS_NOT_GOT;
982   }
983 
984   return Rank;
985 }
986 
987 static bool compareSections(const BaseCommand *ACmd, const BaseCommand *BCmd) {
988   const OutputSection *A = cast<OutputSection>(ACmd);
989   const OutputSection *B = cast<OutputSection>(BCmd);
990 
991   if (A->SortRank != B->SortRank)
992     return A->SortRank < B->SortRank;
993 
994   if (!(A->SortRank & RF_NOT_ADDR_SET))
995     return Config->SectionStartMap.lookup(A->Name) <
996            Config->SectionStartMap.lookup(B->Name);
997   return false;
998 }
999 
1000 void PhdrEntry::add(OutputSection *Sec) {
1001   LastSec = Sec;
1002   if (!FirstSec)
1003     FirstSec = Sec;
1004   p_align = std::max(p_align, Sec->Alignment);
1005   if (p_type == PT_LOAD)
1006     Sec->PtLoad = this;
1007 }
1008 
1009 // The beginning and the ending of .rel[a].plt section are marked
1010 // with __rel[a]_iplt_{start,end} symbols if it is a statically linked
1011 // executable. The runtime needs these symbols in order to resolve
1012 // all IRELATIVE relocs on startup. For dynamic executables, we don't
1013 // need these symbols, since IRELATIVE relocs are resolved through GOT
1014 // and PLT. For details, see http://www.airs.com/blog/archives/403.
1015 template <class ELFT> void Writer<ELFT>::addRelIpltSymbols() {
1016   if (Config->Relocatable || needsInterpSection())
1017     return;
1018 
1019   // By default, __rela_iplt_{start,end} belong to a dummy section 0
1020   // because .rela.plt might be empty and thus removed from output.
1021   // We'll override Out::ElfHeader with In.RelaIplt later when we are
1022   // sure that .rela.plt exists in output.
1023   ElfSym::RelaIpltStart = addOptionalRegular(
1024       Config->IsRela ? "__rela_iplt_start" : "__rel_iplt_start",
1025       Out::ElfHeader, 0, STV_HIDDEN, STB_WEAK);
1026 
1027   ElfSym::RelaIpltEnd = addOptionalRegular(
1028       Config->IsRela ? "__rela_iplt_end" : "__rel_iplt_end",
1029       Out::ElfHeader, 0, STV_HIDDEN, STB_WEAK);
1030 }
1031 
1032 template <class ELFT>
1033 void Writer<ELFT>::forEachRelSec(
1034     llvm::function_ref<void(InputSectionBase &)> Fn) {
1035   // Scan all relocations. Each relocation goes through a series
1036   // of tests to determine if it needs special treatment, such as
1037   // creating GOT, PLT, copy relocations, etc.
1038   // Note that relocations for non-alloc sections are directly
1039   // processed by InputSection::relocateNonAlloc.
1040   for (InputSectionBase *IS : InputSections)
1041     if (IS->isLive() && isa<InputSection>(IS) && (IS->Flags & SHF_ALLOC))
1042       Fn(*IS);
1043   for (Partition &Part : Partitions) {
1044     for (EhInputSection *ES : Part.EhFrame->Sections)
1045       Fn(*ES);
1046     if (Part.ARMExidx && Part.ARMExidx->isLive())
1047       for (InputSection *Ex : Part.ARMExidx->ExidxSections)
1048         Fn(*Ex);
1049   }
1050 }
1051 
1052 // This function generates assignments for predefined symbols (e.g. _end or
1053 // _etext) and inserts them into the commands sequence to be processed at the
1054 // appropriate time. This ensures that the value is going to be correct by the
1055 // time any references to these symbols are processed and is equivalent to
1056 // defining these symbols explicitly in the linker script.
1057 template <class ELFT> void Writer<ELFT>::setReservedSymbolSections() {
1058   if (ElfSym::GlobalOffsetTable) {
1059     // The _GLOBAL_OFFSET_TABLE_ symbol is defined by target convention usually
1060     // to the start of the .got or .got.plt section.
1061     InputSection *GotSection = In.GotPlt;
1062     if (!Target->GotBaseSymInGotPlt)
1063       GotSection = In.MipsGot ? cast<InputSection>(In.MipsGot)
1064                               : cast<InputSection>(In.Got);
1065     ElfSym::GlobalOffsetTable->Section = GotSection;
1066   }
1067 
1068   // .rela_iplt_{start,end} mark the start and the end of .rela.plt section.
1069   if (ElfSym::RelaIpltStart && In.RelaIplt->isNeeded()) {
1070     ElfSym::RelaIpltStart->Section = In.RelaIplt;
1071     ElfSym::RelaIpltEnd->Section = In.RelaIplt;
1072     ElfSym::RelaIpltEnd->Value = In.RelaIplt->getSize();
1073   }
1074 
1075   PhdrEntry *Last = nullptr;
1076   PhdrEntry *LastRO = nullptr;
1077 
1078   for (Partition &Part : Partitions) {
1079     for (PhdrEntry *P : Part.Phdrs) {
1080       if (P->p_type != PT_LOAD)
1081         continue;
1082       Last = P;
1083       if (!(P->p_flags & PF_W))
1084         LastRO = P;
1085     }
1086   }
1087 
1088   if (LastRO) {
1089     // _etext is the first location after the last read-only loadable segment.
1090     if (ElfSym::Etext1)
1091       ElfSym::Etext1->Section = LastRO->LastSec;
1092     if (ElfSym::Etext2)
1093       ElfSym::Etext2->Section = LastRO->LastSec;
1094   }
1095 
1096   if (Last) {
1097     // _edata points to the end of the last mapped initialized section.
1098     OutputSection *Edata = nullptr;
1099     for (OutputSection *OS : OutputSections) {
1100       if (OS->Type != SHT_NOBITS)
1101         Edata = OS;
1102       if (OS == Last->LastSec)
1103         break;
1104     }
1105 
1106     if (ElfSym::Edata1)
1107       ElfSym::Edata1->Section = Edata;
1108     if (ElfSym::Edata2)
1109       ElfSym::Edata2->Section = Edata;
1110 
1111     // _end is the first location after the uninitialized data region.
1112     if (ElfSym::End1)
1113       ElfSym::End1->Section = Last->LastSec;
1114     if (ElfSym::End2)
1115       ElfSym::End2->Section = Last->LastSec;
1116   }
1117 
1118   if (ElfSym::Bss)
1119     ElfSym::Bss->Section = findSection(".bss");
1120 
1121   // Setup MIPS _gp_disp/__gnu_local_gp symbols which should
1122   // be equal to the _gp symbol's value.
1123   if (ElfSym::MipsGp) {
1124     // Find GP-relative section with the lowest address
1125     // and use this address to calculate default _gp value.
1126     for (OutputSection *OS : OutputSections) {
1127       if (OS->Flags & SHF_MIPS_GPREL) {
1128         ElfSym::MipsGp->Section = OS;
1129         ElfSym::MipsGp->Value = 0x7ff0;
1130         break;
1131       }
1132     }
1133   }
1134 }
1135 
1136 // We want to find how similar two ranks are.
1137 // The more branches in getSectionRank that match, the more similar they are.
1138 // Since each branch corresponds to a bit flag, we can just use
1139 // countLeadingZeros.
1140 static int getRankProximityAux(OutputSection *A, OutputSection *B) {
1141   return countLeadingZeros(A->SortRank ^ B->SortRank);
1142 }
1143 
1144 static int getRankProximity(OutputSection *A, BaseCommand *B) {
1145   auto *Sec = dyn_cast<OutputSection>(B);
1146   return (Sec && Sec->HasInputSections) ? getRankProximityAux(A, Sec) : -1;
1147 }
1148 
1149 // When placing orphan sections, we want to place them after symbol assignments
1150 // so that an orphan after
1151 //   begin_foo = .;
1152 //   foo : { *(foo) }
1153 //   end_foo = .;
1154 // doesn't break the intended meaning of the begin/end symbols.
1155 // We don't want to go over sections since findOrphanPos is the
1156 // one in charge of deciding the order of the sections.
1157 // We don't want to go over changes to '.', since doing so in
1158 //  rx_sec : { *(rx_sec) }
1159 //  . = ALIGN(0x1000);
1160 //  /* The RW PT_LOAD starts here*/
1161 //  rw_sec : { *(rw_sec) }
1162 // would mean that the RW PT_LOAD would become unaligned.
1163 static bool shouldSkip(BaseCommand *Cmd) {
1164   if (auto *Assign = dyn_cast<SymbolAssignment>(Cmd))
1165     return Assign->Name != ".";
1166   return false;
1167 }
1168 
1169 // We want to place orphan sections so that they share as much
1170 // characteristics with their neighbors as possible. For example, if
1171 // both are rw, or both are tls.
1172 static std::vector<BaseCommand *>::iterator
1173 findOrphanPos(std::vector<BaseCommand *>::iterator B,
1174               std::vector<BaseCommand *>::iterator E) {
1175   OutputSection *Sec = cast<OutputSection>(*E);
1176 
1177   // Find the first element that has as close a rank as possible.
1178   auto I = std::max_element(B, E, [=](BaseCommand *A, BaseCommand *B) {
1179     return getRankProximity(Sec, A) < getRankProximity(Sec, B);
1180   });
1181   if (I == E)
1182     return E;
1183 
1184   // Consider all existing sections with the same proximity.
1185   int Proximity = getRankProximity(Sec, *I);
1186   for (; I != E; ++I) {
1187     auto *CurSec = dyn_cast<OutputSection>(*I);
1188     if (!CurSec || !CurSec->HasInputSections)
1189       continue;
1190     if (getRankProximity(Sec, CurSec) != Proximity ||
1191         Sec->SortRank < CurSec->SortRank)
1192       break;
1193   }
1194 
1195   auto IsOutputSecWithInputSections = [](BaseCommand *Cmd) {
1196     auto *OS = dyn_cast<OutputSection>(Cmd);
1197     return OS && OS->HasInputSections;
1198   };
1199   auto J = std::find_if(llvm::make_reverse_iterator(I),
1200                         llvm::make_reverse_iterator(B),
1201                         IsOutputSecWithInputSections);
1202   I = J.base();
1203 
1204   // As a special case, if the orphan section is the last section, put
1205   // it at the very end, past any other commands.
1206   // This matches bfd's behavior and is convenient when the linker script fully
1207   // specifies the start of the file, but doesn't care about the end (the non
1208   // alloc sections for example).
1209   auto NextSec = std::find_if(I, E, IsOutputSecWithInputSections);
1210   if (NextSec == E)
1211     return E;
1212 
1213   while (I != E && shouldSkip(*I))
1214     ++I;
1215   return I;
1216 }
1217 
1218 // Builds section order for handling --symbol-ordering-file.
1219 static DenseMap<const InputSectionBase *, int> buildSectionOrder() {
1220   DenseMap<const InputSectionBase *, int> SectionOrder;
1221   // Use the rarely used option -call-graph-ordering-file to sort sections.
1222   if (!Config->CallGraphProfile.empty())
1223     return computeCallGraphProfileOrder();
1224 
1225   if (Config->SymbolOrderingFile.empty())
1226     return SectionOrder;
1227 
1228   struct SymbolOrderEntry {
1229     int Priority;
1230     bool Present;
1231   };
1232 
1233   // Build a map from symbols to their priorities. Symbols that didn't
1234   // appear in the symbol ordering file have the lowest priority 0.
1235   // All explicitly mentioned symbols have negative (higher) priorities.
1236   DenseMap<StringRef, SymbolOrderEntry> SymbolOrder;
1237   int Priority = -Config->SymbolOrderingFile.size();
1238   for (StringRef S : Config->SymbolOrderingFile)
1239     SymbolOrder.insert({S, {Priority++, false}});
1240 
1241   // Build a map from sections to their priorities.
1242   auto AddSym = [&](Symbol &Sym) {
1243     auto It = SymbolOrder.find(Sym.getName());
1244     if (It == SymbolOrder.end())
1245       return;
1246     SymbolOrderEntry &Ent = It->second;
1247     Ent.Present = true;
1248 
1249     maybeWarnUnorderableSymbol(&Sym);
1250 
1251     if (auto *D = dyn_cast<Defined>(&Sym)) {
1252       if (auto *Sec = dyn_cast_or_null<InputSectionBase>(D->Section)) {
1253         int &Priority = SectionOrder[cast<InputSectionBase>(Sec->Repl)];
1254         Priority = std::min(Priority, Ent.Priority);
1255       }
1256     }
1257   };
1258 
1259   // We want both global and local symbols. We get the global ones from the
1260   // symbol table and iterate the object files for the local ones.
1261   Symtab->forEachSymbol([&](Symbol *Sym) {
1262     if (!Sym->isLazy())
1263       AddSym(*Sym);
1264   });
1265 
1266   for (InputFile *File : ObjectFiles)
1267     for (Symbol *Sym : File->getSymbols())
1268       if (Sym->isLocal())
1269         AddSym(*Sym);
1270 
1271   if (Config->WarnSymbolOrdering)
1272     for (auto OrderEntry : SymbolOrder)
1273       if (!OrderEntry.second.Present)
1274         warn("symbol ordering file: no such symbol: " + OrderEntry.first);
1275 
1276   return SectionOrder;
1277 }
1278 
1279 // Sorts the sections in ISD according to the provided section order.
1280 static void
1281 sortISDBySectionOrder(InputSectionDescription *ISD,
1282                       const DenseMap<const InputSectionBase *, int> &Order) {
1283   std::vector<InputSection *> UnorderedSections;
1284   std::vector<std::pair<InputSection *, int>> OrderedSections;
1285   uint64_t UnorderedSize = 0;
1286 
1287   for (InputSection *IS : ISD->Sections) {
1288     auto I = Order.find(IS);
1289     if (I == Order.end()) {
1290       UnorderedSections.push_back(IS);
1291       UnorderedSize += IS->getSize();
1292       continue;
1293     }
1294     OrderedSections.push_back({IS, I->second});
1295   }
1296   llvm::sort(OrderedSections, [&](std::pair<InputSection *, int> A,
1297                                   std::pair<InputSection *, int> B) {
1298     return A.second < B.second;
1299   });
1300 
1301   // Find an insertion point for the ordered section list in the unordered
1302   // section list. On targets with limited-range branches, this is the mid-point
1303   // of the unordered section list. This decreases the likelihood that a range
1304   // extension thunk will be needed to enter or exit the ordered region. If the
1305   // ordered section list is a list of hot functions, we can generally expect
1306   // the ordered functions to be called more often than the unordered functions,
1307   // making it more likely that any particular call will be within range, and
1308   // therefore reducing the number of thunks required.
1309   //
1310   // For example, imagine that you have 8MB of hot code and 32MB of cold code.
1311   // If the layout is:
1312   //
1313   // 8MB hot
1314   // 32MB cold
1315   //
1316   // only the first 8-16MB of the cold code (depending on which hot function it
1317   // is actually calling) can call the hot code without a range extension thunk.
1318   // However, if we use this layout:
1319   //
1320   // 16MB cold
1321   // 8MB hot
1322   // 16MB cold
1323   //
1324   // both the last 8-16MB of the first block of cold code and the first 8-16MB
1325   // of the second block of cold code can call the hot code without a thunk. So
1326   // we effectively double the amount of code that could potentially call into
1327   // the hot code without a thunk.
1328   size_t InsPt = 0;
1329   if (Target->getThunkSectionSpacing() && !OrderedSections.empty()) {
1330     uint64_t UnorderedPos = 0;
1331     for (; InsPt != UnorderedSections.size(); ++InsPt) {
1332       UnorderedPos += UnorderedSections[InsPt]->getSize();
1333       if (UnorderedPos > UnorderedSize / 2)
1334         break;
1335     }
1336   }
1337 
1338   ISD->Sections.clear();
1339   for (InputSection *IS : makeArrayRef(UnorderedSections).slice(0, InsPt))
1340     ISD->Sections.push_back(IS);
1341   for (std::pair<InputSection *, int> P : OrderedSections)
1342     ISD->Sections.push_back(P.first);
1343   for (InputSection *IS : makeArrayRef(UnorderedSections).slice(InsPt))
1344     ISD->Sections.push_back(IS);
1345 }
1346 
1347 static void sortSection(OutputSection *Sec,
1348                         const DenseMap<const InputSectionBase *, int> &Order) {
1349   StringRef Name = Sec->Name;
1350 
1351   // Sort input sections by section name suffixes for
1352   // __attribute__((init_priority(N))).
1353   if (Name == ".init_array" || Name == ".fini_array") {
1354     if (!Script->HasSectionsCommand)
1355       Sec->sortInitFini();
1356     return;
1357   }
1358 
1359   // Sort input sections by the special rule for .ctors and .dtors.
1360   if (Name == ".ctors" || Name == ".dtors") {
1361     if (!Script->HasSectionsCommand)
1362       Sec->sortCtorsDtors();
1363     return;
1364   }
1365 
1366   // Never sort these.
1367   if (Name == ".init" || Name == ".fini")
1368     return;
1369 
1370   // .toc is allocated just after .got and is accessed using GOT-relative
1371   // relocations. Object files compiled with small code model have an
1372   // addressable range of [.got, .got + 0xFFFC] for GOT-relative relocations.
1373   // To reduce the risk of relocation overflow, .toc contents are sorted so that
1374   // sections having smaller relocation offsets are at beginning of .toc
1375   if (Config->EMachine == EM_PPC64 && Name == ".toc") {
1376     if (Script->HasSectionsCommand)
1377       return;
1378     assert(Sec->SectionCommands.size() == 1);
1379     auto *ISD = cast<InputSectionDescription>(Sec->SectionCommands[0]);
1380     llvm::stable_sort(ISD->Sections,
1381                       [](const InputSection *A, const InputSection *B) -> bool {
1382                         return A->File->PPC64SmallCodeModelTocRelocs &&
1383                                !B->File->PPC64SmallCodeModelTocRelocs;
1384                       });
1385     return;
1386   }
1387 
1388   // Sort input sections by priority using the list provided
1389   // by --symbol-ordering-file.
1390   if (!Order.empty())
1391     for (BaseCommand *B : Sec->SectionCommands)
1392       if (auto *ISD = dyn_cast<InputSectionDescription>(B))
1393         sortISDBySectionOrder(ISD, Order);
1394 }
1395 
1396 // If no layout was provided by linker script, we want to apply default
1397 // sorting for special input sections. This also handles --symbol-ordering-file.
1398 template <class ELFT> void Writer<ELFT>::sortInputSections() {
1399   // Build the order once since it is expensive.
1400   DenseMap<const InputSectionBase *, int> Order = buildSectionOrder();
1401   for (BaseCommand *Base : Script->SectionCommands)
1402     if (auto *Sec = dyn_cast<OutputSection>(Base))
1403       sortSection(Sec, Order);
1404 }
1405 
1406 template <class ELFT> void Writer<ELFT>::sortSections() {
1407   Script->adjustSectionsBeforeSorting();
1408 
1409   // Don't sort if using -r. It is not necessary and we want to preserve the
1410   // relative order for SHF_LINK_ORDER sections.
1411   if (Config->Relocatable)
1412     return;
1413 
1414   sortInputSections();
1415 
1416   for (BaseCommand *Base : Script->SectionCommands) {
1417     auto *OS = dyn_cast<OutputSection>(Base);
1418     if (!OS)
1419       continue;
1420     OS->SortRank = getSectionRank(OS);
1421 
1422     // We want to assign rude approximation values to OutSecOff fields
1423     // to know the relative order of the input sections. We use it for
1424     // sorting SHF_LINK_ORDER sections. See resolveShfLinkOrder().
1425     uint64_t I = 0;
1426     for (InputSection *Sec : getInputSections(OS))
1427       Sec->OutSecOff = I++;
1428   }
1429 
1430   if (!Script->HasSectionsCommand) {
1431     // We know that all the OutputSections are contiguous in this case.
1432     auto IsSection = [](BaseCommand *Base) { return isa<OutputSection>(Base); };
1433     std::stable_sort(
1434         llvm::find_if(Script->SectionCommands, IsSection),
1435         llvm::find_if(llvm::reverse(Script->SectionCommands), IsSection).base(),
1436         compareSections);
1437     return;
1438   }
1439 
1440   // Orphan sections are sections present in the input files which are
1441   // not explicitly placed into the output file by the linker script.
1442   //
1443   // The sections in the linker script are already in the correct
1444   // order. We have to figuere out where to insert the orphan
1445   // sections.
1446   //
1447   // The order of the sections in the script is arbitrary and may not agree with
1448   // compareSections. This means that we cannot easily define a strict weak
1449   // ordering. To see why, consider a comparison of a section in the script and
1450   // one not in the script. We have a two simple options:
1451   // * Make them equivalent (a is not less than b, and b is not less than a).
1452   //   The problem is then that equivalence has to be transitive and we can
1453   //   have sections a, b and c with only b in a script and a less than c
1454   //   which breaks this property.
1455   // * Use compareSectionsNonScript. Given that the script order doesn't have
1456   //   to match, we can end up with sections a, b, c, d where b and c are in the
1457   //   script and c is compareSectionsNonScript less than b. In which case d
1458   //   can be equivalent to c, a to b and d < a. As a concrete example:
1459   //   .a (rx) # not in script
1460   //   .b (rx) # in script
1461   //   .c (ro) # in script
1462   //   .d (ro) # not in script
1463   //
1464   // The way we define an order then is:
1465   // *  Sort only the orphan sections. They are in the end right now.
1466   // *  Move each orphan section to its preferred position. We try
1467   //    to put each section in the last position where it can share
1468   //    a PT_LOAD.
1469   //
1470   // There is some ambiguity as to where exactly a new entry should be
1471   // inserted, because Commands contains not only output section
1472   // commands but also other types of commands such as symbol assignment
1473   // expressions. There's no correct answer here due to the lack of the
1474   // formal specification of the linker script. We use heuristics to
1475   // determine whether a new output command should be added before or
1476   // after another commands. For the details, look at shouldSkip
1477   // function.
1478 
1479   auto I = Script->SectionCommands.begin();
1480   auto E = Script->SectionCommands.end();
1481   auto NonScriptI = std::find_if(I, E, [](BaseCommand *Base) {
1482     if (auto *Sec = dyn_cast<OutputSection>(Base))
1483       return Sec->SectionIndex == UINT32_MAX;
1484     return false;
1485   });
1486 
1487   // Sort the orphan sections.
1488   std::stable_sort(NonScriptI, E, compareSections);
1489 
1490   // As a horrible special case, skip the first . assignment if it is before any
1491   // section. We do this because it is common to set a load address by starting
1492   // the script with ". = 0xabcd" and the expectation is that every section is
1493   // after that.
1494   auto FirstSectionOrDotAssignment =
1495       std::find_if(I, E, [](BaseCommand *Cmd) { return !shouldSkip(Cmd); });
1496   if (FirstSectionOrDotAssignment != E &&
1497       isa<SymbolAssignment>(**FirstSectionOrDotAssignment))
1498     ++FirstSectionOrDotAssignment;
1499   I = FirstSectionOrDotAssignment;
1500 
1501   while (NonScriptI != E) {
1502     auto Pos = findOrphanPos(I, NonScriptI);
1503     OutputSection *Orphan = cast<OutputSection>(*NonScriptI);
1504 
1505     // As an optimization, find all sections with the same sort rank
1506     // and insert them with one rotate.
1507     unsigned Rank = Orphan->SortRank;
1508     auto End = std::find_if(NonScriptI + 1, E, [=](BaseCommand *Cmd) {
1509       return cast<OutputSection>(Cmd)->SortRank != Rank;
1510     });
1511     std::rotate(Pos, NonScriptI, End);
1512     NonScriptI = End;
1513   }
1514 
1515   Script->adjustSectionsAfterSorting();
1516 }
1517 
1518 static bool compareByFilePosition(InputSection *A, InputSection *B) {
1519   InputSection *LA = A->getLinkOrderDep();
1520   InputSection *LB = B->getLinkOrderDep();
1521   OutputSection *AOut = LA->getParent();
1522   OutputSection *BOut = LB->getParent();
1523 
1524   if (AOut != BOut)
1525     return AOut->SectionIndex < BOut->SectionIndex;
1526   return LA->OutSecOff < LB->OutSecOff;
1527 }
1528 
1529 template <class ELFT> void Writer<ELFT>::resolveShfLinkOrder() {
1530   for (OutputSection *Sec : OutputSections) {
1531     if (!(Sec->Flags & SHF_LINK_ORDER))
1532       continue;
1533 
1534     // Link order may be distributed across several InputSectionDescriptions
1535     // but sort must consider them all at once.
1536     std::vector<InputSection **> ScriptSections;
1537     std::vector<InputSection *> Sections;
1538     for (BaseCommand *Base : Sec->SectionCommands) {
1539       if (auto *ISD = dyn_cast<InputSectionDescription>(Base)) {
1540         for (InputSection *&IS : ISD->Sections) {
1541           ScriptSections.push_back(&IS);
1542           Sections.push_back(IS);
1543         }
1544       }
1545     }
1546 
1547     // The ARM.exidx section use SHF_LINK_ORDER, but we have consolidated
1548     // this processing inside the ARMExidxsyntheticsection::finalizeContents().
1549     if (!Config->Relocatable && Config->EMachine == EM_ARM &&
1550         Sec->Type == SHT_ARM_EXIDX)
1551       continue;
1552 
1553     llvm::stable_sort(Sections, compareByFilePosition);
1554 
1555     for (int I = 0, N = Sections.size(); I < N; ++I)
1556       *ScriptSections[I] = Sections[I];
1557   }
1558 }
1559 
1560 // We need to generate and finalize the content that depends on the address of
1561 // InputSections. As the generation of the content may also alter InputSection
1562 // addresses we must converge to a fixed point. We do that here. See the comment
1563 // in Writer<ELFT>::finalizeSections().
1564 template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() {
1565   ThunkCreator TC;
1566   AArch64Err843419Patcher A64P;
1567 
1568   // For some targets, like x86, this loop iterates only once.
1569   for (;;) {
1570     bool Changed = false;
1571 
1572     Script->assignAddresses();
1573 
1574     if (Target->NeedsThunks)
1575       Changed |= TC.createThunks(OutputSections);
1576 
1577     if (Config->FixCortexA53Errata843419) {
1578       if (Changed)
1579         Script->assignAddresses();
1580       Changed |= A64P.createFixes();
1581     }
1582 
1583     if (In.MipsGot)
1584       In.MipsGot->updateAllocSize();
1585 
1586     for (Partition &Part : Partitions) {
1587       Changed |= Part.RelaDyn->updateAllocSize();
1588       if (Part.RelrDyn)
1589         Changed |= Part.RelrDyn->updateAllocSize();
1590     }
1591 
1592     if (!Changed)
1593       return;
1594   }
1595 }
1596 
1597 static void finalizeSynthetic(SyntheticSection *Sec) {
1598   if (Sec && Sec->isNeeded() && Sec->getParent())
1599     Sec->finalizeContents();
1600 }
1601 
1602 // In order to allow users to manipulate linker-synthesized sections,
1603 // we had to add synthetic sections to the input section list early,
1604 // even before we make decisions whether they are needed. This allows
1605 // users to write scripts like this: ".mygot : { .got }".
1606 //
1607 // Doing it has an unintended side effects. If it turns out that we
1608 // don't need a .got (for example) at all because there's no
1609 // relocation that needs a .got, we don't want to emit .got.
1610 //
1611 // To deal with the above problem, this function is called after
1612 // scanRelocations is called to remove synthetic sections that turn
1613 // out to be empty.
1614 static void removeUnusedSyntheticSections() {
1615   // All input synthetic sections that can be empty are placed after
1616   // all regular ones. We iterate over them all and exit at first
1617   // non-synthetic.
1618   for (InputSectionBase *S : llvm::reverse(InputSections)) {
1619     SyntheticSection *SS = dyn_cast<SyntheticSection>(S);
1620     if (!SS)
1621       return;
1622     OutputSection *OS = SS->getParent();
1623     if (!OS || SS->isNeeded())
1624       continue;
1625 
1626     // If we reach here, then SS is an unused synthetic section and we want to
1627     // remove it from corresponding input section description of output section.
1628     for (BaseCommand *B : OS->SectionCommands)
1629       if (auto *ISD = dyn_cast<InputSectionDescription>(B))
1630         llvm::erase_if(ISD->Sections,
1631                        [=](InputSection *IS) { return IS == SS; });
1632   }
1633 }
1634 
1635 // Returns true if a symbol can be replaced at load-time by a symbol
1636 // with the same name defined in other ELF executable or DSO.
1637 static bool computeIsPreemptible(const Symbol &B) {
1638   assert(!B.isLocal());
1639 
1640   // Only symbols that appear in dynsym can be preempted.
1641   if (!B.includeInDynsym())
1642     return false;
1643 
1644   // Only default visibility symbols can be preempted.
1645   if (B.Visibility != STV_DEFAULT)
1646     return false;
1647 
1648   // At this point copy relocations have not been created yet, so any
1649   // symbol that is not defined locally is preemptible.
1650   if (!B.isDefined())
1651     return true;
1652 
1653   // If we have a dynamic list it specifies which local symbols are preemptible.
1654   if (Config->HasDynamicList)
1655     return false;
1656 
1657   if (!Config->Shared)
1658     return false;
1659 
1660   // -Bsymbolic means that definitions are not preempted.
1661   if (Config->Bsymbolic || (Config->BsymbolicFunctions && B.isFunc()))
1662     return false;
1663   return true;
1664 }
1665 
1666 // Create output section objects and add them to OutputSections.
1667 template <class ELFT> void Writer<ELFT>::finalizeSections() {
1668   Out::PreinitArray = findSection(".preinit_array");
1669   Out::InitArray = findSection(".init_array");
1670   Out::FiniArray = findSection(".fini_array");
1671 
1672   // The linker needs to define SECNAME_start, SECNAME_end and SECNAME_stop
1673   // symbols for sections, so that the runtime can get the start and end
1674   // addresses of each section by section name. Add such symbols.
1675   if (!Config->Relocatable) {
1676     addStartEndSymbols();
1677     for (BaseCommand *Base : Script->SectionCommands)
1678       if (auto *Sec = dyn_cast<OutputSection>(Base))
1679         addStartStopSymbols(Sec);
1680   }
1681 
1682   // Add _DYNAMIC symbol. Unlike GNU gold, our _DYNAMIC symbol has no type.
1683   // It should be okay as no one seems to care about the type.
1684   // Even the author of gold doesn't remember why gold behaves that way.
1685   // https://sourceware.org/ml/binutils/2002-03/msg00360.html
1686   if (Main->Dynamic->Parent)
1687     Symtab->addSymbol(Defined{/*File=*/nullptr, "_DYNAMIC", STB_WEAK,
1688                               STV_HIDDEN, STT_NOTYPE,
1689                               /*Value=*/0, /*Size=*/0, Main->Dynamic});
1690 
1691   // Define __rel[a]_iplt_{start,end} symbols if needed.
1692   addRelIpltSymbols();
1693 
1694   // RISC-V's gp can address +/- 2 KiB, set it to .sdata + 0x800 if not defined.
1695   if (Config->EMachine == EM_RISCV)
1696     if (!dyn_cast_or_null<Defined>(Symtab->find("__global_pointer$")))
1697       addOptionalRegular("__global_pointer$", findSection(".sdata"), 0x800);
1698 
1699   if (Config->EMachine == EM_X86_64) {
1700     // On targets that support TLSDESC, _TLS_MODULE_BASE_ is defined in such a
1701     // way that:
1702     //
1703     // 1) Without relaxation: it produces a dynamic TLSDESC relocation that
1704     // computes 0.
1705     // 2) With LD->LE relaxation: _TLS_MODULE_BASE_@tpoff = 0 (lowest address in
1706     // the TLS block).
1707     //
1708     // 2) is special cased in @tpoff computation. To satisfy 1), we define it as
1709     // an absolute symbol of zero. This is different from GNU linkers which
1710     // define _TLS_MODULE_BASE_ relative to the first TLS section.
1711     Symbol *S = Symtab->find("_TLS_MODULE_BASE_");
1712     if (S && S->isUndefined()) {
1713       S->resolve(Defined{/*File=*/nullptr, S->getName(), STB_GLOBAL, STV_HIDDEN,
1714                          STT_TLS, /*Value=*/0, 0,
1715                          /*Section=*/nullptr});
1716       ElfSym::TlsModuleBase = cast<Defined>(S);
1717     }
1718   }
1719 
1720   // This responsible for splitting up .eh_frame section into
1721   // pieces. The relocation scan uses those pieces, so this has to be
1722   // earlier.
1723   for (Partition &Part : Partitions)
1724     finalizeSynthetic(Part.EhFrame);
1725 
1726   Symtab->forEachSymbol([](Symbol *S) {
1727     if (!S->IsPreemptible)
1728       S->IsPreemptible = computeIsPreemptible(*S);
1729   });
1730 
1731   // Scan relocations. This must be done after every symbol is declared so that
1732   // we can correctly decide if a dynamic relocation is needed.
1733   if (!Config->Relocatable)
1734     forEachRelSec(scanRelocations<ELFT>);
1735 
1736   addIRelativeRelocs();
1737 
1738   if (In.Plt && In.Plt->isNeeded())
1739     In.Plt->addSymbols();
1740   if (In.Iplt && In.Iplt->isNeeded())
1741     In.Iplt->addSymbols();
1742 
1743   if (!Config->AllowShlibUndefined) {
1744     // Error on undefined symbols in a shared object, if all of its DT_NEEDED
1745     // entires are seen. These cases would otherwise lead to runtime errors
1746     // reported by the dynamic linker.
1747     //
1748     // ld.bfd traces all DT_NEEDED to emulate the logic of the dynamic linker to
1749     // catch more cases. That is too much for us. Our approach resembles the one
1750     // used in ld.gold, achieves a good balance to be useful but not too smart.
1751     for (SharedFile *File : SharedFiles)
1752       File->AllNeededIsKnown =
1753           llvm::all_of(File->DtNeeded, [&](StringRef Needed) {
1754             return Symtab->SoNames.count(Needed);
1755           });
1756 
1757     Symtab->forEachSymbol([](Symbol *Sym) {
1758       if (Sym->isUndefined() && !Sym->isWeak())
1759         if (auto *F = dyn_cast_or_null<SharedFile>(Sym->File))
1760           if (F->AllNeededIsKnown)
1761             error(toString(F) + ": undefined reference to " + toString(*Sym));
1762     });
1763   }
1764 
1765   // Now that we have defined all possible global symbols including linker-
1766   // synthesized ones. Visit all symbols to give the finishing touches.
1767   Symtab->forEachSymbol([](Symbol *Sym) {
1768     if (!includeInSymtab(*Sym))
1769       return;
1770     if (In.SymTab)
1771       In.SymTab->addSymbol(Sym);
1772 
1773     if (Sym->includeInDynsym()) {
1774       Partitions[Sym->Partition - 1].DynSymTab->addSymbol(Sym);
1775       if (auto *File = dyn_cast_or_null<SharedFile>(Sym->File))
1776         if (File->IsNeeded && !Sym->isUndefined())
1777           addVerneed(Sym);
1778     }
1779   });
1780 
1781   // We also need to scan the dynamic relocation tables of the other partitions
1782   // and add any referenced symbols to the partition's dynsym.
1783   for (Partition &Part : MutableArrayRef<Partition>(Partitions).slice(1)) {
1784     DenseSet<Symbol *> Syms;
1785     for (const SymbolTableEntry &E : Part.DynSymTab->getSymbols())
1786       Syms.insert(E.Sym);
1787     for (DynamicReloc &Reloc : Part.RelaDyn->Relocs)
1788       if (Reloc.Sym && !Reloc.UseSymVA && Syms.insert(Reloc.Sym).second)
1789         Part.DynSymTab->addSymbol(Reloc.Sym);
1790   }
1791 
1792   // Do not proceed if there was an undefined symbol.
1793   if (errorCount())
1794     return;
1795 
1796   if (In.MipsGot)
1797     In.MipsGot->build();
1798 
1799   removeUnusedSyntheticSections();
1800 
1801   sortSections();
1802 
1803   // Now that we have the final list, create a list of all the
1804   // OutputSections for convenience.
1805   for (BaseCommand *Base : Script->SectionCommands)
1806     if (auto *Sec = dyn_cast<OutputSection>(Base))
1807       OutputSections.push_back(Sec);
1808 
1809   // Prefer command line supplied address over other constraints.
1810   for (OutputSection *Sec : OutputSections) {
1811     auto I = Config->SectionStartMap.find(Sec->Name);
1812     if (I != Config->SectionStartMap.end())
1813       Sec->AddrExpr = [=] { return I->second; };
1814   }
1815 
1816   // This is a bit of a hack. A value of 0 means undef, so we set it
1817   // to 1 to make __ehdr_start defined. The section number is not
1818   // particularly relevant.
1819   Out::ElfHeader->SectionIndex = 1;
1820 
1821   for (size_t I = 0, E = OutputSections.size(); I != E; ++I) {
1822     OutputSection *Sec = OutputSections[I];
1823     Sec->SectionIndex = I + 1;
1824     Sec->ShName = In.ShStrTab->addString(Sec->Name);
1825   }
1826 
1827   // Binary and relocatable output does not have PHDRS.
1828   // The headers have to be created before finalize as that can influence the
1829   // image base and the dynamic section on mips includes the image base.
1830   if (!Config->Relocatable && !Config->OFormatBinary) {
1831     for (Partition &Part : Partitions) {
1832       Part.Phdrs = Script->hasPhdrsCommands() ? Script->createPhdrs()
1833                                               : createPhdrs(Part);
1834       if (Config->EMachine == EM_ARM) {
1835         // PT_ARM_EXIDX is the ARM EHABI equivalent of PT_GNU_EH_FRAME
1836         addPhdrForSection(Part, SHT_ARM_EXIDX, PT_ARM_EXIDX, PF_R);
1837       }
1838       if (Config->EMachine == EM_MIPS) {
1839         // Add separate segments for MIPS-specific sections.
1840         addPhdrForSection(Part, SHT_MIPS_REGINFO, PT_MIPS_REGINFO, PF_R);
1841         addPhdrForSection(Part, SHT_MIPS_OPTIONS, PT_MIPS_OPTIONS, PF_R);
1842         addPhdrForSection(Part, SHT_MIPS_ABIFLAGS, PT_MIPS_ABIFLAGS, PF_R);
1843       }
1844     }
1845     Out::ProgramHeaders->Size = sizeof(Elf_Phdr) * Main->Phdrs.size();
1846 
1847     // Find the TLS segment. This happens before the section layout loop so that
1848     // Android relocation packing can look up TLS symbol addresses. We only need
1849     // to care about the main partition here because all TLS symbols were moved
1850     // to the main partition (see MarkLive.cpp).
1851     for (PhdrEntry *P : Main->Phdrs)
1852       if (P->p_type == PT_TLS)
1853         Out::TlsPhdr = P;
1854   }
1855 
1856   // Some symbols are defined in term of program headers. Now that we
1857   // have the headers, we can find out which sections they point to.
1858   setReservedSymbolSections();
1859 
1860   finalizeSynthetic(In.Bss);
1861   finalizeSynthetic(In.BssRelRo);
1862   finalizeSynthetic(In.SymTabShndx);
1863   finalizeSynthetic(In.ShStrTab);
1864   finalizeSynthetic(In.StrTab);
1865   finalizeSynthetic(In.Got);
1866   finalizeSynthetic(In.MipsGot);
1867   finalizeSynthetic(In.IgotPlt);
1868   finalizeSynthetic(In.GotPlt);
1869   finalizeSynthetic(In.RelaIplt);
1870   finalizeSynthetic(In.RelaPlt);
1871   finalizeSynthetic(In.Plt);
1872   finalizeSynthetic(In.Iplt);
1873   finalizeSynthetic(In.PPC32Got2);
1874   finalizeSynthetic(In.PartIndex);
1875 
1876   // Dynamic section must be the last one in this list and dynamic
1877   // symbol table section (DynSymTab) must be the first one.
1878   for (Partition &Part : Partitions) {
1879     finalizeSynthetic(Part.ARMExidx);
1880     finalizeSynthetic(Part.DynSymTab);
1881     finalizeSynthetic(Part.GnuHashTab);
1882     finalizeSynthetic(Part.HashTab);
1883     finalizeSynthetic(Part.VerDef);
1884     finalizeSynthetic(Part.RelaDyn);
1885     finalizeSynthetic(Part.RelrDyn);
1886     finalizeSynthetic(Part.EhFrameHdr);
1887     finalizeSynthetic(Part.VerSym);
1888     finalizeSynthetic(Part.VerNeed);
1889     finalizeSynthetic(Part.Dynamic);
1890   }
1891 
1892   if (!Script->HasSectionsCommand && !Config->Relocatable)
1893     fixSectionAlignments();
1894 
1895   // SHFLinkOrder processing must be processed after relative section placements are
1896   // known but before addresses are allocated.
1897   resolveShfLinkOrder();
1898 
1899   // This is used to:
1900   // 1) Create "thunks":
1901   //    Jump instructions in many ISAs have small displacements, and therefore
1902   //    they cannot jump to arbitrary addresses in memory. For example, RISC-V
1903   //    JAL instruction can target only +-1 MiB from PC. It is a linker's
1904   //    responsibility to create and insert small pieces of code between
1905   //    sections to extend the ranges if jump targets are out of range. Such
1906   //    code pieces are called "thunks".
1907   //
1908   //    We add thunks at this stage. We couldn't do this before this point
1909   //    because this is the earliest point where we know sizes of sections and
1910   //    their layouts (that are needed to determine if jump targets are in
1911   //    range).
1912   //
1913   // 2) Update the sections. We need to generate content that depends on the
1914   //    address of InputSections. For example, MIPS GOT section content or
1915   //    android packed relocations sections content.
1916   //
1917   // 3) Assign the final values for the linker script symbols. Linker scripts
1918   //    sometimes using forward symbol declarations. We want to set the correct
1919   //    values. They also might change after adding the thunks.
1920   finalizeAddressDependentContent();
1921 
1922   // finalizeAddressDependentContent may have added local symbols to the static symbol table.
1923   finalizeSynthetic(In.SymTab);
1924   finalizeSynthetic(In.PPC64LongBranchTarget);
1925 
1926   // Fill other section headers. The dynamic table is finalized
1927   // at the end because some tags like RELSZ depend on result
1928   // of finalizing other sections.
1929   for (OutputSection *Sec : OutputSections)
1930     Sec->finalize();
1931 }
1932 
1933 // Ensure data sections are not mixed with executable sections when
1934 // -execute-only is used. -execute-only is a feature to make pages executable
1935 // but not readable, and the feature is currently supported only on AArch64.
1936 template <class ELFT> void Writer<ELFT>::checkExecuteOnly() {
1937   if (!Config->ExecuteOnly)
1938     return;
1939 
1940   for (OutputSection *OS : OutputSections)
1941     if (OS->Flags & SHF_EXECINSTR)
1942       for (InputSection *IS : getInputSections(OS))
1943         if (!(IS->Flags & SHF_EXECINSTR))
1944           error("cannot place " + toString(IS) + " into " + toString(OS->Name) +
1945                 ": -execute-only does not support intermingling data and code");
1946 }
1947 
1948 // The linker is expected to define SECNAME_start and SECNAME_end
1949 // symbols for a few sections. This function defines them.
1950 template <class ELFT> void Writer<ELFT>::addStartEndSymbols() {
1951   // If a section does not exist, there's ambiguity as to how we
1952   // define _start and _end symbols for an init/fini section. Since
1953   // the loader assume that the symbols are always defined, we need to
1954   // always define them. But what value? The loader iterates over all
1955   // pointers between _start and _end to run global ctors/dtors, so if
1956   // the section is empty, their symbol values don't actually matter
1957   // as long as _start and _end point to the same location.
1958   //
1959   // That said, we don't want to set the symbols to 0 (which is
1960   // probably the simplest value) because that could cause some
1961   // program to fail to link due to relocation overflow, if their
1962   // program text is above 2 GiB. We use the address of the .text
1963   // section instead to prevent that failure.
1964   //
1965   // In a rare sitaution, .text section may not exist. If that's the
1966   // case, use the image base address as a last resort.
1967   OutputSection *Default = findSection(".text");
1968   if (!Default)
1969     Default = Out::ElfHeader;
1970 
1971   auto Define = [=](StringRef Start, StringRef End, OutputSection *OS) {
1972     if (OS) {
1973       addOptionalRegular(Start, OS, 0);
1974       addOptionalRegular(End, OS, -1);
1975     } else {
1976       addOptionalRegular(Start, Default, 0);
1977       addOptionalRegular(End, Default, 0);
1978     }
1979   };
1980 
1981   Define("__preinit_array_start", "__preinit_array_end", Out::PreinitArray);
1982   Define("__init_array_start", "__init_array_end", Out::InitArray);
1983   Define("__fini_array_start", "__fini_array_end", Out::FiniArray);
1984 
1985   if (OutputSection *Sec = findSection(".ARM.exidx"))
1986     Define("__exidx_start", "__exidx_end", Sec);
1987 }
1988 
1989 // If a section name is valid as a C identifier (which is rare because of
1990 // the leading '.'), linkers are expected to define __start_<secname> and
1991 // __stop_<secname> symbols. They are at beginning and end of the section,
1992 // respectively. This is not requested by the ELF standard, but GNU ld and
1993 // gold provide the feature, and used by many programs.
1994 template <class ELFT>
1995 void Writer<ELFT>::addStartStopSymbols(OutputSection *Sec) {
1996   StringRef S = Sec->Name;
1997   if (!isValidCIdentifier(S))
1998     return;
1999   addOptionalRegular(Saver.save("__start_" + S), Sec, 0, STV_PROTECTED);
2000   addOptionalRegular(Saver.save("__stop_" + S), Sec, -1, STV_PROTECTED);
2001 }
2002 
2003 static bool needsPtLoad(OutputSection *Sec) {
2004   if (!(Sec->Flags & SHF_ALLOC) || Sec->Noload)
2005     return false;
2006 
2007   // Don't allocate VA space for TLS NOBITS sections. The PT_TLS PHDR is
2008   // responsible for allocating space for them, not the PT_LOAD that
2009   // contains the TLS initialization image.
2010   if ((Sec->Flags & SHF_TLS) && Sec->Type == SHT_NOBITS)
2011     return false;
2012   return true;
2013 }
2014 
2015 // Linker scripts are responsible for aligning addresses. Unfortunately, most
2016 // linker scripts are designed for creating two PT_LOADs only, one RX and one
2017 // RW. This means that there is no alignment in the RO to RX transition and we
2018 // cannot create a PT_LOAD there.
2019 static uint64_t computeFlags(uint64_t Flags) {
2020   if (Config->Omagic)
2021     return PF_R | PF_W | PF_X;
2022   if (Config->ExecuteOnly && (Flags & PF_X))
2023     return Flags & ~PF_R;
2024   if (Config->SingleRoRx && !(Flags & PF_W))
2025     return Flags | PF_X;
2026   return Flags;
2027 }
2028 
2029 // Decide which program headers to create and which sections to include in each
2030 // one.
2031 template <class ELFT>
2032 std::vector<PhdrEntry *> Writer<ELFT>::createPhdrs(Partition &Part) {
2033   std::vector<PhdrEntry *> Ret;
2034   auto AddHdr = [&](unsigned Type, unsigned Flags) -> PhdrEntry * {
2035     Ret.push_back(make<PhdrEntry>(Type, Flags));
2036     return Ret.back();
2037   };
2038 
2039   unsigned PartNo = Part.getNumber();
2040   bool IsMain = PartNo == 1;
2041 
2042   // The first phdr entry is PT_PHDR which describes the program header itself.
2043   if (IsMain)
2044     AddHdr(PT_PHDR, PF_R)->add(Out::ProgramHeaders);
2045   else
2046     AddHdr(PT_PHDR, PF_R)->add(Part.ProgramHeaders->getParent());
2047 
2048   // PT_INTERP must be the second entry if exists.
2049   if (OutputSection *Cmd = findSection(".interp", PartNo))
2050     AddHdr(PT_INTERP, Cmd->getPhdrFlags())->add(Cmd);
2051 
2052   // Add the first PT_LOAD segment for regular output sections.
2053   uint64_t Flags = computeFlags(PF_R);
2054   PhdrEntry *Load = nullptr;
2055 
2056   // Add the headers. We will remove them if they don't fit.
2057   // In the other partitions the headers are ordinary sections, so they don't
2058   // need to be added here.
2059   if (IsMain) {
2060     Load = AddHdr(PT_LOAD, Flags);
2061     Load->add(Out::ElfHeader);
2062     Load->add(Out::ProgramHeaders);
2063   }
2064 
2065   // PT_GNU_RELRO includes all sections that should be marked as
2066   // read-only by dynamic linker after proccessing relocations.
2067   // Current dynamic loaders only support one PT_GNU_RELRO PHDR, give
2068   // an error message if more than one PT_GNU_RELRO PHDR is required.
2069   PhdrEntry *RelRo = make<PhdrEntry>(PT_GNU_RELRO, PF_R);
2070   bool InRelroPhdr = false;
2071   OutputSection *RelroEnd = nullptr;
2072   for (OutputSection *Sec : OutputSections) {
2073     if (Sec->Partition != PartNo || !needsPtLoad(Sec))
2074       continue;
2075     if (isRelroSection(Sec)) {
2076       InRelroPhdr = true;
2077       if (!RelroEnd)
2078         RelRo->add(Sec);
2079       else
2080         error("section: " + Sec->Name + " is not contiguous with other relro" +
2081               " sections");
2082     } else if (InRelroPhdr) {
2083       InRelroPhdr = false;
2084       RelroEnd = Sec;
2085     }
2086   }
2087 
2088   for (OutputSection *Sec : OutputSections) {
2089     if (!(Sec->Flags & SHF_ALLOC))
2090       break;
2091     if (!needsPtLoad(Sec))
2092       continue;
2093 
2094     // Normally, sections in partitions other than the current partition are
2095     // ignored. But partition number 255 is a special case: it contains the
2096     // partition end marker (.part.end). It needs to be added to the main
2097     // partition so that a segment is created for it in the main partition,
2098     // which will cause the dynamic loader to reserve space for the other
2099     // partitions.
2100     if (Sec->Partition != PartNo) {
2101       if (IsMain && Sec->Partition == 255)
2102         AddHdr(PT_LOAD, computeFlags(Sec->getPhdrFlags()))->add(Sec);
2103       continue;
2104     }
2105 
2106     // Segments are contiguous memory regions that has the same attributes
2107     // (e.g. executable or writable). There is one phdr for each segment.
2108     // Therefore, we need to create a new phdr when the next section has
2109     // different flags or is loaded at a discontiguous address or memory
2110     // region using AT or AT> linker script command, respectively. At the same
2111     // time, we don't want to create a separate load segment for the headers,
2112     // even if the first output section has an AT or AT> attribute.
2113     uint64_t NewFlags = computeFlags(Sec->getPhdrFlags());
2114     if (!Load ||
2115         ((Sec->LMAExpr ||
2116           (Sec->LMARegion && (Sec->LMARegion != Load->FirstSec->LMARegion))) &&
2117          Load->LastSec != Out::ProgramHeaders) ||
2118         Sec->MemRegion != Load->FirstSec->MemRegion || Flags != NewFlags ||
2119         Sec == RelroEnd) {
2120       Load = AddHdr(PT_LOAD, NewFlags);
2121       Flags = NewFlags;
2122     }
2123 
2124     Load->add(Sec);
2125   }
2126 
2127   // Add a TLS segment if any.
2128   PhdrEntry *TlsHdr = make<PhdrEntry>(PT_TLS, PF_R);
2129   for (OutputSection *Sec : OutputSections)
2130     if (Sec->Partition == PartNo && Sec->Flags & SHF_TLS)
2131       TlsHdr->add(Sec);
2132   if (TlsHdr->FirstSec)
2133     Ret.push_back(TlsHdr);
2134 
2135   // Add an entry for .dynamic.
2136   if (OutputSection *Sec = Part.Dynamic->getParent())
2137     AddHdr(PT_DYNAMIC, Sec->getPhdrFlags())->add(Sec);
2138 
2139   if (RelRo->FirstSec)
2140     Ret.push_back(RelRo);
2141 
2142   // PT_GNU_EH_FRAME is a special section pointing on .eh_frame_hdr.
2143   if (Part.EhFrame->isNeeded() && Part.EhFrameHdr &&
2144       Part.EhFrame->getParent() && Part.EhFrameHdr->getParent())
2145     AddHdr(PT_GNU_EH_FRAME, Part.EhFrameHdr->getParent()->getPhdrFlags())
2146         ->add(Part.EhFrameHdr->getParent());
2147 
2148   // PT_OPENBSD_RANDOMIZE is an OpenBSD-specific feature. That makes
2149   // the dynamic linker fill the segment with random data.
2150   if (OutputSection *Cmd = findSection(".openbsd.randomdata", PartNo))
2151     AddHdr(PT_OPENBSD_RANDOMIZE, Cmd->getPhdrFlags())->add(Cmd);
2152 
2153   // PT_GNU_STACK is a special section to tell the loader to make the
2154   // pages for the stack non-executable. If you really want an executable
2155   // stack, you can pass -z execstack, but that's not recommended for
2156   // security reasons.
2157   unsigned Perm = PF_R | PF_W;
2158   if (Config->ZExecstack)
2159     Perm |= PF_X;
2160   AddHdr(PT_GNU_STACK, Perm)->p_memsz = Config->ZStackSize;
2161 
2162   // PT_OPENBSD_WXNEEDED is a OpenBSD-specific header to mark the executable
2163   // is expected to perform W^X violations, such as calling mprotect(2) or
2164   // mmap(2) with PROT_WRITE | PROT_EXEC, which is prohibited by default on
2165   // OpenBSD.
2166   if (Config->ZWxneeded)
2167     AddHdr(PT_OPENBSD_WXNEEDED, PF_X);
2168 
2169   // Create one PT_NOTE per a group of contiguous SHT_NOTE sections with the
2170   // same alignment.
2171   PhdrEntry *Note = nullptr;
2172   for (OutputSection *Sec : OutputSections) {
2173     if (Sec->Partition != PartNo)
2174       continue;
2175     if (Sec->Type == SHT_NOTE && (Sec->Flags & SHF_ALLOC)) {
2176       if (!Note || Sec->LMAExpr || Note->LastSec->Alignment != Sec->Alignment)
2177         Note = AddHdr(PT_NOTE, PF_R);
2178       Note->add(Sec);
2179     } else {
2180       Note = nullptr;
2181     }
2182   }
2183   return Ret;
2184 }
2185 
2186 template <class ELFT>
2187 void Writer<ELFT>::addPhdrForSection(Partition &Part, unsigned ShType,
2188                                      unsigned PType, unsigned PFlags) {
2189   unsigned PartNo = Part.getNumber();
2190   auto I = llvm::find_if(OutputSections, [=](OutputSection *Cmd) {
2191     return Cmd->Partition == PartNo && Cmd->Type == ShType;
2192   });
2193   if (I == OutputSections.end())
2194     return;
2195 
2196   PhdrEntry *Entry = make<PhdrEntry>(PType, PFlags);
2197   Entry->add(*I);
2198   Part.Phdrs.push_back(Entry);
2199 }
2200 
2201 // The first section of each PT_LOAD, the first section in PT_GNU_RELRO and the
2202 // first section after PT_GNU_RELRO have to be page aligned so that the dynamic
2203 // linker can set the permissions.
2204 template <class ELFT> void Writer<ELFT>::fixSectionAlignments() {
2205   auto PageAlign = [](OutputSection *Cmd) {
2206     if (Cmd && !Cmd->AddrExpr)
2207       Cmd->AddrExpr = [=] {
2208         return alignTo(Script->getDot(), Config->MaxPageSize);
2209       };
2210   };
2211 
2212   for (Partition &Part : Partitions) {
2213     for (const PhdrEntry *P : Part.Phdrs)
2214       if (P->p_type == PT_LOAD && P->FirstSec)
2215         PageAlign(P->FirstSec);
2216 
2217     for (const PhdrEntry *P : Part.Phdrs) {
2218       if (P->p_type != PT_GNU_RELRO)
2219         continue;
2220 
2221       if (P->FirstSec)
2222         PageAlign(P->FirstSec);
2223 
2224       // Find the first section after PT_GNU_RELRO. If it is in a PT_LOAD we
2225       // have to align it to a page.
2226       auto End = OutputSections.end();
2227       auto I = llvm::find(OutputSections, P->LastSec);
2228       if (I == End || (I + 1) == End)
2229         continue;
2230 
2231       OutputSection *Cmd = (*(I + 1));
2232       if (needsPtLoad(Cmd))
2233         PageAlign(Cmd);
2234     }
2235   }
2236 }
2237 
2238 // Compute an in-file position for a given section. The file offset must be the
2239 // same with its virtual address modulo the page size, so that the loader can
2240 // load executables without any address adjustment.
2241 static uint64_t computeFileOffset(OutputSection *OS, uint64_t Off) {
2242   // File offsets are not significant for .bss sections. By convention, we keep
2243   // section offsets monotonically increasing rather than setting to zero.
2244   if (OS->Type == SHT_NOBITS)
2245     return Off;
2246 
2247   // If the section is not in a PT_LOAD, we just have to align it.
2248   if (!OS->PtLoad)
2249     return alignTo(Off, OS->Alignment);
2250 
2251   // The first section in a PT_LOAD has to have congruent offset and address
2252   // module the page size.
2253   OutputSection *First = OS->PtLoad->FirstSec;
2254   if (OS == First) {
2255     uint64_t Alignment = std::max<uint64_t>(OS->Alignment, Config->MaxPageSize);
2256     return alignTo(Off, Alignment, OS->Addr);
2257   }
2258 
2259   // If two sections share the same PT_LOAD the file offset is calculated
2260   // using this formula: Off2 = Off1 + (VA2 - VA1).
2261   return First->Offset + OS->Addr - First->Addr;
2262 }
2263 
2264 // Set an in-file position to a given section and returns the end position of
2265 // the section.
2266 static uint64_t setFileOffset(OutputSection *OS, uint64_t Off) {
2267   Off = computeFileOffset(OS, Off);
2268   OS->Offset = Off;
2269 
2270   if (OS->Type == SHT_NOBITS)
2271     return Off;
2272   return Off + OS->Size;
2273 }
2274 
2275 template <class ELFT> void Writer<ELFT>::assignFileOffsetsBinary() {
2276   uint64_t Off = 0;
2277   for (OutputSection *Sec : OutputSections)
2278     if (Sec->Flags & SHF_ALLOC)
2279       Off = setFileOffset(Sec, Off);
2280   FileSize = alignTo(Off, Config->Wordsize);
2281 }
2282 
2283 static std::string rangeToString(uint64_t Addr, uint64_t Len) {
2284   return "[0x" + utohexstr(Addr) + ", 0x" + utohexstr(Addr + Len - 1) + "]";
2285 }
2286 
2287 // Assign file offsets to output sections.
2288 template <class ELFT> void Writer<ELFT>::assignFileOffsets() {
2289   uint64_t Off = 0;
2290   Off = setFileOffset(Out::ElfHeader, Off);
2291   Off = setFileOffset(Out::ProgramHeaders, Off);
2292 
2293   PhdrEntry *LastRX = nullptr;
2294   for (Partition &Part : Partitions)
2295     for (PhdrEntry *P : Part.Phdrs)
2296       if (P->p_type == PT_LOAD && (P->p_flags & PF_X))
2297         LastRX = P;
2298 
2299   for (OutputSection *Sec : OutputSections) {
2300     Off = setFileOffset(Sec, Off);
2301     if (Script->HasSectionsCommand)
2302       continue;
2303 
2304     // If this is a last section of the last executable segment and that
2305     // segment is the last loadable segment, align the offset of the
2306     // following section to avoid loading non-segments parts of the file.
2307     if (LastRX && LastRX->LastSec == Sec)
2308       Off = alignTo(Off, Config->CommonPageSize);
2309   }
2310 
2311   SectionHeaderOff = alignTo(Off, Config->Wordsize);
2312   FileSize = SectionHeaderOff + (OutputSections.size() + 1) * sizeof(Elf_Shdr);
2313 
2314   // Our logic assumes that sections have rising VA within the same segment.
2315   // With use of linker scripts it is possible to violate this rule and get file
2316   // offset overlaps or overflows. That should never happen with a valid script
2317   // which does not move the location counter backwards and usually scripts do
2318   // not do that. Unfortunately, there are apps in the wild, for example, Linux
2319   // kernel, which control segment distribution explicitly and move the counter
2320   // backwards, so we have to allow doing that to support linking them. We
2321   // perform non-critical checks for overlaps in checkSectionOverlap(), but here
2322   // we want to prevent file size overflows because it would crash the linker.
2323   for (OutputSection *Sec : OutputSections) {
2324     if (Sec->Type == SHT_NOBITS)
2325       continue;
2326     if ((Sec->Offset > FileSize) || (Sec->Offset + Sec->Size > FileSize))
2327       error("unable to place section " + Sec->Name + " at file offset " +
2328             rangeToString(Sec->Offset, Sec->Size) +
2329             "; check your linker script for overflows");
2330   }
2331 }
2332 
2333 // Finalize the program headers. We call this function after we assign
2334 // file offsets and VAs to all sections.
2335 template <class ELFT> void Writer<ELFT>::setPhdrs(Partition &Part) {
2336   for (PhdrEntry *P : Part.Phdrs) {
2337     OutputSection *First = P->FirstSec;
2338     OutputSection *Last = P->LastSec;
2339 
2340     if (First) {
2341       P->p_filesz = Last->Offset - First->Offset;
2342       if (Last->Type != SHT_NOBITS)
2343         P->p_filesz += Last->Size;
2344 
2345       P->p_memsz = Last->Addr + Last->Size - First->Addr;
2346       P->p_offset = First->Offset;
2347       P->p_vaddr = First->Addr;
2348 
2349       // File offsets in partitions other than the main partition are relative
2350       // to the offset of the ELF headers. Perform that adjustment now.
2351       if (Part.ElfHeader)
2352         P->p_offset -= Part.ElfHeader->getParent()->Offset;
2353 
2354       if (!P->HasLMA)
2355         P->p_paddr = First->getLMA();
2356     }
2357 
2358     if (P->p_type == PT_LOAD) {
2359       P->p_align = std::max<uint64_t>(P->p_align, Config->MaxPageSize);
2360     } else if (P->p_type == PT_GNU_RELRO) {
2361       P->p_align = 1;
2362       // The glibc dynamic loader rounds the size down, so we need to round up
2363       // to protect the last page. This is a no-op on FreeBSD which always
2364       // rounds up.
2365       P->p_memsz = alignTo(P->p_memsz, Config->CommonPageSize);
2366     }
2367   }
2368 }
2369 
2370 // A helper struct for checkSectionOverlap.
2371 namespace {
2372 struct SectionOffset {
2373   OutputSection *Sec;
2374   uint64_t Offset;
2375 };
2376 } // namespace
2377 
2378 // Check whether sections overlap for a specific address range (file offsets,
2379 // load and virtual adresses).
2380 static void checkOverlap(StringRef Name, std::vector<SectionOffset> &Sections,
2381                          bool IsVirtualAddr) {
2382   llvm::sort(Sections, [=](const SectionOffset &A, const SectionOffset &B) {
2383     return A.Offset < B.Offset;
2384   });
2385 
2386   // Finding overlap is easy given a vector is sorted by start position.
2387   // If an element starts before the end of the previous element, they overlap.
2388   for (size_t I = 1, End = Sections.size(); I < End; ++I) {
2389     SectionOffset A = Sections[I - 1];
2390     SectionOffset B = Sections[I];
2391     if (B.Offset >= A.Offset + A.Sec->Size)
2392       continue;
2393 
2394     // If both sections are in OVERLAY we allow the overlapping of virtual
2395     // addresses, because it is what OVERLAY was designed for.
2396     if (IsVirtualAddr && A.Sec->InOverlay && B.Sec->InOverlay)
2397       continue;
2398 
2399     errorOrWarn("section " + A.Sec->Name + " " + Name +
2400                 " range overlaps with " + B.Sec->Name + "\n>>> " + A.Sec->Name +
2401                 " range is " + rangeToString(A.Offset, A.Sec->Size) + "\n>>> " +
2402                 B.Sec->Name + " range is " +
2403                 rangeToString(B.Offset, B.Sec->Size));
2404   }
2405 }
2406 
2407 // Check for overlapping sections and address overflows.
2408 //
2409 // In this function we check that none of the output sections have overlapping
2410 // file offsets. For SHF_ALLOC sections we also check that the load address
2411 // ranges and the virtual address ranges don't overlap
2412 template <class ELFT> void Writer<ELFT>::checkSections() {
2413   // First, check that section's VAs fit in available address space for target.
2414   for (OutputSection *OS : OutputSections)
2415     if ((OS->Addr + OS->Size < OS->Addr) ||
2416         (!ELFT::Is64Bits && OS->Addr + OS->Size > UINT32_MAX))
2417       errorOrWarn("section " + OS->Name + " at 0x" + utohexstr(OS->Addr) +
2418                   " of size 0x" + utohexstr(OS->Size) +
2419                   " exceeds available address space");
2420 
2421   // Check for overlapping file offsets. In this case we need to skip any
2422   // section marked as SHT_NOBITS. These sections don't actually occupy space in
2423   // the file so Sec->Offset + Sec->Size can overlap with others. If --oformat
2424   // binary is specified only add SHF_ALLOC sections are added to the output
2425   // file so we skip any non-allocated sections in that case.
2426   std::vector<SectionOffset> FileOffs;
2427   for (OutputSection *Sec : OutputSections)
2428     if (Sec->Size > 0 && Sec->Type != SHT_NOBITS &&
2429         (!Config->OFormatBinary || (Sec->Flags & SHF_ALLOC)))
2430       FileOffs.push_back({Sec, Sec->Offset});
2431   checkOverlap("file", FileOffs, false);
2432 
2433   // When linking with -r there is no need to check for overlapping virtual/load
2434   // addresses since those addresses will only be assigned when the final
2435   // executable/shared object is created.
2436   if (Config->Relocatable)
2437     return;
2438 
2439   // Checking for overlapping virtual and load addresses only needs to take
2440   // into account SHF_ALLOC sections since others will not be loaded.
2441   // Furthermore, we also need to skip SHF_TLS sections since these will be
2442   // mapped to other addresses at runtime and can therefore have overlapping
2443   // ranges in the file.
2444   std::vector<SectionOffset> VMAs;
2445   for (OutputSection *Sec : OutputSections)
2446     if (Sec->Size > 0 && (Sec->Flags & SHF_ALLOC) && !(Sec->Flags & SHF_TLS))
2447       VMAs.push_back({Sec, Sec->Addr});
2448   checkOverlap("virtual address", VMAs, true);
2449 
2450   // Finally, check that the load addresses don't overlap. This will usually be
2451   // the same as the virtual addresses but can be different when using a linker
2452   // script with AT().
2453   std::vector<SectionOffset> LMAs;
2454   for (OutputSection *Sec : OutputSections)
2455     if (Sec->Size > 0 && (Sec->Flags & SHF_ALLOC) && !(Sec->Flags & SHF_TLS))
2456       LMAs.push_back({Sec, Sec->getLMA()});
2457   checkOverlap("load address", LMAs, false);
2458 }
2459 
2460 // The entry point address is chosen in the following ways.
2461 //
2462 // 1. the '-e' entry command-line option;
2463 // 2. the ENTRY(symbol) command in a linker control script;
2464 // 3. the value of the symbol _start, if present;
2465 // 4. the number represented by the entry symbol, if it is a number;
2466 // 5. the address of the first byte of the .text section, if present;
2467 // 6. the address 0.
2468 static uint64_t getEntryAddr() {
2469   // Case 1, 2 or 3
2470   if (Symbol *B = Symtab->find(Config->Entry))
2471     return B->getVA();
2472 
2473   // Case 4
2474   uint64_t Addr;
2475   if (to_integer(Config->Entry, Addr))
2476     return Addr;
2477 
2478   // Case 5
2479   if (OutputSection *Sec = findSection(".text")) {
2480     if (Config->WarnMissingEntry)
2481       warn("cannot find entry symbol " + Config->Entry + "; defaulting to 0x" +
2482            utohexstr(Sec->Addr));
2483     return Sec->Addr;
2484   }
2485 
2486   // Case 6
2487   if (Config->WarnMissingEntry)
2488     warn("cannot find entry symbol " + Config->Entry +
2489          "; not setting start address");
2490   return 0;
2491 }
2492 
2493 static uint16_t getELFType() {
2494   if (Config->Pic)
2495     return ET_DYN;
2496   if (Config->Relocatable)
2497     return ET_REL;
2498   return ET_EXEC;
2499 }
2500 
2501 template <class ELFT> void Writer<ELFT>::writeHeader() {
2502   writeEhdr<ELFT>(Out::BufferStart, *Main);
2503   writePhdrs<ELFT>(Out::BufferStart + sizeof(Elf_Ehdr), *Main);
2504 
2505   auto *EHdr = reinterpret_cast<Elf_Ehdr *>(Out::BufferStart);
2506   EHdr->e_type = getELFType();
2507   EHdr->e_entry = getEntryAddr();
2508   EHdr->e_shoff = SectionHeaderOff;
2509 
2510   // Write the section header table.
2511   //
2512   // The ELF header can only store numbers up to SHN_LORESERVE in the e_shnum
2513   // and e_shstrndx fields. When the value of one of these fields exceeds
2514   // SHN_LORESERVE ELF requires us to put sentinel values in the ELF header and
2515   // use fields in the section header at index 0 to store
2516   // the value. The sentinel values and fields are:
2517   // e_shnum = 0, SHdrs[0].sh_size = number of sections.
2518   // e_shstrndx = SHN_XINDEX, SHdrs[0].sh_link = .shstrtab section index.
2519   auto *SHdrs = reinterpret_cast<Elf_Shdr *>(Out::BufferStart + EHdr->e_shoff);
2520   size_t Num = OutputSections.size() + 1;
2521   if (Num >= SHN_LORESERVE)
2522     SHdrs->sh_size = Num;
2523   else
2524     EHdr->e_shnum = Num;
2525 
2526   uint32_t StrTabIndex = In.ShStrTab->getParent()->SectionIndex;
2527   if (StrTabIndex >= SHN_LORESERVE) {
2528     SHdrs->sh_link = StrTabIndex;
2529     EHdr->e_shstrndx = SHN_XINDEX;
2530   } else {
2531     EHdr->e_shstrndx = StrTabIndex;
2532   }
2533 
2534   for (OutputSection *Sec : OutputSections)
2535     Sec->writeHeaderTo<ELFT>(++SHdrs);
2536 }
2537 
2538 // Open a result file.
2539 template <class ELFT> void Writer<ELFT>::openFile() {
2540   uint64_t MaxSize = Config->Is64 ? INT64_MAX : UINT32_MAX;
2541   if (FileSize != size_t(FileSize) || MaxSize < FileSize) {
2542     error("output file too large: " + Twine(FileSize) + " bytes");
2543     return;
2544   }
2545 
2546   unlinkAsync(Config->OutputFile);
2547   unsigned Flags = 0;
2548   if (!Config->Relocatable)
2549     Flags = FileOutputBuffer::F_executable;
2550   Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
2551       FileOutputBuffer::create(Config->OutputFile, FileSize, Flags);
2552 
2553   if (!BufferOrErr) {
2554     error("failed to open " + Config->OutputFile + ": " +
2555           llvm::toString(BufferOrErr.takeError()));
2556     return;
2557   }
2558   Buffer = std::move(*BufferOrErr);
2559   Out::BufferStart = Buffer->getBufferStart();
2560 }
2561 
2562 template <class ELFT> void Writer<ELFT>::writeSectionsBinary() {
2563   for (OutputSection *Sec : OutputSections)
2564     if (Sec->Flags & SHF_ALLOC)
2565       Sec->writeTo<ELFT>(Out::BufferStart + Sec->Offset);
2566 }
2567 
2568 static void fillTrap(uint8_t *I, uint8_t *End) {
2569   for (; I + 4 <= End; I += 4)
2570     memcpy(I, &Target->TrapInstr, 4);
2571 }
2572 
2573 // Fill the last page of executable segments with trap instructions
2574 // instead of leaving them as zero. Even though it is not required by any
2575 // standard, it is in general a good thing to do for security reasons.
2576 //
2577 // We'll leave other pages in segments as-is because the rest will be
2578 // overwritten by output sections.
2579 template <class ELFT> void Writer<ELFT>::writeTrapInstr() {
2580   if (Script->HasSectionsCommand)
2581     return;
2582 
2583   for (Partition &Part : Partitions) {
2584     // Fill the last page.
2585     for (PhdrEntry *P : Part.Phdrs)
2586       if (P->p_type == PT_LOAD && (P->p_flags & PF_X))
2587         fillTrap(Out::BufferStart + alignDown(P->FirstSec->Offset + P->p_filesz,
2588                                               Config->CommonPageSize),
2589                  Out::BufferStart + alignTo(P->FirstSec->Offset + P->p_filesz,
2590                                             Config->CommonPageSize));
2591 
2592     // Round up the file size of the last segment to the page boundary iff it is
2593     // an executable segment to ensure that other tools don't accidentally
2594     // trim the instruction padding (e.g. when stripping the file).
2595     PhdrEntry *Last = nullptr;
2596     for (PhdrEntry *P : Part.Phdrs)
2597       if (P->p_type == PT_LOAD)
2598         Last = P;
2599 
2600     if (Last && (Last->p_flags & PF_X))
2601       Last->p_memsz = Last->p_filesz =
2602           alignTo(Last->p_filesz, Config->CommonPageSize);
2603   }
2604 }
2605 
2606 // Write section contents to a mmap'ed file.
2607 template <class ELFT> void Writer<ELFT>::writeSections() {
2608   // In -r or -emit-relocs mode, write the relocation sections first as in
2609   // ELf_Rel targets we might find out that we need to modify the relocated
2610   // section while doing it.
2611   for (OutputSection *Sec : OutputSections)
2612     if (Sec->Type == SHT_REL || Sec->Type == SHT_RELA)
2613       Sec->writeTo<ELFT>(Out::BufferStart + Sec->Offset);
2614 
2615   for (OutputSection *Sec : OutputSections)
2616     if (Sec->Type != SHT_REL && Sec->Type != SHT_RELA)
2617       Sec->writeTo<ELFT>(Out::BufferStart + Sec->Offset);
2618 }
2619 
2620 // Split one uint8 array into small pieces of uint8 arrays.
2621 static std::vector<ArrayRef<uint8_t>> split(ArrayRef<uint8_t> Arr,
2622                                             size_t ChunkSize) {
2623   std::vector<ArrayRef<uint8_t>> Ret;
2624   while (Arr.size() > ChunkSize) {
2625     Ret.push_back(Arr.take_front(ChunkSize));
2626     Arr = Arr.drop_front(ChunkSize);
2627   }
2628   if (!Arr.empty())
2629     Ret.push_back(Arr);
2630   return Ret;
2631 }
2632 
2633 // Computes a hash value of Data using a given hash function.
2634 // In order to utilize multiple cores, we first split data into 1MB
2635 // chunks, compute a hash for each chunk, and then compute a hash value
2636 // of the hash values.
2637 static void
2638 computeHash(llvm::MutableArrayRef<uint8_t> HashBuf,
2639             llvm::ArrayRef<uint8_t> Data,
2640             std::function<void(uint8_t *Dest, ArrayRef<uint8_t> Arr)> HashFn) {
2641   std::vector<ArrayRef<uint8_t>> Chunks = split(Data, 1024 * 1024);
2642   std::vector<uint8_t> Hashes(Chunks.size() * HashBuf.size());
2643 
2644   // Compute hash values.
2645   parallelForEachN(0, Chunks.size(), [&](size_t I) {
2646     HashFn(Hashes.data() + I * HashBuf.size(), Chunks[I]);
2647   });
2648 
2649   // Write to the final output buffer.
2650   HashFn(HashBuf.data(), Hashes);
2651 }
2652 
2653 template <class ELFT> void Writer<ELFT>::writeBuildId() {
2654   if (!Main->BuildId || !Main->BuildId->getParent())
2655     return;
2656 
2657   if (Config->BuildId == BuildIdKind::Hexstring) {
2658     for (Partition &Part : Partitions)
2659       Part.BuildId->writeBuildId(Config->BuildIdVector);
2660     return;
2661   }
2662 
2663   // Compute a hash of all sections of the output file.
2664   size_t HashSize = Main->BuildId->HashSize;
2665   std::vector<uint8_t> BuildId(HashSize);
2666   llvm::ArrayRef<uint8_t> Buf{Out::BufferStart, size_t(FileSize)};
2667 
2668   switch (Config->BuildId) {
2669   case BuildIdKind::Fast:
2670     computeHash(BuildId, Buf, [](uint8_t *Dest, ArrayRef<uint8_t> Arr) {
2671       write64le(Dest, xxHash64(Arr));
2672     });
2673     break;
2674   case BuildIdKind::Md5:
2675     computeHash(BuildId, Buf, [&](uint8_t *Dest, ArrayRef<uint8_t> Arr) {
2676       memcpy(Dest, MD5::hash(Arr).data(), HashSize);
2677     });
2678     break;
2679   case BuildIdKind::Sha1:
2680     computeHash(BuildId, Buf, [&](uint8_t *Dest, ArrayRef<uint8_t> Arr) {
2681       memcpy(Dest, SHA1::hash(Arr).data(), HashSize);
2682     });
2683     break;
2684   case BuildIdKind::Uuid:
2685     if (auto EC = llvm::getRandomBytes(BuildId.data(), HashSize))
2686       error("entropy source failure: " + EC.message());
2687     break;
2688   default:
2689     llvm_unreachable("unknown BuildIdKind");
2690   }
2691   for (Partition &Part : Partitions)
2692     Part.BuildId->writeBuildId(BuildId);
2693 }
2694 
2695 template void elf::writeResult<ELF32LE>();
2696 template void elf::writeResult<ELF32BE>();
2697 template void elf::writeResult<ELF64LE>();
2698 template void elf::writeResult<ELF64BE>();
2699