1 //===- LinkerScript.cpp ---------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the parser/evaluator of the linker script.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "LinkerScript.h"
15 #include "Config.h"
16 #include "InputSection.h"
17 #include "Memory.h"
18 #include "OutputSections.h"
19 #include "Strings.h"
20 #include "SymbolTable.h"
21 #include "Symbols.h"
22 #include "SyntheticSections.h"
23 #include "Target.h"
24 #include "Threads.h"
25 #include "Writer.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/Compression.h"
31 #include "llvm/Support/Endian.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/FileSystem.h"
34 #include "llvm/Support/Path.h"
35 #include <algorithm>
36 #include <cassert>
37 #include <cstddef>
38 #include <cstdint>
39 #include <iterator>
40 #include <limits>
41 #include <string>
42 #include <vector>
43 
44 using namespace llvm;
45 using namespace llvm::ELF;
46 using namespace llvm::object;
47 using namespace llvm::support::endian;
48 using namespace lld;
49 using namespace lld::elf;
50 
51 LinkerScript *elf::Script;
52 
53 uint64_t ExprValue::getValue() const {
54   if (Sec) {
55     if (OutputSection *OS = Sec->getOutputSection())
56       return alignTo(Sec->getOffset(Val) + OS->Addr, Alignment);
57     error(Loc + ": unable to evaluate expression: input section " + Sec->Name +
58           " has no output section assigned");
59   }
60   return alignTo(Val, Alignment);
61 }
62 
63 uint64_t ExprValue::getSecAddr() const {
64   if (Sec)
65     return Sec->getOffset(0) + Sec->getOutputSection()->Addr;
66   return 0;
67 }
68 
69 template <class ELFT> static SymbolBody *addRegular(SymbolAssignment *Cmd) {
70   Symbol *Sym;
71   uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT;
72   std::tie(Sym, std::ignore) = Symtab<ELFT>::X->insert(
73       Cmd->Name, /*Type*/ 0, Visibility, /*CanOmitFromDynSym*/ false,
74       /*File*/ nullptr);
75   Sym->Binding = STB_GLOBAL;
76   ExprValue Value = Cmd->Expression();
77   SectionBase *Sec = Value.isAbsolute() ? nullptr : Value.Sec;
78 
79   // We want to set symbol values early if we can. This allows us to use symbols
80   // as variables in linker scripts. Doing so allows us to write expressions
81   // like this: `alignment = 16; . = ALIGN(., alignment)`
82   uint64_t SymValue = Value.isAbsolute() ? Value.getValue() : 0;
83   replaceBody<DefinedRegular>(Sym, Cmd->Name, /*IsLocal=*/false, Visibility,
84                               STT_NOTYPE, SymValue, 0, Sec, nullptr);
85   return Sym->body();
86 }
87 
88 OutputSectionCommand *
89 LinkerScript::createOutputSectionCommand(StringRef Name, StringRef Location) {
90   OutputSectionCommand *&CmdRef = NameToOutputSectionCommand[Name];
91   OutputSectionCommand *Cmd;
92   if (CmdRef && CmdRef->Location.empty()) {
93     // There was a forward reference.
94     Cmd = CmdRef;
95   } else {
96     Cmd = make<OutputSectionCommand>(Name);
97     if (!CmdRef)
98       CmdRef = Cmd;
99   }
100   Cmd->Location = Location;
101   return Cmd;
102 }
103 
104 OutputSectionCommand *
105 LinkerScript::getOrCreateOutputSectionCommand(StringRef Name) {
106   OutputSectionCommand *&CmdRef = NameToOutputSectionCommand[Name];
107   if (!CmdRef)
108     CmdRef = make<OutputSectionCommand>(Name);
109   return CmdRef;
110 }
111 
112 void LinkerScript::setDot(Expr E, const Twine &Loc, bool InSec) {
113   uint64_t Val = E().getValue();
114   if (Val < Dot) {
115     if (InSec)
116       error(Loc + ": unable to move location counter backward for: " +
117             CurOutSec->Name);
118     else
119       error(Loc + ": unable to move location counter backward");
120   }
121   Dot = Val;
122   // Update to location counter means update to section size.
123   if (InSec)
124     CurOutSec->Size = Dot - CurOutSec->Addr;
125 }
126 
127 // Sets value of a symbol. Two kinds of symbols are processed: synthetic
128 // symbols, whose value is an offset from beginning of section and regular
129 // symbols whose value is absolute.
130 void LinkerScript::assignSymbol(SymbolAssignment *Cmd, bool InSec) {
131   if (Cmd->Name == ".") {
132     setDot(Cmd->Expression, Cmd->Location, InSec);
133     return;
134   }
135 
136   if (!Cmd->Sym)
137     return;
138 
139   auto *Sym = cast<DefinedRegular>(Cmd->Sym);
140   ExprValue V = Cmd->Expression();
141   if (V.isAbsolute()) {
142     Sym->Value = V.getValue();
143   } else {
144     Sym->Section = V.Sec;
145     Sym->Value = alignTo(V.Val, V.Alignment);
146   }
147 }
148 
149 static SymbolBody *findSymbol(StringRef S) {
150   switch (Config->EKind) {
151   case ELF32LEKind:
152     return Symtab<ELF32LE>::X->find(S);
153   case ELF32BEKind:
154     return Symtab<ELF32BE>::X->find(S);
155   case ELF64LEKind:
156     return Symtab<ELF64LE>::X->find(S);
157   case ELF64BEKind:
158     return Symtab<ELF64BE>::X->find(S);
159   default:
160     llvm_unreachable("unknown Config->EKind");
161   }
162 }
163 
164 static SymbolBody *addRegularSymbol(SymbolAssignment *Cmd) {
165   switch (Config->EKind) {
166   case ELF32LEKind:
167     return addRegular<ELF32LE>(Cmd);
168   case ELF32BEKind:
169     return addRegular<ELF32BE>(Cmd);
170   case ELF64LEKind:
171     return addRegular<ELF64LE>(Cmd);
172   case ELF64BEKind:
173     return addRegular<ELF64BE>(Cmd);
174   default:
175     llvm_unreachable("unknown Config->EKind");
176   }
177 }
178 
179 void LinkerScript::addSymbol(SymbolAssignment *Cmd) {
180   if (Cmd->Name == ".")
181     return;
182 
183   // If a symbol was in PROVIDE(), we need to define it only when
184   // it is a referenced undefined symbol.
185   SymbolBody *B = findSymbol(Cmd->Name);
186   if (Cmd->Provide && (!B || B->isDefined()))
187     return;
188 
189   Cmd->Sym = addRegularSymbol(Cmd);
190 }
191 
192 bool SymbolAssignment::classof(const BaseCommand *C) {
193   return C->Kind == AssignmentKind;
194 }
195 
196 bool OutputSectionCommand::classof(const BaseCommand *C) {
197   return C->Kind == OutputSectionKind;
198 }
199 
// Fill [Buf, Buf + Size) with repeated copies of the 4 bytes of Filler, as
// they are laid out in memory. A trailing partial copy covers sizes that are
// not a multiple of 4.
// This is used for linker script "=fillexp" command.
static void fill(uint8_t *Buf, size_t Size, uint32_t Filler) {
  uint8_t *Cursor = Buf;
  size_t Remaining = Size;

  // Whole-word copies while more than one word is still outstanding.
  while (Remaining > 4) {
    memcpy(Cursor, &Filler, 4);
    Cursor += 4;
    Remaining -= 4;
  }

  // Last chunk: between 0 and 4 bytes.
  memcpy(Cursor, &Filler, Remaining);
}
208 
209 bool InputSectionDescription::classof(const BaseCommand *C) {
210   return C->Kind == InputSectionKind;
211 }
212 
213 bool AssertCommand::classof(const BaseCommand *C) {
214   return C->Kind == AssertKind;
215 }
216 
217 bool BytesDataCommand::classof(const BaseCommand *C) {
218   return C->Kind == BytesDataKind;
219 }
220 
221 static StringRef basename(InputSectionBase *S) {
222   if (S->File)
223     return sys::path::filename(S->File->getName());
224   return "";
225 }
226 
227 bool LinkerScript::shouldKeep(InputSectionBase *S) {
228   for (InputSectionDescription *ID : Opt.KeptSections)
229     if (ID->FilePat.match(basename(S)))
230       for (SectionPattern &P : ID->SectionPatterns)
231         if (P.SectionPat.match(S->Name))
232           return true;
233   return false;
234 }
235 
236 // A helper function for the SORT() command.
237 static std::function<bool(InputSectionBase *, InputSectionBase *)>
238 getComparator(SortSectionPolicy K) {
239   switch (K) {
240   case SortSectionPolicy::Alignment:
241     return [](InputSectionBase *A, InputSectionBase *B) {
242       // ">" is not a mistake. Sections with larger alignments are placed
243       // before sections with smaller alignments in order to reduce the
244       // amount of padding necessary. This is compatible with GNU.
245       return A->Alignment > B->Alignment;
246     };
247   case SortSectionPolicy::Name:
248     return [](InputSectionBase *A, InputSectionBase *B) {
249       return A->Name < B->Name;
250     };
251   case SortSectionPolicy::Priority:
252     return [](InputSectionBase *A, InputSectionBase *B) {
253       return getPriority(A->Name) < getPriority(B->Name);
254     };
255   default:
256     llvm_unreachable("unknown sort policy");
257   }
258 }
259 
260 // A helper function for the SORT() command.
261 static bool matchConstraints(ArrayRef<InputSectionBase *> Sections,
262                              ConstraintKind Kind) {
263   if (Kind == ConstraintKind::NoConstraint)
264     return true;
265 
266   bool IsRW = llvm::any_of(Sections, [](InputSectionBase *Sec) {
267     return static_cast<InputSectionBase *>(Sec)->Flags & SHF_WRITE;
268   });
269 
270   return (IsRW && Kind == ConstraintKind::ReadWrite) ||
271          (!IsRW && Kind == ConstraintKind::ReadOnly);
272 }
273 
274 static void sortSections(InputSection **Begin, InputSection **End,
275                          SortSectionPolicy K) {
276   if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None)
277     std::stable_sort(Begin, End, getComparator(K));
278 }
279 
280 // Compute and remember which sections the InputSectionDescription matches.
281 std::vector<InputSection *>
282 LinkerScript::computeInputSections(const InputSectionDescription *Cmd) {
283   std::vector<InputSection *> Ret;
284 
285   // Collects all sections that satisfy constraints of Cmd.
286   for (const SectionPattern &Pat : Cmd->SectionPatterns) {
287     size_t SizeBefore = Ret.size();
288 
289     for (InputSectionBase *Sec : InputSections) {
290       if (Sec->Assigned)
291         continue;
292 
293       if (!Sec->Live) {
294         reportDiscarded(Sec);
295         continue;
296       }
297 
298       // For -emit-relocs we have to ignore entries like
299       //   .rela.dyn : { *(.rela.data) }
300       // which are common because they are in the default bfd script.
301       if (Sec->Type == SHT_REL || Sec->Type == SHT_RELA)
302         continue;
303 
304       StringRef Filename = basename(Sec);
305       if (!Cmd->FilePat.match(Filename) ||
306           Pat.ExcludedFilePat.match(Filename) ||
307           !Pat.SectionPat.match(Sec->Name))
308         continue;
309 
310       Ret.push_back(cast<InputSection>(Sec));
311       Sec->Assigned = true;
312     }
313 
314     // Sort sections as instructed by SORT-family commands and --sort-section
315     // option. Because SORT-family commands can be nested at most two depth
316     // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command
317     // line option is respected even if a SORT command is given, the exact
318     // behavior we have here is a bit complicated. Here are the rules.
319     //
320     // 1. If two SORT commands are given, --sort-section is ignored.
321     // 2. If one SORT command is given, and if it is not SORT_NONE,
322     //    --sort-section is handled as an inner SORT command.
323     // 3. If one SORT command is given, and if it is SORT_NONE, don't sort.
324     // 4. If no SORT command is given, sort according to --sort-section.
325     InputSection **Begin = Ret.data() + SizeBefore;
326     InputSection **End = Ret.data() + Ret.size();
327     if (Pat.SortOuter != SortSectionPolicy::None) {
328       if (Pat.SortInner == SortSectionPolicy::Default)
329         sortSections(Begin, End, Config->SortSection);
330       else
331         sortSections(Begin, End, Pat.SortInner);
332       sortSections(Begin, End, Pat.SortOuter);
333     }
334   }
335   return Ret;
336 }
337 
338 void LinkerScript::discard(ArrayRef<InputSectionBase *> V) {
339   for (InputSectionBase *S : V) {
340     S->Live = false;
341     if (S == InX::ShStrTab || S == InX::Dynamic || S == InX::DynSymTab ||
342         S == InX::DynStrTab)
343       error("discarding " + S->Name + " section is not allowed");
344     discard(S->DependentSections);
345   }
346 }
347 
348 std::vector<InputSectionBase *>
349 LinkerScript::createInputSectionList(OutputSectionCommand &OutCmd) {
350   std::vector<InputSectionBase *> Ret;
351 
352   for (BaseCommand *Base : OutCmd.Commands) {
353     auto *Cmd = dyn_cast<InputSectionDescription>(Base);
354     if (!Cmd)
355       continue;
356 
357     Cmd->Sections = computeInputSections(Cmd);
358     Ret.insert(Ret.end(), Cmd->Sections.begin(), Cmd->Sections.end());
359   }
360 
361   return Ret;
362 }
363 
364 void LinkerScript::processCommands(OutputSectionFactory &Factory) {
365   // A symbol can be assigned before any section is mentioned in the linker
366   // script. In an DSO, the symbol values are addresses, so the only important
367   // section values are:
368   // * SHN_UNDEF
369   // * SHN_ABS
370   // * Any value meaning a regular section.
371   // To handle that, create a dummy aether section that fills the void before
372   // the linker scripts switches to another section. It has an index of one
373   // which will map to whatever the first actual section is.
374   Aether = make<OutputSection>("", 0, SHF_ALLOC);
375   Aether->SectionIndex = 1;
376   CurOutSec = Aether;
377   Dot = 0;
378 
379   for (size_t I = 0; I < Opt.Commands.size(); ++I) {
380     // Handle symbol assignments outside of any output section.
381     if (auto *Cmd = dyn_cast<SymbolAssignment>(Opt.Commands[I])) {
382       addSymbol(Cmd);
383       continue;
384     }
385 
386     if (auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I])) {
387       std::vector<InputSectionBase *> V = createInputSectionList(*Cmd);
388 
389       // The output section name `/DISCARD/' is special.
390       // Any input section assigned to it is discarded.
391       if (Cmd->Name == "/DISCARD/") {
392         discard(V);
393         continue;
394       }
395 
396       // This is for ONLY_IF_RO and ONLY_IF_RW. An output section directive
397       // ".foo : ONLY_IF_R[OW] { ... }" is handled only if all member input
398       // sections satisfy a given constraint. If not, a directive is handled
399       // as if it wasn't present from the beginning.
400       //
401       // Because we'll iterate over Commands many more times, the easiest
402       // way to "make it as if it wasn't present" is to just remove it.
403       if (!matchConstraints(V, Cmd->Constraint)) {
404         for (InputSectionBase *S : V)
405           S->Assigned = false;
406         Opt.Commands.erase(Opt.Commands.begin() + I);
407         --I;
408         continue;
409       }
410 
411       // A directive may contain symbol definitions like this:
412       // ".foo : { ...; bar = .; }". Handle them.
413       for (BaseCommand *Base : Cmd->Commands)
414         if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base))
415           addSymbol(OutCmd);
416 
417       // Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign
418       // is given, input sections are aligned to that value, whether the
419       // given value is larger or smaller than the original section alignment.
420       if (Cmd->SubalignExpr) {
421         uint32_t Subalign = Cmd->SubalignExpr().getValue();
422         for (InputSectionBase *S : V)
423           S->Alignment = Subalign;
424       }
425 
426       // Add input sections to an output section.
427       for (InputSectionBase *S : V)
428         Factory.addInputSec(S, Cmd->Name, Cmd->Sec);
429       if (OutputSection *Sec = Cmd->Sec) {
430         assert(Sec->SectionIndex == INT_MAX);
431         Sec->SectionIndex = I;
432         if (Cmd->Noload)
433           Sec->Type = SHT_NOBITS;
434         SecToCommand[Sec] = Cmd;
435       }
436     }
437   }
438   CurOutSec = nullptr;
439 }
440 
441 void LinkerScript::fabricateDefaultCommands() {
442   std::vector<BaseCommand *> Commands;
443 
444   // Define start address
445   uint64_t StartAddr = -1;
446 
447   // The Sections with -T<section> have been sorted in order of ascending
448   // address. We must lower StartAddr if the lowest -T<section address> as
449   // calls to setDot() must be monotonically increasing.
450   for (auto& KV : Config->SectionStartMap)
451     StartAddr = std::min(StartAddr, KV.second);
452 
453   Commands.push_back(make<SymbolAssignment>(
454       ".",
455       [=] {
456         return std::min(StartAddr, Config->ImageBase + elf::getHeaderSize());
457       },
458       ""));
459 
460   // For each OutputSection that needs a VA fabricate an OutputSectionCommand
461   // with an InputSectionDescription describing the InputSections
462   for (OutputSection *Sec : OutputSections) {
463     auto *OSCmd = createOutputSectionCommand(Sec->Name, "<internal>");
464     OSCmd->Sec = Sec;
465     SecToCommand[Sec] = OSCmd;
466 
467     Commands.push_back(OSCmd);
468     if (Sec->Sections.size()) {
469       auto *ISD = make<InputSectionDescription>("");
470       OSCmd->Commands.push_back(ISD);
471       for (InputSection *ISec : Sec->Sections) {
472         ISD->Sections.push_back(ISec);
473         ISec->Assigned = true;
474       }
475     }
476   }
477   // SECTIONS commands run before other non SECTIONS commands
478   Commands.insert(Commands.end(), Opt.Commands.begin(), Opt.Commands.end());
479   Opt.Commands = std::move(Commands);
480 }
481 
482 // Add sections that didn't match any sections command.
483 void LinkerScript::addOrphanSections(OutputSectionFactory &Factory) {
484   for (InputSectionBase *S : InputSections) {
485     if (!S->Live || S->Parent)
486       continue;
487     StringRef Name = getOutputSectionName(S->Name);
488     auto I = std::find_if(
489         Opt.Commands.begin(), Opt.Commands.end(), [&](BaseCommand *Base) {
490           if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base))
491             return Cmd->Name == Name;
492           return false;
493         });
494     if (I == Opt.Commands.end()) {
495       Factory.addInputSec(S, Name);
496     } else {
497       auto *Cmd = cast<OutputSectionCommand>(*I);
498       Factory.addInputSec(S, Name, Cmd->Sec);
499       if (OutputSection *Sec = Cmd->Sec) {
500         SecToCommand[Sec] = Cmd;
501         unsigned Index = std::distance(Opt.Commands.begin(), I);
502         assert(Sec->SectionIndex == INT_MAX || Sec->SectionIndex == Index);
503         Sec->SectionIndex = Index;
504       }
505       auto *ISD = make<InputSectionDescription>("");
506       ISD->Sections.push_back(cast<InputSection>(S));
507       Cmd->Commands.push_back(ISD);
508     }
509   }
510 }
511 
512 uint64_t LinkerScript::advance(uint64_t Size, unsigned Align) {
513   bool IsTbss = (CurOutSec->Flags & SHF_TLS) && CurOutSec->Type == SHT_NOBITS;
514   uint64_t Start = IsTbss ? Dot + ThreadBssOffset : Dot;
515   Start = alignTo(Start, Align);
516   uint64_t End = Start + Size;
517 
518   if (IsTbss)
519     ThreadBssOffset = End - Dot;
520   else
521     Dot = End;
522   return End;
523 }
524 
525 void LinkerScript::output(InputSection *S) {
526   uint64_t Pos = advance(S->getSize(), S->Alignment);
527   S->OutSecOff = Pos - S->getSize() - CurOutSec->Addr;
528 
529   // Update output section size after adding each section. This is so that
530   // SIZEOF works correctly in the case below:
531   // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) }
532   CurOutSec->Size = Pos - CurOutSec->Addr;
533 
534   // If there is a memory region associated with this input section, then
535   // place the section in that region and update the region index.
536   if (CurMemRegion) {
537     CurMemRegion->Offset += CurOutSec->Size;
538     uint64_t CurSize = CurMemRegion->Offset - CurMemRegion->Origin;
539     if (CurSize > CurMemRegion->Length) {
540       uint64_t OverflowAmt = CurSize - CurMemRegion->Length;
541       error("section '" + CurOutSec->Name + "' will not fit in region '" +
542             CurMemRegion->Name + "': overflowed by " + Twine(OverflowAmt) +
543             " bytes");
544     }
545   }
546 }
547 
548 void LinkerScript::switchTo(OutputSection *Sec) {
549   if (CurOutSec == Sec)
550     return;
551 
552   CurOutSec = Sec;
553   CurOutSec->Addr = advance(0, CurOutSec->Alignment);
554 
555   // If neither AT nor AT> is specified for an allocatable section, the linker
556   // will set the LMA such that the difference between VMA and LMA for the
557   // section is the same as the preceding output section in the same region
558   // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html
559   if (LMAOffset)
560     CurOutSec->LMAOffset = LMAOffset();
561 }
562 
563 void LinkerScript::process(BaseCommand &Base) {
564   // This handles the assignments to symbol or to the dot.
565   if (auto *Cmd = dyn_cast<SymbolAssignment>(&Base)) {
566     assignSymbol(Cmd, true);
567     return;
568   }
569 
570   // Handle BYTE(), SHORT(), LONG(), or QUAD().
571   if (auto *Cmd = dyn_cast<BytesDataCommand>(&Base)) {
572     Cmd->Offset = Dot - CurOutSec->Addr;
573     Dot += Cmd->Size;
574     CurOutSec->Size = Dot - CurOutSec->Addr;
575     return;
576   }
577 
578   // Handle ASSERT().
579   if (auto *Cmd = dyn_cast<AssertCommand>(&Base)) {
580     Cmd->Expression();
581     return;
582   }
583 
584   // Handle a single input section description command.
585   // It calculates and assigns the offsets for each section and also
586   // updates the output section size.
587   auto &Cmd = cast<InputSectionDescription>(Base);
588   for (InputSection *Sec : Cmd.Sections) {
589     // We tentatively added all synthetic sections at the beginning and removed
590     // empty ones afterwards (because there is no way to know whether they were
591     // going be empty or not other than actually running linker scripts.)
592     // We need to ignore remains of empty sections.
593     if (auto *S = dyn_cast<SyntheticSection>(Sec))
594       if (S->empty())
595         continue;
596 
597     if (!Sec->Live)
598       continue;
599     assert(CurOutSec == Sec->getParent());
600     output(Sec);
601   }
602 }
603 
604 // This function searches for a memory region to place the given output
605 // section in. If found, a pointer to the appropriate memory region is
606 // returned. Otherwise, a nullptr is returned.
607 MemoryRegion *LinkerScript::findMemoryRegion(OutputSectionCommand *Cmd) {
608   // If a memory region name was specified in the output section command,
609   // then try to find that region first.
610   if (!Cmd->MemoryRegionName.empty()) {
611     auto It = Opt.MemoryRegions.find(Cmd->MemoryRegionName);
612     if (It != Opt.MemoryRegions.end())
613       return &It->second;
614     error("memory region '" + Cmd->MemoryRegionName + "' not declared");
615     return nullptr;
616   }
617 
618   // If at least one memory region is defined, all sections must
619   // belong to some memory region. Otherwise, we don't need to do
620   // anything for memory regions.
621   if (Opt.MemoryRegions.empty())
622     return nullptr;
623 
624   OutputSection *Sec = Cmd->Sec;
625   // See if a region can be found by matching section flags.
626   for (auto &Pair : Opt.MemoryRegions) {
627     MemoryRegion &M = Pair.second;
628     if ((M.Flags & Sec->Flags) && (M.NegFlags & Sec->Flags) == 0)
629       return &M;
630   }
631 
632   // Otherwise, no suitable region was found.
633   if (Sec->Flags & SHF_ALLOC)
634     error("no memory region specified for section '" + Sec->Name + "'");
635   return nullptr;
636 }
637 
638 // This function assigns offsets to input sections and an output section
639 // for a single sections command (e.g. ".text { *(.text); }").
640 void LinkerScript::assignOffsets(OutputSectionCommand *Cmd) {
641   OutputSection *Sec = Cmd->Sec;
642   if (!Sec)
643     return;
644 
645   if (!(Sec->Flags & SHF_ALLOC))
646     Dot = 0;
647   else if (Cmd->AddrExpr)
648     setDot(Cmd->AddrExpr, Cmd->Location, false);
649 
650   if (Cmd->LMAExpr) {
651     uint64_t D = Dot;
652     LMAOffset = [=] { return Cmd->LMAExpr().getValue() - D; };
653   }
654 
655   CurMemRegion = Cmd->MemRegion;
656   if (CurMemRegion)
657     Dot = CurMemRegion->Offset;
658   switchTo(Sec);
659 
660   // We do not support custom layout for compressed debug sectons.
661   // At this point we already know their size and have compressed content.
662   if (CurOutSec->Flags & SHF_COMPRESSED)
663     return;
664 
665   for (BaseCommand *C : Cmd->Commands)
666     process(*C);
667 }
668 
669 void LinkerScript::removeEmptyCommands() {
670   // It is common practice to use very generic linker scripts. So for any
671   // given run some of the output sections in the script will be empty.
672   // We could create corresponding empty output sections, but that would
673   // clutter the output.
674   // We instead remove trivially empty sections. The bfd linker seems even
675   // more aggressive at removing them.
676   auto Pos = std::remove_if(
677       Opt.Commands.begin(), Opt.Commands.end(), [&](BaseCommand *Base) {
678         if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base))
679           return Cmd->Sec == nullptr;
680         return false;
681       });
682   Opt.Commands.erase(Pos, Opt.Commands.end());
683 }
684 
685 static bool isAllSectionDescription(const OutputSectionCommand &Cmd) {
686   for (BaseCommand *Base : Cmd.Commands)
687     if (!isa<InputSectionDescription>(*Base))
688       return false;
689   return true;
690 }
691 
692 void LinkerScript::adjustSectionsBeforeSorting() {
693   // If the output section contains only symbol assignments, create a
694   // corresponding output section. The bfd linker seems to only create them if
695   // '.' is assigned to, but creating these section should not have any bad
696   // consequeces and gives us a section to put the symbol in.
697   uint64_t Flags = SHF_ALLOC;
698 
699   for (int I = 0, E = Opt.Commands.size(); I != E; ++I) {
700     auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I]);
701     if (!Cmd)
702       continue;
703     if (OutputSection *Sec = Cmd->Sec) {
704       Flags = Sec->Flags;
705       continue;
706     }
707 
708     if (isAllSectionDescription(*Cmd))
709       continue;
710 
711     auto *OutSec = make<OutputSection>(Cmd->Name, SHT_PROGBITS, Flags);
712     OutSec->SectionIndex = I;
713     Cmd->Sec = OutSec;
714     SecToCommand[OutSec] = Cmd;
715   }
716 }
717 
718 void LinkerScript::adjustSectionsAfterSorting() {
719   // Try and find an appropriate memory region to assign offsets in.
720   for (BaseCommand *Base : Opt.Commands) {
721     if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base)) {
722       Cmd->MemRegion = findMemoryRegion(Cmd);
723       // Handle align (e.g. ".foo : ALIGN(16) { ... }").
724       if (Cmd->AlignExpr)
725 	Cmd->Sec->updateAlignment(Cmd->AlignExpr().getValue());
726     }
727   }
728 
729   // If output section command doesn't specify any segments,
730   // and we haven't previously assigned any section to segment,
731   // then we simply assign section to the very first load segment.
732   // Below is an example of such linker script:
733   // PHDRS { seg PT_LOAD; }
734   // SECTIONS { .aaa : { *(.aaa) } }
735   std::vector<StringRef> DefPhdrs;
736   auto FirstPtLoad =
737       std::find_if(Opt.PhdrsCommands.begin(), Opt.PhdrsCommands.end(),
738                    [](const PhdrsCommand &Cmd) { return Cmd.Type == PT_LOAD; });
739   if (FirstPtLoad != Opt.PhdrsCommands.end())
740     DefPhdrs.push_back(FirstPtLoad->Name);
741 
742   // Walk the commands and propagate the program headers to commands that don't
743   // explicitly specify them.
744   for (BaseCommand *Base : Opt.Commands) {
745     auto *Cmd = dyn_cast<OutputSectionCommand>(Base);
746     if (!Cmd)
747       continue;
748 
749     if (Cmd->Phdrs.empty())
750       Cmd->Phdrs = DefPhdrs;
751     else
752       DefPhdrs = Cmd->Phdrs;
753   }
754 
755   removeEmptyCommands();
756 }
757 
758 void LinkerScript::createOrphanCommands() {
759   for (OutputSection *Sec : OutputSections) {
760     if (Sec->SectionIndex != INT_MAX)
761       continue;
762     OutputSectionCommand *Cmd =
763         createOutputSectionCommand(Sec->Name, "<internal>");
764     Cmd->Sec = Sec;
765     SecToCommand[Sec] = Cmd;
766     auto *ISD = make<InputSectionDescription>("");
767     ISD->Sections = Sec->Sections;
768     Cmd->Commands.push_back(ISD);
769     Opt.Commands.push_back(Cmd);
770   }
771 }
772 
773 void LinkerScript::processNonSectionCommands() {
774   for (BaseCommand *Base : Opt.Commands) {
775     if (auto *Cmd = dyn_cast<SymbolAssignment>(Base))
776       assignSymbol(Cmd, false);
777     else if (auto *Cmd = dyn_cast<AssertCommand>(Base))
778       Cmd->Expression();
779   }
780 }
781 
782 static bool
783 allocateHeaders(std::vector<PhdrEntry> &Phdrs,
784                 ArrayRef<OutputSectionCommand *> OutputSectionCommands,
785                 uint64_t Min) {
786   auto FirstPTLoad =
787       std::find_if(Phdrs.begin(), Phdrs.end(),
788                    [](const PhdrEntry &E) { return E.p_type == PT_LOAD; });
789   if (FirstPTLoad == Phdrs.end())
790     return false;
791 
792   uint64_t HeaderSize = getHeaderSize();
793   if (HeaderSize <= Min || Script->hasPhdrsCommands()) {
794     Min = alignDown(Min - HeaderSize, Config->MaxPageSize);
795     Out::ElfHeader->Addr = Min;
796     Out::ProgramHeaders->Addr = Min + Out::ElfHeader->Size;
797     return true;
798   }
799 
800   assert(FirstPTLoad->First == Out::ElfHeader);
801   OutputSection *ActualFirst = nullptr;
802   for (OutputSectionCommand *Cmd : OutputSectionCommands) {
803     OutputSection *Sec = Cmd->Sec;
804     if (Sec->FirstInPtLoad == Out::ElfHeader) {
805       ActualFirst = Sec;
806       break;
807     }
808   }
809   if (ActualFirst) {
810     for (OutputSectionCommand *Cmd : OutputSectionCommands) {
811       OutputSection *Sec = Cmd->Sec;
812       if (Sec->FirstInPtLoad == Out::ElfHeader)
813         Sec->FirstInPtLoad = ActualFirst;
814     }
815     FirstPTLoad->First = ActualFirst;
816   } else {
817     Phdrs.erase(FirstPTLoad);
818   }
819 
820   auto PhdrI = std::find_if(Phdrs.begin(), Phdrs.end(), [](const PhdrEntry &E) {
821     return E.p_type == PT_PHDR;
822   });
823   if (PhdrI != Phdrs.end())
824     Phdrs.erase(PhdrI);
825   return false;
826 }
827 
828 void LinkerScript::assignAddresses(std::vector<PhdrEntry> &Phdrs) {
829   // Assign addresses as instructed by linker script SECTIONS sub-commands.
830   Dot = 0;
831   ErrorOnMissingSection = true;
832   switchTo(Aether);
833 
834   for (BaseCommand *Base : Opt.Commands) {
835     if (auto *Cmd = dyn_cast<SymbolAssignment>(Base)) {
836       assignSymbol(Cmd, false);
837       continue;
838     }
839 
840     if (auto *Cmd = dyn_cast<AssertCommand>(Base)) {
841       Cmd->Expression();
842       continue;
843     }
844 
845     auto *Cmd = cast<OutputSectionCommand>(Base);
846     assignOffsets(Cmd);
847   }
848 
849   uint64_t MinVA = std::numeric_limits<uint64_t>::max();
850   for (OutputSectionCommand *Cmd : OutputSectionCommands) {
851     OutputSection *Sec = Cmd->Sec;
852     if (Sec->Flags & SHF_ALLOC)
853       MinVA = std::min<uint64_t>(MinVA, Sec->Addr);
854   }
855 
856   allocateHeaders(Phdrs, OutputSectionCommands, MinVA);
857 }
858 
859 // Creates program headers as instructed by PHDRS linker script command.
860 std::vector<PhdrEntry> LinkerScript::createPhdrs() {
861   std::vector<PhdrEntry> Ret;
862 
863   // Process PHDRS and FILEHDR keywords because they are not
864   // real output sections and cannot be added in the following loop.
865   for (const PhdrsCommand &Cmd : Opt.PhdrsCommands) {
866     Ret.emplace_back(Cmd.Type, Cmd.Flags == UINT_MAX ? PF_R : Cmd.Flags);
867     PhdrEntry &Phdr = Ret.back();
868 
869     if (Cmd.HasFilehdr)
870       Phdr.add(Out::ElfHeader);
871     if (Cmd.HasPhdrs)
872       Phdr.add(Out::ProgramHeaders);
873 
874     if (Cmd.LMAExpr) {
875       Phdr.p_paddr = Cmd.LMAExpr().getValue();
876       Phdr.HasLMA = true;
877     }
878   }
879 
880   // Add output sections to program headers.
881   for (OutputSectionCommand *Cmd : OutputSectionCommands) {
882     OutputSection *Sec = Cmd->Sec;
883     if (!(Sec->Flags & SHF_ALLOC))
884       break;
885 
886     // Assign headers specified by linker script
887     for (size_t Id : getPhdrIndices(Sec)) {
888       Ret[Id].add(Sec);
889       if (Opt.PhdrsCommands[Id].Flags == UINT_MAX)
890         Ret[Id].p_flags |= Sec->getPhdrFlags();
891     }
892   }
893   return Ret;
894 }
895 
896 bool LinkerScript::ignoreInterpSection() {
897   // Ignore .interp section in case we have PHDRS specification
898   // and PT_INTERP isn't listed.
899   if (Opt.PhdrsCommands.empty())
900     return false;
901   for (PhdrsCommand &Cmd : Opt.PhdrsCommands)
902     if (Cmd.Type == PT_INTERP)
903       return false;
904   return true;
905 }
906 
907 OutputSectionCommand *LinkerScript::getCmd(OutputSection *Sec) const {
908   auto I = SecToCommand.find(Sec);
909   if (I == SecToCommand.end())
910     return nullptr;
911   return I->second;
912 }
913 
914 uint32_t OutputSectionCommand::getFiller() {
915   if (Filler)
916     return *Filler;
917   if (Sec->Flags & SHF_EXECINSTR)
918     return Target->TrapInstr;
919   return 0;
920 }
921 
922 static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) {
923   if (Size == 1)
924     *Buf = Data;
925   else if (Size == 2)
926     write16(Buf, Data, Config->Endianness);
927   else if (Size == 4)
928     write32(Buf, Data, Config->Endianness);
929   else if (Size == 8)
930     write64(Buf, Data, Config->Endianness);
931   else
932     llvm_unreachable("unsupported Size argument");
933 }
934 
935 static bool compareByFilePosition(InputSection *A, InputSection *B) {
936   // Synthetic doesn't have link order dependecy, stable_sort will keep it last
937   if (A->kind() == InputSectionBase::Synthetic ||
938       B->kind() == InputSectionBase::Synthetic)
939     return false;
940   InputSection *LA = A->getLinkOrderDep();
941   InputSection *LB = B->getLinkOrderDep();
942   OutputSection *AOut = LA->getParent();
943   OutputSection *BOut = LB->getParent();
944   if (AOut != BOut)
945     return AOut->SectionIndex < BOut->SectionIndex;
946   return LA->OutSecOff < LB->OutSecOff;
947 }
948 
949 template <class ELFT>
950 static void finalizeShtGroup(OutputSection *OS,
951                              ArrayRef<InputSection *> Sections) {
952   assert(Config->Relocatable && Sections.size() == 1);
953 
954   // sh_link field for SHT_GROUP sections should contain the section index of
955   // the symbol table.
956   OS->Link = InX::SymTab->getParent()->SectionIndex;
957 
958   // sh_info then contain index of an entry in symbol table section which
959   // provides signature of the section group.
960   elf::ObjectFile<ELFT> *Obj = Sections[0]->getFile<ELFT>();
961   ArrayRef<SymbolBody *> Symbols = Obj->getSymbols();
962   OS->Info = InX::SymTab->getSymbolIndex(Symbols[Sections[0]->Info - 1]);
963 }
964 
965 template <class ELFT> void OutputSectionCommand::finalize() {
966   // Link order may be distributed across several InputSectionDescriptions
967   // but sort must consider them all at once.
968   std::vector<InputSection **> ScriptSections;
969   std::vector<InputSection *> Sections;
970   for (BaseCommand *Base : Commands)
971     if (auto *ISD = dyn_cast<InputSectionDescription>(Base))
972       for (InputSection *&IS : ISD->Sections) {
973         ScriptSections.push_back(&IS);
974         Sections.push_back(IS);
975       }
976 
977   if ((Sec->Flags & SHF_LINK_ORDER)) {
978     std::sort(Sections.begin(), Sections.end(), compareByFilePosition);
979     for (int I = 0, N = Sections.size(); I < N; ++I)
980       *ScriptSections[I] = Sections[I];
981 
982     // We must preserve the link order dependency of sections with the
983     // SHF_LINK_ORDER flag. The dependency is indicated by the sh_link field. We
984     // need to translate the InputSection sh_link to the OutputSection sh_link,
985     // all InputSections in the OutputSection have the same dependency.
986     if (auto *D = Sections.front()->getLinkOrderDep())
987       Sec->Link = D->getParent()->SectionIndex;
988   }
989 
990   uint32_t Type = Sec->Type;
991   if (Type == SHT_GROUP) {
992     finalizeShtGroup<ELFT>(Sec, Sections);
993     return;
994   }
995 
996   if (!Config->CopyRelocs || (Type != SHT_RELA && Type != SHT_REL))
997     return;
998 
999   InputSection *First = Sections[0];
1000   if (isa<SyntheticSection>(First))
1001     return;
1002 
1003   Sec->Link = InX::SymTab->getParent()->SectionIndex;
1004   // sh_info for SHT_REL[A] sections should contain the section header index of
1005   // the section to which the relocation applies.
1006   InputSectionBase *S = First->getRelocatedSection();
1007   Sec->Info = S->getOutputSection()->SectionIndex;
1008   Sec->Flags |= SHF_INFO_LINK;
1009 }
1010 
1011 // Compress section contents if this section contains debug info.
1012 template <class ELFT> void OutputSectionCommand::maybeCompress() {
1013   typedef typename ELFT::Chdr Elf_Chdr;
1014 
1015   // Compress only DWARF debug sections.
1016   if (!Config->CompressDebugSections || (Sec->Flags & SHF_ALLOC) ||
1017       !Name.startswith(".debug_"))
1018     return;
1019 
1020   // Create a section header.
1021   Sec->ZDebugHeader.resize(sizeof(Elf_Chdr));
1022   auto *Hdr = reinterpret_cast<Elf_Chdr *>(Sec->ZDebugHeader.data());
1023   Hdr->ch_type = ELFCOMPRESS_ZLIB;
1024   Hdr->ch_size = Sec->Size;
1025   Hdr->ch_addralign = Sec->Alignment;
1026 
1027   // Write section contents to a temporary buffer and compress it.
1028   std::vector<uint8_t> Buf(Sec->Size);
1029   writeTo<ELFT>(Buf.data());
1030   if (Error E = zlib::compress(toStringRef(Buf), Sec->CompressedData))
1031     fatal("compress failed: " + llvm::toString(std::move(E)));
1032 
1033   // Update section headers.
1034   Sec->Size = sizeof(Elf_Chdr) + Sec->CompressedData.size();
1035   Sec->Flags |= SHF_COMPRESSED;
1036 }
1037 
1038 template <class ELFT> void OutputSectionCommand::writeTo(uint8_t *Buf) {
1039   if (Sec->Type == SHT_NOBITS)
1040     return;
1041 
1042   Sec->Loc = Buf;
1043 
1044   // If -compress-debug-section is specified and if this is a debug seciton,
1045   // we've already compressed section contents. If that's the case,
1046   // just write it down.
1047   if (!Sec->CompressedData.empty()) {
1048     memcpy(Buf, Sec->ZDebugHeader.data(), Sec->ZDebugHeader.size());
1049     memcpy(Buf + Sec->ZDebugHeader.size(), Sec->CompressedData.data(),
1050            Sec->CompressedData.size());
1051     return;
1052   }
1053 
1054   // Write leading padding.
1055   std::vector<InputSection *> Sections;
1056   for (BaseCommand *Cmd : Commands)
1057     if (auto *ISD = dyn_cast<InputSectionDescription>(Cmd))
1058       for (InputSection *IS : ISD->Sections)
1059         if (IS->Live)
1060           Sections.push_back(IS);
1061   uint32_t Filler = getFiller();
1062   if (Filler)
1063     fill(Buf, Sections.empty() ? Sec->Size : Sections[0]->OutSecOff, Filler);
1064 
1065   parallelForEachN(0, Sections.size(), [=](size_t I) {
1066     InputSection *IS = Sections[I];
1067     IS->writeTo<ELFT>(Buf);
1068 
1069     // Fill gaps between sections.
1070     if (Filler) {
1071       uint8_t *Start = Buf + IS->OutSecOff + IS->getSize();
1072       uint8_t *End;
1073       if (I + 1 == Sections.size())
1074         End = Buf + Sec->Size;
1075       else
1076         End = Buf + Sections[I + 1]->OutSecOff;
1077       fill(Start, End - Start, Filler);
1078     }
1079   });
1080 
1081   // Linker scripts may have BYTE()-family commands with which you
1082   // can write arbitrary bytes to the output. Process them if any.
1083   for (BaseCommand *Base : Commands)
1084     if (auto *Data = dyn_cast<BytesDataCommand>(Base))
1085       writeInt(Buf + Data->Offset, Data->Expression().getValue(), Data->Size);
1086 }
1087 
1088 bool LinkerScript::hasLMA(OutputSection *Sec) {
1089   if (OutputSectionCommand *Cmd = getCmd(Sec))
1090     if (Cmd->LMAExpr)
1091       return true;
1092   return false;
1093 }
1094 
1095 ExprValue LinkerScript::getSymbolValue(const Twine &Loc, StringRef S) {
1096   if (S == ".")
1097     return {CurOutSec, Dot - CurOutSec->Addr, Loc};
1098   if (SymbolBody *B = findSymbol(S)) {
1099     if (auto *D = dyn_cast<DefinedRegular>(B))
1100       return {D->Section, D->Value, Loc};
1101     if (auto *C = dyn_cast<DefinedCommon>(B))
1102       return {InX::Common, C->Offset, Loc};
1103   }
1104   error(Loc + ": symbol not found: " + S);
1105   return 0;
1106 }
1107 
1108 bool LinkerScript::isDefined(StringRef S) { return findSymbol(S) != nullptr; }
1109 
// Sentinel index (the largest size_t value) returned by getPhdrIndex() when
// a name does not match any PHDRS command.
static const size_t NoPhdr = std::numeric_limits<size_t>::max();
1111 
1112 // Returns indices of ELF headers containing specific section. Each index is a
1113 // zero based number of ELF header listed within PHDRS {} script block.
1114 std::vector<size_t> LinkerScript::getPhdrIndices(OutputSection *Sec) {
1115   if (OutputSectionCommand *Cmd = getCmd(Sec)) {
1116     std::vector<size_t> Ret;
1117     for (StringRef PhdrName : Cmd->Phdrs) {
1118       size_t Index = getPhdrIndex(Cmd->Location, PhdrName);
1119       if (Index != NoPhdr)
1120         Ret.push_back(Index);
1121     }
1122     return Ret;
1123   }
1124   return {};
1125 }
1126 
1127 // Returns the index of the segment named PhdrName if found otherwise
1128 // NoPhdr. When not found, if PhdrName is not the special case value 'NONE'
1129 // (which can be used to explicitly specify that a section isn't assigned to a
1130 // segment) then error.
1131 size_t LinkerScript::getPhdrIndex(const Twine &Loc, StringRef PhdrName) {
1132   size_t I = 0;
1133   for (PhdrsCommand &Cmd : Opt.PhdrsCommands) {
1134     if (Cmd.Name == PhdrName)
1135       return I;
1136     ++I;
1137   }
1138   if (PhdrName != "NONE")
1139     error(Loc + ": section header '" + PhdrName + "' is not listed in PHDRS");
1140   return NoPhdr;
1141 }
1142 
1143 template void OutputSectionCommand::writeTo<ELF32LE>(uint8_t *Buf);
1144 template void OutputSectionCommand::writeTo<ELF32BE>(uint8_t *Buf);
1145 template void OutputSectionCommand::writeTo<ELF64LE>(uint8_t *Buf);
1146 template void OutputSectionCommand::writeTo<ELF64BE>(uint8_t *Buf);
1147 
1148 template void OutputSectionCommand::maybeCompress<ELF32LE>();
1149 template void OutputSectionCommand::maybeCompress<ELF32BE>();
1150 template void OutputSectionCommand::maybeCompress<ELF64LE>();
1151 template void OutputSectionCommand::maybeCompress<ELF64BE>();
1152 
1153 template void OutputSectionCommand::finalize<ELF32LE>();
1154 template void OutputSectionCommand::finalize<ELF32BE>();
1155 template void OutputSectionCommand::finalize<ELF64LE>();
1156 template void OutputSectionCommand::finalize<ELF64BE>();
1157