1 //===- LinkerScript.cpp ---------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the parser/evaluator of the linker script.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "LinkerScript.h"
15 #include "Config.h"
16 #include "InputSection.h"
17 #include "Memory.h"
18 #include "OutputSections.h"
19 #include "Strings.h"
20 #include "SymbolTable.h"
21 #include "Symbols.h"
22 #include "SyntheticSections.h"
23 #include "Target.h"
24 #include "Threads.h"
25 #include "Writer.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/Endian.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/FileSystem.h"
33 #include "llvm/Support/Path.h"
34 #include <algorithm>
35 #include <cassert>
36 #include <cstddef>
37 #include <cstdint>
38 #include <iterator>
39 #include <limits>
40 #include <string>
41 #include <vector>
42 
43 using namespace llvm;
44 using namespace llvm::ELF;
45 using namespace llvm::object;
46 using namespace llvm::support::endian;
47 using namespace lld;
48 using namespace lld::elf;
49 
50 LinkerScript *elf::Script;
51 
52 uint64_t ExprValue::getValue() const {
53   if (Sec) {
54     if (OutputSection *OS = Sec->getOutputSection())
55       return alignTo(Sec->getOffset(Val) + OS->Addr, Alignment);
56     error(Loc + ": unable to evaluate expression: input section " + Sec->Name +
57           " has no output section assigned");
58   }
59   return alignTo(Val, Alignment);
60 }
61 
62 uint64_t ExprValue::getSecAddr() const {
63   if (Sec)
64     return Sec->getOffset(0) + Sec->getOutputSection()->Addr;
65   return 0;
66 }
67 
68 static SymbolBody *addRegular(SymbolAssignment *Cmd) {
69   Symbol *Sym;
70   uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT;
71   std::tie(Sym, std::ignore) = Symtab->insert(Cmd->Name, /*Type*/ 0, Visibility,
72                                               /*CanOmitFromDynSym*/ false,
73                                               /*File*/ nullptr);
74   Sym->Binding = STB_GLOBAL;
75   ExprValue Value = Cmd->Expression();
76   SectionBase *Sec = Value.isAbsolute() ? nullptr : Value.Sec;
77 
78   // We want to set symbol values early if we can. This allows us to use symbols
79   // as variables in linker scripts. Doing so allows us to write expressions
80   // like this: `alignment = 16; . = ALIGN(., alignment)`
81   uint64_t SymValue = Value.isAbsolute() ? Value.getValue() : 0;
82   replaceBody<DefinedRegular>(Sym, Cmd->Name, /*IsLocal=*/false, Visibility,
83                               STT_NOTYPE, SymValue, 0, Sec, nullptr);
84   return Sym->body();
85 }
86 
87 OutputSection *LinkerScript::createOutputSection(StringRef Name,
88                                                  StringRef Location) {
89   OutputSection *&SecRef = NameToOutputSection[Name];
90   OutputSection *Sec;
91   if (SecRef && SecRef->Location.empty()) {
92     // There was a forward reference.
93     Sec = SecRef;
94   } else {
95     Sec = make<OutputSection>(Name, SHT_PROGBITS, 0);
96     if (!SecRef)
97       SecRef = Sec;
98   }
99   Sec->Location = Location;
100   return Sec;
101 }
102 
103 OutputSection *LinkerScript::getOrCreateOutputSection(StringRef Name) {
104   OutputSection *&CmdRef = NameToOutputSection[Name];
105   if (!CmdRef)
106     CmdRef = make<OutputSection>(Name, SHT_PROGBITS, 0);
107   return CmdRef;
108 }
109 
110 void LinkerScript::setDot(Expr E, const Twine &Loc, bool InSec) {
111   uint64_t Val = E().getValue();
112   if (Val < Dot && InSec)
113     error(Loc + ": unable to move location counter backward for: " +
114           CurAddressState->OutSec->Name);
115   Dot = Val;
116   // Update to location counter means update to section size.
117   if (InSec)
118     CurAddressState->OutSec->Size = Dot - CurAddressState->OutSec->Addr;
119 }
120 
121 // Sets value of a symbol. Two kinds of symbols are processed: synthetic
122 // symbols, whose value is an offset from beginning of section and regular
123 // symbols whose value is absolute.
124 void LinkerScript::assignSymbol(SymbolAssignment *Cmd, bool InSec) {
125   if (Cmd->Name == ".") {
126     setDot(Cmd->Expression, Cmd->Location, InSec);
127     return;
128   }
129 
130   if (!Cmd->Sym)
131     return;
132 
133   auto *Sym = cast<DefinedRegular>(Cmd->Sym);
134   ExprValue V = Cmd->Expression();
135   if (V.isAbsolute()) {
136     Sym->Value = V.getValue();
137   } else {
138     Sym->Section = V.Sec;
139     Sym->Value = alignTo(V.Val, V.Alignment);
140   }
141 }
142 
143 void LinkerScript::addSymbol(SymbolAssignment *Cmd) {
144   if (Cmd->Name == ".")
145     return;
146 
147   // If a symbol was in PROVIDE(), we need to define it only when
148   // it is a referenced undefined symbol.
149   SymbolBody *B = Symtab->find(Cmd->Name);
150   if (Cmd->Provide && (!B || B->isDefined()))
151     return;
152 
153   Cmd->Sym = addRegular(Cmd);
154 }
155 
156 bool SymbolAssignment::classof(const BaseCommand *C) {
157   return C->Kind == AssignmentKind;
158 }
159 
160 bool InputSectionDescription::classof(const BaseCommand *C) {
161   return C->Kind == InputSectionKind;
162 }
163 
164 bool AssertCommand::classof(const BaseCommand *C) {
165   return C->Kind == AssertKind;
166 }
167 
168 bool BytesDataCommand::classof(const BaseCommand *C) {
169   return C->Kind == BytesDataKind;
170 }
171 
172 static StringRef basename(InputSectionBase *S) {
173   if (S->File)
174     return sys::path::filename(S->File->getName());
175   return "";
176 }
177 
178 bool LinkerScript::shouldKeep(InputSectionBase *S) {
179   for (InputSectionDescription *ID : Opt.KeptSections)
180     if (ID->FilePat.match(basename(S)))
181       for (SectionPattern &P : ID->SectionPatterns)
182         if (P.SectionPat.match(S->Name))
183           return true;
184   return false;
185 }
186 
187 // A helper function for the SORT() command.
188 static std::function<bool(InputSectionBase *, InputSectionBase *)>
189 getComparator(SortSectionPolicy K) {
190   switch (K) {
191   case SortSectionPolicy::Alignment:
192     return [](InputSectionBase *A, InputSectionBase *B) {
193       // ">" is not a mistake. Sections with larger alignments are placed
194       // before sections with smaller alignments in order to reduce the
195       // amount of padding necessary. This is compatible with GNU.
196       return A->Alignment > B->Alignment;
197     };
198   case SortSectionPolicy::Name:
199     return [](InputSectionBase *A, InputSectionBase *B) {
200       return A->Name < B->Name;
201     };
202   case SortSectionPolicy::Priority:
203     return [](InputSectionBase *A, InputSectionBase *B) {
204       return getPriority(A->Name) < getPriority(B->Name);
205     };
206   default:
207     llvm_unreachable("unknown sort policy");
208   }
209 }
210 
211 // A helper function for the SORT() command.
212 static bool matchConstraints(ArrayRef<InputSectionBase *> Sections,
213                              ConstraintKind Kind) {
214   if (Kind == ConstraintKind::NoConstraint)
215     return true;
216 
217   bool IsRW = llvm::any_of(Sections, [](InputSectionBase *Sec) {
218     return static_cast<InputSectionBase *>(Sec)->Flags & SHF_WRITE;
219   });
220 
221   return (IsRW && Kind == ConstraintKind::ReadWrite) ||
222          (!IsRW && Kind == ConstraintKind::ReadOnly);
223 }
224 
225 static void sortSections(InputSection **Begin, InputSection **End,
226                          SortSectionPolicy K) {
227   if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None)
228     std::stable_sort(Begin, End, getComparator(K));
229 }
230 
231 // Compute and remember which sections the InputSectionDescription matches.
232 std::vector<InputSection *>
233 LinkerScript::computeInputSections(const InputSectionDescription *Cmd) {
234   std::vector<InputSection *> Ret;
235 
236   // Collects all sections that satisfy constraints of Cmd.
237   for (const SectionPattern &Pat : Cmd->SectionPatterns) {
238     size_t SizeBefore = Ret.size();
239 
240     for (InputSectionBase *Sec : InputSections) {
241       if (Sec->Assigned)
242         continue;
243 
244       if (!Sec->Live) {
245         reportDiscarded(Sec);
246         continue;
247       }
248 
249       // For -emit-relocs we have to ignore entries like
250       //   .rela.dyn : { *(.rela.data) }
251       // which are common because they are in the default bfd script.
252       if (Sec->Type == SHT_REL || Sec->Type == SHT_RELA)
253         continue;
254 
255       StringRef Filename = basename(Sec);
256       if (!Cmd->FilePat.match(Filename) ||
257           Pat.ExcludedFilePat.match(Filename) ||
258           !Pat.SectionPat.match(Sec->Name))
259         continue;
260 
261       Ret.push_back(cast<InputSection>(Sec));
262       Sec->Assigned = true;
263     }
264 
265     // Sort sections as instructed by SORT-family commands and --sort-section
266     // option. Because SORT-family commands can be nested at most two depth
267     // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command
268     // line option is respected even if a SORT command is given, the exact
269     // behavior we have here is a bit complicated. Here are the rules.
270     //
271     // 1. If two SORT commands are given, --sort-section is ignored.
272     // 2. If one SORT command is given, and if it is not SORT_NONE,
273     //    --sort-section is handled as an inner SORT command.
274     // 3. If one SORT command is given, and if it is SORT_NONE, don't sort.
275     // 4. If no SORT command is given, sort according to --sort-section.
276     InputSection **Begin = Ret.data() + SizeBefore;
277     InputSection **End = Ret.data() + Ret.size();
278     if (Pat.SortOuter != SortSectionPolicy::None) {
279       if (Pat.SortInner == SortSectionPolicy::Default)
280         sortSections(Begin, End, Config->SortSection);
281       else
282         sortSections(Begin, End, Pat.SortInner);
283       sortSections(Begin, End, Pat.SortOuter);
284     }
285   }
286   return Ret;
287 }
288 
289 void LinkerScript::discard(ArrayRef<InputSectionBase *> V) {
290   for (InputSectionBase *S : V) {
291     S->Live = false;
292     if (S == InX::ShStrTab || S == InX::Dynamic || S == InX::DynSymTab ||
293         S == InX::DynStrTab)
294       error("discarding " + S->Name + " section is not allowed");
295     discard(S->DependentSections);
296   }
297 }
298 
299 std::vector<InputSectionBase *>
300 LinkerScript::createInputSectionList(OutputSection &OutCmd) {
301   std::vector<InputSectionBase *> Ret;
302 
303   for (BaseCommand *Base : OutCmd.Commands) {
304     auto *Cmd = dyn_cast<InputSectionDescription>(Base);
305     if (!Cmd)
306       continue;
307 
308     Cmd->Sections = computeInputSections(Cmd);
309     Ret.insert(Ret.end(), Cmd->Sections.begin(), Cmd->Sections.end());
310   }
311 
312   return Ret;
313 }
314 
315 void LinkerScript::processCommands(OutputSectionFactory &Factory) {
316   // A symbol can be assigned before any section is mentioned in the linker
317   // script. In an DSO, the symbol values are addresses, so the only important
318   // section values are:
319   // * SHN_UNDEF
320   // * SHN_ABS
321   // * Any value meaning a regular section.
322   // To handle that, create a dummy aether section that fills the void before
323   // the linker scripts switches to another section. It has an index of one
324   // which will map to whatever the first actual section is.
325   Aether = make<OutputSection>("", 0, SHF_ALLOC);
326   Aether->SectionIndex = 1;
327   auto State = make_unique<AddressState>(Opt);
328   // CurAddressState captures the local AddressState and makes it accessible
329   // deliberately. This is needed as there are some cases where we cannot just
330   // thread the current state through to a lambda function created by the
331   // script parser.
332   CurAddressState = State.get();
333   CurAddressState->OutSec = Aether;
334   Dot = 0;
335 
336   for (size_t I = 0; I < Opt.Commands.size(); ++I) {
337     // Handle symbol assignments outside of any output section.
338     if (auto *Cmd = dyn_cast<SymbolAssignment>(Opt.Commands[I])) {
339       addSymbol(Cmd);
340       continue;
341     }
342 
343     if (auto *Sec = dyn_cast<OutputSection>(Opt.Commands[I])) {
344       std::vector<InputSectionBase *> V = createInputSectionList(*Sec);
345 
346       // The output section name `/DISCARD/' is special.
347       // Any input section assigned to it is discarded.
348       if (Sec->Name == "/DISCARD/") {
349         discard(V);
350         continue;
351       }
352 
353       // This is for ONLY_IF_RO and ONLY_IF_RW. An output section directive
354       // ".foo : ONLY_IF_R[OW] { ... }" is handled only if all member input
355       // sections satisfy a given constraint. If not, a directive is handled
356       // as if it wasn't present from the beginning.
357       //
358       // Because we'll iterate over Commands many more times, the easiest
359       // way to "make it as if it wasn't present" is to just remove it.
360       if (!matchConstraints(V, Sec->Constraint)) {
361         for (InputSectionBase *S : V)
362           S->Assigned = false;
363         Opt.Commands.erase(Opt.Commands.begin() + I);
364         --I;
365         continue;
366       }
367 
368       // A directive may contain symbol definitions like this:
369       // ".foo : { ...; bar = .; }". Handle them.
370       for (BaseCommand *Base : Sec->Commands)
371         if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base))
372           addSymbol(OutCmd);
373 
374       // Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign
375       // is given, input sections are aligned to that value, whether the
376       // given value is larger or smaller than the original section alignment.
377       if (Sec->SubalignExpr) {
378         uint32_t Subalign = Sec->SubalignExpr().getValue();
379         for (InputSectionBase *S : V)
380           S->Alignment = Subalign;
381       }
382 
383       // Add input sections to an output section.
384       for (InputSectionBase *S : V)
385         Factory.addInputSec(S, Sec->Name, Sec);
386       assert(Sec->SectionIndex == INT_MAX);
387       Sec->SectionIndex = I;
388       if (Sec->Noload)
389         Sec->Type = SHT_NOBITS;
390     }
391   }
392   CurAddressState = nullptr;
393 }
394 
395 void LinkerScript::fabricateDefaultCommands() {
396   // Define start address
397   uint64_t StartAddr = -1;
398 
399   // The Sections with -T<section> have been sorted in order of ascending
400   // address. We must lower StartAddr if the lowest -T<section address> as
401   // calls to setDot() must be monotonically increasing.
402   for (auto &KV : Config->SectionStartMap)
403     StartAddr = std::min(StartAddr, KV.second);
404 
405   Opt.Commands.insert(Opt.Commands.begin(),
406                       make<SymbolAssignment>(".",
407                                              [=] {
408                                                return std::min(
409                                                    StartAddr,
410                                                    Config->ImageBase +
411                                                        elf::getHeaderSize());
412                                              },
413                                              ""));
414 }
415 
416 // Add sections that didn't match any sections command.
417 void LinkerScript::addOrphanSections(OutputSectionFactory &Factory) {
418   unsigned NumCommands = Opt.Commands.size();
419   for (InputSectionBase *S : InputSections) {
420     if (!S->Live || S->Parent)
421       continue;
422     StringRef Name = getOutputSectionName(S->Name);
423     auto End = Opt.Commands.begin() + NumCommands;
424     auto I = std::find_if(Opt.Commands.begin(), End, [&](BaseCommand *Base) {
425       if (auto *Sec = dyn_cast<OutputSection>(Base))
426         return Sec->Name == Name;
427       return false;
428     });
429     if (I == End) {
430       Factory.addInputSec(S, Name);
431       assert(S->getOutputSection()->SectionIndex == INT_MAX);
432     } else {
433       OutputSection *Sec = cast<OutputSection>(*I);
434       Factory.addInputSec(S, Name, Sec);
435       unsigned Index = std::distance(Opt.Commands.begin(), I);
436       assert(Sec->SectionIndex == INT_MAX || Sec->SectionIndex == Index);
437       Sec->SectionIndex = Index;
438     }
439   }
440 }
441 
442 uint64_t LinkerScript::advance(uint64_t Size, unsigned Align) {
443   bool IsTbss = (CurAddressState->OutSec->Flags & SHF_TLS) &&
444                 CurAddressState->OutSec->Type == SHT_NOBITS;
445   uint64_t Start = IsTbss ? Dot + CurAddressState->ThreadBssOffset : Dot;
446   Start = alignTo(Start, Align);
447   uint64_t End = Start + Size;
448 
449   if (IsTbss)
450     CurAddressState->ThreadBssOffset = End - Dot;
451   else
452     Dot = End;
453   return End;
454 }
455 
456 void LinkerScript::output(InputSection *S) {
457   uint64_t Before = advance(0, 1);
458   uint64_t Pos = advance(S->getSize(), S->Alignment);
459   S->OutSecOff = Pos - S->getSize() - CurAddressState->OutSec->Addr;
460 
461   // Update output section size after adding each section. This is so that
462   // SIZEOF works correctly in the case below:
463   // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) }
464   CurAddressState->OutSec->Size = Pos - CurAddressState->OutSec->Addr;
465 
466   // If there is a memory region associated with this input section, then
467   // place the section in that region and update the region index.
468   if (CurAddressState->MemRegion) {
469     uint64_t &CurOffset =
470         CurAddressState->MemRegionOffset[CurAddressState->MemRegion];
471     CurOffset += Pos - Before;
472     uint64_t CurSize = CurOffset - CurAddressState->MemRegion->Origin;
473     if (CurSize > CurAddressState->MemRegion->Length) {
474       uint64_t OverflowAmt = CurSize - CurAddressState->MemRegion->Length;
475       error("section '" + CurAddressState->OutSec->Name +
476             "' will not fit in region '" + CurAddressState->MemRegion->Name +
477             "': overflowed by " + Twine(OverflowAmt) + " bytes");
478     }
479   }
480 }
481 
482 void LinkerScript::switchTo(OutputSection *Sec) {
483   if (CurAddressState->OutSec == Sec)
484     return;
485 
486   CurAddressState->OutSec = Sec;
487   CurAddressState->OutSec->Addr =
488       advance(0, CurAddressState->OutSec->Alignment);
489 
490   // If neither AT nor AT> is specified for an allocatable section, the linker
491   // will set the LMA such that the difference between VMA and LMA for the
492   // section is the same as the preceding output section in the same region
493   // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html
494   if (CurAddressState->LMAOffset)
495     CurAddressState->OutSec->LMAOffset = CurAddressState->LMAOffset();
496 }
497 
498 void LinkerScript::process(BaseCommand &Base) {
499   // This handles the assignments to symbol or to the dot.
500   if (auto *Cmd = dyn_cast<SymbolAssignment>(&Base)) {
501     assignSymbol(Cmd, true);
502     return;
503   }
504 
505   // Handle BYTE(), SHORT(), LONG(), or QUAD().
506   if (auto *Cmd = dyn_cast<BytesDataCommand>(&Base)) {
507     Cmd->Offset = Dot - CurAddressState->OutSec->Addr;
508     Dot += Cmd->Size;
509     CurAddressState->OutSec->Size = Dot - CurAddressState->OutSec->Addr;
510     return;
511   }
512 
513   // Handle ASSERT().
514   if (auto *Cmd = dyn_cast<AssertCommand>(&Base)) {
515     Cmd->Expression();
516     return;
517   }
518 
519   // Handle a single input section description command.
520   // It calculates and assigns the offsets for each section and also
521   // updates the output section size.
522   auto &Cmd = cast<InputSectionDescription>(Base);
523   for (InputSection *Sec : Cmd.Sections) {
524     // We tentatively added all synthetic sections at the beginning and removed
525     // empty ones afterwards (because there is no way to know whether they were
526     // going be empty or not other than actually running linker scripts.)
527     // We need to ignore remains of empty sections.
528     if (auto *S = dyn_cast<SyntheticSection>(Sec))
529       if (S->empty())
530         continue;
531 
532     if (!Sec->Live)
533       continue;
534     assert(CurAddressState->OutSec == Sec->getParent());
535     output(Sec);
536   }
537 }
538 
539 // This function searches for a memory region to place the given output
540 // section in. If found, a pointer to the appropriate memory region is
541 // returned. Otherwise, a nullptr is returned.
542 MemoryRegion *LinkerScript::findMemoryRegion(OutputSection *Sec) {
543   // If a memory region name was specified in the output section command,
544   // then try to find that region first.
545   if (!Sec->MemoryRegionName.empty()) {
546     auto It = Opt.MemoryRegions.find(Sec->MemoryRegionName);
547     if (It != Opt.MemoryRegions.end())
548       return &It->second;
549     error("memory region '" + Sec->MemoryRegionName + "' not declared");
550     return nullptr;
551   }
552 
553   // If at least one memory region is defined, all sections must
554   // belong to some memory region. Otherwise, we don't need to do
555   // anything for memory regions.
556   if (Opt.MemoryRegions.empty())
557     return nullptr;
558 
559   // See if a region can be found by matching section flags.
560   for (auto &Pair : Opt.MemoryRegions) {
561     MemoryRegion &M = Pair.second;
562     if ((M.Flags & Sec->Flags) && (M.NegFlags & Sec->Flags) == 0)
563       return &M;
564   }
565 
566   // Otherwise, no suitable region was found.
567   if (Sec->Flags & SHF_ALLOC)
568     error("no memory region specified for section '" + Sec->Name + "'");
569   return nullptr;
570 }
571 
572 // This function assigns offsets to input sections and an output section
573 // for a single sections command (e.g. ".text { *(.text); }").
574 void LinkerScript::assignOffsets(OutputSection *Sec) {
575   if (!(Sec->Flags & SHF_ALLOC))
576     Dot = 0;
577   else if (Sec->AddrExpr)
578     setDot(Sec->AddrExpr, Sec->Location, false);
579 
580   if (Sec->LMAExpr) {
581     uint64_t D = Dot;
582     CurAddressState->LMAOffset = [=] { return Sec->LMAExpr().getValue() - D; };
583   }
584 
585   CurAddressState->MemRegion = Sec->MemRegion;
586   if (CurAddressState->MemRegion)
587     Dot = CurAddressState->MemRegionOffset[CurAddressState->MemRegion];
588   switchTo(Sec);
589 
590   // We do not support custom layout for compressed debug sectons.
591   // At this point we already know their size and have compressed content.
592   if (CurAddressState->OutSec->Flags & SHF_COMPRESSED)
593     return;
594 
595   for (BaseCommand *C : Sec->Commands)
596     process(*C);
597 }
598 
599 void LinkerScript::removeEmptyCommands() {
600   // It is common practice to use very generic linker scripts. So for any
601   // given run some of the output sections in the script will be empty.
602   // We could create corresponding empty output sections, but that would
603   // clutter the output.
604   // We instead remove trivially empty sections. The bfd linker seems even
605   // more aggressive at removing them.
606   auto Pos = std::remove_if(Opt.Commands.begin(), Opt.Commands.end(),
607                             [&](BaseCommand *Base) {
608                               if (auto *Sec = dyn_cast<OutputSection>(Base))
609                                 return !Sec->Live;
610                               return false;
611                             });
612   Opt.Commands.erase(Pos, Opt.Commands.end());
613 }
614 
615 static bool isAllSectionDescription(const OutputSection &Cmd) {
616   for (BaseCommand *Base : Cmd.Commands)
617     if (!isa<InputSectionDescription>(*Base))
618       return false;
619   return true;
620 }
621 
622 void LinkerScript::adjustSectionsBeforeSorting() {
623   // If the output section contains only symbol assignments, create a
624   // corresponding output section. The bfd linker seems to only create them if
625   // '.' is assigned to, but creating these section should not have any bad
626   // consequeces and gives us a section to put the symbol in.
627   uint64_t Flags = SHF_ALLOC;
628 
629   for (int I = 0, E = Opt.Commands.size(); I != E; ++I) {
630     auto *Sec = dyn_cast<OutputSection>(Opt.Commands[I]);
631     if (!Sec)
632       continue;
633     if (Sec->Live) {
634       Flags = Sec->Flags;
635       continue;
636     }
637 
638     if (isAllSectionDescription(*Sec))
639       continue;
640 
641     Sec->Live = true;
642     Sec->SectionIndex = I;
643     Sec->Flags = Flags;
644   }
645 }
646 
647 void LinkerScript::adjustSectionsAfterSorting() {
648   // Try and find an appropriate memory region to assign offsets in.
649   for (BaseCommand *Base : Opt.Commands) {
650     if (auto *Sec = dyn_cast<OutputSection>(Base)) {
651       Sec->MemRegion = findMemoryRegion(Sec);
652       // Handle align (e.g. ".foo : ALIGN(16) { ... }").
653       if (Sec->AlignExpr)
654         Sec->updateAlignment(Sec->AlignExpr().getValue());
655     }
656   }
657 
658   // If output section command doesn't specify any segments,
659   // and we haven't previously assigned any section to segment,
660   // then we simply assign section to the very first load segment.
661   // Below is an example of such linker script:
662   // PHDRS { seg PT_LOAD; }
663   // SECTIONS { .aaa : { *(.aaa) } }
664   std::vector<StringRef> DefPhdrs;
665   auto FirstPtLoad =
666       std::find_if(Opt.PhdrsCommands.begin(), Opt.PhdrsCommands.end(),
667                    [](const PhdrsCommand &Cmd) { return Cmd.Type == PT_LOAD; });
668   if (FirstPtLoad != Opt.PhdrsCommands.end())
669     DefPhdrs.push_back(FirstPtLoad->Name);
670 
671   // Walk the commands and propagate the program headers to commands that don't
672   // explicitly specify them.
673   for (BaseCommand *Base : Opt.Commands) {
674     auto *Sec = dyn_cast<OutputSection>(Base);
675     if (!Sec)
676       continue;
677 
678     if (Sec->Phdrs.empty()) {
679       // To match the bfd linker script behaviour, only propagate program
680       // headers to sections that are allocated.
681       if (Sec->Flags & SHF_ALLOC)
682         Sec->Phdrs = DefPhdrs;
683     } else {
684       DefPhdrs = Sec->Phdrs;
685     }
686   }
687 
688   removeEmptyCommands();
689 }
690 
691 void LinkerScript::allocateHeaders(std::vector<PhdrEntry *> &Phdrs) {
692   uint64_t Min = std::numeric_limits<uint64_t>::max();
693   for (OutputSection *Sec : OutputSections)
694     if (Sec->Flags & SHF_ALLOC)
695       Min = std::min<uint64_t>(Min, Sec->Addr);
696 
697   auto It = llvm::find_if(
698       Phdrs, [](const PhdrEntry *E) { return E->p_type == PT_LOAD; });
699   if (It == Phdrs.end())
700     return;
701   PhdrEntry *FirstPTLoad = *It;
702 
703   uint64_t HeaderSize = getHeaderSize();
704   if (HeaderSize <= Min || Script->hasPhdrsCommands()) {
705     Min = alignDown(Min - HeaderSize, Config->MaxPageSize);
706     Out::ElfHeader->Addr = Min;
707     Out::ProgramHeaders->Addr = Min + Out::ElfHeader->Size;
708     return;
709   }
710 
711   assert(FirstPTLoad->First == Out::ElfHeader);
712   OutputSection *ActualFirst = nullptr;
713   for (OutputSection *Sec : OutputSections) {
714     if (Sec->FirstInPtLoad == Out::ElfHeader) {
715       ActualFirst = Sec;
716       break;
717     }
718   }
719   if (ActualFirst) {
720     for (OutputSection *Sec : OutputSections)
721       if (Sec->FirstInPtLoad == Out::ElfHeader)
722         Sec->FirstInPtLoad = ActualFirst;
723     FirstPTLoad->First = ActualFirst;
724   } else {
725     Phdrs.erase(It);
726   }
727 
728   auto PhdrI = llvm::find_if(
729       Phdrs, [](const PhdrEntry *E) { return E->p_type == PT_PHDR; });
730   if (PhdrI != Phdrs.end())
731     Phdrs.erase(PhdrI);
732 }
733 
734 LinkerScript::AddressState::AddressState(const ScriptConfiguration &Opt) {
735   for (auto &MRI : Opt.MemoryRegions) {
736     const MemoryRegion *MR = &MRI.second;
737     MemRegionOffset[MR] = MR->Origin;
738   }
739 }
740 
741 void LinkerScript::assignAddresses() {
742   // Assign addresses as instructed by linker script SECTIONS sub-commands.
743   Dot = 0;
744   auto State = make_unique<AddressState>(Opt);
745   // CurAddressState captures the local AddressState and makes it accessible
746   // deliberately. This is needed as there are some cases where we cannot just
747   // thread the current state through to a lambda function created by the
748   // script parser.
749   CurAddressState = State.get();
750   ErrorOnMissingSection = true;
751   switchTo(Aether);
752 
753   for (BaseCommand *Base : Opt.Commands) {
754     if (auto *Cmd = dyn_cast<SymbolAssignment>(Base)) {
755       assignSymbol(Cmd, false);
756       continue;
757     }
758 
759     if (auto *Cmd = dyn_cast<AssertCommand>(Base)) {
760       Cmd->Expression();
761       continue;
762     }
763 
764     assignOffsets(cast<OutputSection>(Base));
765   }
766   CurAddressState = nullptr;
767 }
768 
769 // Creates program headers as instructed by PHDRS linker script command.
770 std::vector<PhdrEntry *> LinkerScript::createPhdrs() {
771   std::vector<PhdrEntry *> Ret;
772 
773   // Process PHDRS and FILEHDR keywords because they are not
774   // real output sections and cannot be added in the following loop.
775   for (const PhdrsCommand &Cmd : Opt.PhdrsCommands) {
776     PhdrEntry *Phdr =
777         make<PhdrEntry>(Cmd.Type, Cmd.Flags == UINT_MAX ? PF_R : Cmd.Flags);
778 
779     if (Cmd.HasFilehdr)
780       Phdr->add(Out::ElfHeader);
781     if (Cmd.HasPhdrs)
782       Phdr->add(Out::ProgramHeaders);
783 
784     if (Cmd.LMAExpr) {
785       Phdr->p_paddr = Cmd.LMAExpr().getValue();
786       Phdr->HasLMA = true;
787     }
788     Ret.push_back(Phdr);
789   }
790 
791   // Add output sections to program headers.
792   for (OutputSection *Sec : OutputSections) {
793     // Assign headers specified by linker script
794     for (size_t Id : getPhdrIndices(Sec)) {
795       Ret[Id]->add(Sec);
796       if (Opt.PhdrsCommands[Id].Flags == UINT_MAX)
797         Ret[Id]->p_flags |= Sec->getPhdrFlags();
798     }
799   }
800   return Ret;
801 }
802 
803 bool LinkerScript::ignoreInterpSection() {
804   // Ignore .interp section in case we have PHDRS specification
805   // and PT_INTERP isn't listed.
806   if (Opt.PhdrsCommands.empty())
807     return false;
808   for (PhdrsCommand &Cmd : Opt.PhdrsCommands)
809     if (Cmd.Type == PT_INTERP)
810       return false;
811   return true;
812 }
813 
814 ExprValue LinkerScript::getSymbolValue(const Twine &Loc, StringRef S) {
815   if (S == ".")
816     return {CurAddressState->OutSec, Dot - CurAddressState->OutSec->Addr, Loc};
817   if (SymbolBody *B = Symtab->find(S)) {
818     if (auto *D = dyn_cast<DefinedRegular>(B))
819       return {D->Section, D->Value, Loc};
820     if (auto *C = dyn_cast<DefinedCommon>(B))
821       return {InX::Common, C->Offset, Loc};
822   }
823   error(Loc + ": symbol not found: " + S);
824   return 0;
825 }
826 
827 bool LinkerScript::isDefined(StringRef S) { return Symtab->find(S) != nullptr; }
828 
// Sentinel index meaning "no such program header": the largest size_t value.
static const size_t NoPhdr = std::numeric_limits<size_t>::max();
830 
831 // Returns indices of ELF headers containing specific section. Each index is a
832 // zero based number of ELF header listed within PHDRS {} script block.
833 std::vector<size_t> LinkerScript::getPhdrIndices(OutputSection *Cmd) {
834   std::vector<size_t> Ret;
835   for (StringRef PhdrName : Cmd->Phdrs) {
836     size_t Index = getPhdrIndex(Cmd->Location, PhdrName);
837     if (Index != NoPhdr)
838       Ret.push_back(Index);
839   }
840   return Ret;
841 }
842 
843 // Returns the index of the segment named PhdrName if found otherwise
844 // NoPhdr. When not found, if PhdrName is not the special case value 'NONE'
845 // (which can be used to explicitly specify that a section isn't assigned to a
846 // segment) then error.
847 size_t LinkerScript::getPhdrIndex(const Twine &Loc, StringRef PhdrName) {
848   size_t I = 0;
849   for (PhdrsCommand &Cmd : Opt.PhdrsCommands) {
850     if (Cmd.Name == PhdrName)
851       return I;
852     ++I;
853   }
854   if (PhdrName != "NONE")
855     error(Loc + ": section header '" + PhdrName + "' is not listed in PHDRS");
856   return NoPhdr;
857 }
858