1 //===- LinkerScript.cpp ---------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the parser/evaluator of the linker script.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "LinkerScript.h"
15 #include "Config.h"
16 #include "Driver.h"
17 #include "InputSection.h"
18 #include "Memory.h"
19 #include "OutputSections.h"
20 #include "ScriptLexer.h"
21 #include "Strings.h"
22 #include "SymbolTable.h"
23 #include "Symbols.h"
24 #include "SyntheticSections.h"
25 #include "Target.h"
26 #include "Writer.h"
27 #include "llvm/ADT/STLExtras.h"
28 #include "llvm/ADT/SmallString.h"
29 #include "llvm/ADT/StringRef.h"
30 #include "llvm/ADT/StringSwitch.h"
31 #include "llvm/Support/Casting.h"
32 #include "llvm/Support/ELF.h"
33 #include "llvm/Support/Endian.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/FileSystem.h"
36 #include "llvm/Support/MathExtras.h"
37 #include "llvm/Support/Path.h"
38 #include <algorithm>
39 #include <cassert>
40 #include <cstddef>
41 #include <cstdint>
42 #include <iterator>
43 #include <limits>
44 #include <memory>
45 #include <string>
46 #include <tuple>
47 #include <vector>
48 
49 using namespace llvm;
50 using namespace llvm::ELF;
51 using namespace llvm::object;
52 using namespace llvm::support::endian;
53 using namespace lld;
54 using namespace lld::elf;
55 
56 LinkerScriptBase *elf::ScriptBase;
57 ScriptConfiguration *elf::ScriptConfig;
58 
59 template <class ELFT> static SymbolBody *addRegular(SymbolAssignment *Cmd) {
60   Symbol *Sym;
61   uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT;
62   std::tie(Sym, std::ignore) = Symtab<ELFT>::X->insert(
63       Cmd->Name, /*Type*/ 0, Visibility, /*CanOmitFromDynSym*/ false,
64       /*File*/ nullptr);
65   Sym->Binding = STB_GLOBAL;
66   SectionBase *Sec =
67       Cmd->Expression.IsAbsolute() ? nullptr : Cmd->Expression.Section();
68   replaceBody<DefinedRegular>(Sym, Cmd->Name, /*IsLocal=*/false, Visibility,
69                               STT_NOTYPE, 0, 0, Sec, nullptr);
70   return Sym->body();
71 }
72 
73 static bool isUnderSysroot(StringRef Path) {
74   if (Config->Sysroot == "")
75     return false;
76   for (; !Path.empty(); Path = sys::path::parent_path(Path))
77     if (sys::fs::equivalent(Config->Sysroot, Path))
78       return true;
79   return false;
80 }
81 
82 OutputSection *LinkerScriptBase::getOutputSection(const Twine &Loc,
83                                                   StringRef Name) {
84   static OutputSection FakeSec("", 0, 0);
85 
86   for (OutputSection *Sec : *OutputSections)
87     if (Sec->Name == Name)
88       return Sec;
89 
90   error(Loc + ": undefined section " + Name);
91   return &FakeSec;
92 }
93 
94 // This function is essentially the same as getOutputSection(Name)->Size,
95 // but it won't print out an error message if a given section is not found.
96 //
97 // Linker script does not create an output section if its content is empty.
98 // We want to allow SIZEOF(.foo) where .foo is a section which happened to
99 // be empty. That is why this function is different from getOutputSection().
100 uint64_t LinkerScriptBase::getOutputSectionSize(StringRef Name) {
101   for (OutputSection *Sec : *OutputSections)
102     if (Sec->Name == Name)
103       return Sec->Size;
104   return 0;
105 }
106 
107 void LinkerScriptBase::setDot(Expr E, const Twine &Loc, bool InSec) {
108   uint64_t Val = E();
109   if (Val < Dot) {
110     if (InSec)
111       error(Loc + ": unable to move location counter backward for: " +
112             CurOutSec->Name);
113     else
114       error(Loc + ": unable to move location counter backward");
115   }
116   Dot = Val;
117   // Update to location counter means update to section size.
118   if (InSec)
119     CurOutSec->Size = Dot - CurOutSec->Addr;
120 }
121 
122 // Sets value of a symbol. Two kinds of symbols are processed: synthetic
123 // symbols, whose value is an offset from beginning of section and regular
124 // symbols whose value is absolute.
125 void LinkerScriptBase::assignSymbol(SymbolAssignment *Cmd, bool InSec) {
126   if (Cmd->Name == ".") {
127     setDot(Cmd->Expression, Cmd->Location, InSec);
128     return;
129   }
130 
131   if (!Cmd->Sym)
132     return;
133 
134   auto *Sym = cast<DefinedRegular>(Cmd->Sym);
135   Sym->Value = Cmd->Expression();
136   if (!Cmd->Expression.IsAbsolute()) {
137     Sym->Section = Cmd->Expression.Section();
138     if (auto *Sec = dyn_cast_or_null<OutputSection>(Sym->Section))
139       if (Sec->Flags & SHF_ALLOC)
140         Sym->Value -= Sec->Addr;
141   }
142 }
143 
144 template <class ELFT>
145 void LinkerScript<ELFT>::addSymbol(SymbolAssignment *Cmd) {
146   if (Cmd->Name == ".")
147     return;
148 
149   // If a symbol was in PROVIDE(), we need to define it only when
150   // it is a referenced undefined symbol.
151   SymbolBody *B = Symtab<ELFT>::X->find(Cmd->Name);
152   if (Cmd->Provide && (!B || B->isDefined()))
153     return;
154 
155   Cmd->Sym = addRegular<ELFT>(Cmd);
156 
157   // If there are sections, then let the value be assigned later in
158   // `assignAddresses`.
159   if (!ScriptConfig->HasSections)
160     assignSymbol(Cmd);
161 }
162 
// LLVM-style RTTI hook: true if C is tagged as a symbol assignment.
bool SymbolAssignment::classof(const BaseCommand *C) {
  return C->Kind == AssignmentKind;
}
166 
// LLVM-style RTTI hook: true if C is tagged as an output section command.
bool OutputSectionCommand::classof(const BaseCommand *C) {
  return C->Kind == OutputSectionKind;
}
170 
// LLVM-style RTTI hook: true if C is tagged as an input section description.
bool InputSectionDescription::classof(const BaseCommand *C) {
  return C->Kind == InputSectionKind;
}
174 
// LLVM-style RTTI hook: true if C is tagged as an ASSERT command.
bool AssertCommand::classof(const BaseCommand *C) {
  return C->Kind == AssertKind;
}
178 
// LLVM-style RTTI hook: true if C is tagged as a bytes-data command.
bool BytesDataCommand::classof(const BaseCommand *C) {
  return C->Kind == BytesDataKind;
}
182 
// The constructor and destructor are defaulted out of line, in this file,
// rather than in the class definition.
template <class ELFT> LinkerScript<ELFT>::LinkerScript() = default;
template <class ELFT> LinkerScript<ELFT>::~LinkerScript() = default;
185 
186 static StringRef basename(InputSectionBase *S) {
187   if (S->File)
188     return sys::path::filename(S->File->getName());
189   return "";
190 }
191 
192 bool LinkerScriptBase::shouldKeep(InputSectionBase *S) {
193   for (InputSectionDescription *ID : Opt.KeptSections)
194     if (ID->FilePat.match(basename(S)))
195       for (SectionPattern &P : ID->SectionPatterns)
196         if (P.SectionPat.match(S->Name))
197           return true;
198   return false;
199 }
200 
201 static bool comparePriority(InputSectionBase *A, InputSectionBase *B) {
202   return getPriority(A->Name) < getPriority(B->Name);
203 }
204 
205 static bool compareName(InputSectionBase *A, InputSectionBase *B) {
206   return A->Name < B->Name;
207 }
208 
209 static bool compareAlignment(InputSectionBase *A, InputSectionBase *B) {
210   // ">" is not a mistake. Larger alignments are placed before smaller
211   // alignments in order to reduce the amount of padding necessary.
212   // This is compatible with GNU.
213   return A->Alignment > B->Alignment;
214 }
215 
216 static std::function<bool(InputSectionBase *, InputSectionBase *)>
217 getComparator(SortSectionPolicy K) {
218   switch (K) {
219   case SortSectionPolicy::Alignment:
220     return compareAlignment;
221   case SortSectionPolicy::Name:
222     return compareName;
223   case SortSectionPolicy::Priority:
224     return comparePriority;
225   default:
226     llvm_unreachable("unknown sort policy");
227   }
228 }
229 
230 static bool matchConstraints(ArrayRef<InputSectionBase *> Sections,
231                              ConstraintKind Kind) {
232   if (Kind == ConstraintKind::NoConstraint)
233     return true;
234   bool IsRW = llvm::any_of(Sections, [=](InputSectionBase *Sec2) {
235     auto *Sec = static_cast<InputSectionBase *>(Sec2);
236     return Sec->Flags & SHF_WRITE;
237   });
238   return (IsRW && Kind == ConstraintKind::ReadWrite) ||
239          (!IsRW && Kind == ConstraintKind::ReadOnly);
240 }
241 
242 static void sortSections(InputSectionBase **Begin, InputSectionBase **End,
243                          SortSectionPolicy K) {
244   if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None)
245     std::stable_sort(Begin, End, getComparator(K));
246 }
247 
248 // Compute and remember which sections the InputSectionDescription matches.
249 void LinkerScriptBase::computeInputSections(InputSectionDescription *I) {
250   // Collects all sections that satisfy constraints of I
251   // and attach them to I.
252   for (SectionPattern &Pat : I->SectionPatterns) {
253     size_t SizeBefore = I->Sections.size();
254 
255     for (InputSectionBase *S : InputSections) {
256       if (S->Assigned)
257         continue;
258       // For -emit-relocs we have to ignore entries like
259       //   .rela.dyn : { *(.rela.data) }
260       // which are common because they are in the default bfd script.
261       if (S->Type == SHT_REL || S->Type == SHT_RELA)
262         continue;
263 
264       StringRef Filename = basename(S);
265       if (!I->FilePat.match(Filename) || Pat.ExcludedFilePat.match(Filename))
266         continue;
267       if (!Pat.SectionPat.match(S->Name))
268         continue;
269       I->Sections.push_back(S);
270       S->Assigned = true;
271     }
272 
273     // Sort sections as instructed by SORT-family commands and --sort-section
274     // option. Because SORT-family commands can be nested at most two depth
275     // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command
276     // line option is respected even if a SORT command is given, the exact
277     // behavior we have here is a bit complicated. Here are the rules.
278     //
279     // 1. If two SORT commands are given, --sort-section is ignored.
280     // 2. If one SORT command is given, and if it is not SORT_NONE,
281     //    --sort-section is handled as an inner SORT command.
282     // 3. If one SORT command is given, and if it is SORT_NONE, don't sort.
283     // 4. If no SORT command is given, sort according to --sort-section.
284     InputSectionBase **Begin = I->Sections.data() + SizeBefore;
285     InputSectionBase **End = I->Sections.data() + I->Sections.size();
286     if (Pat.SortOuter != SortSectionPolicy::None) {
287       if (Pat.SortInner == SortSectionPolicy::Default)
288         sortSections(Begin, End, Config->SortSection);
289       else
290         sortSections(Begin, End, Pat.SortInner);
291       sortSections(Begin, End, Pat.SortOuter);
292     }
293   }
294 }
295 
296 template <class ELFT>
297 void LinkerScript<ELFT>::discard(ArrayRef<InputSectionBase *> V) {
298   for (InputSectionBase *S : V) {
299     S->Live = false;
300     if (S == In<ELFT>::ShStrTab)
301       error("discarding .shstrtab section is not allowed");
302     discard(S->DependentSections);
303   }
304 }
305 
306 std::vector<InputSectionBase *>
307 LinkerScriptBase::createInputSectionList(OutputSectionCommand &OutCmd) {
308   std::vector<InputSectionBase *> Ret;
309 
310   for (const std::unique_ptr<BaseCommand> &Base : OutCmd.Commands) {
311     auto *Cmd = dyn_cast<InputSectionDescription>(Base.get());
312     if (!Cmd)
313       continue;
314     computeInputSections(Cmd);
315     for (InputSectionBase *S : Cmd->Sections)
316       Ret.push_back(static_cast<InputSectionBase *>(S));
317   }
318 
319   return Ret;
320 }
321 
322 template <class ELFT>
323 void LinkerScript<ELFT>::processCommands(OutputSectionFactory &Factory) {
324   // A symbol can be assigned before any section is mentioned in the linker
325   // script. In an DSO, the symbol values are addresses, so the only important
326   // section values are:
327   // * SHN_UNDEF
328   // * SHN_ABS
329   // * Any value meaning a regular section.
330   // To handle that, create a dummy aether section that fills the void before
331   // the linker scripts switches to another section. It has an index of one
332   // which will map to whatever the first actual section is.
333   Aether = make<OutputSection>("", 0, SHF_ALLOC);
334   Aether->SectionIndex = 1;
335   CurOutSec = Aether;
336 
337   for (unsigned I = 0; I < Opt.Commands.size(); ++I) {
338     auto Iter = Opt.Commands.begin() + I;
339     const std::unique_ptr<BaseCommand> &Base1 = *Iter;
340 
341     // Handle symbol assignments outside of any output section.
342     if (auto *Cmd = dyn_cast<SymbolAssignment>(Base1.get())) {
343       addSymbol(Cmd);
344       continue;
345     }
346 
347     if (auto *Cmd = dyn_cast<AssertCommand>(Base1.get())) {
348       // If we don't have SECTIONS then output sections have already been
349       // created by Writer<ELFT>. The LinkerScript<ELFT>::assignAddresses
350       // will not be called, so ASSERT should be evaluated now.
351       if (!Opt.HasSections)
352         Cmd->Expression();
353       continue;
354     }
355 
356     if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base1.get())) {
357       std::vector<InputSectionBase *> V = createInputSectionList(*Cmd);
358 
359       // The output section name `/DISCARD/' is special.
360       // Any input section assigned to it is discarded.
361       if (Cmd->Name == "/DISCARD/") {
362         discard(V);
363         continue;
364       }
365 
366       // This is for ONLY_IF_RO and ONLY_IF_RW. An output section directive
367       // ".foo : ONLY_IF_R[OW] { ... }" is handled only if all member input
368       // sections satisfy a given constraint. If not, a directive is handled
369       // as if it wasn't present from the beginning.
370       //
371       // Because we'll iterate over Commands many more times, the easiest
372       // way to "make it as if it wasn't present" is to just remove it.
373       if (!matchConstraints(V, Cmd->Constraint)) {
374         for (InputSectionBase *S : V)
375           S->Assigned = false;
376         Opt.Commands.erase(Iter);
377         --I;
378         continue;
379       }
380 
381       // A directive may contain symbol definitions like this:
382       // ".foo : { ...; bar = .; }". Handle them.
383       for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands)
384         if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base.get()))
385           addSymbol(OutCmd);
386 
387       // Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign
388       // is given, input sections are aligned to that value, whether the
389       // given value is larger or smaller than the original section alignment.
390       if (Cmd->SubalignExpr) {
391         uint32_t Subalign = Cmd->SubalignExpr();
392         for (InputSectionBase *S : V)
393           S->Alignment = Subalign;
394       }
395 
396       // Add input sections to an output section.
397       for (InputSectionBase *S : V)
398         Factory.addInputSec(S, Cmd->Name);
399     }
400   }
401   CurOutSec = nullptr;
402 }
403 
404 // Add sections that didn't match any sections command.
405 void LinkerScriptBase::addOrphanSections(OutputSectionFactory &Factory) {
406   for (InputSectionBase *S : InputSections)
407     if (S->Live && !S->OutSec)
408       Factory.addInputSec(S, getOutputSectionName(S->Name));
409 }
410 
411 static bool isTbss(OutputSection *Sec) {
412   return (Sec->Flags & SHF_TLS) && Sec->Type == SHT_NOBITS;
413 }
414 
415 void LinkerScriptBase::output(InputSection *S) {
416   if (!AlreadyOutputIS.insert(S).second)
417     return;
418   bool IsTbss = isTbss(CurOutSec);
419 
420   uint64_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot;
421   Pos = alignTo(Pos, S->Alignment);
422   S->OutSecOff = Pos - CurOutSec->Addr;
423   Pos += S->getSize();
424 
425   // Update output section size after adding each section. This is so that
426   // SIZEOF works correctly in the case below:
427   // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) }
428   CurOutSec->Size = Pos - CurOutSec->Addr;
429 
430   // If there is a memory region associated with this input section, then
431   // place the section in that region and update the region index.
432   if (CurMemRegion) {
433     CurMemRegion->Offset += CurOutSec->Size;
434     uint64_t CurSize = CurMemRegion->Offset - CurMemRegion->Origin;
435     if (CurSize > CurMemRegion->Length) {
436       uint64_t OverflowAmt = CurSize - CurMemRegion->Length;
437       error("section '" + CurOutSec->Name + "' will not fit in region '" +
438             CurMemRegion->Name + "': overflowed by " + Twine(OverflowAmt) +
439             " bytes");
440     }
441   }
442 
443   if (IsTbss)
444     ThreadBssOffset = Pos - Dot;
445   else
446     Dot = Pos;
447 }
448 
449 void LinkerScriptBase::flush() {
450   assert(CurOutSec);
451   if (!AlreadyOutputOS.insert(CurOutSec).second)
452     return;
453   for (InputSection *I : CurOutSec->Sections)
454     output(I);
455 }
456 
457 void LinkerScriptBase::switchTo(OutputSection *Sec) {
458   if (CurOutSec == Sec)
459     return;
460   if (AlreadyOutputOS.count(Sec))
461     return;
462 
463   CurOutSec = Sec;
464 
465   Dot = alignTo(Dot, CurOutSec->Alignment);
466   CurOutSec->Addr = isTbss(CurOutSec) ? Dot + ThreadBssOffset : Dot;
467 
468   // If neither AT nor AT> is specified for an allocatable section, the linker
469   // will set the LMA such that the difference between VMA and LMA for the
470   // section is the same as the preceding output section in the same region
471   // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html
472   if (LMAOffset)
473     CurOutSec->LMAOffset = LMAOffset();
474 }
475 
476 void LinkerScriptBase::process(BaseCommand &Base) {
477   // This handles the assignments to symbol or to a location counter (.)
478   if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) {
479     assignSymbol(AssignCmd, true);
480     return;
481   }
482 
483   // Handle BYTE(), SHORT(), LONG(), or QUAD().
484   if (auto *DataCmd = dyn_cast<BytesDataCommand>(&Base)) {
485     DataCmd->Offset = Dot - CurOutSec->Addr;
486     Dot += DataCmd->Size;
487     CurOutSec->Size = Dot - CurOutSec->Addr;
488     return;
489   }
490 
491   if (auto *AssertCmd = dyn_cast<AssertCommand>(&Base)) {
492     AssertCmd->Expression();
493     return;
494   }
495 
496   // It handles single input section description command,
497   // calculates and assigns the offsets for each section and also
498   // updates the output section size.
499   auto &ICmd = cast<InputSectionDescription>(Base);
500   for (InputSectionBase *IB : ICmd.Sections) {
501     // We tentatively added all synthetic sections at the beginning and removed
502     // empty ones afterwards (because there is no way to know whether they were
503     // going be empty or not other than actually running linker scripts.)
504     // We need to ignore remains of empty sections.
505     if (auto *Sec = dyn_cast<SyntheticSection>(IB))
506       if (Sec->empty())
507         continue;
508 
509     if (!IB->Live)
510       continue;
511     assert(CurOutSec == IB->OutSec || AlreadyOutputOS.count(IB->OutSec));
512     output(cast<InputSection>(IB));
513   }
514 }
515 
516 static OutputSection *
517 findSection(StringRef Name, const std::vector<OutputSection *> &Sections) {
518   auto End = Sections.end();
519   auto HasName = [=](OutputSection *Sec) { return Sec->Name == Name; };
520   auto I = std::find_if(Sections.begin(), End, HasName);
521   std::vector<OutputSection *> Ret;
522   if (I == End)
523     return nullptr;
524   assert(std::find_if(I + 1, End, HasName) == End);
525   return *I;
526 }
527 
528 // This function searches for a memory region to place the given output
529 // section in. If found, a pointer to the appropriate memory region is
530 // returned. Otherwise, a nullptr is returned.
531 MemoryRegion *LinkerScriptBase::findMemoryRegion(OutputSectionCommand *Cmd,
532                                                  OutputSection *Sec) {
533   // If a memory region name was specified in the output section command,
534   // then try to find that region first.
535   if (!Cmd->MemoryRegionName.empty()) {
536     auto It = Opt.MemoryRegions.find(Cmd->MemoryRegionName);
537     if (It != Opt.MemoryRegions.end())
538       return &It->second;
539     error("memory region '" + Cmd->MemoryRegionName + "' not declared");
540     return nullptr;
541   }
542 
543   // The memory region name is empty, thus a suitable region must be
544   // searched for in the region map. If the region map is empty, just
545   // return. Note that this check doesn't happen at the very beginning
546   // so that uses of undeclared regions can be caught.
547   if (!Opt.MemoryRegions.size())
548     return nullptr;
549 
550   // See if a region can be found by matching section flags.
551   for (auto &MRI : Opt.MemoryRegions) {
552     MemoryRegion &MR = MRI.second;
553     if ((MR.Flags & Sec->Flags) != 0 && (MR.NegFlags & Sec->Flags) == 0)
554       return &MR;
555   }
556 
557   // Otherwise, no suitable region was found.
558   if (Sec->Flags & SHF_ALLOC)
559     error("no memory region specified for section '" + Sec->Name + "'");
560   return nullptr;
561 }
562 
563 // This function assigns offsets to input sections and an output section
564 // for a single sections command (e.g. ".text { *(.text); }").
565 void LinkerScriptBase::assignOffsets(OutputSectionCommand *Cmd) {
566   OutputSection *Sec = findSection(Cmd->Name, *OutputSections);
567   if (!Sec)
568     return;
569 
570   if (Cmd->AddrExpr && Sec->Flags & SHF_ALLOC)
571     setDot(Cmd->AddrExpr, Cmd->Location);
572 
573   if (Cmd->LMAExpr) {
574     uint64_t D = Dot;
575     LMAOffset = [=] { return Cmd->LMAExpr() - D; };
576   }
577 
578   // Handle align (e.g. ".foo : ALIGN(16) { ... }").
579   if (Cmd->AlignExpr)
580     Sec->updateAlignment(Cmd->AlignExpr());
581 
582   // Try and find an appropriate memory region to assign offsets in.
583   CurMemRegion = findMemoryRegion(Cmd, Sec);
584   if (CurMemRegion)
585     Dot = CurMemRegion->Offset;
586   switchTo(Sec);
587 
588   // Find the last section output location. We will output orphan sections
589   // there so that end symbols point to the correct location.
590   auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(),
591                         [](const std::unique_ptr<BaseCommand> &Cmd) {
592                           return !isa<SymbolAssignment>(*Cmd);
593                         })
594                .base();
595   for (auto I = Cmd->Commands.begin(); I != E; ++I)
596     process(**I);
597   flush();
598   std::for_each(E, Cmd->Commands.end(),
599                 [this](std::unique_ptr<BaseCommand> &B) { process(*B.get()); });
600 }
601 
602 void LinkerScriptBase::removeEmptyCommands() {
603   // It is common practice to use very generic linker scripts. So for any
604   // given run some of the output sections in the script will be empty.
605   // We could create corresponding empty output sections, but that would
606   // clutter the output.
607   // We instead remove trivially empty sections. The bfd linker seems even
608   // more aggressive at removing them.
609   auto Pos = std::remove_if(
610       Opt.Commands.begin(), Opt.Commands.end(),
611       [&](const std::unique_ptr<BaseCommand> &Base) {
612         if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()))
613           return !findSection(Cmd->Name, *OutputSections);
614         return false;
615       });
616   Opt.Commands.erase(Pos, Opt.Commands.end());
617 }
618 
619 static bool isAllSectionDescription(const OutputSectionCommand &Cmd) {
620   for (const std::unique_ptr<BaseCommand> &I : Cmd.Commands)
621     if (!isa<InputSectionDescription>(*I))
622       return false;
623   return true;
624 }
625 
626 void LinkerScriptBase::adjustSectionsBeforeSorting() {
627   // If the output section contains only symbol assignments, create a
628   // corresponding output section. The bfd linker seems to only create them if
629   // '.' is assigned to, but creating these section should not have any bad
630   // consequeces and gives us a section to put the symbol in.
631   uint64_t Flags = SHF_ALLOC;
632   uint32_t Type = SHT_NOBITS;
633   for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
634     auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
635     if (!Cmd)
636       continue;
637     if (OutputSection *Sec = findSection(Cmd->Name, *OutputSections)) {
638       Flags = Sec->Flags;
639       Type = Sec->Type;
640       continue;
641     }
642 
643     if (isAllSectionDescription(*Cmd))
644       continue;
645 
646     auto *OutSec = make<OutputSection>(Cmd->Name, Type, Flags);
647     OutputSections->push_back(OutSec);
648   }
649 }
650 
651 void LinkerScriptBase::adjustSectionsAfterSorting() {
652   placeOrphanSections();
653 
654   // If output section command doesn't specify any segments,
655   // and we haven't previously assigned any section to segment,
656   // then we simply assign section to the very first load segment.
657   // Below is an example of such linker script:
658   // PHDRS { seg PT_LOAD; }
659   // SECTIONS { .aaa : { *(.aaa) } }
660   std::vector<StringRef> DefPhdrs;
661   auto FirstPtLoad =
662       std::find_if(Opt.PhdrsCommands.begin(), Opt.PhdrsCommands.end(),
663                    [](const PhdrsCommand &Cmd) { return Cmd.Type == PT_LOAD; });
664   if (FirstPtLoad != Opt.PhdrsCommands.end())
665     DefPhdrs.push_back(FirstPtLoad->Name);
666 
667   // Walk the commands and propagate the program headers to commands that don't
668   // explicitly specify them.
669   for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
670     auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
671     if (!Cmd)
672       continue;
673     if (Cmd->Phdrs.empty())
674       Cmd->Phdrs = DefPhdrs;
675     else
676       DefPhdrs = Cmd->Phdrs;
677   }
678 
679   removeEmptyCommands();
680 }
681 
682 // When placing orphan sections, we want to place them after symbol assignments
683 // so that an orphan after
684 //   begin_foo = .;
685 //   foo : { *(foo) }
686 //   end_foo = .;
687 // doesn't break the intended meaning of the begin/end symbols.
688 // We don't want to go over sections since Writer<ELFT>::sortSections is the
689 // one in charge of deciding the order of the sections.
690 // We don't want to go over alignments, since doing so in
691 //  rx_sec : { *(rx_sec) }
692 //  . = ALIGN(0x1000);
693 //  /* The RW PT_LOAD starts here*/
694 //  rw_sec : { *(rw_sec) }
695 // would mean that the RW PT_LOAD would become unaligned.
696 static bool shouldSkip(const BaseCommand &Cmd) {
697   if (isa<OutputSectionCommand>(Cmd))
698     return false;
699   const auto *Assign = dyn_cast<SymbolAssignment>(&Cmd);
700   if (!Assign)
701     return true;
702   return Assign->Name != ".";
703 }
704 
705 // Orphan sections are sections present in the input files which are
706 // not explicitly placed into the output file by the linker script.
707 //
708 // When the control reaches this function, Opt.Commands contains
709 // output section commands for non-orphan sections only. This function
710 // adds new elements for orphan sections to Opt.Commands so that all
711 // sections are explicitly handled by Opt.Commands.
712 //
713 // Writer<ELFT>::sortSections has already sorted output sections.
714 // What we need to do is to scan OutputSections vector and
715 // Opt.Commands in parallel to find orphan sections. If there is an
716 // output section that doesn't have a corresponding entry in
717 // Opt.Commands, we will insert a new entry to Opt.Commands.
718 //
719 // There is some ambiguity as to where exactly a new entry should be
720 // inserted, because Opt.Commands contains not only output section
721 // commands but other types of commands such as symbol assignment
722 // expressions. There's no correct answer here due to the lack of the
723 // formal specification of the linker script. We use heuristics to
724 // determine whether a new output command should be added before or
725 // after another commands. For the details, look at shouldSkip
726 // function.
727 void LinkerScriptBase::placeOrphanSections() {
728   // The OutputSections are already in the correct order.
729   // This loops creates or moves commands as needed so that they are in the
730   // correct order.
731   int CmdIndex = 0;
732 
733   // As a horrible special case, skip the first . assignment if it is before any
734   // section. We do this because it is common to set a load address by starting
735   // the script with ". = 0xabcd" and the expectation is that every section is
736   // after that.
737   auto FirstSectionOrDotAssignment =
738       std::find_if(Opt.Commands.begin(), Opt.Commands.end(),
739                    [](const std::unique_ptr<BaseCommand> &Cmd) {
740                      if (isa<OutputSectionCommand>(*Cmd))
741                        return true;
742                      const auto *Assign = dyn_cast<SymbolAssignment>(Cmd.get());
743                      if (!Assign)
744                        return false;
745                      return Assign->Name == ".";
746                    });
747   if (FirstSectionOrDotAssignment != Opt.Commands.end()) {
748     CmdIndex = FirstSectionOrDotAssignment - Opt.Commands.begin();
749     if (isa<SymbolAssignment>(**FirstSectionOrDotAssignment))
750       ++CmdIndex;
751   }
752 
753   for (OutputSection *Sec : *OutputSections) {
754     StringRef Name = Sec->Name;
755 
756     // Find the last spot where we can insert a command and still get the
757     // correct result.
758     auto CmdIter = Opt.Commands.begin() + CmdIndex;
759     auto E = Opt.Commands.end();
760     while (CmdIter != E && shouldSkip(**CmdIter)) {
761       ++CmdIter;
762       ++CmdIndex;
763     }
764 
765     auto Pos =
766         std::find_if(CmdIter, E, [&](const std::unique_ptr<BaseCommand> &Base) {
767           auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
768           return Cmd && Cmd->Name == Name;
769         });
770     if (Pos == E) {
771       Opt.Commands.insert(CmdIter,
772                           llvm::make_unique<OutputSectionCommand>(Name));
773       ++CmdIndex;
774       continue;
775     }
776 
777     // Continue from where we found it.
778     CmdIndex = (Pos - Opt.Commands.begin()) + 1;
779   }
780 }
781 
782 void LinkerScriptBase::assignAddresses(std::vector<PhdrEntry> &Phdrs) {
783   // Assign addresses as instructed by linker script SECTIONS sub-commands.
784   Dot = 0;
785   switchTo(Aether);
786 
787   for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
788     if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) {
789       assignSymbol(Cmd);
790       continue;
791     }
792 
793     if (auto *Cmd = dyn_cast<AssertCommand>(Base.get())) {
794       Cmd->Expression();
795       continue;
796     }
797 
798     auto *Cmd = cast<OutputSectionCommand>(Base.get());
799     assignOffsets(Cmd);
800   }
801 
802   uint64_t MinVA = std::numeric_limits<uint64_t>::max();
803   for (OutputSection *Sec : *OutputSections) {
804     if (Sec->Flags & SHF_ALLOC)
805       MinVA = std::min<uint64_t>(MinVA, Sec->Addr);
806     else
807       Sec->Addr = 0;
808   }
809 
810   allocateHeaders(Phdrs, *OutputSections, MinVA);
811 }
812 
813 // Creates program headers as instructed by PHDRS linker script command.
814 std::vector<PhdrEntry> LinkerScriptBase::createPhdrs() {
815   std::vector<PhdrEntry> Ret;
816 
817   // Process PHDRS and FILEHDR keywords because they are not
818   // real output sections and cannot be added in the following loop.
819   for (const PhdrsCommand &Cmd : Opt.PhdrsCommands) {
820     Ret.emplace_back(Cmd.Type, Cmd.Flags == UINT_MAX ? PF_R : Cmd.Flags);
821     PhdrEntry &Phdr = Ret.back();
822 
823     if (Cmd.HasFilehdr)
824       Phdr.add(Out::ElfHeader);
825     if (Cmd.HasPhdrs)
826       Phdr.add(Out::ProgramHeaders);
827 
828     if (Cmd.LMAExpr) {
829       Phdr.p_paddr = Cmd.LMAExpr();
830       Phdr.HasLMA = true;
831     }
832   }
833 
834   // Add output sections to program headers.
835   for (OutputSection *Sec : *OutputSections) {
836     if (!(Sec->Flags & SHF_ALLOC))
837       break;
838 
839     // Assign headers specified by linker script
840     for (size_t Id : getPhdrIndices(Sec->Name)) {
841       Ret[Id].add(Sec);
842       if (Opt.PhdrsCommands[Id].Flags == UINT_MAX)
843         Ret[Id].p_flags |= Sec->getPhdrFlags();
844     }
845   }
846   return Ret;
847 }
848 
849 bool LinkerScriptBase::ignoreInterpSection() {
850   // Ignore .interp section in case we have PHDRS specification
851   // and PT_INTERP isn't listed.
852   return !Opt.PhdrsCommands.empty() &&
853          llvm::find_if(Opt.PhdrsCommands, [](const PhdrsCommand &Cmd) {
854            return Cmd.Type == PT_INTERP;
855          }) == Opt.PhdrsCommands.end();
856 }
857 
858 uint32_t LinkerScriptBase::getFiller(StringRef Name) {
859   for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands)
860     if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()))
861       if (Cmd->Name == Name)
862         return Cmd->Filler;
863   return 0;
864 }
865 
866 template <class ELFT>
867 static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) {
868   const endianness E = ELFT::TargetEndianness;
869 
870   switch (Size) {
871   case 1:
872     *Buf = (uint8_t)Data;
873     break;
874   case 2:
875     write16<E>(Buf, Data);
876     break;
877   case 4:
878     write32<E>(Buf, Data);
879     break;
880   case 8:
881     write64<E>(Buf, Data);
882     break;
883   default:
884     llvm_unreachable("unsupported Size argument");
885   }
886 }
887 
888 template <class ELFT>
889 void LinkerScript<ELFT>::writeDataBytes(StringRef Name, uint8_t *Buf) {
890   int I = getSectionIndex(Name);
891   if (I == INT_MAX)
892     return;
893 
894   auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get());
895   for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands)
896     if (auto *Data = dyn_cast<BytesDataCommand>(Base.get()))
897       writeInt<ELFT>(Buf + Data->Offset, Data->Expression(), Data->Size);
898 }
899 
900 bool LinkerScriptBase::hasLMA(StringRef Name) {
901   for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands)
902     if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()))
903       if (Cmd->LMAExpr && Cmd->Name == Name)
904         return true;
905   return false;
906 }
907 
908 // Returns the index of the given section name in linker script
909 // SECTIONS commands. Sections are laid out as the same order as they
910 // were in the script. If a given name did not appear in the script,
911 // it returns INT_MAX, so that it will be laid out at end of file.
912 int LinkerScriptBase::getSectionIndex(StringRef Name) {
913   for (int I = 0, E = Opt.Commands.size(); I != E; ++I)
914     if (auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get()))
915       if (Cmd->Name == Name)
916         return I;
917   return INT_MAX;
918 }
919 
920 template <class ELFT>
921 uint64_t LinkerScript<ELFT>::getSymbolValue(const Twine &Loc, StringRef S) {
922   if (S == ".")
923     return Dot;
924   if (SymbolBody *B = Symtab<ELFT>::X->find(S))
925     return B->getVA<ELFT>();
926   error(Loc + ": symbol not found: " + S);
927   return 0;
928 }
929 
930 template <class ELFT> bool LinkerScript<ELFT>::isDefined(StringRef S) {
931   return Symtab<ELFT>::X->find(S) != nullptr;
932 }
933 
934 template <class ELFT> bool LinkerScript<ELFT>::isAbsolute(StringRef S) {
935   if (S == ".")
936     return false;
937   SymbolBody *Sym = Symtab<ELFT>::X->find(S);
938   auto *DR = dyn_cast_or_null<DefinedRegular>(Sym);
939   return DR && !DR->Section;
940 }
941 
942 // Gets section symbol belongs to. Symbol "." doesn't belong to any
943 // specific section but isn't absolute at the same time, so we try
944 // to find suitable section for it as well.
945 template <class ELFT>
946 OutputSection *LinkerScript<ELFT>::getSymbolSection(StringRef S) {
947   if (SymbolBody *Sym = Symtab<ELFT>::X->find(S))
948     return Sym->getOutputSection<ELFT>();
949   return CurOutSec;
950 }
951 
952 // Returns indices of ELF headers containing specific section, identified
953 // by Name. Each index is a zero based number of ELF header listed within
954 // PHDRS {} script block.
955 std::vector<size_t> LinkerScriptBase::getPhdrIndices(StringRef SectionName) {
956   for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
957     auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
958     if (!Cmd || Cmd->Name != SectionName)
959       continue;
960 
961     std::vector<size_t> Ret;
962     for (StringRef PhdrName : Cmd->Phdrs)
963       Ret.push_back(getPhdrIndex(Cmd->Location, PhdrName));
964     return Ret;
965   }
966   return {};
967 }
968 
969 size_t LinkerScriptBase::getPhdrIndex(const Twine &Loc, StringRef PhdrName) {
970   size_t I = 0;
971   for (PhdrsCommand &Cmd : Opt.PhdrsCommands) {
972     if (Cmd.Name == PhdrName)
973       return I;
974     ++I;
975   }
976   error(Loc + ": section header '" + PhdrName + "' is not listed in PHDRS");
977   return 0;
978 }
979 
// Parser for linker scripts, version scripts and dynamic lists. Tokens come
// from the ScriptLexer base class; results are recorded in the script
// configuration (through Opt) and in Config.
class elf::ScriptParser final : public ScriptLexer {
  typedef void (ScriptParser::*Handler)();

public:
  ScriptParser(MemoryBufferRef MB)
      : ScriptLexer(MB),
        IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {}

  // Entry points, one per kind of input.
  void readLinkerScript();
  void readVersionScript();
  void readDynamicList();

private:
  // Hands a file or library named in the script over to the driver.
  void addFile(StringRef Path);

  // Readers for top-level linker script commands.
  void readAsNeeded();
  void readEntry();
  void readExtern();
  void readGroup();
  void readInclude();
  void readMemory();
  void readOutput();
  void readOutputArch();
  void readOutputFormat();
  void readPhdrs();
  void readSearchDir();
  void readSections();
  void readVersion();
  void readVersionScriptCommand();

  // Readers for the pieces that make up a SECTIONS command.
  SymbolAssignment *readAssignment(StringRef Name);
  BytesDataCommand *readBytesDataCommand(StringRef Tok);
  uint32_t readFill();
  OutputSectionCommand *readOutputSectionDescription(StringRef OutSec);
  uint32_t readOutputSectionFiller(StringRef Tok);
  std::vector<StringRef> readOutputSectionPhdrs();
  InputSectionDescription *readInputSectionDescription(StringRef Tok);
  StringMatcher readFilePatterns();
  std::vector<SectionPattern> readInputSectionsList();
  InputSectionDescription *readInputSectionRules(StringRef FilePattern);
  unsigned readPhdrType();
  SortSectionPolicy readSortKind();
  SymbolAssignment *readProvideHidden(bool Provide, bool Hidden);
  SymbolAssignment *readProvideOrAssignment(StringRef Tok);
  void readSort();
  Expr readAssert();

  // Readers for the MEMORY command.
  uint64_t readMemoryAssignment(StringRef, StringRef, StringRef);
  std::pair<uint32_t, uint32_t> readMemoryAttributes();

  // Expression parser.
  Expr readExpr();
  Expr readExpr1(Expr Lhs, int MinPrec);
  StringRef readParenLiteral();
  Expr readPrimary();
  Expr readTernary(Expr Cond);
  Expr readParenExpr();

  // For parsing version script.
  std::vector<SymbolVersion> readVersionExtern();
  void readAnonymousDeclaration();
  void readVersionDeclaration(StringRef VerStr);

  std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>>
  readSymbols();

  // Shorthand for the global script configuration being filled in.
  ScriptConfiguration &Opt = *ScriptConfig;
  // Computed once from the script's buffer identifier; consulted by
  // addFile() when resolving paths that start with "/".
  bool IsUnderSysroot;
};
1048 
1049 void ScriptParser::readDynamicList() {
1050   expect("{");
1051   readAnonymousDeclaration();
1052   if (!atEOF())
1053     setError("EOF expected, but got " + next());
1054 }
1055 
1056 void ScriptParser::readVersionScript() {
1057   readVersionScriptCommand();
1058   if (!atEOF())
1059     setError("EOF expected, but got " + next());
1060 }
1061 
1062 void ScriptParser::readVersionScriptCommand() {
1063   if (consume("{")) {
1064     readAnonymousDeclaration();
1065     return;
1066   }
1067 
1068   while (!atEOF() && !Error && peek() != "}") {
1069     StringRef VerStr = next();
1070     if (VerStr == "{") {
1071       setError("anonymous version definition is used in "
1072                "combination with other version definitions");
1073       return;
1074     }
1075     expect("{");
1076     readVersionDeclaration(VerStr);
1077   }
1078 }
1079 
// Reads the body of a VERSION command: version script contents enclosed
// in braces.
void ScriptParser::readVersion() {
  expect("{");
  readVersionScriptCommand();
  expect("}");
}
1085 
1086 void ScriptParser::readLinkerScript() {
1087   while (!atEOF()) {
1088     StringRef Tok = next();
1089     if (Tok == ";")
1090       continue;
1091 
1092     if (Tok == "ASSERT") {
1093       Opt.Commands.emplace_back(new AssertCommand(readAssert()));
1094     } else if (Tok == "ENTRY") {
1095       readEntry();
1096     } else if (Tok == "EXTERN") {
1097       readExtern();
1098     } else if (Tok == "GROUP" || Tok == "INPUT") {
1099       readGroup();
1100     } else if (Tok == "INCLUDE") {
1101       readInclude();
1102     } else if (Tok == "MEMORY") {
1103       readMemory();
1104     } else if (Tok == "OUTPUT") {
1105       readOutput();
1106     } else if (Tok == "OUTPUT_ARCH") {
1107       readOutputArch();
1108     } else if (Tok == "OUTPUT_FORMAT") {
1109       readOutputFormat();
1110     } else if (Tok == "PHDRS") {
1111       readPhdrs();
1112     } else if (Tok == "SEARCH_DIR") {
1113       readSearchDir();
1114     } else if (Tok == "SECTIONS") {
1115       readSections();
1116     } else if (Tok == "VERSION") {
1117       readVersion();
1118     } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) {
1119       Opt.Commands.emplace_back(Cmd);
1120     } else {
1121       setError("unknown directive: " + Tok);
1122     }
1123   }
1124 }
1125 
1126 void ScriptParser::addFile(StringRef S) {
1127   if (IsUnderSysroot && S.startswith("/")) {
1128     SmallString<128> PathData;
1129     StringRef Path = (Config->Sysroot + S).toStringRef(PathData);
1130     if (sys::fs::exists(Path)) {
1131       Driver->addFile(Saver.save(Path));
1132       return;
1133     }
1134   }
1135 
1136   if (sys::path::is_absolute(S)) {
1137     Driver->addFile(S);
1138   } else if (S.startswith("=")) {
1139     if (Config->Sysroot.empty())
1140       Driver->addFile(S.substr(1));
1141     else
1142       Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)));
1143   } else if (S.startswith("-l")) {
1144     Driver->addLibrary(S.substr(2));
1145   } else if (sys::fs::exists(S)) {
1146     Driver->addFile(S);
1147   } else {
1148     if (Optional<std::string> Path = findFromSearchPaths(S))
1149       Driver->addFile(Saver.save(*Path));
1150     else
1151       setError("unable to find " + S);
1152   }
1153 }
1154 
1155 void ScriptParser::readAsNeeded() {
1156   expect("(");
1157   bool Orig = Config->AsNeeded;
1158   Config->AsNeeded = true;
1159   while (!Error && !consume(")"))
1160     addFile(unquote(next()));
1161   Config->AsNeeded = Orig;
1162 }
1163 
1164 void ScriptParser::readEntry() {
1165   // -e <symbol> takes predecence over ENTRY(<symbol>).
1166   expect("(");
1167   StringRef Tok = next();
1168   if (Config->Entry.empty())
1169     Config->Entry = Tok;
1170   expect(")");
1171 }
1172 
1173 void ScriptParser::readExtern() {
1174   expect("(");
1175   while (!Error && !consume(")"))
1176     Config->Undefined.push_back(next());
1177 }
1178 
1179 void ScriptParser::readGroup() {
1180   expect("(");
1181   while (!Error && !consume(")")) {
1182     StringRef Tok = next();
1183     if (Tok == "AS_NEEDED")
1184       readAsNeeded();
1185     else
1186       addFile(unquote(Tok));
1187   }
1188 }
1189 
1190 void ScriptParser::readInclude() {
1191   StringRef Tok = unquote(next());
1192 
1193   // https://sourceware.org/binutils/docs/ld/File-Commands.html:
1194   // The file will be searched for in the current directory, and in any
1195   // directory specified with the -L option.
1196   if (sys::fs::exists(Tok)) {
1197     if (Optional<MemoryBufferRef> MB = readFile(Tok))
1198       tokenize(*MB);
1199     return;
1200   }
1201   if (Optional<std::string> Path = findFromSearchPaths(Tok)) {
1202     if (Optional<MemoryBufferRef> MB = readFile(*Path))
1203       tokenize(*MB);
1204     return;
1205   }
1206   setError("cannot open " + Tok);
1207 }
1208 
1209 void ScriptParser::readOutput() {
1210   // -o <file> takes predecence over OUTPUT(<file>).
1211   expect("(");
1212   StringRef Tok = next();
1213   if (Config->OutputFile.empty())
1214     Config->OutputFile = unquote(Tok);
1215   expect(")");
1216 }
1217 
// Reads OUTPUT_ARCH(...). Every token up to the closing parenthesis is
// skipped; the arguments are not interpreted.
void ScriptParser::readOutputArch() {
  // OUTPUT_ARCH is ignored for now.
  expect("(");
  while (!Error && !consume(")"))
    skip();
}
1224 
1225 void ScriptParser::readOutputFormat() {
1226   // Error checking only for now.
1227   expect("(");
1228   skip();
1229   StringRef Tok = next();
1230   if (Tok == ")")
1231     return;
1232   if (Tok != ",") {
1233     setError("unexpected token: " + Tok);
1234     return;
1235   }
1236   skip();
1237   expect(",");
1238   skip();
1239   expect(")");
1240 }
1241 
1242 void ScriptParser::readPhdrs() {
1243   expect("{");
1244   while (!Error && !consume("}")) {
1245     StringRef Tok = next();
1246     Opt.PhdrsCommands.push_back(
1247         {Tok, PT_NULL, false, false, UINT_MAX, nullptr});
1248     PhdrsCommand &PhdrCmd = Opt.PhdrsCommands.back();
1249 
1250     PhdrCmd.Type = readPhdrType();
1251     do {
1252       Tok = next();
1253       if (Tok == ";")
1254         break;
1255       if (Tok == "FILEHDR")
1256         PhdrCmd.HasFilehdr = true;
1257       else if (Tok == "PHDRS")
1258         PhdrCmd.HasPhdrs = true;
1259       else if (Tok == "AT")
1260         PhdrCmd.LMAExpr = readParenExpr();
1261       else if (Tok == "FLAGS") {
1262         expect("(");
1263         // Passing 0 for the value of dot is a bit of a hack. It means that
1264         // we accept expressions like ".|1".
1265         PhdrCmd.Flags = readExpr()();
1266         expect(")");
1267       } else
1268         setError("unexpected header attribute: " + Tok);
1269     } while (!Error);
1270   }
1271 }
1272 
1273 void ScriptParser::readSearchDir() {
1274   expect("(");
1275   StringRef Tok = next();
1276   if (!Config->Nostdlib)
1277     Config->SearchPaths.push_back(unquote(Tok));
1278   expect(")");
1279 }
1280 
1281 void ScriptParser::readSections() {
1282   Opt.HasSections = true;
1283   // -no-rosegment is used to avoid placing read only non-executable sections in
1284   // their own segment. We do the same if SECTIONS command is present in linker
1285   // script. See comment for computeFlags().
1286   Config->SingleRoRx = true;
1287 
1288   expect("{");
1289   while (!Error && !consume("}")) {
1290     StringRef Tok = next();
1291     BaseCommand *Cmd = readProvideOrAssignment(Tok);
1292     if (!Cmd) {
1293       if (Tok == "ASSERT")
1294         Cmd = new AssertCommand(readAssert());
1295       else
1296         Cmd = readOutputSectionDescription(Tok);
1297     }
1298     Opt.Commands.emplace_back(Cmd);
1299   }
1300 }
1301 
1302 static int precedence(StringRef Op) {
1303   return StringSwitch<int>(Op)
1304       .Cases("*", "/", 5)
1305       .Cases("+", "-", 4)
1306       .Cases("<<", ">>", 3)
1307       .Cases("<", "<=", ">", ">=", "==", "!=", 2)
1308       .Cases("&", "|", 1)
1309       .Default(-1);
1310 }
1311 
1312 StringMatcher ScriptParser::readFilePatterns() {
1313   std::vector<StringRef> V;
1314   while (!Error && !consume(")"))
1315     V.push_back(next());
1316   return StringMatcher(V);
1317 }
1318 
1319 SortSectionPolicy ScriptParser::readSortKind() {
1320   if (consume("SORT") || consume("SORT_BY_NAME"))
1321     return SortSectionPolicy::Name;
1322   if (consume("SORT_BY_ALIGNMENT"))
1323     return SortSectionPolicy::Alignment;
1324   if (consume("SORT_BY_INIT_PRIORITY"))
1325     return SortSectionPolicy::Priority;
1326   if (consume("SORT_NONE"))
1327     return SortSectionPolicy::None;
1328   return SortSectionPolicy::Default;
1329 }
1330 
1331 // Method reads a list of sequence of excluded files and section globs given in
1332 // a following form: ((EXCLUDE_FILE(file_pattern+))? section_pattern+)+
1333 // Example: *(.foo.1 EXCLUDE_FILE (*a.o) .foo.2 EXCLUDE_FILE (*b.o) .foo.3)
1334 // The semantics of that is next:
1335 // * Include .foo.1 from every file.
1336 // * Include .foo.2 from every file but a.o
1337 // * Include .foo.3 from every file but b.o
1338 std::vector<SectionPattern> ScriptParser::readInputSectionsList() {
1339   std::vector<SectionPattern> Ret;
1340   while (!Error && peek() != ")") {
1341     StringMatcher ExcludeFilePat;
1342     if (consume("EXCLUDE_FILE")) {
1343       expect("(");
1344       ExcludeFilePat = readFilePatterns();
1345     }
1346 
1347     std::vector<StringRef> V;
1348     while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE")
1349       V.push_back(next());
1350 
1351     if (!V.empty())
1352       Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)});
1353     else
1354       setError("section pattern is expected");
1355   }
1356   return Ret;
1357 }
1358 
1359 // Reads contents of "SECTIONS" directive. That directive contains a
1360 // list of glob patterns for input sections. The grammar is as follows.
1361 //
1362 // <patterns> ::= <section-list>
1363 //              | <sort> "(" <section-list> ")"
1364 //              | <sort> "(" <sort> "(" <section-list> ")" ")"
1365 //
1366 // <sort>     ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT"
1367 //              | "SORT_BY_INIT_PRIORITY" | "SORT_NONE"
1368 //
1369 // <section-list> is parsed by readInputSectionsList().
1370 InputSectionDescription *
1371 ScriptParser::readInputSectionRules(StringRef FilePattern) {
1372   auto *Cmd = new InputSectionDescription(FilePattern);
1373   expect("(");
1374   while (!Error && !consume(")")) {
1375     SortSectionPolicy Outer = readSortKind();
1376     SortSectionPolicy Inner = SortSectionPolicy::Default;
1377     std::vector<SectionPattern> V;
1378     if (Outer != SortSectionPolicy::Default) {
1379       expect("(");
1380       Inner = readSortKind();
1381       if (Inner != SortSectionPolicy::Default) {
1382         expect("(");
1383         V = readInputSectionsList();
1384         expect(")");
1385       } else {
1386         V = readInputSectionsList();
1387       }
1388       expect(")");
1389     } else {
1390       V = readInputSectionsList();
1391     }
1392 
1393     for (SectionPattern &Pat : V) {
1394       Pat.SortInner = Inner;
1395       Pat.SortOuter = Outer;
1396     }
1397 
1398     std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns));
1399   }
1400   return Cmd;
1401 }
1402 
1403 InputSectionDescription *
1404 ScriptParser::readInputSectionDescription(StringRef Tok) {
1405   // Input section wildcard can be surrounded by KEEP.
1406   // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep
1407   if (Tok == "KEEP") {
1408     expect("(");
1409     StringRef FilePattern = next();
1410     InputSectionDescription *Cmd = readInputSectionRules(FilePattern);
1411     expect(")");
1412     Opt.KeptSections.push_back(Cmd);
1413     return Cmd;
1414   }
1415   return readInputSectionRules(Tok);
1416 }
1417 
// Reads the "(CONSTRUCTORS)" that follows a SORT keyword inside an output
// section description. Nothing is recorded.
void ScriptParser::readSort() {
  expect("(");
  expect("CONSTRUCTORS");
  expect(")");
}
1423 
1424 Expr ScriptParser::readAssert() {
1425   expect("(");
1426   Expr E = readExpr();
1427   expect(",");
1428   StringRef Msg = unquote(next());
1429   expect(")");
1430   return [=] {
1431     if (!E())
1432       error(Msg);
1433     return ScriptBase->getDot();
1434   };
1435 }
1436 
1437 // Reads a FILL(expr) command. We handle the FILL command as an
1438 // alias for =fillexp section attribute, which is different from
1439 // what GNU linkers do.
1440 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html
1441 uint32_t ScriptParser::readFill() {
1442   expect("(");
1443   uint32_t V = readOutputSectionFiller(next());
1444   expect(")");
1445   expect(";");
1446   return V;
1447 }
1448 
// Reads one output section description for the section called OutSec:
//   <name> [<addr>] : [AT(..)] [ALIGN(..)] [SUBALIGN(..)] [ONLY_IF_RO|RW]
//     { <commands> } [> <region>] [:<phdr> ...] [=<fill>] [,]
// The name itself has already been consumed by the caller. Returns a newly
// allocated command.
OutputSectionCommand *
ScriptParser::readOutputSectionDescription(StringRef OutSec) {
  OutputSectionCommand *Cmd = new OutputSectionCommand(OutSec);
  Cmd->Location = getCurrentLocation();

  // Read an address expression.
  // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html#Output-Section-Address
  if (peek() != ":")
    Cmd->AddrExpr = readExpr();

  expect(":");

  // Optional attributes; each is only recognized in this order.
  if (consume("AT"))
    Cmd->LMAExpr = readParenExpr();
  if (consume("ALIGN"))
    Cmd->AlignExpr = readParenExpr();
  if (consume("SUBALIGN"))
    Cmd->SubalignExpr = readParenExpr();

  // Parse constraints.
  if (consume("ONLY_IF_RO"))
    Cmd->Constraint = ConstraintKind::ReadOnly;
  if (consume("ONLY_IF_RW"))
    Cmd->Constraint = ConstraintKind::ReadWrite;
  expect("{");

  // Commands inside the braces. The order of the checks matters: anything
  // followed by "=" or "+=" is an assignment, whatever its name is.
  while (!Error && !consume("}")) {
    StringRef Tok = next();
    if (Tok == ";") {
      // Empty commands are allowed. Do nothing here.
    } else if (SymbolAssignment *Assignment = readProvideOrAssignment(Tok)) {
      Cmd->Commands.emplace_back(Assignment);
    } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) {
      Cmd->Commands.emplace_back(Data);
    } else if (Tok == "ASSERT") {
      Cmd->Commands.emplace_back(new AssertCommand(readAssert()));
      expect(";");
    } else if (Tok == "CONSTRUCTORS") {
      // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors
      // by name. This is for very old file formats such as ECOFF/XCOFF.
      // For ELF, we should ignore.
    } else if (Tok == "FILL") {
      Cmd->Filler = readFill();
    } else if (Tok == "SORT") {
      readSort();
    } else if (peek() == "(") {
      // Any other token followed by "(" starts an input section description.
      Cmd->Commands.emplace_back(readInputSectionDescription(Tok));
    } else {
      setError("unknown command " + Tok);
    }
  }

  // Optional memory region assignment.
  if (consume(">"))
    Cmd->MemoryRegionName = next();

  Cmd->Phdrs = readOutputSectionPhdrs();

  // The fill value may be a separate token or glued to the "=".
  if (consume("="))
    Cmd->Filler = readOutputSectionFiller(next());
  else if (peek().startswith("="))
    Cmd->Filler = readOutputSectionFiller(next().drop_front());

  // Consume optional comma following output section command.
  consume(",");

  return Cmd;
}
1516 
1517 // Read "=<number>" where <number> is an octal/decimal/hexadecimal number.
1518 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html
1519 //
1520 // ld.gold is not fully compatible with ld.bfd. ld.bfd handles
1521 // hexstrings as blobs of arbitrary sizes, while ld.gold handles them
1522 // as 32-bit big-endian values. We will do the same as ld.gold does
1523 // because it's simpler than what ld.bfd does.
1524 uint32_t ScriptParser::readOutputSectionFiller(StringRef Tok) {
1525   uint32_t V;
1526   if (!Tok.getAsInteger(0, V))
1527     return V;
1528   setError("invalid filler expression: " + Tok);
1529   return 0;
1530 }
1531 
1532 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) {
1533   expect("(");
1534   SymbolAssignment *Cmd = readAssignment(next());
1535   Cmd->Provide = Provide;
1536   Cmd->Hidden = Hidden;
1537   expect(")");
1538   expect(";");
1539   return Cmd;
1540 }
1541 
1542 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) {
1543   SymbolAssignment *Cmd = nullptr;
1544   if (peek() == "=" || peek() == "+=") {
1545     Cmd = readAssignment(Tok);
1546     expect(";");
1547   } else if (Tok == "PROVIDE") {
1548     Cmd = readProvideHidden(true, false);
1549   } else if (Tok == "HIDDEN") {
1550     Cmd = readProvideHidden(false, true);
1551   } else if (Tok == "PROVIDE_HIDDEN") {
1552     Cmd = readProvideHidden(true, true);
1553   }
1554   return Cmd;
1555 }
1556 
1557 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) {
1558   StringRef Op = next();
1559   Expr E;
1560   assert(Op == "=" || Op == "+=");
1561   if (consume("ABSOLUTE")) {
1562     E = readExpr();
1563     E.IsAbsolute = [] { return true; };
1564   } else {
1565     E = readExpr();
1566   }
1567   if (Op == "+=") {
1568     std::string Loc = getCurrentLocation();
1569     E = [=] { return ScriptBase->getSymbolValue(Loc, Name) + E(); };
1570   }
1571   return new SymbolAssignment(Name, E, getCurrentLocation());
1572 }
1573 
1574 // This is an operator-precedence parser to parse a linker
1575 // script expression.
1576 Expr ScriptParser::readExpr() {
1577   // Our lexer is context-aware. Set the in-expression bit so that
1578   // they apply different tokenization rules.
1579   bool Orig = InExpr;
1580   InExpr = true;
1581   Expr E = readExpr1(readPrimary(), 0);
1582   InExpr = Orig;
1583   return E;
1584 }
1585 
// Builds the expression "L Op R" for binary operator Op. Operands are not
// evaluated here; the returned expression evaluates them each time it is
// called. Op must be one of the operators known to precedence().
static Expr combine(StringRef Op, Expr L, Expr R) {
  // Used for "+" and "-" only: the result is absolute if both operands are,
  // and its section is that of the left operand, else that of the right.
  auto IsAbs = [=] { return L.IsAbsolute() && R.IsAbsolute(); };
  auto GetOutSec = [=] {
    SectionBase *S = L.Section();
    return S ? S : R.Section();
  };

  // All other operators build the Expr from the value lambda alone.
  // NOTE(review): what IsAbsolute/Section default to in that case depends on
  // Expr's converting constructor, which is not visible here.
  if (Op == "*")
    return [=] { return L() * R(); };
  if (Op == "/") {
    return [=]() -> uint64_t {
      // The divisor is evaluated first; on zero, L is not evaluated at all.
      uint64_t RHS = R();
      if (RHS == 0) {
        error("division by zero");
        return 0;
      }
      return L() / RHS;
    };
  }
  if (Op == "+")
    return {[=] { return L() + R(); }, IsAbs, GetOutSec};
  if (Op == "-")
    return {[=] { return L() - R(); }, IsAbs, GetOutSec};
  // NOTE(review): shift counts are not range-checked before shifting.
  if (Op == "<<")
    return [=] { return L() << R(); };
  if (Op == ">>")
    return [=] { return L() >> R(); };
  if (Op == "<")
    return [=] { return L() < R(); };
  if (Op == ">")
    return [=] { return L() > R(); };
  if (Op == ">=")
    return [=] { return L() >= R(); };
  if (Op == "<=")
    return [=] { return L() <= R(); };
  if (Op == "==")
    return [=] { return L() == R(); };
  if (Op == "!=")
    return [=] { return L() != R(); };
  if (Op == "&")
    return [=] { return L() & R(); };
  if (Op == "|")
    return [=] { return L() | R(); };
  llvm_unreachable("invalid operator");
}
1631 
1632 // This is a part of the operator-precedence parser. This function
1633 // assumes that the remaining token stream starts with an operator.
1634 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) {
1635   while (!atEOF() && !Error) {
1636     // Read an operator and an expression.
1637     if (consume("?"))
1638       return readTernary(Lhs);
1639     StringRef Op1 = peek();
1640     if (precedence(Op1) < MinPrec)
1641       break;
1642     skip();
1643     Expr Rhs = readPrimary();
1644 
1645     // Evaluate the remaining part of the expression first if the
1646     // next operator has greater precedence than the previous one.
1647     // For example, if we have read "+" and "3", and if the next
1648     // operator is "*", then we'll evaluate 3 * ... part first.
1649     while (!atEOF()) {
1650       StringRef Op2 = peek();
1651       if (precedence(Op2) <= precedence(Op1))
1652         break;
1653       Rhs = readExpr1(Rhs, precedence(Op2));
1654     }
1655 
1656     Lhs = combine(Op1, Lhs, Rhs);
1657   }
1658   return Lhs;
1659 }
1660 
1661 uint64_t static getConstant(StringRef S) {
1662   if (S == "COMMONPAGESIZE")
1663     return Target->PageSize;
1664   if (S == "MAXPAGESIZE")
1665     return Config->MaxPageSize;
1666   error("unknown constant: " + S);
1667   return 0;
1668 }
1669 
1670 // Parses Tok as an integer. Returns true if successful.
1671 // It recognizes hexadecimal (prefixed with "0x" or suffixed with "H")
1672 // and decimal numbers. Decimal numbers may have "K" (kilo) or
1673 // "M" (mega) prefixes.
1674 static bool readInteger(StringRef Tok, uint64_t &Result) {
1675   // Negative number
1676   if (Tok.startswith("-")) {
1677     if (!readInteger(Tok.substr(1), Result))
1678       return false;
1679     Result = -Result;
1680     return true;
1681   }
1682 
1683   // Hexadecimal
1684   if (Tok.startswith_lower("0x"))
1685     return !Tok.substr(2).getAsInteger(16, Result);
1686   if (Tok.endswith_lower("H"))
1687     return !Tok.drop_back().getAsInteger(16, Result);
1688 
1689   // Decimal
1690   int Suffix = 1;
1691   if (Tok.endswith_lower("K")) {
1692     Suffix = 1024;
1693     Tok = Tok.drop_back();
1694   } else if (Tok.endswith_lower("M")) {
1695     Suffix = 1024 * 1024;
1696     Tok = Tok.drop_back();
1697   }
1698   if (Tok.getAsInteger(10, Result))
1699     return false;
1700   Result *= Suffix;
1701   return true;
1702 }
1703 
1704 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) {
1705   int Size = StringSwitch<unsigned>(Tok)
1706                  .Case("BYTE", 1)
1707                  .Case("SHORT", 2)
1708                  .Case("LONG", 4)
1709                  .Case("QUAD", 8)
1710                  .Default(-1);
1711   if (Size == -1)
1712     return nullptr;
1713 
1714   return new BytesDataCommand(readParenExpr(), Size);
1715 }
1716 
// Reads "(<token>)" and returns the token between the parentheses
// unchanged.
StringRef ScriptParser::readParenLiteral() {
  expect("(");
  StringRef Tok = next();
  expect(")");
  return Tok;
}
1723 
1724 Expr ScriptParser::readPrimary() {
1725   if (peek() == "(")
1726     return readParenExpr();
1727 
1728   StringRef Tok = next();
1729   std::string Location = getCurrentLocation();
1730 
1731   if (Tok == "~") {
1732     Expr E = readPrimary();
1733     return [=] { return ~E(); };
1734   }
1735   if (Tok == "-") {
1736     Expr E = readPrimary();
1737     return [=] { return -E(); };
1738   }
1739 
1740   // Built-in functions are parsed here.
1741   // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html.
1742   if (Tok == "ADDR") {
1743     StringRef Name = readParenLiteral();
1744     return {[=] { return ScriptBase->getOutputSection(Location, Name)->Addr; },
1745             [=] { return false; },
1746             [=] { return ScriptBase->getOutputSection(Location, Name); }};
1747   }
1748   if (Tok == "LOADADDR") {
1749     StringRef Name = readParenLiteral();
1750     return
1751         [=] { return ScriptBase->getOutputSection(Location, Name)->getLMA(); };
1752   }
1753   if (Tok == "ASSERT")
1754     return readAssert();
1755   if (Tok == "ALIGN") {
1756     expect("(");
1757     Expr E = readExpr();
1758     if (consume(",")) {
1759       Expr E2 = readExpr();
1760       expect(")");
1761       return [=] { return alignTo(E(), E2()); };
1762     }
1763     expect(")");
1764     return [=] { return alignTo(ScriptBase->getDot(), E()); };
1765   }
1766   if (Tok == "CONSTANT") {
1767     StringRef Name = readParenLiteral();
1768     return [=] { return getConstant(Name); };
1769   }
1770   if (Tok == "DEFINED") {
1771     StringRef Name = readParenLiteral();
1772     return [=] { return ScriptBase->isDefined(Name) ? 1 : 0; };
1773   }
1774   if (Tok == "SEGMENT_START") {
1775     expect("(");
1776     skip();
1777     expect(",");
1778     Expr E = readExpr();
1779     expect(")");
1780     return [=] { return E(); };
1781   }
1782   if (Tok == "DATA_SEGMENT_ALIGN") {
1783     expect("(");
1784     Expr E = readExpr();
1785     expect(",");
1786     readExpr();
1787     expect(")");
1788     return [=] { return alignTo(ScriptBase->getDot(), E()); };
1789   }
1790   if (Tok == "DATA_SEGMENT_END") {
1791     expect("(");
1792     expect(".");
1793     expect(")");
1794     return []() { return ScriptBase->getDot(); };
1795   }
1796   // GNU linkers implements more complicated logic to handle
1797   // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and just align to
1798   // the next page boundary for simplicity.
1799   if (Tok == "DATA_SEGMENT_RELRO_END") {
1800     expect("(");
1801     readExpr();
1802     expect(",");
1803     readExpr();
1804     expect(")");
1805     return []() { return alignTo(ScriptBase->getDot(), Target->PageSize); };
1806   }
1807   if (Tok == "SIZEOF") {
1808     StringRef Name = readParenLiteral();
1809     return [=] { return ScriptBase->getOutputSectionSize(Name); };
1810   }
1811   if (Tok == "ALIGNOF") {
1812     StringRef Name = readParenLiteral();
1813     return
1814         [=] { return ScriptBase->getOutputSection(Location, Name)->Alignment; };
1815   }
1816   if (Tok == "SIZEOF_HEADERS")
1817     return [=] { return elf::getHeaderSize(); };
1818 
1819   // Tok is a literal number.
1820   uint64_t V;
1821   if (readInteger(Tok, V))
1822     return [=] { return V; };
1823 
1824   // Tok is a symbol name.
1825   if (Tok != "." && !isValidCIdentifier(Tok))
1826     setError("malformed number: " + Tok);
1827   return {[=] { return ScriptBase->getSymbolValue(Location, Tok); },
1828           [=] { return ScriptBase->isAbsolute(Tok); },
1829           [=] { return ScriptBase->getSymbolSection(Tok); }};
1830 }
1831 
1832 Expr ScriptParser::readTernary(Expr Cond) {
1833   Expr L = readExpr();
1834   expect(":");
1835   Expr R = readExpr();
1836   return [=] { return Cond() ? L() : R(); };
1837 }
1838 
1839 Expr ScriptParser::readParenExpr() {
1840   expect("(");
1841   Expr E = readExpr();
1842   expect(")");
1843   return E;
1844 }
1845 
1846 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() {
1847   std::vector<StringRef> Phdrs;
1848   while (!Error && peek().startswith(":")) {
1849     StringRef Tok = next();
1850     Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1));
1851   }
1852   return Phdrs;
1853 }
1854 
1855 // Read a program header type name. The next token must be a
1856 // name of a program header type or a constant (e.g. "0x3").
1857 unsigned ScriptParser::readPhdrType() {
1858   StringRef Tok = next();
1859   uint64_t Val;
1860   if (readInteger(Tok, Val))
1861     return Val;
1862 
1863   unsigned Ret = StringSwitch<unsigned>(Tok)
1864                      .Case("PT_NULL", PT_NULL)
1865                      .Case("PT_LOAD", PT_LOAD)
1866                      .Case("PT_DYNAMIC", PT_DYNAMIC)
1867                      .Case("PT_INTERP", PT_INTERP)
1868                      .Case("PT_NOTE", PT_NOTE)
1869                      .Case("PT_SHLIB", PT_SHLIB)
1870                      .Case("PT_PHDR", PT_PHDR)
1871                      .Case("PT_TLS", PT_TLS)
1872                      .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME)
1873                      .Case("PT_GNU_STACK", PT_GNU_STACK)
1874                      .Case("PT_GNU_RELRO", PT_GNU_RELRO)
1875                      .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE)
1876                      .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED)
1877                      .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA)
1878                      .Default(-1);
1879 
1880   if (Ret == (unsigned)-1) {
1881     setError("invalid program header type: " + Tok);
1882     return PT_NULL;
1883   }
1884   return Ret;
1885 }
1886 
1887 // Reads an anonymous version declaration.
1888 void ScriptParser::readAnonymousDeclaration() {
1889   std::vector<SymbolVersion> Locals;
1890   std::vector<SymbolVersion> Globals;
1891   std::tie(Locals, Globals) = readSymbols();
1892 
1893   for (SymbolVersion V : Locals) {
1894     if (V.Name == "*")
1895       Config->DefaultSymbolVersion = VER_NDX_LOCAL;
1896     else
1897       Config->VersionScriptLocals.push_back(V);
1898   }
1899 
1900   for (SymbolVersion V : Globals)
1901     Config->VersionScriptGlobals.push_back(V);
1902 
1903   expect(";");
1904 }
1905 
1906 // Reads a non-anonymous version definition,
1907 // e.g. "VerStr { global: foo; bar; local: *; };".
1908 void ScriptParser::readVersionDeclaration(StringRef VerStr) {
1909   // Read a symbol list.
1910   std::vector<SymbolVersion> Locals;
1911   std::vector<SymbolVersion> Globals;
1912   std::tie(Locals, Globals) = readSymbols();
1913 
1914   for (SymbolVersion V : Locals) {
1915     if (V.Name == "*")
1916       Config->DefaultSymbolVersion = VER_NDX_LOCAL;
1917     else
1918       Config->VersionScriptLocals.push_back(V);
1919   }
1920 
1921   // Create a new version definition and add that to the global symbols.
1922   VersionDefinition Ver;
1923   Ver.Name = VerStr;
1924   Ver.Globals = Globals;
1925 
1926   // User-defined version number starts from 2 because 0 and 1 are
1927   // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively.
1928   Ver.Id = Config->VersionDefinitions.size() + 2;
1929   Config->VersionDefinitions.push_back(Ver);
1930 
1931   // Each version may have a parent version. For example, "Ver2"
1932   // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1"
1933   // as a parent. This version hierarchy is, probably against your
1934   // instinct, purely for hint; the runtime doesn't care about it
1935   // at all. In LLD, we simply ignore it.
1936   if (peek() != ";")
1937     skip();
1938   expect(";");
1939 }
1940 
1941 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };".
1942 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>>
1943 ScriptParser::readSymbols() {
1944   std::vector<SymbolVersion> Locals;
1945   std::vector<SymbolVersion> Globals;
1946   std::vector<SymbolVersion> *V = &Globals;
1947 
1948   while (!Error) {
1949     if (consume("}"))
1950       break;
1951     if (consumeLabel("local")) {
1952       V = &Locals;
1953       continue;
1954     }
1955     if (consumeLabel("global")) {
1956       V = &Globals;
1957       continue;
1958     }
1959 
1960     if (consume("extern")) {
1961       std::vector<SymbolVersion> Ext = readVersionExtern();
1962       V->insert(V->end(), Ext.begin(), Ext.end());
1963     } else {
1964       StringRef Tok = next();
1965       V->push_back({unquote(Tok), false, hasWildcard(Tok)});
1966     }
1967     expect(";");
1968   }
1969   return {Locals, Globals};
1970 }
1971 
1972 // Reads an "extern C++" directive, e.g.,
1973 // "extern "C++" { ns::*; "f(int, double)"; };"
1974 std::vector<SymbolVersion> ScriptParser::readVersionExtern() {
1975   StringRef Tok = next();
1976   bool IsCXX = Tok == "\"C++\"";
1977   if (!IsCXX && Tok != "\"C\"")
1978     setError("Unknown language");
1979   expect("{");
1980 
1981   std::vector<SymbolVersion> Ret;
1982   while (!Error && peek() != "}") {
1983     StringRef Tok = next();
1984     bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok);
1985     Ret.push_back({unquote(Tok), IsCXX, HasWildcard});
1986     expect(";");
1987   }
1988 
1989   expect("}");
1990   return Ret;
1991 }
1992 
1993 uint64_t ScriptParser::readMemoryAssignment(
1994     StringRef S1, StringRef S2, StringRef S3) {
1995   if (!(consume(S1) || consume(S2) || consume(S3))) {
1996     setError("expected one of: " + S1 + ", " + S2 + ", or " + S3);
1997     return 0;
1998   }
1999   expect("=");
2000 
2001   // TODO: Fully support constant expressions.
2002   uint64_t Val;
2003   if (!readInteger(next(), Val))
2004     setError("nonconstant expression for "+ S1);
2005   return Val;
2006 }
2007 
2008 // Parse the MEMORY command as specified in:
2009 // https://sourceware.org/binutils/docs/ld/MEMORY.html
2010 //
2011 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... }
2012 void ScriptParser::readMemory() {
2013   expect("{");
2014   while (!Error && !consume("}")) {
2015     StringRef Name = next();
2016 
2017     uint32_t Flags = 0;
2018     uint32_t NegFlags = 0;
2019     if (consume("(")) {
2020       std::tie(Flags, NegFlags) = readMemoryAttributes();
2021       expect(")");
2022     }
2023     expect(":");
2024 
2025     uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o");
2026     expect(",");
2027     uint64_t Length = readMemoryAssignment("LENGTH", "len", "l");
2028 
2029     // Add the memory region to the region map (if it doesn't already exist).
2030     auto It = Opt.MemoryRegions.find(Name);
2031     if (It != Opt.MemoryRegions.end())
2032       setError("region '" + Name + "' already defined");
2033     else
2034       Opt.MemoryRegions[Name] = {Name, Origin, Length, Origin, Flags, NegFlags};
2035   }
2036 }
2037 
2038 // This function parses the attributes used to match against section
2039 // flags when placing output sections in a memory region. These flags
2040 // are only used when an explicit memory region name is not used.
2041 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() {
2042   uint32_t Flags = 0;
2043   uint32_t NegFlags = 0;
2044   bool Invert = false;
2045 
2046   for (char C : next().lower()) {
2047     uint32_t Flag = 0;
2048     if (C == '!')
2049       Invert = !Invert;
2050     else if (C == 'w')
2051       Flag = SHF_WRITE;
2052     else if (C == 'x')
2053       Flag = SHF_EXECINSTR;
2054     else if (C == 'a')
2055       Flag = SHF_ALLOC;
2056     else if (C != 'r')
2057       setError("invalid memory region attribute");
2058 
2059     if (Invert)
2060       NegFlags |= Flag;
2061     else
2062       Flags |= Flag;
2063   }
2064   return {Flags, NegFlags};
2065 }
2066 
2067 void elf::readLinkerScript(MemoryBufferRef MB) {
2068   ScriptParser(MB).readLinkerScript();
2069 }
2070 
2071 void elf::readVersionScript(MemoryBufferRef MB) {
2072   ScriptParser(MB).readVersionScript();
2073 }
2074 
2075 void elf::readDynamicList(MemoryBufferRef MB) {
2076   ScriptParser(MB).readDynamicList();
2077 }
2078 
2079 template class elf::LinkerScript<ELF32LE>;
2080 template class elf::LinkerScript<ELF32BE>;
2081 template class elf::LinkerScript<ELF64LE>;
2082 template class elf::LinkerScript<ELF64BE>;
2083