1 //===- LinkerScript.cpp ---------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the parser/evaluator of the linker script.
11 // It parses a linker script and write the result to Config or ScriptConfig
12 // objects.
13 //
14 // If SECTIONS command is used, a ScriptConfig contains an AST
15 // of the command which will later be consumed by createSections() and
16 // assignAddresses().
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #include "LinkerScript.h"
21 #include "Config.h"
22 #include "Driver.h"
23 #include "InputSection.h"
24 #include "OutputSections.h"
25 #include "ScriptParser.h"
26 #include "Strings.h"
27 #include "Symbols.h"
28 #include "SymbolTable.h"
29 #include "Target.h"
30 #include "Writer.h"
31 #include "llvm/ADT/StringSwitch.h"
32 #include "llvm/Support/ELF.h"
33 #include "llvm/Support/FileSystem.h"
34 #include "llvm/Support/MemoryBuffer.h"
35 #include "llvm/Support/Path.h"
36 #include "llvm/Support/StringSaver.h"
37 
38 using namespace llvm;
39 using namespace llvm::ELF;
40 using namespace llvm::object;
41 using namespace llvm::support::endian;
42 using namespace lld;
43 using namespace lld::elf;
44 
45 LinkerScriptBase *elf::ScriptBase;
46 ScriptConfiguration *elf::ScriptConfig;
47 
48 template <class ELFT> static void addRegular(SymbolAssignment *Cmd) {
49   Symbol *Sym = Symtab<ELFT>::X->addRegular(Cmd->Name, STB_GLOBAL, STV_DEFAULT);
50   Sym->Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT;
51   Cmd->Sym = Sym->body();
52 
53   // If we have no SECTIONS then we don't have '.' and don't call
54   // assignAddresses(). We calculate symbol value immediately in this case.
55   if (!ScriptConfig->HasSections)
56     cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Cmd->Expression(0);
57 }
58 
59 template <class ELFT> static void addSynthetic(SymbolAssignment *Cmd) {
60   Symbol *Sym = Symtab<ELFT>::X->addSynthetic(
61       Cmd->Name, nullptr, 0, Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT);
62   Cmd->Sym = Sym->body();
63 }
64 
65 template <class ELFT> static void addSymbol(SymbolAssignment *Cmd) {
66   if (Cmd->IsAbsolute)
67     addRegular<ELFT>(Cmd);
68   else
69     addSynthetic<ELFT>(Cmd);
70 }
71 // If a symbol was in PROVIDE(), we need to define it only when
72 // it is an undefined symbol.
73 template <class ELFT> static bool shouldDefine(SymbolAssignment *Cmd) {
74   if (Cmd->Name == ".")
75     return false;
76   if (!Cmd->Provide)
77     return true;
78   SymbolBody *B = Symtab<ELFT>::X->find(Cmd->Name);
79   return B && B->isUndefined();
80 }
81 
82 bool SymbolAssignment::classof(const BaseCommand *C) {
83   return C->Kind == AssignmentKind;
84 }
85 
86 bool OutputSectionCommand::classof(const BaseCommand *C) {
87   return C->Kind == OutputSectionKind;
88 }
89 
90 bool InputSectionDescription::classof(const BaseCommand *C) {
91   return C->Kind == InputSectionKind;
92 }
93 
94 bool AssertCommand::classof(const BaseCommand *C) {
95   return C->Kind == AssertKind;
96 }
97 
98 bool BytesDataCommand::classof(const BaseCommand *C) {
99   return C->Kind == BytesDataKind;
100 }
101 
102 template <class ELFT> static bool isDiscarded(InputSectionBase<ELFT> *S) {
103   return !S || !S->Live;
104 }
105 
106 template <class ELFT> LinkerScript<ELFT>::LinkerScript() {}
107 template <class ELFT> LinkerScript<ELFT>::~LinkerScript() {}
108 
109 template <class ELFT>
110 bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) {
111   for (Regex *Re : Opt.KeptSections)
112     if (Re->match(S->Name))
113       return true;
114   return false;
115 }
116 
117 static bool comparePriority(InputSectionData *A, InputSectionData *B) {
118   return getPriority(A->Name) < getPriority(B->Name);
119 }
120 
121 static bool compareName(InputSectionData *A, InputSectionData *B) {
122   return A->Name < B->Name;
123 }
124 
125 static bool compareAlignment(InputSectionData *A, InputSectionData *B) {
126   // ">" is not a mistake. Larger alignments are placed before smaller
127   // alignments in order to reduce the amount of padding necessary.
128   // This is compatible with GNU.
129   return A->Alignment > B->Alignment;
130 }
131 
132 static std::function<bool(InputSectionData *, InputSectionData *)>
133 getComparator(SortSectionPolicy K) {
134   switch (K) {
135   case SortSectionPolicy::Alignment:
136     return compareAlignment;
137   case SortSectionPolicy::Name:
138     return compareName;
139   case SortSectionPolicy::Priority:
140     return comparePriority;
141   default:
142     llvm_unreachable("unknown sort policy");
143   }
144 }
145 
146 template <class ELFT>
147 static bool matchConstraints(ArrayRef<InputSectionBase<ELFT> *> Sections,
148                              ConstraintKind Kind) {
149   if (Kind == ConstraintKind::NoConstraint)
150     return true;
151   bool IsRW = llvm::any_of(Sections, [=](InputSectionData *Sec2) {
152     auto *Sec = static_cast<InputSectionBase<ELFT> *>(Sec2);
153     return Sec->getSectionHdr()->sh_flags & SHF_WRITE;
154   });
155   return (IsRW && Kind == ConstraintKind::ReadWrite) ||
156          (!IsRW && Kind == ConstraintKind::ReadOnly);
157 }
158 
159 static void sortSections(InputSectionData **Begin, InputSectionData **End,
160                          SortSectionPolicy K) {
161   if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None)
162     std::stable_sort(Begin, End, getComparator(K));
163 }
164 
165 // Compute and remember which sections the InputSectionDescription matches.
166 template <class ELFT>
167 void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) {
168   // Collects all sections that satisfy constraints of I
169   // and attach them to I.
170   for (SectionPattern &Pat : I->SectionPatterns) {
171     size_t SizeBefore = I->Sections.size();
172     for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles()) {
173       StringRef Filename = sys::path::filename(F->getName());
174       if (!I->FileRe.match(Filename) || Pat.ExcludedFileRe.match(Filename))
175         continue;
176 
177       for (InputSectionBase<ELFT> *S : F->getSections())
178         if (!isDiscarded(S) && !S->OutSec && Pat.SectionRe.match(S->Name))
179           I->Sections.push_back(S);
180       if (Pat.SectionRe.match("COMMON"))
181         I->Sections.push_back(CommonInputSection<ELFT>::X);
182     }
183 
184     // Sort sections as instructed by SORT-family commands and --sort-section
185     // option. Because SORT-family commands can be nested at most two depth
186     // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command
187     // line option is respected even if a SORT command is given, the exact
188     // behavior we have here is a bit complicated. Here are the rules.
189     //
190     // 1. If two SORT commands are given, --sort-section is ignored.
191     // 2. If one SORT command is given, and if it is not SORT_NONE,
192     //    --sort-section is handled as an inner SORT command.
193     // 3. If one SORT command is given, and if it is SORT_NONE, don't sort.
194     // 4. If no SORT command is given, sort according to --sort-section.
195     InputSectionData **Begin = I->Sections.data() + SizeBefore;
196     InputSectionData **End = I->Sections.data() + I->Sections.size();
197     if (Pat.SortOuter != SortSectionPolicy::None) {
198       if (Pat.SortInner == SortSectionPolicy::Default)
199         sortSections(Begin, End, Config->SortSection);
200       else
201         sortSections(Begin, End, Pat.SortInner);
202       sortSections(Begin, End, Pat.SortOuter);
203     }
204   }
205 
206   // We do not add duplicate input sections, so mark them with a dummy output
207   // section for now.
208   for (InputSectionData *S : I->Sections) {
209     auto *S2 = static_cast<InputSectionBase<ELFT> *>(S);
210     S2->OutSec = (OutputSectionBase<ELFT> *)-1;
211   }
212 }
213 
214 template <class ELFT>
215 void LinkerScript<ELFT>::discard(ArrayRef<InputSectionBase<ELFT> *> V) {
216   for (InputSectionBase<ELFT> *S : V) {
217     S->Live = false;
218     reportDiscarded(S);
219   }
220 }
221 
222 template <class ELFT>
223 std::vector<InputSectionBase<ELFT> *>
224 LinkerScript<ELFT>::createInputSectionList(OutputSectionCommand &OutCmd) {
225   std::vector<InputSectionBase<ELFT> *> Ret;
226 
227   for (const std::unique_ptr<BaseCommand> &Base : OutCmd.Commands) {
228     auto *Cmd = dyn_cast<InputSectionDescription>(Base.get());
229     if (!Cmd)
230       continue;
231     computeInputSections(Cmd);
232     for (InputSectionData *S : Cmd->Sections)
233       Ret.push_back(static_cast<InputSectionBase<ELFT> *>(S));
234   }
235 
236   return Ret;
237 }
238 
239 template <class ELFT>
240 static SectionKey<ELFT::Is64Bits> createKey(InputSectionBase<ELFT> *C,
241                                             StringRef OutsecName) {
242   // When using linker script the merge rules are different.
243   // Unfortunately, linker scripts are name based. This means that expressions
244   // like *(.foo*) can refer to multiple input sections that would normally be
245   // placed in different output sections. We cannot put them in different
246   // output sections or we would produce wrong results for
247   // start = .; *(.foo.*) end = .; *(.bar)
248   // and a mapping of .foo1 and .bar1 to one section and .foo2 and .bar2 to
249   // another. The problem is that there is no way to layout those output
250   // sections such that the .foo sections are the only thing between the
251   // start and end symbols.
252 
253   // An extra annoyance is that we cannot simply disable merging of the contents
254   // of SHF_MERGE sections, but our implementation requires one output section
255   // per "kind" (string or not, which size/aligment).
256   // Fortunately, creating symbols in the middle of a merge section is not
257   // supported by bfd or gold, so we can just create multiple section in that
258   // case.
259   const typename ELFT::Shdr *H = C->getSectionHdr();
260   typedef typename ELFT::uint uintX_t;
261   uintX_t Flags = H->sh_flags & (SHF_MERGE | SHF_STRINGS);
262 
263   uintX_t Alignment = 0;
264   if (isa<MergeInputSection<ELFT>>(C))
265     Alignment = std::max(H->sh_addralign, H->sh_entsize);
266 
267   return SectionKey<ELFT::Is64Bits>{OutsecName, /*Type*/ 0, Flags, Alignment};
268 }
269 
270 template <class ELFT>
271 void LinkerScript<ELFT>::addSection(OutputSectionFactory<ELFT> &Factory,
272                                     InputSectionBase<ELFT> *Sec,
273                                     StringRef Name) {
274   OutputSectionBase<ELFT> *OutSec;
275   bool IsNew;
276   std::tie(OutSec, IsNew) = Factory.create(createKey(Sec, Name), Sec);
277   if (IsNew)
278     OutputSections->push_back(OutSec);
279   OutSec->addSection(Sec);
280 }
281 
282 template <class ELFT>
283 void LinkerScript<ELFT>::processCommands(OutputSectionFactory<ELFT> &Factory) {
284 
285   for (unsigned I = 0; I < Opt.Commands.size(); ++I) {
286     auto Iter = Opt.Commands.begin() + I;
287     const std::unique_ptr<BaseCommand> &Base1 = *Iter;
288     if (auto *Cmd = dyn_cast<SymbolAssignment>(Base1.get())) {
289       if (shouldDefine<ELFT>(Cmd))
290         addRegular<ELFT>(Cmd);
291       continue;
292     }
293     if (auto *Cmd = dyn_cast<AssertCommand>(Base1.get())) {
294       // If we don't have SECTIONS then output sections have already been
295       // created by Writer<ELFT>. The LinkerScript<ELFT>::assignAddresses
296       // will not be called, so ASSERT should be evaluated now.
297       if (!Opt.HasSections)
298         Cmd->Expression(0);
299       continue;
300     }
301 
302     if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base1.get())) {
303       std::vector<InputSectionBase<ELFT> *> V = createInputSectionList(*Cmd);
304 
305       if (Cmd->Name == "/DISCARD/") {
306         discard(V);
307         continue;
308       }
309 
310       if (!matchConstraints<ELFT>(V, Cmd->Constraint)) {
311         for (InputSectionBase<ELFT> *S : V)
312           S->OutSec = nullptr;
313         Opt.Commands.erase(Iter);
314         --I;
315         continue;
316       }
317 
318       for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands)
319         if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base.get()))
320           if (shouldDefine<ELFT>(OutCmd))
321             addSymbol<ELFT>(OutCmd);
322 
323       if (V.empty())
324         continue;
325 
326       for (InputSectionBase<ELFT> *Sec : V) {
327         addSection(Factory, Sec, Cmd->Name);
328         if (uint32_t Subalign = Cmd->SubalignExpr ? Cmd->SubalignExpr(0) : 0)
329           Sec->Alignment = Subalign;
330       }
331     }
332   }
333 }
334 
335 template <class ELFT>
336 void LinkerScript<ELFT>::createSections(OutputSectionFactory<ELFT> &Factory) {
337   processCommands(Factory);
338   // Add orphan sections.
339   for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles())
340     for (InputSectionBase<ELFT> *S : F->getSections())
341       if (!isDiscarded(S) && !S->OutSec)
342         addSection(Factory, S, getOutputSectionName(S));
343 }
344 
345 // Sets value of a section-defined symbol. Two kinds of
346 // symbols are processed: synthetic symbols, whose value
347 // is an offset from beginning of section and regular
348 // symbols whose value is absolute.
349 template <class ELFT>
350 static void assignSectionSymbol(SymbolAssignment *Cmd,
351                                 OutputSectionBase<ELFT> *Sec,
352                                 typename ELFT::uint Off) {
353   if (!Cmd->Sym)
354     return;
355 
356   if (auto *Body = dyn_cast<DefinedSynthetic<ELFT>>(Cmd->Sym)) {
357     Body->Section = Sec;
358     Body->Value = Cmd->Expression(Sec->getVA() + Off) - Sec->getVA();
359     return;
360   }
361   auto *Body = cast<DefinedRegular<ELFT>>(Cmd->Sym);
362   Body->Value = Cmd->Expression(Sec->getVA() + Off);
363 }
364 
365 template <class ELFT> static bool isTbss(OutputSectionBase<ELFT> *Sec) {
366   return (Sec->getFlags() & SHF_TLS) && Sec->getType() == SHT_NOBITS;
367 }
368 
369 template <class ELFT> void LinkerScript<ELFT>::output(InputSection<ELFT> *S) {
370   if (!AlreadyOutputIS.insert(S).second)
371     return;
372   bool IsTbss = isTbss(CurOutSec);
373 
374   uintX_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot;
375   Pos = alignTo(Pos, S->Alignment);
376   S->OutSecOff = Pos - CurOutSec->getVA();
377   Pos += S->getSize();
378 
379   // Update output section size after adding each section. This is so that
380   // SIZEOF works correctly in the case below:
381   // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) }
382   CurOutSec->setSize(Pos - CurOutSec->getVA());
383 
384   if (IsTbss)
385     ThreadBssOffset = Pos - Dot;
386   else
387     Dot = Pos;
388 }
389 
390 template <class ELFT> void LinkerScript<ELFT>::flush() {
391   if (!CurOutSec || !AlreadyOutputOS.insert(CurOutSec).second)
392     return;
393   if (auto *OutSec = dyn_cast<OutputSection<ELFT>>(CurOutSec)) {
394     for (InputSection<ELFT> *I : OutSec->Sections)
395       output(I);
396   } else {
397     Dot += CurOutSec->getSize();
398   }
399 }
400 
401 template <class ELFT>
402 void LinkerScript<ELFT>::switchTo(OutputSectionBase<ELFT> *Sec) {
403   if (CurOutSec == Sec)
404     return;
405   if (AlreadyOutputOS.count(Sec))
406     return;
407 
408   flush();
409   CurOutSec = Sec;
410 
411   Dot = alignTo(Dot, CurOutSec->getAlignment());
412   CurOutSec->setVA(isTbss(CurOutSec) ? Dot + ThreadBssOffset : Dot);
413 }
414 
415 template <class ELFT> void LinkerScript<ELFT>::process(BaseCommand &Base) {
416   // This handles the assignments to symbol or to a location counter (.)
417   if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) {
418     if (AssignCmd->Name == ".") {
419       // Update to location counter means update to section size.
420       Dot = AssignCmd->Expression(Dot);
421       CurOutSec->setSize(Dot - CurOutSec->getVA());
422       return;
423     }
424     assignSectionSymbol<ELFT>(AssignCmd, CurOutSec, Dot - CurOutSec->getVA());
425     return;
426   }
427 
428   // Handle BYTE(), SHORT(), LONG(), or QUAD().
429   if (auto *DataCmd = dyn_cast<BytesDataCommand>(&Base)) {
430     DataCmd->Offset = Dot - CurOutSec->getVA();
431     Dot += DataCmd->Size;
432     CurOutSec->setSize(Dot - CurOutSec->getVA());
433     return;
434   }
435 
436   // It handles single input section description command,
437   // calculates and assigns the offsets for each section and also
438   // updates the output section size.
439   auto &ICmd = cast<InputSectionDescription>(Base);
440   for (InputSectionData *ID : ICmd.Sections) {
441     auto *IB = static_cast<InputSectionBase<ELFT> *>(ID);
442     switchTo(IB->OutSec);
443     if (auto *I = dyn_cast<InputSection<ELFT>>(IB))
444       output(I);
445     else
446       flush();
447   }
448 }
449 
450 template <class ELFT>
451 static std::vector<OutputSectionBase<ELFT> *>
452 findSections(OutputSectionCommand &Cmd,
453              const std::vector<OutputSectionBase<ELFT> *> &Sections) {
454   std::vector<OutputSectionBase<ELFT> *> Ret;
455   for (OutputSectionBase<ELFT> *Sec : Sections)
456     if (Sec->getName() == Cmd.Name)
457       Ret.push_back(Sec);
458   return Ret;
459 }
460 
461 template <class ELFT>
462 void LinkerScript<ELFT>::assignOffsets(OutputSectionCommand *Cmd) {
463   std::vector<OutputSectionBase<ELFT> *> Sections =
464       findSections(*Cmd, *OutputSections);
465   if (Sections.empty())
466     return;
467   switchTo(Sections[0]);
468 
469   // Find the last section output location. We will output orphan sections
470   // there so that end symbols point to the correct location.
471   auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(),
472                         [](const std::unique_ptr<BaseCommand> &Cmd) {
473                           return !isa<SymbolAssignment>(*Cmd);
474                         })
475                .base();
476   for (auto I = Cmd->Commands.begin(); I != E; ++I)
477     process(**I);
478   for (OutputSectionBase<ELFT> *Base : Sections)
479     switchTo(Base);
480   flush();
481   std::for_each(E, Cmd->Commands.end(),
482                 [this](std::unique_ptr<BaseCommand> &B) { process(*B.get()); });
483 }
484 
485 template <class ELFT> void LinkerScript<ELFT>::adjustSectionsBeforeSorting() {
486   // It is common practice to use very generic linker scripts. So for any
487   // given run some of the output sections in the script will be empty.
488   // We could create corresponding empty output sections, but that would
489   // clutter the output.
490   // We instead remove trivially empty sections. The bfd linker seems even
491   // more aggressive at removing them.
492   auto Pos = std::remove_if(
493       Opt.Commands.begin(), Opt.Commands.end(),
494       [&](const std::unique_ptr<BaseCommand> &Base) {
495         auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
496         if (!Cmd)
497           return false;
498         std::vector<OutputSectionBase<ELFT> *> Secs =
499             findSections(*Cmd, *OutputSections);
500         if (!Secs.empty())
501           return false;
502         for (const std::unique_ptr<BaseCommand> &I : Cmd->Commands)
503           if (!isa<InputSectionDescription>(I.get()))
504             return false;
505         return true;
506       });
507   Opt.Commands.erase(Pos, Opt.Commands.end());
508 
509   // If the output section contains only symbol assignments, create a
510   // corresponding output section. The bfd linker seems to only create them if
511   // '.' is assigned to, but creating these section should not have any bad
512   // consequeces and gives us a section to put the symbol in.
513   uintX_t Flags = SHF_ALLOC;
514   uint32_t Type = 0;
515   for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
516     auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
517     if (!Cmd)
518       continue;
519     std::vector<OutputSectionBase<ELFT> *> Secs =
520         findSections(*Cmd, *OutputSections);
521     if (!Secs.empty()) {
522       Flags = Secs[0]->getFlags();
523       Type = Secs[0]->getType();
524       continue;
525     }
526 
527     auto *OutSec = new OutputSection<ELFT>(Cmd->Name, Type, Flags);
528     Out<ELFT>::Pool.emplace_back(OutSec);
529     OutputSections->push_back(OutSec);
530   }
531 }
532 
533 // When placing orphan sections, we want to place them after symbol assignments
534 // so that an orphan after
535 //   begin_foo = .;
536 //   foo : { *(foo) }
537 //   end_foo = .;
538 // doesn't break the intended meaning of the begin/end symbols.
539 // We don't want to go over sections since Writer<ELFT>::sortSections is the
540 // one in charge of deciding the order of the sections.
541 // We don't want to go over alignments, since doing so in
542 //  rx_sec : { *(rx_sec) }
543 //  . = ALIGN(0x1000);
544 //  /* The RW PT_LOAD starts here*/
545 //  rw_sec : { *(rw_sec) }
546 // would mean that the RW PT_LOAD would become unaligned.
547 static bool shouldSkip(const BaseCommand &Cmd) {
548   if (isa<OutputSectionCommand>(Cmd))
549     return false;
550   const auto *Assign = dyn_cast<SymbolAssignment>(&Cmd);
551   if (!Assign)
552     return true;
553   return Assign->Name != ".";
554 }
555 
556 template <class ELFT>
557 void LinkerScript<ELFT>::assignAddresses(std::vector<PhdrEntry<ELFT>> &Phdrs) {
558   // Orphan sections are sections present in the input files which
559   // are not explicitly placed into the output file by the linker script.
560   // We place orphan sections at end of file.
561   // Other linkers places them using some heuristics as described in
562   // https://sourceware.org/binutils/docs/ld/Orphan-Sections.html#Orphan-Sections.
563 
564   // The OutputSections are already in the correct order.
565   // This loops creates or moves commands as needed so that they are in the
566   // correct order.
567   int CmdIndex = 0;
568   for (OutputSectionBase<ELFT> *Sec : *OutputSections) {
569     StringRef Name = Sec->getName();
570 
571     // Find the last spot where we can insert a command and still get the
572     // correct result.
573     auto CmdIter = Opt.Commands.begin() + CmdIndex;
574     auto E = Opt.Commands.end();
575     while (CmdIter != E && shouldSkip(**CmdIter)) {
576       ++CmdIter;
577       ++CmdIndex;
578     }
579 
580     auto Pos =
581         std::find_if(CmdIter, E, [&](const std::unique_ptr<BaseCommand> &Base) {
582           auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
583           return Cmd && Cmd->Name == Name;
584         });
585     if (Pos == E) {
586       Opt.Commands.insert(CmdIter,
587                           llvm::make_unique<OutputSectionCommand>(Name));
588       ++CmdIndex;
589       continue;
590     }
591 
592     // Continue from where we found it.
593     CmdIndex = (Pos - Opt.Commands.begin()) + 1;
594     continue;
595   }
596 
597   // Assign addresses as instructed by linker script SECTIONS sub-commands.
598   Dot = 0;
599 
600   for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
601     if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) {
602       if (Cmd->Name == ".") {
603         Dot = Cmd->Expression(Dot);
604       } else if (Cmd->Sym) {
605         cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Cmd->Expression(Dot);
606       }
607       continue;
608     }
609 
610     if (auto *Cmd = dyn_cast<AssertCommand>(Base.get())) {
611       Cmd->Expression(Dot);
612       continue;
613     }
614 
615     auto *Cmd = cast<OutputSectionCommand>(Base.get());
616 
617     if (Cmd->AddrExpr)
618       Dot = Cmd->AddrExpr(Dot);
619 
620     assignOffsets(Cmd);
621   }
622 
623   uintX_t MinVA = std::numeric_limits<uintX_t>::max();
624   for (OutputSectionBase<ELFT> *Sec : *OutputSections) {
625     if (Sec->getFlags() & SHF_ALLOC)
626       MinVA = std::min(MinVA, Sec->getVA());
627     else
628       Sec->setVA(0);
629   }
630 
631   uintX_t HeaderSize = getHeaderSize();
632   auto FirstPTLoad =
633       std::find_if(Phdrs.begin(), Phdrs.end(), [](const PhdrEntry<ELFT> &E) {
634         return E.H.p_type == PT_LOAD;
635       });
636   if (HeaderSize <= MinVA && FirstPTLoad != Phdrs.end()) {
637     // ELF and Program headers need to be right before the first section in
638     // memory. Set their addresses accordingly.
639     MinVA = alignDown(MinVA - HeaderSize, Target->PageSize);
640     Out<ELFT>::ElfHeader->setVA(MinVA);
641     Out<ELFT>::ProgramHeaders->setVA(Out<ELFT>::ElfHeader->getSize() + MinVA);
642     FirstPTLoad->First = Out<ELFT>::ElfHeader;
643     if (!FirstPTLoad->Last)
644       FirstPTLoad->Last = Out<ELFT>::ProgramHeaders;
645   }
646 }
647 
648 // Creates program headers as instructed by PHDRS linker script command.
649 template <class ELFT>
650 std::vector<PhdrEntry<ELFT>> LinkerScript<ELFT>::createPhdrs() {
651   std::vector<PhdrEntry<ELFT>> Ret;
652 
653   // Process PHDRS and FILEHDR keywords because they are not
654   // real output sections and cannot be added in the following loop.
655   for (const PhdrsCommand &Cmd : Opt.PhdrsCommands) {
656     Ret.emplace_back(Cmd.Type, Cmd.Flags == UINT_MAX ? PF_R : Cmd.Flags);
657     PhdrEntry<ELFT> &Phdr = Ret.back();
658 
659     if (Cmd.HasFilehdr)
660       Phdr.add(Out<ELFT>::ElfHeader);
661     if (Cmd.HasPhdrs)
662       Phdr.add(Out<ELFT>::ProgramHeaders);
663 
664     if (Cmd.LMAExpr) {
665       Phdr.H.p_paddr = Cmd.LMAExpr(0);
666       Phdr.HasLMA = true;
667     }
668   }
669 
670   // Add output sections to program headers.
671   PhdrEntry<ELFT> *Load = nullptr;
672   uintX_t Flags = PF_R;
673   for (OutputSectionBase<ELFT> *Sec : *OutputSections) {
674     if (!(Sec->getFlags() & SHF_ALLOC))
675       break;
676 
677     std::vector<size_t> PhdrIds = getPhdrIndices(Sec->getName());
678     if (!PhdrIds.empty()) {
679       // Assign headers specified by linker script
680       for (size_t Id : PhdrIds) {
681         Ret[Id].add(Sec);
682         if (Opt.PhdrsCommands[Id].Flags == UINT_MAX)
683           Ret[Id].H.p_flags |= Sec->getPhdrFlags();
684       }
685     } else {
686       // If we have no load segment or flags've changed then we want new load
687       // segment.
688       uintX_t NewFlags = Sec->getPhdrFlags();
689       if (Load == nullptr || Flags != NewFlags) {
690         Load = &*Ret.emplace(Ret.end(), PT_LOAD, NewFlags);
691         Flags = NewFlags;
692       }
693       Load->add(Sec);
694     }
695   }
696   return Ret;
697 }
698 
699 template <class ELFT> bool LinkerScript<ELFT>::ignoreInterpSection() {
700   // Ignore .interp section in case we have PHDRS specification
701   // and PT_INTERP isn't listed.
702   return !Opt.PhdrsCommands.empty() &&
703          llvm::find_if(Opt.PhdrsCommands, [](const PhdrsCommand &Cmd) {
704            return Cmd.Type == PT_INTERP;
705          }) == Opt.PhdrsCommands.end();
706 }
707 
708 template <class ELFT>
709 ArrayRef<uint8_t> LinkerScript<ELFT>::getFiller(StringRef Name) {
710   for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands)
711     if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()))
712       if (Cmd->Name == Name)
713         return Cmd->Filler;
714   return {};
715 }
716 
717 template <class ELFT>
718 static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) {
719   const endianness E = ELFT::TargetEndianness;
720 
721   switch (Size) {
722   case 1:
723     *Buf = (uint8_t)Data;
724     break;
725   case 2:
726     write16<E>(Buf, Data);
727     break;
728   case 4:
729     write32<E>(Buf, Data);
730     break;
731   case 8:
732     write64<E>(Buf, Data);
733     break;
734   default:
735     llvm_unreachable("unsupported Size argument");
736   }
737 }
738 
739 template <class ELFT>
740 void LinkerScript<ELFT>::writeDataBytes(StringRef Name, uint8_t *Buf) {
741   int I = getSectionIndex(Name);
742   if (I == INT_MAX)
743     return;
744 
745   OutputSectionCommand *Cmd =
746       dyn_cast<OutputSectionCommand>(Opt.Commands[I].get());
747   for (const std::unique_ptr<BaseCommand> &Base2 : Cmd->Commands)
748     if (auto *DataCmd = dyn_cast<BytesDataCommand>(Base2.get()))
749       writeInt<ELFT>(&Buf[DataCmd->Offset], DataCmd->Data, DataCmd->Size);
750 }
751 
752 template <class ELFT> Expr LinkerScript<ELFT>::getLma(StringRef Name) {
753   for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands)
754     if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()))
755       if (Cmd->LmaExpr && Cmd->Name == Name)
756         return Cmd->LmaExpr;
757   return {};
758 }
759 
760 // Returns the index of the given section name in linker script
761 // SECTIONS commands. Sections are laid out as the same order as they
762 // were in the script. If a given name did not appear in the script,
763 // it returns INT_MAX, so that it will be laid out at end of file.
764 template <class ELFT> int LinkerScript<ELFT>::getSectionIndex(StringRef Name) {
765   int I = 0;
766   for (std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
767     if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()))
768       if (Cmd->Name == Name)
769         return I;
770     ++I;
771   }
772   return INT_MAX;
773 }
774 
775 template <class ELFT> bool LinkerScript<ELFT>::hasPhdrsCommands() {
776   return !Opt.PhdrsCommands.empty();
777 }
778 
779 template <class ELFT>
780 uint64_t LinkerScript<ELFT>::getOutputSectionAddress(StringRef Name) {
781   for (OutputSectionBase<ELFT> *Sec : *OutputSections)
782     if (Sec->getName() == Name)
783       return Sec->getVA();
784   error("undefined section " + Name);
785   return 0;
786 }
787 
788 template <class ELFT>
789 uint64_t LinkerScript<ELFT>::getOutputSectionSize(StringRef Name) {
790   for (OutputSectionBase<ELFT> *Sec : *OutputSections)
791     if (Sec->getName() == Name)
792       return Sec->getSize();
793   error("undefined section " + Name);
794   return 0;
795 }
796 
797 template <class ELFT>
798 uint64_t LinkerScript<ELFT>::getOutputSectionAlign(StringRef Name) {
799   for (OutputSectionBase<ELFT> *Sec : *OutputSections)
800     if (Sec->getName() == Name)
801       return Sec->getAlignment();
802   error("undefined section " + Name);
803   return 0;
804 }
805 
806 template <class ELFT> uint64_t LinkerScript<ELFT>::getHeaderSize() {
807   return elf::getHeaderSize<ELFT>();
808 }
809 
810 template <class ELFT> uint64_t LinkerScript<ELFT>::getSymbolValue(StringRef S) {
811   if (SymbolBody *B = Symtab<ELFT>::X->find(S))
812     return B->getVA<ELFT>();
813   error("symbol not found: " + S);
814   return 0;
815 }
816 
817 template <class ELFT> bool LinkerScript<ELFT>::isDefined(StringRef S) {
818   return Symtab<ELFT>::X->find(S) != nullptr;
819 }
820 
821 // Returns indices of ELF headers containing specific section, identified
822 // by Name. Each index is a zero based number of ELF header listed within
823 // PHDRS {} script block.
824 template <class ELFT>
825 std::vector<size_t> LinkerScript<ELFT>::getPhdrIndices(StringRef SectionName) {
826   for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
827     auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
828     if (!Cmd || Cmd->Name != SectionName)
829       continue;
830 
831     std::vector<size_t> Ret;
832     for (StringRef PhdrName : Cmd->Phdrs)
833       Ret.push_back(getPhdrIndex(PhdrName));
834     return Ret;
835   }
836   return {};
837 }
838 
839 template <class ELFT>
840 size_t LinkerScript<ELFT>::getPhdrIndex(StringRef PhdrName) {
841   size_t I = 0;
842   for (PhdrsCommand &Cmd : Opt.PhdrsCommands) {
843     if (Cmd.Name == PhdrName)
844       return I;
845     ++I;
846   }
847   error("section header '" + PhdrName + "' is not listed in PHDRS");
848   return 0;
849 }
850 
851 class elf::ScriptParser : public ScriptParserBase {
852   typedef void (ScriptParser::*Handler)();
853 
854 public:
855   ScriptParser(StringRef S, bool B) : ScriptParserBase(S), IsUnderSysroot(B) {}
856 
857   void readLinkerScript();
858   void readVersionScript();
859 
860 private:
861   void addFile(StringRef Path);
862 
863   void readAsNeeded();
864   void readEntry();
865   void readExtern();
866   void readGroup();
867   void readInclude();
868   void readOutput();
869   void readOutputArch();
870   void readOutputFormat();
871   void readPhdrs();
872   void readSearchDir();
873   void readSections();
874   void readVersion();
875   void readVersionScriptCommand();
876 
877   SymbolAssignment *readAssignment(StringRef Name);
878   BytesDataCommand *readBytesDataCommand(StringRef Tok);
879   std::vector<uint8_t> readFill();
880   OutputSectionCommand *readOutputSectionDescription(StringRef OutSec);
881   std::vector<uint8_t> readOutputSectionFiller(StringRef Tok);
882   std::vector<StringRef> readOutputSectionPhdrs();
883   InputSectionDescription *readInputSectionDescription(StringRef Tok);
884   Regex readFilePatterns();
885   std::vector<SectionPattern> readInputSectionsList();
886   InputSectionDescription *readInputSectionRules(StringRef FilePattern);
887   unsigned readPhdrType();
888   SortSectionPolicy readSortKind();
889   SymbolAssignment *readProvideHidden(bool Provide, bool Hidden);
890   SymbolAssignment *readProvideOrAssignment(StringRef Tok, bool MakeAbsolute);
891   void readSort();
892   Expr readAssert();
893 
894   Expr readExpr();
895   Expr readExpr1(Expr Lhs, int MinPrec);
896   Expr readPrimary();
897   Expr readTernary(Expr Cond);
898   Expr readParenExpr();
899 
900   // For parsing version script.
901   void readExtern(std::vector<SymbolVersion> *Globals);
902   void readVersionDeclaration(StringRef VerStr);
903   void readGlobal(StringRef VerStr);
904   void readLocal();
905 
906   ScriptConfiguration &Opt = *ScriptConfig;
907   StringSaver Saver = {ScriptConfig->Alloc};
908   bool IsUnderSysroot;
909 };
910 
911 void ScriptParser::readVersionScript() {
912   readVersionScriptCommand();
913   if (!atEOF())
914     setError("EOF expected, but got " + next());
915 }
916 
917 void ScriptParser::readVersionScriptCommand() {
918   if (skip("{")) {
919     readVersionDeclaration("");
920     return;
921   }
922 
923   while (!atEOF() && !Error && peek() != "}") {
924     StringRef VerStr = next();
925     if (VerStr == "{") {
926       setError("anonymous version definition is used in "
927                "combination with other version definitions");
928       return;
929     }
930     expect("{");
931     readVersionDeclaration(VerStr);
932   }
933 }
934 
935 void ScriptParser::readVersion() {
936   expect("{");
937   readVersionScriptCommand();
938   expect("}");
939 }
940 
941 void ScriptParser::readLinkerScript() {
942   while (!atEOF()) {
943     StringRef Tok = next();
944     if (Tok == ";")
945       continue;
946 
947     if (Tok == "ASSERT") {
948       Opt.Commands.emplace_back(new AssertCommand(readAssert()));
949     } else if (Tok == "ENTRY") {
950       readEntry();
951     } else if (Tok == "EXTERN") {
952       readExtern();
953     } else if (Tok == "GROUP" || Tok == "INPUT") {
954       readGroup();
955     } else if (Tok == "INCLUDE") {
956       readInclude();
957     } else if (Tok == "OUTPUT") {
958       readOutput();
959     } else if (Tok == "OUTPUT_ARCH") {
960       readOutputArch();
961     } else if (Tok == "OUTPUT_FORMAT") {
962       readOutputFormat();
963     } else if (Tok == "PHDRS") {
964       readPhdrs();
965     } else if (Tok == "SEARCH_DIR") {
966       readSearchDir();
967     } else if (Tok == "SECTIONS") {
968       readSections();
969     } else if (Tok == "VERSION") {
970       readVersion();
971     } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok, true)) {
972       Opt.Commands.emplace_back(Cmd);
973     } else {
974       setError("unknown directive: " + Tok);
975     }
976   }
977 }
978 
979 void ScriptParser::addFile(StringRef S) {
980   if (IsUnderSysroot && S.startswith("/")) {
981     SmallString<128> Path;
982     (Config->Sysroot + S).toStringRef(Path);
983     if (sys::fs::exists(Path)) {
984       Driver->addFile(Saver.save(Path.str()));
985       return;
986     }
987   }
988 
989   if (sys::path::is_absolute(S)) {
990     Driver->addFile(S);
991   } else if (S.startswith("=")) {
992     if (Config->Sysroot.empty())
993       Driver->addFile(S.substr(1));
994     else
995       Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)));
996   } else if (S.startswith("-l")) {
997     Driver->addLibrary(S.substr(2));
998   } else if (sys::fs::exists(S)) {
999     Driver->addFile(S);
1000   } else {
1001     std::string Path = findFromSearchPaths(S);
1002     if (Path.empty())
1003       setError("unable to find " + S);
1004     else
1005       Driver->addFile(Saver.save(Path));
1006   }
1007 }
1008 
1009 void ScriptParser::readAsNeeded() {
1010   expect("(");
1011   bool Orig = Config->AsNeeded;
1012   Config->AsNeeded = true;
1013   while (!Error && !skip(")"))
1014     addFile(unquote(next()));
1015   Config->AsNeeded = Orig;
1016 }
1017 
1018 void ScriptParser::readEntry() {
1019   // -e <symbol> takes predecence over ENTRY(<symbol>).
1020   expect("(");
1021   StringRef Tok = next();
1022   if (Config->Entry.empty())
1023     Config->Entry = Tok;
1024   expect(")");
1025 }
1026 
1027 void ScriptParser::readExtern() {
1028   expect("(");
1029   while (!Error && !skip(")"))
1030     Config->Undefined.push_back(next());
1031 }
1032 
1033 void ScriptParser::readGroup() {
1034   expect("(");
1035   while (!Error && !skip(")")) {
1036     StringRef Tok = next();
1037     if (Tok == "AS_NEEDED")
1038       readAsNeeded();
1039     else
1040       addFile(unquote(Tok));
1041   }
1042 }
1043 
1044 void ScriptParser::readInclude() {
1045   StringRef Tok = next();
1046   auto MBOrErr = MemoryBuffer::getFile(unquote(Tok));
1047   if (!MBOrErr) {
1048     setError("cannot open " + Tok);
1049     return;
1050   }
1051   std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
1052   StringRef S = Saver.save(MB->getMemBufferRef().getBuffer());
1053   std::vector<StringRef> V = tokenize(S);
1054   Tokens.insert(Tokens.begin() + Pos, V.begin(), V.end());
1055 }
1056 
1057 void ScriptParser::readOutput() {
1058   // -o <file> takes predecence over OUTPUT(<file>).
1059   expect("(");
1060   StringRef Tok = next();
1061   if (Config->OutputFile.empty())
1062     Config->OutputFile = unquote(Tok);
1063   expect(")");
1064 }
1065 
1066 void ScriptParser::readOutputArch() {
1067   // Error checking only for now.
1068   expect("(");
1069   next();
1070   expect(")");
1071 }
1072 
1073 void ScriptParser::readOutputFormat() {
1074   // Error checking only for now.
1075   expect("(");
1076   next();
1077   StringRef Tok = next();
1078   if (Tok == ")")
1079     return;
1080   if (Tok != ",") {
1081     setError("unexpected token: " + Tok);
1082     return;
1083   }
1084   next();
1085   expect(",");
1086   next();
1087   expect(")");
1088 }
1089 
1090 void ScriptParser::readPhdrs() {
1091   expect("{");
1092   while (!Error && !skip("}")) {
1093     StringRef Tok = next();
1094     Opt.PhdrsCommands.push_back(
1095         {Tok, PT_NULL, false, false, UINT_MAX, nullptr});
1096     PhdrsCommand &PhdrCmd = Opt.PhdrsCommands.back();
1097 
1098     PhdrCmd.Type = readPhdrType();
1099     do {
1100       Tok = next();
1101       if (Tok == ";")
1102         break;
1103       if (Tok == "FILEHDR")
1104         PhdrCmd.HasFilehdr = true;
1105       else if (Tok == "PHDRS")
1106         PhdrCmd.HasPhdrs = true;
1107       else if (Tok == "AT")
1108         PhdrCmd.LMAExpr = readParenExpr();
1109       else if (Tok == "FLAGS") {
1110         expect("(");
1111         // Passing 0 for the value of dot is a bit of a hack. It means that
1112         // we accept expressions like ".|1".
1113         PhdrCmd.Flags = readExpr()(0);
1114         expect(")");
1115       } else
1116         setError("unexpected header attribute: " + Tok);
1117     } while (!Error);
1118   }
1119 }
1120 
1121 void ScriptParser::readSearchDir() {
1122   expect("(");
1123   StringRef Tok = next();
1124   if (!Config->Nostdlib)
1125     Config->SearchPaths.push_back(unquote(Tok));
1126   expect(")");
1127 }
1128 
1129 void ScriptParser::readSections() {
1130   Opt.HasSections = true;
1131   expect("{");
1132   while (!Error && !skip("}")) {
1133     StringRef Tok = next();
1134     BaseCommand *Cmd = readProvideOrAssignment(Tok, true);
1135     if (!Cmd) {
1136       if (Tok == "ASSERT")
1137         Cmd = new AssertCommand(readAssert());
1138       else
1139         Cmd = readOutputSectionDescription(Tok);
1140     }
1141     Opt.Commands.emplace_back(Cmd);
1142   }
1143 }
1144 
1145 static int precedence(StringRef Op) {
1146   return StringSwitch<int>(Op)
1147       .Cases("*", "/", 5)
1148       .Cases("+", "-", 4)
1149       .Cases("<<", ">>", 3)
1150       .Cases("<", "<=", ">", ">=", "==", "!=", 2)
1151       .Cases("&", "|", 1)
1152       .Default(-1);
1153 }
1154 
1155 Regex ScriptParser::readFilePatterns() {
1156   std::vector<StringRef> V;
1157   while (!Error && !skip(")"))
1158     V.push_back(next());
1159   return compileGlobPatterns(V);
1160 }
1161 
1162 SortSectionPolicy ScriptParser::readSortKind() {
1163   if (skip("SORT") || skip("SORT_BY_NAME"))
1164     return SortSectionPolicy::Name;
1165   if (skip("SORT_BY_ALIGNMENT"))
1166     return SortSectionPolicy::Alignment;
1167   if (skip("SORT_BY_INIT_PRIORITY"))
1168     return SortSectionPolicy::Priority;
1169   if (skip("SORT_NONE"))
1170     return SortSectionPolicy::None;
1171   return SortSectionPolicy::Default;
1172 }
1173 
1174 // Method reads a list of sequence of excluded files and section globs given in
1175 // a following form: ((EXCLUDE_FILE(file_pattern+))? section_pattern+)+
1176 // Example: *(.foo.1 EXCLUDE_FILE (*a.o) .foo.2 EXCLUDE_FILE (*b.o) .foo.3)
1177 // The semantics of that is next:
1178 // * Include .foo.1 from every file.
1179 // * Include .foo.2 from every file but a.o
1180 // * Include .foo.3 from every file but b.o
1181 std::vector<SectionPattern> ScriptParser::readInputSectionsList() {
1182   std::vector<SectionPattern> Ret;
1183   while (!Error && peek() != ")") {
1184     Regex ExcludeFileRe;
1185     if (skip("EXCLUDE_FILE")) {
1186       expect("(");
1187       ExcludeFileRe = readFilePatterns();
1188     }
1189 
1190     std::vector<StringRef> V;
1191     while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE")
1192       V.push_back(next());
1193 
1194     if (!V.empty())
1195       Ret.push_back({std::move(ExcludeFileRe), compileGlobPatterns(V)});
1196     else
1197       setError("section pattern is expected");
1198   }
1199   return Ret;
1200 }
1201 
1202 // Section pattern grammar can have complex expressions, for example:
1203 // *(SORT(.foo.* EXCLUDE_FILE (*file1.o) .bar.*) .bar.* SORT(.zed.*))
1204 // Generally is a sequence of globs and excludes that may be wrapped in a SORT()
1205 // commands, like: SORT(glob0) glob1 glob2 SORT(glob4)
1206 // This methods handles wrapping sequences of excluded files and section globs
1207 // into SORT() if that needed and reads them all.
1208 InputSectionDescription *
1209 ScriptParser::readInputSectionRules(StringRef FilePattern) {
1210   auto *Cmd = new InputSectionDescription(FilePattern);
1211   expect("(");
1212   while (!HasError && !skip(")")) {
1213     SortSectionPolicy Outer = readSortKind();
1214     SortSectionPolicy Inner = SortSectionPolicy::Default;
1215     std::vector<SectionPattern> V;
1216     if (Outer != SortSectionPolicy::Default) {
1217       expect("(");
1218       Inner = readSortKind();
1219       if (Inner != SortSectionPolicy::Default) {
1220         expect("(");
1221         V = readInputSectionsList();
1222         expect(")");
1223       } else {
1224         V = readInputSectionsList();
1225       }
1226       expect(")");
1227     } else {
1228       V = readInputSectionsList();
1229     }
1230 
1231     for (SectionPattern &Pat : V) {
1232       Pat.SortInner = Inner;
1233       Pat.SortOuter = Outer;
1234     }
1235 
1236     std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns));
1237   }
1238   return Cmd;
1239 }
1240 
1241 InputSectionDescription *
1242 ScriptParser::readInputSectionDescription(StringRef Tok) {
1243   // Input section wildcard can be surrounded by KEEP.
1244   // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep
1245   if (Tok == "KEEP") {
1246     expect("(");
1247     StringRef FilePattern = next();
1248     InputSectionDescription *Cmd = readInputSectionRules(FilePattern);
1249     expect(")");
1250     for (SectionPattern &Pat : Cmd->SectionPatterns)
1251       Opt.KeptSections.push_back(&Pat.SectionRe);
1252     return Cmd;
1253   }
1254   return readInputSectionRules(Tok);
1255 }
1256 
1257 void ScriptParser::readSort() {
1258   expect("(");
1259   expect("CONSTRUCTORS");
1260   expect(")");
1261 }
1262 
1263 Expr ScriptParser::readAssert() {
1264   expect("(");
1265   Expr E = readExpr();
1266   expect(",");
1267   StringRef Msg = unquote(next());
1268   expect(")");
1269   return [=](uint64_t Dot) {
1270     uint64_t V = E(Dot);
1271     if (!V)
1272       error(Msg);
1273     return V;
1274   };
1275 }
1276 
1277 // Reads a FILL(expr) command. We handle the FILL command as an
1278 // alias for =fillexp section attribute, which is different from
1279 // what GNU linkers do.
1280 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html
1281 std::vector<uint8_t> ScriptParser::readFill() {
1282   expect("(");
1283   std::vector<uint8_t> V = readOutputSectionFiller(next());
1284   expect(")");
1285   expect(";");
1286   return V;
1287 }
1288 
1289 OutputSectionCommand *
1290 ScriptParser::readOutputSectionDescription(StringRef OutSec) {
1291   OutputSectionCommand *Cmd = new OutputSectionCommand(OutSec);
1292 
1293   // Read an address expression.
1294   // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html#Output-Section-Address
1295   if (peek() != ":")
1296     Cmd->AddrExpr = readExpr();
1297 
1298   expect(":");
1299 
1300   if (skip("AT"))
1301     Cmd->LmaExpr = readParenExpr();
1302   if (skip("ALIGN"))
1303     Cmd->AlignExpr = readParenExpr();
1304   if (skip("SUBALIGN"))
1305     Cmd->SubalignExpr = readParenExpr();
1306 
1307   // Parse constraints.
1308   if (skip("ONLY_IF_RO"))
1309     Cmd->Constraint = ConstraintKind::ReadOnly;
1310   if (skip("ONLY_IF_RW"))
1311     Cmd->Constraint = ConstraintKind::ReadWrite;
1312   expect("{");
1313 
1314   while (!Error && !skip("}")) {
1315     StringRef Tok = next();
1316     if (SymbolAssignment *Assignment = readProvideOrAssignment(Tok, false))
1317       Cmd->Commands.emplace_back(Assignment);
1318     else if (BytesDataCommand *Data = readBytesDataCommand(Tok))
1319       Cmd->Commands.emplace_back(Data);
1320     else if (Tok == "FILL")
1321       Cmd->Filler = readFill();
1322     else if (Tok == "SORT")
1323       readSort();
1324     else if (peek() == "(")
1325       Cmd->Commands.emplace_back(readInputSectionDescription(Tok));
1326     else
1327       setError("unknown command " + Tok);
1328   }
1329   Cmd->Phdrs = readOutputSectionPhdrs();
1330 
1331   if (skip("="))
1332     Cmd->Filler = readOutputSectionFiller(next());
1333   else if (peek().startswith("="))
1334     Cmd->Filler = readOutputSectionFiller(next().drop_front());
1335 
1336   return Cmd;
1337 }
1338 
1339 // Read "=<number>" where <number> is an octal/decimal/hexadecimal number.
1340 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html
1341 //
1342 // ld.gold is not fully compatible with ld.bfd. ld.bfd handles
1343 // hexstrings as blobs of arbitrary sizes, while ld.gold handles them
1344 // as 32-bit big-endian values. We will do the same as ld.gold does
1345 // because it's simpler than what ld.bfd does.
1346 std::vector<uint8_t> ScriptParser::readOutputSectionFiller(StringRef Tok) {
1347   uint32_t V;
1348   if (Tok.getAsInteger(0, V)) {
1349     setError("invalid filler expression: " + Tok);
1350     return {};
1351   }
1352   return {uint8_t(V >> 24), uint8_t(V >> 16), uint8_t(V >> 8), uint8_t(V)};
1353 }
1354 
1355 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) {
1356   expect("(");
1357   SymbolAssignment *Cmd = readAssignment(next());
1358   Cmd->Provide = Provide;
1359   Cmd->Hidden = Hidden;
1360   expect(")");
1361   expect(";");
1362   return Cmd;
1363 }
1364 
1365 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok,
1366                                                         bool MakeAbsolute) {
1367   SymbolAssignment *Cmd = nullptr;
1368   if (peek() == "=" || peek() == "+=") {
1369     Cmd = readAssignment(Tok);
1370     expect(";");
1371   } else if (Tok == "PROVIDE") {
1372     Cmd = readProvideHidden(true, false);
1373   } else if (Tok == "HIDDEN") {
1374     Cmd = readProvideHidden(false, true);
1375   } else if (Tok == "PROVIDE_HIDDEN") {
1376     Cmd = readProvideHidden(true, true);
1377   }
1378   if (Cmd && MakeAbsolute)
1379     Cmd->IsAbsolute = true;
1380   return Cmd;
1381 }
1382 
1383 static uint64_t getSymbolValue(StringRef S, uint64_t Dot) {
1384   if (S == ".")
1385     return Dot;
1386   return ScriptBase->getSymbolValue(S);
1387 }
1388 
1389 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) {
1390   StringRef Op = next();
1391   bool IsAbsolute = false;
1392   Expr E;
1393   assert(Op == "=" || Op == "+=");
1394   if (skip("ABSOLUTE")) {
1395     E = readParenExpr();
1396     IsAbsolute = true;
1397   } else {
1398     E = readExpr();
1399   }
1400   if (Op == "+=")
1401     E = [=](uint64_t Dot) { return getSymbolValue(Name, Dot) + E(Dot); };
1402   return new SymbolAssignment(Name, E, IsAbsolute);
1403 }
1404 
1405 // This is an operator-precedence parser to parse a linker
1406 // script expression.
1407 Expr ScriptParser::readExpr() { return readExpr1(readPrimary(), 0); }
1408 
1409 static Expr combine(StringRef Op, Expr L, Expr R) {
1410   if (Op == "*")
1411     return [=](uint64_t Dot) { return L(Dot) * R(Dot); };
1412   if (Op == "/") {
1413     return [=](uint64_t Dot) -> uint64_t {
1414       uint64_t RHS = R(Dot);
1415       if (RHS == 0) {
1416         error("division by zero");
1417         return 0;
1418       }
1419       return L(Dot) / RHS;
1420     };
1421   }
1422   if (Op == "+")
1423     return [=](uint64_t Dot) { return L(Dot) + R(Dot); };
1424   if (Op == "-")
1425     return [=](uint64_t Dot) { return L(Dot) - R(Dot); };
1426   if (Op == "<<")
1427     return [=](uint64_t Dot) { return L(Dot) << R(Dot); };
1428   if (Op == ">>")
1429     return [=](uint64_t Dot) { return L(Dot) >> R(Dot); };
1430   if (Op == "<")
1431     return [=](uint64_t Dot) { return L(Dot) < R(Dot); };
1432   if (Op == ">")
1433     return [=](uint64_t Dot) { return L(Dot) > R(Dot); };
1434   if (Op == ">=")
1435     return [=](uint64_t Dot) { return L(Dot) >= R(Dot); };
1436   if (Op == "<=")
1437     return [=](uint64_t Dot) { return L(Dot) <= R(Dot); };
1438   if (Op == "==")
1439     return [=](uint64_t Dot) { return L(Dot) == R(Dot); };
1440   if (Op == "!=")
1441     return [=](uint64_t Dot) { return L(Dot) != R(Dot); };
1442   if (Op == "&")
1443     return [=](uint64_t Dot) { return L(Dot) & R(Dot); };
1444   if (Op == "|")
1445     return [=](uint64_t Dot) { return L(Dot) | R(Dot); };
1446   llvm_unreachable("invalid operator");
1447 }
1448 
1449 // This is a part of the operator-precedence parser. This function
1450 // assumes that the remaining token stream starts with an operator.
1451 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) {
1452   while (!atEOF() && !Error) {
1453     // Read an operator and an expression.
1454     StringRef Op1 = peek();
1455     if (Op1 == "?")
1456       return readTernary(Lhs);
1457     if (precedence(Op1) < MinPrec)
1458       break;
1459     next();
1460     Expr Rhs = readPrimary();
1461 
1462     // Evaluate the remaining part of the expression first if the
1463     // next operator has greater precedence than the previous one.
1464     // For example, if we have read "+" and "3", and if the next
1465     // operator is "*", then we'll evaluate 3 * ... part first.
1466     while (!atEOF()) {
1467       StringRef Op2 = peek();
1468       if (precedence(Op2) <= precedence(Op1))
1469         break;
1470       Rhs = readExpr1(Rhs, precedence(Op2));
1471     }
1472 
1473     Lhs = combine(Op1, Lhs, Rhs);
1474   }
1475   return Lhs;
1476 }
1477 
1478 uint64_t static getConstant(StringRef S) {
1479   if (S == "COMMONPAGESIZE")
1480     return Target->PageSize;
1481   if (S == "MAXPAGESIZE")
1482     return Config->MaxPageSize;
1483   error("unknown constant: " + S);
1484   return 0;
1485 }
1486 
1487 // Parses Tok as an integer. Returns true if successful.
1488 // It recognizes hexadecimal (prefixed with "0x" or suffixed with "H")
1489 // and decimal numbers. Decimal numbers may have "K" (kilo) or
1490 // "M" (mega) prefixes.
1491 static bool readInteger(StringRef Tok, uint64_t &Result) {
1492   if (Tok.startswith("-")) {
1493     if (!readInteger(Tok.substr(1), Result))
1494       return false;
1495     Result = -Result;
1496     return true;
1497   }
1498   if (Tok.startswith_lower("0x"))
1499     return !Tok.substr(2).getAsInteger(16, Result);
1500   if (Tok.endswith_lower("H"))
1501     return !Tok.drop_back().getAsInteger(16, Result);
1502 
1503   int Suffix = 1;
1504   if (Tok.endswith_lower("K")) {
1505     Suffix = 1024;
1506     Tok = Tok.drop_back();
1507   } else if (Tok.endswith_lower("M")) {
1508     Suffix = 1024 * 1024;
1509     Tok = Tok.drop_back();
1510   }
1511   if (Tok.getAsInteger(10, Result))
1512     return false;
1513   Result *= Suffix;
1514   return true;
1515 }
1516 
1517 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) {
1518   int Size = StringSwitch<unsigned>(Tok)
1519                  .Case("BYTE", 1)
1520                  .Case("SHORT", 2)
1521                  .Case("LONG", 4)
1522                  .Case("QUAD", 8)
1523                  .Default(-1);
1524   if (Size == -1)
1525     return nullptr;
1526 
1527   expect("(");
1528   uint64_t Val = 0;
1529   StringRef S = next();
1530   if (!readInteger(S, Val))
1531     setError("unexpected value: " + S);
1532   expect(")");
1533   return new BytesDataCommand(Val, Size);
1534 }
1535 
1536 Expr ScriptParser::readPrimary() {
1537   if (peek() == "(")
1538     return readParenExpr();
1539 
1540   StringRef Tok = next();
1541 
1542   if (Tok == "~") {
1543     Expr E = readPrimary();
1544     return [=](uint64_t Dot) { return ~E(Dot); };
1545   }
1546   if (Tok == "-") {
1547     Expr E = readPrimary();
1548     return [=](uint64_t Dot) { return -E(Dot); };
1549   }
1550 
1551   // Built-in functions are parsed here.
1552   // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html.
1553   if (Tok == "ADDR") {
1554     expect("(");
1555     StringRef Name = next();
1556     expect(")");
1557     return
1558         [=](uint64_t Dot) { return ScriptBase->getOutputSectionAddress(Name); };
1559   }
1560   if (Tok == "ASSERT")
1561     return readAssert();
1562   if (Tok == "ALIGN") {
1563     Expr E = readParenExpr();
1564     return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); };
1565   }
1566   if (Tok == "CONSTANT") {
1567     expect("(");
1568     StringRef Tok = next();
1569     expect(")");
1570     return [=](uint64_t Dot) { return getConstant(Tok); };
1571   }
1572   if (Tok == "DEFINED") {
1573     expect("(");
1574     StringRef Tok = next();
1575     expect(")");
1576     return [=](uint64_t Dot) { return ScriptBase->isDefined(Tok) ? 1 : 0; };
1577   }
1578   if (Tok == "SEGMENT_START") {
1579     expect("(");
1580     next();
1581     expect(",");
1582     Expr E = readExpr();
1583     expect(")");
1584     return [=](uint64_t Dot) { return E(Dot); };
1585   }
1586   if (Tok == "DATA_SEGMENT_ALIGN") {
1587     expect("(");
1588     Expr E = readExpr();
1589     expect(",");
1590     readExpr();
1591     expect(")");
1592     return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); };
1593   }
1594   if (Tok == "DATA_SEGMENT_END") {
1595     expect("(");
1596     expect(".");
1597     expect(")");
1598     return [](uint64_t Dot) { return Dot; };
1599   }
1600   // GNU linkers implements more complicated logic to handle
1601   // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and just align to
1602   // the next page boundary for simplicity.
1603   if (Tok == "DATA_SEGMENT_RELRO_END") {
1604     expect("(");
1605     readExpr();
1606     expect(",");
1607     readExpr();
1608     expect(")");
1609     return [](uint64_t Dot) { return alignTo(Dot, Target->PageSize); };
1610   }
1611   if (Tok == "SIZEOF") {
1612     expect("(");
1613     StringRef Name = next();
1614     expect(")");
1615     return [=](uint64_t Dot) { return ScriptBase->getOutputSectionSize(Name); };
1616   }
1617   if (Tok == "ALIGNOF") {
1618     expect("(");
1619     StringRef Name = next();
1620     expect(")");
1621     return
1622         [=](uint64_t Dot) { return ScriptBase->getOutputSectionAlign(Name); };
1623   }
1624   if (Tok == "SIZEOF_HEADERS")
1625     return [=](uint64_t Dot) { return ScriptBase->getHeaderSize(); };
1626 
1627   // Tok is a literal number.
1628   uint64_t V;
1629   if (readInteger(Tok, V))
1630     return [=](uint64_t Dot) { return V; };
1631 
1632   // Tok is a symbol name.
1633   if (Tok != "." && !isValidCIdentifier(Tok))
1634     setError("malformed number: " + Tok);
1635   return [=](uint64_t Dot) { return getSymbolValue(Tok, Dot); };
1636 }
1637 
1638 Expr ScriptParser::readTernary(Expr Cond) {
1639   next();
1640   Expr L = readExpr();
1641   expect(":");
1642   Expr R = readExpr();
1643   return [=](uint64_t Dot) { return Cond(Dot) ? L(Dot) : R(Dot); };
1644 }
1645 
1646 Expr ScriptParser::readParenExpr() {
1647   expect("(");
1648   Expr E = readExpr();
1649   expect(")");
1650   return E;
1651 }
1652 
1653 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() {
1654   std::vector<StringRef> Phdrs;
1655   while (!Error && peek().startswith(":")) {
1656     StringRef Tok = next();
1657     Tok = (Tok.size() == 1) ? next() : Tok.substr(1);
1658     if (Tok.empty()) {
1659       setError("section header name is empty");
1660       break;
1661     }
1662     Phdrs.push_back(Tok);
1663   }
1664   return Phdrs;
1665 }
1666 
1667 unsigned ScriptParser::readPhdrType() {
1668   StringRef Tok = next();
1669   unsigned Ret = StringSwitch<unsigned>(Tok)
1670                      .Case("PT_NULL", PT_NULL)
1671                      .Case("PT_LOAD", PT_LOAD)
1672                      .Case("PT_DYNAMIC", PT_DYNAMIC)
1673                      .Case("PT_INTERP", PT_INTERP)
1674                      .Case("PT_NOTE", PT_NOTE)
1675                      .Case("PT_SHLIB", PT_SHLIB)
1676                      .Case("PT_PHDR", PT_PHDR)
1677                      .Case("PT_TLS", PT_TLS)
1678                      .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME)
1679                      .Case("PT_GNU_STACK", PT_GNU_STACK)
1680                      .Case("PT_GNU_RELRO", PT_GNU_RELRO)
1681                      .Default(-1);
1682 
1683   if (Ret == (unsigned)-1) {
1684     setError("invalid program header type: " + Tok);
1685     return PT_NULL;
1686   }
1687   return Ret;
1688 }
1689 
1690 void ScriptParser::readVersionDeclaration(StringRef VerStr) {
1691   // Identifiers start at 2 because 0 and 1 are reserved
1692   // for VER_NDX_LOCAL and VER_NDX_GLOBAL constants.
1693   size_t VersionId = Config->VersionDefinitions.size() + 2;
1694   Config->VersionDefinitions.push_back({VerStr, VersionId});
1695 
1696   if (skip("global:") || peek() != "local:")
1697     readGlobal(VerStr);
1698   if (skip("local:"))
1699     readLocal();
1700   expect("}");
1701 
1702   // Each version may have a parent version. For example, "Ver2" defined as
1703   // "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" as a parent. This
1704   // version hierarchy is, probably against your instinct, purely for human; the
1705   // runtime doesn't care about them at all. In LLD, we simply skip the token.
1706   if (!VerStr.empty() && peek() != ";")
1707     next();
1708   expect(";");
1709 }
1710 
1711 void ScriptParser::readLocal() {
1712   Config->DefaultSymbolVersion = VER_NDX_LOCAL;
1713   expect("*");
1714   expect(";");
1715 }
1716 
1717 void ScriptParser::readExtern(std::vector<SymbolVersion> *Globals) {
1718   expect("\"C++\"");
1719   expect("{");
1720 
1721   for (;;) {
1722     if (peek() == "}" || Error)
1723       break;
1724     bool HasWildcard = !peek().startswith("\"") && hasWildcard(peek());
1725     Globals->push_back({unquote(next()), true, HasWildcard});
1726     expect(";");
1727   }
1728 
1729   expect("}");
1730   expect(";");
1731 }
1732 
1733 void ScriptParser::readGlobal(StringRef VerStr) {
1734   std::vector<SymbolVersion> *Globals;
1735   if (VerStr.empty())
1736     Globals = &Config->VersionScriptGlobals;
1737   else
1738     Globals = &Config->VersionDefinitions.back().Globals;
1739 
1740   for (;;) {
1741     if (skip("extern"))
1742       readExtern(Globals);
1743 
1744     StringRef Cur = peek();
1745     if (Cur == "}" || Cur == "local:" || Error)
1746       return;
1747     next();
1748     Globals->push_back({unquote(Cur), false, hasWildcard(Cur)});
1749     expect(";");
1750   }
1751 }
1752 
1753 static bool isUnderSysroot(StringRef Path) {
1754   if (Config->Sysroot == "")
1755     return false;
1756   for (; !Path.empty(); Path = sys::path::parent_path(Path))
1757     if (sys::fs::equivalent(Config->Sysroot, Path))
1758       return true;
1759   return false;
1760 }
1761 
1762 void elf::readLinkerScript(MemoryBufferRef MB) {
1763   StringRef Path = MB.getBufferIdentifier();
1764   ScriptParser(MB.getBuffer(), isUnderSysroot(Path)).readLinkerScript();
1765 }
1766 
1767 void elf::readVersionScript(MemoryBufferRef MB) {
1768   ScriptParser(MB.getBuffer(), false).readVersionScript();
1769 }
1770 
1771 template class elf::LinkerScript<ELF32LE>;
1772 template class elf::LinkerScript<ELF32BE>;
1773 template class elf::LinkerScript<ELF64LE>;
1774 template class elf::LinkerScript<ELF64BE>;
1775