1 //===- LinkerScript.cpp ---------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// This file contains the parser/evaluator of the linker script.
// It parses a linker script and writes the result to Config or ScriptConfig
// objects.
13 //
14 // If SECTIONS command is used, a ScriptConfig contains an AST
15 // of the command which will later be consumed by createSections() and
16 // assignAddresses().
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #include "LinkerScript.h"
21 #include "Config.h"
22 #include "Driver.h"
23 #include "InputSection.h"
24 #include "OutputSections.h"
25 #include "ScriptParser.h"
26 #include "Strings.h"
27 #include "Symbols.h"
28 #include "SymbolTable.h"
29 #include "Target.h"
30 #include "Writer.h"
31 #include "llvm/ADT/StringSwitch.h"
32 #include "llvm/Support/ELF.h"
33 #include "llvm/Support/FileSystem.h"
34 #include "llvm/Support/MemoryBuffer.h"
35 #include "llvm/Support/Path.h"
36 #include "llvm/Support/StringSaver.h"
37 
38 using namespace llvm;
39 using namespace llvm::ELF;
40 using namespace llvm::object;
41 using namespace lld;
42 using namespace lld::elf;
43 
44 LinkerScriptBase *elf::ScriptBase;
45 ScriptConfiguration *elf::ScriptConfig;
46 
47 template <class ELFT> static void addRegular(SymbolAssignment *Cmd) {
48   Symbol *Sym = Symtab<ELFT>::X->addRegular(Cmd->Name, STB_GLOBAL, STV_DEFAULT);
49   Sym->Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT;
50   Cmd->Sym = Sym->body();
51 
52   // If we have no SECTIONS then we don't have '.' and don't call
53   // assignAddresses(). We calculate symbol value immediately in this case.
54   if (!ScriptConfig->HasSections)
55     cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Cmd->Expression(0);
56 }
57 
58 template <class ELFT> static void addSynthetic(SymbolAssignment *Cmd) {
59   Symbol *Sym = Symtab<ELFT>::X->addSynthetic(
60       Cmd->Name, nullptr, 0, Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT);
61   Cmd->Sym = Sym->body();
62 }
63 
64 template <class ELFT> static void addSymbol(SymbolAssignment *Cmd) {
65   if (Cmd->IsAbsolute)
66     addRegular<ELFT>(Cmd);
67   else
68     addSynthetic<ELFT>(Cmd);
69 }
70 // If a symbol was in PROVIDE(), we need to define it only when
71 // it is an undefined symbol.
72 template <class ELFT> static bool shouldDefine(SymbolAssignment *Cmd) {
73   if (Cmd->Name == ".")
74     return false;
75   if (!Cmd->Provide)
76     return true;
77   SymbolBody *B = Symtab<ELFT>::X->find(Cmd->Name);
78   return B && B->isUndefined();
79 }
80 
81 bool SymbolAssignment::classof(const BaseCommand *C) {
82   return C->Kind == AssignmentKind;
83 }
84 
85 bool OutputSectionCommand::classof(const BaseCommand *C) {
86   return C->Kind == OutputSectionKind;
87 }
88 
89 bool InputSectionDescription::classof(const BaseCommand *C) {
90   return C->Kind == InputSectionKind;
91 }
92 
93 bool AssertCommand::classof(const BaseCommand *C) {
94   return C->Kind == AssertKind;
95 }
96 
97 template <class ELFT> static bool isDiscarded(InputSectionBase<ELFT> *S) {
98   return !S || !S->Live;
99 }
100 
101 template <class ELFT> LinkerScript<ELFT>::LinkerScript() {}
102 template <class ELFT> LinkerScript<ELFT>::~LinkerScript() {}
103 
104 template <class ELFT>
105 bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) {
106   for (Regex *Re : Opt.KeptSections)
107     if (Re->match(S->Name))
108       return true;
109   return false;
110 }
111 
112 static bool comparePriority(InputSectionData *A, InputSectionData *B) {
113   return getPriority(A->Name) < getPriority(B->Name);
114 }
115 
116 static bool compareName(InputSectionData *A, InputSectionData *B) {
117   return A->Name < B->Name;
118 }
119 
120 static bool compareAlignment(InputSectionData *A, InputSectionData *B) {
121   // ">" is not a mistake. Larger alignments are placed before smaller
122   // alignments in order to reduce the amount of padding necessary.
123   // This is compatible with GNU.
124   return A->Alignment > B->Alignment;
125 }
126 
127 static std::function<bool(InputSectionData *, InputSectionData *)>
128 getComparator(SortSectionPolicy K) {
129   switch (K) {
130   case SortSectionPolicy::Alignment:
131     return compareAlignment;
132   case SortSectionPolicy::Name:
133     return compareName;
134   case SortSectionPolicy::Priority:
135     return comparePriority;
136   default:
137     llvm_unreachable("unknown sort policy");
138   }
139 }
140 
141 template <class ELFT>
142 static bool matchConstraints(ArrayRef<InputSectionBase<ELFT> *> Sections,
143                              ConstraintKind Kind) {
144   if (Kind == ConstraintKind::NoConstraint)
145     return true;
146   bool IsRW = llvm::any_of(Sections, [=](InputSectionData *Sec2) {
147     auto *Sec = static_cast<InputSectionBase<ELFT> *>(Sec2);
148     return Sec->getSectionHdr()->sh_flags & SHF_WRITE;
149   });
150   return (IsRW && Kind == ConstraintKind::ReadWrite) ||
151          (!IsRW && Kind == ConstraintKind::ReadOnly);
152 }
153 
154 static void sortSections(InputSectionData **Begin, InputSectionData **End,
155                          SortSectionPolicy K) {
156   if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None)
157     std::stable_sort(Begin, End, getComparator(K));
158 }
159 
160 // Compute and remember which sections the InputSectionDescription matches.
161 template <class ELFT>
162 void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) {
163   // Collects all sections that satisfy constraints of I
164   // and attach them to I.
165   for (SectionPattern &Pat : I->SectionPatterns) {
166     size_t SizeBefore = I->Sections.size();
167     for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles()) {
168       StringRef Filename = sys::path::filename(F->getName());
169       if (!I->FileRe.match(Filename) || Pat.ExcludedFileRe.match(Filename))
170         continue;
171 
172       for (InputSectionBase<ELFT> *S : F->getSections())
173         if (!isDiscarded(S) && !S->OutSec && Pat.SectionRe.match(S->Name))
174           I->Sections.push_back(S);
175       if (Pat.SectionRe.match("COMMON"))
176         I->Sections.push_back(CommonInputSection<ELFT>::X);
177     }
178 
179     // Sort sections as instructed by SORT-family commands and --sort-section
180     // option. Because SORT-family commands can be nested at most two depth
181     // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command
182     // line option is respected even if a SORT command is given, the exact
183     // behavior we have here is a bit complicated. Here are the rules.
184     //
185     // 1. If two SORT commands are given, --sort-section is ignored.
186     // 2. If one SORT command is given, and if it is not SORT_NONE,
187     //    --sort-section is handled as an inner SORT command.
188     // 3. If one SORT command is given, and if it is SORT_NONE, don't sort.
189     // 4. If no SORT command is given, sort according to --sort-section.
190     InputSectionData **Begin = I->Sections.data() + SizeBefore;
191     InputSectionData **End = I->Sections.data() + I->Sections.size();
192     if (Pat.SortOuter != SortSectionPolicy::None) {
193       if (Pat.SortInner == SortSectionPolicy::Default)
194         sortSections(Begin, End, Config->SortSection);
195       else
196         sortSections(Begin, End, Pat.SortInner);
197       sortSections(Begin, End, Pat.SortOuter);
198     }
199   }
200 
201   // We do not add duplicate input sections, so mark them with a dummy output
202   // section for now.
203   for (InputSectionData *S : I->Sections) {
204     auto *S2 = static_cast<InputSectionBase<ELFT> *>(S);
205     S2->OutSec = (OutputSectionBase<ELFT> *)-1;
206   }
207 }
208 
209 template <class ELFT>
210 void LinkerScript<ELFT>::discard(ArrayRef<InputSectionBase<ELFT> *> V) {
211   for (InputSectionBase<ELFT> *S : V) {
212     S->Live = false;
213     reportDiscarded(S);
214   }
215 }
216 
217 template <class ELFT>
218 std::vector<InputSectionBase<ELFT> *>
219 LinkerScript<ELFT>::createInputSectionList(OutputSectionCommand &OutCmd) {
220   std::vector<InputSectionBase<ELFT> *> Ret;
221 
222   for (const std::unique_ptr<BaseCommand> &Base : OutCmd.Commands) {
223     auto *Cmd = dyn_cast<InputSectionDescription>(Base.get());
224     if (!Cmd)
225       continue;
226     computeInputSections(Cmd);
227     for (InputSectionData *S : Cmd->Sections)
228       Ret.push_back(static_cast<InputSectionBase<ELFT> *>(S));
229   }
230 
231   return Ret;
232 }
233 
234 template <class ELFT>
235 static SectionKey<ELFT::Is64Bits> createKey(InputSectionBase<ELFT> *C,
236                                             StringRef OutsecName) {
237   // When using linker script the merge rules are different.
238   // Unfortunately, linker scripts are name based. This means that expressions
239   // like *(.foo*) can refer to multiple input sections that would normally be
240   // placed in different output sections. We cannot put them in different
241   // output sections or we would produce wrong results for
242   // start = .; *(.foo.*) end = .; *(.bar)
243   // and a mapping of .foo1 and .bar1 to one section and .foo2 and .bar2 to
244   // another. The problem is that there is no way to layout those output
245   // sections such that the .foo sections are the only thing between the
246   // start and end symbols.
247 
248   // An extra annoyance is that we cannot simply disable merging of the contents
249   // of SHF_MERGE sections, but our implementation requires one output section
250   // per "kind" (string or not, which size/aligment).
251   // Fortunately, creating symbols in the middle of a merge section is not
252   // supported by bfd or gold, so we can just create multiple section in that
253   // case.
254   const typename ELFT::Shdr *H = C->getSectionHdr();
255   typedef typename ELFT::uint uintX_t;
256   uintX_t Flags = H->sh_flags & (SHF_MERGE | SHF_STRINGS);
257 
258   uintX_t Alignment = 0;
259   if (isa<MergeInputSection<ELFT>>(C))
260     Alignment = std::max(H->sh_addralign, H->sh_entsize);
261 
262   return SectionKey<ELFT::Is64Bits>{OutsecName, /*Type*/ 0, Flags, Alignment};
263 }
264 
265 template <class ELFT>
266 void LinkerScript<ELFT>::addSection(OutputSectionFactory<ELFT> &Factory,
267                                     InputSectionBase<ELFT> *Sec,
268                                     StringRef Name) {
269   OutputSectionBase<ELFT> *OutSec;
270   bool IsNew;
271   std::tie(OutSec, IsNew) = Factory.create(createKey(Sec, Name), Sec);
272   if (IsNew)
273     OutputSections->push_back(OutSec);
274   OutSec->addSection(Sec);
275 }
276 
277 template <class ELFT>
278 void LinkerScript<ELFT>::processCommands(OutputSectionFactory<ELFT> &Factory) {
279 
280   for (unsigned I = 0; I < Opt.Commands.size(); ++I) {
281     auto Iter = Opt.Commands.begin() + I;
282     const std::unique_ptr<BaseCommand> &Base1 = *Iter;
283     if (auto *Cmd = dyn_cast<SymbolAssignment>(Base1.get())) {
284       if (shouldDefine<ELFT>(Cmd))
285         addRegular<ELFT>(Cmd);
286       continue;
287     }
288     if (auto *Cmd = dyn_cast<AssertCommand>(Base1.get())) {
289       // If we don't have SECTIONS then output sections have already been
290       // created by Writer<ELFT>. The LinkerScript<ELFT>::assignAddresses
291       // will not be called, so ASSERT should be evaluated now.
292       if (!Opt.HasSections)
293         Cmd->Expression(0);
294       continue;
295     }
296 
297     if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base1.get())) {
298       std::vector<InputSectionBase<ELFT> *> V = createInputSectionList(*Cmd);
299 
300       if (Cmd->Name == "/DISCARD/") {
301         discard(V);
302         continue;
303       }
304 
305       if (!matchConstraints<ELFT>(V, Cmd->Constraint)) {
306         for (InputSectionBase<ELFT> *S : V)
307           S->OutSec = nullptr;
308         Opt.Commands.erase(Iter);
309         --I;
310         continue;
311       }
312 
313       for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands)
314         if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base.get()))
315           if (shouldDefine<ELFT>(OutCmd))
316             addSymbol<ELFT>(OutCmd);
317 
318       if (V.empty())
319         continue;
320 
321       for (InputSectionBase<ELFT> *Sec : V) {
322         addSection(Factory, Sec, Cmd->Name);
323         if (uint32_t Subalign = Cmd->SubalignExpr ? Cmd->SubalignExpr(0) : 0)
324           Sec->Alignment = Subalign;
325       }
326     }
327   }
328 }
329 
330 template <class ELFT>
331 void LinkerScript<ELFT>::createSections(OutputSectionFactory<ELFT> &Factory) {
332   processCommands(Factory);
333   // Add orphan sections.
334   for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles())
335     for (InputSectionBase<ELFT> *S : F->getSections())
336       if (!isDiscarded(S) && !S->OutSec)
337         addSection(Factory, S, getOutputSectionName(S));
338 }
339 
340 // Sets value of a section-defined symbol. Two kinds of
341 // symbols are processed: synthetic symbols, whose value
342 // is an offset from beginning of section and regular
343 // symbols whose value is absolute.
344 template <class ELFT>
345 static void assignSectionSymbol(SymbolAssignment *Cmd,
346                                 OutputSectionBase<ELFT> *Sec,
347                                 typename ELFT::uint Off) {
348   if (!Cmd->Sym)
349     return;
350 
351   if (auto *Body = dyn_cast<DefinedSynthetic<ELFT>>(Cmd->Sym)) {
352     Body->Section = Sec;
353     Body->Value = Cmd->Expression(Sec->getVA() + Off) - Sec->getVA();
354     return;
355   }
356   auto *Body = cast<DefinedRegular<ELFT>>(Cmd->Sym);
357   Body->Value = Cmd->Expression(Sec->getVA() + Off);
358 }
359 
360 template <class ELFT> static bool isTbss(OutputSectionBase<ELFT> *Sec) {
361   return (Sec->getFlags() & SHF_TLS) && Sec->getType() == SHT_NOBITS;
362 }
363 
364 template <class ELFT> void LinkerScript<ELFT>::output(InputSection<ELFT> *S) {
365   if (!AlreadyOutputIS.insert(S).second)
366     return;
367   bool IsTbss = isTbss(CurOutSec);
368 
369   uintX_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot;
370   Pos = alignTo(Pos, S->Alignment);
371   S->OutSecOff = Pos - CurOutSec->getVA();
372   Pos += S->getSize();
373 
374   // Update output section size after adding each section. This is so that
375   // SIZEOF works correctly in the case below:
376   // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) }
377   CurOutSec->setSize(Pos - CurOutSec->getVA());
378 
379   if (IsTbss)
380     ThreadBssOffset = Pos - Dot;
381   else
382     Dot = Pos;
383 }
384 
385 template <class ELFT> void LinkerScript<ELFT>::flush() {
386   if (auto *OutSec = dyn_cast_or_null<OutputSection<ELFT>>(CurOutSec)) {
387     for (InputSection<ELFT> *I : OutSec->Sections)
388       output(I);
389     AlreadyOutputOS.insert(CurOutSec);
390   }
391 }
392 
393 template <class ELFT>
394 void LinkerScript<ELFT>::switchTo(OutputSectionBase<ELFT> *Sec) {
395   if (CurOutSec == Sec)
396     return;
397   if (AlreadyOutputOS.count(Sec))
398     return;
399 
400   flush();
401   CurOutSec = Sec;
402 
403   Dot = alignTo(Dot, CurOutSec->getAlignment());
404   CurOutSec->setVA(isTbss(CurOutSec) ? Dot + ThreadBssOffset : Dot);
405 }
406 
407 template <class ELFT> void LinkerScript<ELFT>::process(BaseCommand &Base) {
408   if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) {
409     if (AssignCmd->Name == ".") {
410       // Update to location counter means update to section size.
411       Dot = AssignCmd->Expression(Dot);
412       CurOutSec->setSize(Dot - CurOutSec->getVA());
413       return;
414     }
415     assignSectionSymbol<ELFT>(AssignCmd, CurOutSec, Dot - CurOutSec->getVA());
416     return;
417   }
418   auto &ICmd = cast<InputSectionDescription>(Base);
419   for (InputSectionData *ID : ICmd.Sections) {
420     auto *IB = static_cast<InputSectionBase<ELFT> *>(ID);
421     switchTo(IB->OutSec);
422     if (auto *I = dyn_cast<InputSection<ELFT>>(IB))
423       output(I);
424     else if (AlreadyOutputOS.insert(CurOutSec).second)
425       Dot += CurOutSec->getSize();
426   }
427 }
428 
429 template <class ELFT>
430 static std::vector<OutputSectionBase<ELFT> *>
431 findSections(OutputSectionCommand &Cmd,
432              const std::vector<OutputSectionBase<ELFT> *> &Sections) {
433   std::vector<OutputSectionBase<ELFT> *> Ret;
434   for (OutputSectionBase<ELFT> *Sec : Sections)
435     if (Sec->getName() == Cmd.Name)
436       Ret.push_back(Sec);
437   return Ret;
438 }
439 
440 template <class ELFT>
441 void LinkerScript<ELFT>::assignOffsets(OutputSectionCommand *Cmd) {
442   std::vector<OutputSectionBase<ELFT> *> Sections =
443       findSections(*Cmd, *OutputSections);
444   if (Sections.empty())
445     return;
446   switchTo(Sections[0]);
447 
448   // Find the last section output location. We will output orphan sections
449   // there so that end symbols point to the correct location.
450   auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(),
451                         [](const std::unique_ptr<BaseCommand> &Cmd) {
452                           return !isa<SymbolAssignment>(*Cmd);
453                         })
454                .base();
455   for (auto I = Cmd->Commands.begin(); I != E; ++I)
456     process(**I);
457   flush();
458   for (OutputSectionBase<ELFT> *Base : Sections) {
459     if (AlreadyOutputOS.count(Base))
460       continue;
461     switchTo(Base);
462     Dot += CurOutSec->getSize();
463     flush();
464   }
465   std::for_each(E, Cmd->Commands.end(),
466                 [this](std::unique_ptr<BaseCommand> &B) { process(*B.get()); });
467 }
468 
469 template <class ELFT> void LinkerScript<ELFT>::adjustSectionsBeforeSorting() {
470   // It is common practice to use very generic linker scripts. So for any
471   // given run some of the output sections in the script will be empty.
472   // We could create corresponding empty output sections, but that would
473   // clutter the output.
474   // We instead remove trivially empty sections. The bfd linker seems even
475   // more aggressive at removing them.
476   auto Pos = std::remove_if(
477       Opt.Commands.begin(), Opt.Commands.end(),
478       [&](const std::unique_ptr<BaseCommand> &Base) {
479         auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
480         if (!Cmd)
481           return false;
482         std::vector<OutputSectionBase<ELFT> *> Secs =
483             findSections(*Cmd, *OutputSections);
484         if (!Secs.empty())
485           return false;
486         for (const std::unique_ptr<BaseCommand> &I : Cmd->Commands)
487           if (!isa<InputSectionDescription>(I.get()))
488             return false;
489         return true;
490       });
491   Opt.Commands.erase(Pos, Opt.Commands.end());
492 
493   // If the output section contains only symbol assignments, create a
494   // corresponding output section. The bfd linker seems to only create them if
495   // '.' is assigned to, but creating these section should not have any bad
496   // consequeces and gives us a section to put the symbol in.
497   uintX_t Flags = SHF_ALLOC;
498   uint32_t Type = 0;
499   for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
500     auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
501     if (!Cmd)
502       continue;
503     std::vector<OutputSectionBase<ELFT> *> Secs =
504         findSections(*Cmd, *OutputSections);
505     if (!Secs.empty()) {
506       Flags = Secs[0]->getFlags();
507       Type = Secs[0]->getType();
508       continue;
509     }
510 
511     auto *OutSec = new OutputSection<ELFT>(Cmd->Name, Type, Flags);
512     Out<ELFT>::Pool.emplace_back(OutSec);
513     OutputSections->push_back(OutSec);
514   }
515 }
516 
517 // When placing orphan sections, we want to place them after symbol assignments
518 // so that an orphan after
519 //   begin_foo = .;
520 //   foo : { *(foo) }
521 //   end_foo = .;
522 // doesn't break the intended meaning of the begin/end symbols.
523 // We don't want to go over sections since Writer<ELFT>::sortSections is the
524 // one in charge of deciding the order of the sections.
525 // We don't want to go over alignments, since doing so in
526 //  rx_sec : { *(rx_sec) }
527 //  . = ALIGN(0x1000);
528 //  /* The RW PT_LOAD starts here*/
529 //  rw_sec : { *(rw_sec) }
530 // would mean that the RW PT_LOAD would become unaligned.
531 static bool shouldSkip(const BaseCommand &Cmd) {
532   if (isa<OutputSectionCommand>(Cmd))
533     return false;
534   const auto *Assign = dyn_cast<SymbolAssignment>(&Cmd);
535   if (!Assign)
536     return true;
537   return Assign->Name != ".";
538 }
539 
540 template <class ELFT> void LinkerScript<ELFT>::assignAddresses() {
541   // Orphan sections are sections present in the input files which
542   // are not explicitly placed into the output file by the linker script.
543   // We place orphan sections at end of file.
544   // Other linkers places them using some heuristics as described in
545   // https://sourceware.org/binutils/docs/ld/Orphan-Sections.html#Orphan-Sections.
546 
547   // The OutputSections are already in the correct order.
548   // This loops creates or moves commands as needed so that they are in the
549   // correct order.
550   int CmdIndex = 0;
551   for (OutputSectionBase<ELFT> *Sec : *OutputSections) {
552     StringRef Name = Sec->getName();
553 
554     // Find the last spot where we can insert a command and still get the
555     // correct result.
556     auto CmdIter = Opt.Commands.begin() + CmdIndex;
557     auto E = Opt.Commands.end();
558     while (CmdIter != E && shouldSkip(**CmdIter)) {
559       ++CmdIter;
560       ++CmdIndex;
561     }
562 
563     auto Pos =
564         std::find_if(CmdIter, E, [&](const std::unique_ptr<BaseCommand> &Base) {
565           auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
566           return Cmd && Cmd->Name == Name;
567         });
568     if (Pos == E) {
569       Opt.Commands.insert(CmdIter,
570                           llvm::make_unique<OutputSectionCommand>(Name));
571       ++CmdIndex;
572       continue;
573     }
574 
575     // Continue from where we found it.
576     CmdIndex = (Pos - Opt.Commands.begin()) + 1;
577     continue;
578   }
579 
580   // Assign addresses as instructed by linker script SECTIONS sub-commands.
581   Dot = getHeaderSize();
582 
583   for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
584     if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) {
585       if (Cmd->Name == ".") {
586         Dot = Cmd->Expression(Dot);
587       } else if (Cmd->Sym) {
588         cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Cmd->Expression(Dot);
589       }
590       continue;
591     }
592 
593     if (auto *Cmd = dyn_cast<AssertCommand>(Base.get())) {
594       Cmd->Expression(Dot);
595       continue;
596     }
597 
598     auto *Cmd = cast<OutputSectionCommand>(Base.get());
599 
600     if (Cmd->AddrExpr)
601       Dot = Cmd->AddrExpr(Dot);
602 
603     assignOffsets(Cmd);
604   }
605 
606   uintX_t MinVA = std::numeric_limits<uintX_t>::max();
607   for (OutputSectionBase<ELFT> *Sec : *OutputSections) {
608     if (Sec->getFlags() & SHF_ALLOC)
609       MinVA = std::min(MinVA, Sec->getVA());
610     else
611       Sec->setVA(0);
612   }
613 
614   uintX_t HeaderSize = getHeaderSize();
615   if (HeaderSize > MinVA)
616     fatal("Not enough space for ELF and program headers");
617 
618   // ELF and Program headers need to be right before the first section in
619   // memory. Set their addresses accordingly.
620   MinVA = alignDown(MinVA - HeaderSize, Target->PageSize);
621   Out<ELFT>::ElfHeader->setVA(MinVA);
622   Out<ELFT>::ProgramHeaders->setVA(Out<ELFT>::ElfHeader->getSize() + MinVA);
623 }
624 
625 // Creates program headers as instructed by PHDRS linker script command.
626 template <class ELFT>
627 std::vector<PhdrEntry<ELFT>> LinkerScript<ELFT>::createPhdrs() {
628   std::vector<PhdrEntry<ELFT>> Ret;
629 
630   // Process PHDRS and FILEHDR keywords because they are not
631   // real output sections and cannot be added in the following loop.
632   for (const PhdrsCommand &Cmd : Opt.PhdrsCommands) {
633     Ret.emplace_back(Cmd.Type, Cmd.Flags == UINT_MAX ? PF_R : Cmd.Flags);
634     PhdrEntry<ELFT> &Phdr = Ret.back();
635 
636     if (Cmd.HasFilehdr)
637       Phdr.add(Out<ELFT>::ElfHeader);
638     if (Cmd.HasPhdrs)
639       Phdr.add(Out<ELFT>::ProgramHeaders);
640 
641     if (Cmd.LMAExpr) {
642       Phdr.H.p_paddr = Cmd.LMAExpr(0);
643       Phdr.HasLMA = true;
644     }
645   }
646 
647   // Add output sections to program headers.
648   PhdrEntry<ELFT> *Load = nullptr;
649   uintX_t Flags = PF_R;
650   for (OutputSectionBase<ELFT> *Sec : *OutputSections) {
651     if (!(Sec->getFlags() & SHF_ALLOC))
652       break;
653 
654     std::vector<size_t> PhdrIds = getPhdrIndices(Sec->getName());
655     if (!PhdrIds.empty()) {
656       // Assign headers specified by linker script
657       for (size_t Id : PhdrIds) {
658         Ret[Id].add(Sec);
659         if (Opt.PhdrsCommands[Id].Flags == UINT_MAX)
660           Ret[Id].H.p_flags |= Sec->getPhdrFlags();
661       }
662     } else {
663       // If we have no load segment or flags've changed then we want new load
664       // segment.
665       uintX_t NewFlags = Sec->getPhdrFlags();
666       if (Load == nullptr || Flags != NewFlags) {
667         Load = &*Ret.emplace(Ret.end(), PT_LOAD, NewFlags);
668         Flags = NewFlags;
669       }
670       Load->add(Sec);
671     }
672   }
673   return Ret;
674 }
675 
676 template <class ELFT> bool LinkerScript<ELFT>::ignoreInterpSection() {
677   // Ignore .interp section in case we have PHDRS specification
678   // and PT_INTERP isn't listed.
679   return !Opt.PhdrsCommands.empty() &&
680          llvm::find_if(Opt.PhdrsCommands, [](const PhdrsCommand &Cmd) {
681            return Cmd.Type == PT_INTERP;
682          }) == Opt.PhdrsCommands.end();
683 }
684 
685 template <class ELFT>
686 ArrayRef<uint8_t> LinkerScript<ELFT>::getFiller(StringRef Name) {
687   for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands)
688     if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()))
689       if (Cmd->Name == Name)
690         return Cmd->Filler;
691   return {};
692 }
693 
694 template <class ELFT> Expr LinkerScript<ELFT>::getLma(StringRef Name) {
695   for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands)
696     if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()))
697       if (Cmd->LmaExpr && Cmd->Name == Name)
698         return Cmd->LmaExpr;
699   return {};
700 }
701 
702 // Returns the index of the given section name in linker script
703 // SECTIONS commands. Sections are laid out as the same order as they
704 // were in the script. If a given name did not appear in the script,
705 // it returns INT_MAX, so that it will be laid out at end of file.
706 template <class ELFT> int LinkerScript<ELFT>::getSectionIndex(StringRef Name) {
707   int I = 0;
708   for (std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
709     if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()))
710       if (Cmd->Name == Name)
711         return I;
712     ++I;
713   }
714   return INT_MAX;
715 }
716 
717 template <class ELFT> bool LinkerScript<ELFT>::hasPhdrsCommands() {
718   return !Opt.PhdrsCommands.empty();
719 }
720 
721 template <class ELFT>
722 uint64_t LinkerScript<ELFT>::getOutputSectionAddress(StringRef Name) {
723   for (OutputSectionBase<ELFT> *Sec : *OutputSections)
724     if (Sec->getName() == Name)
725       return Sec->getVA();
726   error("undefined section " + Name);
727   return 0;
728 }
729 
730 template <class ELFT>
731 uint64_t LinkerScript<ELFT>::getOutputSectionSize(StringRef Name) {
732   for (OutputSectionBase<ELFT> *Sec : *OutputSections)
733     if (Sec->getName() == Name)
734       return Sec->getSize();
735   error("undefined section " + Name);
736   return 0;
737 }
738 
739 template <class ELFT>
740 uint64_t LinkerScript<ELFT>::getOutputSectionAlign(StringRef Name) {
741   for (OutputSectionBase<ELFT> *Sec : *OutputSections)
742     if (Sec->getName() == Name)
743       return Sec->getAlignment();
744   error("undefined section " + Name);
745   return 0;
746 }
747 
748 template <class ELFT> uint64_t LinkerScript<ELFT>::getHeaderSize() {
749   return elf::getHeaderSize<ELFT>();
750 }
751 
752 template <class ELFT> uint64_t LinkerScript<ELFT>::getSymbolValue(StringRef S) {
753   if (SymbolBody *B = Symtab<ELFT>::X->find(S))
754     return B->getVA<ELFT>();
755   error("symbol not found: " + S);
756   return 0;
757 }
758 
759 template <class ELFT> bool LinkerScript<ELFT>::isDefined(StringRef S) {
760   return Symtab<ELFT>::X->find(S) != nullptr;
761 }
762 
763 // Returns indices of ELF headers containing specific section, identified
764 // by Name. Each index is a zero based number of ELF header listed within
765 // PHDRS {} script block.
766 template <class ELFT>
767 std::vector<size_t> LinkerScript<ELFT>::getPhdrIndices(StringRef SectionName) {
768   for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
769     auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
770     if (!Cmd || Cmd->Name != SectionName)
771       continue;
772 
773     std::vector<size_t> Ret;
774     for (StringRef PhdrName : Cmd->Phdrs)
775       Ret.push_back(getPhdrIndex(PhdrName));
776     return Ret;
777   }
778   return {};
779 }
780 
781 template <class ELFT>
782 size_t LinkerScript<ELFT>::getPhdrIndex(StringRef PhdrName) {
783   size_t I = 0;
784   for (PhdrsCommand &Cmd : Opt.PhdrsCommands) {
785     if (Cmd.Name == PhdrName)
786       return I;
787     ++I;
788   }
789   error("section header '" + PhdrName + "' is not listed in PHDRS");
790   return 0;
791 }
792 
793 class elf::ScriptParser : public ScriptParserBase {
794   typedef void (ScriptParser::*Handler)();
795 
796 public:
797   ScriptParser(StringRef S, bool B) : ScriptParserBase(S), IsUnderSysroot(B) {}
798 
799   void readLinkerScript();
800   void readVersionScript();
801 
802 private:
803   void addFile(StringRef Path);
804 
805   void readAsNeeded();
806   void readEntry();
807   void readExtern();
808   void readGroup();
809   void readInclude();
810   void readOutput();
811   void readOutputArch();
812   void readOutputFormat();
813   void readPhdrs();
814   void readSearchDir();
815   void readSections();
816   void readVersion();
817   void readVersionScriptCommand();
818 
819   SymbolAssignment *readAssignment(StringRef Name);
820   std::vector<uint8_t> readFill();
821   OutputSectionCommand *readOutputSectionDescription(StringRef OutSec);
822   std::vector<uint8_t> readOutputSectionFiller(StringRef Tok);
823   std::vector<StringRef> readOutputSectionPhdrs();
824   InputSectionDescription *readInputSectionDescription(StringRef Tok);
825   Regex readFilePatterns();
826   std::vector<SectionPattern> readInputSectionsList();
827   InputSectionDescription *readInputSectionRules(StringRef FilePattern);
828   unsigned readPhdrType();
829   SortSectionPolicy readSortKind();
830   SymbolAssignment *readProvideHidden(bool Provide, bool Hidden);
831   SymbolAssignment *readProvideOrAssignment(StringRef Tok, bool MakeAbsolute);
832   void readSort();
833   Expr readAssert();
834 
835   Expr readExpr();
836   Expr readExpr1(Expr Lhs, int MinPrec);
837   Expr readPrimary();
838   Expr readTernary(Expr Cond);
839   Expr readParenExpr();
840 
841   // For parsing version script.
842   void readExtern(std::vector<SymbolVersion> *Globals);
843   void readVersionDeclaration(StringRef VerStr);
844   void readGlobal(StringRef VerStr);
845   void readLocal();
846 
847   ScriptConfiguration &Opt = *ScriptConfig;
848   StringSaver Saver = {ScriptConfig->Alloc};
849   bool IsUnderSysroot;
850 };
851 
852 void ScriptParser::readVersionScript() {
853   readVersionScriptCommand();
854   if (!atEOF())
855     setError("EOF expected, but got " + next());
856 }
857 
858 void ScriptParser::readVersionScriptCommand() {
859   if (skip("{")) {
860     readVersionDeclaration("");
861     return;
862   }
863 
864   while (!atEOF() && !Error && peek() != "}") {
865     StringRef VerStr = next();
866     if (VerStr == "{") {
867       setError("anonymous version definition is used in "
868                "combination with other version definitions");
869       return;
870     }
871     expect("{");
872     readVersionDeclaration(VerStr);
873   }
874 }
875 
876 void ScriptParser::readVersion() {
877   expect("{");
878   readVersionScriptCommand();
879   expect("}");
880 }
881 
882 void ScriptParser::readLinkerScript() {
883   while (!atEOF()) {
884     StringRef Tok = next();
885     if (Tok == ";")
886       continue;
887 
888     if (Tok == "ASSERT") {
889       Opt.Commands.emplace_back(new AssertCommand(readAssert()));
890     } else if (Tok == "ENTRY") {
891       readEntry();
892     } else if (Tok == "EXTERN") {
893       readExtern();
894     } else if (Tok == "GROUP" || Tok == "INPUT") {
895       readGroup();
896     } else if (Tok == "INCLUDE") {
897       readInclude();
898     } else if (Tok == "OUTPUT") {
899       readOutput();
900     } else if (Tok == "OUTPUT_ARCH") {
901       readOutputArch();
902     } else if (Tok == "OUTPUT_FORMAT") {
903       readOutputFormat();
904     } else if (Tok == "PHDRS") {
905       readPhdrs();
906     } else if (Tok == "SEARCH_DIR") {
907       readSearchDir();
908     } else if (Tok == "SECTIONS") {
909       readSections();
910     } else if (Tok == "VERSION") {
911       readVersion();
912     } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok, true)) {
913       Opt.Commands.emplace_back(Cmd);
914     } else {
915       setError("unknown directive: " + Tok);
916     }
917   }
918 }
919 
920 void ScriptParser::addFile(StringRef S) {
921   if (IsUnderSysroot && S.startswith("/")) {
922     SmallString<128> Path;
923     (Config->Sysroot + S).toStringRef(Path);
924     if (sys::fs::exists(Path)) {
925       Driver->addFile(Saver.save(Path.str()));
926       return;
927     }
928   }
929 
930   if (sys::path::is_absolute(S)) {
931     Driver->addFile(S);
932   } else if (S.startswith("=")) {
933     if (Config->Sysroot.empty())
934       Driver->addFile(S.substr(1));
935     else
936       Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)));
937   } else if (S.startswith("-l")) {
938     Driver->addLibrary(S.substr(2));
939   } else if (sys::fs::exists(S)) {
940     Driver->addFile(S);
941   } else {
942     std::string Path = findFromSearchPaths(S);
943     if (Path.empty())
944       setError("unable to find " + S);
945     else
946       Driver->addFile(Saver.save(Path));
947   }
948 }
949 
950 void ScriptParser::readAsNeeded() {
951   expect("(");
952   bool Orig = Config->AsNeeded;
953   Config->AsNeeded = true;
954   while (!Error && !skip(")"))
955     addFile(unquote(next()));
956   Config->AsNeeded = Orig;
957 }
958 
959 void ScriptParser::readEntry() {
960   // -e <symbol> takes predecence over ENTRY(<symbol>).
961   expect("(");
962   StringRef Tok = next();
963   if (Config->Entry.empty())
964     Config->Entry = Tok;
965   expect(")");
966 }
967 
968 void ScriptParser::readExtern() {
969   expect("(");
970   while (!Error && !skip(")"))
971     Config->Undefined.push_back(next());
972 }
973 
974 void ScriptParser::readGroup() {
975   expect("(");
976   while (!Error && !skip(")")) {
977     StringRef Tok = next();
978     if (Tok == "AS_NEEDED")
979       readAsNeeded();
980     else
981       addFile(unquote(Tok));
982   }
983 }
984 
985 void ScriptParser::readInclude() {
986   StringRef Tok = next();
987   auto MBOrErr = MemoryBuffer::getFile(unquote(Tok));
988   if (!MBOrErr) {
989     setError("cannot open " + Tok);
990     return;
991   }
992   std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
993   StringRef S = Saver.save(MB->getMemBufferRef().getBuffer());
994   std::vector<StringRef> V = tokenize(S);
995   Tokens.insert(Tokens.begin() + Pos, V.begin(), V.end());
996 }
997 
998 void ScriptParser::readOutput() {
999   // -o <file> takes predecence over OUTPUT(<file>).
1000   expect("(");
1001   StringRef Tok = next();
1002   if (Config->OutputFile.empty())
1003     Config->OutputFile = unquote(Tok);
1004   expect(")");
1005 }
1006 
1007 void ScriptParser::readOutputArch() {
1008   // Error checking only for now.
1009   expect("(");
1010   next();
1011   expect(")");
1012 }
1013 
1014 void ScriptParser::readOutputFormat() {
1015   // Error checking only for now.
1016   expect("(");
1017   next();
1018   StringRef Tok = next();
1019   if (Tok == ")")
1020     return;
1021   if (Tok != ",") {
1022     setError("unexpected token: " + Tok);
1023     return;
1024   }
1025   next();
1026   expect(",");
1027   next();
1028   expect(")");
1029 }
1030 
1031 void ScriptParser::readPhdrs() {
1032   expect("{");
1033   while (!Error && !skip("}")) {
1034     StringRef Tok = next();
1035     Opt.PhdrsCommands.push_back(
1036         {Tok, PT_NULL, false, false, UINT_MAX, nullptr});
1037     PhdrsCommand &PhdrCmd = Opt.PhdrsCommands.back();
1038 
1039     PhdrCmd.Type = readPhdrType();
1040     do {
1041       Tok = next();
1042       if (Tok == ";")
1043         break;
1044       if (Tok == "FILEHDR")
1045         PhdrCmd.HasFilehdr = true;
1046       else if (Tok == "PHDRS")
1047         PhdrCmd.HasPhdrs = true;
1048       else if (Tok == "AT")
1049         PhdrCmd.LMAExpr = readParenExpr();
1050       else if (Tok == "FLAGS") {
1051         expect("(");
1052         // Passing 0 for the value of dot is a bit of a hack. It means that
1053         // we accept expressions like ".|1".
1054         PhdrCmd.Flags = readExpr()(0);
1055         expect(")");
1056       } else
1057         setError("unexpected header attribute: " + Tok);
1058     } while (!Error);
1059   }
1060 }
1061 
1062 void ScriptParser::readSearchDir() {
1063   expect("(");
1064   StringRef Tok = next();
1065   if (!Config->Nostdlib)
1066     Config->SearchPaths.push_back(unquote(Tok));
1067   expect(")");
1068 }
1069 
1070 void ScriptParser::readSections() {
1071   Opt.HasSections = true;
1072   expect("{");
1073   while (!Error && !skip("}")) {
1074     StringRef Tok = next();
1075     BaseCommand *Cmd = readProvideOrAssignment(Tok, true);
1076     if (!Cmd) {
1077       if (Tok == "ASSERT")
1078         Cmd = new AssertCommand(readAssert());
1079       else
1080         Cmd = readOutputSectionDescription(Tok);
1081     }
1082     Opt.Commands.emplace_back(Cmd);
1083   }
1084 }
1085 
1086 static int precedence(StringRef Op) {
1087   return StringSwitch<int>(Op)
1088       .Case("*", 5)
1089       .Case("/", 5)
1090       .Case("+", 4)
1091       .Case("-", 4)
1092       .Case("<<", 3)
1093       .Case(">>", 3)
1094       .Case("<", 2)
1095       .Case(">", 2)
1096       .Case(">=", 2)
1097       .Case("<=", 2)
1098       .Case("==", 2)
1099       .Case("!=", 2)
1100       .Case("&", 1)
1101       .Case("|", 1)
1102       .Default(-1);
1103 }
1104 
1105 Regex ScriptParser::readFilePatterns() {
1106   std::vector<StringRef> V;
1107   while (!Error && !skip(")"))
1108     V.push_back(next());
1109   return compileGlobPatterns(V);
1110 }
1111 
1112 SortSectionPolicy ScriptParser::readSortKind() {
1113   if (skip("SORT") || skip("SORT_BY_NAME"))
1114     return SortSectionPolicy::Name;
1115   if (skip("SORT_BY_ALIGNMENT"))
1116     return SortSectionPolicy::Alignment;
1117   if (skip("SORT_BY_INIT_PRIORITY"))
1118     return SortSectionPolicy::Priority;
1119   if (skip("SORT_NONE"))
1120     return SortSectionPolicy::None;
1121   return SortSectionPolicy::Default;
1122 }
1123 
1124 // Method reads a list of sequence of excluded files and section globs given in
1125 // a following form: ((EXCLUDE_FILE(file_pattern+))? section_pattern+)+
1126 // Example: *(.foo.1 EXCLUDE_FILE (*a.o) .foo.2 EXCLUDE_FILE (*b.o) .foo.3)
1127 // The semantics of that is next:
1128 // * Include .foo.1 from every file.
1129 // * Include .foo.2 from every file but a.o
1130 // * Include .foo.3 from every file but b.o
1131 std::vector<SectionPattern> ScriptParser::readInputSectionsList() {
1132   std::vector<SectionPattern> Ret;
1133   while (!Error && peek() != ")") {
1134     Regex ExcludeFileRe;
1135     if (skip("EXCLUDE_FILE")) {
1136       expect("(");
1137       ExcludeFileRe = readFilePatterns();
1138     }
1139 
1140     std::vector<StringRef> V;
1141     while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE")
1142       V.push_back(next());
1143 
1144     if (!V.empty())
1145       Ret.push_back({std::move(ExcludeFileRe), compileGlobPatterns(V)});
1146     else
1147       setError("section pattern is expected");
1148   }
1149   return Ret;
1150 }
1151 
1152 // Section pattern grammar can have complex expressions, for example:
1153 // *(SORT(.foo.* EXCLUDE_FILE (*file1.o) .bar.*) .bar.* SORT(.zed.*))
1154 // Generally is a sequence of globs and excludes that may be wrapped in a SORT()
1155 // commands, like: SORT(glob0) glob1 glob2 SORT(glob4)
1156 // This methods handles wrapping sequences of excluded files and section globs
1157 // into SORT() if that needed and reads them all.
1158 InputSectionDescription *
1159 ScriptParser::readInputSectionRules(StringRef FilePattern) {
1160   auto *Cmd = new InputSectionDescription(FilePattern);
1161   expect("(");
1162   while (!HasError && !skip(")")) {
1163     SortSectionPolicy Outer = readSortKind();
1164     SortSectionPolicy Inner = SortSectionPolicy::Default;
1165     std::vector<SectionPattern> V;
1166     if (Outer != SortSectionPolicy::Default) {
1167       expect("(");
1168       Inner = readSortKind();
1169       if (Inner != SortSectionPolicy::Default) {
1170         expect("(");
1171         V = readInputSectionsList();
1172         expect(")");
1173       } else {
1174         V = readInputSectionsList();
1175       }
1176       expect(")");
1177     } else {
1178       V = readInputSectionsList();
1179     }
1180 
1181     for (SectionPattern &Pat : V) {
1182       Pat.SortInner = Inner;
1183       Pat.SortOuter = Outer;
1184     }
1185 
1186     std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns));
1187   }
1188   return Cmd;
1189 }
1190 
1191 InputSectionDescription *
1192 ScriptParser::readInputSectionDescription(StringRef Tok) {
1193   // Input section wildcard can be surrounded by KEEP.
1194   // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep
1195   if (Tok == "KEEP") {
1196     expect("(");
1197     StringRef FilePattern = next();
1198     InputSectionDescription *Cmd = readInputSectionRules(FilePattern);
1199     expect(")");
1200     for (SectionPattern &Pat : Cmd->SectionPatterns)
1201       Opt.KeptSections.push_back(&Pat.SectionRe);
1202     return Cmd;
1203   }
1204   return readInputSectionRules(Tok);
1205 }
1206 
1207 void ScriptParser::readSort() {
1208   expect("(");
1209   expect("CONSTRUCTORS");
1210   expect(")");
1211 }
1212 
1213 Expr ScriptParser::readAssert() {
1214   expect("(");
1215   Expr E = readExpr();
1216   expect(",");
1217   StringRef Msg = unquote(next());
1218   expect(")");
1219   return [=](uint64_t Dot) {
1220     uint64_t V = E(Dot);
1221     if (!V)
1222       error(Msg);
1223     return V;
1224   };
1225 }
1226 
1227 // Reads a FILL(expr) command. We handle the FILL command as an
1228 // alias for =fillexp section attribute, which is different from
1229 // what GNU linkers do.
1230 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html
1231 std::vector<uint8_t> ScriptParser::readFill() {
1232   expect("(");
1233   std::vector<uint8_t> V = readOutputSectionFiller(next());
1234   expect(")");
1235   expect(";");
1236   return V;
1237 }
1238 
1239 OutputSectionCommand *
1240 ScriptParser::readOutputSectionDescription(StringRef OutSec) {
1241   OutputSectionCommand *Cmd = new OutputSectionCommand(OutSec);
1242 
1243   // Read an address expression.
1244   // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html#Output-Section-Address
1245   if (peek() != ":")
1246     Cmd->AddrExpr = readExpr();
1247 
1248   expect(":");
1249 
1250   if (skip("AT"))
1251     Cmd->LmaExpr = readParenExpr();
1252   if (skip("ALIGN"))
1253     Cmd->AlignExpr = readParenExpr();
1254   if (skip("SUBALIGN"))
1255     Cmd->SubalignExpr = readParenExpr();
1256 
1257   // Parse constraints.
1258   if (skip("ONLY_IF_RO"))
1259     Cmd->Constraint = ConstraintKind::ReadOnly;
1260   if (skip("ONLY_IF_RW"))
1261     Cmd->Constraint = ConstraintKind::ReadWrite;
1262   expect("{");
1263 
1264   while (!Error && !skip("}")) {
1265     StringRef Tok = next();
1266     if (SymbolAssignment *Assignment = readProvideOrAssignment(Tok, false))
1267       Cmd->Commands.emplace_back(Assignment);
1268     else if (Tok == "FILL")
1269       Cmd->Filler = readFill();
1270     else if (Tok == "SORT")
1271       readSort();
1272     else if (peek() == "(")
1273       Cmd->Commands.emplace_back(readInputSectionDescription(Tok));
1274     else
1275       setError("unknown command " + Tok);
1276   }
1277   Cmd->Phdrs = readOutputSectionPhdrs();
1278 
1279   if (skip("="))
1280     Cmd->Filler = readOutputSectionFiller(next());
1281   else if (peek().startswith("="))
1282     Cmd->Filler = readOutputSectionFiller(next().drop_front());
1283 
1284   return Cmd;
1285 }
1286 
1287 // Read "=<number>" where <number> is an octal/decimal/hexadecimal number.
1288 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html
1289 //
1290 // ld.gold is not fully compatible with ld.bfd. ld.bfd handles
1291 // hexstrings as blobs of arbitrary sizes, while ld.gold handles them
1292 // as 32-bit big-endian values. We will do the same as ld.gold does
1293 // because it's simpler than what ld.bfd does.
1294 std::vector<uint8_t> ScriptParser::readOutputSectionFiller(StringRef Tok) {
1295   uint32_t V;
1296   if (Tok.getAsInteger(0, V)) {
1297     setError("invalid filler expression: " + Tok);
1298     return {};
1299   }
1300   return {uint8_t(V >> 24), uint8_t(V >> 16), uint8_t(V >> 8), uint8_t(V)};
1301 }
1302 
1303 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) {
1304   expect("(");
1305   SymbolAssignment *Cmd = readAssignment(next());
1306   Cmd->Provide = Provide;
1307   Cmd->Hidden = Hidden;
1308   expect(")");
1309   expect(";");
1310   return Cmd;
1311 }
1312 
1313 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok,
1314                                                         bool MakeAbsolute) {
1315   SymbolAssignment *Cmd = nullptr;
1316   if (peek() == "=" || peek() == "+=") {
1317     Cmd = readAssignment(Tok);
1318     expect(";");
1319   } else if (Tok == "PROVIDE") {
1320     Cmd = readProvideHidden(true, false);
1321   } else if (Tok == "HIDDEN") {
1322     Cmd = readProvideHidden(false, true);
1323   } else if (Tok == "PROVIDE_HIDDEN") {
1324     Cmd = readProvideHidden(true, true);
1325   }
1326   if (Cmd && MakeAbsolute)
1327     Cmd->IsAbsolute = true;
1328   return Cmd;
1329 }
1330 
1331 static uint64_t getSymbolValue(StringRef S, uint64_t Dot) {
1332   if (S == ".")
1333     return Dot;
1334   return ScriptBase->getSymbolValue(S);
1335 }
1336 
1337 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) {
1338   StringRef Op = next();
1339   bool IsAbsolute = false;
1340   Expr E;
1341   assert(Op == "=" || Op == "+=");
1342   if (skip("ABSOLUTE")) {
1343     E = readParenExpr();
1344     IsAbsolute = true;
1345   } else {
1346     E = readExpr();
1347   }
1348   if (Op == "+=")
1349     E = [=](uint64_t Dot) { return getSymbolValue(Name, Dot) + E(Dot); };
1350   return new SymbolAssignment(Name, E, IsAbsolute);
1351 }
1352 
1353 // This is an operator-precedence parser to parse a linker
1354 // script expression.
1355 Expr ScriptParser::readExpr() { return readExpr1(readPrimary(), 0); }
1356 
1357 static Expr combine(StringRef Op, Expr L, Expr R) {
1358   if (Op == "*")
1359     return [=](uint64_t Dot) { return L(Dot) * R(Dot); };
1360   if (Op == "/") {
1361     return [=](uint64_t Dot) -> uint64_t {
1362       uint64_t RHS = R(Dot);
1363       if (RHS == 0) {
1364         error("division by zero");
1365         return 0;
1366       }
1367       return L(Dot) / RHS;
1368     };
1369   }
1370   if (Op == "+")
1371     return [=](uint64_t Dot) { return L(Dot) + R(Dot); };
1372   if (Op == "-")
1373     return [=](uint64_t Dot) { return L(Dot) - R(Dot); };
1374   if (Op == "<<")
1375     return [=](uint64_t Dot) { return L(Dot) << R(Dot); };
1376   if (Op == ">>")
1377     return [=](uint64_t Dot) { return L(Dot) >> R(Dot); };
1378   if (Op == "<")
1379     return [=](uint64_t Dot) { return L(Dot) < R(Dot); };
1380   if (Op == ">")
1381     return [=](uint64_t Dot) { return L(Dot) > R(Dot); };
1382   if (Op == ">=")
1383     return [=](uint64_t Dot) { return L(Dot) >= R(Dot); };
1384   if (Op == "<=")
1385     return [=](uint64_t Dot) { return L(Dot) <= R(Dot); };
1386   if (Op == "==")
1387     return [=](uint64_t Dot) { return L(Dot) == R(Dot); };
1388   if (Op == "!=")
1389     return [=](uint64_t Dot) { return L(Dot) != R(Dot); };
1390   if (Op == "&")
1391     return [=](uint64_t Dot) { return L(Dot) & R(Dot); };
1392   if (Op == "|")
1393     return [=](uint64_t Dot) { return L(Dot) | R(Dot); };
1394   llvm_unreachable("invalid operator");
1395 }
1396 
1397 // This is a part of the operator-precedence parser. This function
1398 // assumes that the remaining token stream starts with an operator.
1399 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) {
1400   while (!atEOF() && !Error) {
1401     // Read an operator and an expression.
1402     StringRef Op1 = peek();
1403     if (Op1 == "?")
1404       return readTernary(Lhs);
1405     if (precedence(Op1) < MinPrec)
1406       break;
1407     next();
1408     Expr Rhs = readPrimary();
1409 
1410     // Evaluate the remaining part of the expression first if the
1411     // next operator has greater precedence than the previous one.
1412     // For example, if we have read "+" and "3", and if the next
1413     // operator is "*", then we'll evaluate 3 * ... part first.
1414     while (!atEOF()) {
1415       StringRef Op2 = peek();
1416       if (precedence(Op2) <= precedence(Op1))
1417         break;
1418       Rhs = readExpr1(Rhs, precedence(Op2));
1419     }
1420 
1421     Lhs = combine(Op1, Lhs, Rhs);
1422   }
1423   return Lhs;
1424 }
1425 
1426 uint64_t static getConstant(StringRef S) {
1427   if (S == "COMMONPAGESIZE")
1428     return Target->PageSize;
1429   if (S == "MAXPAGESIZE")
1430     return Target->MaxPageSize;
1431   error("unknown constant: " + S);
1432   return 0;
1433 }
1434 
1435 // Parses Tok as an integer. Returns true if successful.
1436 // It recognizes hexadecimal (prefixed with "0x" or suffixed with "H")
1437 // and decimal numbers. Decimal numbers may have "K" (kilo) or
1438 // "M" (mega) prefixes.
1439 static bool readInteger(StringRef Tok, uint64_t &Result) {
1440   if (Tok.startswith("-")) {
1441     if (!readInteger(Tok.substr(1), Result))
1442       return false;
1443     Result = -Result;
1444     return true;
1445   }
1446   if (Tok.startswith_lower("0x"))
1447     return !Tok.substr(2).getAsInteger(16, Result);
1448   if (Tok.endswith_lower("H"))
1449     return !Tok.drop_back().getAsInteger(16, Result);
1450 
1451   int Suffix = 1;
1452   if (Tok.endswith_lower("K")) {
1453     Suffix = 1024;
1454     Tok = Tok.drop_back();
1455   } else if (Tok.endswith_lower("M")) {
1456     Suffix = 1024 * 1024;
1457     Tok = Tok.drop_back();
1458   }
1459   if (Tok.getAsInteger(10, Result))
1460     return false;
1461   Result *= Suffix;
1462   return true;
1463 }
1464 
1465 Expr ScriptParser::readPrimary() {
1466   if (peek() == "(")
1467     return readParenExpr();
1468 
1469   StringRef Tok = next();
1470 
1471   if (Tok == "~") {
1472     Expr E = readPrimary();
1473     return [=](uint64_t Dot) { return ~E(Dot); };
1474   }
1475   if (Tok == "-") {
1476     Expr E = readPrimary();
1477     return [=](uint64_t Dot) { return -E(Dot); };
1478   }
1479 
1480   // Built-in functions are parsed here.
1481   // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html.
1482   if (Tok == "ADDR") {
1483     expect("(");
1484     StringRef Name = next();
1485     expect(")");
1486     return
1487         [=](uint64_t Dot) { return ScriptBase->getOutputSectionAddress(Name); };
1488   }
1489   if (Tok == "ASSERT")
1490     return readAssert();
1491   if (Tok == "ALIGN") {
1492     Expr E = readParenExpr();
1493     return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); };
1494   }
1495   if (Tok == "CONSTANT") {
1496     expect("(");
1497     StringRef Tok = next();
1498     expect(")");
1499     return [=](uint64_t Dot) { return getConstant(Tok); };
1500   }
1501   if (Tok == "DEFINED") {
1502     expect("(");
1503     StringRef Tok = next();
1504     expect(")");
1505     return [=](uint64_t Dot) {
1506       return ScriptBase->isDefined(Tok) ? 1 : 0;
1507     };
1508   }
1509   if (Tok == "SEGMENT_START") {
1510     expect("(");
1511     next();
1512     expect(",");
1513     Expr E = readExpr();
1514     expect(")");
1515     return [=](uint64_t Dot) { return E(Dot); };
1516   }
1517   if (Tok == "DATA_SEGMENT_ALIGN") {
1518     expect("(");
1519     Expr E = readExpr();
1520     expect(",");
1521     readExpr();
1522     expect(")");
1523     return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); };
1524   }
1525   if (Tok == "DATA_SEGMENT_END") {
1526     expect("(");
1527     expect(".");
1528     expect(")");
1529     return [](uint64_t Dot) { return Dot; };
1530   }
1531   // GNU linkers implements more complicated logic to handle
1532   // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and just align to
1533   // the next page boundary for simplicity.
1534   if (Tok == "DATA_SEGMENT_RELRO_END") {
1535     expect("(");
1536     readExpr();
1537     expect(",");
1538     readExpr();
1539     expect(")");
1540     return [](uint64_t Dot) { return alignTo(Dot, Target->PageSize); };
1541   }
1542   if (Tok == "SIZEOF") {
1543     expect("(");
1544     StringRef Name = next();
1545     expect(")");
1546     return [=](uint64_t Dot) { return ScriptBase->getOutputSectionSize(Name); };
1547   }
1548   if (Tok == "ALIGNOF") {
1549     expect("(");
1550     StringRef Name = next();
1551     expect(")");
1552     return
1553         [=](uint64_t Dot) { return ScriptBase->getOutputSectionAlign(Name); };
1554   }
1555   if (Tok == "SIZEOF_HEADERS")
1556     return [=](uint64_t Dot) { return ScriptBase->getHeaderSize(); };
1557 
1558   // Tok is a literal number.
1559   uint64_t V;
1560   if (readInteger(Tok, V))
1561     return [=](uint64_t Dot) { return V; };
1562 
1563   // Tok is a symbol name.
1564   if (Tok != "." && !isValidCIdentifier(Tok))
1565     setError("malformed number: " + Tok);
1566   return [=](uint64_t Dot) { return getSymbolValue(Tok, Dot); };
1567 }
1568 
1569 Expr ScriptParser::readTernary(Expr Cond) {
1570   next();
1571   Expr L = readExpr();
1572   expect(":");
1573   Expr R = readExpr();
1574   return [=](uint64_t Dot) { return Cond(Dot) ? L(Dot) : R(Dot); };
1575 }
1576 
1577 Expr ScriptParser::readParenExpr() {
1578   expect("(");
1579   Expr E = readExpr();
1580   expect(")");
1581   return E;
1582 }
1583 
1584 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() {
1585   std::vector<StringRef> Phdrs;
1586   while (!Error && peek().startswith(":")) {
1587     StringRef Tok = next();
1588     Tok = (Tok.size() == 1) ? next() : Tok.substr(1);
1589     if (Tok.empty()) {
1590       setError("section header name is empty");
1591       break;
1592     }
1593     Phdrs.push_back(Tok);
1594   }
1595   return Phdrs;
1596 }
1597 
1598 unsigned ScriptParser::readPhdrType() {
1599   StringRef Tok = next();
1600   unsigned Ret = StringSwitch<unsigned>(Tok)
1601                      .Case("PT_NULL", PT_NULL)
1602                      .Case("PT_LOAD", PT_LOAD)
1603                      .Case("PT_DYNAMIC", PT_DYNAMIC)
1604                      .Case("PT_INTERP", PT_INTERP)
1605                      .Case("PT_NOTE", PT_NOTE)
1606                      .Case("PT_SHLIB", PT_SHLIB)
1607                      .Case("PT_PHDR", PT_PHDR)
1608                      .Case("PT_TLS", PT_TLS)
1609                      .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME)
1610                      .Case("PT_GNU_STACK", PT_GNU_STACK)
1611                      .Case("PT_GNU_RELRO", PT_GNU_RELRO)
1612                      .Default(-1);
1613 
1614   if (Ret == (unsigned)-1) {
1615     setError("invalid program header type: " + Tok);
1616     return PT_NULL;
1617   }
1618   return Ret;
1619 }
1620 
1621 void ScriptParser::readVersionDeclaration(StringRef VerStr) {
1622   // Identifiers start at 2 because 0 and 1 are reserved
1623   // for VER_NDX_LOCAL and VER_NDX_GLOBAL constants.
1624   size_t VersionId = Config->VersionDefinitions.size() + 2;
1625   Config->VersionDefinitions.push_back({VerStr, VersionId});
1626 
1627   if (skip("global:") || peek() != "local:")
1628     readGlobal(VerStr);
1629   if (skip("local:"))
1630     readLocal();
1631   expect("}");
1632 
1633   // Each version may have a parent version. For example, "Ver2" defined as
1634   // "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" as a parent. This
1635   // version hierarchy is, probably against your instinct, purely for human; the
1636   // runtime doesn't care about them at all. In LLD, we simply skip the token.
1637   if (!VerStr.empty() && peek() != ";")
1638     next();
1639   expect(";");
1640 }
1641 
1642 void ScriptParser::readLocal() {
1643   Config->DefaultSymbolVersion = VER_NDX_LOCAL;
1644   expect("*");
1645   expect(";");
1646 }
1647 
1648 void ScriptParser::readExtern(std::vector<SymbolVersion> *Globals) {
1649   expect("\"C++\"");
1650   expect("{");
1651 
1652   for (;;) {
1653     if (peek() == "}" || Error)
1654       break;
1655     bool HasWildcard = !peek().startswith("\"") && hasWildcard(peek());
1656     Globals->push_back({unquote(next()), true, HasWildcard});
1657     expect(";");
1658   }
1659 
1660   expect("}");
1661   expect(";");
1662 }
1663 
1664 void ScriptParser::readGlobal(StringRef VerStr) {
1665   std::vector<SymbolVersion> *Globals;
1666   if (VerStr.empty())
1667     Globals = &Config->VersionScriptGlobals;
1668   else
1669     Globals = &Config->VersionDefinitions.back().Globals;
1670 
1671   for (;;) {
1672     if (skip("extern"))
1673       readExtern(Globals);
1674 
1675     StringRef Cur = peek();
1676     if (Cur == "}" || Cur == "local:" || Error)
1677       return;
1678     next();
1679     Globals->push_back({unquote(Cur), false, hasWildcard(Cur)});
1680     expect(";");
1681   }
1682 }
1683 
1684 static bool isUnderSysroot(StringRef Path) {
1685   if (Config->Sysroot == "")
1686     return false;
1687   for (; !Path.empty(); Path = sys::path::parent_path(Path))
1688     if (sys::fs::equivalent(Config->Sysroot, Path))
1689       return true;
1690   return false;
1691 }
1692 
1693 void elf::readLinkerScript(MemoryBufferRef MB) {
1694   StringRef Path = MB.getBufferIdentifier();
1695   ScriptParser(MB.getBuffer(), isUnderSysroot(Path)).readLinkerScript();
1696 }
1697 
1698 void elf::readVersionScript(MemoryBufferRef MB) {
1699   ScriptParser(MB.getBuffer(), false).readVersionScript();
1700 }
1701 
// Explicit instantiations of elf::LinkerScript for the ELF32LE, ELF32BE,
// ELF64LE and ELF64BE ELF types.
template class elf::LinkerScript<ELF32LE>;
template class elf::LinkerScript<ELF32BE>;
template class elf::LinkerScript<ELF64LE>;
template class elf::LinkerScript<ELF64BE>;
1706