1 //===- LinkerScript.cpp ---------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the parser/evaluator of the linker script.
11 // It parses a linker script and write the result to Config or ScriptConfig
12 // objects.
13 //
14 // If SECTIONS command is used, a ScriptConfig contains an AST
15 // of the command which will later be consumed by createSections() and
16 // assignAddresses().
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #include "LinkerScript.h"
21 #include "Config.h"
22 #include "Driver.h"
23 #include "InputSection.h"
24 #include "OutputSections.h"
25 #include "ScriptParser.h"
26 #include "Strings.h"
27 #include "Symbols.h"
28 #include "SymbolTable.h"
29 #include "Target.h"
30 #include "Writer.h"
31 #include "llvm/ADT/StringSwitch.h"
32 #include "llvm/Support/ELF.h"
33 #include "llvm/Support/FileSystem.h"
34 #include "llvm/Support/MemoryBuffer.h"
35 #include "llvm/Support/Path.h"
36 #include "llvm/Support/StringSaver.h"
37 
38 using namespace llvm;
39 using namespace llvm::ELF;
40 using namespace llvm::object;
41 using namespace llvm::support::endian;
42 using namespace lld;
43 using namespace lld::elf;
44 
45 LinkerScriptBase *elf::ScriptBase;
46 ScriptConfiguration *elf::ScriptConfig;
47 
48 template <class ELFT> static void addRegular(SymbolAssignment *Cmd) {
49   Symbol *Sym = Symtab<ELFT>::X->addRegular(Cmd->Name, STB_GLOBAL, STV_DEFAULT);
50   Sym->Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT;
51   Cmd->Sym = Sym->body();
52 
53   // If we have no SECTIONS then we don't have '.' and don't call
54   // assignAddresses(). We calculate symbol value immediately in this case.
55   if (!ScriptConfig->HasSections)
56     cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Cmd->Expression(0);
57 }
58 
59 template <class ELFT> static void addSynthetic(SymbolAssignment *Cmd) {
60   Symbol *Sym = Symtab<ELFT>::X->addSynthetic(
61       Cmd->Name, nullptr, 0, Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT);
62   Cmd->Sym = Sym->body();
63 }
64 
65 template <class ELFT> static void addSymbol(SymbolAssignment *Cmd) {
66   if (Cmd->IsAbsolute)
67     addRegular<ELFT>(Cmd);
68   else
69     addSynthetic<ELFT>(Cmd);
70 }
71 // If a symbol was in PROVIDE(), we need to define it only when
72 // it is an undefined symbol.
73 template <class ELFT> static bool shouldDefine(SymbolAssignment *Cmd) {
74   if (Cmd->Name == ".")
75     return false;
76   if (!Cmd->Provide)
77     return true;
78   SymbolBody *B = Symtab<ELFT>::X->find(Cmd->Name);
79   return B && B->isUndefined();
80 }
81 
82 bool SymbolAssignment::classof(const BaseCommand *C) {
83   return C->Kind == AssignmentKind;
84 }
85 
86 bool OutputSectionCommand::classof(const BaseCommand *C) {
87   return C->Kind == OutputSectionKind;
88 }
89 
90 bool InputSectionDescription::classof(const BaseCommand *C) {
91   return C->Kind == InputSectionKind;
92 }
93 
94 bool AssertCommand::classof(const BaseCommand *C) {
95   return C->Kind == AssertKind;
96 }
97 
98 bool BytesDataCommand::classof(const BaseCommand *C) {
99   return C->Kind == BytesDataKind;
100 }
101 
102 template <class ELFT> static bool isDiscarded(InputSectionBase<ELFT> *S) {
103   return !S || !S->Live;
104 }
105 
106 template <class ELFT> LinkerScript<ELFT>::LinkerScript() {}
107 template <class ELFT> LinkerScript<ELFT>::~LinkerScript() {}
108 
109 template <class ELFT>
110 bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) {
111   for (Regex *Re : Opt.KeptSections)
112     if (Re->match(S->Name))
113       return true;
114   return false;
115 }
116 
117 static bool comparePriority(InputSectionData *A, InputSectionData *B) {
118   return getPriority(A->Name) < getPriority(B->Name);
119 }
120 
121 static bool compareName(InputSectionData *A, InputSectionData *B) {
122   return A->Name < B->Name;
123 }
124 
125 static bool compareAlignment(InputSectionData *A, InputSectionData *B) {
126   // ">" is not a mistake. Larger alignments are placed before smaller
127   // alignments in order to reduce the amount of padding necessary.
128   // This is compatible with GNU.
129   return A->Alignment > B->Alignment;
130 }
131 
132 static std::function<bool(InputSectionData *, InputSectionData *)>
133 getComparator(SortSectionPolicy K) {
134   switch (K) {
135   case SortSectionPolicy::Alignment:
136     return compareAlignment;
137   case SortSectionPolicy::Name:
138     return compareName;
139   case SortSectionPolicy::Priority:
140     return comparePriority;
141   default:
142     llvm_unreachable("unknown sort policy");
143   }
144 }
145 
146 template <class ELFT>
147 static bool matchConstraints(ArrayRef<InputSectionBase<ELFT> *> Sections,
148                              ConstraintKind Kind) {
149   if (Kind == ConstraintKind::NoConstraint)
150     return true;
151   bool IsRW = llvm::any_of(Sections, [=](InputSectionData *Sec2) {
152     auto *Sec = static_cast<InputSectionBase<ELFT> *>(Sec2);
153     return Sec->getSectionHdr()->sh_flags & SHF_WRITE;
154   });
155   return (IsRW && Kind == ConstraintKind::ReadWrite) ||
156          (!IsRW && Kind == ConstraintKind::ReadOnly);
157 }
158 
159 static void sortSections(InputSectionData **Begin, InputSectionData **End,
160                          SortSectionPolicy K) {
161   if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None)
162     std::stable_sort(Begin, End, getComparator(K));
163 }
164 
165 // Compute and remember which sections the InputSectionDescription matches.
166 template <class ELFT>
167 void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) {
168   // Collects all sections that satisfy constraints of I
169   // and attach them to I.
170   for (SectionPattern &Pat : I->SectionPatterns) {
171     size_t SizeBefore = I->Sections.size();
172     for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles()) {
173       StringRef Filename = sys::path::filename(F->getName());
174       if (!I->FileRe.match(Filename) || Pat.ExcludedFileRe.match(Filename))
175         continue;
176 
177       for (InputSectionBase<ELFT> *S : F->getSections())
178         if (!isDiscarded(S) && !S->OutSec && Pat.SectionRe.match(S->Name))
179           I->Sections.push_back(S);
180       if (Pat.SectionRe.match("COMMON"))
181         I->Sections.push_back(CommonInputSection<ELFT>::X);
182     }
183 
184     // Sort sections as instructed by SORT-family commands and --sort-section
185     // option. Because SORT-family commands can be nested at most two depth
186     // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command
187     // line option is respected even if a SORT command is given, the exact
188     // behavior we have here is a bit complicated. Here are the rules.
189     //
190     // 1. If two SORT commands are given, --sort-section is ignored.
191     // 2. If one SORT command is given, and if it is not SORT_NONE,
192     //    --sort-section is handled as an inner SORT command.
193     // 3. If one SORT command is given, and if it is SORT_NONE, don't sort.
194     // 4. If no SORT command is given, sort according to --sort-section.
195     InputSectionData **Begin = I->Sections.data() + SizeBefore;
196     InputSectionData **End = I->Sections.data() + I->Sections.size();
197     if (Pat.SortOuter != SortSectionPolicy::None) {
198       if (Pat.SortInner == SortSectionPolicy::Default)
199         sortSections(Begin, End, Config->SortSection);
200       else
201         sortSections(Begin, End, Pat.SortInner);
202       sortSections(Begin, End, Pat.SortOuter);
203     }
204   }
205 
206   // We do not add duplicate input sections, so mark them with a dummy output
207   // section for now.
208   for (InputSectionData *S : I->Sections) {
209     auto *S2 = static_cast<InputSectionBase<ELFT> *>(S);
210     S2->OutSec = (OutputSectionBase<ELFT> *)-1;
211   }
212 }
213 
214 template <class ELFT>
215 void LinkerScript<ELFT>::discard(ArrayRef<InputSectionBase<ELFT> *> V) {
216   for (InputSectionBase<ELFT> *S : V) {
217     S->Live = false;
218     reportDiscarded(S);
219   }
220 }
221 
222 template <class ELFT>
223 std::vector<InputSectionBase<ELFT> *>
224 LinkerScript<ELFT>::createInputSectionList(OutputSectionCommand &OutCmd) {
225   std::vector<InputSectionBase<ELFT> *> Ret;
226 
227   for (const std::unique_ptr<BaseCommand> &Base : OutCmd.Commands) {
228     auto *Cmd = dyn_cast<InputSectionDescription>(Base.get());
229     if (!Cmd)
230       continue;
231     computeInputSections(Cmd);
232     for (InputSectionData *S : Cmd->Sections)
233       Ret.push_back(static_cast<InputSectionBase<ELFT> *>(S));
234   }
235 
236   return Ret;
237 }
238 
239 template <class ELFT>
240 static SectionKey<ELFT::Is64Bits> createKey(InputSectionBase<ELFT> *C,
241                                             StringRef OutsecName) {
242   // When using linker script the merge rules are different.
243   // Unfortunately, linker scripts are name based. This means that expressions
244   // like *(.foo*) can refer to multiple input sections that would normally be
245   // placed in different output sections. We cannot put them in different
246   // output sections or we would produce wrong results for
247   // start = .; *(.foo.*) end = .; *(.bar)
248   // and a mapping of .foo1 and .bar1 to one section and .foo2 and .bar2 to
249   // another. The problem is that there is no way to layout those output
250   // sections such that the .foo sections are the only thing between the
251   // start and end symbols.
252 
253   // An extra annoyance is that we cannot simply disable merging of the contents
254   // of SHF_MERGE sections, but our implementation requires one output section
255   // per "kind" (string or not, which size/aligment).
256   // Fortunately, creating symbols in the middle of a merge section is not
257   // supported by bfd or gold, so we can just create multiple section in that
258   // case.
259   const typename ELFT::Shdr *H = C->getSectionHdr();
260   typedef typename ELFT::uint uintX_t;
261   uintX_t Flags = H->sh_flags & (SHF_MERGE | SHF_STRINGS);
262 
263   uintX_t Alignment = 0;
264   if (isa<MergeInputSection<ELFT>>(C))
265     Alignment = std::max(H->sh_addralign, H->sh_entsize);
266 
267   return SectionKey<ELFT::Is64Bits>{OutsecName, /*Type*/ 0, Flags, Alignment};
268 }
269 
270 template <class ELFT>
271 void LinkerScript<ELFT>::addSection(OutputSectionFactory<ELFT> &Factory,
272                                     InputSectionBase<ELFT> *Sec,
273                                     StringRef Name) {
274   OutputSectionBase<ELFT> *OutSec;
275   bool IsNew;
276   std::tie(OutSec, IsNew) = Factory.create(createKey(Sec, Name), Sec);
277   if (IsNew)
278     OutputSections->push_back(OutSec);
279   OutSec->addSection(Sec);
280 }
281 
282 template <class ELFT>
283 void LinkerScript<ELFT>::processCommands(OutputSectionFactory<ELFT> &Factory) {
284 
285   for (unsigned I = 0; I < Opt.Commands.size(); ++I) {
286     auto Iter = Opt.Commands.begin() + I;
287     const std::unique_ptr<BaseCommand> &Base1 = *Iter;
288     if (auto *Cmd = dyn_cast<SymbolAssignment>(Base1.get())) {
289       if (shouldDefine<ELFT>(Cmd))
290         addRegular<ELFT>(Cmd);
291       continue;
292     }
293     if (auto *Cmd = dyn_cast<AssertCommand>(Base1.get())) {
294       // If we don't have SECTIONS then output sections have already been
295       // created by Writer<ELFT>. The LinkerScript<ELFT>::assignAddresses
296       // will not be called, so ASSERT should be evaluated now.
297       if (!Opt.HasSections)
298         Cmd->Expression(0);
299       continue;
300     }
301 
302     if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base1.get())) {
303       std::vector<InputSectionBase<ELFT> *> V = createInputSectionList(*Cmd);
304 
305       if (Cmd->Name == "/DISCARD/") {
306         discard(V);
307         continue;
308       }
309 
310       if (!matchConstraints<ELFT>(V, Cmd->Constraint)) {
311         for (InputSectionBase<ELFT> *S : V)
312           S->OutSec = nullptr;
313         Opt.Commands.erase(Iter);
314         --I;
315         continue;
316       }
317 
318       for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands)
319         if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base.get()))
320           if (shouldDefine<ELFT>(OutCmd))
321             addSymbol<ELFT>(OutCmd);
322 
323       if (V.empty())
324         continue;
325 
326       for (InputSectionBase<ELFT> *Sec : V) {
327         addSection(Factory, Sec, Cmd->Name);
328         if (uint32_t Subalign = Cmd->SubalignExpr ? Cmd->SubalignExpr(0) : 0)
329           Sec->Alignment = Subalign;
330       }
331     }
332   }
333 }
334 
335 template <class ELFT>
336 void LinkerScript<ELFT>::createSections(OutputSectionFactory<ELFT> &Factory) {
337   processCommands(Factory);
338   // Add orphan sections.
339   for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles())
340     for (InputSectionBase<ELFT> *S : F->getSections())
341       if (!isDiscarded(S) && !S->OutSec)
342         addSection(Factory, S, getOutputSectionName(S));
343 }
344 
345 // Sets value of a section-defined symbol. Two kinds of
346 // symbols are processed: synthetic symbols, whose value
347 // is an offset from beginning of section and regular
348 // symbols whose value is absolute.
349 template <class ELFT>
350 static void assignSectionSymbol(SymbolAssignment *Cmd,
351                                 OutputSectionBase<ELFT> *Sec,
352                                 typename ELFT::uint Off) {
353   if (!Cmd->Sym)
354     return;
355 
356   if (auto *Body = dyn_cast<DefinedSynthetic<ELFT>>(Cmd->Sym)) {
357     Body->Section = Sec;
358     Body->Value = Cmd->Expression(Sec->getVA() + Off) - Sec->getVA();
359     return;
360   }
361   auto *Body = cast<DefinedRegular<ELFT>>(Cmd->Sym);
362   Body->Value = Cmd->Expression(Sec->getVA() + Off);
363 }
364 
365 template <class ELFT> static bool isTbss(OutputSectionBase<ELFT> *Sec) {
366   return (Sec->getFlags() & SHF_TLS) && Sec->getType() == SHT_NOBITS;
367 }
368 
369 template <class ELFT> void LinkerScript<ELFT>::output(InputSection<ELFT> *S) {
370   if (!AlreadyOutputIS.insert(S).second)
371     return;
372   bool IsTbss = isTbss(CurOutSec);
373 
374   uintX_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot;
375   Pos = alignTo(Pos, S->Alignment);
376   S->OutSecOff = Pos - CurOutSec->getVA();
377   Pos += S->getSize();
378 
379   // Update output section size after adding each section. This is so that
380   // SIZEOF works correctly in the case below:
381   // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) }
382   CurOutSec->setSize(Pos - CurOutSec->getVA());
383 
384   if (IsTbss)
385     ThreadBssOffset = Pos - Dot;
386   else
387     Dot = Pos;
388 }
389 
390 template <class ELFT> void LinkerScript<ELFT>::flush() {
391   if (!CurOutSec || !AlreadyOutputOS.insert(CurOutSec).second)
392     return;
393   if (auto *OutSec = dyn_cast<OutputSection<ELFT>>(CurOutSec)) {
394     for (InputSection<ELFT> *I : OutSec->Sections)
395       output(I);
396   } else {
397     Dot += CurOutSec->getSize();
398   }
399 }
400 
401 template <class ELFT>
402 void LinkerScript<ELFT>::switchTo(OutputSectionBase<ELFT> *Sec) {
403   if (CurOutSec == Sec)
404     return;
405   if (AlreadyOutputOS.count(Sec))
406     return;
407 
408   flush();
409   CurOutSec = Sec;
410 
411   Dot = alignTo(Dot, CurOutSec->getAlignment());
412   CurOutSec->setVA(isTbss(CurOutSec) ? Dot + ThreadBssOffset : Dot);
413 }
414 
415 template <class ELFT> void LinkerScript<ELFT>::process(BaseCommand &Base) {
416   // This handles the assignments to symbol or to a location counter (.)
417   if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) {
418     if (AssignCmd->Name == ".") {
419       // Update to location counter means update to section size.
420       Dot = AssignCmd->Expression(Dot);
421       CurOutSec->setSize(Dot - CurOutSec->getVA());
422       return;
423     }
424     assignSectionSymbol<ELFT>(AssignCmd, CurOutSec, Dot - CurOutSec->getVA());
425     return;
426   }
427 
428   // Handle BYTE(), SHORT(), LONG(), or QUAD().
429   if (auto *DataCmd = dyn_cast<BytesDataCommand>(&Base)) {
430     DataCmd->Offset = Dot - CurOutSec->getVA();
431     Dot += DataCmd->Size;
432     CurOutSec->setSize(Dot - CurOutSec->getVA());
433     return;
434   }
435 
436   // It handles single input section description command,
437   // calculates and assigns the offsets for each section and also
438   // updates the output section size.
439   auto &ICmd = cast<InputSectionDescription>(Base);
440   for (InputSectionData *ID : ICmd.Sections) {
441     auto *IB = static_cast<InputSectionBase<ELFT> *>(ID);
442     switchTo(IB->OutSec);
443     if (auto *I = dyn_cast<InputSection<ELFT>>(IB))
444       output(I);
445     else
446       flush();
447   }
448 }
449 
450 template <class ELFT>
451 static std::vector<OutputSectionBase<ELFT> *>
452 findSections(OutputSectionCommand &Cmd,
453              const std::vector<OutputSectionBase<ELFT> *> &Sections) {
454   std::vector<OutputSectionBase<ELFT> *> Ret;
455   for (OutputSectionBase<ELFT> *Sec : Sections)
456     if (Sec->getName() == Cmd.Name)
457       Ret.push_back(Sec);
458   return Ret;
459 }
460 
461 template <class ELFT>
462 void LinkerScript<ELFT>::assignOffsets(OutputSectionCommand *Cmd) {
463   std::vector<OutputSectionBase<ELFT> *> Sections =
464       findSections(*Cmd, *OutputSections);
465   if (Sections.empty())
466     return;
467   switchTo(Sections[0]);
468 
469   // Find the last section output location. We will output orphan sections
470   // there so that end symbols point to the correct location.
471   auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(),
472                         [](const std::unique_ptr<BaseCommand> &Cmd) {
473                           return !isa<SymbolAssignment>(*Cmd);
474                         })
475                .base();
476   for (auto I = Cmd->Commands.begin(); I != E; ++I)
477     process(**I);
478   for (OutputSectionBase<ELFT> *Base : Sections)
479     switchTo(Base);
480   flush();
481   std::for_each(E, Cmd->Commands.end(),
482                 [this](std::unique_ptr<BaseCommand> &B) { process(*B.get()); });
483 }
484 
485 template <class ELFT> void LinkerScript<ELFT>::adjustSectionsBeforeSorting() {
486   // It is common practice to use very generic linker scripts. So for any
487   // given run some of the output sections in the script will be empty.
488   // We could create corresponding empty output sections, but that would
489   // clutter the output.
490   // We instead remove trivially empty sections. The bfd linker seems even
491   // more aggressive at removing them.
492   auto Pos = std::remove_if(
493       Opt.Commands.begin(), Opt.Commands.end(),
494       [&](const std::unique_ptr<BaseCommand> &Base) {
495         auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
496         if (!Cmd)
497           return false;
498         std::vector<OutputSectionBase<ELFT> *> Secs =
499             findSections(*Cmd, *OutputSections);
500         if (!Secs.empty())
501           return false;
502         for (const std::unique_ptr<BaseCommand> &I : Cmd->Commands)
503           if (!isa<InputSectionDescription>(I.get()))
504             return false;
505         return true;
506       });
507   Opt.Commands.erase(Pos, Opt.Commands.end());
508 
509   // If the output section contains only symbol assignments, create a
510   // corresponding output section. The bfd linker seems to only create them if
511   // '.' is assigned to, but creating these section should not have any bad
512   // consequeces and gives us a section to put the symbol in.
513   uintX_t Flags = SHF_ALLOC;
514   uint32_t Type = 0;
515   for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
516     auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
517     if (!Cmd)
518       continue;
519     std::vector<OutputSectionBase<ELFT> *> Secs =
520         findSections(*Cmd, *OutputSections);
521     if (!Secs.empty()) {
522       Flags = Secs[0]->getFlags();
523       Type = Secs[0]->getType();
524       continue;
525     }
526 
527     auto *OutSec = new OutputSection<ELFT>(Cmd->Name, Type, Flags);
528     Out<ELFT>::Pool.emplace_back(OutSec);
529     OutputSections->push_back(OutSec);
530   }
531 }
532 
533 // When placing orphan sections, we want to place them after symbol assignments
534 // so that an orphan after
535 //   begin_foo = .;
536 //   foo : { *(foo) }
537 //   end_foo = .;
538 // doesn't break the intended meaning of the begin/end symbols.
539 // We don't want to go over sections since Writer<ELFT>::sortSections is the
540 // one in charge of deciding the order of the sections.
541 // We don't want to go over alignments, since doing so in
542 //  rx_sec : { *(rx_sec) }
543 //  . = ALIGN(0x1000);
544 //  /* The RW PT_LOAD starts here*/
545 //  rw_sec : { *(rw_sec) }
546 // would mean that the RW PT_LOAD would become unaligned.
547 static bool shouldSkip(const BaseCommand &Cmd) {
548   if (isa<OutputSectionCommand>(Cmd))
549     return false;
550   const auto *Assign = dyn_cast<SymbolAssignment>(&Cmd);
551   if (!Assign)
552     return true;
553   return Assign->Name != ".";
554 }
555 
556 template <class ELFT> void LinkerScript<ELFT>::assignAddresses() {
557   // Orphan sections are sections present in the input files which
558   // are not explicitly placed into the output file by the linker script.
559   // We place orphan sections at end of file.
560   // Other linkers places them using some heuristics as described in
561   // https://sourceware.org/binutils/docs/ld/Orphan-Sections.html#Orphan-Sections.
562 
563   // The OutputSections are already in the correct order.
564   // This loops creates or moves commands as needed so that they are in the
565   // correct order.
566   int CmdIndex = 0;
567   for (OutputSectionBase<ELFT> *Sec : *OutputSections) {
568     StringRef Name = Sec->getName();
569 
570     // Find the last spot where we can insert a command and still get the
571     // correct result.
572     auto CmdIter = Opt.Commands.begin() + CmdIndex;
573     auto E = Opt.Commands.end();
574     while (CmdIter != E && shouldSkip(**CmdIter)) {
575       ++CmdIter;
576       ++CmdIndex;
577     }
578 
579     auto Pos =
580         std::find_if(CmdIter, E, [&](const std::unique_ptr<BaseCommand> &Base) {
581           auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
582           return Cmd && Cmd->Name == Name;
583         });
584     if (Pos == E) {
585       Opt.Commands.insert(CmdIter,
586                           llvm::make_unique<OutputSectionCommand>(Name));
587       ++CmdIndex;
588       continue;
589     }
590 
591     // Continue from where we found it.
592     CmdIndex = (Pos - Opt.Commands.begin()) + 1;
593     continue;
594   }
595 
596   // Assign addresses as instructed by linker script SECTIONS sub-commands.
597   Dot = getHeaderSize();
598 
599   for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
600     if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) {
601       if (Cmd->Name == ".") {
602         Dot = Cmd->Expression(Dot);
603       } else if (Cmd->Sym) {
604         cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Cmd->Expression(Dot);
605       }
606       continue;
607     }
608 
609     if (auto *Cmd = dyn_cast<AssertCommand>(Base.get())) {
610       Cmd->Expression(Dot);
611       continue;
612     }
613 
614     auto *Cmd = cast<OutputSectionCommand>(Base.get());
615 
616     if (Cmd->AddrExpr)
617       Dot = Cmd->AddrExpr(Dot);
618 
619     assignOffsets(Cmd);
620   }
621 
622   uintX_t MinVA = std::numeric_limits<uintX_t>::max();
623   for (OutputSectionBase<ELFT> *Sec : *OutputSections) {
624     if (Sec->getFlags() & SHF_ALLOC)
625       MinVA = std::min(MinVA, Sec->getVA());
626     else
627       Sec->setVA(0);
628   }
629 
630   uintX_t HeaderSize = getHeaderSize();
631   if (HeaderSize > MinVA)
632     fatal("Not enough space for ELF and program headers");
633 
634   // ELF and Program headers need to be right before the first section in
635   // memory. Set their addresses accordingly.
636   MinVA = alignDown(MinVA - HeaderSize, Target->PageSize);
637   Out<ELFT>::ElfHeader->setVA(MinVA);
638   Out<ELFT>::ProgramHeaders->setVA(Out<ELFT>::ElfHeader->getSize() + MinVA);
639 }
640 
641 // Creates program headers as instructed by PHDRS linker script command.
642 template <class ELFT>
643 std::vector<PhdrEntry<ELFT>> LinkerScript<ELFT>::createPhdrs() {
644   std::vector<PhdrEntry<ELFT>> Ret;
645 
646   // Process PHDRS and FILEHDR keywords because they are not
647   // real output sections and cannot be added in the following loop.
648   for (const PhdrsCommand &Cmd : Opt.PhdrsCommands) {
649     Ret.emplace_back(Cmd.Type, Cmd.Flags == UINT_MAX ? PF_R : Cmd.Flags);
650     PhdrEntry<ELFT> &Phdr = Ret.back();
651 
652     if (Cmd.HasFilehdr)
653       Phdr.add(Out<ELFT>::ElfHeader);
654     if (Cmd.HasPhdrs)
655       Phdr.add(Out<ELFT>::ProgramHeaders);
656 
657     if (Cmd.LMAExpr) {
658       Phdr.H.p_paddr = Cmd.LMAExpr(0);
659       Phdr.HasLMA = true;
660     }
661   }
662 
663   // Add output sections to program headers.
664   PhdrEntry<ELFT> *Load = nullptr;
665   uintX_t Flags = PF_R;
666   for (OutputSectionBase<ELFT> *Sec : *OutputSections) {
667     if (!(Sec->getFlags() & SHF_ALLOC))
668       break;
669 
670     std::vector<size_t> PhdrIds = getPhdrIndices(Sec->getName());
671     if (!PhdrIds.empty()) {
672       // Assign headers specified by linker script
673       for (size_t Id : PhdrIds) {
674         Ret[Id].add(Sec);
675         if (Opt.PhdrsCommands[Id].Flags == UINT_MAX)
676           Ret[Id].H.p_flags |= Sec->getPhdrFlags();
677       }
678     } else {
679       // If we have no load segment or flags've changed then we want new load
680       // segment.
681       uintX_t NewFlags = Sec->getPhdrFlags();
682       if (Load == nullptr || Flags != NewFlags) {
683         Load = &*Ret.emplace(Ret.end(), PT_LOAD, NewFlags);
684         Flags = NewFlags;
685       }
686       Load->add(Sec);
687     }
688   }
689   return Ret;
690 }
691 
692 template <class ELFT> bool LinkerScript<ELFT>::ignoreInterpSection() {
693   // Ignore .interp section in case we have PHDRS specification
694   // and PT_INTERP isn't listed.
695   return !Opt.PhdrsCommands.empty() &&
696          llvm::find_if(Opt.PhdrsCommands, [](const PhdrsCommand &Cmd) {
697            return Cmd.Type == PT_INTERP;
698          }) == Opt.PhdrsCommands.end();
699 }
700 
701 template <class ELFT>
702 ArrayRef<uint8_t> LinkerScript<ELFT>::getFiller(StringRef Name) {
703   for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands)
704     if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()))
705       if (Cmd->Name == Name)
706         return Cmd->Filler;
707   return {};
708 }
709 
710 template <class ELFT>
711 static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) {
712   const endianness E = ELFT::TargetEndianness;
713 
714   switch (Size) {
715   case 1:
716     *Buf = (uint8_t)Data;
717     break;
718   case 2:
719     write16<E>(Buf, Data);
720     break;
721   case 4:
722     write32<E>(Buf, Data);
723     break;
724   case 8:
725     write64<E>(Buf, Data);
726     break;
727   default:
728     llvm_unreachable("unsupported Size argument");
729   }
730 }
731 
732 template <class ELFT>
733 void LinkerScript<ELFT>::writeDataBytes(StringRef Name, uint8_t *Buf) {
734   int I = getSectionIndex(Name);
735   if (I == INT_MAX)
736     return;
737 
738   OutputSectionCommand *Cmd =
739       dyn_cast<OutputSectionCommand>(Opt.Commands[I].get());
740   for (const std::unique_ptr<BaseCommand> &Base2 : Cmd->Commands)
741     if (auto *DataCmd = dyn_cast<BytesDataCommand>(Base2.get()))
742       writeInt<ELFT>(&Buf[DataCmd->Offset], DataCmd->Data, DataCmd->Size);
743 }
744 
745 template <class ELFT> Expr LinkerScript<ELFT>::getLma(StringRef Name) {
746   for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands)
747     if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()))
748       if (Cmd->LmaExpr && Cmd->Name == Name)
749         return Cmd->LmaExpr;
750   return {};
751 }
752 
753 // Returns the index of the given section name in linker script
754 // SECTIONS commands. Sections are laid out as the same order as they
755 // were in the script. If a given name did not appear in the script,
756 // it returns INT_MAX, so that it will be laid out at end of file.
757 template <class ELFT> int LinkerScript<ELFT>::getSectionIndex(StringRef Name) {
758   int I = 0;
759   for (std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
760     if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()))
761       if (Cmd->Name == Name)
762         return I;
763     ++I;
764   }
765   return INT_MAX;
766 }
767 
768 template <class ELFT> bool LinkerScript<ELFT>::hasPhdrsCommands() {
769   return !Opt.PhdrsCommands.empty();
770 }
771 
772 template <class ELFT>
773 uint64_t LinkerScript<ELFT>::getOutputSectionAddress(StringRef Name) {
774   for (OutputSectionBase<ELFT> *Sec : *OutputSections)
775     if (Sec->getName() == Name)
776       return Sec->getVA();
777   error("undefined section " + Name);
778   return 0;
779 }
780 
781 template <class ELFT>
782 uint64_t LinkerScript<ELFT>::getOutputSectionSize(StringRef Name) {
783   for (OutputSectionBase<ELFT> *Sec : *OutputSections)
784     if (Sec->getName() == Name)
785       return Sec->getSize();
786   error("undefined section " + Name);
787   return 0;
788 }
789 
790 template <class ELFT>
791 uint64_t LinkerScript<ELFT>::getOutputSectionAlign(StringRef Name) {
792   for (OutputSectionBase<ELFT> *Sec : *OutputSections)
793     if (Sec->getName() == Name)
794       return Sec->getAlignment();
795   error("undefined section " + Name);
796   return 0;
797 }
798 
799 template <class ELFT> uint64_t LinkerScript<ELFT>::getHeaderSize() {
800   return elf::getHeaderSize<ELFT>();
801 }
802 
803 template <class ELFT> uint64_t LinkerScript<ELFT>::getSymbolValue(StringRef S) {
804   if (SymbolBody *B = Symtab<ELFT>::X->find(S))
805     return B->getVA<ELFT>();
806   error("symbol not found: " + S);
807   return 0;
808 }
809 
810 template <class ELFT> bool LinkerScript<ELFT>::isDefined(StringRef S) {
811   return Symtab<ELFT>::X->find(S) != nullptr;
812 }
813 
814 // Returns indices of ELF headers containing specific section, identified
815 // by Name. Each index is a zero based number of ELF header listed within
816 // PHDRS {} script block.
817 template <class ELFT>
818 std::vector<size_t> LinkerScript<ELFT>::getPhdrIndices(StringRef SectionName) {
819   for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
820     auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
821     if (!Cmd || Cmd->Name != SectionName)
822       continue;
823 
824     std::vector<size_t> Ret;
825     for (StringRef PhdrName : Cmd->Phdrs)
826       Ret.push_back(getPhdrIndex(PhdrName));
827     return Ret;
828   }
829   return {};
830 }
831 
832 template <class ELFT>
833 size_t LinkerScript<ELFT>::getPhdrIndex(StringRef PhdrName) {
834   size_t I = 0;
835   for (PhdrsCommand &Cmd : Opt.PhdrsCommands) {
836     if (Cmd.Name == PhdrName)
837       return I;
838     ++I;
839   }
840   error("section header '" + PhdrName + "' is not listed in PHDRS");
841   return 0;
842 }
843 
844 class elf::ScriptParser : public ScriptParserBase {
845   typedef void (ScriptParser::*Handler)();
846 
847 public:
848   ScriptParser(StringRef S, bool B) : ScriptParserBase(S), IsUnderSysroot(B) {}
849 
850   void readLinkerScript();
851   void readVersionScript();
852 
853 private:
854   void addFile(StringRef Path);
855 
856   void readAsNeeded();
857   void readEntry();
858   void readExtern();
859   void readGroup();
860   void readInclude();
861   void readOutput();
862   void readOutputArch();
863   void readOutputFormat();
864   void readPhdrs();
865   void readSearchDir();
866   void readSections();
867   void readVersion();
868   void readVersionScriptCommand();
869 
870   SymbolAssignment *readAssignment(StringRef Name);
871   BytesDataCommand *readBytesDataCommand(StringRef Tok);
872   std::vector<uint8_t> readFill();
873   OutputSectionCommand *readOutputSectionDescription(StringRef OutSec);
874   std::vector<uint8_t> readOutputSectionFiller(StringRef Tok);
875   std::vector<StringRef> readOutputSectionPhdrs();
876   InputSectionDescription *readInputSectionDescription(StringRef Tok);
877   Regex readFilePatterns();
878   std::vector<SectionPattern> readInputSectionsList();
879   InputSectionDescription *readInputSectionRules(StringRef FilePattern);
880   unsigned readPhdrType();
881   SortSectionPolicy readSortKind();
882   SymbolAssignment *readProvideHidden(bool Provide, bool Hidden);
883   SymbolAssignment *readProvideOrAssignment(StringRef Tok, bool MakeAbsolute);
884   void readSort();
885   Expr readAssert();
886 
887   Expr readExpr();
888   Expr readExpr1(Expr Lhs, int MinPrec);
889   Expr readPrimary();
890   Expr readTernary(Expr Cond);
891   Expr readParenExpr();
892 
893   // For parsing version script.
894   void readExtern(std::vector<SymbolVersion> *Globals);
895   void readVersionDeclaration(StringRef VerStr);
896   void readGlobal(StringRef VerStr);
897   void readLocal();
898 
899   ScriptConfiguration &Opt = *ScriptConfig;
900   StringSaver Saver = {ScriptConfig->Alloc};
901   bool IsUnderSysroot;
902 };
903 
904 void ScriptParser::readVersionScript() {
905   readVersionScriptCommand();
906   if (!atEOF())
907     setError("EOF expected, but got " + next());
908 }
909 
910 void ScriptParser::readVersionScriptCommand() {
911   if (skip("{")) {
912     readVersionDeclaration("");
913     return;
914   }
915 
916   while (!atEOF() && !Error && peek() != "}") {
917     StringRef VerStr = next();
918     if (VerStr == "{") {
919       setError("anonymous version definition is used in "
920                "combination with other version definitions");
921       return;
922     }
923     expect("{");
924     readVersionDeclaration(VerStr);
925   }
926 }
927 
928 void ScriptParser::readVersion() {
929   expect("{");
930   readVersionScriptCommand();
931   expect("}");
932 }
933 
934 void ScriptParser::readLinkerScript() {
935   while (!atEOF()) {
936     StringRef Tok = next();
937     if (Tok == ";")
938       continue;
939 
940     if (Tok == "ASSERT") {
941       Opt.Commands.emplace_back(new AssertCommand(readAssert()));
942     } else if (Tok == "ENTRY") {
943       readEntry();
944     } else if (Tok == "EXTERN") {
945       readExtern();
946     } else if (Tok == "GROUP" || Tok == "INPUT") {
947       readGroup();
948     } else if (Tok == "INCLUDE") {
949       readInclude();
950     } else if (Tok == "OUTPUT") {
951       readOutput();
952     } else if (Tok == "OUTPUT_ARCH") {
953       readOutputArch();
954     } else if (Tok == "OUTPUT_FORMAT") {
955       readOutputFormat();
956     } else if (Tok == "PHDRS") {
957       readPhdrs();
958     } else if (Tok == "SEARCH_DIR") {
959       readSearchDir();
960     } else if (Tok == "SECTIONS") {
961       readSections();
962     } else if (Tok == "VERSION") {
963       readVersion();
964     } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok, true)) {
965       Opt.Commands.emplace_back(Cmd);
966     } else {
967       setError("unknown directive: " + Tok);
968     }
969   }
970 }
971 
972 void ScriptParser::addFile(StringRef S) {
973   if (IsUnderSysroot && S.startswith("/")) {
974     SmallString<128> Path;
975     (Config->Sysroot + S).toStringRef(Path);
976     if (sys::fs::exists(Path)) {
977       Driver->addFile(Saver.save(Path.str()));
978       return;
979     }
980   }
981 
982   if (sys::path::is_absolute(S)) {
983     Driver->addFile(S);
984   } else if (S.startswith("=")) {
985     if (Config->Sysroot.empty())
986       Driver->addFile(S.substr(1));
987     else
988       Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)));
989   } else if (S.startswith("-l")) {
990     Driver->addLibrary(S.substr(2));
991   } else if (sys::fs::exists(S)) {
992     Driver->addFile(S);
993   } else {
994     std::string Path = findFromSearchPaths(S);
995     if (Path.empty())
996       setError("unable to find " + S);
997     else
998       Driver->addFile(Saver.save(Path));
999   }
1000 }
1001 
1002 void ScriptParser::readAsNeeded() {
1003   expect("(");
1004   bool Orig = Config->AsNeeded;
1005   Config->AsNeeded = true;
1006   while (!Error && !skip(")"))
1007     addFile(unquote(next()));
1008   Config->AsNeeded = Orig;
1009 }
1010 
1011 void ScriptParser::readEntry() {
1012   // -e <symbol> takes predecence over ENTRY(<symbol>).
1013   expect("(");
1014   StringRef Tok = next();
1015   if (Config->Entry.empty())
1016     Config->Entry = Tok;
1017   expect(")");
1018 }
1019 
1020 void ScriptParser::readExtern() {
1021   expect("(");
1022   while (!Error && !skip(")"))
1023     Config->Undefined.push_back(next());
1024 }
1025 
1026 void ScriptParser::readGroup() {
1027   expect("(");
1028   while (!Error && !skip(")")) {
1029     StringRef Tok = next();
1030     if (Tok == "AS_NEEDED")
1031       readAsNeeded();
1032     else
1033       addFile(unquote(Tok));
1034   }
1035 }
1036 
1037 void ScriptParser::readInclude() {
1038   StringRef Tok = next();
1039   auto MBOrErr = MemoryBuffer::getFile(unquote(Tok));
1040   if (!MBOrErr) {
1041     setError("cannot open " + Tok);
1042     return;
1043   }
1044   std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
1045   StringRef S = Saver.save(MB->getMemBufferRef().getBuffer());
1046   std::vector<StringRef> V = tokenize(S);
1047   Tokens.insert(Tokens.begin() + Pos, V.begin(), V.end());
1048 }
1049 
1050 void ScriptParser::readOutput() {
1051   // -o <file> takes predecence over OUTPUT(<file>).
1052   expect("(");
1053   StringRef Tok = next();
1054   if (Config->OutputFile.empty())
1055     Config->OutputFile = unquote(Tok);
1056   expect(")");
1057 }
1058 
1059 void ScriptParser::readOutputArch() {
1060   // Error checking only for now.
1061   expect("(");
1062   next();
1063   expect(")");
1064 }
1065 
1066 void ScriptParser::readOutputFormat() {
1067   // Error checking only for now.
1068   expect("(");
1069   next();
1070   StringRef Tok = next();
1071   if (Tok == ")")
1072     return;
1073   if (Tok != ",") {
1074     setError("unexpected token: " + Tok);
1075     return;
1076   }
1077   next();
1078   expect(",");
1079   next();
1080   expect(")");
1081 }
1082 
1083 void ScriptParser::readPhdrs() {
1084   expect("{");
1085   while (!Error && !skip("}")) {
1086     StringRef Tok = next();
1087     Opt.PhdrsCommands.push_back(
1088         {Tok, PT_NULL, false, false, UINT_MAX, nullptr});
1089     PhdrsCommand &PhdrCmd = Opt.PhdrsCommands.back();
1090 
1091     PhdrCmd.Type = readPhdrType();
1092     do {
1093       Tok = next();
1094       if (Tok == ";")
1095         break;
1096       if (Tok == "FILEHDR")
1097         PhdrCmd.HasFilehdr = true;
1098       else if (Tok == "PHDRS")
1099         PhdrCmd.HasPhdrs = true;
1100       else if (Tok == "AT")
1101         PhdrCmd.LMAExpr = readParenExpr();
1102       else if (Tok == "FLAGS") {
1103         expect("(");
1104         // Passing 0 for the value of dot is a bit of a hack. It means that
1105         // we accept expressions like ".|1".
1106         PhdrCmd.Flags = readExpr()(0);
1107         expect(")");
1108       } else
1109         setError("unexpected header attribute: " + Tok);
1110     } while (!Error);
1111   }
1112 }
1113 
1114 void ScriptParser::readSearchDir() {
1115   expect("(");
1116   StringRef Tok = next();
1117   if (!Config->Nostdlib)
1118     Config->SearchPaths.push_back(unquote(Tok));
1119   expect(")");
1120 }
1121 
1122 void ScriptParser::readSections() {
1123   Opt.HasSections = true;
1124   expect("{");
1125   while (!Error && !skip("}")) {
1126     StringRef Tok = next();
1127     BaseCommand *Cmd = readProvideOrAssignment(Tok, true);
1128     if (!Cmd) {
1129       if (Tok == "ASSERT")
1130         Cmd = new AssertCommand(readAssert());
1131       else
1132         Cmd = readOutputSectionDescription(Tok);
1133     }
1134     Opt.Commands.emplace_back(Cmd);
1135   }
1136 }
1137 
1138 static int precedence(StringRef Op) {
1139   return StringSwitch<int>(Op)
1140       .Cases("*", "/", 5)
1141       .Cases("+", "-", 4)
1142       .Cases("<<", ">>", 3)
1143       .Cases("<", "<=", ">", ">=", "==", "!=", 2)
1144       .Cases("&", "|", 1)
1145       .Default(-1);
1146 }
1147 
1148 Regex ScriptParser::readFilePatterns() {
1149   std::vector<StringRef> V;
1150   while (!Error && !skip(")"))
1151     V.push_back(next());
1152   return compileGlobPatterns(V);
1153 }
1154 
1155 SortSectionPolicy ScriptParser::readSortKind() {
1156   if (skip("SORT") || skip("SORT_BY_NAME"))
1157     return SortSectionPolicy::Name;
1158   if (skip("SORT_BY_ALIGNMENT"))
1159     return SortSectionPolicy::Alignment;
1160   if (skip("SORT_BY_INIT_PRIORITY"))
1161     return SortSectionPolicy::Priority;
1162   if (skip("SORT_NONE"))
1163     return SortSectionPolicy::None;
1164   return SortSectionPolicy::Default;
1165 }
1166 
1167 // Method reads a list of sequence of excluded files and section globs given in
1168 // a following form: ((EXCLUDE_FILE(file_pattern+))? section_pattern+)+
1169 // Example: *(.foo.1 EXCLUDE_FILE (*a.o) .foo.2 EXCLUDE_FILE (*b.o) .foo.3)
1170 // The semantics of that is next:
1171 // * Include .foo.1 from every file.
1172 // * Include .foo.2 from every file but a.o
1173 // * Include .foo.3 from every file but b.o
1174 std::vector<SectionPattern> ScriptParser::readInputSectionsList() {
1175   std::vector<SectionPattern> Ret;
1176   while (!Error && peek() != ")") {
1177     Regex ExcludeFileRe;
1178     if (skip("EXCLUDE_FILE")) {
1179       expect("(");
1180       ExcludeFileRe = readFilePatterns();
1181     }
1182 
1183     std::vector<StringRef> V;
1184     while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE")
1185       V.push_back(next());
1186 
1187     if (!V.empty())
1188       Ret.push_back({std::move(ExcludeFileRe), compileGlobPatterns(V)});
1189     else
1190       setError("section pattern is expected");
1191   }
1192   return Ret;
1193 }
1194 
1195 // Section pattern grammar can have complex expressions, for example:
1196 // *(SORT(.foo.* EXCLUDE_FILE (*file1.o) .bar.*) .bar.* SORT(.zed.*))
1197 // Generally is a sequence of globs and excludes that may be wrapped in a SORT()
1198 // commands, like: SORT(glob0) glob1 glob2 SORT(glob4)
1199 // This methods handles wrapping sequences of excluded files and section globs
1200 // into SORT() if that needed and reads them all.
1201 InputSectionDescription *
1202 ScriptParser::readInputSectionRules(StringRef FilePattern) {
1203   auto *Cmd = new InputSectionDescription(FilePattern);
1204   expect("(");
1205   while (!HasError && !skip(")")) {
1206     SortSectionPolicy Outer = readSortKind();
1207     SortSectionPolicy Inner = SortSectionPolicy::Default;
1208     std::vector<SectionPattern> V;
1209     if (Outer != SortSectionPolicy::Default) {
1210       expect("(");
1211       Inner = readSortKind();
1212       if (Inner != SortSectionPolicy::Default) {
1213         expect("(");
1214         V = readInputSectionsList();
1215         expect(")");
1216       } else {
1217         V = readInputSectionsList();
1218       }
1219       expect(")");
1220     } else {
1221       V = readInputSectionsList();
1222     }
1223 
1224     for (SectionPattern &Pat : V) {
1225       Pat.SortInner = Inner;
1226       Pat.SortOuter = Outer;
1227     }
1228 
1229     std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns));
1230   }
1231   return Cmd;
1232 }
1233 
1234 InputSectionDescription *
1235 ScriptParser::readInputSectionDescription(StringRef Tok) {
1236   // Input section wildcard can be surrounded by KEEP.
1237   // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep
1238   if (Tok == "KEEP") {
1239     expect("(");
1240     StringRef FilePattern = next();
1241     InputSectionDescription *Cmd = readInputSectionRules(FilePattern);
1242     expect(")");
1243     for (SectionPattern &Pat : Cmd->SectionPatterns)
1244       Opt.KeptSections.push_back(&Pat.SectionRe);
1245     return Cmd;
1246   }
1247   return readInputSectionRules(Tok);
1248 }
1249 
1250 void ScriptParser::readSort() {
1251   expect("(");
1252   expect("CONSTRUCTORS");
1253   expect(")");
1254 }
1255 
1256 Expr ScriptParser::readAssert() {
1257   expect("(");
1258   Expr E = readExpr();
1259   expect(",");
1260   StringRef Msg = unquote(next());
1261   expect(")");
1262   return [=](uint64_t Dot) {
1263     uint64_t V = E(Dot);
1264     if (!V)
1265       error(Msg);
1266     return V;
1267   };
1268 }
1269 
1270 // Reads a FILL(expr) command. We handle the FILL command as an
1271 // alias for =fillexp section attribute, which is different from
1272 // what GNU linkers do.
1273 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html
1274 std::vector<uint8_t> ScriptParser::readFill() {
1275   expect("(");
1276   std::vector<uint8_t> V = readOutputSectionFiller(next());
1277   expect(")");
1278   expect(";");
1279   return V;
1280 }
1281 
1282 OutputSectionCommand *
1283 ScriptParser::readOutputSectionDescription(StringRef OutSec) {
1284   OutputSectionCommand *Cmd = new OutputSectionCommand(OutSec);
1285 
1286   // Read an address expression.
1287   // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html#Output-Section-Address
1288   if (peek() != ":")
1289     Cmd->AddrExpr = readExpr();
1290 
1291   expect(":");
1292 
1293   if (skip("AT"))
1294     Cmd->LmaExpr = readParenExpr();
1295   if (skip("ALIGN"))
1296     Cmd->AlignExpr = readParenExpr();
1297   if (skip("SUBALIGN"))
1298     Cmd->SubalignExpr = readParenExpr();
1299 
1300   // Parse constraints.
1301   if (skip("ONLY_IF_RO"))
1302     Cmd->Constraint = ConstraintKind::ReadOnly;
1303   if (skip("ONLY_IF_RW"))
1304     Cmd->Constraint = ConstraintKind::ReadWrite;
1305   expect("{");
1306 
1307   while (!Error && !skip("}")) {
1308     StringRef Tok = next();
1309     if (SymbolAssignment *Assignment = readProvideOrAssignment(Tok, false))
1310       Cmd->Commands.emplace_back(Assignment);
1311     else if (BytesDataCommand *Data = readBytesDataCommand(Tok))
1312       Cmd->Commands.emplace_back(Data);
1313     else if (Tok == "FILL")
1314       Cmd->Filler = readFill();
1315     else if (Tok == "SORT")
1316       readSort();
1317     else if (peek() == "(")
1318       Cmd->Commands.emplace_back(readInputSectionDescription(Tok));
1319     else
1320       setError("unknown command " + Tok);
1321   }
1322   Cmd->Phdrs = readOutputSectionPhdrs();
1323 
1324   if (skip("="))
1325     Cmd->Filler = readOutputSectionFiller(next());
1326   else if (peek().startswith("="))
1327     Cmd->Filler = readOutputSectionFiller(next().drop_front());
1328 
1329   return Cmd;
1330 }
1331 
1332 // Read "=<number>" where <number> is an octal/decimal/hexadecimal number.
1333 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html
1334 //
1335 // ld.gold is not fully compatible with ld.bfd. ld.bfd handles
1336 // hexstrings as blobs of arbitrary sizes, while ld.gold handles them
1337 // as 32-bit big-endian values. We will do the same as ld.gold does
1338 // because it's simpler than what ld.bfd does.
1339 std::vector<uint8_t> ScriptParser::readOutputSectionFiller(StringRef Tok) {
1340   uint32_t V;
1341   if (Tok.getAsInteger(0, V)) {
1342     setError("invalid filler expression: " + Tok);
1343     return {};
1344   }
1345   return {uint8_t(V >> 24), uint8_t(V >> 16), uint8_t(V >> 8), uint8_t(V)};
1346 }
1347 
1348 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) {
1349   expect("(");
1350   SymbolAssignment *Cmd = readAssignment(next());
1351   Cmd->Provide = Provide;
1352   Cmd->Hidden = Hidden;
1353   expect(")");
1354   expect(";");
1355   return Cmd;
1356 }
1357 
1358 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok,
1359                                                         bool MakeAbsolute) {
1360   SymbolAssignment *Cmd = nullptr;
1361   if (peek() == "=" || peek() == "+=") {
1362     Cmd = readAssignment(Tok);
1363     expect(";");
1364   } else if (Tok == "PROVIDE") {
1365     Cmd = readProvideHidden(true, false);
1366   } else if (Tok == "HIDDEN") {
1367     Cmd = readProvideHidden(false, true);
1368   } else if (Tok == "PROVIDE_HIDDEN") {
1369     Cmd = readProvideHidden(true, true);
1370   }
1371   if (Cmd && MakeAbsolute)
1372     Cmd->IsAbsolute = true;
1373   return Cmd;
1374 }
1375 
1376 static uint64_t getSymbolValue(StringRef S, uint64_t Dot) {
1377   if (S == ".")
1378     return Dot;
1379   return ScriptBase->getSymbolValue(S);
1380 }
1381 
1382 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) {
1383   StringRef Op = next();
1384   bool IsAbsolute = false;
1385   Expr E;
1386   assert(Op == "=" || Op == "+=");
1387   if (skip("ABSOLUTE")) {
1388     E = readParenExpr();
1389     IsAbsolute = true;
1390   } else {
1391     E = readExpr();
1392   }
1393   if (Op == "+=")
1394     E = [=](uint64_t Dot) { return getSymbolValue(Name, Dot) + E(Dot); };
1395   return new SymbolAssignment(Name, E, IsAbsolute);
1396 }
1397 
1398 // This is an operator-precedence parser to parse a linker
1399 // script expression.
1400 Expr ScriptParser::readExpr() { return readExpr1(readPrimary(), 0); }
1401 
1402 static Expr combine(StringRef Op, Expr L, Expr R) {
1403   if (Op == "*")
1404     return [=](uint64_t Dot) { return L(Dot) * R(Dot); };
1405   if (Op == "/") {
1406     return [=](uint64_t Dot) -> uint64_t {
1407       uint64_t RHS = R(Dot);
1408       if (RHS == 0) {
1409         error("division by zero");
1410         return 0;
1411       }
1412       return L(Dot) / RHS;
1413     };
1414   }
1415   if (Op == "+")
1416     return [=](uint64_t Dot) { return L(Dot) + R(Dot); };
1417   if (Op == "-")
1418     return [=](uint64_t Dot) { return L(Dot) - R(Dot); };
1419   if (Op == "<<")
1420     return [=](uint64_t Dot) { return L(Dot) << R(Dot); };
1421   if (Op == ">>")
1422     return [=](uint64_t Dot) { return L(Dot) >> R(Dot); };
1423   if (Op == "<")
1424     return [=](uint64_t Dot) { return L(Dot) < R(Dot); };
1425   if (Op == ">")
1426     return [=](uint64_t Dot) { return L(Dot) > R(Dot); };
1427   if (Op == ">=")
1428     return [=](uint64_t Dot) { return L(Dot) >= R(Dot); };
1429   if (Op == "<=")
1430     return [=](uint64_t Dot) { return L(Dot) <= R(Dot); };
1431   if (Op == "==")
1432     return [=](uint64_t Dot) { return L(Dot) == R(Dot); };
1433   if (Op == "!=")
1434     return [=](uint64_t Dot) { return L(Dot) != R(Dot); };
1435   if (Op == "&")
1436     return [=](uint64_t Dot) { return L(Dot) & R(Dot); };
1437   if (Op == "|")
1438     return [=](uint64_t Dot) { return L(Dot) | R(Dot); };
1439   llvm_unreachable("invalid operator");
1440 }
1441 
1442 // This is a part of the operator-precedence parser. This function
1443 // assumes that the remaining token stream starts with an operator.
1444 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) {
1445   while (!atEOF() && !Error) {
1446     // Read an operator and an expression.
1447     StringRef Op1 = peek();
1448     if (Op1 == "?")
1449       return readTernary(Lhs);
1450     if (precedence(Op1) < MinPrec)
1451       break;
1452     next();
1453     Expr Rhs = readPrimary();
1454 
1455     // Evaluate the remaining part of the expression first if the
1456     // next operator has greater precedence than the previous one.
1457     // For example, if we have read "+" and "3", and if the next
1458     // operator is "*", then we'll evaluate 3 * ... part first.
1459     while (!atEOF()) {
1460       StringRef Op2 = peek();
1461       if (precedence(Op2) <= precedence(Op1))
1462         break;
1463       Rhs = readExpr1(Rhs, precedence(Op2));
1464     }
1465 
1466     Lhs = combine(Op1, Lhs, Rhs);
1467   }
1468   return Lhs;
1469 }
1470 
1471 uint64_t static getConstant(StringRef S) {
1472   if (S == "COMMONPAGESIZE")
1473     return Target->PageSize;
1474   if (S == "MAXPAGESIZE")
1475     return Target->MaxPageSize;
1476   error("unknown constant: " + S);
1477   return 0;
1478 }
1479 
1480 // Parses Tok as an integer. Returns true if successful.
1481 // It recognizes hexadecimal (prefixed with "0x" or suffixed with "H")
1482 // and decimal numbers. Decimal numbers may have "K" (kilo) or
1483 // "M" (mega) prefixes.
1484 static bool readInteger(StringRef Tok, uint64_t &Result) {
1485   if (Tok.startswith("-")) {
1486     if (!readInteger(Tok.substr(1), Result))
1487       return false;
1488     Result = -Result;
1489     return true;
1490   }
1491   if (Tok.startswith_lower("0x"))
1492     return !Tok.substr(2).getAsInteger(16, Result);
1493   if (Tok.endswith_lower("H"))
1494     return !Tok.drop_back().getAsInteger(16, Result);
1495 
1496   int Suffix = 1;
1497   if (Tok.endswith_lower("K")) {
1498     Suffix = 1024;
1499     Tok = Tok.drop_back();
1500   } else if (Tok.endswith_lower("M")) {
1501     Suffix = 1024 * 1024;
1502     Tok = Tok.drop_back();
1503   }
1504   if (Tok.getAsInteger(10, Result))
1505     return false;
1506   Result *= Suffix;
1507   return true;
1508 }
1509 
1510 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) {
1511   int Size = StringSwitch<unsigned>(Tok)
1512                  .Case("BYTE", 1)
1513                  .Case("SHORT", 2)
1514                  .Case("LONG", 4)
1515                  .Case("QUAD", 8)
1516                  .Default(-1);
1517   if (Size == -1)
1518     return nullptr;
1519 
1520   expect("(");
1521   uint64_t Val = 0;
1522   StringRef S = next();
1523   if (!readInteger(S, Val))
1524     setError("unexpected value: " + S);
1525   expect(")");
1526   return new BytesDataCommand(Val, Size);
1527 }
1528 
1529 Expr ScriptParser::readPrimary() {
1530   if (peek() == "(")
1531     return readParenExpr();
1532 
1533   StringRef Tok = next();
1534 
1535   if (Tok == "~") {
1536     Expr E = readPrimary();
1537     return [=](uint64_t Dot) { return ~E(Dot); };
1538   }
1539   if (Tok == "-") {
1540     Expr E = readPrimary();
1541     return [=](uint64_t Dot) { return -E(Dot); };
1542   }
1543 
1544   // Built-in functions are parsed here.
1545   // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html.
1546   if (Tok == "ADDR") {
1547     expect("(");
1548     StringRef Name = next();
1549     expect(")");
1550     return
1551         [=](uint64_t Dot) { return ScriptBase->getOutputSectionAddress(Name); };
1552   }
1553   if (Tok == "ASSERT")
1554     return readAssert();
1555   if (Tok == "ALIGN") {
1556     Expr E = readParenExpr();
1557     return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); };
1558   }
1559   if (Tok == "CONSTANT") {
1560     expect("(");
1561     StringRef Tok = next();
1562     expect(")");
1563     return [=](uint64_t Dot) { return getConstant(Tok); };
1564   }
1565   if (Tok == "DEFINED") {
1566     expect("(");
1567     StringRef Tok = next();
1568     expect(")");
1569     return [=](uint64_t Dot) { return ScriptBase->isDefined(Tok) ? 1 : 0; };
1570   }
1571   if (Tok == "SEGMENT_START") {
1572     expect("(");
1573     next();
1574     expect(",");
1575     Expr E = readExpr();
1576     expect(")");
1577     return [=](uint64_t Dot) { return E(Dot); };
1578   }
1579   if (Tok == "DATA_SEGMENT_ALIGN") {
1580     expect("(");
1581     Expr E = readExpr();
1582     expect(",");
1583     readExpr();
1584     expect(")");
1585     return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); };
1586   }
1587   if (Tok == "DATA_SEGMENT_END") {
1588     expect("(");
1589     expect(".");
1590     expect(")");
1591     return [](uint64_t Dot) { return Dot; };
1592   }
1593   // GNU linkers implements more complicated logic to handle
1594   // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and just align to
1595   // the next page boundary for simplicity.
1596   if (Tok == "DATA_SEGMENT_RELRO_END") {
1597     expect("(");
1598     readExpr();
1599     expect(",");
1600     readExpr();
1601     expect(")");
1602     return [](uint64_t Dot) { return alignTo(Dot, Target->PageSize); };
1603   }
1604   if (Tok == "SIZEOF") {
1605     expect("(");
1606     StringRef Name = next();
1607     expect(")");
1608     return [=](uint64_t Dot) { return ScriptBase->getOutputSectionSize(Name); };
1609   }
1610   if (Tok == "ALIGNOF") {
1611     expect("(");
1612     StringRef Name = next();
1613     expect(")");
1614     return
1615         [=](uint64_t Dot) { return ScriptBase->getOutputSectionAlign(Name); };
1616   }
1617   if (Tok == "SIZEOF_HEADERS")
1618     return [=](uint64_t Dot) { return ScriptBase->getHeaderSize(); };
1619 
1620   // Tok is a literal number.
1621   uint64_t V;
1622   if (readInteger(Tok, V))
1623     return [=](uint64_t Dot) { return V; };
1624 
1625   // Tok is a symbol name.
1626   if (Tok != "." && !isValidCIdentifier(Tok))
1627     setError("malformed number: " + Tok);
1628   return [=](uint64_t Dot) { return getSymbolValue(Tok, Dot); };
1629 }
1630 
1631 Expr ScriptParser::readTernary(Expr Cond) {
1632   next();
1633   Expr L = readExpr();
1634   expect(":");
1635   Expr R = readExpr();
1636   return [=](uint64_t Dot) { return Cond(Dot) ? L(Dot) : R(Dot); };
1637 }
1638 
1639 Expr ScriptParser::readParenExpr() {
1640   expect("(");
1641   Expr E = readExpr();
1642   expect(")");
1643   return E;
1644 }
1645 
1646 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() {
1647   std::vector<StringRef> Phdrs;
1648   while (!Error && peek().startswith(":")) {
1649     StringRef Tok = next();
1650     Tok = (Tok.size() == 1) ? next() : Tok.substr(1);
1651     if (Tok.empty()) {
1652       setError("section header name is empty");
1653       break;
1654     }
1655     Phdrs.push_back(Tok);
1656   }
1657   return Phdrs;
1658 }
1659 
1660 unsigned ScriptParser::readPhdrType() {
1661   StringRef Tok = next();
1662   unsigned Ret = StringSwitch<unsigned>(Tok)
1663                      .Case("PT_NULL", PT_NULL)
1664                      .Case("PT_LOAD", PT_LOAD)
1665                      .Case("PT_DYNAMIC", PT_DYNAMIC)
1666                      .Case("PT_INTERP", PT_INTERP)
1667                      .Case("PT_NOTE", PT_NOTE)
1668                      .Case("PT_SHLIB", PT_SHLIB)
1669                      .Case("PT_PHDR", PT_PHDR)
1670                      .Case("PT_TLS", PT_TLS)
1671                      .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME)
1672                      .Case("PT_GNU_STACK", PT_GNU_STACK)
1673                      .Case("PT_GNU_RELRO", PT_GNU_RELRO)
1674                      .Default(-1);
1675 
1676   if (Ret == (unsigned)-1) {
1677     setError("invalid program header type: " + Tok);
1678     return PT_NULL;
1679   }
1680   return Ret;
1681 }
1682 
1683 void ScriptParser::readVersionDeclaration(StringRef VerStr) {
1684   // Identifiers start at 2 because 0 and 1 are reserved
1685   // for VER_NDX_LOCAL and VER_NDX_GLOBAL constants.
1686   size_t VersionId = Config->VersionDefinitions.size() + 2;
1687   Config->VersionDefinitions.push_back({VerStr, VersionId});
1688 
1689   if (skip("global:") || peek() != "local:")
1690     readGlobal(VerStr);
1691   if (skip("local:"))
1692     readLocal();
1693   expect("}");
1694 
1695   // Each version may have a parent version. For example, "Ver2" defined as
1696   // "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" as a parent. This
1697   // version hierarchy is, probably against your instinct, purely for human; the
1698   // runtime doesn't care about them at all. In LLD, we simply skip the token.
1699   if (!VerStr.empty() && peek() != ";")
1700     next();
1701   expect(";");
1702 }
1703 
1704 void ScriptParser::readLocal() {
1705   Config->DefaultSymbolVersion = VER_NDX_LOCAL;
1706   expect("*");
1707   expect(";");
1708 }
1709 
1710 void ScriptParser::readExtern(std::vector<SymbolVersion> *Globals) {
1711   expect("\"C++\"");
1712   expect("{");
1713 
1714   for (;;) {
1715     if (peek() == "}" || Error)
1716       break;
1717     bool HasWildcard = !peek().startswith("\"") && hasWildcard(peek());
1718     Globals->push_back({unquote(next()), true, HasWildcard});
1719     expect(";");
1720   }
1721 
1722   expect("}");
1723   expect(";");
1724 }
1725 
1726 void ScriptParser::readGlobal(StringRef VerStr) {
1727   std::vector<SymbolVersion> *Globals;
1728   if (VerStr.empty())
1729     Globals = &Config->VersionScriptGlobals;
1730   else
1731     Globals = &Config->VersionDefinitions.back().Globals;
1732 
1733   for (;;) {
1734     if (skip("extern"))
1735       readExtern(Globals);
1736 
1737     StringRef Cur = peek();
1738     if (Cur == "}" || Cur == "local:" || Error)
1739       return;
1740     next();
1741     Globals->push_back({unquote(Cur), false, hasWildcard(Cur)});
1742     expect(";");
1743   }
1744 }
1745 
1746 static bool isUnderSysroot(StringRef Path) {
1747   if (Config->Sysroot == "")
1748     return false;
1749   for (; !Path.empty(); Path = sys::path::parent_path(Path))
1750     if (sys::fs::equivalent(Config->Sysroot, Path))
1751       return true;
1752   return false;
1753 }
1754 
1755 void elf::readLinkerScript(MemoryBufferRef MB) {
1756   StringRef Path = MB.getBufferIdentifier();
1757   ScriptParser(MB.getBuffer(), isUnderSysroot(Path)).readLinkerScript();
1758 }
1759 
1760 void elf::readVersionScript(MemoryBufferRef MB) {
1761   ScriptParser(MB.getBuffer(), false).readVersionScript();
1762 }
1763 
1764 template class elf::LinkerScript<ELF32LE>;
1765 template class elf::LinkerScript<ELF32BE>;
1766 template class elf::LinkerScript<ELF64LE>;
1767 template class elf::LinkerScript<ELF64BE>;
1768