1 //===- ScriptParser.cpp ---------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a recursive-descent parser for linker scripts.
10 // Parsed results are stored to Config and Script global objects.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ScriptParser.h"
15 #include "Config.h"
16 #include "Driver.h"
17 #include "InputSection.h"
18 #include "LinkerScript.h"
19 #include "OutputSections.h"
20 #include "ScriptLexer.h"
21 #include "Symbols.h"
22 #include "Target.h"
23 #include "lld/Common/Memory.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSet.h"
27 #include "llvm/ADT/StringSwitch.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include "llvm/Support/FileSystem.h"
32 #include "llvm/Support/Path.h"
33 #include <cassert>
34 #include <limits>
35 #include <vector>
36 
37 using namespace llvm;
38 using namespace llvm::ELF;
39 using namespace llvm::support::endian;
40 using namespace lld;
41 using namespace lld::elf;
42 
43 static bool isUnderSysroot(StringRef Path);
44 
45 namespace {
46 class ScriptParser final : ScriptLexer {
47 public:
48   ScriptParser(MemoryBufferRef MB)
49       : ScriptLexer(MB),
50         IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {}
51 
52   void readLinkerScript();
53   void readVersionScript();
54   void readDynamicList();
55   void readDefsym(StringRef Name);
56 
57 private:
58   void addFile(StringRef Path);
59 
60   void readAsNeeded();
61   void readEntry();
62   void readExtern();
63   void readGroup();
64   void readInclude();
65   void readInput();
66   void readMemory();
67   void readOutput();
68   void readOutputArch();
69   void readOutputFormat();
70   void readPhdrs();
71   void readRegionAlias();
72   void readSearchDir();
73   void readSections();
74   void readTarget();
75   void readVersion();
76   void readVersionScriptCommand();
77 
78   SymbolAssignment *readSymbolAssignment(StringRef Name);
79   ByteCommand *readByteCommand(StringRef Tok);
80   std::array<uint8_t, 4> readFill();
81   std::array<uint8_t, 4> parseFill(StringRef Tok);
82   bool readSectionDirective(OutputSection *Cmd, StringRef Tok1, StringRef Tok2);
83   void readSectionAddressType(OutputSection *Cmd);
84   OutputSection *readOverlaySectionDescription();
85   OutputSection *readOutputSectionDescription(StringRef OutSec);
86   std::vector<BaseCommand *> readOverlay();
87   std::vector<StringRef> readOutputSectionPhdrs();
88   InputSectionDescription *readInputSectionDescription(StringRef Tok);
89   StringMatcher readFilePatterns();
90   std::vector<SectionPattern> readInputSectionsList();
91   InputSectionDescription *readInputSectionRules(StringRef FilePattern);
92   unsigned readPhdrType();
93   SortSectionPolicy readSortKind();
94   SymbolAssignment *readProvideHidden(bool Provide, bool Hidden);
95   SymbolAssignment *readAssignment(StringRef Tok);
96   void readSort();
97   Expr readAssert();
98   Expr readConstant();
99   Expr getPageSize();
100 
101   uint64_t readMemoryAssignment(StringRef, StringRef, StringRef);
102   std::pair<uint32_t, uint32_t> readMemoryAttributes();
103 
104   Expr combine(StringRef Op, Expr L, Expr R);
105   Expr readExpr();
106   Expr readExpr1(Expr Lhs, int MinPrec);
107   StringRef readParenLiteral();
108   Expr readPrimary();
109   Expr readTernary(Expr Cond);
110   Expr readParenExpr();
111 
112   // For parsing version script.
113   std::vector<SymbolVersion> readVersionExtern();
114   void readAnonymousDeclaration();
115   void readVersionDeclaration(StringRef VerStr);
116 
117   std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>>
118   readSymbols();
119 
120   // True if a script being read is in a subdirectory specified by -sysroot.
121   bool IsUnderSysroot;
122 
123   // A set to detect an INCLUDE() cycle.
124   StringSet<> Seen;
125 };
126 } // namespace
127 
128 static StringRef unquote(StringRef S) {
129   if (S.startswith("\""))
130     return S.substr(1, S.size() - 2);
131   return S;
132 }
133 
134 static bool isUnderSysroot(StringRef Path) {
135   if (Config->Sysroot == "")
136     return false;
137   for (; !Path.empty(); Path = sys::path::parent_path(Path))
138     if (sys::fs::equivalent(Config->Sysroot, Path))
139       return true;
140   return false;
141 }
142 
143 // Some operations only support one non absolute value. Move the
144 // absolute one to the right hand side for convenience.
145 static void moveAbsRight(ExprValue &A, ExprValue &B) {
146   if (A.Sec == nullptr || (A.ForceAbsolute && !B.isAbsolute()))
147     std::swap(A, B);
148   if (!B.isAbsolute())
149     error(A.Loc + ": at least one side of the expression must be absolute");
150 }
151 
152 static ExprValue add(ExprValue A, ExprValue B) {
153   moveAbsRight(A, B);
154   return {A.Sec, A.ForceAbsolute, A.getSectionOffset() + B.getValue(), A.Loc};
155 }
156 
157 static ExprValue sub(ExprValue A, ExprValue B) {
158   // The distance between two symbols in sections is absolute.
159   if (!A.isAbsolute() && !B.isAbsolute())
160     return A.getValue() - B.getValue();
161   return {A.Sec, false, A.getSectionOffset() - B.getValue(), A.Loc};
162 }
163 
164 static ExprValue bitAnd(ExprValue A, ExprValue B) {
165   moveAbsRight(A, B);
166   return {A.Sec, A.ForceAbsolute,
167           (A.getValue() & B.getValue()) - A.getSecAddr(), A.Loc};
168 }
169 
170 static ExprValue bitOr(ExprValue A, ExprValue B) {
171   moveAbsRight(A, B);
172   return {A.Sec, A.ForceAbsolute,
173           (A.getValue() | B.getValue()) - A.getSecAddr(), A.Loc};
174 }
175 
176 void ScriptParser::readDynamicList() {
177   Config->HasDynamicList = true;
178   expect("{");
179   std::vector<SymbolVersion> Locals;
180   std::vector<SymbolVersion> Globals;
181   std::tie(Locals, Globals) = readSymbols();
182   expect(";");
183 
184   if (!atEOF()) {
185     setError("EOF expected, but got " + next());
186     return;
187   }
188   if (!Locals.empty()) {
189     setError("\"local:\" scope not supported in --dynamic-list");
190     return;
191   }
192 
193   for (SymbolVersion V : Globals)
194     Config->DynamicList.push_back(V);
195 }
196 
197 void ScriptParser::readVersionScript() {
198   readVersionScriptCommand();
199   if (!atEOF())
200     setError("EOF expected, but got " + next());
201 }
202 
203 void ScriptParser::readVersionScriptCommand() {
204   if (consume("{")) {
205     readAnonymousDeclaration();
206     return;
207   }
208 
209   while (!atEOF() && !errorCount() && peek() != "}") {
210     StringRef VerStr = next();
211     if (VerStr == "{") {
212       setError("anonymous version definition is used in "
213                "combination with other version definitions");
214       return;
215     }
216     expect("{");
217     readVersionDeclaration(VerStr);
218   }
219 }
220 
221 void ScriptParser::readVersion() {
222   expect("{");
223   readVersionScriptCommand();
224   expect("}");
225 }
226 
227 void ScriptParser::readLinkerScript() {
228   while (!atEOF()) {
229     StringRef Tok = next();
230     if (Tok == ";")
231       continue;
232 
233     if (Tok == "ENTRY") {
234       readEntry();
235     } else if (Tok == "EXTERN") {
236       readExtern();
237     } else if (Tok == "GROUP") {
238       readGroup();
239     } else if (Tok == "INCLUDE") {
240       readInclude();
241     } else if (Tok == "INPUT") {
242       readInput();
243     } else if (Tok == "MEMORY") {
244       readMemory();
245     } else if (Tok == "OUTPUT") {
246       readOutput();
247     } else if (Tok == "OUTPUT_ARCH") {
248       readOutputArch();
249     } else if (Tok == "OUTPUT_FORMAT") {
250       readOutputFormat();
251     } else if (Tok == "PHDRS") {
252       readPhdrs();
253     } else if (Tok == "REGION_ALIAS") {
254       readRegionAlias();
255     } else if (Tok == "SEARCH_DIR") {
256       readSearchDir();
257     } else if (Tok == "SECTIONS") {
258       readSections();
259     } else if (Tok == "TARGET") {
260       readTarget();
261     } else if (Tok == "VERSION") {
262       readVersion();
263     } else if (SymbolAssignment *Cmd = readAssignment(Tok)) {
264       Script->SectionCommands.push_back(Cmd);
265     } else {
266       setError("unknown directive: " + Tok);
267     }
268   }
269 }
270 
271 void ScriptParser::readDefsym(StringRef Name) {
272   if (errorCount())
273     return;
274   Expr E = readExpr();
275   if (!atEOF())
276     setError("EOF expected, but got " + next());
277   SymbolAssignment *Cmd = make<SymbolAssignment>(Name, E, getCurrentLocation());
278   Script->SectionCommands.push_back(Cmd);
279 }
280 
281 void ScriptParser::addFile(StringRef S) {
282   if (IsUnderSysroot && S.startswith("/")) {
283     SmallString<128> PathData;
284     StringRef Path = (Config->Sysroot + S).toStringRef(PathData);
285     if (sys::fs::exists(Path)) {
286       Driver->addFile(Saver.save(Path), /*WithLOption=*/false);
287       return;
288     }
289   }
290 
291   if (S.startswith("/")) {
292     Driver->addFile(S, /*WithLOption=*/false);
293   } else if (S.startswith("=")) {
294     if (Config->Sysroot.empty())
295       Driver->addFile(S.substr(1), /*WithLOption=*/false);
296     else
297       Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)),
298                       /*WithLOption=*/false);
299   } else if (S.startswith("-l")) {
300     Driver->addLibrary(S.substr(2));
301   } else if (sys::fs::exists(S)) {
302     Driver->addFile(S, /*WithLOption=*/false);
303   } else {
304     if (Optional<std::string> Path = findFromSearchPaths(S))
305       Driver->addFile(Saver.save(*Path), /*WithLOption=*/true);
306     else
307       setError("unable to find " + S);
308   }
309 }
310 
311 void ScriptParser::readAsNeeded() {
312   expect("(");
313   bool Orig = Config->AsNeeded;
314   Config->AsNeeded = true;
315   while (!errorCount() && !consume(")"))
316     addFile(unquote(next()));
317   Config->AsNeeded = Orig;
318 }
319 
320 void ScriptParser::readEntry() {
321   // -e <symbol> takes predecence over ENTRY(<symbol>).
322   expect("(");
323   StringRef Tok = next();
324   if (Config->Entry.empty())
325     Config->Entry = Tok;
326   expect(")");
327 }
328 
329 void ScriptParser::readExtern() {
330   expect("(");
331   while (!errorCount() && !consume(")"))
332     Config->Undefined.push_back(unquote(next()));
333 }
334 
335 void ScriptParser::readGroup() {
336   bool Orig = InputFile::IsInGroup;
337   InputFile::IsInGroup = true;
338   readInput();
339   InputFile::IsInGroup = Orig;
340   if (!Orig)
341     ++InputFile::NextGroupId;
342 }
343 
344 void ScriptParser::readInclude() {
345   StringRef Tok = unquote(next());
346 
347   if (!Seen.insert(Tok).second) {
348     setError("there is a cycle in linker script INCLUDEs");
349     return;
350   }
351 
352   if (Optional<std::string> Path = searchScript(Tok)) {
353     if (Optional<MemoryBufferRef> MB = readFile(*Path))
354       tokenize(*MB);
355     return;
356   }
357   setError("cannot find linker script " + Tok);
358 }
359 
360 void ScriptParser::readInput() {
361   expect("(");
362   while (!errorCount() && !consume(")")) {
363     if (consume("AS_NEEDED"))
364       readAsNeeded();
365     else
366       addFile(unquote(next()));
367   }
368 }
369 
370 void ScriptParser::readOutput() {
371   // -o <file> takes predecence over OUTPUT(<file>).
372   expect("(");
373   StringRef Tok = next();
374   if (Config->OutputFile.empty())
375     Config->OutputFile = unquote(Tok);
376   expect(")");
377 }
378 
379 void ScriptParser::readOutputArch() {
380   // OUTPUT_ARCH is ignored for now.
381   expect("(");
382   while (!errorCount() && !consume(")"))
383     skip();
384 }
385 
386 static std::pair<ELFKind, uint16_t> parseBfdName(StringRef S) {
387   return StringSwitch<std::pair<ELFKind, uint16_t>>(S)
388       .Case("elf32-i386", {ELF32LEKind, EM_386})
389       .Case("elf32-iamcu", {ELF32LEKind, EM_IAMCU})
390       .Case("elf32-littlearm", {ELF32LEKind, EM_ARM})
391       .Case("elf32-x86-64", {ELF32LEKind, EM_X86_64})
392       .Case("elf64-aarch64", {ELF64LEKind, EM_AARCH64})
393       .Case("elf64-littleaarch64", {ELF64LEKind, EM_AARCH64})
394       .Case("elf32-powerpc", {ELF32BEKind, EM_PPC})
395       .Case("elf64-powerpc", {ELF64BEKind, EM_PPC64})
396       .Case("elf64-powerpcle", {ELF64LEKind, EM_PPC64})
397       .Case("elf64-x86-64", {ELF64LEKind, EM_X86_64})
398       .Cases("elf32-tradbigmips", "elf32-bigmips", {ELF32BEKind, EM_MIPS})
399       .Case("elf32-ntradbigmips", {ELF32BEKind, EM_MIPS})
400       .Case("elf32-tradlittlemips", {ELF32LEKind, EM_MIPS})
401       .Case("elf32-ntradlittlemips", {ELF32LEKind, EM_MIPS})
402       .Case("elf64-tradbigmips", {ELF64BEKind, EM_MIPS})
403       .Case("elf64-tradlittlemips", {ELF64LEKind, EM_MIPS})
404       .Case("elf32-littleriscv", {ELF32LEKind, EM_RISCV})
405       .Case("elf64-littleriscv", {ELF64LEKind, EM_RISCV})
406       .Default({ELFNoneKind, EM_NONE});
407 }
408 
409 // Parse OUTPUT_FORMAT(bfdname) or OUTPUT_FORMAT(bfdname, big, little).
410 // Currently we ignore big and little parameters.
411 void ScriptParser::readOutputFormat() {
412   expect("(");
413 
414   StringRef Name = unquote(next());
415   StringRef S = Name;
416   if (S.consume_back("-freebsd"))
417     Config->OSABI = ELFOSABI_FREEBSD;
418 
419   std::tie(Config->EKind, Config->EMachine) = parseBfdName(S);
420   if (Config->EMachine == EM_NONE)
421     setError("unknown output format name: " + Name);
422   if (S == "elf32-ntradlittlemips" || S == "elf32-ntradbigmips")
423     Config->MipsN32Abi = true;
424 
425   if (consume(")"))
426     return;
427   expect(",");
428   skip();
429   expect(",");
430   skip();
431   expect(")");
432 }
433 
434 void ScriptParser::readPhdrs() {
435   expect("{");
436 
437   while (!errorCount() && !consume("}")) {
438     PhdrsCommand Cmd;
439     Cmd.Name = next();
440     Cmd.Type = readPhdrType();
441 
442     while (!errorCount() && !consume(";")) {
443       if (consume("FILEHDR"))
444         Cmd.HasFilehdr = true;
445       else if (consume("PHDRS"))
446         Cmd.HasPhdrs = true;
447       else if (consume("AT"))
448         Cmd.LMAExpr = readParenExpr();
449       else if (consume("FLAGS"))
450         Cmd.Flags = readParenExpr()().getValue();
451       else
452         setError("unexpected header attribute: " + next());
453     }
454 
455     Script->PhdrsCommands.push_back(Cmd);
456   }
457 }
458 
459 void ScriptParser::readRegionAlias() {
460   expect("(");
461   StringRef Alias = unquote(next());
462   expect(",");
463   StringRef Name = next();
464   expect(")");
465 
466   if (Script->MemoryRegions.count(Alias))
467     setError("redefinition of memory region '" + Alias + "'");
468   if (!Script->MemoryRegions.count(Name))
469     setError("memory region '" + Name + "' is not defined");
470   Script->MemoryRegions.insert({Alias, Script->MemoryRegions[Name]});
471 }
472 
473 void ScriptParser::readSearchDir() {
474   expect("(");
475   StringRef Tok = next();
476   if (!Config->Nostdlib)
477     Config->SearchPaths.push_back(unquote(Tok));
478   expect(")");
479 }
480 
481 // This reads an overlay description. Overlays are used to describe output
482 // sections that use the same virtual memory range and normally would trigger
483 // linker's sections sanity check failures.
484 // https://sourceware.org/binutils/docs/ld/Overlay-Description.html#Overlay-Description
485 std::vector<BaseCommand *> ScriptParser::readOverlay() {
486   // VA and LMA expressions are optional, though for simplicity of
487   // implementation we assume they are not. That is what OVERLAY was designed
488   // for first of all: to allow sections with overlapping VAs at different LMAs.
489   Expr AddrExpr = readExpr();
490   expect(":");
491   expect("AT");
492   Expr LMAExpr = readParenExpr();
493   expect("{");
494 
495   std::vector<BaseCommand *> V;
496   OutputSection *Prev = nullptr;
497   while (!errorCount() && !consume("}")) {
498     // VA is the same for all sections. The LMAs are consecutive in memory
499     // starting from the base load address specified.
500     OutputSection *OS = readOverlaySectionDescription();
501     OS->AddrExpr = AddrExpr;
502     if (Prev)
503       OS->LMAExpr = [=] { return Prev->getLMA() + Prev->Size; };
504     else
505       OS->LMAExpr = LMAExpr;
506     V.push_back(OS);
507     Prev = OS;
508   }
509 
510   // According to the specification, at the end of the overlay, the location
511   // counter should be equal to the overlay base address plus size of the
512   // largest section seen in the overlay.
513   // Here we want to create the Dot assignment command to achieve that.
514   Expr MoveDot = [=] {
515     uint64_t Max = 0;
516     for (BaseCommand *Cmd : V)
517       Max = std::max(Max, cast<OutputSection>(Cmd)->Size);
518     return AddrExpr().getValue() + Max;
519   };
520   V.push_back(make<SymbolAssignment>(".", MoveDot, getCurrentLocation()));
521   return V;
522 }
523 
524 void ScriptParser::readSections() {
525   Script->HasSectionsCommand = true;
526 
527   // -no-rosegment is used to avoid placing read only non-executable sections in
528   // their own segment. We do the same if SECTIONS command is present in linker
529   // script. See comment for computeFlags().
530   Config->SingleRoRx = true;
531 
532   expect("{");
533   std::vector<BaseCommand *> V;
534   while (!errorCount() && !consume("}")) {
535     StringRef Tok = next();
536     if (Tok == "OVERLAY") {
537       for (BaseCommand *Cmd : readOverlay())
538         V.push_back(Cmd);
539       continue;
540     } else if (Tok == "INCLUDE") {
541       readInclude();
542       continue;
543     }
544 
545     if (BaseCommand *Cmd = readAssignment(Tok))
546       V.push_back(Cmd);
547     else
548       V.push_back(readOutputSectionDescription(Tok));
549   }
550 
551   if (!atEOF() && consume("INSERT")) {
552     std::vector<BaseCommand *> *Dest = nullptr;
553     if (consume("AFTER"))
554       Dest = &Script->InsertAfterCommands[next()];
555     else if (consume("BEFORE"))
556       Dest = &Script->InsertBeforeCommands[next()];
557     else
558       setError("expected AFTER/BEFORE, but got '" + next() + "'");
559     if (Dest)
560       Dest->insert(Dest->end(), V.begin(), V.end());
561     return;
562   }
563 
564   Script->SectionCommands.insert(Script->SectionCommands.end(), V.begin(),
565                                  V.end());
566 }
567 
568 void ScriptParser::readTarget() {
569   // TARGET(foo) is an alias for "--format foo". Unlike GNU linkers,
570   // we accept only a limited set of BFD names (i.e. "elf" or "binary")
571   // for --format. We recognize only /^elf/ and "binary" in the linker
572   // script as well.
573   expect("(");
574   StringRef Tok = next();
575   expect(")");
576 
577   if (Tok.startswith("elf"))
578     Config->FormatBinary = false;
579   else if (Tok == "binary")
580     Config->FormatBinary = true;
581   else
582     setError("unknown target: " + Tok);
583 }
584 
585 static int precedence(StringRef Op) {
586   return StringSwitch<int>(Op)
587       .Cases("*", "/", "%", 8)
588       .Cases("+", "-", 7)
589       .Cases("<<", ">>", 6)
590       .Cases("<", "<=", ">", ">=", "==", "!=", 5)
591       .Case("&", 4)
592       .Case("|", 3)
593       .Case("&&", 2)
594       .Case("||", 1)
595       .Default(-1);
596 }
597 
598 StringMatcher ScriptParser::readFilePatterns() {
599   std::vector<StringRef> V;
600   while (!errorCount() && !consume(")"))
601     V.push_back(next());
602   return StringMatcher(V);
603 }
604 
605 SortSectionPolicy ScriptParser::readSortKind() {
606   if (consume("SORT") || consume("SORT_BY_NAME"))
607     return SortSectionPolicy::Name;
608   if (consume("SORT_BY_ALIGNMENT"))
609     return SortSectionPolicy::Alignment;
610   if (consume("SORT_BY_INIT_PRIORITY"))
611     return SortSectionPolicy::Priority;
612   if (consume("SORT_NONE"))
613     return SortSectionPolicy::None;
614   return SortSectionPolicy::Default;
615 }
616 
617 // Reads SECTIONS command contents in the following form:
618 //
619 // <contents> ::= <elem>*
620 // <elem>     ::= <exclude>? <glob-pattern>
621 // <exclude>  ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")"
622 //
623 // For example,
624 //
625 // *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz)
626 //
627 // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o".
628 // The semantics of that is section .foo in any file, section .bar in
629 // any file but a.o, and section .baz in any file but b.o.
630 std::vector<SectionPattern> ScriptParser::readInputSectionsList() {
631   std::vector<SectionPattern> Ret;
632   while (!errorCount() && peek() != ")") {
633     StringMatcher ExcludeFilePat;
634     if (consume("EXCLUDE_FILE")) {
635       expect("(");
636       ExcludeFilePat = readFilePatterns();
637     }
638 
639     std::vector<StringRef> V;
640     while (!errorCount() && peek() != ")" && peek() != "EXCLUDE_FILE")
641       V.push_back(unquote(next()));
642 
643     if (!V.empty())
644       Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)});
645     else
646       setError("section pattern is expected");
647   }
648   return Ret;
649 }
650 
651 // Reads contents of "SECTIONS" directive. That directive contains a
652 // list of glob patterns for input sections. The grammar is as follows.
653 //
654 // <patterns> ::= <section-list>
655 //              | <sort> "(" <section-list> ")"
656 //              | <sort> "(" <sort> "(" <section-list> ")" ")"
657 //
658 // <sort>     ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT"
659 //              | "SORT_BY_INIT_PRIORITY" | "SORT_NONE"
660 //
661 // <section-list> is parsed by readInputSectionsList().
662 InputSectionDescription *
663 ScriptParser::readInputSectionRules(StringRef FilePattern) {
664   auto *Cmd = make<InputSectionDescription>(FilePattern);
665   expect("(");
666 
667   while (!errorCount() && !consume(")")) {
668     SortSectionPolicy Outer = readSortKind();
669     SortSectionPolicy Inner = SortSectionPolicy::Default;
670     std::vector<SectionPattern> V;
671     if (Outer != SortSectionPolicy::Default) {
672       expect("(");
673       Inner = readSortKind();
674       if (Inner != SortSectionPolicy::Default) {
675         expect("(");
676         V = readInputSectionsList();
677         expect(")");
678       } else {
679         V = readInputSectionsList();
680       }
681       expect(")");
682     } else {
683       V = readInputSectionsList();
684     }
685 
686     for (SectionPattern &Pat : V) {
687       Pat.SortInner = Inner;
688       Pat.SortOuter = Outer;
689     }
690 
691     std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns));
692   }
693   return Cmd;
694 }
695 
696 InputSectionDescription *
697 ScriptParser::readInputSectionDescription(StringRef Tok) {
698   // Input section wildcard can be surrounded by KEEP.
699   // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep
700   if (Tok == "KEEP") {
701     expect("(");
702     StringRef FilePattern = next();
703     InputSectionDescription *Cmd = readInputSectionRules(FilePattern);
704     expect(")");
705     Script->KeptSections.push_back(Cmd);
706     return Cmd;
707   }
708   return readInputSectionRules(Tok);
709 }
710 
711 void ScriptParser::readSort() {
712   expect("(");
713   expect("CONSTRUCTORS");
714   expect(")");
715 }
716 
717 Expr ScriptParser::readAssert() {
718   expect("(");
719   Expr E = readExpr();
720   expect(",");
721   StringRef Msg = unquote(next());
722   expect(")");
723 
724   return [=] {
725     if (!E().getValue())
726       error(Msg);
727     return Script->getDot();
728   };
729 }
730 
731 // Reads a FILL(expr) command. We handle the FILL command as an
732 // alias for =fillexp section attribute, which is different from
733 // what GNU linkers do.
734 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html
735 std::array<uint8_t, 4> ScriptParser::readFill() {
736   expect("(");
737   std::array<uint8_t, 4> V = parseFill(next());
738   expect(")");
739   return V;
740 }
741 
742 // Tries to read the special directive for an output section definition which
743 // can be one of following: "(NOLOAD)", "(COPY)", "(INFO)" or "(OVERLAY)".
744 // Tok1 and Tok2 are next 2 tokens peeked. See comment for readSectionAddressType below.
745 bool ScriptParser::readSectionDirective(OutputSection *Cmd, StringRef Tok1, StringRef Tok2) {
746   if (Tok1 != "(")
747     return false;
748   if (Tok2 != "NOLOAD" && Tok2 != "COPY" && Tok2 != "INFO" && Tok2 != "OVERLAY")
749     return false;
750 
751   expect("(");
752   if (consume("NOLOAD")) {
753     Cmd->Noload = true;
754   } else {
755     skip(); // This is "COPY", "INFO" or "OVERLAY".
756     Cmd->NonAlloc = true;
757   }
758   expect(")");
759   return true;
760 }
761 
762 // Reads an expression and/or the special directive for an output
763 // section definition. Directive is one of following: "(NOLOAD)",
764 // "(COPY)", "(INFO)" or "(OVERLAY)".
765 //
766 // An output section name can be followed by an address expression
767 // and/or directive. This grammar is not LL(1) because "(" can be
768 // interpreted as either the beginning of some expression or beginning
769 // of directive.
770 //
771 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html
772 // https://sourceware.org/binutils/docs/ld/Output-Section-Type.html
773 void ScriptParser::readSectionAddressType(OutputSection *Cmd) {
774   if (readSectionDirective(Cmd, peek(), peek2()))
775     return;
776 
777   Cmd->AddrExpr = readExpr();
778   if (peek() == "(" && !readSectionDirective(Cmd, "(", peek2()))
779     setError("unknown section directive: " + peek2());
780 }
781 
782 static Expr checkAlignment(Expr E, std::string &Loc) {
783   return [=] {
784     uint64_t Alignment = std::max((uint64_t)1, E().getValue());
785     if (!isPowerOf2_64(Alignment)) {
786       error(Loc + ": alignment must be power of 2");
787       return (uint64_t)1; // Return a dummy value.
788     }
789     return Alignment;
790   };
791 }
792 
793 OutputSection *ScriptParser::readOverlaySectionDescription() {
794   OutputSection *Cmd =
795       Script->createOutputSection(next(), getCurrentLocation());
796   Cmd->InOverlay = true;
797   expect("{");
798   while (!errorCount() && !consume("}"))
799     Cmd->SectionCommands.push_back(readInputSectionRules(next()));
800   Cmd->Phdrs = readOutputSectionPhdrs();
801   return Cmd;
802 }
803 
804 OutputSection *ScriptParser::readOutputSectionDescription(StringRef OutSec) {
805   OutputSection *Cmd =
806       Script->createOutputSection(OutSec, getCurrentLocation());
807 
808   size_t SymbolsReferenced = Script->ReferencedSymbols.size();
809 
810   if (peek() != ":")
811     readSectionAddressType(Cmd);
812   expect(":");
813 
814   std::string Location = getCurrentLocation();
815   if (consume("AT"))
816     Cmd->LMAExpr = readParenExpr();
817   if (consume("ALIGN"))
818     Cmd->AlignExpr = checkAlignment(readParenExpr(), Location);
819   if (consume("SUBALIGN"))
820     Cmd->SubalignExpr = checkAlignment(readParenExpr(), Location);
821 
822   // Parse constraints.
823   if (consume("ONLY_IF_RO"))
824     Cmd->Constraint = ConstraintKind::ReadOnly;
825   if (consume("ONLY_IF_RW"))
826     Cmd->Constraint = ConstraintKind::ReadWrite;
827   expect("{");
828 
829   while (!errorCount() && !consume("}")) {
830     StringRef Tok = next();
831     if (Tok == ";") {
832       // Empty commands are allowed. Do nothing here.
833     } else if (SymbolAssignment *Assign = readAssignment(Tok)) {
834       Cmd->SectionCommands.push_back(Assign);
835     } else if (ByteCommand *Data = readByteCommand(Tok)) {
836       Cmd->SectionCommands.push_back(Data);
837     } else if (Tok == "CONSTRUCTORS") {
838       // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors
839       // by name. This is for very old file formats such as ECOFF/XCOFF.
840       // For ELF, we should ignore.
841     } else if (Tok == "FILL") {
842       Cmd->Filler = readFill();
843     } else if (Tok == "SORT") {
844       readSort();
845     } else if (Tok == "INCLUDE") {
846       readInclude();
847     } else if (peek() == "(") {
848       Cmd->SectionCommands.push_back(readInputSectionDescription(Tok));
849     } else {
850       // We have a file name and no input sections description. It is not a
851       // commonly used syntax, but still acceptable. In that case, all sections
852       // from the file will be included.
853       auto *ISD = make<InputSectionDescription>(Tok);
854       ISD->SectionPatterns.push_back({{}, StringMatcher({"*"})});
855       Cmd->SectionCommands.push_back(ISD);
856     }
857   }
858 
859   if (consume(">"))
860     Cmd->MemoryRegionName = next();
861 
862   if (consume("AT")) {
863     expect(">");
864     Cmd->LMARegionName = next();
865   }
866 
867   if (Cmd->LMAExpr && !Cmd->LMARegionName.empty())
868     error("section can't have both LMA and a load region");
869 
870   Cmd->Phdrs = readOutputSectionPhdrs();
871 
872   if (consume("="))
873     Cmd->Filler = parseFill(next());
874   else if (peek().startswith("="))
875     Cmd->Filler = parseFill(next().drop_front());
876 
877   // Consume optional comma following output section command.
878   consume(",");
879 
880   if (Script->ReferencedSymbols.size() > SymbolsReferenced)
881     Cmd->ExpressionsUseSymbols = true;
882   return Cmd;
883 }
884 
885 // Parses a given string as a octal/decimal/hexadecimal number and
886 // returns it as a big-endian number. Used for `=<fillexp>`.
887 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html
888 //
889 // When reading a hexstring, ld.bfd handles it as a blob of arbitrary
890 // size, while ld.gold always handles it as a 32-bit big-endian number.
891 // We are compatible with ld.gold because it's easier to implement.
892 std::array<uint8_t, 4> ScriptParser::parseFill(StringRef Tok) {
893   uint32_t V = 0;
894   if (!to_integer(Tok, V))
895     setError("invalid filler expression: " + Tok);
896 
897   std::array<uint8_t, 4> Buf;
898   write32be(Buf.data(), V);
899   return Buf;
900 }
901 
902 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) {
903   expect("(");
904   SymbolAssignment *Cmd = readSymbolAssignment(next());
905   Cmd->Provide = Provide;
906   Cmd->Hidden = Hidden;
907   expect(")");
908   return Cmd;
909 }
910 
911 SymbolAssignment *ScriptParser::readAssignment(StringRef Tok) {
912   // Assert expression returns Dot, so this is equal to ".=."
913   if (Tok == "ASSERT")
914     return make<SymbolAssignment>(".", readAssert(), getCurrentLocation());
915 
916   size_t OldPos = Pos;
917   SymbolAssignment *Cmd = nullptr;
918   if (peek() == "=" || peek() == "+=")
919     Cmd = readSymbolAssignment(Tok);
920   else if (Tok == "PROVIDE")
921     Cmd = readProvideHidden(true, false);
922   else if (Tok == "HIDDEN")
923     Cmd = readProvideHidden(false, true);
924   else if (Tok == "PROVIDE_HIDDEN")
925     Cmd = readProvideHidden(true, true);
926 
927   if (Cmd) {
928     Cmd->CommandString =
929         Tok.str() + " " +
930         llvm::join(Tokens.begin() + OldPos, Tokens.begin() + Pos, " ");
931     expect(";");
932   }
933   return Cmd;
934 }
935 
936 SymbolAssignment *ScriptParser::readSymbolAssignment(StringRef Name) {
937   StringRef Op = next();
938   assert(Op == "=" || Op == "+=");
939   Expr E = readExpr();
940   if (Op == "+=") {
941     std::string Loc = getCurrentLocation();
942     E = [=] { return add(Script->getSymbolValue(Name, Loc), E()); };
943   }
944   return make<SymbolAssignment>(Name, E, getCurrentLocation());
945 }
946 
947 // This is an operator-precedence parser to parse a linker
948 // script expression.
949 Expr ScriptParser::readExpr() {
950   // Our lexer is context-aware. Set the in-expression bit so that
951   // they apply different tokenization rules.
952   bool Orig = InExpr;
953   InExpr = true;
954   Expr E = readExpr1(readPrimary(), 0);
955   InExpr = Orig;
956   return E;
957 }
958 
959 Expr ScriptParser::combine(StringRef Op, Expr L, Expr R) {
960   if (Op == "+")
961     return [=] { return add(L(), R()); };
962   if (Op == "-")
963     return [=] { return sub(L(), R()); };
964   if (Op == "*")
965     return [=] { return L().getValue() * R().getValue(); };
966   if (Op == "/") {
967     std::string Loc = getCurrentLocation();
968     return [=]() -> uint64_t {
969       if (uint64_t RV = R().getValue())
970         return L().getValue() / RV;
971       error(Loc + ": division by zero");
972       return 0;
973     };
974   }
975   if (Op == "%") {
976     std::string Loc = getCurrentLocation();
977     return [=]() -> uint64_t {
978       if (uint64_t RV = R().getValue())
979         return L().getValue() % RV;
980       error(Loc + ": modulo by zero");
981       return 0;
982     };
983   }
984   if (Op == "<<")
985     return [=] { return L().getValue() << R().getValue(); };
986   if (Op == ">>")
987     return [=] { return L().getValue() >> R().getValue(); };
988   if (Op == "<")
989     return [=] { return L().getValue() < R().getValue(); };
990   if (Op == ">")
991     return [=] { return L().getValue() > R().getValue(); };
992   if (Op == ">=")
993     return [=] { return L().getValue() >= R().getValue(); };
994   if (Op == "<=")
995     return [=] { return L().getValue() <= R().getValue(); };
996   if (Op == "==")
997     return [=] { return L().getValue() == R().getValue(); };
998   if (Op == "!=")
999     return [=] { return L().getValue() != R().getValue(); };
1000   if (Op == "||")
1001     return [=] { return L().getValue() || R().getValue(); };
1002   if (Op == "&&")
1003     return [=] { return L().getValue() && R().getValue(); };
1004   if (Op == "&")
1005     return [=] { return bitAnd(L(), R()); };
1006   if (Op == "|")
1007     return [=] { return bitOr(L(), R()); };
1008   llvm_unreachable("invalid operator");
1009 }
1010 
1011 // This is a part of the operator-precedence parser. This function
1012 // assumes that the remaining token stream starts with an operator.
1013 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) {
1014   while (!atEOF() && !errorCount()) {
1015     // Read an operator and an expression.
1016     if (consume("?"))
1017       return readTernary(Lhs);
1018     StringRef Op1 = peek();
1019     if (precedence(Op1) < MinPrec)
1020       break;
1021     skip();
1022     Expr Rhs = readPrimary();
1023 
1024     // Evaluate the remaining part of the expression first if the
1025     // next operator has greater precedence than the previous one.
1026     // For example, if we have read "+" and "3", and if the next
1027     // operator is "*", then we'll evaluate 3 * ... part first.
1028     while (!atEOF()) {
1029       StringRef Op2 = peek();
1030       if (precedence(Op2) <= precedence(Op1))
1031         break;
1032       Rhs = readExpr1(Rhs, precedence(Op2));
1033     }
1034 
1035     Lhs = combine(Op1, Lhs, Rhs);
1036   }
1037   return Lhs;
1038 }
1039 
1040 Expr ScriptParser::getPageSize() {
1041   std::string Location = getCurrentLocation();
1042   return [=]() -> uint64_t {
1043     if (Target)
1044       return Config->CommonPageSize;
1045     error(Location + ": unable to calculate page size");
1046     return 4096; // Return a dummy value.
1047   };
1048 }
1049 
1050 Expr ScriptParser::readConstant() {
1051   StringRef S = readParenLiteral();
1052   if (S == "COMMONPAGESIZE")
1053     return getPageSize();
1054   if (S == "MAXPAGESIZE")
1055     return [] { return Config->MaxPageSize; };
1056   setError("unknown constant: " + S);
1057   return [] { return 0; };
1058 }
1059 
1060 // Parses Tok as an integer. It recognizes hexadecimal (prefixed with
1061 // "0x" or suffixed with "H") and decimal numbers. Decimal numbers may
1062 // have "K" (Ki) or "M" (Mi) suffixes.
1063 static Optional<uint64_t> parseInt(StringRef Tok) {
1064   // Hexadecimal
1065   uint64_t Val;
1066   if (Tok.startswith_lower("0x")) {
1067     if (!to_integer(Tok.substr(2), Val, 16))
1068       return None;
1069     return Val;
1070   }
1071   if (Tok.endswith_lower("H")) {
1072     if (!to_integer(Tok.drop_back(), Val, 16))
1073       return None;
1074     return Val;
1075   }
1076 
1077   // Decimal
1078   if (Tok.endswith_lower("K")) {
1079     if (!to_integer(Tok.drop_back(), Val, 10))
1080       return None;
1081     return Val * 1024;
1082   }
1083   if (Tok.endswith_lower("M")) {
1084     if (!to_integer(Tok.drop_back(), Val, 10))
1085       return None;
1086     return Val * 1024 * 1024;
1087   }
1088   if (!to_integer(Tok, Val, 10))
1089     return None;
1090   return Val;
1091 }
1092 
1093 ByteCommand *ScriptParser::readByteCommand(StringRef Tok) {
1094   int Size = StringSwitch<int>(Tok)
1095                  .Case("BYTE", 1)
1096                  .Case("SHORT", 2)
1097                  .Case("LONG", 4)
1098                  .Case("QUAD", 8)
1099                  .Default(-1);
1100   if (Size == -1)
1101     return nullptr;
1102 
1103   size_t OldPos = Pos;
1104   Expr E = readParenExpr();
1105   std::string CommandString =
1106       Tok.str() + " " +
1107       llvm::join(Tokens.begin() + OldPos, Tokens.begin() + Pos, " ");
1108   return make<ByteCommand>(E, Size, CommandString);
1109 }
1110 
1111 StringRef ScriptParser::readParenLiteral() {
1112   expect("(");
1113   bool Orig = InExpr;
1114   InExpr = false;
1115   StringRef Tok = next();
1116   InExpr = Orig;
1117   expect(")");
1118   return Tok;
1119 }
1120 
1121 static void checkIfExists(OutputSection *Cmd, StringRef Location) {
1122   if (Cmd->Location.empty() && Script->ErrorOnMissingSection)
1123     error(Location + ": undefined section " + Cmd->Name);
1124 }
1125 
// Parses a single primary expression and returns it as a lazily-evaluated
// Expr. A primary is one of: a parenthesized expression, a unary operator
// (~, ! or -) applied to another primary, a built-in function call, the
// dot ".", an integer literal, or a symbol name.
Expr ScriptParser::readPrimary() {
  if (peek() == "(")
    return readParenExpr();

  // Unary operators apply to the primary that follows them.
  if (consume("~")) {
    Expr E = readPrimary();
    return [=] { return ~E().getValue(); };
  }
  if (consume("!")) {
    Expr E = readPrimary();
    return [=] { return !E().getValue(); };
  }
  if (consume("-")) {
    Expr E = readPrimary();
    return [=] { return -E().getValue(); };
  }

  StringRef Tok = next();
  // Captured by the lambdas below so that diagnostics produced at
  // evaluation time can point at this place in the script.
  std::string Location = getCurrentLocation();

  // Built-in functions are parsed here.
  // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html.
  if (Tok == "ABSOLUTE") {
    // Evaluates the inner expression and sets ForceAbsolute on the result.
    Expr Inner = readParenExpr();
    return [=] {
      ExprValue I = Inner();
      I.ForceAbsolute = true;
      return I;
    };
  }
  if (Tok == "ADDR") {
    // The section is flagged as used in an expression at parse time; the
    // existence check is deferred until the expression is evaluated.
    StringRef Name = readParenLiteral();
    OutputSection *Sec = Script->getOrCreateOutputSection(Name);
    Sec->UsedInExpression = true;
    return [=]() -> ExprValue {
      checkIfExists(Sec, Location);
      return {Sec, false, 0, Location};
    };
  }
  if (Tok == "ALIGN") {
    // Two forms: ALIGN(align) aligns the dot, and ALIGN(exp, align)
    // evaluates exp and sets the Alignment of its value.
    expect("(");
    Expr E = readExpr();
    if (consume(")")) {
      E = checkAlignment(E, Location);
      return [=] { return alignTo(Script->getDot(), E().getValue()); };
    }
    expect(",");
    Expr E2 = checkAlignment(readExpr(), Location);
    expect(")");
    return [=] {
      ExprValue V = E();
      V.Alignment = E2().getValue();
      return V;
    };
  }
  if (Tok == "ALIGNOF") {
    StringRef Name = readParenLiteral();
    OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
    return [=] {
      checkIfExists(Cmd, Location);
      return Cmd->Alignment;
    };
  }
  if (Tok == "ASSERT")
    return readAssert();
  if (Tok == "CONSTANT")
    return readConstant();
  if (Tok == "DATA_SEGMENT_ALIGN") {
    // The second argument is parsed but its value is discarded. The dot is
    // aligned to the first argument, using 1 if that evaluates to 0.
    expect("(");
    Expr E = readExpr();
    expect(",");
    readExpr();
    expect(")");
    return [=] {
      return alignTo(Script->getDot(), std::max((uint64_t)1, E().getValue()));
    };
  }
  if (Tok == "DATA_SEGMENT_END") {
    // The only accepted argument is the literal ".".
    expect("(");
    expect(".");
    expect(")");
    return [] { return Script->getDot(); };
  }
  if (Tok == "DATA_SEGMENT_RELRO_END") {
    // GNU linkers implement more complicated logic to handle
    // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and
    // just align to the next page boundary for simplicity.
    expect("(");
    readExpr();
    expect(",");
    readExpr();
    expect(")");
    Expr E = getPageSize();
    return [=] { return alignTo(Script->getDot(), E().getValue()); };
  }
  if (Tok == "DEFINED") {
    // Yields 1 if the symbol table lookup finds Name at evaluation time,
    // 0 otherwise.
    StringRef Name = readParenLiteral();
    return [=] { return Symtab->find(Name) ? 1 : 0; };
  }
  if (Tok == "LENGTH") {
    // The memory region must already be known when this is parsed.
    StringRef Name = readParenLiteral();
    if (Script->MemoryRegions.count(Name) == 0) {
      setError("memory region not defined: " + Name);
      return [] { return 0; };
    }
    return [=] { return Script->MemoryRegions[Name]->Length; };
  }
  if (Tok == "LOADADDR") {
    StringRef Name = readParenLiteral();
    OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
    Cmd->UsedInExpression = true;
    return [=] {
      checkIfExists(Cmd, Location);
      return Cmd->getLMA();
    };
  }
  if (Tok == "MAX" || Tok == "MIN") {
    // Both take two expressions; the keyword selects std::max or std::min.
    expect("(");
    Expr A = readExpr();
    expect(",");
    Expr B = readExpr();
    expect(")");
    if (Tok == "MIN")
      return [=] { return std::min(A().getValue(), B().getValue()); };
    return [=] { return std::max(A().getValue(), B().getValue()); };
  }
  if (Tok == "ORIGIN") {
    // The memory region must already be known when this is parsed.
    StringRef Name = readParenLiteral();
    if (Script->MemoryRegions.count(Name) == 0) {
      setError("memory region not defined: " + Name);
      return [] { return 0; };
    }
    return [=] { return Script->MemoryRegions[Name]->Origin; };
  }
  if (Tok == "SEGMENT_START") {
    // The first argument is skipped without being inspected; the result is
    // the value of the second argument.
    expect("(");
    skip();
    expect(",");
    Expr E = readExpr();
    expect(")");
    return [=] { return E(); };
  }
  if (Tok == "SIZEOF") {
    StringRef Name = readParenLiteral();
    OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
    // Linker script does not create an output section if its content is empty.
    // We want to allow SIZEOF(.foo) where .foo is a section which happened to
    // be empty.
    return [=] { return Cmd->Size; };
  }
  if (Tok == "SIZEOF_HEADERS")
    return [=] { return elf::getHeaderSize(); };

  // Tok is the dot.
  if (Tok == ".")
    return [=] { return Script->getSymbolValue(Tok, Location); };

  // Tok is a literal number.
  if (Optional<uint64_t> Val = parseInt(Tok))
    return [=] { return *Val; };

  // Tok is a symbol name. A token that is not a valid C identifier is
  // reported as an error, but it is still recorded as a referenced symbol
  // and a lookup expression is returned.
  if (!isValidCIdentifier(Tok))
    setError("malformed number: " + Tok);
  Script->ReferencedSymbols.push_back(Tok);
  return [=] { return Script->getSymbolValue(Tok, Location); };
}
1293 
1294 Expr ScriptParser::readTernary(Expr Cond) {
1295   Expr L = readExpr();
1296   expect(":");
1297   Expr R = readExpr();
1298   return [=] { return Cond().getValue() ? L() : R(); };
1299 }
1300 
1301 Expr ScriptParser::readParenExpr() {
1302   expect("(");
1303   Expr E = readExpr();
1304   expect(")");
1305   return E;
1306 }
1307 
1308 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() {
1309   std::vector<StringRef> Phdrs;
1310   while (!errorCount() && peek().startswith(":")) {
1311     StringRef Tok = next();
1312     Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1));
1313   }
1314   return Phdrs;
1315 }
1316 
1317 // Read a program header type name. The next token must be a
1318 // name of a program header type or a constant (e.g. "0x3").
1319 unsigned ScriptParser::readPhdrType() {
1320   StringRef Tok = next();
1321   if (Optional<uint64_t> Val = parseInt(Tok))
1322     return *Val;
1323 
1324   unsigned Ret = StringSwitch<unsigned>(Tok)
1325                      .Case("PT_NULL", PT_NULL)
1326                      .Case("PT_LOAD", PT_LOAD)
1327                      .Case("PT_DYNAMIC", PT_DYNAMIC)
1328                      .Case("PT_INTERP", PT_INTERP)
1329                      .Case("PT_NOTE", PT_NOTE)
1330                      .Case("PT_SHLIB", PT_SHLIB)
1331                      .Case("PT_PHDR", PT_PHDR)
1332                      .Case("PT_TLS", PT_TLS)
1333                      .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME)
1334                      .Case("PT_GNU_STACK", PT_GNU_STACK)
1335                      .Case("PT_GNU_RELRO", PT_GNU_RELRO)
1336                      .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE)
1337                      .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED)
1338                      .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA)
1339                      .Default(-1);
1340 
1341   if (Ret == (unsigned)-1) {
1342     setError("invalid program header type: " + Tok);
1343     return PT_NULL;
1344   }
1345   return Ret;
1346 }
1347 
1348 // Reads an anonymous version declaration.
1349 void ScriptParser::readAnonymousDeclaration() {
1350   std::vector<SymbolVersion> Locals;
1351   std::vector<SymbolVersion> Globals;
1352   std::tie(Locals, Globals) = readSymbols();
1353 
1354   for (SymbolVersion V : Locals) {
1355     if (V.Name == "*")
1356       Config->DefaultSymbolVersion = VER_NDX_LOCAL;
1357     else
1358       Config->VersionScriptLocals.push_back(V);
1359   }
1360 
1361   for (SymbolVersion V : Globals)
1362     Config->VersionScriptGlobals.push_back(V);
1363 
1364   expect(";");
1365 }
1366 
1367 // Reads a non-anonymous version definition,
1368 // e.g. "VerStr { global: foo; bar; local: *; };".
1369 void ScriptParser::readVersionDeclaration(StringRef VerStr) {
1370   // Read a symbol list.
1371   std::vector<SymbolVersion> Locals;
1372   std::vector<SymbolVersion> Globals;
1373   std::tie(Locals, Globals) = readSymbols();
1374 
1375   for (SymbolVersion V : Locals) {
1376     if (V.Name == "*")
1377       Config->DefaultSymbolVersion = VER_NDX_LOCAL;
1378     else
1379       Config->VersionScriptLocals.push_back(V);
1380   }
1381 
1382   // Create a new version definition and add that to the global symbols.
1383   VersionDefinition Ver;
1384   Ver.Name = VerStr;
1385   Ver.Globals = Globals;
1386 
1387   // User-defined version number starts from 2 because 0 and 1 are
1388   // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively.
1389   Ver.Id = Config->VersionDefinitions.size() + 2;
1390   Config->VersionDefinitions.push_back(Ver);
1391 
1392   // Each version may have a parent version. For example, "Ver2"
1393   // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1"
1394   // as a parent. This version hierarchy is, probably against your
1395   // instinct, purely for hint; the runtime doesn't care about it
1396   // at all. In LLD, we simply ignore it.
1397   if (peek() != ";")
1398     skip();
1399   expect(";");
1400 }
1401 
1402 static bool hasWildcard(StringRef S) {
1403   return S.find_first_of("?*[") != StringRef::npos;
1404 }
1405 
1406 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };".
1407 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>>
1408 ScriptParser::readSymbols() {
1409   std::vector<SymbolVersion> Locals;
1410   std::vector<SymbolVersion> Globals;
1411   std::vector<SymbolVersion> *V = &Globals;
1412 
1413   while (!errorCount()) {
1414     if (consume("}"))
1415       break;
1416     if (consumeLabel("local")) {
1417       V = &Locals;
1418       continue;
1419     }
1420     if (consumeLabel("global")) {
1421       V = &Globals;
1422       continue;
1423     }
1424 
1425     if (consume("extern")) {
1426       std::vector<SymbolVersion> Ext = readVersionExtern();
1427       V->insert(V->end(), Ext.begin(), Ext.end());
1428     } else {
1429       StringRef Tok = next();
1430       V->push_back({unquote(Tok), false, hasWildcard(Tok)});
1431     }
1432     expect(";");
1433   }
1434   return {Locals, Globals};
1435 }
1436 
1437 // Reads an "extern C++" directive, e.g.,
1438 // "extern "C++" { ns::*; "f(int, double)"; };"
1439 //
1440 // The last semicolon is optional. E.g. this is OK:
1441 // "extern "C++" { ns::*; "f(int, double)" };"
1442 std::vector<SymbolVersion> ScriptParser::readVersionExtern() {
1443   StringRef Tok = next();
1444   bool IsCXX = Tok == "\"C++\"";
1445   if (!IsCXX && Tok != "\"C\"")
1446     setError("Unknown language");
1447   expect("{");
1448 
1449   std::vector<SymbolVersion> Ret;
1450   while (!errorCount() && peek() != "}") {
1451     StringRef Tok = next();
1452     bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok);
1453     Ret.push_back({unquote(Tok), IsCXX, HasWildcard});
1454     if (consume("}"))
1455       return Ret;
1456     expect(";");
1457   }
1458 
1459   expect("}");
1460   return Ret;
1461 }
1462 
1463 uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2,
1464                                             StringRef S3) {
1465   if (!consume(S1) && !consume(S2) && !consume(S3)) {
1466     setError("expected one of: " + S1 + ", " + S2 + ", or " + S3);
1467     return 0;
1468   }
1469   expect("=");
1470   return readExpr()().getValue();
1471 }
1472 
1473 // Parse the MEMORY command as specified in:
1474 // https://sourceware.org/binutils/docs/ld/MEMORY.html
1475 //
1476 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... }
1477 void ScriptParser::readMemory() {
1478   expect("{");
1479   while (!errorCount() && !consume("}")) {
1480     StringRef Tok = next();
1481     if (Tok == "INCLUDE") {
1482       readInclude();
1483       continue;
1484     }
1485 
1486     uint32_t Flags = 0;
1487     uint32_t NegFlags = 0;
1488     if (consume("(")) {
1489       std::tie(Flags, NegFlags) = readMemoryAttributes();
1490       expect(")");
1491     }
1492     expect(":");
1493 
1494     uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o");
1495     expect(",");
1496     uint64_t Length = readMemoryAssignment("LENGTH", "len", "l");
1497 
1498     // Add the memory region to the region map.
1499     MemoryRegion *MR = make<MemoryRegion>(Tok, Origin, Length, Flags, NegFlags);
1500     if (!Script->MemoryRegions.insert({Tok, MR}).second)
1501       setError("region '" + Tok + "' already defined");
1502   }
1503 }
1504 
1505 // This function parses the attributes used to match against section
1506 // flags when placing output sections in a memory region. These flags
1507 // are only used when an explicit memory region name is not used.
1508 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() {
1509   uint32_t Flags = 0;
1510   uint32_t NegFlags = 0;
1511   bool Invert = false;
1512 
1513   for (char C : next().lower()) {
1514     uint32_t Flag = 0;
1515     if (C == '!')
1516       Invert = !Invert;
1517     else if (C == 'w')
1518       Flag = SHF_WRITE;
1519     else if (C == 'x')
1520       Flag = SHF_EXECINSTR;
1521     else if (C == 'a')
1522       Flag = SHF_ALLOC;
1523     else if (C != 'r')
1524       setError("invalid memory region attribute");
1525 
1526     if (Invert)
1527       NegFlags |= Flag;
1528     else
1529       Flags |= Flag;
1530   }
1531   return {Flags, NegFlags};
1532 }
1533 
1534 void elf::readLinkerScript(MemoryBufferRef MB) {
1535   ScriptParser(MB).readLinkerScript();
1536 }
1537 
1538 void elf::readVersionScript(MemoryBufferRef MB) {
1539   ScriptParser(MB).readVersionScript();
1540 }
1541 
1542 void elf::readDynamicList(MemoryBufferRef MB) {
1543   ScriptParser(MB).readDynamicList();
1544 }
1545 
1546 void elf::readDefsym(StringRef Name, MemoryBufferRef MB) {
1547   ScriptParser(MB).readDefsym(Name);
1548 }
1549