1 //===- ScriptParser.cpp ---------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This file contains a recursive-descent parser for linker scripts.
10 // Parsed results are stored to Config and Script global objects.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ScriptParser.h"
15 #include "Config.h"
16 #include "Driver.h"
17 #include "InputSection.h"
18 #include "LinkerScript.h"
19 #include "OutputSections.h"
20 #include "ScriptLexer.h"
21 #include "Symbols.h"
22 #include "Target.h"
23 #include "lld/Common/Memory.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSet.h"
27 #include "llvm/ADT/StringSwitch.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include "llvm/Support/FileSystem.h"
32 #include "llvm/Support/Path.h"
33 #include <cassert>
34 #include <limits>
35 #include <vector>
36 
37 using namespace llvm;
38 using namespace llvm::ELF;
39 using namespace llvm::support::endian;
40 using namespace lld;
41 using namespace lld::elf;
42 
43 namespace {
44 class ScriptParser final : ScriptLexer {
45 public:
46   ScriptParser(MemoryBufferRef MB) : ScriptLexer(MB) {
47     // Initialize IsUnderSysroot
48     if (Config->Sysroot == "")
49       return;
50     StringRef Path = MB.getBufferIdentifier();
51     for (; !Path.empty(); Path = sys::path::parent_path(Path)) {
52       if (!sys::fs::equivalent(Config->Sysroot, Path))
53         continue;
54       IsUnderSysroot = true;
55       return;
56     }
57   }
58 
59   void readLinkerScript();
60   void readVersionScript();
61   void readDynamicList();
62   void readDefsym(StringRef Name);
63 
64 private:
65   void addFile(StringRef Path);
66 
67   void readAsNeeded();
68   void readEntry();
69   void readExtern();
70   void readGroup();
71   void readInclude();
72   void readInput();
73   void readMemory();
74   void readOutput();
75   void readOutputArch();
76   void readOutputFormat();
77   void readPhdrs();
78   void readRegionAlias();
79   void readSearchDir();
80   void readSections();
81   void readTarget();
82   void readVersion();
83   void readVersionScriptCommand();
84 
85   SymbolAssignment *readSymbolAssignment(StringRef Name);
86   ByteCommand *readByteCommand(StringRef Tok);
87   std::array<uint8_t, 4> readFill();
88   bool readSectionDirective(OutputSection *Cmd, StringRef Tok1, StringRef Tok2);
89   void readSectionAddressType(OutputSection *Cmd);
90   OutputSection *readOverlaySectionDescription();
91   OutputSection *readOutputSectionDescription(StringRef OutSec);
92   std::vector<BaseCommand *> readOverlay();
93   std::vector<StringRef> readOutputSectionPhdrs();
94   InputSectionDescription *readInputSectionDescription(StringRef Tok);
95   StringMatcher readFilePatterns();
96   std::vector<SectionPattern> readInputSectionsList();
97   InputSectionDescription *readInputSectionRules(StringRef FilePattern);
98   unsigned readPhdrType();
99   SortSectionPolicy readSortKind();
100   SymbolAssignment *readProvideHidden(bool Provide, bool Hidden);
101   SymbolAssignment *readAssignment(StringRef Tok);
102   void readSort();
103   Expr readAssert();
104   Expr readConstant();
105   Expr getPageSize();
106 
107   uint64_t readMemoryAssignment(StringRef, StringRef, StringRef);
108   std::pair<uint32_t, uint32_t> readMemoryAttributes();
109 
110   Expr combine(StringRef Op, Expr L, Expr R);
111   Expr readExpr();
112   Expr readExpr1(Expr Lhs, int MinPrec);
113   StringRef readParenLiteral();
114   Expr readPrimary();
115   Expr readTernary(Expr Cond);
116   Expr readParenExpr();
117 
118   // For parsing version script.
119   std::vector<SymbolVersion> readVersionExtern();
120   void readAnonymousDeclaration();
121   void readVersionDeclaration(StringRef VerStr);
122 
123   std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>>
124   readSymbols();
125 
126   // True if a script being read is in a subdirectory specified by -sysroot.
127   bool IsUnderSysroot = false;
128 
129   // A set to detect an INCLUDE() cycle.
130   StringSet<> Seen;
131 };
132 } // namespace
133 
134 static StringRef unquote(StringRef S) {
135   if (S.startswith("\""))
136     return S.substr(1, S.size() - 2);
137   return S;
138 }
139 
140 // Some operations only support one non absolute value. Move the
141 // absolute one to the right hand side for convenience.
142 static void moveAbsRight(ExprValue &A, ExprValue &B) {
143   if (A.Sec == nullptr || (A.ForceAbsolute && !B.isAbsolute()))
144     std::swap(A, B);
145   if (!B.isAbsolute())
146     error(A.Loc + ": at least one side of the expression must be absolute");
147 }
148 
149 static ExprValue add(ExprValue A, ExprValue B) {
150   moveAbsRight(A, B);
151   return {A.Sec, A.ForceAbsolute, A.getSectionOffset() + B.getValue(), A.Loc};
152 }
153 
154 static ExprValue sub(ExprValue A, ExprValue B) {
155   // The distance between two symbols in sections is absolute.
156   if (!A.isAbsolute() && !B.isAbsolute())
157     return A.getValue() - B.getValue();
158   return {A.Sec, false, A.getSectionOffset() - B.getValue(), A.Loc};
159 }
160 
161 static ExprValue bitAnd(ExprValue A, ExprValue B) {
162   moveAbsRight(A, B);
163   return {A.Sec, A.ForceAbsolute,
164           (A.getValue() & B.getValue()) - A.getSecAddr(), A.Loc};
165 }
166 
167 static ExprValue bitOr(ExprValue A, ExprValue B) {
168   moveAbsRight(A, B);
169   return {A.Sec, A.ForceAbsolute,
170           (A.getValue() | B.getValue()) - A.getSecAddr(), A.Loc};
171 }
172 
173 void ScriptParser::readDynamicList() {
174   Config->HasDynamicList = true;
175   expect("{");
176   std::vector<SymbolVersion> Locals;
177   std::vector<SymbolVersion> Globals;
178   std::tie(Locals, Globals) = readSymbols();
179   expect(";");
180 
181   if (!atEOF()) {
182     setError("EOF expected, but got " + next());
183     return;
184   }
185   if (!Locals.empty()) {
186     setError("\"local:\" scope not supported in --dynamic-list");
187     return;
188   }
189 
190   for (SymbolVersion V : Globals)
191     Config->DynamicList.push_back(V);
192 }
193 
194 void ScriptParser::readVersionScript() {
195   readVersionScriptCommand();
196   if (!atEOF())
197     setError("EOF expected, but got " + next());
198 }
199 
200 void ScriptParser::readVersionScriptCommand() {
201   if (consume("{")) {
202     readAnonymousDeclaration();
203     return;
204   }
205 
206   while (!atEOF() && !errorCount() && peek() != "}") {
207     StringRef VerStr = next();
208     if (VerStr == "{") {
209       setError("anonymous version definition is used in "
210                "combination with other version definitions");
211       return;
212     }
213     expect("{");
214     readVersionDeclaration(VerStr);
215   }
216 }
217 
218 void ScriptParser::readVersion() {
219   expect("{");
220   readVersionScriptCommand();
221   expect("}");
222 }
223 
224 void ScriptParser::readLinkerScript() {
225   while (!atEOF()) {
226     StringRef Tok = next();
227     if (Tok == ";")
228       continue;
229 
230     if (Tok == "ENTRY") {
231       readEntry();
232     } else if (Tok == "EXTERN") {
233       readExtern();
234     } else if (Tok == "GROUP") {
235       readGroup();
236     } else if (Tok == "INCLUDE") {
237       readInclude();
238     } else if (Tok == "INPUT") {
239       readInput();
240     } else if (Tok == "MEMORY") {
241       readMemory();
242     } else if (Tok == "OUTPUT") {
243       readOutput();
244     } else if (Tok == "OUTPUT_ARCH") {
245       readOutputArch();
246     } else if (Tok == "OUTPUT_FORMAT") {
247       readOutputFormat();
248     } else if (Tok == "PHDRS") {
249       readPhdrs();
250     } else if (Tok == "REGION_ALIAS") {
251       readRegionAlias();
252     } else if (Tok == "SEARCH_DIR") {
253       readSearchDir();
254     } else if (Tok == "SECTIONS") {
255       readSections();
256     } else if (Tok == "TARGET") {
257       readTarget();
258     } else if (Tok == "VERSION") {
259       readVersion();
260     } else if (SymbolAssignment *Cmd = readAssignment(Tok)) {
261       Script->SectionCommands.push_back(Cmd);
262     } else {
263       setError("unknown directive: " + Tok);
264     }
265   }
266 }
267 
268 void ScriptParser::readDefsym(StringRef Name) {
269   if (errorCount())
270     return;
271   Expr E = readExpr();
272   if (!atEOF())
273     setError("EOF expected, but got " + next());
274   SymbolAssignment *Cmd = make<SymbolAssignment>(Name, E, getCurrentLocation());
275   Script->SectionCommands.push_back(Cmd);
276 }
277 
278 void ScriptParser::addFile(StringRef S) {
279   if (IsUnderSysroot && S.startswith("/")) {
280     SmallString<128> PathData;
281     StringRef Path = (Config->Sysroot + S).toStringRef(PathData);
282     if (sys::fs::exists(Path)) {
283       Driver->addFile(Saver.save(Path), /*WithLOption=*/false);
284       return;
285     }
286   }
287 
288   if (S.startswith("/")) {
289     Driver->addFile(S, /*WithLOption=*/false);
290   } else if (S.startswith("=")) {
291     if (Config->Sysroot.empty())
292       Driver->addFile(S.substr(1), /*WithLOption=*/false);
293     else
294       Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)),
295                       /*WithLOption=*/false);
296   } else if (S.startswith("-l")) {
297     Driver->addLibrary(S.substr(2));
298   } else if (sys::fs::exists(S)) {
299     Driver->addFile(S, /*WithLOption=*/false);
300   } else {
301     if (Optional<std::string> Path = findFromSearchPaths(S))
302       Driver->addFile(Saver.save(*Path), /*WithLOption=*/true);
303     else
304       setError("unable to find " + S);
305   }
306 }
307 
308 void ScriptParser::readAsNeeded() {
309   expect("(");
310   bool Orig = Config->AsNeeded;
311   Config->AsNeeded = true;
312   while (!errorCount() && !consume(")"))
313     addFile(unquote(next()));
314   Config->AsNeeded = Orig;
315 }
316 
317 void ScriptParser::readEntry() {
318   // -e <symbol> takes predecence over ENTRY(<symbol>).
319   expect("(");
320   StringRef Tok = next();
321   if (Config->Entry.empty())
322     Config->Entry = Tok;
323   expect(")");
324 }
325 
326 void ScriptParser::readExtern() {
327   expect("(");
328   while (!errorCount() && !consume(")"))
329     Config->Undefined.push_back(unquote(next()));
330 }
331 
332 void ScriptParser::readGroup() {
333   bool Orig = InputFile::IsInGroup;
334   InputFile::IsInGroup = true;
335   readInput();
336   InputFile::IsInGroup = Orig;
337   if (!Orig)
338     ++InputFile::NextGroupId;
339 }
340 
341 void ScriptParser::readInclude() {
342   StringRef Tok = unquote(next());
343 
344   if (!Seen.insert(Tok).second) {
345     setError("there is a cycle in linker script INCLUDEs");
346     return;
347   }
348 
349   if (Optional<std::string> Path = searchScript(Tok)) {
350     if (Optional<MemoryBufferRef> MB = readFile(*Path))
351       tokenize(*MB);
352     return;
353   }
354   setError("cannot find linker script " + Tok);
355 }
356 
357 void ScriptParser::readInput() {
358   expect("(");
359   while (!errorCount() && !consume(")")) {
360     if (consume("AS_NEEDED"))
361       readAsNeeded();
362     else
363       addFile(unquote(next()));
364   }
365 }
366 
367 void ScriptParser::readOutput() {
368   // -o <file> takes predecence over OUTPUT(<file>).
369   expect("(");
370   StringRef Tok = next();
371   if (Config->OutputFile.empty())
372     Config->OutputFile = unquote(Tok);
373   expect(")");
374 }
375 
376 void ScriptParser::readOutputArch() {
377   // OUTPUT_ARCH is ignored for now.
378   expect("(");
379   while (!errorCount() && !consume(")"))
380     skip();
381 }
382 
383 static std::pair<ELFKind, uint16_t> parseBfdName(StringRef S) {
384   return StringSwitch<std::pair<ELFKind, uint16_t>>(S)
385       .Case("elf32-i386", {ELF32LEKind, EM_386})
386       .Case("elf32-iamcu", {ELF32LEKind, EM_IAMCU})
387       .Case("elf32-littlearm", {ELF32LEKind, EM_ARM})
388       .Case("elf32-x86-64", {ELF32LEKind, EM_X86_64})
389       .Case("elf64-aarch64", {ELF64LEKind, EM_AARCH64})
390       .Case("elf64-littleaarch64", {ELF64LEKind, EM_AARCH64})
391       .Case("elf32-powerpc", {ELF32BEKind, EM_PPC})
392       .Case("elf64-powerpc", {ELF64BEKind, EM_PPC64})
393       .Case("elf64-powerpcle", {ELF64LEKind, EM_PPC64})
394       .Case("elf64-x86-64", {ELF64LEKind, EM_X86_64})
395       .Cases("elf32-tradbigmips", "elf32-bigmips", {ELF32BEKind, EM_MIPS})
396       .Case("elf32-ntradbigmips", {ELF32BEKind, EM_MIPS})
397       .Case("elf32-tradlittlemips", {ELF32LEKind, EM_MIPS})
398       .Case("elf32-ntradlittlemips", {ELF32LEKind, EM_MIPS})
399       .Case("elf64-tradbigmips", {ELF64BEKind, EM_MIPS})
400       .Case("elf64-tradlittlemips", {ELF64LEKind, EM_MIPS})
401       .Case("elf32-littleriscv", {ELF32LEKind, EM_RISCV})
402       .Case("elf64-littleriscv", {ELF64LEKind, EM_RISCV})
403       .Default({ELFNoneKind, EM_NONE});
404 }
405 
406 // Parse OUTPUT_FORMAT(bfdname) or OUTPUT_FORMAT(bfdname, big, little).
407 // Currently we ignore big and little parameters.
408 void ScriptParser::readOutputFormat() {
409   expect("(");
410 
411   StringRef Name = unquote(next());
412   StringRef S = Name;
413   if (S.consume_back("-freebsd"))
414     Config->OSABI = ELFOSABI_FREEBSD;
415 
416   std::tie(Config->EKind, Config->EMachine) = parseBfdName(S);
417   if (Config->EMachine == EM_NONE)
418     setError("unknown output format name: " + Name);
419   if (S == "elf32-ntradlittlemips" || S == "elf32-ntradbigmips")
420     Config->MipsN32Abi = true;
421 
422   if (consume(")"))
423     return;
424   expect(",");
425   skip();
426   expect(",");
427   skip();
428   expect(")");
429 }
430 
431 void ScriptParser::readPhdrs() {
432   expect("{");
433 
434   while (!errorCount() && !consume("}")) {
435     PhdrsCommand Cmd;
436     Cmd.Name = next();
437     Cmd.Type = readPhdrType();
438 
439     while (!errorCount() && !consume(";")) {
440       if (consume("FILEHDR"))
441         Cmd.HasFilehdr = true;
442       else if (consume("PHDRS"))
443         Cmd.HasPhdrs = true;
444       else if (consume("AT"))
445         Cmd.LMAExpr = readParenExpr();
446       else if (consume("FLAGS"))
447         Cmd.Flags = readParenExpr()().getValue();
448       else
449         setError("unexpected header attribute: " + next());
450     }
451 
452     Script->PhdrsCommands.push_back(Cmd);
453   }
454 }
455 
456 void ScriptParser::readRegionAlias() {
457   expect("(");
458   StringRef Alias = unquote(next());
459   expect(",");
460   StringRef Name = next();
461   expect(")");
462 
463   if (Script->MemoryRegions.count(Alias))
464     setError("redefinition of memory region '" + Alias + "'");
465   if (!Script->MemoryRegions.count(Name))
466     setError("memory region '" + Name + "' is not defined");
467   Script->MemoryRegions.insert({Alias, Script->MemoryRegions[Name]});
468 }
469 
470 void ScriptParser::readSearchDir() {
471   expect("(");
472   StringRef Tok = next();
473   if (!Config->Nostdlib)
474     Config->SearchPaths.push_back(unquote(Tok));
475   expect(")");
476 }
477 
478 // This reads an overlay description. Overlays are used to describe output
479 // sections that use the same virtual memory range and normally would trigger
480 // linker's sections sanity check failures.
481 // https://sourceware.org/binutils/docs/ld/Overlay-Description.html#Overlay-Description
482 std::vector<BaseCommand *> ScriptParser::readOverlay() {
483   // VA and LMA expressions are optional, though for simplicity of
484   // implementation we assume they are not. That is what OVERLAY was designed
485   // for first of all: to allow sections with overlapping VAs at different LMAs.
486   Expr AddrExpr = readExpr();
487   expect(":");
488   expect("AT");
489   Expr LMAExpr = readParenExpr();
490   expect("{");
491 
492   std::vector<BaseCommand *> V;
493   OutputSection *Prev = nullptr;
494   while (!errorCount() && !consume("}")) {
495     // VA is the same for all sections. The LMAs are consecutive in memory
496     // starting from the base load address specified.
497     OutputSection *OS = readOverlaySectionDescription();
498     OS->AddrExpr = AddrExpr;
499     if (Prev)
500       OS->LMAExpr = [=] { return Prev->getLMA() + Prev->Size; };
501     else
502       OS->LMAExpr = LMAExpr;
503     V.push_back(OS);
504     Prev = OS;
505   }
506 
507   // According to the specification, at the end of the overlay, the location
508   // counter should be equal to the overlay base address plus size of the
509   // largest section seen in the overlay.
510   // Here we want to create the Dot assignment command to achieve that.
511   Expr MoveDot = [=] {
512     uint64_t Max = 0;
513     for (BaseCommand *Cmd : V)
514       Max = std::max(Max, cast<OutputSection>(Cmd)->Size);
515     return AddrExpr().getValue() + Max;
516   };
517   V.push_back(make<SymbolAssignment>(".", MoveDot, getCurrentLocation()));
518   return V;
519 }
520 
521 void ScriptParser::readSections() {
522   Script->HasSectionsCommand = true;
523 
524   // -no-rosegment is used to avoid placing read only non-executable sections in
525   // their own segment. We do the same if SECTIONS command is present in linker
526   // script. See comment for computeFlags().
527   Config->SingleRoRx = true;
528 
529   expect("{");
530   std::vector<BaseCommand *> V;
531   while (!errorCount() && !consume("}")) {
532     StringRef Tok = next();
533     if (Tok == "OVERLAY") {
534       for (BaseCommand *Cmd : readOverlay())
535         V.push_back(Cmd);
536       continue;
537     } else if (Tok == "INCLUDE") {
538       readInclude();
539       continue;
540     }
541 
542     if (BaseCommand *Cmd = readAssignment(Tok))
543       V.push_back(Cmd);
544     else
545       V.push_back(readOutputSectionDescription(Tok));
546   }
547 
548   if (!atEOF() && consume("INSERT")) {
549     std::vector<BaseCommand *> *Dest = nullptr;
550     if (consume("AFTER"))
551       Dest = &Script->InsertAfterCommands[next()];
552     else if (consume("BEFORE"))
553       Dest = &Script->InsertBeforeCommands[next()];
554     else
555       setError("expected AFTER/BEFORE, but got '" + next() + "'");
556     if (Dest)
557       Dest->insert(Dest->end(), V.begin(), V.end());
558     return;
559   }
560 
561   Script->SectionCommands.insert(Script->SectionCommands.end(), V.begin(),
562                                  V.end());
563 }
564 
565 void ScriptParser::readTarget() {
566   // TARGET(foo) is an alias for "--format foo". Unlike GNU linkers,
567   // we accept only a limited set of BFD names (i.e. "elf" or "binary")
568   // for --format. We recognize only /^elf/ and "binary" in the linker
569   // script as well.
570   expect("(");
571   StringRef Tok = next();
572   expect(")");
573 
574   if (Tok.startswith("elf"))
575     Config->FormatBinary = false;
576   else if (Tok == "binary")
577     Config->FormatBinary = true;
578   else
579     setError("unknown target: " + Tok);
580 }
581 
582 static int precedence(StringRef Op) {
583   return StringSwitch<int>(Op)
584       .Cases("*", "/", "%", 8)
585       .Cases("+", "-", 7)
586       .Cases("<<", ">>", 6)
587       .Cases("<", "<=", ">", ">=", "==", "!=", 5)
588       .Case("&", 4)
589       .Case("|", 3)
590       .Case("&&", 2)
591       .Case("||", 1)
592       .Default(-1);
593 }
594 
595 StringMatcher ScriptParser::readFilePatterns() {
596   std::vector<StringRef> V;
597   while (!errorCount() && !consume(")"))
598     V.push_back(next());
599   return StringMatcher(V);
600 }
601 
602 SortSectionPolicy ScriptParser::readSortKind() {
603   if (consume("SORT") || consume("SORT_BY_NAME"))
604     return SortSectionPolicy::Name;
605   if (consume("SORT_BY_ALIGNMENT"))
606     return SortSectionPolicy::Alignment;
607   if (consume("SORT_BY_INIT_PRIORITY"))
608     return SortSectionPolicy::Priority;
609   if (consume("SORT_NONE"))
610     return SortSectionPolicy::None;
611   return SortSectionPolicy::Default;
612 }
613 
614 // Reads SECTIONS command contents in the following form:
615 //
616 // <contents> ::= <elem>*
617 // <elem>     ::= <exclude>? <glob-pattern>
618 // <exclude>  ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")"
619 //
620 // For example,
621 //
622 // *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz)
623 //
624 // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o".
625 // The semantics of that is section .foo in any file, section .bar in
626 // any file but a.o, and section .baz in any file but b.o.
627 std::vector<SectionPattern> ScriptParser::readInputSectionsList() {
628   std::vector<SectionPattern> Ret;
629   while (!errorCount() && peek() != ")") {
630     StringMatcher ExcludeFilePat;
631     if (consume("EXCLUDE_FILE")) {
632       expect("(");
633       ExcludeFilePat = readFilePatterns();
634     }
635 
636     std::vector<StringRef> V;
637     while (!errorCount() && peek() != ")" && peek() != "EXCLUDE_FILE")
638       V.push_back(unquote(next()));
639 
640     if (!V.empty())
641       Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)});
642     else
643       setError("section pattern is expected");
644   }
645   return Ret;
646 }
647 
648 // Reads contents of "SECTIONS" directive. That directive contains a
649 // list of glob patterns for input sections. The grammar is as follows.
650 //
651 // <patterns> ::= <section-list>
652 //              | <sort> "(" <section-list> ")"
653 //              | <sort> "(" <sort> "(" <section-list> ")" ")"
654 //
655 // <sort>     ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT"
656 //              | "SORT_BY_INIT_PRIORITY" | "SORT_NONE"
657 //
658 // <section-list> is parsed by readInputSectionsList().
659 InputSectionDescription *
660 ScriptParser::readInputSectionRules(StringRef FilePattern) {
661   auto *Cmd = make<InputSectionDescription>(FilePattern);
662   expect("(");
663 
664   while (!errorCount() && !consume(")")) {
665     SortSectionPolicy Outer = readSortKind();
666     SortSectionPolicy Inner = SortSectionPolicy::Default;
667     std::vector<SectionPattern> V;
668     if (Outer != SortSectionPolicy::Default) {
669       expect("(");
670       Inner = readSortKind();
671       if (Inner != SortSectionPolicy::Default) {
672         expect("(");
673         V = readInputSectionsList();
674         expect(")");
675       } else {
676         V = readInputSectionsList();
677       }
678       expect(")");
679     } else {
680       V = readInputSectionsList();
681     }
682 
683     for (SectionPattern &Pat : V) {
684       Pat.SortInner = Inner;
685       Pat.SortOuter = Outer;
686     }
687 
688     std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns));
689   }
690   return Cmd;
691 }
692 
693 InputSectionDescription *
694 ScriptParser::readInputSectionDescription(StringRef Tok) {
695   // Input section wildcard can be surrounded by KEEP.
696   // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep
697   if (Tok == "KEEP") {
698     expect("(");
699     StringRef FilePattern = next();
700     InputSectionDescription *Cmd = readInputSectionRules(FilePattern);
701     expect(")");
702     Script->KeptSections.push_back(Cmd);
703     return Cmd;
704   }
705   return readInputSectionRules(Tok);
706 }
707 
708 void ScriptParser::readSort() {
709   expect("(");
710   expect("CONSTRUCTORS");
711   expect(")");
712 }
713 
714 Expr ScriptParser::readAssert() {
715   expect("(");
716   Expr E = readExpr();
717   expect(",");
718   StringRef Msg = unquote(next());
719   expect(")");
720 
721   return [=] {
722     if (!E().getValue())
723       error(Msg);
724     return Script->getDot();
725   };
726 }
727 
728 // Tries to read the special directive for an output section definition which
729 // can be one of following: "(NOLOAD)", "(COPY)", "(INFO)" or "(OVERLAY)".
730 // Tok1 and Tok2 are next 2 tokens peeked. See comment for readSectionAddressType below.
731 bool ScriptParser::readSectionDirective(OutputSection *Cmd, StringRef Tok1, StringRef Tok2) {
732   if (Tok1 != "(")
733     return false;
734   if (Tok2 != "NOLOAD" && Tok2 != "COPY" && Tok2 != "INFO" && Tok2 != "OVERLAY")
735     return false;
736 
737   expect("(");
738   if (consume("NOLOAD")) {
739     Cmd->Noload = true;
740   } else {
741     skip(); // This is "COPY", "INFO" or "OVERLAY".
742     Cmd->NonAlloc = true;
743   }
744   expect(")");
745   return true;
746 }
747 
748 // Reads an expression and/or the special directive for an output
749 // section definition. Directive is one of following: "(NOLOAD)",
750 // "(COPY)", "(INFO)" or "(OVERLAY)".
751 //
752 // An output section name can be followed by an address expression
753 // and/or directive. This grammar is not LL(1) because "(" can be
754 // interpreted as either the beginning of some expression or beginning
755 // of directive.
756 //
757 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html
758 // https://sourceware.org/binutils/docs/ld/Output-Section-Type.html
759 void ScriptParser::readSectionAddressType(OutputSection *Cmd) {
760   if (readSectionDirective(Cmd, peek(), peek2()))
761     return;
762 
763   Cmd->AddrExpr = readExpr();
764   if (peek() == "(" && !readSectionDirective(Cmd, "(", peek2()))
765     setError("unknown section directive: " + peek2());
766 }
767 
768 static Expr checkAlignment(Expr E, std::string &Loc) {
769   return [=] {
770     uint64_t Alignment = std::max((uint64_t)1, E().getValue());
771     if (!isPowerOf2_64(Alignment)) {
772       error(Loc + ": alignment must be power of 2");
773       return (uint64_t)1; // Return a dummy value.
774     }
775     return Alignment;
776   };
777 }
778 
779 OutputSection *ScriptParser::readOverlaySectionDescription() {
780   OutputSection *Cmd =
781       Script->createOutputSection(next(), getCurrentLocation());
782   Cmd->InOverlay = true;
783   expect("{");
784   while (!errorCount() && !consume("}"))
785     Cmd->SectionCommands.push_back(readInputSectionRules(next()));
786   Cmd->Phdrs = readOutputSectionPhdrs();
787   return Cmd;
788 }
789 
790 OutputSection *ScriptParser::readOutputSectionDescription(StringRef OutSec) {
791   OutputSection *Cmd =
792       Script->createOutputSection(OutSec, getCurrentLocation());
793 
794   size_t SymbolsReferenced = Script->ReferencedSymbols.size();
795 
796   if (peek() != ":")
797     readSectionAddressType(Cmd);
798   expect(":");
799 
800   std::string Location = getCurrentLocation();
801   if (consume("AT"))
802     Cmd->LMAExpr = readParenExpr();
803   if (consume("ALIGN"))
804     Cmd->AlignExpr = checkAlignment(readParenExpr(), Location);
805   if (consume("SUBALIGN"))
806     Cmd->SubalignExpr = checkAlignment(readParenExpr(), Location);
807 
808   // Parse constraints.
809   if (consume("ONLY_IF_RO"))
810     Cmd->Constraint = ConstraintKind::ReadOnly;
811   if (consume("ONLY_IF_RW"))
812     Cmd->Constraint = ConstraintKind::ReadWrite;
813   expect("{");
814 
815   while (!errorCount() && !consume("}")) {
816     StringRef Tok = next();
817     if (Tok == ";") {
818       // Empty commands are allowed. Do nothing here.
819     } else if (SymbolAssignment *Assign = readAssignment(Tok)) {
820       Cmd->SectionCommands.push_back(Assign);
821     } else if (ByteCommand *Data = readByteCommand(Tok)) {
822       Cmd->SectionCommands.push_back(Data);
823     } else if (Tok == "CONSTRUCTORS") {
824       // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors
825       // by name. This is for very old file formats such as ECOFF/XCOFF.
826       // For ELF, we should ignore.
827     } else if (Tok == "FILL") {
828       // We handle the FILL command as an alias for =fillexp section attribute,
829       // which is different from what GNU linkers do.
830       // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html
831       Cmd->Filler = readFill();
832     } else if (Tok == "SORT") {
833       readSort();
834     } else if (Tok == "INCLUDE") {
835       readInclude();
836     } else if (peek() == "(") {
837       Cmd->SectionCommands.push_back(readInputSectionDescription(Tok));
838     } else {
839       // We have a file name and no input sections description. It is not a
840       // commonly used syntax, but still acceptable. In that case, all sections
841       // from the file will be included.
842       auto *ISD = make<InputSectionDescription>(Tok);
843       ISD->SectionPatterns.push_back({{}, StringMatcher({"*"})});
844       Cmd->SectionCommands.push_back(ISD);
845     }
846   }
847 
848   if (consume(">"))
849     Cmd->MemoryRegionName = next();
850 
851   if (consume("AT")) {
852     expect(">");
853     Cmd->LMARegionName = next();
854   }
855 
856   if (Cmd->LMAExpr && !Cmd->LMARegionName.empty())
857     error("section can't have both LMA and a load region");
858 
859   Cmd->Phdrs = readOutputSectionPhdrs();
860 
861   if (peek() == "=" || peek().startswith("=")) {
862     InExpr = true;
863     consume("=");
864     Cmd->Filler = readFill();
865     InExpr = false;
866   }
867 
868   // Consume optional comma following output section command.
869   consume(",");
870 
871   if (Script->ReferencedSymbols.size() > SymbolsReferenced)
872     Cmd->ExpressionsUseSymbols = true;
873   return Cmd;
874 }
875 
876 // Reads a `=<fillexp>` expression and returns its value as a big-endian number.
877 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html
878 // We do not support using symbols in such expressions.
879 //
880 // When reading a hexstring, ld.bfd handles it as a blob of arbitrary
881 // size, while ld.gold always handles it as a 32-bit big-endian number.
882 // We are compatible with ld.gold because it's easier to implement.
883 std::array<uint8_t, 4> ScriptParser::readFill() {
884   uint64_t Value = readExpr()().Val;
885   if (Value > UINT32_MAX)
886     setError("filler expression result does not fit 32-bit: 0x" +
887              Twine::utohexstr(Value));
888 
889   std::array<uint8_t, 4> Buf;
890   write32be(Buf.data(), (uint32_t)Value);
891   return Buf;
892 }
893 
894 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) {
895   expect("(");
896   SymbolAssignment *Cmd = readSymbolAssignment(next());
897   Cmd->Provide = Provide;
898   Cmd->Hidden = Hidden;
899   expect(")");
900   return Cmd;
901 }
902 
903 SymbolAssignment *ScriptParser::readAssignment(StringRef Tok) {
904   // Assert expression returns Dot, so this is equal to ".=."
905   if (Tok == "ASSERT")
906     return make<SymbolAssignment>(".", readAssert(), getCurrentLocation());
907 
908   size_t OldPos = Pos;
909   SymbolAssignment *Cmd = nullptr;
910   if (peek() == "=" || peek() == "+=")
911     Cmd = readSymbolAssignment(Tok);
912   else if (Tok == "PROVIDE")
913     Cmd = readProvideHidden(true, false);
914   else if (Tok == "HIDDEN")
915     Cmd = readProvideHidden(false, true);
916   else if (Tok == "PROVIDE_HIDDEN")
917     Cmd = readProvideHidden(true, true);
918 
919   if (Cmd) {
920     Cmd->CommandString =
921         Tok.str() + " " +
922         llvm::join(Tokens.begin() + OldPos, Tokens.begin() + Pos, " ");
923     expect(";");
924   }
925   return Cmd;
926 }
927 
928 SymbolAssignment *ScriptParser::readSymbolAssignment(StringRef Name) {
929   StringRef Op = next();
930   assert(Op == "=" || Op == "+=");
931   Expr E = readExpr();
932   if (Op == "+=") {
933     std::string Loc = getCurrentLocation();
934     E = [=] { return add(Script->getSymbolValue(Name, Loc), E()); };
935   }
936   return make<SymbolAssignment>(Name, E, getCurrentLocation());
937 }
938 
939 // This is an operator-precedence parser to parse a linker
940 // script expression.
941 Expr ScriptParser::readExpr() {
942   // Our lexer is context-aware. Set the in-expression bit so that
943   // they apply different tokenization rules.
944   bool Orig = InExpr;
945   InExpr = true;
946   Expr E = readExpr1(readPrimary(), 0);
947   InExpr = Orig;
948   return E;
949 }
950 
951 Expr ScriptParser::combine(StringRef Op, Expr L, Expr R) {
952   if (Op == "+")
953     return [=] { return add(L(), R()); };
954   if (Op == "-")
955     return [=] { return sub(L(), R()); };
956   if (Op == "*")
957     return [=] { return L().getValue() * R().getValue(); };
958   if (Op == "/") {
959     std::string Loc = getCurrentLocation();
960     return [=]() -> uint64_t {
961       if (uint64_t RV = R().getValue())
962         return L().getValue() / RV;
963       error(Loc + ": division by zero");
964       return 0;
965     };
966   }
967   if (Op == "%") {
968     std::string Loc = getCurrentLocation();
969     return [=]() -> uint64_t {
970       if (uint64_t RV = R().getValue())
971         return L().getValue() % RV;
972       error(Loc + ": modulo by zero");
973       return 0;
974     };
975   }
976   if (Op == "<<")
977     return [=] { return L().getValue() << R().getValue(); };
978   if (Op == ">>")
979     return [=] { return L().getValue() >> R().getValue(); };
980   if (Op == "<")
981     return [=] { return L().getValue() < R().getValue(); };
982   if (Op == ">")
983     return [=] { return L().getValue() > R().getValue(); };
984   if (Op == ">=")
985     return [=] { return L().getValue() >= R().getValue(); };
986   if (Op == "<=")
987     return [=] { return L().getValue() <= R().getValue(); };
988   if (Op == "==")
989     return [=] { return L().getValue() == R().getValue(); };
990   if (Op == "!=")
991     return [=] { return L().getValue() != R().getValue(); };
992   if (Op == "||")
993     return [=] { return L().getValue() || R().getValue(); };
994   if (Op == "&&")
995     return [=] { return L().getValue() && R().getValue(); };
996   if (Op == "&")
997     return [=] { return bitAnd(L(), R()); };
998   if (Op == "|")
999     return [=] { return bitOr(L(), R()); };
1000   llvm_unreachable("invalid operator");
1001 }
1002 
1003 // This is a part of the operator-precedence parser. This function
1004 // assumes that the remaining token stream starts with an operator.
1005 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) {
1006   while (!atEOF() && !errorCount()) {
1007     // Read an operator and an expression.
1008     if (consume("?"))
1009       return readTernary(Lhs);
1010     StringRef Op1 = peek();
1011     if (precedence(Op1) < MinPrec)
1012       break;
1013     skip();
1014     Expr Rhs = readPrimary();
1015 
1016     // Evaluate the remaining part of the expression first if the
1017     // next operator has greater precedence than the previous one.
1018     // For example, if we have read "+" and "3", and if the next
1019     // operator is "*", then we'll evaluate 3 * ... part first.
1020     while (!atEOF()) {
1021       StringRef Op2 = peek();
1022       if (precedence(Op2) <= precedence(Op1))
1023         break;
1024       Rhs = readExpr1(Rhs, precedence(Op2));
1025     }
1026 
1027     Lhs = combine(Op1, Lhs, Rhs);
1028   }
1029   return Lhs;
1030 }
1031 
1032 Expr ScriptParser::getPageSize() {
1033   std::string Location = getCurrentLocation();
1034   return [=]() -> uint64_t {
1035     if (Target)
1036       return Config->CommonPageSize;
1037     error(Location + ": unable to calculate page size");
1038     return 4096; // Return a dummy value.
1039   };
1040 }
1041 
1042 Expr ScriptParser::readConstant() {
1043   StringRef S = readParenLiteral();
1044   if (S == "COMMONPAGESIZE")
1045     return getPageSize();
1046   if (S == "MAXPAGESIZE")
1047     return [] { return Config->MaxPageSize; };
1048   setError("unknown constant: " + S);
1049   return [] { return 0; };
1050 }
1051 
1052 // Parses Tok as an integer. It recognizes hexadecimal (prefixed with
1053 // "0x" or suffixed with "H") and decimal numbers. Decimal numbers may
1054 // have "K" (Ki) or "M" (Mi) suffixes.
1055 static Optional<uint64_t> parseInt(StringRef Tok) {
1056   // Hexadecimal
1057   uint64_t Val;
1058   if (Tok.startswith_lower("0x")) {
1059     if (!to_integer(Tok.substr(2), Val, 16))
1060       return None;
1061     return Val;
1062   }
1063   if (Tok.endswith_lower("H")) {
1064     if (!to_integer(Tok.drop_back(), Val, 16))
1065       return None;
1066     return Val;
1067   }
1068 
1069   // Decimal
1070   if (Tok.endswith_lower("K")) {
1071     if (!to_integer(Tok.drop_back(), Val, 10))
1072       return None;
1073     return Val * 1024;
1074   }
1075   if (Tok.endswith_lower("M")) {
1076     if (!to_integer(Tok.drop_back(), Val, 10))
1077       return None;
1078     return Val * 1024 * 1024;
1079   }
1080   if (!to_integer(Tok, Val, 10))
1081     return None;
1082   return Val;
1083 }
1084 
1085 ByteCommand *ScriptParser::readByteCommand(StringRef Tok) {
1086   int Size = StringSwitch<int>(Tok)
1087                  .Case("BYTE", 1)
1088                  .Case("SHORT", 2)
1089                  .Case("LONG", 4)
1090                  .Case("QUAD", 8)
1091                  .Default(-1);
1092   if (Size == -1)
1093     return nullptr;
1094 
1095   size_t OldPos = Pos;
1096   Expr E = readParenExpr();
1097   std::string CommandString =
1098       Tok.str() + " " +
1099       llvm::join(Tokens.begin() + OldPos, Tokens.begin() + Pos, " ");
1100   return make<ByteCommand>(E, Size, CommandString);
1101 }
1102 
1103 StringRef ScriptParser::readParenLiteral() {
1104   expect("(");
1105   bool Orig = InExpr;
1106   InExpr = false;
1107   StringRef Tok = next();
1108   InExpr = Orig;
1109   expect(")");
1110   return Tok;
1111 }
1112 
1113 static void checkIfExists(OutputSection *Cmd, StringRef Location) {
1114   if (Cmd->Location.empty() && Script->ErrorOnMissingSection)
1115     error(Location + ": undefined section " + Cmd->Name);
1116 }
1117 
1118 Expr ScriptParser::readPrimary() {
1119   if (peek() == "(")
1120     return readParenExpr();
1121 
1122   if (consume("~")) {
1123     Expr E = readPrimary();
1124     return [=] { return ~E().getValue(); };
1125   }
1126   if (consume("!")) {
1127     Expr E = readPrimary();
1128     return [=] { return !E().getValue(); };
1129   }
1130   if (consume("-")) {
1131     Expr E = readPrimary();
1132     return [=] { return -E().getValue(); };
1133   }
1134 
1135   StringRef Tok = next();
1136   std::string Location = getCurrentLocation();
1137 
1138   // Built-in functions are parsed here.
1139   // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html.
1140   if (Tok == "ABSOLUTE") {
1141     Expr Inner = readParenExpr();
1142     return [=] {
1143       ExprValue I = Inner();
1144       I.ForceAbsolute = true;
1145       return I;
1146     };
1147   }
1148   if (Tok == "ADDR") {
1149     StringRef Name = readParenLiteral();
1150     OutputSection *Sec = Script->getOrCreateOutputSection(Name);
1151     Sec->UsedInExpression = true;
1152     return [=]() -> ExprValue {
1153       checkIfExists(Sec, Location);
1154       return {Sec, false, 0, Location};
1155     };
1156   }
1157   if (Tok == "ALIGN") {
1158     expect("(");
1159     Expr E = readExpr();
1160     if (consume(")")) {
1161       E = checkAlignment(E, Location);
1162       return [=] { return alignTo(Script->getDot(), E().getValue()); };
1163     }
1164     expect(",");
1165     Expr E2 = checkAlignment(readExpr(), Location);
1166     expect(")");
1167     return [=] {
1168       ExprValue V = E();
1169       V.Alignment = E2().getValue();
1170       return V;
1171     };
1172   }
1173   if (Tok == "ALIGNOF") {
1174     StringRef Name = readParenLiteral();
1175     OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
1176     return [=] {
1177       checkIfExists(Cmd, Location);
1178       return Cmd->Alignment;
1179     };
1180   }
1181   if (Tok == "ASSERT")
1182     return readAssert();
1183   if (Tok == "CONSTANT")
1184     return readConstant();
1185   if (Tok == "DATA_SEGMENT_ALIGN") {
1186     expect("(");
1187     Expr E = readExpr();
1188     expect(",");
1189     readExpr();
1190     expect(")");
1191     return [=] {
1192       return alignTo(Script->getDot(), std::max((uint64_t)1, E().getValue()));
1193     };
1194   }
1195   if (Tok == "DATA_SEGMENT_END") {
1196     expect("(");
1197     expect(".");
1198     expect(")");
1199     return [] { return Script->getDot(); };
1200   }
1201   if (Tok == "DATA_SEGMENT_RELRO_END") {
1202     // GNU linkers implements more complicated logic to handle
1203     // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and
1204     // just align to the next page boundary for simplicity.
1205     expect("(");
1206     readExpr();
1207     expect(",");
1208     readExpr();
1209     expect(")");
1210     Expr E = getPageSize();
1211     return [=] { return alignTo(Script->getDot(), E().getValue()); };
1212   }
1213   if (Tok == "DEFINED") {
1214     StringRef Name = readParenLiteral();
1215     return [=] { return Symtab->find(Name) ? 1 : 0; };
1216   }
1217   if (Tok == "LENGTH") {
1218     StringRef Name = readParenLiteral();
1219     if (Script->MemoryRegions.count(Name) == 0) {
1220       setError("memory region not defined: " + Name);
1221       return [] { return 0; };
1222     }
1223     return [=] { return Script->MemoryRegions[Name]->Length; };
1224   }
1225   if (Tok == "LOADADDR") {
1226     StringRef Name = readParenLiteral();
1227     OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
1228     Cmd->UsedInExpression = true;
1229     return [=] {
1230       checkIfExists(Cmd, Location);
1231       return Cmd->getLMA();
1232     };
1233   }
1234   if (Tok == "MAX" || Tok == "MIN") {
1235     expect("(");
1236     Expr A = readExpr();
1237     expect(",");
1238     Expr B = readExpr();
1239     expect(")");
1240     if (Tok == "MIN")
1241       return [=] { return std::min(A().getValue(), B().getValue()); };
1242     return [=] { return std::max(A().getValue(), B().getValue()); };
1243   }
1244   if (Tok == "ORIGIN") {
1245     StringRef Name = readParenLiteral();
1246     if (Script->MemoryRegions.count(Name) == 0) {
1247       setError("memory region not defined: " + Name);
1248       return [] { return 0; };
1249     }
1250     return [=] { return Script->MemoryRegions[Name]->Origin; };
1251   }
1252   if (Tok == "SEGMENT_START") {
1253     expect("(");
1254     skip();
1255     expect(",");
1256     Expr E = readExpr();
1257     expect(")");
1258     return [=] { return E(); };
1259   }
1260   if (Tok == "SIZEOF") {
1261     StringRef Name = readParenLiteral();
1262     OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
1263     // Linker script does not create an output section if its content is empty.
1264     // We want to allow SIZEOF(.foo) where .foo is a section which happened to
1265     // be empty.
1266     return [=] { return Cmd->Size; };
1267   }
1268   if (Tok == "SIZEOF_HEADERS")
1269     return [=] { return elf::getHeaderSize(); };
1270 
1271   // Tok is the dot.
1272   if (Tok == ".")
1273     return [=] { return Script->getSymbolValue(Tok, Location); };
1274 
1275   // Tok is a literal number.
1276   if (Optional<uint64_t> Val = parseInt(Tok))
1277     return [=] { return *Val; };
1278 
1279   // Tok is a symbol name.
1280   if (!isValidCIdentifier(Tok))
1281     setError("malformed number: " + Tok);
1282   Script->ReferencedSymbols.push_back(Tok);
1283   return [=] { return Script->getSymbolValue(Tok, Location); };
1284 }
1285 
1286 Expr ScriptParser::readTernary(Expr Cond) {
1287   Expr L = readExpr();
1288   expect(":");
1289   Expr R = readExpr();
1290   return [=] { return Cond().getValue() ? L() : R(); };
1291 }
1292 
1293 Expr ScriptParser::readParenExpr() {
1294   expect("(");
1295   Expr E = readExpr();
1296   expect(")");
1297   return E;
1298 }
1299 
1300 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() {
1301   std::vector<StringRef> Phdrs;
1302   while (!errorCount() && peek().startswith(":")) {
1303     StringRef Tok = next();
1304     Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1));
1305   }
1306   return Phdrs;
1307 }
1308 
1309 // Read a program header type name. The next token must be a
1310 // name of a program header type or a constant (e.g. "0x3").
1311 unsigned ScriptParser::readPhdrType() {
1312   StringRef Tok = next();
1313   if (Optional<uint64_t> Val = parseInt(Tok))
1314     return *Val;
1315 
1316   unsigned Ret = StringSwitch<unsigned>(Tok)
1317                      .Case("PT_NULL", PT_NULL)
1318                      .Case("PT_LOAD", PT_LOAD)
1319                      .Case("PT_DYNAMIC", PT_DYNAMIC)
1320                      .Case("PT_INTERP", PT_INTERP)
1321                      .Case("PT_NOTE", PT_NOTE)
1322                      .Case("PT_SHLIB", PT_SHLIB)
1323                      .Case("PT_PHDR", PT_PHDR)
1324                      .Case("PT_TLS", PT_TLS)
1325                      .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME)
1326                      .Case("PT_GNU_STACK", PT_GNU_STACK)
1327                      .Case("PT_GNU_RELRO", PT_GNU_RELRO)
1328                      .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE)
1329                      .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED)
1330                      .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA)
1331                      .Default(-1);
1332 
1333   if (Ret == (unsigned)-1) {
1334     setError("invalid program header type: " + Tok);
1335     return PT_NULL;
1336   }
1337   return Ret;
1338 }
1339 
1340 // Reads an anonymous version declaration.
1341 void ScriptParser::readAnonymousDeclaration() {
1342   std::vector<SymbolVersion> Locals;
1343   std::vector<SymbolVersion> Globals;
1344   std::tie(Locals, Globals) = readSymbols();
1345 
1346   for (SymbolVersion V : Locals) {
1347     if (V.Name == "*")
1348       Config->DefaultSymbolVersion = VER_NDX_LOCAL;
1349     else
1350       Config->VersionScriptLocals.push_back(V);
1351   }
1352 
1353   for (SymbolVersion V : Globals)
1354     Config->VersionScriptGlobals.push_back(V);
1355 
1356   expect(";");
1357 }
1358 
1359 // Reads a non-anonymous version definition,
1360 // e.g. "VerStr { global: foo; bar; local: *; };".
1361 void ScriptParser::readVersionDeclaration(StringRef VerStr) {
1362   // Read a symbol list.
1363   std::vector<SymbolVersion> Locals;
1364   std::vector<SymbolVersion> Globals;
1365   std::tie(Locals, Globals) = readSymbols();
1366 
1367   for (SymbolVersion V : Locals) {
1368     if (V.Name == "*")
1369       Config->DefaultSymbolVersion = VER_NDX_LOCAL;
1370     else
1371       Config->VersionScriptLocals.push_back(V);
1372   }
1373 
1374   // Create a new version definition and add that to the global symbols.
1375   VersionDefinition Ver;
1376   Ver.Name = VerStr;
1377   Ver.Globals = Globals;
1378 
1379   // User-defined version number starts from 2 because 0 and 1 are
1380   // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively.
1381   Ver.Id = Config->VersionDefinitions.size() + 2;
1382   Config->VersionDefinitions.push_back(Ver);
1383 
1384   // Each version may have a parent version. For example, "Ver2"
1385   // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1"
1386   // as a parent. This version hierarchy is, probably against your
1387   // instinct, purely for hint; the runtime doesn't care about it
1388   // at all. In LLD, we simply ignore it.
1389   if (peek() != ";")
1390     skip();
1391   expect(";");
1392 }
1393 
1394 static bool hasWildcard(StringRef S) {
1395   return S.find_first_of("?*[") != StringRef::npos;
1396 }
1397 
1398 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };".
1399 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>>
1400 ScriptParser::readSymbols() {
1401   std::vector<SymbolVersion> Locals;
1402   std::vector<SymbolVersion> Globals;
1403   std::vector<SymbolVersion> *V = &Globals;
1404 
1405   while (!errorCount()) {
1406     if (consume("}"))
1407       break;
1408     if (consumeLabel("local")) {
1409       V = &Locals;
1410       continue;
1411     }
1412     if (consumeLabel("global")) {
1413       V = &Globals;
1414       continue;
1415     }
1416 
1417     if (consume("extern")) {
1418       std::vector<SymbolVersion> Ext = readVersionExtern();
1419       V->insert(V->end(), Ext.begin(), Ext.end());
1420     } else {
1421       StringRef Tok = next();
1422       V->push_back({unquote(Tok), false, hasWildcard(Tok)});
1423     }
1424     expect(";");
1425   }
1426   return {Locals, Globals};
1427 }
1428 
1429 // Reads an "extern C++" directive, e.g.,
1430 // "extern "C++" { ns::*; "f(int, double)"; };"
1431 //
1432 // The last semicolon is optional. E.g. this is OK:
1433 // "extern "C++" { ns::*; "f(int, double)" };"
1434 std::vector<SymbolVersion> ScriptParser::readVersionExtern() {
1435   StringRef Tok = next();
1436   bool IsCXX = Tok == "\"C++\"";
1437   if (!IsCXX && Tok != "\"C\"")
1438     setError("Unknown language");
1439   expect("{");
1440 
1441   std::vector<SymbolVersion> Ret;
1442   while (!errorCount() && peek() != "}") {
1443     StringRef Tok = next();
1444     Ret.push_back(
1445         {unquote(Tok), IsCXX, !Tok.startswith("\"") && hasWildcard(Tok)});
1446     if (consume("}"))
1447       return Ret;
1448     expect(";");
1449   }
1450 
1451   expect("}");
1452   return Ret;
1453 }
1454 
1455 uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2,
1456                                             StringRef S3) {
1457   if (!consume(S1) && !consume(S2) && !consume(S3)) {
1458     setError("expected one of: " + S1 + ", " + S2 + ", or " + S3);
1459     return 0;
1460   }
1461   expect("=");
1462   return readExpr()().getValue();
1463 }
1464 
1465 // Parse the MEMORY command as specified in:
1466 // https://sourceware.org/binutils/docs/ld/MEMORY.html
1467 //
1468 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... }
1469 void ScriptParser::readMemory() {
1470   expect("{");
1471   while (!errorCount() && !consume("}")) {
1472     StringRef Tok = next();
1473     if (Tok == "INCLUDE") {
1474       readInclude();
1475       continue;
1476     }
1477 
1478     uint32_t Flags = 0;
1479     uint32_t NegFlags = 0;
1480     if (consume("(")) {
1481       std::tie(Flags, NegFlags) = readMemoryAttributes();
1482       expect(")");
1483     }
1484     expect(":");
1485 
1486     uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o");
1487     expect(",");
1488     uint64_t Length = readMemoryAssignment("LENGTH", "len", "l");
1489 
1490     // Add the memory region to the region map.
1491     MemoryRegion *MR = make<MemoryRegion>(Tok, Origin, Length, Flags, NegFlags);
1492     if (!Script->MemoryRegions.insert({Tok, MR}).second)
1493       setError("region '" + Tok + "' already defined");
1494   }
1495 }
1496 
1497 // This function parses the attributes used to match against section
1498 // flags when placing output sections in a memory region. These flags
1499 // are only used when an explicit memory region name is not used.
1500 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() {
1501   uint32_t Flags = 0;
1502   uint32_t NegFlags = 0;
1503   bool Invert = false;
1504 
1505   for (char C : next().lower()) {
1506     uint32_t Flag = 0;
1507     if (C == '!')
1508       Invert = !Invert;
1509     else if (C == 'w')
1510       Flag = SHF_WRITE;
1511     else if (C == 'x')
1512       Flag = SHF_EXECINSTR;
1513     else if (C == 'a')
1514       Flag = SHF_ALLOC;
1515     else if (C != 'r')
1516       setError("invalid memory region attribute");
1517 
1518     if (Invert)
1519       NegFlags |= Flag;
1520     else
1521       Flags |= Flag;
1522   }
1523   return {Flags, NegFlags};
1524 }
1525 
1526 void elf::readLinkerScript(MemoryBufferRef MB) {
1527   ScriptParser(MB).readLinkerScript();
1528 }
1529 
1530 void elf::readVersionScript(MemoryBufferRef MB) {
1531   ScriptParser(MB).readVersionScript();
1532 }
1533 
1534 void elf::readDynamicList(MemoryBufferRef MB) {
1535   ScriptParser(MB).readDynamicList();
1536 }
1537 
1538 void elf::readDefsym(StringRef Name, MemoryBufferRef MB) {
1539   ScriptParser(MB).readDefsym(Name);
1540 }
1541