1 //===- ScriptParser.cpp ---------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// This file contains a recursive-descent parser for linker scripts.
11 // Parsed results are stored to Config and Script global objects.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ScriptParser.h"
16 #include "Config.h"
17 #include "Driver.h"
18 #include "InputSection.h"
19 #include "LinkerScript.h"
20 #include "Memory.h"
21 #include "OutputSections.h"
22 #include "ScriptLexer.h"
23 #include "Symbols.h"
24 #include "Target.h"
25 #include "llvm/ADT/SmallString.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/ADT/StringSet.h"
28 #include "llvm/ADT/StringSwitch.h"
29 #include "llvm/BinaryFormat/ELF.h"
30 #include "llvm/Support/Casting.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/FileSystem.h"
33 #include "llvm/Support/Path.h"
34 #include <cassert>
35 #include <limits>
36 #include <vector>
37 
38 using namespace llvm;
39 using namespace llvm::ELF;
40 using namespace llvm::support::endian;
41 using namespace lld;
42 using namespace lld::elf;
43 
44 static bool isUnderSysroot(StringRef Path);
45 
46 namespace {
47 class ScriptParser final : ScriptLexer {
48 public:
49   ScriptParser(MemoryBufferRef MB)
50       : ScriptLexer(MB),
51         IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {}
52 
53   void readLinkerScript();
54   void readVersionScript();
55   void readDynamicList();
56 
57 private:
58   void addFile(StringRef Path);
59   OutputSection *checkSection(OutputSection *Cmd, StringRef Loccation);
60 
61   void readAsNeeded();
62   void readEntry();
63   void readExtern();
64   void readGroup();
65   void readInclude();
66   void readMemory();
67   void readOutput();
68   void readOutputArch();
69   void readOutputFormat();
70   void readPhdrs();
71   void readSearchDir();
72   void readSections();
73   void readVersion();
74   void readVersionScriptCommand();
75 
76   SymbolAssignment *readAssignment(StringRef Name);
77   BytesDataCommand *readBytesDataCommand(StringRef Tok);
78   uint32_t readFill();
79   uint32_t parseFill(StringRef Tok);
80   void readSectionAddressType(OutputSection *Cmd);
81   OutputSection *readOutputSectionDescription(StringRef OutSec);
82   std::vector<StringRef> readOutputSectionPhdrs();
83   InputSectionDescription *readInputSectionDescription(StringRef Tok);
84   StringMatcher readFilePatterns();
85   std::vector<SectionPattern> readInputSectionsList();
86   InputSectionDescription *readInputSectionRules(StringRef FilePattern);
87   unsigned readPhdrType();
88   SortSectionPolicy readSortKind();
89   SymbolAssignment *readProvideHidden(bool Provide, bool Hidden);
90   SymbolAssignment *readProvideOrAssignment(StringRef Tok);
91   void readSort();
92   AssertCommand *readAssert();
93   Expr readAssertExpr();
94   Expr readConstant();
95   Expr getPageSize();
96 
97   uint64_t readMemoryAssignment(StringRef, StringRef, StringRef);
98   std::pair<uint32_t, uint32_t> readMemoryAttributes();
99 
100   Expr readExpr();
101   Expr readExpr1(Expr Lhs, int MinPrec);
102   StringRef readParenLiteral();
103   Expr readPrimary();
104   Expr readTernary(Expr Cond);
105   Expr readParenExpr();
106 
107   // For parsing version script.
108   std::vector<SymbolVersion> readVersionExtern();
109   void readAnonymousDeclaration();
110   void readVersionDeclaration(StringRef VerStr);
111 
112   std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>>
113   readSymbols();
114 
115   // True if a script being read is in a subdirectory specified by -sysroot.
116   bool IsUnderSysroot;
117 
118   // A set to detect an INCLUDE() cycle.
119   StringSet<> Seen;
120 };
121 } // namespace
122 
123 static StringRef unquote(StringRef S) {
124   if (S.startswith("\""))
125     return S.substr(1, S.size() - 2);
126   return S;
127 }
128 
129 static bool isUnderSysroot(StringRef Path) {
130   if (Config->Sysroot == "")
131     return false;
132   for (; !Path.empty(); Path = sys::path::parent_path(Path))
133     if (sys::fs::equivalent(Config->Sysroot, Path))
134       return true;
135   return false;
136 }
137 
138 // Some operations only support one non absolute value. Move the
139 // absolute one to the right hand side for convenience.
140 static void moveAbsRight(ExprValue &A, ExprValue &B) {
141   if (A.isAbsolute())
142     std::swap(A, B);
143   if (!B.isAbsolute())
144     error(A.Loc + ": at least one side of the expression must be absolute");
145 }
146 
147 static ExprValue add(ExprValue A, ExprValue B) {
148   moveAbsRight(A, B);
149   uint64_t Val = alignTo(A.Val, A.Alignment) + B.getValue();
150   return {A.Sec, A.ForceAbsolute, Val, A.Loc};
151 }
152 
153 static ExprValue sub(ExprValue A, ExprValue B) {
154   uint64_t Val = alignTo(A.Val, A.Alignment) - B.getValue();
155   return {A.Sec, Val, A.Loc};
156 }
157 
158 static ExprValue mul(ExprValue A, ExprValue B) {
159   return A.getValue() * B.getValue();
160 }
161 
162 static ExprValue div(ExprValue A, ExprValue B) {
163   if (uint64_t BV = B.getValue())
164     return A.getValue() / BV;
165   error("division by zero");
166   return 0;
167 }
168 
169 static ExprValue bitAnd(ExprValue A, ExprValue B) {
170   moveAbsRight(A, B);
171   return {A.Sec, A.ForceAbsolute,
172           (A.getValue() & B.getValue()) - A.getSecAddr(), A.Loc};
173 }
174 
175 static ExprValue bitOr(ExprValue A, ExprValue B) {
176   moveAbsRight(A, B);
177   return {A.Sec, A.ForceAbsolute,
178           (A.getValue() | B.getValue()) - A.getSecAddr(), A.Loc};
179 }
180 
181 void ScriptParser::readDynamicList() {
182   expect("{");
183   readAnonymousDeclaration();
184   if (!atEOF())
185     setError("EOF expected, but got " + next());
186 }
187 
188 void ScriptParser::readVersionScript() {
189   readVersionScriptCommand();
190   if (!atEOF())
191     setError("EOF expected, but got " + next());
192 }
193 
194 void ScriptParser::readVersionScriptCommand() {
195   if (consume("{")) {
196     readAnonymousDeclaration();
197     return;
198   }
199 
200   while (!atEOF() && !ErrorCount && peek() != "}") {
201     StringRef VerStr = next();
202     if (VerStr == "{") {
203       setError("anonymous version definition is used in "
204                "combination with other version definitions");
205       return;
206     }
207     expect("{");
208     readVersionDeclaration(VerStr);
209   }
210 }
211 
212 void ScriptParser::readVersion() {
213   expect("{");
214   readVersionScriptCommand();
215   expect("}");
216 }
217 
218 void ScriptParser::readLinkerScript() {
219   while (!atEOF()) {
220     StringRef Tok = next();
221     if (Tok == ";")
222       continue;
223 
224     if (Tok == "ASSERT") {
225       Script->Opt.Commands.push_back(readAssert());
226     } else if (Tok == "ENTRY") {
227       readEntry();
228     } else if (Tok == "EXTERN") {
229       readExtern();
230     } else if (Tok == "GROUP" || Tok == "INPUT") {
231       readGroup();
232     } else if (Tok == "INCLUDE") {
233       readInclude();
234     } else if (Tok == "MEMORY") {
235       readMemory();
236     } else if (Tok == "OUTPUT") {
237       readOutput();
238     } else if (Tok == "OUTPUT_ARCH") {
239       readOutputArch();
240     } else if (Tok == "OUTPUT_FORMAT") {
241       readOutputFormat();
242     } else if (Tok == "PHDRS") {
243       readPhdrs();
244     } else if (Tok == "SEARCH_DIR") {
245       readSearchDir();
246     } else if (Tok == "SECTIONS") {
247       readSections();
248     } else if (Tok == "VERSION") {
249       readVersion();
250     } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) {
251       Script->Opt.Commands.push_back(Cmd);
252     } else {
253       setError("unknown directive: " + Tok);
254     }
255   }
256 }
257 
258 void ScriptParser::addFile(StringRef S) {
259   if (IsUnderSysroot && S.startswith("/")) {
260     SmallString<128> PathData;
261     StringRef Path = (Config->Sysroot + S).toStringRef(PathData);
262     if (sys::fs::exists(Path)) {
263       Driver->addFile(Saver.save(Path), /*WithLOption=*/false);
264       return;
265     }
266   }
267 
268   if (S.startswith("/")) {
269     Driver->addFile(S, /*WithLOption=*/false);
270   } else if (S.startswith("=")) {
271     if (Config->Sysroot.empty())
272       Driver->addFile(S.substr(1), /*WithLOption=*/false);
273     else
274       Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)),
275                       /*WithLOption=*/false);
276   } else if (S.startswith("-l")) {
277     Driver->addLibrary(S.substr(2));
278   } else if (sys::fs::exists(S)) {
279     Driver->addFile(S, /*WithLOption=*/false);
280   } else {
281     if (Optional<std::string> Path = findFromSearchPaths(S))
282       Driver->addFile(Saver.save(*Path), /*WithLOption=*/true);
283     else
284       setError("unable to find " + S);
285   }
286 }
287 
288 void ScriptParser::readAsNeeded() {
289   expect("(");
290   bool Orig = Config->AsNeeded;
291   Config->AsNeeded = true;
292   while (!ErrorCount && !consume(")"))
293     addFile(unquote(next()));
294   Config->AsNeeded = Orig;
295 }
296 
297 void ScriptParser::readEntry() {
298   // -e <symbol> takes predecence over ENTRY(<symbol>).
299   expect("(");
300   StringRef Tok = next();
301   if (Config->Entry.empty())
302     Config->Entry = Tok;
303   expect(")");
304 }
305 
306 void ScriptParser::readExtern() {
307   expect("(");
308   while (!ErrorCount && !consume(")"))
309     Config->Undefined.push_back(next());
310 }
311 
312 void ScriptParser::readGroup() {
313   expect("(");
314   while (!ErrorCount && !consume(")")) {
315     if (consume("AS_NEEDED"))
316       readAsNeeded();
317     else
318       addFile(unquote(next()));
319   }
320 }
321 
322 void ScriptParser::readInclude() {
323   StringRef Tok = unquote(next());
324 
325   if (!Seen.insert(Tok).second) {
326     setError("there is a cycle in linker script INCLUDEs");
327     return;
328   }
329 
330   // https://sourceware.org/binutils/docs/ld/File-Commands.html:
331   // The file will be searched for in the current directory, and in any
332   // directory specified with the -L option.
333   if (sys::fs::exists(Tok)) {
334     if (Optional<MemoryBufferRef> MB = readFile(Tok))
335       tokenize(*MB);
336     return;
337   }
338   if (Optional<std::string> Path = findFromSearchPaths(Tok)) {
339     if (Optional<MemoryBufferRef> MB = readFile(*Path))
340       tokenize(*MB);
341     return;
342   }
343   setError("cannot open " + Tok);
344 }
345 
346 void ScriptParser::readOutput() {
347   // -o <file> takes predecence over OUTPUT(<file>).
348   expect("(");
349   StringRef Tok = next();
350   if (Config->OutputFile.empty())
351     Config->OutputFile = unquote(Tok);
352   expect(")");
353 }
354 
355 void ScriptParser::readOutputArch() {
356   // OUTPUT_ARCH is ignored for now.
357   expect("(");
358   while (!ErrorCount && !consume(")"))
359     skip();
360 }
361 
362 void ScriptParser::readOutputFormat() {
363   // Error checking only for now.
364   expect("(");
365   skip();
366   if (consume(")"))
367     return;
368   expect(",");
369   skip();
370   expect(",");
371   skip();
372   expect(")");
373 }
374 
375 void ScriptParser::readPhdrs() {
376   expect("{");
377   while (!ErrorCount && !consume("}")) {
378     Script->Opt.PhdrsCommands.push_back(
379         {next(), PT_NULL, false, false, UINT_MAX, nullptr});
380 
381     PhdrsCommand &PhdrCmd = Script->Opt.PhdrsCommands.back();
382     PhdrCmd.Type = readPhdrType();
383 
384     while (!ErrorCount && !consume(";")) {
385       if (consume("FILEHDR"))
386         PhdrCmd.HasFilehdr = true;
387       else if (consume("PHDRS"))
388         PhdrCmd.HasPhdrs = true;
389       else if (consume("AT"))
390         PhdrCmd.LMAExpr = readParenExpr();
391       else if (consume("FLAGS"))
392         PhdrCmd.Flags = readParenExpr()().getValue();
393       else
394         setError("unexpected header attribute: " + next());
395     }
396   }
397 }
398 
399 void ScriptParser::readSearchDir() {
400   expect("(");
401   StringRef Tok = next();
402   if (!Config->Nostdlib)
403     Config->SearchPaths.push_back(unquote(Tok));
404   expect(")");
405 }
406 
407 void ScriptParser::readSections() {
408   Script->Opt.HasSections = true;
409 
410   // -no-rosegment is used to avoid placing read only non-executable sections in
411   // their own segment. We do the same if SECTIONS command is present in linker
412   // script. See comment for computeFlags().
413   Config->SingleRoRx = true;
414 
415   expect("{");
416   while (!ErrorCount && !consume("}")) {
417     StringRef Tok = next();
418     BaseCommand *Cmd = readProvideOrAssignment(Tok);
419     if (!Cmd) {
420       if (Tok == "ASSERT")
421         Cmd = readAssert();
422       else
423         Cmd = readOutputSectionDescription(Tok);
424     }
425     Script->Opt.Commands.push_back(Cmd);
426   }
427 }
428 
429 static int precedence(StringRef Op) {
430   return StringSwitch<int>(Op)
431       .Cases("*", "/", 5)
432       .Cases("+", "-", 4)
433       .Cases("<<", ">>", 3)
434       .Cases("<", "<=", ">", ">=", "==", "!=", 2)
435       .Cases("&", "|", 1)
436       .Default(-1);
437 }
438 
439 StringMatcher ScriptParser::readFilePatterns() {
440   std::vector<StringRef> V;
441   while (!ErrorCount && !consume(")"))
442     V.push_back(next());
443   return StringMatcher(V);
444 }
445 
446 SortSectionPolicy ScriptParser::readSortKind() {
447   if (consume("SORT") || consume("SORT_BY_NAME"))
448     return SortSectionPolicy::Name;
449   if (consume("SORT_BY_ALIGNMENT"))
450     return SortSectionPolicy::Alignment;
451   if (consume("SORT_BY_INIT_PRIORITY"))
452     return SortSectionPolicy::Priority;
453   if (consume("SORT_NONE"))
454     return SortSectionPolicy::None;
455   return SortSectionPolicy::Default;
456 }
457 
458 // Reads SECTIONS command contents in the following form:
459 //
460 // <contents> ::= <elem>*
461 // <elem>     ::= <exclude>? <glob-pattern>
462 // <exclude>  ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")"
463 //
464 // For example,
465 //
466 // *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz)
467 //
468 // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o".
469 // The semantics of that is section .foo in any file, section .bar in
470 // any file but a.o, and section .baz in any file but b.o.
471 std::vector<SectionPattern> ScriptParser::readInputSectionsList() {
472   std::vector<SectionPattern> Ret;
473   while (!ErrorCount && peek() != ")") {
474     StringMatcher ExcludeFilePat;
475     if (consume("EXCLUDE_FILE")) {
476       expect("(");
477       ExcludeFilePat = readFilePatterns();
478     }
479 
480     std::vector<StringRef> V;
481     while (!ErrorCount && peek() != ")" && peek() != "EXCLUDE_FILE")
482       V.push_back(next());
483 
484     if (!V.empty())
485       Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)});
486     else
487       setError("section pattern is expected");
488   }
489   return Ret;
490 }
491 
492 // Reads contents of "SECTIONS" directive. That directive contains a
493 // list of glob patterns for input sections. The grammar is as follows.
494 //
495 // <patterns> ::= <section-list>
496 //              | <sort> "(" <section-list> ")"
497 //              | <sort> "(" <sort> "(" <section-list> ")" ")"
498 //
499 // <sort>     ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT"
500 //              | "SORT_BY_INIT_PRIORITY" | "SORT_NONE"
501 //
502 // <section-list> is parsed by readInputSectionsList().
503 InputSectionDescription *
504 ScriptParser::readInputSectionRules(StringRef FilePattern) {
505   auto *Cmd = make<InputSectionDescription>(FilePattern);
506   expect("(");
507 
508   while (!ErrorCount && !consume(")")) {
509     SortSectionPolicy Outer = readSortKind();
510     SortSectionPolicy Inner = SortSectionPolicy::Default;
511     std::vector<SectionPattern> V;
512     if (Outer != SortSectionPolicy::Default) {
513       expect("(");
514       Inner = readSortKind();
515       if (Inner != SortSectionPolicy::Default) {
516         expect("(");
517         V = readInputSectionsList();
518         expect(")");
519       } else {
520         V = readInputSectionsList();
521       }
522       expect(")");
523     } else {
524       V = readInputSectionsList();
525     }
526 
527     for (SectionPattern &Pat : V) {
528       Pat.SortInner = Inner;
529       Pat.SortOuter = Outer;
530     }
531 
532     std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns));
533   }
534   return Cmd;
535 }
536 
537 InputSectionDescription *
538 ScriptParser::readInputSectionDescription(StringRef Tok) {
539   // Input section wildcard can be surrounded by KEEP.
540   // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep
541   if (Tok == "KEEP") {
542     expect("(");
543     StringRef FilePattern = next();
544     InputSectionDescription *Cmd = readInputSectionRules(FilePattern);
545     expect(")");
546     Script->Opt.KeptSections.push_back(Cmd);
547     return Cmd;
548   }
549   return readInputSectionRules(Tok);
550 }
551 
552 void ScriptParser::readSort() {
553   expect("(");
554   expect("CONSTRUCTORS");
555   expect(")");
556 }
557 
558 AssertCommand *ScriptParser::readAssert() {
559   return make<AssertCommand>(readAssertExpr());
560 }
561 
562 Expr ScriptParser::readAssertExpr() {
563   expect("(");
564   Expr E = readExpr();
565   expect(",");
566   StringRef Msg = unquote(next());
567   expect(")");
568 
569   return [=] {
570     if (!E().getValue())
571       error(Msg);
572     return Script->getDot();
573   };
574 }
575 
576 // Reads a FILL(expr) command. We handle the FILL command as an
577 // alias for =fillexp section attribute, which is different from
578 // what GNU linkers do.
579 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html
580 uint32_t ScriptParser::readFill() {
581   expect("(");
582   uint32_t V = parseFill(next());
583   expect(")");
584   return V;
585 }
586 
587 // Reads an expression and/or the special directive "(NOLOAD)" for an
588 // output section definition.
589 //
590 // An output section name can be followed by an address expression
591 // and/or by "(NOLOAD)". This grammar is not LL(1) because "(" can be
592 // interpreted as either the beginning of some expression or "(NOLOAD)".
593 //
594 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html
595 // https://sourceware.org/binutils/docs/ld/Output-Section-Type.html
596 void ScriptParser::readSectionAddressType(OutputSection *Cmd) {
597   if (consume("(")) {
598     if (consume("NOLOAD")) {
599       expect(")");
600       Cmd->Noload = true;
601       return;
602     }
603     Cmd->AddrExpr = readExpr();
604     expect(")");
605   } else {
606     Cmd->AddrExpr = readExpr();
607   }
608 
609   if (consume("(")) {
610     expect("NOLOAD");
611     expect(")");
612     Cmd->Noload = true;
613   }
614 }
615 
616 OutputSection *ScriptParser::readOutputSectionDescription(StringRef OutSec) {
617   OutputSection *Cmd =
618       Script->createOutputSection(OutSec, getCurrentLocation());
619 
620   if (peek() != ":")
621     readSectionAddressType(Cmd);
622   expect(":");
623 
624   if (consume("AT"))
625     Cmd->LMAExpr = readParenExpr();
626   if (consume("ALIGN"))
627     Cmd->AlignExpr = readParenExpr();
628   if (consume("SUBALIGN"))
629     Cmd->SubalignExpr = readParenExpr();
630 
631   // Parse constraints.
632   if (consume("ONLY_IF_RO"))
633     Cmd->Constraint = ConstraintKind::ReadOnly;
634   if (consume("ONLY_IF_RW"))
635     Cmd->Constraint = ConstraintKind::ReadWrite;
636   expect("{");
637 
638   while (!ErrorCount && !consume("}")) {
639     StringRef Tok = next();
640     if (Tok == ";") {
641       // Empty commands are allowed. Do nothing here.
642     } else if (SymbolAssignment *Assign = readProvideOrAssignment(Tok)) {
643       Cmd->Commands.push_back(Assign);
644     } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) {
645       Cmd->Commands.push_back(Data);
646     } else if (Tok == "ASSERT") {
647       Cmd->Commands.push_back(readAssert());
648       expect(";");
649     } else if (Tok == "CONSTRUCTORS") {
650       // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors
651       // by name. This is for very old file formats such as ECOFF/XCOFF.
652       // For ELF, we should ignore.
653     } else if (Tok == "FILL") {
654       Cmd->Filler = readFill();
655     } else if (Tok == "SORT") {
656       readSort();
657     } else if (peek() == "(") {
658       Cmd->Commands.push_back(readInputSectionDescription(Tok));
659     } else {
660       setError("unknown command " + Tok);
661     }
662   }
663 
664   if (consume(">"))
665     Cmd->MemoryRegionName = next();
666   else if (peek().startswith(">"))
667     Cmd->MemoryRegionName = next().drop_front();
668 
669   Cmd->Phdrs = readOutputSectionPhdrs();
670 
671   if (consume("="))
672     Cmd->Filler = parseFill(next());
673   else if (peek().startswith("="))
674     Cmd->Filler = parseFill(next().drop_front());
675 
676   // Consume optional comma following output section command.
677   consume(",");
678 
679   return Cmd;
680 }
681 
682 // Parses a given string as a octal/decimal/hexadecimal number and
683 // returns it as a big-endian number. Used for `=<fillexp>`.
684 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html
685 //
686 // When reading a hexstring, ld.bfd handles it as a blob of arbitrary
687 // size, while ld.gold always handles it as a 32-bit big-endian number.
688 // We are compatible with ld.gold because it's easier to implement.
689 uint32_t ScriptParser::parseFill(StringRef Tok) {
690   uint32_t V = 0;
691   if (!to_integer(Tok, V))
692     setError("invalid filler expression: " + Tok);
693 
694   uint32_t Buf;
695   write32be(&Buf, V);
696   return Buf;
697 }
698 
699 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) {
700   expect("(");
701   SymbolAssignment *Cmd = readAssignment(next());
702   Cmd->Provide = Provide;
703   Cmd->Hidden = Hidden;
704   expect(")");
705   expect(";");
706   return Cmd;
707 }
708 
709 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) {
710   SymbolAssignment *Cmd = nullptr;
711   if (peek() == "=" || peek() == "+=") {
712     Cmd = readAssignment(Tok);
713     expect(";");
714   } else if (Tok == "PROVIDE") {
715     Cmd = readProvideHidden(true, false);
716   } else if (Tok == "HIDDEN") {
717     Cmd = readProvideHidden(false, true);
718   } else if (Tok == "PROVIDE_HIDDEN") {
719     Cmd = readProvideHidden(true, true);
720   }
721   return Cmd;
722 }
723 
724 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) {
725   StringRef Op = next();
726   assert(Op == "=" || Op == "+=");
727   Expr E = readExpr();
728   if (Op == "+=") {
729     std::string Loc = getCurrentLocation();
730     E = [=] { return add(Script->getSymbolValue(Loc, Name), E()); };
731   }
732   return make<SymbolAssignment>(Name, E, getCurrentLocation());
733 }
734 
735 // This is an operator-precedence parser to parse a linker
736 // script expression.
737 Expr ScriptParser::readExpr() {
738   // Our lexer is context-aware. Set the in-expression bit so that
739   // they apply different tokenization rules.
740   bool Orig = InExpr;
741   InExpr = true;
742   Expr E = readExpr1(readPrimary(), 0);
743   InExpr = Orig;
744   return E;
745 }
746 
747 static Expr combine(StringRef Op, Expr L, Expr R) {
748   if (Op == "+")
749     return [=] { return add(L(), R()); };
750   if (Op == "-")
751     return [=] { return sub(L(), R()); };
752   if (Op == "*")
753     return [=] { return mul(L(), R()); };
754   if (Op == "/")
755     return [=] { return div(L(), R()); };
756   if (Op == "<<")
757     return [=] { return L().getValue() << R().getValue(); };
758   if (Op == ">>")
759     return [=] { return L().getValue() >> R().getValue(); };
760   if (Op == "<")
761     return [=] { return L().getValue() < R().getValue(); };
762   if (Op == ">")
763     return [=] { return L().getValue() > R().getValue(); };
764   if (Op == ">=")
765     return [=] { return L().getValue() >= R().getValue(); };
766   if (Op == "<=")
767     return [=] { return L().getValue() <= R().getValue(); };
768   if (Op == "==")
769     return [=] { return L().getValue() == R().getValue(); };
770   if (Op == "!=")
771     return [=] { return L().getValue() != R().getValue(); };
772   if (Op == "&")
773     return [=] { return bitAnd(L(), R()); };
774   if (Op == "|")
775     return [=] { return bitOr(L(), R()); };
776   llvm_unreachable("invalid operator");
777 }
778 
// This is a part of the operator-precedence parser. This function
// assumes that the remaining token stream starts with an operator.
//
// Lhs is the operand parsed so far and MinPrec is the lowest operator
// precedence this invocation is allowed to consume. The function returns
// the expression built from Lhs and all following "<op> <primary>" pairs
// whose operator precedence is at least MinPrec.
Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) {
  while (!atEOF() && !ErrorCount) {
    // Read an operator and an expression.
    // "?" hands everything parsed so far to readTernary as the condition.
    if (consume("?"))
      return readTernary(Lhs);
    StringRef Op1 = peek();
    // Non-operators have precedence -1, so they also end the loop here.
    if (precedence(Op1) < MinPrec)
      break;
    skip();
    Expr Rhs = readPrimary();

    // Evaluate the remaining part of the expression first if the
    // next operator has greater precedence than the previous one.
    // For example, if we have read "+" and "3", and if the next
    // operator is "*", then we'll evaluate 3 * ... part first.
    while (!atEOF()) {
      StringRef Op2 = peek();
      if (precedence(Op2) <= precedence(Op1))
        break;
      Rhs = readExpr1(Rhs, precedence(Op2));
    }

    Lhs = combine(Op1, Lhs, Rhs);
  }
  return Lhs;
}
807 
808 Expr ScriptParser::getPageSize() {
809   std::string Location = getCurrentLocation();
810   return [=]() -> uint64_t {
811     if (Target)
812       return Target->PageSize;
813     error(Location + ": unable to calculate page size");
814     return 4096; // Return a dummy value.
815   };
816 }
817 
818 Expr ScriptParser::readConstant() {
819   StringRef S = readParenLiteral();
820   if (S == "COMMONPAGESIZE")
821     return getPageSize();
822   if (S == "MAXPAGESIZE")
823     return [] { return Config->MaxPageSize; };
824   setError("unknown constant: " + S);
825   return {};
826 }
827 
828 // Parses Tok as an integer. It recognizes hexadecimal (prefixed with
829 // "0x" or suffixed with "H") and decimal numbers. Decimal numbers may
830 // have "K" (Ki) or "M" (Mi) suffixes.
831 static Optional<uint64_t> parseInt(StringRef Tok) {
832   // Negative number
833   if (Tok.startswith("-")) {
834     if (Optional<uint64_t> Val = parseInt(Tok.substr(1)))
835       return -*Val;
836     return None;
837   }
838 
839   // Hexadecimal
840   uint64_t Val;
841   if (Tok.startswith_lower("0x") && to_integer(Tok.substr(2), Val, 16))
842     return Val;
843   if (Tok.endswith_lower("H") && to_integer(Tok.drop_back(), Val, 16))
844     return Val;
845 
846   // Decimal
847   if (Tok.endswith_lower("K")) {
848     if (!to_integer(Tok.drop_back(), Val, 10))
849       return None;
850     return Val * 1024;
851   }
852   if (Tok.endswith_lower("M")) {
853     if (!to_integer(Tok.drop_back(), Val, 10))
854       return None;
855     return Val * 1024 * 1024;
856   }
857   if (!to_integer(Tok, Val, 10))
858     return None;
859   return Val;
860 }
861 
862 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) {
863   int Size = StringSwitch<int>(Tok)
864                  .Case("BYTE", 1)
865                  .Case("SHORT", 2)
866                  .Case("LONG", 4)
867                  .Case("QUAD", 8)
868                  .Default(-1);
869   if (Size == -1)
870     return nullptr;
871 
872   return make<BytesDataCommand>(readParenExpr(), Size);
873 }
874 
875 StringRef ScriptParser::readParenLiteral() {
876   expect("(");
877   StringRef Tok = next();
878   expect(")");
879   return Tok;
880 }
881 
882 OutputSection *ScriptParser::checkSection(OutputSection *Cmd,
883                                           StringRef Location) {
884   if (Cmd->Location.empty() && Script->ErrorOnMissingSection)
885     error(Location + ": undefined section " + Cmd->Name);
886   return Cmd;
887 }
888 
// Reads a primary expression: a parenthesized expression, a unary
// operator applied to a primary, a built-in function call, the dot, a
// literal number, or a symbol name. The returned Expr is a closure; most
// of the work below happens only when that closure is evaluated later.
Expr ScriptParser::readPrimary() {
  if (peek() == "(")
    return readParenExpr();

  // Unary operators. Each one applies to the primary that follows it.
  if (consume("~")) {
    Expr E = readPrimary();
    return [=] { return ~E().getValue(); };
  }
  if (consume("!")) {
    Expr E = readPrimary();
    return [=] { return !E().getValue(); };
  }
  if (consume("-")) {
    Expr E = readPrimary();
    return [=] { return -E().getValue(); };
  }

  StringRef Tok = next();
  // Captured by the closures below for use in diagnostics.
  std::string Location = getCurrentLocation();

  // Built-in functions are parsed here.
  // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html.
  if (Tok == "ABSOLUTE") {
    Expr Inner = readParenExpr();
    return [=] {
      ExprValue I = Inner();
      I.ForceAbsolute = true;
      return I;
    };
  }
  if (Tok == "ADDR") {
    StringRef Name = readParenLiteral();
    OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
    // The section is checked at evaluation time, not while parsing.
    return [=]() -> ExprValue {
      return {checkSection(Cmd, Location), 0, Location};
    };
  }
  if (Tok == "ALIGN") {
    expect("(");
    Expr E = readExpr();
    // One-argument form: align the dot. An alignment of 0 is treated as 1.
    if (consume(")"))
      return [=] {
        return alignTo(Script->getDot(), std::max((uint64_t)1, E().getValue()));
      };
    // Two-argument form: the alignment is attached to the value of the
    // first argument instead of being applied right away.
    expect(",");
    Expr E2 = readExpr();
    expect(")");
    return [=] {
      ExprValue V = E();
      V.Alignment = std::max((uint64_t)1, E2().getValue());
      return V;
    };
  }
  if (Tok == "ALIGNOF") {
    StringRef Name = readParenLiteral();
    OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
    return [=] { return checkSection(Cmd, Location)->Alignment; };
  }
  if (Tok == "ASSERT")
    return readAssertExpr();
  if (Tok == "CONSTANT")
    return readConstant();
  if (Tok == "DATA_SEGMENT_ALIGN") {
    // The second argument is parsed but its value is not used.
    expect("(");
    Expr E = readExpr();
    expect(",");
    readExpr();
    expect(")");
    return [=] {
      return alignTo(Script->getDot(), std::max((uint64_t)1, E().getValue()));
    };
  }
  if (Tok == "DATA_SEGMENT_END") {
    // Only the literal form DATA_SEGMENT_END(.) is accepted.
    expect("(");
    expect(".");
    expect(")");
    return [] { return Script->getDot(); };
  }
  if (Tok == "DATA_SEGMENT_RELRO_END") {
    // GNU linkers implements more complicated logic to handle
    // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and
    // just align to the next page boundary for simplicity.
    expect("(");
    readExpr();
    expect(",");
    readExpr();
    expect(")");
    Expr E = getPageSize();
    return [=] { return alignTo(Script->getDot(), E().getValue()); };
  }
  if (Tok == "DEFINED") {
    StringRef Name = readParenLiteral();
    return [=] { return Script->isDefined(Name) ? 1 : 0; };
  }
  if (Tok == "LENGTH") {
    StringRef Name = readParenLiteral();
    // The region must already be known at parse time.
    if (Script->Opt.MemoryRegions.count(Name) == 0)
      setError("memory region not defined: " + Name);
    return [=] { return Script->Opt.MemoryRegions[Name].Length; };
  }
  if (Tok == "LOADADDR") {
    StringRef Name = readParenLiteral();
    OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
    return [=] { return checkSection(Cmd, Location)->getLMA(); };
  }
  if (Tok == "ORIGIN") {
    StringRef Name = readParenLiteral();
    // The region must already be known at parse time.
    if (Script->Opt.MemoryRegions.count(Name) == 0)
      setError("memory region not defined: " + Name);
    return [=] { return Script->Opt.MemoryRegions[Name].Origin; };
  }
  if (Tok == "SEGMENT_START") {
    // The first argument is skipped; the result is the value of the
    // second argument.
    expect("(");
    skip();
    expect(",");
    Expr E = readExpr();
    expect(")");
    return [=] { return E(); };
  }
  if (Tok == "SIZEOF") {
    StringRef Name = readParenLiteral();
    OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
    // Linker script does not create an output section if its content is empty.
    // We want to allow SIZEOF(.foo) where .foo is a section which happened to
    // be empty.
    return [=] { return Cmd->Size; };
  }
  if (Tok == "SIZEOF_HEADERS")
    return [=] { return elf::getHeaderSize(); };

  // Tok is the dot.
  if (Tok == ".")
    return [=] { return Script->getSymbolValue(Location, Tok); };

  // Tok is a literal number.
  if (Optional<uint64_t> Val = parseInt(Tok))
    return [=] { return *Val; };

  // Tok is a symbol name.
  // A token that is not a valid C identifier is reported as a malformed
  // number, but it is still recorded and looked up as a symbol below.
  if (!isValidCIdentifier(Tok))
    setError("malformed number: " + Tok);
  Script->Opt.ReferencedSymbols.push_back(Tok);
  return [=] { return Script->getSymbolValue(Location, Tok); };
}
1033 
1034 Expr ScriptParser::readTernary(Expr Cond) {
1035   Expr L = readExpr();
1036   expect(":");
1037   Expr R = readExpr();
1038   return [=] { return Cond().getValue() ? L() : R(); };
1039 }
1040 
1041 Expr ScriptParser::readParenExpr() {
1042   expect("(");
1043   Expr E = readExpr();
1044   expect(")");
1045   return E;
1046 }
1047 
1048 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() {
1049   std::vector<StringRef> Phdrs;
1050   while (!ErrorCount && peek().startswith(":")) {
1051     StringRef Tok = next();
1052     Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1));
1053   }
1054   return Phdrs;
1055 }
1056 
1057 // Read a program header type name. The next token must be a
1058 // name of a program header type or a constant (e.g. "0x3").
1059 unsigned ScriptParser::readPhdrType() {
1060   StringRef Tok = next();
1061   if (Optional<uint64_t> Val = parseInt(Tok))
1062     return *Val;
1063 
1064   unsigned Ret = StringSwitch<unsigned>(Tok)
1065                      .Case("PT_NULL", PT_NULL)
1066                      .Case("PT_LOAD", PT_LOAD)
1067                      .Case("PT_DYNAMIC", PT_DYNAMIC)
1068                      .Case("PT_INTERP", PT_INTERP)
1069                      .Case("PT_NOTE", PT_NOTE)
1070                      .Case("PT_SHLIB", PT_SHLIB)
1071                      .Case("PT_PHDR", PT_PHDR)
1072                      .Case("PT_TLS", PT_TLS)
1073                      .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME)
1074                      .Case("PT_GNU_STACK", PT_GNU_STACK)
1075                      .Case("PT_GNU_RELRO", PT_GNU_RELRO)
1076                      .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE)
1077                      .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED)
1078                      .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA)
1079                      .Default(-1);
1080 
1081   if (Ret == (unsigned)-1) {
1082     setError("invalid program header type: " + Tok);
1083     return PT_NULL;
1084   }
1085   return Ret;
1086 }
1087 
1088 // Reads an anonymous version declaration.
1089 void ScriptParser::readAnonymousDeclaration() {
1090   std::vector<SymbolVersion> Locals;
1091   std::vector<SymbolVersion> Globals;
1092   std::tie(Locals, Globals) = readSymbols();
1093 
1094   for (SymbolVersion V : Locals) {
1095     if (V.Name == "*")
1096       Config->DefaultSymbolVersion = VER_NDX_LOCAL;
1097     else
1098       Config->VersionScriptLocals.push_back(V);
1099   }
1100 
1101   for (SymbolVersion V : Globals)
1102     Config->VersionScriptGlobals.push_back(V);
1103 
1104   expect(";");
1105 }
1106 
1107 // Reads a non-anonymous version definition,
1108 // e.g. "VerStr { global: foo; bar; local: *; };".
1109 void ScriptParser::readVersionDeclaration(StringRef VerStr) {
1110   // Read a symbol list.
1111   std::vector<SymbolVersion> Locals;
1112   std::vector<SymbolVersion> Globals;
1113   std::tie(Locals, Globals) = readSymbols();
1114 
1115   for (SymbolVersion V : Locals) {
1116     if (V.Name == "*")
1117       Config->DefaultSymbolVersion = VER_NDX_LOCAL;
1118     else
1119       Config->VersionScriptLocals.push_back(V);
1120   }
1121 
1122   // Create a new version definition and add that to the global symbols.
1123   VersionDefinition Ver;
1124   Ver.Name = VerStr;
1125   Ver.Globals = Globals;
1126 
1127   // User-defined version number starts from 2 because 0 and 1 are
1128   // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively.
1129   Ver.Id = Config->VersionDefinitions.size() + 2;
1130   Config->VersionDefinitions.push_back(Ver);
1131 
1132   // Each version may have a parent version. For example, "Ver2"
1133   // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1"
1134   // as a parent. This version hierarchy is, probably against your
1135   // instinct, purely for hint; the runtime doesn't care about it
1136   // at all. In LLD, we simply ignore it.
1137   if (peek() != ";")
1138     skip();
1139   expect(";");
1140 }
1141 
1142 static bool hasWildcard(StringRef S) {
1143   return S.find_first_of("?*[") != StringRef::npos;
1144 }
1145 
1146 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };".
1147 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>>
1148 ScriptParser::readSymbols() {
1149   std::vector<SymbolVersion> Locals;
1150   std::vector<SymbolVersion> Globals;
1151   std::vector<SymbolVersion> *V = &Globals;
1152 
1153   while (!ErrorCount) {
1154     if (consume("}"))
1155       break;
1156     if (consumeLabel("local")) {
1157       V = &Locals;
1158       continue;
1159     }
1160     if (consumeLabel("global")) {
1161       V = &Globals;
1162       continue;
1163     }
1164 
1165     if (consume("extern")) {
1166       std::vector<SymbolVersion> Ext = readVersionExtern();
1167       V->insert(V->end(), Ext.begin(), Ext.end());
1168     } else {
1169       StringRef Tok = next();
1170       V->push_back({unquote(Tok), false, hasWildcard(Tok)});
1171     }
1172     expect(";");
1173   }
1174   return {Locals, Globals};
1175 }
1176 
1177 // Reads an "extern C++" directive, e.g.,
1178 // "extern "C++" { ns::*; "f(int, double)"; };"
1179 std::vector<SymbolVersion> ScriptParser::readVersionExtern() {
1180   StringRef Tok = next();
1181   bool IsCXX = Tok == "\"C++\"";
1182   if (!IsCXX && Tok != "\"C\"")
1183     setError("Unknown language");
1184   expect("{");
1185 
1186   std::vector<SymbolVersion> Ret;
1187   while (!ErrorCount && peek() != "}") {
1188     StringRef Tok = next();
1189     bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok);
1190     Ret.push_back({unquote(Tok), IsCXX, HasWildcard});
1191     expect(";");
1192   }
1193 
1194   expect("}");
1195   return Ret;
1196 }
1197 
1198 uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2,
1199                                             StringRef S3) {
1200   if (!consume(S1) && !consume(S2) && !consume(S3)) {
1201     setError("expected one of: " + S1 + ", " + S2 + ", or " + S3);
1202     return 0;
1203   }
1204   expect("=");
1205   return readExpr()().getValue();
1206 }
1207 
1208 // Parse the MEMORY command as specified in:
1209 // https://sourceware.org/binutils/docs/ld/MEMORY.html
1210 //
1211 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... }
1212 void ScriptParser::readMemory() {
1213   expect("{");
1214   while (!ErrorCount && !consume("}")) {
1215     StringRef Name = next();
1216 
1217     uint32_t Flags = 0;
1218     uint32_t NegFlags = 0;
1219     if (consume("(")) {
1220       std::tie(Flags, NegFlags) = readMemoryAttributes();
1221       expect(")");
1222     }
1223     expect(":");
1224 
1225     uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o");
1226     expect(",");
1227     uint64_t Length = readMemoryAssignment("LENGTH", "len", "l");
1228 
1229     // Add the memory region to the region map (if it doesn't already exist).
1230     auto It = Script->Opt.MemoryRegions.find(Name);
1231     if (It != Script->Opt.MemoryRegions.end())
1232       setError("region '" + Name + "' already defined");
1233     else
1234       Script->Opt.MemoryRegions[Name] = {Name, Origin, Length, Flags, NegFlags};
1235   }
1236 }
1237 
1238 // This function parses the attributes used to match against section
1239 // flags when placing output sections in a memory region. These flags
1240 // are only used when an explicit memory region name is not used.
1241 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() {
1242   uint32_t Flags = 0;
1243   uint32_t NegFlags = 0;
1244   bool Invert = false;
1245 
1246   for (char C : next().lower()) {
1247     uint32_t Flag = 0;
1248     if (C == '!')
1249       Invert = !Invert;
1250     else if (C == 'w')
1251       Flag = SHF_WRITE;
1252     else if (C == 'x')
1253       Flag = SHF_EXECINSTR;
1254     else if (C == 'a')
1255       Flag = SHF_ALLOC;
1256     else if (C != 'r')
1257       setError("invalid memory region attribute");
1258 
1259     if (Invert)
1260       NegFlags |= Flag;
1261     else
1262       Flags |= Flag;
1263   }
1264   return {Flags, NegFlags};
1265 }
1266 
1267 void elf::readLinkerScript(MemoryBufferRef MB) {
1268   ScriptParser(MB).readLinkerScript();
1269 }
1270 
1271 void elf::readVersionScript(MemoryBufferRef MB) {
1272   ScriptParser(MB).readVersionScript();
1273 }
1274 
1275 void elf::readDynamicList(MemoryBufferRef MB) {
1276   ScriptParser(MB).readDynamicList();
1277 }
1278