1 //===- ScriptParser.cpp ---------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// This file contains a recursive-descent parser for linker scripts.
11 // Parsed results are stored to Config and Script global objects.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ScriptParser.h"
16 #include "Config.h"
17 #include "Driver.h"
18 #include "InputSection.h"
19 #include "LinkerScript.h"
20 #include "OutputSections.h"
21 #include "ScriptLexer.h"
22 #include "Symbols.h"
23 #include "Target.h"
24 #include "lld/Common/Memory.h"
25 #include "llvm/ADT/SmallString.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/ADT/StringSet.h"
28 #include "llvm/ADT/StringSwitch.h"
29 #include "llvm/BinaryFormat/ELF.h"
30 #include "llvm/Support/Casting.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/FileSystem.h"
33 #include "llvm/Support/Path.h"
34 #include <cassert>
35 #include <limits>
36 #include <vector>
37 
38 using namespace llvm;
39 using namespace llvm::ELF;
40 using namespace llvm::support::endian;
41 using namespace lld;
42 using namespace lld::elf;
43 
44 static bool isUnderSysroot(StringRef Path);
45 
46 namespace {
47 class ScriptParser final : ScriptLexer {
48 public:
49   ScriptParser(MemoryBufferRef MB)
50       : ScriptLexer(MB),
51         IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {}
52 
53   void readLinkerScript();
54   void readVersionScript();
55   void readDynamicList();
56   void readDefsym(StringRef Name);
57 
58 private:
59   void addFile(StringRef Path);
60 
61   void readAsNeeded();
62   void readEntry();
63   void readExtern();
64   void readGroup();
65   void readInclude();
66   void readMemory();
67   void readOutput();
68   void readOutputArch();
69   void readOutputFormat();
70   void readPhdrs();
71   void readRegionAlias();
72   void readSearchDir();
73   void readSections();
74   void readVersion();
75   void readVersionScriptCommand();
76 
77   SymbolAssignment *readAssignment(StringRef Name);
78   ByteCommand *readByteCommand(StringRef Tok);
79   uint32_t readFill();
80   uint32_t parseFill(StringRef Tok);
81   void readSectionAddressType(OutputSection *Cmd);
82   OutputSection *readOutputSectionDescription(StringRef OutSec);
83   std::vector<StringRef> readOutputSectionPhdrs();
84   InputSectionDescription *readInputSectionDescription(StringRef Tok);
85   StringMatcher readFilePatterns();
86   std::vector<SectionPattern> readInputSectionsList();
87   InputSectionDescription *readInputSectionRules(StringRef FilePattern);
88   unsigned readPhdrType();
89   SortSectionPolicy readSortKind();
90   SymbolAssignment *readProvideHidden(bool Provide, bool Hidden);
91   SymbolAssignment *readProvideOrAssignment(StringRef Tok);
92   void readSort();
93   AssertCommand *readAssert();
94   Expr readAssertExpr();
95   Expr readConstant();
96   Expr getPageSize();
97 
98   uint64_t readMemoryAssignment(StringRef, StringRef, StringRef);
99   std::pair<uint32_t, uint32_t> readMemoryAttributes();
100 
101   Expr combine(StringRef Op, Expr L, Expr R);
102   Expr readExpr();
103   Expr readExpr1(Expr Lhs, int MinPrec);
104   StringRef readParenLiteral();
105   Expr readPrimary();
106   Expr readTernary(Expr Cond);
107   Expr readParenExpr();
108 
109   // For parsing version script.
110   std::vector<SymbolVersion> readVersionExtern();
111   void readAnonymousDeclaration();
112   void readVersionDeclaration(StringRef VerStr);
113 
114   std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>>
115   readSymbols();
116 
117   // True if a script being read is in a subdirectory specified by -sysroot.
118   bool IsUnderSysroot;
119 
120   // A set to detect an INCLUDE() cycle.
121   StringSet<> Seen;
122 };
123 } // namespace
124 
125 static StringRef unquote(StringRef S) {
126   if (S.startswith("\""))
127     return S.substr(1, S.size() - 2);
128   return S;
129 }
130 
131 static bool isUnderSysroot(StringRef Path) {
132   if (Config->Sysroot == "")
133     return false;
134   for (; !Path.empty(); Path = sys::path::parent_path(Path))
135     if (sys::fs::equivalent(Config->Sysroot, Path))
136       return true;
137   return false;
138 }
139 
140 // Some operations only support one non absolute value. Move the
141 // absolute one to the right hand side for convenience.
142 static void moveAbsRight(ExprValue &A, ExprValue &B) {
143   if (A.Sec == nullptr || (A.ForceAbsolute && !B.isAbsolute()))
144     std::swap(A, B);
145   if (!B.isAbsolute())
146     error(A.Loc + ": at least one side of the expression must be absolute");
147 }
148 
149 static ExprValue add(ExprValue A, ExprValue B) {
150   moveAbsRight(A, B);
151   return {A.Sec, A.ForceAbsolute, A.getSectionOffset() + B.getValue(), A.Loc};
152 }
153 
154 static ExprValue sub(ExprValue A, ExprValue B) {
155   // The distance between two symbols in sections is absolute.
156   if (!A.isAbsolute() && !B.isAbsolute())
157     return A.getValue() - B.getValue();
158   return {A.Sec, false, A.getSectionOffset() - B.getValue(), A.Loc};
159 }
160 
161 static ExprValue bitAnd(ExprValue A, ExprValue B) {
162   moveAbsRight(A, B);
163   return {A.Sec, A.ForceAbsolute,
164           (A.getValue() & B.getValue()) - A.getSecAddr(), A.Loc};
165 }
166 
167 static ExprValue bitOr(ExprValue A, ExprValue B) {
168   moveAbsRight(A, B);
169   return {A.Sec, A.ForceAbsolute,
170           (A.getValue() | B.getValue()) - A.getSecAddr(), A.Loc};
171 }
172 
173 void ScriptParser::readDynamicList() {
174   Config->HasDynamicList = true;
175   expect("{");
176   std::vector<SymbolVersion> Locals;
177   std::vector<SymbolVersion> Globals;
178   std::tie(Locals, Globals) = readSymbols();
179   expect(";");
180 
181   if (!atEOF()) {
182     setError("EOF expected, but got " + next());
183     return;
184   }
185   if (!Locals.empty()) {
186     setError("\"local:\" scope not supported in --dynamic-list");
187     return;
188   }
189 
190   for (SymbolVersion V : Globals)
191     Config->DynamicList.push_back(V);
192 }
193 
194 void ScriptParser::readVersionScript() {
195   readVersionScriptCommand();
196   if (!atEOF())
197     setError("EOF expected, but got " + next());
198 }
199 
200 void ScriptParser::readVersionScriptCommand() {
201   if (consume("{")) {
202     readAnonymousDeclaration();
203     return;
204   }
205 
206   while (!atEOF() && !errorCount() && peek() != "}") {
207     StringRef VerStr = next();
208     if (VerStr == "{") {
209       setError("anonymous version definition is used in "
210                "combination with other version definitions");
211       return;
212     }
213     expect("{");
214     readVersionDeclaration(VerStr);
215   }
216 }
217 
218 void ScriptParser::readVersion() {
219   expect("{");
220   readVersionScriptCommand();
221   expect("}");
222 }
223 
224 void ScriptParser::readLinkerScript() {
225   while (!atEOF()) {
226     StringRef Tok = next();
227     if (Tok == ";")
228       continue;
229 
230     if (Tok == "ASSERT") {
231       Script->SectionCommands.push_back(readAssert());
232     } else if (Tok == "ENTRY") {
233       readEntry();
234     } else if (Tok == "EXTERN") {
235       readExtern();
236     } else if (Tok == "GROUP" || Tok == "INPUT") {
237       readGroup();
238     } else if (Tok == "INCLUDE") {
239       readInclude();
240     } else if (Tok == "MEMORY") {
241       readMemory();
242     } else if (Tok == "OUTPUT") {
243       readOutput();
244     } else if (Tok == "OUTPUT_ARCH") {
245       readOutputArch();
246     } else if (Tok == "OUTPUT_FORMAT") {
247       readOutputFormat();
248     } else if (Tok == "PHDRS") {
249       readPhdrs();
250     } else if (Tok == "REGION_ALIAS") {
251       readRegionAlias();
252     } else if (Tok == "SEARCH_DIR") {
253       readSearchDir();
254     } else if (Tok == "SECTIONS") {
255       readSections();
256     } else if (Tok == "VERSION") {
257       readVersion();
258     } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) {
259       Script->SectionCommands.push_back(Cmd);
260     } else {
261       setError("unknown directive: " + Tok);
262     }
263   }
264 }
265 
266 void ScriptParser::readDefsym(StringRef Name) {
267   Expr E = readExpr();
268   if (!atEOF())
269     setError("EOF expected, but got " + next());
270   SymbolAssignment *Cmd = make<SymbolAssignment>(Name, E, getCurrentLocation());
271   Script->SectionCommands.push_back(Cmd);
272 }
273 
274 void ScriptParser::addFile(StringRef S) {
275   if (IsUnderSysroot && S.startswith("/")) {
276     SmallString<128> PathData;
277     StringRef Path = (Config->Sysroot + S).toStringRef(PathData);
278     if (sys::fs::exists(Path)) {
279       Driver->addFile(Saver.save(Path), /*WithLOption=*/false);
280       return;
281     }
282   }
283 
284   if (S.startswith("/")) {
285     Driver->addFile(S, /*WithLOption=*/false);
286   } else if (S.startswith("=")) {
287     if (Config->Sysroot.empty())
288       Driver->addFile(S.substr(1), /*WithLOption=*/false);
289     else
290       Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)),
291                       /*WithLOption=*/false);
292   } else if (S.startswith("-l")) {
293     Driver->addLibrary(S.substr(2));
294   } else if (sys::fs::exists(S)) {
295     Driver->addFile(S, /*WithLOption=*/false);
296   } else {
297     if (Optional<std::string> Path = findFromSearchPaths(S))
298       Driver->addFile(Saver.save(*Path), /*WithLOption=*/true);
299     else
300       setError("unable to find " + S);
301   }
302 }
303 
304 void ScriptParser::readAsNeeded() {
305   expect("(");
306   bool Orig = Config->AsNeeded;
307   Config->AsNeeded = true;
308   while (!errorCount() && !consume(")"))
309     addFile(unquote(next()));
310   Config->AsNeeded = Orig;
311 }
312 
313 void ScriptParser::readEntry() {
314   // -e <symbol> takes predecence over ENTRY(<symbol>).
315   expect("(");
316   StringRef Tok = next();
317   if (Config->Entry.empty())
318     Config->Entry = Tok;
319   expect(")");
320 }
321 
322 void ScriptParser::readExtern() {
323   expect("(");
324   while (!errorCount() && !consume(")"))
325     Config->Undefined.push_back(next());
326 }
327 
328 void ScriptParser::readGroup() {
329   expect("(");
330   while (!errorCount() && !consume(")")) {
331     if (consume("AS_NEEDED"))
332       readAsNeeded();
333     else
334       addFile(unquote(next()));
335   }
336 }
337 
338 void ScriptParser::readInclude() {
339   StringRef Tok = unquote(next());
340 
341   if (!Seen.insert(Tok).second) {
342     setError("there is a cycle in linker script INCLUDEs");
343     return;
344   }
345 
346   if (Optional<std::string> Path = searchLinkerScript(Tok)) {
347     if (Optional<MemoryBufferRef> MB = readFile(*Path))
348       tokenize(*MB);
349     return;
350   }
351   setError("cannot find linker script " + Tok);
352 }
353 
354 void ScriptParser::readOutput() {
355   // -o <file> takes predecence over OUTPUT(<file>).
356   expect("(");
357   StringRef Tok = next();
358   if (Config->OutputFile.empty())
359     Config->OutputFile = unquote(Tok);
360   expect(")");
361 }
362 
363 void ScriptParser::readOutputArch() {
364   // OUTPUT_ARCH is ignored for now.
365   expect("(");
366   while (!errorCount() && !consume(")"))
367     skip();
368 }
369 
370 void ScriptParser::readOutputFormat() {
371   // Error checking only for now.
372   expect("(");
373   skip();
374   if (consume(")"))
375     return;
376   expect(",");
377   skip();
378   expect(",");
379   skip();
380   expect(")");
381 }
382 
383 void ScriptParser::readPhdrs() {
384   expect("{");
385 
386   while (!errorCount() && !consume("}")) {
387     PhdrsCommand Cmd;
388     Cmd.Name = next();
389     Cmd.Type = readPhdrType();
390 
391     while (!errorCount() && !consume(";")) {
392       if (consume("FILEHDR"))
393         Cmd.HasFilehdr = true;
394       else if (consume("PHDRS"))
395         Cmd.HasPhdrs = true;
396       else if (consume("AT"))
397         Cmd.LMAExpr = readParenExpr();
398       else if (consume("FLAGS"))
399         Cmd.Flags = readParenExpr()().getValue();
400       else
401         setError("unexpected header attribute: " + next());
402     }
403 
404     Script->PhdrsCommands.push_back(Cmd);
405   }
406 }
407 
408 void ScriptParser::readRegionAlias() {
409   expect("(");
410   StringRef Alias = unquote(next());
411   expect(",");
412   StringRef Name = next();
413   expect(")");
414 
415   if (Script->MemoryRegions.count(Alias))
416     setError("redefinition of memory region '" + Alias + "'");
417   if (!Script->MemoryRegions.count(Name))
418     setError("memory region '" + Name + "' is not defined");
419   Script->MemoryRegions.insert({Alias, Script->MemoryRegions[Name]});
420 }
421 
422 void ScriptParser::readSearchDir() {
423   expect("(");
424   StringRef Tok = next();
425   if (!Config->Nostdlib)
426     Config->SearchPaths.push_back(unquote(Tok));
427   expect(")");
428 }
429 
430 void ScriptParser::readSections() {
431   Script->HasSectionsCommand = true;
432 
433   // -no-rosegment is used to avoid placing read only non-executable sections in
434   // their own segment. We do the same if SECTIONS command is present in linker
435   // script. See comment for computeFlags().
436   Config->SingleRoRx = true;
437 
438   expect("{");
439   std::vector<BaseCommand *> V;
440   while (!errorCount() && !consume("}")) {
441     StringRef Tok = next();
442     BaseCommand *Cmd = readProvideOrAssignment(Tok);
443     if (!Cmd) {
444       if (Tok == "ASSERT")
445         Cmd = readAssert();
446       else
447         Cmd = readOutputSectionDescription(Tok);
448     }
449     V.push_back(Cmd);
450   }
451 
452   if (!atEOF() && consume("INSERT")) {
453     consume("AFTER");
454     std::vector<BaseCommand *> &Dest = Script->InsertAfterCommands[next()];
455     Dest.insert(Dest.end(), V.begin(), V.end());
456     return;
457   }
458 
459   Script->SectionCommands.insert(Script->SectionCommands.end(), V.begin(),
460                                  V.end());
461 }
462 
463 static int precedence(StringRef Op) {
464   return StringSwitch<int>(Op)
465       .Cases("*", "/", "%", 5)
466       .Cases("+", "-", 4)
467       .Cases("<<", ">>", 3)
468       .Cases("<", "<=", ">", ">=", "==", "!=", 2)
469       .Cases("&", "|", 1)
470       .Default(-1);
471 }
472 
473 StringMatcher ScriptParser::readFilePatterns() {
474   std::vector<StringRef> V;
475   while (!errorCount() && !consume(")"))
476     V.push_back(next());
477   return StringMatcher(V);
478 }
479 
480 SortSectionPolicy ScriptParser::readSortKind() {
481   if (consume("SORT") || consume("SORT_BY_NAME"))
482     return SortSectionPolicy::Name;
483   if (consume("SORT_BY_ALIGNMENT"))
484     return SortSectionPolicy::Alignment;
485   if (consume("SORT_BY_INIT_PRIORITY"))
486     return SortSectionPolicy::Priority;
487   if (consume("SORT_NONE"))
488     return SortSectionPolicy::None;
489   return SortSectionPolicy::Default;
490 }
491 
492 // Reads SECTIONS command contents in the following form:
493 //
494 // <contents> ::= <elem>*
495 // <elem>     ::= <exclude>? <glob-pattern>
496 // <exclude>  ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")"
497 //
498 // For example,
499 //
500 // *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz)
501 //
502 // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o".
503 // The semantics of that is section .foo in any file, section .bar in
504 // any file but a.o, and section .baz in any file but b.o.
505 std::vector<SectionPattern> ScriptParser::readInputSectionsList() {
506   std::vector<SectionPattern> Ret;
507   while (!errorCount() && peek() != ")") {
508     StringMatcher ExcludeFilePat;
509     if (consume("EXCLUDE_FILE")) {
510       expect("(");
511       ExcludeFilePat = readFilePatterns();
512     }
513 
514     std::vector<StringRef> V;
515     while (!errorCount() && peek() != ")" && peek() != "EXCLUDE_FILE")
516       V.push_back(next());
517 
518     if (!V.empty())
519       Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)});
520     else
521       setError("section pattern is expected");
522   }
523   return Ret;
524 }
525 
526 // Reads contents of "SECTIONS" directive. That directive contains a
527 // list of glob patterns for input sections. The grammar is as follows.
528 //
529 // <patterns> ::= <section-list>
530 //              | <sort> "(" <section-list> ")"
531 //              | <sort> "(" <sort> "(" <section-list> ")" ")"
532 //
533 // <sort>     ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT"
534 //              | "SORT_BY_INIT_PRIORITY" | "SORT_NONE"
535 //
536 // <section-list> is parsed by readInputSectionsList().
537 InputSectionDescription *
538 ScriptParser::readInputSectionRules(StringRef FilePattern) {
539   auto *Cmd = make<InputSectionDescription>(FilePattern);
540   expect("(");
541 
542   while (!errorCount() && !consume(")")) {
543     SortSectionPolicy Outer = readSortKind();
544     SortSectionPolicy Inner = SortSectionPolicy::Default;
545     std::vector<SectionPattern> V;
546     if (Outer != SortSectionPolicy::Default) {
547       expect("(");
548       Inner = readSortKind();
549       if (Inner != SortSectionPolicy::Default) {
550         expect("(");
551         V = readInputSectionsList();
552         expect(")");
553       } else {
554         V = readInputSectionsList();
555       }
556       expect(")");
557     } else {
558       V = readInputSectionsList();
559     }
560 
561     for (SectionPattern &Pat : V) {
562       Pat.SortInner = Inner;
563       Pat.SortOuter = Outer;
564     }
565 
566     std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns));
567   }
568   return Cmd;
569 }
570 
571 InputSectionDescription *
572 ScriptParser::readInputSectionDescription(StringRef Tok) {
573   // Input section wildcard can be surrounded by KEEP.
574   // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep
575   if (Tok == "KEEP") {
576     expect("(");
577     StringRef FilePattern = next();
578     InputSectionDescription *Cmd = readInputSectionRules(FilePattern);
579     expect(")");
580     Script->KeptSections.push_back(Cmd);
581     return Cmd;
582   }
583   return readInputSectionRules(Tok);
584 }
585 
586 void ScriptParser::readSort() {
587   expect("(");
588   expect("CONSTRUCTORS");
589   expect(")");
590 }
591 
592 AssertCommand *ScriptParser::readAssert() {
593   return make<AssertCommand>(readAssertExpr());
594 }
595 
596 Expr ScriptParser::readAssertExpr() {
597   expect("(");
598   Expr E = readExpr();
599   expect(",");
600   StringRef Msg = unquote(next());
601   expect(")");
602 
603   return [=] {
604     if (!E().getValue())
605       error(Msg);
606     return Script->getDot();
607   };
608 }
609 
610 // Reads a FILL(expr) command. We handle the FILL command as an
611 // alias for =fillexp section attribute, which is different from
612 // what GNU linkers do.
613 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html
614 uint32_t ScriptParser::readFill() {
615   expect("(");
616   uint32_t V = parseFill(next());
617   expect(")");
618   return V;
619 }
620 
621 // Reads an expression and/or the special directive for an output
622 // section definition. Directive is one of following: "(NOLOAD)",
623 // "(COPY)", "(INFO)" or "(OVERLAY)".
624 //
625 // An output section name can be followed by an address expression
626 // and/or directive. This grammar is not LL(1) because "(" can be
627 // interpreted as either the beginning of some expression or beginning
628 // of directive.
629 //
630 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html
631 // https://sourceware.org/binutils/docs/ld/Output-Section-Type.html
632 void ScriptParser::readSectionAddressType(OutputSection *Cmd) {
633   if (consume("(")) {
634     if (consume("NOLOAD")) {
635       expect(")");
636       Cmd->Noload = true;
637       return;
638     }
639     if (consume("COPY") || consume("INFO") || consume("OVERLAY")) {
640       expect(")");
641       Cmd->NonAlloc = true;
642       return;
643     }
644     Cmd->AddrExpr = readExpr();
645     expect(")");
646   } else {
647     Cmd->AddrExpr = readExpr();
648   }
649 
650   if (consume("(")) {
651     expect("NOLOAD");
652     expect(")");
653     Cmd->Noload = true;
654   }
655 }
656 
657 static Expr checkAlignment(Expr E, std::string &Loc) {
658   return [=] {
659     uint64_t Alignment = std::max((uint64_t)1, E().getValue());
660     if (!isPowerOf2_64(Alignment)) {
661       error(Loc + ": alignment must be power of 2");
662       return (uint64_t)1; // Return a dummy value.
663     }
664     return Alignment;
665   };
666 }
667 
// Reads one output section description whose name OutSec has already been
// consumed by the caller, and returns the OutputSection filled in from it.
//
// The pieces are read in this order: an optional address/type before ":",
// optional AT, ALIGN and SUBALIGN clauses, an optional ONLY_IF_RO/ONLY_IF_RW
// constraint, the "{ ... }" body, an optional ">region", an optional
// "AT>region", the program header list, an optional "=fill" and an optional
// trailing comma.
OutputSection *ScriptParser::readOutputSectionDescription(StringRef OutSec) {
  OutputSection *Cmd =
      Script->createOutputSection(OutSec, getCurrentLocation());

  // Remember how many symbols were referenced so far, so that we can tell
  // at the end whether parsing this description referenced any more.
  size_t SymbolsReferenced = Script->ReferencedSymbols.size();

  if (peek() != ":")
    readSectionAddressType(Cmd);
  expect(":");

  // This location is used in the alignment error messages of both ALIGN
  // and SUBALIGN below.
  std::string Location = getCurrentLocation();
  if (consume("AT"))
    Cmd->LMAExpr = readParenExpr();
  if (consume("ALIGN"))
    Cmd->AlignExpr = checkAlignment(readParenExpr(), Location);
  if (consume("SUBALIGN"))
    Cmd->SubalignExpr = checkAlignment(readParenExpr(), Location);

  // Parse constraints.
  if (consume("ONLY_IF_RO"))
    Cmd->Constraint = ConstraintKind::ReadOnly;
  if (consume("ONLY_IF_RW"))
    Cmd->Constraint = ConstraintKind::ReadWrite;
  expect("{");

  // Read the commands in the body until the closing brace.
  while (!errorCount() && !consume("}")) {
    StringRef Tok = next();
    if (Tok == ";") {
      // Empty commands are allowed. Do nothing here.
    } else if (SymbolAssignment *Assign = readProvideOrAssignment(Tok)) {
      Cmd->SectionCommands.push_back(Assign);
    } else if (ByteCommand *Data = readByteCommand(Tok)) {
      Cmd->SectionCommands.push_back(Data);
    } else if (Tok == "ASSERT") {
      Cmd->SectionCommands.push_back(readAssert());
      expect(";");
    } else if (Tok == "CONSTRUCTORS") {
      // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors
      // by name. This is for very old file formats such as ECOFF/XCOFF.
      // For ELF, we should ignore.
    } else if (Tok == "FILL") {
      Cmd->Filler = readFill();
    } else if (Tok == "SORT") {
      readSort();
    } else if (peek() == "(") {
      // Any other token followed by "(" is taken as the file pattern (or
      // KEEP) of an input section description.
      Cmd->SectionCommands.push_back(readInputSectionDescription(Tok));
    } else {
      setError("unknown command " + Tok);
    }
  }

  // Optional ">region".
  if (consume(">"))
    Cmd->MemoryRegionName = next();

  // Optional "AT>region".
  if (consume("AT")) {
    expect(">");
    Cmd->LMARegionName = next();
  }

  // AT(expr) and AT>region are mutually exclusive.
  if (Cmd->LMAExpr && !Cmd->LMARegionName.empty())
    error("section can't have both LMA and a load region");

  Cmd->Phdrs = readOutputSectionPhdrs();

  // The fill expression may arrive as a separate "=" token followed by the
  // value, or as a single token that starts with "=".
  if (consume("="))
    Cmd->Filler = parseFill(next());
  else if (peek().startswith("="))
    Cmd->Filler = parseFill(next().drop_front());

  // Consume optional comma following output section command.
  consume(",");

  // Record whether any symbol was referenced while parsing this description.
  if (Script->ReferencedSymbols.size() > SymbolsReferenced)
    Cmd->ExpressionsUseSymbols = true;
  return Cmd;
}
744 
745 // Parses a given string as a octal/decimal/hexadecimal number and
746 // returns it as a big-endian number. Used for `=<fillexp>`.
747 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html
748 //
749 // When reading a hexstring, ld.bfd handles it as a blob of arbitrary
750 // size, while ld.gold always handles it as a 32-bit big-endian number.
751 // We are compatible with ld.gold because it's easier to implement.
752 uint32_t ScriptParser::parseFill(StringRef Tok) {
753   uint32_t V = 0;
754   if (!to_integer(Tok, V))
755     setError("invalid filler expression: " + Tok);
756 
757   uint32_t Buf;
758   write32be(&Buf, V);
759   return Buf;
760 }
761 
762 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) {
763   expect("(");
764   SymbolAssignment *Cmd = readAssignment(next());
765   Cmd->Provide = Provide;
766   Cmd->Hidden = Hidden;
767   expect(")");
768   expect(";");
769   return Cmd;
770 }
771 
772 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) {
773   SymbolAssignment *Cmd = nullptr;
774   if (peek() == "=" || peek() == "+=") {
775     Cmd = readAssignment(Tok);
776     expect(";");
777   } else if (Tok == "PROVIDE") {
778     Cmd = readProvideHidden(true, false);
779   } else if (Tok == "HIDDEN") {
780     Cmd = readProvideHidden(false, true);
781   } else if (Tok == "PROVIDE_HIDDEN") {
782     Cmd = readProvideHidden(true, true);
783   }
784   return Cmd;
785 }
786 
787 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) {
788   StringRef Op = next();
789   assert(Op == "=" || Op == "+=");
790   Expr E = readExpr();
791   if (Op == "+=") {
792     std::string Loc = getCurrentLocation();
793     E = [=] { return add(Script->getSymbolValue(Name, Loc), E()); };
794   }
795   return make<SymbolAssignment>(Name, E, getCurrentLocation());
796 }
797 
798 // This is an operator-precedence parser to parse a linker
799 // script expression.
800 Expr ScriptParser::readExpr() {
801   // Our lexer is context-aware. Set the in-expression bit so that
802   // they apply different tokenization rules.
803   bool Orig = InExpr;
804   InExpr = true;
805   Expr E = readExpr1(readPrimary(), 0);
806   InExpr = Orig;
807   return E;
808 }
809 
810 Expr ScriptParser::combine(StringRef Op, Expr L, Expr R) {
811   if (Op == "+")
812     return [=] { return add(L(), R()); };
813   if (Op == "-")
814     return [=] { return sub(L(), R()); };
815   if (Op == "*")
816     return [=] { return L().getValue() * R().getValue(); };
817   if (Op == "/") {
818     std::string Loc = getCurrentLocation();
819     return [=]() -> uint64_t {
820       if (uint64_t RV = R().getValue())
821         return L().getValue() / RV;
822       error(Loc + ": division by zero");
823       return 0;
824     };
825   }
826   if (Op == "%") {
827     std::string Loc = getCurrentLocation();
828     return [=]() -> uint64_t {
829       if (uint64_t RV = R().getValue())
830         return L().getValue() % RV;
831       error(Loc + ": modulo by zero");
832       return 0;
833     };
834   }
835   if (Op == "<<")
836     return [=] { return L().getValue() << R().getValue(); };
837   if (Op == ">>")
838     return [=] { return L().getValue() >> R().getValue(); };
839   if (Op == "<")
840     return [=] { return L().getValue() < R().getValue(); };
841   if (Op == ">")
842     return [=] { return L().getValue() > R().getValue(); };
843   if (Op == ">=")
844     return [=] { return L().getValue() >= R().getValue(); };
845   if (Op == "<=")
846     return [=] { return L().getValue() <= R().getValue(); };
847   if (Op == "==")
848     return [=] { return L().getValue() == R().getValue(); };
849   if (Op == "!=")
850     return [=] { return L().getValue() != R().getValue(); };
851   if (Op == "&")
852     return [=] { return bitAnd(L(), R()); };
853   if (Op == "|")
854     return [=] { return bitOr(L(), R()); };
855   llvm_unreachable("invalid operator");
856 }
857 
// This is a part of the operator-precedence parser. This function
// assumes that the remaining token stream starts with an operator.
//
// Lhs is the operand that has already been read. MinPrec is the lowest
// operator precedence this call is allowed to consume; an operator with
// lower precedence (or a token that is not a binary operator, for which
// precedence() returns -1) ends the loop and is left for the caller.
Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) {
  while (!atEOF() && !errorCount()) {
    // Read an operator and an expression.
    // A "?" turns everything read so far into the condition of a ternary.
    if (consume("?"))
      return readTernary(Lhs);
    StringRef Op1 = peek();
    if (precedence(Op1) < MinPrec)
      break;
    skip();
    Expr Rhs = readPrimary();

    // Evaluate the remaining part of the expression first if the
    // next operator has greater precedence than the previous one.
    // For example, if we have read "+" and "3", and if the next
    // operator is "*", then we'll evaluate 3 * ... part first.
    while (!atEOF()) {
      StringRef Op2 = peek();
      if (precedence(Op2) <= precedence(Op1))
        break;
      Rhs = readExpr1(Rhs, precedence(Op2));
    }

    // Fold what has been read into the new left-hand side and continue
    // with the next operator.
    Lhs = combine(Op1, Lhs, Rhs);
  }
  return Lhs;
}
886 
887 Expr ScriptParser::getPageSize() {
888   std::string Location = getCurrentLocation();
889   return [=]() -> uint64_t {
890     if (Target)
891       return Target->PageSize;
892     error(Location + ": unable to calculate page size");
893     return 4096; // Return a dummy value.
894   };
895 }
896 
897 Expr ScriptParser::readConstant() {
898   StringRef S = readParenLiteral();
899   if (S == "COMMONPAGESIZE")
900     return getPageSize();
901   if (S == "MAXPAGESIZE")
902     return [] { return Config->MaxPageSize; };
903   setError("unknown constant: " + S);
904   return [] { return 0; };
905 }
906 
907 // Parses Tok as an integer. It recognizes hexadecimal (prefixed with
908 // "0x" or suffixed with "H") and decimal numbers. Decimal numbers may
909 // have "K" (Ki) or "M" (Mi) suffixes.
910 static Optional<uint64_t> parseInt(StringRef Tok) {
911   // Hexadecimal
912   uint64_t Val;
913   if (Tok.startswith_lower("0x")) {
914     if (!to_integer(Tok.substr(2), Val, 16))
915       return None;
916     return Val;
917   }
918   if (Tok.endswith_lower("H")) {
919     if (!to_integer(Tok.drop_back(), Val, 16))
920       return None;
921     return Val;
922   }
923 
924   // Decimal
925   if (Tok.endswith_lower("K")) {
926     if (!to_integer(Tok.drop_back(), Val, 10))
927       return None;
928     return Val * 1024;
929   }
930   if (Tok.endswith_lower("M")) {
931     if (!to_integer(Tok.drop_back(), Val, 10))
932       return None;
933     return Val * 1024 * 1024;
934   }
935   if (!to_integer(Tok, Val, 10))
936     return None;
937   return Val;
938 }
939 
940 ByteCommand *ScriptParser::readByteCommand(StringRef Tok) {
941   int Size = StringSwitch<int>(Tok)
942                  .Case("BYTE", 1)
943                  .Case("SHORT", 2)
944                  .Case("LONG", 4)
945                  .Case("QUAD", 8)
946                  .Default(-1);
947   if (Size == -1)
948     return nullptr;
949   return make<ByteCommand>(readParenExpr(), Size);
950 }
951 
952 StringRef ScriptParser::readParenLiteral() {
953   expect("(");
954   bool Orig = InExpr;
955   InExpr = false;
956   StringRef Tok = next();
957   InExpr = Orig;
958   expect(")");
959   return Tok;
960 }
961 
962 static void checkIfExists(OutputSection *Cmd, StringRef Location) {
963   if (Cmd->Location.empty() && Script->ErrorOnMissingSection)
964     error(Location + ": undefined section " + Cmd->Name);
965 }
966 
967 Expr ScriptParser::readPrimary() {
968   if (peek() == "(")
969     return readParenExpr();
970 
971   if (consume("~")) {
972     Expr E = readPrimary();
973     return [=] { return ~E().getValue(); };
974   }
975   if (consume("!")) {
976     Expr E = readPrimary();
977     return [=] { return !E().getValue(); };
978   }
979   if (consume("-")) {
980     Expr E = readPrimary();
981     return [=] { return -E().getValue(); };
982   }
983 
984   StringRef Tok = next();
985   std::string Location = getCurrentLocation();
986 
987   // Built-in functions are parsed here.
988   // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html.
989   if (Tok == "ABSOLUTE") {
990     Expr Inner = readParenExpr();
991     return [=] {
992       ExprValue I = Inner();
993       I.ForceAbsolute = true;
994       return I;
995     };
996   }
997   if (Tok == "ADDR") {
998     StringRef Name = readParenLiteral();
999     OutputSection *Sec = Script->getOrCreateOutputSection(Name);
1000     return [=]() -> ExprValue {
1001       checkIfExists(Sec, Location);
1002       return {Sec, false, 0, Location};
1003     };
1004   }
1005   if (Tok == "ALIGN") {
1006     expect("(");
1007     Expr E = readExpr();
1008     if (consume(")")) {
1009       E = checkAlignment(E, Location);
1010       return [=] { return alignTo(Script->getDot(), E().getValue()); };
1011     }
1012     expect(",");
1013     Expr E2 = checkAlignment(readExpr(), Location);
1014     expect(")");
1015     return [=] {
1016       ExprValue V = E();
1017       V.Alignment = E2().getValue();
1018       return V;
1019     };
1020   }
1021   if (Tok == "ALIGNOF") {
1022     StringRef Name = readParenLiteral();
1023     OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
1024     return [=] {
1025       checkIfExists(Cmd, Location);
1026       return Cmd->Alignment;
1027     };
1028   }
1029   if (Tok == "ASSERT")
1030     return readAssertExpr();
1031   if (Tok == "CONSTANT")
1032     return readConstant();
1033   if (Tok == "DATA_SEGMENT_ALIGN") {
1034     expect("(");
1035     Expr E = readExpr();
1036     expect(",");
1037     readExpr();
1038     expect(")");
1039     return [=] {
1040       return alignTo(Script->getDot(), std::max((uint64_t)1, E().getValue()));
1041     };
1042   }
1043   if (Tok == "DATA_SEGMENT_END") {
1044     expect("(");
1045     expect(".");
1046     expect(")");
1047     return [] { return Script->getDot(); };
1048   }
1049   if (Tok == "DATA_SEGMENT_RELRO_END") {
1050     // GNU linkers implements more complicated logic to handle
1051     // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and
1052     // just align to the next page boundary for simplicity.
1053     expect("(");
1054     readExpr();
1055     expect(",");
1056     readExpr();
1057     expect(")");
1058     Expr E = getPageSize();
1059     return [=] { return alignTo(Script->getDot(), E().getValue()); };
1060   }
1061   if (Tok == "DEFINED") {
1062     StringRef Name = readParenLiteral();
1063     return [=] { return Symtab->find(Name) ? 1 : 0; };
1064   }
1065   if (Tok == "LENGTH") {
1066     StringRef Name = readParenLiteral();
1067     if (Script->MemoryRegions.count(Name) == 0) {
1068       setError("memory region not defined: " + Name);
1069       return [] { return 0; };
1070     }
1071     return [=] { return Script->MemoryRegions[Name]->Length; };
1072   }
1073   if (Tok == "LOADADDR") {
1074     StringRef Name = readParenLiteral();
1075     OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
1076     return [=] {
1077       checkIfExists(Cmd, Location);
1078       return Cmd->getLMA();
1079     };
1080   }
1081   if (Tok == "ORIGIN") {
1082     StringRef Name = readParenLiteral();
1083     if (Script->MemoryRegions.count(Name) == 0) {
1084       setError("memory region not defined: " + Name);
1085       return [] { return 0; };
1086     }
1087     return [=] { return Script->MemoryRegions[Name]->Origin; };
1088   }
1089   if (Tok == "SEGMENT_START") {
1090     expect("(");
1091     skip();
1092     expect(",");
1093     Expr E = readExpr();
1094     expect(")");
1095     return [=] { return E(); };
1096   }
1097   if (Tok == "SIZEOF") {
1098     StringRef Name = readParenLiteral();
1099     OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
1100     // Linker script does not create an output section if its content is empty.
1101     // We want to allow SIZEOF(.foo) where .foo is a section which happened to
1102     // be empty.
1103     return [=] { return Cmd->Size; };
1104   }
1105   if (Tok == "SIZEOF_HEADERS")
1106     return [=] { return elf::getHeaderSize(); };
1107 
1108   // Tok is the dot.
1109   if (Tok == ".")
1110     return [=] { return Script->getSymbolValue(Tok, Location); };
1111 
1112   // Tok is a literal number.
1113   if (Optional<uint64_t> Val = parseInt(Tok))
1114     return [=] { return *Val; };
1115 
1116   // Tok is a symbol name.
1117   if (!isValidCIdentifier(Tok))
1118     setError("malformed number: " + Tok);
1119   Script->ReferencedSymbols.push_back(Tok);
1120   return [=] { return Script->getSymbolValue(Tok, Location); };
1121 }
1122 
1123 Expr ScriptParser::readTernary(Expr Cond) {
1124   Expr L = readExpr();
1125   expect(":");
1126   Expr R = readExpr();
1127   return [=] { return Cond().getValue() ? L() : R(); };
1128 }
1129 
1130 Expr ScriptParser::readParenExpr() {
1131   expect("(");
1132   Expr E = readExpr();
1133   expect(")");
1134   return E;
1135 }
1136 
1137 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() {
1138   std::vector<StringRef> Phdrs;
1139   while (!errorCount() && peek().startswith(":")) {
1140     StringRef Tok = next();
1141     Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1));
1142   }
1143   return Phdrs;
1144 }
1145 
1146 // Read a program header type name. The next token must be a
1147 // name of a program header type or a constant (e.g. "0x3").
1148 unsigned ScriptParser::readPhdrType() {
1149   StringRef Tok = next();
1150   if (Optional<uint64_t> Val = parseInt(Tok))
1151     return *Val;
1152 
1153   unsigned Ret = StringSwitch<unsigned>(Tok)
1154                      .Case("PT_NULL", PT_NULL)
1155                      .Case("PT_LOAD", PT_LOAD)
1156                      .Case("PT_DYNAMIC", PT_DYNAMIC)
1157                      .Case("PT_INTERP", PT_INTERP)
1158                      .Case("PT_NOTE", PT_NOTE)
1159                      .Case("PT_SHLIB", PT_SHLIB)
1160                      .Case("PT_PHDR", PT_PHDR)
1161                      .Case("PT_TLS", PT_TLS)
1162                      .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME)
1163                      .Case("PT_GNU_STACK", PT_GNU_STACK)
1164                      .Case("PT_GNU_RELRO", PT_GNU_RELRO)
1165                      .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE)
1166                      .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED)
1167                      .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA)
1168                      .Default(-1);
1169 
1170   if (Ret == (unsigned)-1) {
1171     setError("invalid program header type: " + Tok);
1172     return PT_NULL;
1173   }
1174   return Ret;
1175 }
1176 
1177 // Reads an anonymous version declaration.
1178 void ScriptParser::readAnonymousDeclaration() {
1179   std::vector<SymbolVersion> Locals;
1180   std::vector<SymbolVersion> Globals;
1181   std::tie(Locals, Globals) = readSymbols();
1182 
1183   for (SymbolVersion V : Locals) {
1184     if (V.Name == "*")
1185       Config->DefaultSymbolVersion = VER_NDX_LOCAL;
1186     else
1187       Config->VersionScriptLocals.push_back(V);
1188   }
1189 
1190   for (SymbolVersion V : Globals)
1191     Config->VersionScriptGlobals.push_back(V);
1192 
1193   expect(";");
1194 }
1195 
1196 // Reads a non-anonymous version definition,
1197 // e.g. "VerStr { global: foo; bar; local: *; };".
1198 void ScriptParser::readVersionDeclaration(StringRef VerStr) {
1199   // Read a symbol list.
1200   std::vector<SymbolVersion> Locals;
1201   std::vector<SymbolVersion> Globals;
1202   std::tie(Locals, Globals) = readSymbols();
1203 
1204   for (SymbolVersion V : Locals) {
1205     if (V.Name == "*")
1206       Config->DefaultSymbolVersion = VER_NDX_LOCAL;
1207     else
1208       Config->VersionScriptLocals.push_back(V);
1209   }
1210 
1211   // Create a new version definition and add that to the global symbols.
1212   VersionDefinition Ver;
1213   Ver.Name = VerStr;
1214   Ver.Globals = Globals;
1215 
1216   // User-defined version number starts from 2 because 0 and 1 are
1217   // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively.
1218   Ver.Id = Config->VersionDefinitions.size() + 2;
1219   Config->VersionDefinitions.push_back(Ver);
1220 
1221   // Each version may have a parent version. For example, "Ver2"
1222   // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1"
1223   // as a parent. This version hierarchy is, probably against your
1224   // instinct, purely for hint; the runtime doesn't care about it
1225   // at all. In LLD, we simply ignore it.
1226   if (peek() != ";")
1227     skip();
1228   expect(";");
1229 }
1230 
1231 static bool hasWildcard(StringRef S) {
1232   return S.find_first_of("?*[") != StringRef::npos;
1233 }
1234 
1235 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };".
1236 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>>
1237 ScriptParser::readSymbols() {
1238   std::vector<SymbolVersion> Locals;
1239   std::vector<SymbolVersion> Globals;
1240   std::vector<SymbolVersion> *V = &Globals;
1241 
1242   while (!errorCount()) {
1243     if (consume("}"))
1244       break;
1245     if (consumeLabel("local")) {
1246       V = &Locals;
1247       continue;
1248     }
1249     if (consumeLabel("global")) {
1250       V = &Globals;
1251       continue;
1252     }
1253 
1254     if (consume("extern")) {
1255       std::vector<SymbolVersion> Ext = readVersionExtern();
1256       V->insert(V->end(), Ext.begin(), Ext.end());
1257     } else {
1258       StringRef Tok = next();
1259       V->push_back({unquote(Tok), false, hasWildcard(Tok)});
1260     }
1261     expect(";");
1262   }
1263   return {Locals, Globals};
1264 }
1265 
1266 // Reads an "extern C++" directive, e.g.,
1267 // "extern "C++" { ns::*; "f(int, double)"; };"
1268 //
1269 // The last semicolon is optional. E.g. this is OK:
1270 // "extern "C++" { ns::*; "f(int, double)" };"
1271 std::vector<SymbolVersion> ScriptParser::readVersionExtern() {
1272   StringRef Tok = next();
1273   bool IsCXX = Tok == "\"C++\"";
1274   if (!IsCXX && Tok != "\"C\"")
1275     setError("Unknown language");
1276   expect("{");
1277 
1278   std::vector<SymbolVersion> Ret;
1279   while (!errorCount() && peek() != "}") {
1280     StringRef Tok = next();
1281     bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok);
1282     Ret.push_back({unquote(Tok), IsCXX, HasWildcard});
1283     if (consume("}"))
1284       return Ret;
1285     expect(";");
1286   }
1287 
1288   expect("}");
1289   return Ret;
1290 }
1291 
1292 uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2,
1293                                             StringRef S3) {
1294   if (!consume(S1) && !consume(S2) && !consume(S3)) {
1295     setError("expected one of: " + S1 + ", " + S2 + ", or " + S3);
1296     return 0;
1297   }
1298   expect("=");
1299   return readExpr()().getValue();
1300 }
1301 
1302 // Parse the MEMORY command as specified in:
1303 // https://sourceware.org/binutils/docs/ld/MEMORY.html
1304 //
1305 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... }
1306 void ScriptParser::readMemory() {
1307   expect("{");
1308   while (!errorCount() && !consume("}")) {
1309     StringRef Name = next();
1310 
1311     uint32_t Flags = 0;
1312     uint32_t NegFlags = 0;
1313     if (consume("(")) {
1314       std::tie(Flags, NegFlags) = readMemoryAttributes();
1315       expect(")");
1316     }
1317     expect(":");
1318 
1319     uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o");
1320     expect(",");
1321     uint64_t Length = readMemoryAssignment("LENGTH", "len", "l");
1322 
1323     // Add the memory region to the region map.
1324     if (Script->MemoryRegions.count(Name))
1325       setError("region '" + Name + "' already defined");
1326     MemoryRegion *MR =
1327         make<MemoryRegion>(Name, Origin, Length, Flags, NegFlags);
1328     Script->MemoryRegions[Name] = MR;
1329   }
1330 }
1331 
1332 // This function parses the attributes used to match against section
1333 // flags when placing output sections in a memory region. These flags
1334 // are only used when an explicit memory region name is not used.
1335 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() {
1336   uint32_t Flags = 0;
1337   uint32_t NegFlags = 0;
1338   bool Invert = false;
1339 
1340   for (char C : next().lower()) {
1341     uint32_t Flag = 0;
1342     if (C == '!')
1343       Invert = !Invert;
1344     else if (C == 'w')
1345       Flag = SHF_WRITE;
1346     else if (C == 'x')
1347       Flag = SHF_EXECINSTR;
1348     else if (C == 'a')
1349       Flag = SHF_ALLOC;
1350     else if (C != 'r')
1351       setError("invalid memory region attribute");
1352 
1353     if (Invert)
1354       NegFlags |= Flag;
1355     else
1356       Flags |= Flag;
1357   }
1358   return {Flags, NegFlags};
1359 }
1360 
1361 void elf::readLinkerScript(MemoryBufferRef MB) {
1362   ScriptParser(MB).readLinkerScript();
1363 }
1364 
1365 void elf::readVersionScript(MemoryBufferRef MB) {
1366   ScriptParser(MB).readVersionScript();
1367 }
1368 
1369 void elf::readDynamicList(MemoryBufferRef MB) {
1370   ScriptParser(MB).readDynamicList();
1371 }
1372 
1373 void elf::readDefsym(StringRef Name, MemoryBufferRef MB) {
1374   ScriptParser(MB).readDefsym(Name);
1375 }
1376