1 //===- ScriptParser.cpp ---------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// This file contains a recursive-descent parser for linker scripts.
11 // Parsed results are stored to Config and Script global objects.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ScriptParser.h"
16 #include "Config.h"
17 #include "Driver.h"
18 #include "InputSection.h"
19 #include "LinkerScript.h"
20 #include "Memory.h"
21 #include "OutputSections.h"
22 #include "ScriptLexer.h"
23 #include "Symbols.h"
24 #include "Target.h"
25 #include "llvm/ADT/SmallString.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/ADT/StringSwitch.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include "llvm/Support/FileSystem.h"
32 #include "llvm/Support/Path.h"
33 #include <cassert>
34 #include <limits>
35 #include <vector>
36 
37 using namespace llvm;
38 using namespace llvm::ELF;
39 using namespace llvm::support::endian;
40 using namespace lld;
41 using namespace lld::elf;
42 
43 static bool isUnderSysroot(StringRef Path);
44 
45 namespace {
46 class ScriptParser final : ScriptLexer {
47 public:
48   ScriptParser(MemoryBufferRef MB)
49       : ScriptLexer(MB),
50         IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {}
51 
52   void readLinkerScript();
53   void readVersionScript();
54   void readDynamicList();
55 
56 private:
57   void addFile(StringRef Path);
58   OutputSection *checkSection(OutputSection *Cmd, StringRef Loccation);
59 
60   void readAsNeeded();
61   void readEntry();
62   void readExtern();
63   void readGroup();
64   void readInclude();
65   void readMemory();
66   void readOutput();
67   void readOutputArch();
68   void readOutputFormat();
69   void readPhdrs();
70   void readSearchDir();
71   void readSections();
72   void readVersion();
73   void readVersionScriptCommand();
74 
75   SymbolAssignment *readAssignment(StringRef Name);
76   BytesDataCommand *readBytesDataCommand(StringRef Tok);
77   uint32_t readFill();
78   uint32_t parseFill(StringRef Tok);
79   void readSectionAddressType(OutputSection *Cmd);
80   OutputSection *readOutputSectionDescription(StringRef OutSec);
81   std::vector<StringRef> readOutputSectionPhdrs();
82   InputSectionDescription *readInputSectionDescription(StringRef Tok);
83   StringMatcher readFilePatterns();
84   std::vector<SectionPattern> readInputSectionsList();
85   InputSectionDescription *readInputSectionRules(StringRef FilePattern);
86   unsigned readPhdrType();
87   SortSectionPolicy readSortKind();
88   SymbolAssignment *readProvideHidden(bool Provide, bool Hidden);
89   SymbolAssignment *readProvideOrAssignment(StringRef Tok);
90   void readSort();
91   AssertCommand *readAssert();
92   Expr readAssertExpr();
93   Expr readConstant();
94   Expr getPageSize();
95 
96   uint64_t readMemoryAssignment(StringRef, StringRef, StringRef);
97   std::pair<uint32_t, uint32_t> readMemoryAttributes();
98 
99   Expr readExpr();
100   Expr readExpr1(Expr Lhs, int MinPrec);
101   StringRef readParenLiteral();
102   Expr readPrimary();
103   Expr readTernary(Expr Cond);
104   Expr readParenExpr();
105 
106   // For parsing version script.
107   std::vector<SymbolVersion> readVersionExtern();
108   void readAnonymousDeclaration();
109   void readVersionDeclaration(StringRef VerStr);
110 
111   std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>>
112   readSymbols();
113 
114   bool IsUnderSysroot;
115 };
116 } // namespace
117 
118 static StringRef unquote(StringRef S) {
119   if (S.startswith("\""))
120     return S.substr(1, S.size() - 2);
121   return S;
122 }
123 
124 static bool isUnderSysroot(StringRef Path) {
125   if (Config->Sysroot == "")
126     return false;
127   for (; !Path.empty(); Path = sys::path::parent_path(Path))
128     if (sys::fs::equivalent(Config->Sysroot, Path))
129       return true;
130   return false;
131 }
132 
133 // Some operations only support one non absolute value. Move the
134 // absolute one to the right hand side for convenience.
135 static void moveAbsRight(ExprValue &A, ExprValue &B) {
136   if (A.isAbsolute())
137     std::swap(A, B);
138   if (!B.isAbsolute())
139     error(A.Loc + ": at least one side of the expression must be absolute");
140 }
141 
142 static ExprValue add(ExprValue A, ExprValue B) {
143   moveAbsRight(A, B);
144   uint64_t Val = alignTo(A.Val, A.Alignment) + B.getValue();
145   return {A.Sec, A.ForceAbsolute, Val, A.Loc};
146 }
147 
148 static ExprValue sub(ExprValue A, ExprValue B) {
149   uint64_t Val = alignTo(A.Val, A.Alignment) - B.getValue();
150   return {A.Sec, Val, A.Loc};
151 }
152 
153 static ExprValue mul(ExprValue A, ExprValue B) {
154   return A.getValue() * B.getValue();
155 }
156 
157 static ExprValue div(ExprValue A, ExprValue B) {
158   if (uint64_t BV = B.getValue())
159     return A.getValue() / BV;
160   error("division by zero");
161   return 0;
162 }
163 
164 static ExprValue bitAnd(ExprValue A, ExprValue B) {
165   moveAbsRight(A, B);
166   return {A.Sec, A.ForceAbsolute,
167           (A.getValue() & B.getValue()) - A.getSecAddr(), A.Loc};
168 }
169 
170 static ExprValue bitOr(ExprValue A, ExprValue B) {
171   moveAbsRight(A, B);
172   return {A.Sec, A.ForceAbsolute,
173           (A.getValue() | B.getValue()) - A.getSecAddr(), A.Loc};
174 }
175 
176 void ScriptParser::readDynamicList() {
177   expect("{");
178   readAnonymousDeclaration();
179   if (!atEOF())
180     setError("EOF expected, but got " + next());
181 }
182 
183 void ScriptParser::readVersionScript() {
184   readVersionScriptCommand();
185   if (!atEOF())
186     setError("EOF expected, but got " + next());
187 }
188 
189 void ScriptParser::readVersionScriptCommand() {
190   if (consume("{")) {
191     readAnonymousDeclaration();
192     return;
193   }
194 
195   while (!atEOF() && !ErrorCount && peek() != "}") {
196     StringRef VerStr = next();
197     if (VerStr == "{") {
198       setError("anonymous version definition is used in "
199                "combination with other version definitions");
200       return;
201     }
202     expect("{");
203     readVersionDeclaration(VerStr);
204   }
205 }
206 
207 void ScriptParser::readVersion() {
208   expect("{");
209   readVersionScriptCommand();
210   expect("}");
211 }
212 
213 void ScriptParser::readLinkerScript() {
214   while (!atEOF()) {
215     StringRef Tok = next();
216     if (Tok == ";")
217       continue;
218 
219     if (Tok == "ASSERT") {
220       Script->Opt.Commands.push_back(readAssert());
221     } else if (Tok == "ENTRY") {
222       readEntry();
223     } else if (Tok == "EXTERN") {
224       readExtern();
225     } else if (Tok == "GROUP" || Tok == "INPUT") {
226       readGroup();
227     } else if (Tok == "INCLUDE") {
228       readInclude();
229     } else if (Tok == "MEMORY") {
230       readMemory();
231     } else if (Tok == "OUTPUT") {
232       readOutput();
233     } else if (Tok == "OUTPUT_ARCH") {
234       readOutputArch();
235     } else if (Tok == "OUTPUT_FORMAT") {
236       readOutputFormat();
237     } else if (Tok == "PHDRS") {
238       readPhdrs();
239     } else if (Tok == "SEARCH_DIR") {
240       readSearchDir();
241     } else if (Tok == "SECTIONS") {
242       readSections();
243     } else if (Tok == "VERSION") {
244       readVersion();
245     } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) {
246       Script->Opt.Commands.push_back(Cmd);
247     } else {
248       setError("unknown directive: " + Tok);
249     }
250   }
251 }
252 
253 void ScriptParser::addFile(StringRef S) {
254   if (IsUnderSysroot && S.startswith("/")) {
255     SmallString<128> PathData;
256     StringRef Path = (Config->Sysroot + S).toStringRef(PathData);
257     if (sys::fs::exists(Path)) {
258       Driver->addFile(Saver.save(Path), /*WithLOption=*/false);
259       return;
260     }
261   }
262 
263   if (S.startswith("/")) {
264     Driver->addFile(S, /*WithLOption=*/false);
265   } else if (S.startswith("=")) {
266     if (Config->Sysroot.empty())
267       Driver->addFile(S.substr(1), /*WithLOption=*/false);
268     else
269       Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)),
270                       /*WithLOption=*/false);
271   } else if (S.startswith("-l")) {
272     Driver->addLibrary(S.substr(2));
273   } else if (sys::fs::exists(S)) {
274     Driver->addFile(S, /*WithLOption=*/false);
275   } else {
276     if (Optional<std::string> Path = findFromSearchPaths(S))
277       Driver->addFile(Saver.save(*Path), /*WithLOption=*/true);
278     else
279       setError("unable to find " + S);
280   }
281 }
282 
283 void ScriptParser::readAsNeeded() {
284   expect("(");
285   bool Orig = Config->AsNeeded;
286   Config->AsNeeded = true;
287   while (!ErrorCount && !consume(")"))
288     addFile(unquote(next()));
289   Config->AsNeeded = Orig;
290 }
291 
292 void ScriptParser::readEntry() {
293   // -e <symbol> takes predecence over ENTRY(<symbol>).
294   expect("(");
295   StringRef Tok = next();
296   if (Config->Entry.empty())
297     Config->Entry = Tok;
298   expect(")");
299 }
300 
301 void ScriptParser::readExtern() {
302   expect("(");
303   while (!ErrorCount && !consume(")"))
304     Config->Undefined.push_back(next());
305 }
306 
307 void ScriptParser::readGroup() {
308   expect("(");
309   while (!ErrorCount && !consume(")")) {
310     if (consume("AS_NEEDED"))
311       readAsNeeded();
312     else
313       addFile(unquote(next()));
314   }
315 }
316 
317 void ScriptParser::readInclude() {
318   StringRef Tok = unquote(next());
319 
320   // https://sourceware.org/binutils/docs/ld/File-Commands.html:
321   // The file will be searched for in the current directory, and in any
322   // directory specified with the -L option.
323   if (sys::fs::exists(Tok)) {
324     if (Optional<MemoryBufferRef> MB = readFile(Tok))
325       tokenize(*MB);
326     return;
327   }
328   if (Optional<std::string> Path = findFromSearchPaths(Tok)) {
329     if (Optional<MemoryBufferRef> MB = readFile(*Path))
330       tokenize(*MB);
331     return;
332   }
333   setError("cannot open " + Tok);
334 }
335 
336 void ScriptParser::readOutput() {
337   // -o <file> takes predecence over OUTPUT(<file>).
338   expect("(");
339   StringRef Tok = next();
340   if (Config->OutputFile.empty())
341     Config->OutputFile = unquote(Tok);
342   expect(")");
343 }
344 
345 void ScriptParser::readOutputArch() {
346   // OUTPUT_ARCH is ignored for now.
347   expect("(");
348   while (!ErrorCount && !consume(")"))
349     skip();
350 }
351 
352 void ScriptParser::readOutputFormat() {
353   // Error checking only for now.
354   expect("(");
355   skip();
356   if (consume(")"))
357     return;
358   expect(",");
359   skip();
360   expect(",");
361   skip();
362   expect(")");
363 }
364 
365 void ScriptParser::readPhdrs() {
366   expect("{");
367   while (!ErrorCount && !consume("}")) {
368     Script->Opt.PhdrsCommands.push_back(
369         {next(), PT_NULL, false, false, UINT_MAX, nullptr});
370 
371     PhdrsCommand &PhdrCmd = Script->Opt.PhdrsCommands.back();
372     PhdrCmd.Type = readPhdrType();
373 
374     while (!ErrorCount && !consume(";")) {
375       if (consume("FILEHDR"))
376         PhdrCmd.HasFilehdr = true;
377       else if (consume("PHDRS"))
378         PhdrCmd.HasPhdrs = true;
379       else if (consume("AT"))
380         PhdrCmd.LMAExpr = readParenExpr();
381       else if (consume("FLAGS"))
382         PhdrCmd.Flags = readParenExpr()().getValue();
383       else
384         setError("unexpected header attribute: " + next());
385     }
386   }
387 }
388 
389 void ScriptParser::readSearchDir() {
390   expect("(");
391   StringRef Tok = next();
392   if (!Config->Nostdlib)
393     Config->SearchPaths.push_back(unquote(Tok));
394   expect(")");
395 }
396 
397 void ScriptParser::readSections() {
398   Script->Opt.HasSections = true;
399 
400   // -no-rosegment is used to avoid placing read only non-executable sections in
401   // their own segment. We do the same if SECTIONS command is present in linker
402   // script. See comment for computeFlags().
403   Config->SingleRoRx = true;
404 
405   expect("{");
406   while (!ErrorCount && !consume("}")) {
407     StringRef Tok = next();
408     BaseCommand *Cmd = readProvideOrAssignment(Tok);
409     if (!Cmd) {
410       if (Tok == "ASSERT")
411         Cmd = readAssert();
412       else
413         Cmd = readOutputSectionDescription(Tok);
414     }
415     Script->Opt.Commands.push_back(Cmd);
416   }
417 }
418 
419 static int precedence(StringRef Op) {
420   return StringSwitch<int>(Op)
421       .Cases("*", "/", 5)
422       .Cases("+", "-", 4)
423       .Cases("<<", ">>", 3)
424       .Cases("<", "<=", ">", ">=", "==", "!=", 2)
425       .Cases("&", "|", 1)
426       .Default(-1);
427 }
428 
429 StringMatcher ScriptParser::readFilePatterns() {
430   std::vector<StringRef> V;
431   while (!ErrorCount && !consume(")"))
432     V.push_back(next());
433   return StringMatcher(V);
434 }
435 
436 SortSectionPolicy ScriptParser::readSortKind() {
437   if (consume("SORT") || consume("SORT_BY_NAME"))
438     return SortSectionPolicy::Name;
439   if (consume("SORT_BY_ALIGNMENT"))
440     return SortSectionPolicy::Alignment;
441   if (consume("SORT_BY_INIT_PRIORITY"))
442     return SortSectionPolicy::Priority;
443   if (consume("SORT_NONE"))
444     return SortSectionPolicy::None;
445   return SortSectionPolicy::Default;
446 }
447 
448 // Reads SECTIONS command contents in the following form:
449 //
450 // <contents> ::= <elem>*
451 // <elem>     ::= <exclude>? <glob-pattern>
452 // <exclude>  ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")"
453 //
454 // For example,
455 //
456 // *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz)
457 //
458 // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o".
459 // The semantics of that is section .foo in any file, section .bar in
460 // any file but a.o, and section .baz in any file but b.o.
461 std::vector<SectionPattern> ScriptParser::readInputSectionsList() {
462   std::vector<SectionPattern> Ret;
463   while (!ErrorCount && peek() != ")") {
464     StringMatcher ExcludeFilePat;
465     if (consume("EXCLUDE_FILE")) {
466       expect("(");
467       ExcludeFilePat = readFilePatterns();
468     }
469 
470     std::vector<StringRef> V;
471     while (!ErrorCount && peek() != ")" && peek() != "EXCLUDE_FILE")
472       V.push_back(next());
473 
474     if (!V.empty())
475       Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)});
476     else
477       setError("section pattern is expected");
478   }
479   return Ret;
480 }
481 
482 // Reads contents of "SECTIONS" directive. That directive contains a
483 // list of glob patterns for input sections. The grammar is as follows.
484 //
485 // <patterns> ::= <section-list>
486 //              | <sort> "(" <section-list> ")"
487 //              | <sort> "(" <sort> "(" <section-list> ")" ")"
488 //
489 // <sort>     ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT"
490 //              | "SORT_BY_INIT_PRIORITY" | "SORT_NONE"
491 //
492 // <section-list> is parsed by readInputSectionsList().
493 InputSectionDescription *
494 ScriptParser::readInputSectionRules(StringRef FilePattern) {
495   auto *Cmd = make<InputSectionDescription>(FilePattern);
496   expect("(");
497 
498   while (!ErrorCount && !consume(")")) {
499     SortSectionPolicy Outer = readSortKind();
500     SortSectionPolicy Inner = SortSectionPolicy::Default;
501     std::vector<SectionPattern> V;
502     if (Outer != SortSectionPolicy::Default) {
503       expect("(");
504       Inner = readSortKind();
505       if (Inner != SortSectionPolicy::Default) {
506         expect("(");
507         V = readInputSectionsList();
508         expect(")");
509       } else {
510         V = readInputSectionsList();
511       }
512       expect(")");
513     } else {
514       V = readInputSectionsList();
515     }
516 
517     for (SectionPattern &Pat : V) {
518       Pat.SortInner = Inner;
519       Pat.SortOuter = Outer;
520     }
521 
522     std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns));
523   }
524   return Cmd;
525 }
526 
527 InputSectionDescription *
528 ScriptParser::readInputSectionDescription(StringRef Tok) {
529   // Input section wildcard can be surrounded by KEEP.
530   // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep
531   if (Tok == "KEEP") {
532     expect("(");
533     StringRef FilePattern = next();
534     InputSectionDescription *Cmd = readInputSectionRules(FilePattern);
535     expect(")");
536     Script->Opt.KeptSections.push_back(Cmd);
537     return Cmd;
538   }
539   return readInputSectionRules(Tok);
540 }
541 
542 void ScriptParser::readSort() {
543   expect("(");
544   expect("CONSTRUCTORS");
545   expect(")");
546 }
547 
548 AssertCommand *ScriptParser::readAssert() {
549   return make<AssertCommand>(readAssertExpr());
550 }
551 
552 Expr ScriptParser::readAssertExpr() {
553   expect("(");
554   Expr E = readExpr();
555   expect(",");
556   StringRef Msg = unquote(next());
557   expect(")");
558 
559   return [=] {
560     if (!E().getValue())
561       error(Msg);
562     return Script->getDot();
563   };
564 }
565 
566 // Reads a FILL(expr) command. We handle the FILL command as an
567 // alias for =fillexp section attribute, which is different from
568 // what GNU linkers do.
569 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html
570 uint32_t ScriptParser::readFill() {
571   expect("(");
572   uint32_t V = parseFill(next());
573   expect(")");
574   return V;
575 }
576 
577 // Reads an expression and/or the special directive "(NOLOAD)" for an
578 // output section definition.
579 //
580 // An output section name can be followed by an address expression
581 // and/or by "(NOLOAD)". This grammar is not LL(1) because "(" can be
582 // interpreted as either the beginning of some expression or "(NOLOAD)".
583 //
584 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html
585 // https://sourceware.org/binutils/docs/ld/Output-Section-Type.html
586 void ScriptParser::readSectionAddressType(OutputSection *Cmd) {
587   if (consume("(")) {
588     if (consume("NOLOAD")) {
589       expect(")");
590       Cmd->Noload = true;
591       return;
592     }
593     Cmd->AddrExpr = readExpr();
594     expect(")");
595   } else {
596     Cmd->AddrExpr = readExpr();
597   }
598 
599   if (consume("(")) {
600     expect("NOLOAD");
601     expect(")");
602     Cmd->Noload = true;
603   }
604 }
605 
606 OutputSection *ScriptParser::readOutputSectionDescription(StringRef OutSec) {
607   OutputSection *Cmd =
608       Script->createOutputSection(OutSec, getCurrentLocation());
609 
610   if (peek() != ":")
611     readSectionAddressType(Cmd);
612   expect(":");
613 
614   if (consume("AT"))
615     Cmd->LMAExpr = readParenExpr();
616   if (consume("ALIGN"))
617     Cmd->AlignExpr = readParenExpr();
618   if (consume("SUBALIGN"))
619     Cmd->SubalignExpr = readParenExpr();
620 
621   // Parse constraints.
622   if (consume("ONLY_IF_RO"))
623     Cmd->Constraint = ConstraintKind::ReadOnly;
624   if (consume("ONLY_IF_RW"))
625     Cmd->Constraint = ConstraintKind::ReadWrite;
626   expect("{");
627 
628   while (!ErrorCount && !consume("}")) {
629     StringRef Tok = next();
630     if (Tok == ";") {
631       // Empty commands are allowed. Do nothing here.
632     } else if (SymbolAssignment *Assign = readProvideOrAssignment(Tok)) {
633       Cmd->Commands.push_back(Assign);
634     } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) {
635       Cmd->Commands.push_back(Data);
636     } else if (Tok == "ASSERT") {
637       Cmd->Commands.push_back(readAssert());
638       expect(";");
639     } else if (Tok == "CONSTRUCTORS") {
640       // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors
641       // by name. This is for very old file formats such as ECOFF/XCOFF.
642       // For ELF, we should ignore.
643     } else if (Tok == "FILL") {
644       Cmd->Filler = readFill();
645     } else if (Tok == "SORT") {
646       readSort();
647     } else if (peek() == "(") {
648       Cmd->Commands.push_back(readInputSectionDescription(Tok));
649     } else {
650       setError("unknown command " + Tok);
651     }
652   }
653 
654   if (consume(">"))
655     Cmd->MemoryRegionName = next();
656   else if (peek().startswith(">"))
657     Cmd->MemoryRegionName = next().drop_front();
658 
659   Cmd->Phdrs = readOutputSectionPhdrs();
660 
661   if (consume("="))
662     Cmd->Filler = parseFill(next());
663   else if (peek().startswith("="))
664     Cmd->Filler = parseFill(next().drop_front());
665 
666   // Consume optional comma following output section command.
667   consume(",");
668 
669   return Cmd;
670 }
671 
672 // Parses a given string as a octal/decimal/hexadecimal number and
673 // returns it as a big-endian number. Used for `=<fillexp>`.
674 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html
675 //
676 // When reading a hexstring, ld.bfd handles it as a blob of arbitrary
677 // size, while ld.gold always handles it as a 32-bit big-endian number.
678 // We are compatible with ld.gold because it's easier to implement.
679 uint32_t ScriptParser::parseFill(StringRef Tok) {
680   uint32_t V = 0;
681   if (!to_integer(Tok, V))
682     setError("invalid filler expression: " + Tok);
683 
684   uint32_t Buf;
685   write32be(&Buf, V);
686   return Buf;
687 }
688 
689 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) {
690   expect("(");
691   SymbolAssignment *Cmd = readAssignment(next());
692   Cmd->Provide = Provide;
693   Cmd->Hidden = Hidden;
694   expect(")");
695   expect(";");
696   return Cmd;
697 }
698 
699 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) {
700   SymbolAssignment *Cmd = nullptr;
701   if (peek() == "=" || peek() == "+=") {
702     Cmd = readAssignment(Tok);
703     expect(";");
704   } else if (Tok == "PROVIDE") {
705     Cmd = readProvideHidden(true, false);
706   } else if (Tok == "HIDDEN") {
707     Cmd = readProvideHidden(false, true);
708   } else if (Tok == "PROVIDE_HIDDEN") {
709     Cmd = readProvideHidden(true, true);
710   }
711   return Cmd;
712 }
713 
714 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) {
715   StringRef Op = next();
716   assert(Op == "=" || Op == "+=");
717   Expr E = readExpr();
718   if (Op == "+=") {
719     std::string Loc = getCurrentLocation();
720     E = [=] { return add(Script->getSymbolValue(Loc, Name), E()); };
721   }
722   return make<SymbolAssignment>(Name, E, getCurrentLocation());
723 }
724 
725 // This is an operator-precedence parser to parse a linker
726 // script expression.
727 Expr ScriptParser::readExpr() {
728   // Our lexer is context-aware. Set the in-expression bit so that
729   // they apply different tokenization rules.
730   bool Orig = InExpr;
731   InExpr = true;
732   Expr E = readExpr1(readPrimary(), 0);
733   InExpr = Orig;
734   return E;
735 }
736 
737 static Expr combine(StringRef Op, Expr L, Expr R) {
738   if (Op == "+")
739     return [=] { return add(L(), R()); };
740   if (Op == "-")
741     return [=] { return sub(L(), R()); };
742   if (Op == "*")
743     return [=] { return mul(L(), R()); };
744   if (Op == "/")
745     return [=] { return div(L(), R()); };
746   if (Op == "<<")
747     return [=] { return L().getValue() << R().getValue(); };
748   if (Op == ">>")
749     return [=] { return L().getValue() >> R().getValue(); };
750   if (Op == "<")
751     return [=] { return L().getValue() < R().getValue(); };
752   if (Op == ">")
753     return [=] { return L().getValue() > R().getValue(); };
754   if (Op == ">=")
755     return [=] { return L().getValue() >= R().getValue(); };
756   if (Op == "<=")
757     return [=] { return L().getValue() <= R().getValue(); };
758   if (Op == "==")
759     return [=] { return L().getValue() == R().getValue(); };
760   if (Op == "!=")
761     return [=] { return L().getValue() != R().getValue(); };
762   if (Op == "&")
763     return [=] { return bitAnd(L(), R()); };
764   if (Op == "|")
765     return [=] { return bitOr(L(), R()); };
766   llvm_unreachable("invalid operator");
767 }
768 
769 // This is a part of the operator-precedence parser. This function
770 // assumes that the remaining token stream starts with an operator.
771 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) {
772   while (!atEOF() && !ErrorCount) {
773     // Read an operator and an expression.
774     if (consume("?"))
775       return readTernary(Lhs);
776     StringRef Op1 = peek();
777     if (precedence(Op1) < MinPrec)
778       break;
779     skip();
780     Expr Rhs = readPrimary();
781 
782     // Evaluate the remaining part of the expression first if the
783     // next operator has greater precedence than the previous one.
784     // For example, if we have read "+" and "3", and if the next
785     // operator is "*", then we'll evaluate 3 * ... part first.
786     while (!atEOF()) {
787       StringRef Op2 = peek();
788       if (precedence(Op2) <= precedence(Op1))
789         break;
790       Rhs = readExpr1(Rhs, precedence(Op2));
791     }
792 
793     Lhs = combine(Op1, Lhs, Rhs);
794   }
795   return Lhs;
796 }
797 
798 Expr ScriptParser::getPageSize() {
799   std::string Location = getCurrentLocation();
800   return [=]() -> uint64_t {
801     if (Target)
802       return Target->PageSize;
803     error(Location + ": unable to calculate page size");
804     return 4096; // Return a dummy value.
805   };
806 }
807 
808 Expr ScriptParser::readConstant() {
809   StringRef S = readParenLiteral();
810   if (S == "COMMONPAGESIZE")
811     return getPageSize();
812   if (S == "MAXPAGESIZE")
813     return [] { return Config->MaxPageSize; };
814   setError("unknown constant: " + S);
815   return {};
816 }
817 
818 // Parses Tok as an integer. It recognizes hexadecimal (prefixed with
819 // "0x" or suffixed with "H") and decimal numbers. Decimal numbers may
820 // have "K" (Ki) or "M" (Mi) suffixes.
821 static Optional<uint64_t> parseInt(StringRef Tok) {
822   // Negative number
823   if (Tok.startswith("-")) {
824     if (Optional<uint64_t> Val = parseInt(Tok.substr(1)))
825       return -*Val;
826     return None;
827   }
828 
829   // Hexadecimal
830   uint64_t Val;
831   if (Tok.startswith_lower("0x") && to_integer(Tok.substr(2), Val, 16))
832     return Val;
833   if (Tok.endswith_lower("H") && to_integer(Tok.drop_back(), Val, 16))
834     return Val;
835 
836   // Decimal
837   if (Tok.endswith_lower("K")) {
838     if (!to_integer(Tok.drop_back(), Val, 10))
839       return None;
840     return Val * 1024;
841   }
842   if (Tok.endswith_lower("M")) {
843     if (!to_integer(Tok.drop_back(), Val, 10))
844       return None;
845     return Val * 1024 * 1024;
846   }
847   if (!to_integer(Tok, Val, 10))
848     return None;
849   return Val;
850 }
851 
852 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) {
853   int Size = StringSwitch<int>(Tok)
854                  .Case("BYTE", 1)
855                  .Case("SHORT", 2)
856                  .Case("LONG", 4)
857                  .Case("QUAD", 8)
858                  .Default(-1);
859   if (Size == -1)
860     return nullptr;
861 
862   return make<BytesDataCommand>(readParenExpr(), Size);
863 }
864 
865 StringRef ScriptParser::readParenLiteral() {
866   expect("(");
867   StringRef Tok = next();
868   expect(")");
869   return Tok;
870 }
871 
872 OutputSection *ScriptParser::checkSection(OutputSection *Cmd,
873                                           StringRef Location) {
874   if (Cmd->Location.empty() && Script->ErrorOnMissingSection)
875     error(Location + ": undefined section " + Cmd->Name);
876   return Cmd;
877 }
878 
879 Expr ScriptParser::readPrimary() {
880   if (peek() == "(")
881     return readParenExpr();
882 
883   if (consume("~")) {
884     Expr E = readPrimary();
885     return [=] { return ~E().getValue(); };
886   }
887   if (consume("!")) {
888     Expr E = readPrimary();
889     return [=] { return !E().getValue(); };
890   }
891   if (consume("-")) {
892     Expr E = readPrimary();
893     return [=] { return -E().getValue(); };
894   }
895 
896   StringRef Tok = next();
897   std::string Location = getCurrentLocation();
898 
899   // Built-in functions are parsed here.
900   // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html.
901   if (Tok == "ABSOLUTE") {
902     Expr Inner = readParenExpr();
903     return [=] {
904       ExprValue I = Inner();
905       I.ForceAbsolute = true;
906       return I;
907     };
908   }
909   if (Tok == "ADDR") {
910     StringRef Name = readParenLiteral();
911     OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
912     return [=]() -> ExprValue {
913       return {checkSection(Cmd, Location), 0, Location};
914     };
915   }
916   if (Tok == "ALIGN") {
917     expect("(");
918     Expr E = readExpr();
919     if (consume(")"))
920       return [=] {
921         return alignTo(Script->getDot(), std::max((uint64_t)1, E().getValue()));
922       };
923     expect(",");
924     Expr E2 = readExpr();
925     expect(")");
926     return [=] {
927       ExprValue V = E();
928       V.Alignment = std::max((uint64_t)1, E2().getValue());
929       return V;
930     };
931   }
932   if (Tok == "ALIGNOF") {
933     StringRef Name = readParenLiteral();
934     OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
935     return [=] { return checkSection(Cmd, Location)->Alignment; };
936   }
937   if (Tok == "ASSERT")
938     return readAssertExpr();
939   if (Tok == "CONSTANT")
940     return readConstant();
941   if (Tok == "DATA_SEGMENT_ALIGN") {
942     expect("(");
943     Expr E = readExpr();
944     expect(",");
945     readExpr();
946     expect(")");
947     return [=] {
948       return alignTo(Script->getDot(), std::max((uint64_t)1, E().getValue()));
949     };
950   }
951   if (Tok == "DATA_SEGMENT_END") {
952     expect("(");
953     expect(".");
954     expect(")");
955     return [] { return Script->getDot(); };
956   }
957   if (Tok == "DATA_SEGMENT_RELRO_END") {
958     // GNU linkers implements more complicated logic to handle
959     // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and
960     // just align to the next page boundary for simplicity.
961     expect("(");
962     readExpr();
963     expect(",");
964     readExpr();
965     expect(")");
966     Expr E = getPageSize();
967     return [=] { return alignTo(Script->getDot(), E().getValue()); };
968   }
969   if (Tok == "DEFINED") {
970     StringRef Name = readParenLiteral();
971     return [=] { return Script->isDefined(Name) ? 1 : 0; };
972   }
973   if (Tok == "LENGTH") {
974     StringRef Name = readParenLiteral();
975     if (Script->Opt.MemoryRegions.count(Name) == 0)
976       setError("memory region not defined: " + Name);
977     return [=] { return Script->Opt.MemoryRegions[Name].Length; };
978   }
979   if (Tok == "LOADADDR") {
980     StringRef Name = readParenLiteral();
981     OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
982     return [=] { return checkSection(Cmd, Location)->getLMA(); };
983   }
984   if (Tok == "ORIGIN") {
985     StringRef Name = readParenLiteral();
986     if (Script->Opt.MemoryRegions.count(Name) == 0)
987       setError("memory region not defined: " + Name);
988     return [=] { return Script->Opt.MemoryRegions[Name].Origin; };
989   }
990   if (Tok == "SEGMENT_START") {
991     expect("(");
992     skip();
993     expect(",");
994     Expr E = readExpr();
995     expect(")");
996     return [=] { return E(); };
997   }
998   if (Tok == "SIZEOF") {
999     StringRef Name = readParenLiteral();
1000     OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
1001     // Linker script does not create an output section if its content is empty.
1002     // We want to allow SIZEOF(.foo) where .foo is a section which happened to
1003     // be empty.
1004     return [=] { return Cmd->Size; };
1005   }
1006   if (Tok == "SIZEOF_HEADERS")
1007     return [=] { return elf::getHeaderSize(); };
1008 
1009   // Tok is the dot.
1010   if (Tok == ".")
1011     return [=] { return Script->getSymbolValue(Location, Tok); };
1012 
1013   // Tok is a literal number.
1014   if (Optional<uint64_t> Val = parseInt(Tok))
1015     return [=] { return *Val; };
1016 
1017   // Tok is a symbol name.
1018   if (!isValidCIdentifier(Tok))
1019     setError("malformed number: " + Tok);
1020   Script->Opt.ReferencedSymbols.push_back(Tok);
1021   return [=] { return Script->getSymbolValue(Location, Tok); };
1022 }
1023 
1024 Expr ScriptParser::readTernary(Expr Cond) {
1025   Expr L = readExpr();
1026   expect(":");
1027   Expr R = readExpr();
1028   return [=] { return Cond().getValue() ? L() : R(); };
1029 }
1030 
1031 Expr ScriptParser::readParenExpr() {
1032   expect("(");
1033   Expr E = readExpr();
1034   expect(")");
1035   return E;
1036 }
1037 
1038 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() {
1039   std::vector<StringRef> Phdrs;
1040   while (!ErrorCount && peek().startswith(":")) {
1041     StringRef Tok = next();
1042     Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1));
1043   }
1044   return Phdrs;
1045 }
1046 
1047 // Read a program header type name. The next token must be a
1048 // name of a program header type or a constant (e.g. "0x3").
1049 unsigned ScriptParser::readPhdrType() {
1050   StringRef Tok = next();
1051   if (Optional<uint64_t> Val = parseInt(Tok))
1052     return *Val;
1053 
1054   unsigned Ret = StringSwitch<unsigned>(Tok)
1055                      .Case("PT_NULL", PT_NULL)
1056                      .Case("PT_LOAD", PT_LOAD)
1057                      .Case("PT_DYNAMIC", PT_DYNAMIC)
1058                      .Case("PT_INTERP", PT_INTERP)
1059                      .Case("PT_NOTE", PT_NOTE)
1060                      .Case("PT_SHLIB", PT_SHLIB)
1061                      .Case("PT_PHDR", PT_PHDR)
1062                      .Case("PT_TLS", PT_TLS)
1063                      .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME)
1064                      .Case("PT_GNU_STACK", PT_GNU_STACK)
1065                      .Case("PT_GNU_RELRO", PT_GNU_RELRO)
1066                      .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE)
1067                      .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED)
1068                      .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA)
1069                      .Default(-1);
1070 
1071   if (Ret == (unsigned)-1) {
1072     setError("invalid program header type: " + Tok);
1073     return PT_NULL;
1074   }
1075   return Ret;
1076 }
1077 
1078 // Reads an anonymous version declaration.
1079 void ScriptParser::readAnonymousDeclaration() {
1080   std::vector<SymbolVersion> Locals;
1081   std::vector<SymbolVersion> Globals;
1082   std::tie(Locals, Globals) = readSymbols();
1083 
1084   for (SymbolVersion V : Locals) {
1085     if (V.Name == "*")
1086       Config->DefaultSymbolVersion = VER_NDX_LOCAL;
1087     else
1088       Config->VersionScriptLocals.push_back(V);
1089   }
1090 
1091   for (SymbolVersion V : Globals)
1092     Config->VersionScriptGlobals.push_back(V);
1093 
1094   expect(";");
1095 }
1096 
1097 // Reads a non-anonymous version definition,
1098 // e.g. "VerStr { global: foo; bar; local: *; };".
1099 void ScriptParser::readVersionDeclaration(StringRef VerStr) {
1100   // Read a symbol list.
1101   std::vector<SymbolVersion> Locals;
1102   std::vector<SymbolVersion> Globals;
1103   std::tie(Locals, Globals) = readSymbols();
1104 
1105   for (SymbolVersion V : Locals) {
1106     if (V.Name == "*")
1107       Config->DefaultSymbolVersion = VER_NDX_LOCAL;
1108     else
1109       Config->VersionScriptLocals.push_back(V);
1110   }
1111 
1112   // Create a new version definition and add that to the global symbols.
1113   VersionDefinition Ver;
1114   Ver.Name = VerStr;
1115   Ver.Globals = Globals;
1116 
1117   // User-defined version number starts from 2 because 0 and 1 are
1118   // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively.
1119   Ver.Id = Config->VersionDefinitions.size() + 2;
1120   Config->VersionDefinitions.push_back(Ver);
1121 
1122   // Each version may have a parent version. For example, "Ver2"
1123   // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1"
1124   // as a parent. This version hierarchy is, probably against your
1125   // instinct, purely for hint; the runtime doesn't care about it
1126   // at all. In LLD, we simply ignore it.
1127   if (peek() != ";")
1128     skip();
1129   expect(";");
1130 }
1131 
1132 static bool hasWildcard(StringRef S) {
1133   return S.find_first_of("?*[") != StringRef::npos;
1134 }
1135 
1136 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };".
1137 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>>
1138 ScriptParser::readSymbols() {
1139   std::vector<SymbolVersion> Locals;
1140   std::vector<SymbolVersion> Globals;
1141   std::vector<SymbolVersion> *V = &Globals;
1142 
1143   while (!ErrorCount) {
1144     if (consume("}"))
1145       break;
1146     if (consumeLabel("local")) {
1147       V = &Locals;
1148       continue;
1149     }
1150     if (consumeLabel("global")) {
1151       V = &Globals;
1152       continue;
1153     }
1154 
1155     if (consume("extern")) {
1156       std::vector<SymbolVersion> Ext = readVersionExtern();
1157       V->insert(V->end(), Ext.begin(), Ext.end());
1158     } else {
1159       StringRef Tok = next();
1160       V->push_back({unquote(Tok), false, hasWildcard(Tok)});
1161     }
1162     expect(";");
1163   }
1164   return {Locals, Globals};
1165 }
1166 
1167 // Reads an "extern C++" directive, e.g.,
1168 // "extern "C++" { ns::*; "f(int, double)"; };"
1169 std::vector<SymbolVersion> ScriptParser::readVersionExtern() {
1170   StringRef Tok = next();
1171   bool IsCXX = Tok == "\"C++\"";
1172   if (!IsCXX && Tok != "\"C\"")
1173     setError("Unknown language");
1174   expect("{");
1175 
1176   std::vector<SymbolVersion> Ret;
1177   while (!ErrorCount && peek() != "}") {
1178     StringRef Tok = next();
1179     bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok);
1180     Ret.push_back({unquote(Tok), IsCXX, HasWildcard});
1181     expect(";");
1182   }
1183 
1184   expect("}");
1185   return Ret;
1186 }
1187 
1188 uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2,
1189                                             StringRef S3) {
1190   if (!consume(S1) && !consume(S2) && !consume(S3)) {
1191     setError("expected one of: " + S1 + ", " + S2 + ", or " + S3);
1192     return 0;
1193   }
1194   expect("=");
1195   return readExpr()().getValue();
1196 }
1197 
1198 // Parse the MEMORY command as specified in:
1199 // https://sourceware.org/binutils/docs/ld/MEMORY.html
1200 //
1201 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... }
1202 void ScriptParser::readMemory() {
1203   expect("{");
1204   while (!ErrorCount && !consume("}")) {
1205     StringRef Name = next();
1206 
1207     uint32_t Flags = 0;
1208     uint32_t NegFlags = 0;
1209     if (consume("(")) {
1210       std::tie(Flags, NegFlags) = readMemoryAttributes();
1211       expect(")");
1212     }
1213     expect(":");
1214 
1215     uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o");
1216     expect(",");
1217     uint64_t Length = readMemoryAssignment("LENGTH", "len", "l");
1218 
1219     // Add the memory region to the region map (if it doesn't already exist).
1220     auto It = Script->Opt.MemoryRegions.find(Name);
1221     if (It != Script->Opt.MemoryRegions.end())
1222       setError("region '" + Name + "' already defined");
1223     else
1224       Script->Opt.MemoryRegions[Name] = {Name, Origin, Length, Flags, NegFlags};
1225   }
1226 }
1227 
1228 // This function parses the attributes used to match against section
1229 // flags when placing output sections in a memory region. These flags
1230 // are only used when an explicit memory region name is not used.
1231 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() {
1232   uint32_t Flags = 0;
1233   uint32_t NegFlags = 0;
1234   bool Invert = false;
1235 
1236   for (char C : next().lower()) {
1237     uint32_t Flag = 0;
1238     if (C == '!')
1239       Invert = !Invert;
1240     else if (C == 'w')
1241       Flag = SHF_WRITE;
1242     else if (C == 'x')
1243       Flag = SHF_EXECINSTR;
1244     else if (C == 'a')
1245       Flag = SHF_ALLOC;
1246     else if (C != 'r')
1247       setError("invalid memory region attribute");
1248 
1249     if (Invert)
1250       NegFlags |= Flag;
1251     else
1252       Flags |= Flag;
1253   }
1254   return {Flags, NegFlags};
1255 }
1256 
1257 void elf::readLinkerScript(MemoryBufferRef MB) {
1258   ScriptParser(MB).readLinkerScript();
1259 }
1260 
1261 void elf::readVersionScript(MemoryBufferRef MB) {
1262   ScriptParser(MB).readVersionScript();
1263 }
1264 
1265 void elf::readDynamicList(MemoryBufferRef MB) {
1266   ScriptParser(MB).readDynamicList();
1267 }
1268