1 //===- ScriptParser.cpp ---------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// This file contains a recursive-descent parser for linker scripts.
11 // Parsed results are stored to Config and Script global objects.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ScriptParser.h"
16 #include "Config.h"
17 #include "Driver.h"
18 #include "InputSection.h"
19 #include "LinkerScript.h"
20 #include "Memory.h"
21 #include "OutputSections.h"
22 #include "ScriptLexer.h"
23 #include "Symbols.h"
24 #include "Target.h"
25 #include "llvm/ADT/SmallString.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/ADT/StringSwitch.h"
28 #include "llvm/Support/Casting.h"
29 #include "llvm/Support/ELF.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include "llvm/Support/FileSystem.h"
32 #include "llvm/Support/Path.h"
33 #include <cassert>
34 #include <limits>
35 #include <vector>
36 
37 using namespace llvm;
38 using namespace llvm::ELF;
39 using namespace lld;
40 using namespace lld::elf;
41 
42 static bool isUnderSysroot(StringRef Path);
43 
44 namespace {
45 class ScriptParser final : ScriptLexer {
46 public:
47   ScriptParser(MemoryBufferRef MB)
48       : ScriptLexer(MB),
49         IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {}
50 
51   void readLinkerScript();
52   void readVersionScript();
53   void readDynamicList();
54 
55 private:
56   void addFile(StringRef Path);
57 
58   void readAsNeeded();
59   void readEntry();
60   void readExtern();
61   void readGroup();
62   void readInclude();
63   void readMemory();
64   void readOutput();
65   void readOutputArch();
66   void readOutputFormat();
67   void readPhdrs();
68   void readSearchDir();
69   void readSections();
70   void readVersion();
71   void readVersionScriptCommand();
72 
73   SymbolAssignment *readAssignment(StringRef Name);
74   BytesDataCommand *readBytesDataCommand(StringRef Tok);
75   uint32_t readFill();
76   OutputSectionCommand *readOutputSectionDescription(StringRef OutSec);
77   uint32_t readOutputSectionFiller(StringRef Tok);
78   std::vector<StringRef> readOutputSectionPhdrs();
79   InputSectionDescription *readInputSectionDescription(StringRef Tok);
80   StringMatcher readFilePatterns();
81   std::vector<SectionPattern> readInputSectionsList();
82   InputSectionDescription *readInputSectionRules(StringRef FilePattern);
83   unsigned readPhdrType();
84   SortSectionPolicy readSortKind();
85   SymbolAssignment *readProvideHidden(bool Provide, bool Hidden);
86   SymbolAssignment *readProvideOrAssignment(StringRef Tok);
87   void readSort();
88   AssertCommand *readAssert();
89   Expr readAssertExpr();
90 
91   uint64_t readMemoryAssignment(StringRef, StringRef, StringRef);
92   std::pair<uint32_t, uint32_t> readMemoryAttributes();
93 
94   Expr readExpr();
95   Expr readExpr1(Expr Lhs, int MinPrec);
96   StringRef readParenLiteral();
97   Expr readPrimary();
98   Expr readTernary(Expr Cond);
99   Expr readParenExpr();
100 
101   // For parsing version script.
102   std::vector<SymbolVersion> readVersionExtern();
103   void readAnonymousDeclaration();
104   void readVersionDeclaration(StringRef VerStr);
105 
106   std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>>
107   readSymbols();
108 
109   bool IsUnderSysroot;
110 };
111 } // namespace
112 
113 static bool isUnderSysroot(StringRef Path) {
114   if (Config->Sysroot == "")
115     return false;
116   for (; !Path.empty(); Path = sys::path::parent_path(Path))
117     if (sys::fs::equivalent(Config->Sysroot, Path))
118       return true;
119   return false;
120 }
121 
122 // Some operations only support one non absolute value. Move the
123 // absolute one to the right hand side for convenience.
124 static void moveAbsRight(ExprValue &A, ExprValue &B) {
125   if (A.isAbsolute())
126     std::swap(A, B);
127   if (!B.isAbsolute())
128     error("At least one side of the expression must be absolute");
129 }
130 
131 static ExprValue add(ExprValue A, ExprValue B) {
132   moveAbsRight(A, B);
133   return {A.Sec, A.ForceAbsolute, A.Val + B.getValue()};
134 }
135 
136 static ExprValue sub(ExprValue A, ExprValue B) {
137   return {A.Sec, A.Val - B.getValue()};
138 }
139 
140 static ExprValue mul(ExprValue A, ExprValue B) {
141   return A.getValue() * B.getValue();
142 }
143 
144 static ExprValue div(ExprValue A, ExprValue B) {
145   if (uint64_t BV = B.getValue())
146     return A.getValue() / BV;
147   error("division by zero");
148   return 0;
149 }
150 
151 static ExprValue bitAnd(ExprValue A, ExprValue B) {
152   moveAbsRight(A, B);
153   return {A.Sec, A.ForceAbsolute,
154           (A.getValue() & B.getValue()) - A.getSecAddr()};
155 }
156 
157 static ExprValue bitOr(ExprValue A, ExprValue B) {
158   moveAbsRight(A, B);
159   return {A.Sec, A.ForceAbsolute,
160           (A.getValue() | B.getValue()) - A.getSecAddr()};
161 }
162 
163 void ScriptParser::readDynamicList() {
164   expect("{");
165   readAnonymousDeclaration();
166   if (!atEOF())
167     setError("EOF expected, but got " + next());
168 }
169 
170 void ScriptParser::readVersionScript() {
171   readVersionScriptCommand();
172   if (!atEOF())
173     setError("EOF expected, but got " + next());
174 }
175 
176 void ScriptParser::readVersionScriptCommand() {
177   if (consume("{")) {
178     readAnonymousDeclaration();
179     return;
180   }
181 
182   while (!atEOF() && !Error && peek() != "}") {
183     StringRef VerStr = next();
184     if (VerStr == "{") {
185       setError("anonymous version definition is used in "
186                "combination with other version definitions");
187       return;
188     }
189     expect("{");
190     readVersionDeclaration(VerStr);
191   }
192 }
193 
194 void ScriptParser::readVersion() {
195   expect("{");
196   readVersionScriptCommand();
197   expect("}");
198 }
199 
200 void ScriptParser::readLinkerScript() {
201   while (!atEOF()) {
202     StringRef Tok = next();
203     if (Tok == ";")
204       continue;
205 
206     if (Tok == "ASSERT") {
207       Script->Opt.Commands.push_back(readAssert());
208     } else if (Tok == "ENTRY") {
209       readEntry();
210     } else if (Tok == "EXTERN") {
211       readExtern();
212     } else if (Tok == "GROUP" || Tok == "INPUT") {
213       readGroup();
214     } else if (Tok == "INCLUDE") {
215       readInclude();
216     } else if (Tok == "MEMORY") {
217       readMemory();
218     } else if (Tok == "OUTPUT") {
219       readOutput();
220     } else if (Tok == "OUTPUT_ARCH") {
221       readOutputArch();
222     } else if (Tok == "OUTPUT_FORMAT") {
223       readOutputFormat();
224     } else if (Tok == "PHDRS") {
225       readPhdrs();
226     } else if (Tok == "SEARCH_DIR") {
227       readSearchDir();
228     } else if (Tok == "SECTIONS") {
229       readSections();
230     } else if (Tok == "VERSION") {
231       readVersion();
232     } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) {
233       Script->Opt.Commands.push_back(Cmd);
234     } else {
235       setError("unknown directive: " + Tok);
236     }
237   }
238 }
239 
240 void ScriptParser::addFile(StringRef S) {
241   if (IsUnderSysroot && S.startswith("/")) {
242     SmallString<128> PathData;
243     StringRef Path = (Config->Sysroot + S).toStringRef(PathData);
244     if (sys::fs::exists(Path)) {
245       Driver->addFile(Saver.save(Path));
246       return;
247     }
248   }
249 
250   if (sys::path::is_absolute(S)) {
251     Driver->addFile(S);
252   } else if (S.startswith("=")) {
253     if (Config->Sysroot.empty())
254       Driver->addFile(S.substr(1));
255     else
256       Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)));
257   } else if (S.startswith("-l")) {
258     Driver->addLibrary(S.substr(2));
259   } else if (sys::fs::exists(S)) {
260     Driver->addFile(S);
261   } else {
262     if (Optional<std::string> Path = findFromSearchPaths(S))
263       Driver->addFile(Saver.save(*Path));
264     else
265       setError("unable to find " + S);
266   }
267 }
268 
269 void ScriptParser::readAsNeeded() {
270   expect("(");
271   bool Orig = Config->AsNeeded;
272   Config->AsNeeded = true;
273   while (!Error && !consume(")"))
274     addFile(unquote(next()));
275   Config->AsNeeded = Orig;
276 }
277 
278 void ScriptParser::readEntry() {
279   // -e <symbol> takes predecence over ENTRY(<symbol>).
280   expect("(");
281   StringRef Tok = next();
282   if (Config->Entry.empty())
283     Config->Entry = Tok;
284   expect(")");
285 }
286 
287 void ScriptParser::readExtern() {
288   expect("(");
289   while (!Error && !consume(")"))
290     Config->Undefined.push_back(next());
291 }
292 
293 void ScriptParser::readGroup() {
294   expect("(");
295   while (!Error && !consume(")")) {
296     if (consume("AS_NEEDED"))
297       readAsNeeded();
298     else
299       addFile(unquote(next()));
300   }
301 }
302 
303 void ScriptParser::readInclude() {
304   StringRef Tok = unquote(next());
305 
306   // https://sourceware.org/binutils/docs/ld/File-Commands.html:
307   // The file will be searched for in the current directory, and in any
308   // directory specified with the -L option.
309   if (sys::fs::exists(Tok)) {
310     if (Optional<MemoryBufferRef> MB = readFile(Tok))
311       tokenize(*MB);
312     return;
313   }
314   if (Optional<std::string> Path = findFromSearchPaths(Tok)) {
315     if (Optional<MemoryBufferRef> MB = readFile(*Path))
316       tokenize(*MB);
317     return;
318   }
319   setError("cannot open " + Tok);
320 }
321 
322 void ScriptParser::readOutput() {
323   // -o <file> takes predecence over OUTPUT(<file>).
324   expect("(");
325   StringRef Tok = next();
326   if (Config->OutputFile.empty())
327     Config->OutputFile = unquote(Tok);
328   expect(")");
329 }
330 
331 void ScriptParser::readOutputArch() {
332   // OUTPUT_ARCH is ignored for now.
333   expect("(");
334   while (!Error && !consume(")"))
335     skip();
336 }
337 
338 void ScriptParser::readOutputFormat() {
339   // Error checking only for now.
340   expect("(");
341   skip();
342   if (consume(")"))
343     return;
344   expect(",");
345   skip();
346   expect(",");
347   skip();
348   expect(")");
349 }
350 
351 void ScriptParser::readPhdrs() {
352   expect("{");
353   while (!Error && !consume("}")) {
354     Script->Opt.PhdrsCommands.push_back(
355         {next(), PT_NULL, false, false, UINT_MAX, nullptr});
356 
357     PhdrsCommand &PhdrCmd = Script->Opt.PhdrsCommands.back();
358     PhdrCmd.Type = readPhdrType();
359 
360     while (!Error && !consume(";")) {
361       if (consume("FILEHDR"))
362         PhdrCmd.HasFilehdr = true;
363       else if (consume("PHDRS"))
364         PhdrCmd.HasPhdrs = true;
365       else if (consume("AT"))
366         PhdrCmd.LMAExpr = readParenExpr();
367       else if (consume("FLAGS"))
368         PhdrCmd.Flags = readParenExpr()().getValue();
369       else
370         setError("unexpected header attribute: " + next());
371     }
372   }
373 }
374 
375 void ScriptParser::readSearchDir() {
376   expect("(");
377   StringRef Tok = next();
378   if (!Config->Nostdlib)
379     Config->SearchPaths.push_back(unquote(Tok));
380   expect(")");
381 }
382 
383 void ScriptParser::readSections() {
384   Script->Opt.HasSections = true;
385 
386   // -no-rosegment is used to avoid placing read only non-executable sections in
387   // their own segment. We do the same if SECTIONS command is present in linker
388   // script. See comment for computeFlags().
389   Config->SingleRoRx = true;
390 
391   expect("{");
392   while (!Error && !consume("}")) {
393     StringRef Tok = next();
394     BaseCommand *Cmd = readProvideOrAssignment(Tok);
395     if (!Cmd) {
396       if (Tok == "ASSERT")
397         Cmd = readAssert();
398       else
399         Cmd = readOutputSectionDescription(Tok);
400     }
401     Script->Opt.Commands.push_back(Cmd);
402   }
403 }
404 
405 static int precedence(StringRef Op) {
406   return StringSwitch<int>(Op)
407       .Cases("*", "/", 5)
408       .Cases("+", "-", 4)
409       .Cases("<<", ">>", 3)
410       .Cases("<", "<=", ">", ">=", "==", "!=", 2)
411       .Cases("&", "|", 1)
412       .Default(-1);
413 }
414 
415 StringMatcher ScriptParser::readFilePatterns() {
416   std::vector<StringRef> V;
417   while (!Error && !consume(")"))
418     V.push_back(next());
419   return StringMatcher(V);
420 }
421 
422 SortSectionPolicy ScriptParser::readSortKind() {
423   if (consume("SORT") || consume("SORT_BY_NAME"))
424     return SortSectionPolicy::Name;
425   if (consume("SORT_BY_ALIGNMENT"))
426     return SortSectionPolicy::Alignment;
427   if (consume("SORT_BY_INIT_PRIORITY"))
428     return SortSectionPolicy::Priority;
429   if (consume("SORT_NONE"))
430     return SortSectionPolicy::None;
431   return SortSectionPolicy::Default;
432 }
433 
434 // Reads SECTIONS command contents in the following form:
435 //
436 // <contents> ::= <elem>*
437 // <elem>     ::= <exclude>? <glob-pattern>
438 // <exclude>  ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")"
439 //
440 // For example,
441 //
442 // *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz)
443 //
444 // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o".
445 // The semantics of that is section .foo in any file, section .bar in
446 // any file but a.o, and section .baz in any file but b.o.
447 std::vector<SectionPattern> ScriptParser::readInputSectionsList() {
448   std::vector<SectionPattern> Ret;
449   while (!Error && peek() != ")") {
450     StringMatcher ExcludeFilePat;
451     if (consume("EXCLUDE_FILE")) {
452       expect("(");
453       ExcludeFilePat = readFilePatterns();
454     }
455 
456     std::vector<StringRef> V;
457     while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE")
458       V.push_back(next());
459 
460     if (!V.empty())
461       Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)});
462     else
463       setError("section pattern is expected");
464   }
465   return Ret;
466 }
467 
468 // Reads contents of "SECTIONS" directive. That directive contains a
469 // list of glob patterns for input sections. The grammar is as follows.
470 //
471 // <patterns> ::= <section-list>
472 //              | <sort> "(" <section-list> ")"
473 //              | <sort> "(" <sort> "(" <section-list> ")" ")"
474 //
475 // <sort>     ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT"
476 //              | "SORT_BY_INIT_PRIORITY" | "SORT_NONE"
477 //
478 // <section-list> is parsed by readInputSectionsList().
479 InputSectionDescription *
480 ScriptParser::readInputSectionRules(StringRef FilePattern) {
481   auto *Cmd = make<InputSectionDescription>(FilePattern);
482   expect("(");
483 
484   while (!Error && !consume(")")) {
485     SortSectionPolicy Outer = readSortKind();
486     SortSectionPolicy Inner = SortSectionPolicy::Default;
487     std::vector<SectionPattern> V;
488     if (Outer != SortSectionPolicy::Default) {
489       expect("(");
490       Inner = readSortKind();
491       if (Inner != SortSectionPolicy::Default) {
492         expect("(");
493         V = readInputSectionsList();
494         expect(")");
495       } else {
496         V = readInputSectionsList();
497       }
498       expect(")");
499     } else {
500       V = readInputSectionsList();
501     }
502 
503     for (SectionPattern &Pat : V) {
504       Pat.SortInner = Inner;
505       Pat.SortOuter = Outer;
506     }
507 
508     std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns));
509   }
510   return Cmd;
511 }
512 
513 InputSectionDescription *
514 ScriptParser::readInputSectionDescription(StringRef Tok) {
515   // Input section wildcard can be surrounded by KEEP.
516   // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep
517   if (Tok == "KEEP") {
518     expect("(");
519     StringRef FilePattern = next();
520     InputSectionDescription *Cmd = readInputSectionRules(FilePattern);
521     expect(")");
522     Script->Opt.KeptSections.push_back(Cmd);
523     return Cmd;
524   }
525   return readInputSectionRules(Tok);
526 }
527 
528 void ScriptParser::readSort() {
529   expect("(");
530   expect("CONSTRUCTORS");
531   expect(")");
532 }
533 
534 AssertCommand *ScriptParser::readAssert() {
535   return make<AssertCommand>(readAssertExpr());
536 }
537 
538 Expr ScriptParser::readAssertExpr() {
539   expect("(");
540   Expr E = readExpr();
541   expect(",");
542   StringRef Msg = unquote(next());
543   expect(")");
544 
545   return [=] {
546     if (!E().getValue())
547       error(Msg);
548     return Script->getDot();
549   };
550 }
551 
552 // Reads a FILL(expr) command. We handle the FILL command as an
553 // alias for =fillexp section attribute, which is different from
554 // what GNU linkers do.
555 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html
556 uint32_t ScriptParser::readFill() {
557   expect("(");
558   uint32_t V = readOutputSectionFiller(next());
559   expect(")");
560   expect(";");
561   return V;
562 }
563 
564 OutputSectionCommand *
565 ScriptParser::readOutputSectionDescription(StringRef OutSec) {
566   OutputSectionCommand *Cmd = make<OutputSectionCommand>(OutSec);
567   Cmd->Location = getCurrentLocation();
568 
569   // Read an address expression.
570   // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html
571   if (peek() != ":")
572     Cmd->AddrExpr = readExpr();
573 
574   expect(":");
575 
576   if (consume("AT"))
577     Cmd->LMAExpr = readParenExpr();
578   if (consume("ALIGN"))
579     Cmd->AlignExpr = readParenExpr();
580   if (consume("SUBALIGN"))
581     Cmd->SubalignExpr = readParenExpr();
582 
583   // Parse constraints.
584   if (consume("ONLY_IF_RO"))
585     Cmd->Constraint = ConstraintKind::ReadOnly;
586   if (consume("ONLY_IF_RW"))
587     Cmd->Constraint = ConstraintKind::ReadWrite;
588   expect("{");
589 
590   while (!Error && !consume("}")) {
591     StringRef Tok = next();
592     if (Tok == ";") {
593       // Empty commands are allowed. Do nothing here.
594     } else if (SymbolAssignment *Assign = readProvideOrAssignment(Tok)) {
595       Cmd->Commands.push_back(Assign);
596     } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) {
597       Cmd->Commands.push_back(Data);
598     } else if (Tok == "ASSERT") {
599       Cmd->Commands.push_back(readAssert());
600       expect(";");
601     } else if (Tok == "CONSTRUCTORS") {
602       // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors
603       // by name. This is for very old file formats such as ECOFF/XCOFF.
604       // For ELF, we should ignore.
605     } else if (Tok == "FILL") {
606       Cmd->Filler = readFill();
607     } else if (Tok == "SORT") {
608       readSort();
609     } else if (peek() == "(") {
610       Cmd->Commands.push_back(readInputSectionDescription(Tok));
611     } else {
612       setError("unknown command " + Tok);
613     }
614   }
615 
616   if (consume(">"))
617     Cmd->MemoryRegionName = next();
618 
619   Cmd->Phdrs = readOutputSectionPhdrs();
620 
621   if (consume("="))
622     Cmd->Filler = readOutputSectionFiller(next());
623   else if (peek().startswith("="))
624     Cmd->Filler = readOutputSectionFiller(next().drop_front());
625 
626   // Consume optional comma following output section command.
627   consume(",");
628 
629   return Cmd;
630 }
631 
632 // Read "=<number>" where <number> is an octal/decimal/hexadecimal number.
633 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html
634 //
635 // ld.gold is not fully compatible with ld.bfd. ld.bfd handles
636 // hexstrings as blobs of arbitrary sizes, while ld.gold handles them
637 // as 32-bit big-endian values. We will do the same as ld.gold does
638 // because it's simpler than what ld.bfd does.
639 uint32_t ScriptParser::readOutputSectionFiller(StringRef Tok) {
640   uint32_t V;
641   if (!Tok.getAsInteger(0, V))
642     return V;
643   setError("invalid filler expression: " + Tok);
644   return 0;
645 }
646 
647 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) {
648   expect("(");
649   SymbolAssignment *Cmd = readAssignment(next());
650   Cmd->Provide = Provide;
651   Cmd->Hidden = Hidden;
652   expect(")");
653   expect(";");
654   return Cmd;
655 }
656 
657 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) {
658   SymbolAssignment *Cmd = nullptr;
659   if (peek() == "=" || peek() == "+=") {
660     Cmd = readAssignment(Tok);
661     expect(";");
662   } else if (Tok == "PROVIDE") {
663     Cmd = readProvideHidden(true, false);
664   } else if (Tok == "HIDDEN") {
665     Cmd = readProvideHidden(false, true);
666   } else if (Tok == "PROVIDE_HIDDEN") {
667     Cmd = readProvideHidden(true, true);
668   }
669   return Cmd;
670 }
671 
672 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) {
673   StringRef Op = next();
674   assert(Op == "=" || Op == "+=");
675   Expr E = readExpr();
676   if (Op == "+=") {
677     std::string Loc = getCurrentLocation();
678     E = [=] { return add(Script->getSymbolValue(Loc, Name), E()); };
679   }
680   return make<SymbolAssignment>(Name, E, getCurrentLocation());
681 }
682 
683 // This is an operator-precedence parser to parse a linker
684 // script expression.
685 Expr ScriptParser::readExpr() {
686   // Our lexer is context-aware. Set the in-expression bit so that
687   // they apply different tokenization rules.
688   bool Orig = InExpr;
689   InExpr = true;
690   Expr E = readExpr1(readPrimary(), 0);
691   InExpr = Orig;
692   return E;
693 }
694 
695 static Expr combine(StringRef Op, Expr L, Expr R) {
696   if (Op == "+")
697     return [=] { return add(L(), R()); };
698   if (Op == "-")
699     return [=] { return sub(L(), R()); };
700   if (Op == "*")
701     return [=] { return mul(L(), R()); };
702   if (Op == "/")
703     return [=] { return div(L(), R()); };
704   if (Op == "<<")
705     return [=] { return L().getValue() << R().getValue(); };
706   if (Op == ">>")
707     return [=] { return L().getValue() >> R().getValue(); };
708   if (Op == "<")
709     return [=] { return L().getValue() < R().getValue(); };
710   if (Op == ">")
711     return [=] { return L().getValue() > R().getValue(); };
712   if (Op == ">=")
713     return [=] { return L().getValue() >= R().getValue(); };
714   if (Op == "<=")
715     return [=] { return L().getValue() <= R().getValue(); };
716   if (Op == "==")
717     return [=] { return L().getValue() == R().getValue(); };
718   if (Op == "!=")
719     return [=] { return L().getValue() != R().getValue(); };
720   if (Op == "&")
721     return [=] { return bitAnd(L(), R()); };
722   if (Op == "|")
723     return [=] { return bitOr(L(), R()); };
724   llvm_unreachable("invalid operator");
725 }
726 
727 // This is a part of the operator-precedence parser. This function
728 // assumes that the remaining token stream starts with an operator.
729 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) {
730   while (!atEOF() && !Error) {
731     // Read an operator and an expression.
732     if (consume("?"))
733       return readTernary(Lhs);
734     StringRef Op1 = peek();
735     if (precedence(Op1) < MinPrec)
736       break;
737     skip();
738     Expr Rhs = readPrimary();
739 
740     // Evaluate the remaining part of the expression first if the
741     // next operator has greater precedence than the previous one.
742     // For example, if we have read "+" and "3", and if the next
743     // operator is "*", then we'll evaluate 3 * ... part first.
744     while (!atEOF()) {
745       StringRef Op2 = peek();
746       if (precedence(Op2) <= precedence(Op1))
747         break;
748       Rhs = readExpr1(Rhs, precedence(Op2));
749     }
750 
751     Lhs = combine(Op1, Lhs, Rhs);
752   }
753   return Lhs;
754 }
755 
756 uint64_t static getConstant(StringRef S) {
757   if (S == "COMMONPAGESIZE")
758     return Target->PageSize;
759   if (S == "MAXPAGESIZE")
760     return Config->MaxPageSize;
761   error("unknown constant: " + S);
762   return 0;
763 }
764 
765 // Parses Tok as an integer. It recognizes hexadecimal (prefixed with
766 // "0x" or suffixed with "H") and decimal numbers. Decimal numbers may
767 // have "K" (Ki) or "M" (Mi) suffixes.
768 static Optional<uint64_t> parseInt(StringRef Tok) {
769   // Negative number
770   if (Tok.startswith("-")) {
771     if (Optional<uint64_t> Val = parseInt(Tok.substr(1)))
772       return -*Val;
773     return None;
774   }
775 
776   // Hexadecimal
777   uint64_t Val;
778   if (Tok.startswith_lower("0x") && !Tok.substr(2).getAsInteger(16, Val))
779     return Val;
780   if (Tok.endswith_lower("H") && !Tok.drop_back().getAsInteger(16, Val))
781     return Val;
782 
783   // Decimal
784   if (Tok.endswith_lower("K")) {
785     if (Tok.drop_back().getAsInteger(10, Val))
786       return None;
787     return Val * 1024;
788   }
789   if (Tok.endswith_lower("M")) {
790     if (Tok.drop_back().getAsInteger(10, Val))
791       return None;
792     return Val * 1024 * 1024;
793   }
794   if (Tok.getAsInteger(10, Val))
795     return None;
796   return Val;
797 }
798 
799 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) {
800   int Size = StringSwitch<int>(Tok)
801                  .Case("BYTE", 1)
802                  .Case("SHORT", 2)
803                  .Case("LONG", 4)
804                  .Case("QUAD", 8)
805                  .Default(-1);
806   if (Size == -1)
807     return nullptr;
808 
809   return make<BytesDataCommand>(readParenExpr(), Size);
810 }
811 
812 StringRef ScriptParser::readParenLiteral() {
813   expect("(");
814   StringRef Tok = next();
815   expect(")");
816   return Tok;
817 }
818 
// Reads a primary expression: a parenthesized expression, a unary "~" or "-"
// applied to a primary, a built-in function call, the dot, an integer
// literal, or a symbol name. The returned Expr is evaluated lazily; only the
// tokens are consumed here.
Expr ScriptParser::readPrimary() {
  if (peek() == "(")
    return readParenExpr();

  // Unary operators apply to the primary expression that follows them.
  if (consume("~")) {
    Expr E = readPrimary();
    return [=] { return ~E().getValue(); };
  }
  if (consume("-")) {
    Expr E = readPrimary();
    return [=] { return -E().getValue(); };
  }

  StringRef Tok = next();
  // Captured by the lambdas below that look up symbols or output sections.
  std::string Location = getCurrentLocation();

  // Built-in functions are parsed here.
  // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html.
  if (Tok == "ABSOLUTE") {
    Expr Inner = readParenExpr();
    return [=] {
      ExprValue I = Inner();
      I.ForceAbsolute = true;
      return I;
    };
  }
  if (Tok == "ADDR") {
    StringRef Name = readParenLiteral();
    return [=]() -> ExprValue {
      return {Script->getOutputSection(Location, Name), 0};
    };
  }
  if (Tok == "ALIGN") {
    // ALIGN(e) aligns the dot to e; ALIGN(e, e2) aligns e to e2.
    expect("(");
    Expr E = readExpr();
    if (consume(")"))
      return [=] { return alignTo(Script->getDot(), E().getValue()); };
    expect(",");
    Expr E2 = readExpr();
    expect(")");
    return [=] { return alignTo(E().getValue(), E2().getValue()); };
  }
  if (Tok == "ALIGNOF") {
    StringRef Name = readParenLiteral();
    return [=] { return Script->getOutputSection(Location, Name)->Alignment; };
  }
  if (Tok == "ASSERT")
    return readAssertExpr();
  if (Tok == "CONSTANT") {
    StringRef Name = readParenLiteral();
    return [=] { return getConstant(Name); };
  }
  if (Tok == "DATA_SEGMENT_ALIGN") {
    // The second argument is parsed but its value is not used; the dot is
    // aligned to the first argument.
    expect("(");
    Expr E = readExpr();
    expect(",");
    readExpr();
    expect(")");
    return [=] { return alignTo(Script->getDot(), E().getValue()); };
  }
  if (Tok == "DATA_SEGMENT_END") {
    // Only the form DATA_SEGMENT_END(.) is accepted; it evaluates to the dot.
    expect("(");
    expect(".");
    expect(")");
    return [] { return Script->getDot(); };
  }
  if (Tok == "DATA_SEGMENT_RELRO_END") {
    // GNU linkers implement more complicated logic to handle
    // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and
    // just align to the next page boundary for simplicity.
    expect("(");
    readExpr();
    expect(",");
    readExpr();
    expect(")");
    return [] { return alignTo(Script->getDot(), Target->PageSize); };
  }
  if (Tok == "DEFINED") {
    StringRef Name = readParenLiteral();
    return [=] { return Script->isDefined(Name) ? 1 : 0; };
  }
  if (Tok == "LOADADDR") {
    StringRef Name = readParenLiteral();
    return [=] { return Script->getOutputSection(Location, Name)->getLMA(); };
  }
  if (Tok == "SEGMENT_START") {
    // The first argument is skipped; the result is the value of the second
    // argument.
    expect("(");
    skip();
    expect(",");
    Expr E = readExpr();
    expect(")");
    return [=] { return E(); };
  }
  if (Tok == "SIZEOF") {
    StringRef Name = readParenLiteral();
    return [=] { return Script->getOutputSectionSize(Name); };
  }
  if (Tok == "SIZEOF_HEADERS")
    return [=] { return elf::getHeaderSize(); };

  // Tok is the dot.
  if (Tok == ".")
    return [=] { return Script->getSymbolValue(Location, Tok); };

  // Tok is a literal number.
  if (Optional<uint64_t> Val = parseInt(Tok))
    return [=] { return *Val; };

  // Tok is a symbol name. A token that is not a valid C identifier records an
  // error, but is still registered and returned as a symbol reference below.
  if (!isValidCIdentifier(Tok))
    setError("malformed number: " + Tok);
  Script->Opt.ReferencedSymbols.push_back(Tok);
  return [=] { return Script->getSymbolValue(Location, Tok); };
}
933 
934 Expr ScriptParser::readTernary(Expr Cond) {
935   Expr L = readExpr();
936   expect(":");
937   Expr R = readExpr();
938   return [=] { return Cond().getValue() ? L() : R(); };
939 }
940 
941 Expr ScriptParser::readParenExpr() {
942   expect("(");
943   Expr E = readExpr();
944   expect(")");
945   return E;
946 }
947 
948 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() {
949   std::vector<StringRef> Phdrs;
950   while (!Error && peek().startswith(":")) {
951     StringRef Tok = next();
952     Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1));
953   }
954   return Phdrs;
955 }
956 
957 // Read a program header type name. The next token must be a
958 // name of a program header type or a constant (e.g. "0x3").
959 unsigned ScriptParser::readPhdrType() {
960   StringRef Tok = next();
961   if (Optional<uint64_t> Val = parseInt(Tok))
962     return *Val;
963 
964   unsigned Ret = StringSwitch<unsigned>(Tok)
965                      .Case("PT_NULL", PT_NULL)
966                      .Case("PT_LOAD", PT_LOAD)
967                      .Case("PT_DYNAMIC", PT_DYNAMIC)
968                      .Case("PT_INTERP", PT_INTERP)
969                      .Case("PT_NOTE", PT_NOTE)
970                      .Case("PT_SHLIB", PT_SHLIB)
971                      .Case("PT_PHDR", PT_PHDR)
972                      .Case("PT_TLS", PT_TLS)
973                      .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME)
974                      .Case("PT_GNU_STACK", PT_GNU_STACK)
975                      .Case("PT_GNU_RELRO", PT_GNU_RELRO)
976                      .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE)
977                      .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED)
978                      .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA)
979                      .Default(-1);
980 
981   if (Ret == (unsigned)-1) {
982     setError("invalid program header type: " + Tok);
983     return PT_NULL;
984   }
985   return Ret;
986 }
987 
988 // Reads an anonymous version declaration.
989 void ScriptParser::readAnonymousDeclaration() {
990   std::vector<SymbolVersion> Locals;
991   std::vector<SymbolVersion> Globals;
992   std::tie(Locals, Globals) = readSymbols();
993 
994   for (SymbolVersion V : Locals) {
995     if (V.Name == "*")
996       Config->DefaultSymbolVersion = VER_NDX_LOCAL;
997     else
998       Config->VersionScriptLocals.push_back(V);
999   }
1000 
1001   for (SymbolVersion V : Globals)
1002     Config->VersionScriptGlobals.push_back(V);
1003 
1004   expect(";");
1005 }
1006 
1007 // Reads a non-anonymous version definition,
1008 // e.g. "VerStr { global: foo; bar; local: *; };".
1009 void ScriptParser::readVersionDeclaration(StringRef VerStr) {
1010   // Read a symbol list.
1011   std::vector<SymbolVersion> Locals;
1012   std::vector<SymbolVersion> Globals;
1013   std::tie(Locals, Globals) = readSymbols();
1014 
1015   for (SymbolVersion V : Locals) {
1016     if (V.Name == "*")
1017       Config->DefaultSymbolVersion = VER_NDX_LOCAL;
1018     else
1019       Config->VersionScriptLocals.push_back(V);
1020   }
1021 
1022   // Create a new version definition and add that to the global symbols.
1023   VersionDefinition Ver;
1024   Ver.Name = VerStr;
1025   Ver.Globals = Globals;
1026 
1027   // User-defined version number starts from 2 because 0 and 1 are
1028   // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively.
1029   Ver.Id = Config->VersionDefinitions.size() + 2;
1030   Config->VersionDefinitions.push_back(Ver);
1031 
1032   // Each version may have a parent version. For example, "Ver2"
1033   // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1"
1034   // as a parent. This version hierarchy is, probably against your
1035   // instinct, purely for hint; the runtime doesn't care about it
1036   // at all. In LLD, we simply ignore it.
1037   if (peek() != ";")
1038     skip();
1039   expect(";");
1040 }
1041 
1042 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };".
1043 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>>
1044 ScriptParser::readSymbols() {
1045   std::vector<SymbolVersion> Locals;
1046   std::vector<SymbolVersion> Globals;
1047   std::vector<SymbolVersion> *V = &Globals;
1048 
1049   while (!Error) {
1050     if (consume("}"))
1051       break;
1052     if (consumeLabel("local")) {
1053       V = &Locals;
1054       continue;
1055     }
1056     if (consumeLabel("global")) {
1057       V = &Globals;
1058       continue;
1059     }
1060 
1061     if (consume("extern")) {
1062       std::vector<SymbolVersion> Ext = readVersionExtern();
1063       V->insert(V->end(), Ext.begin(), Ext.end());
1064     } else {
1065       StringRef Tok = next();
1066       V->push_back({unquote(Tok), false, hasWildcard(Tok)});
1067     }
1068     expect(";");
1069   }
1070   return {Locals, Globals};
1071 }
1072 
1073 // Reads an "extern C++" directive, e.g.,
1074 // "extern "C++" { ns::*; "f(int, double)"; };"
1075 std::vector<SymbolVersion> ScriptParser::readVersionExtern() {
1076   StringRef Tok = next();
1077   bool IsCXX = Tok == "\"C++\"";
1078   if (!IsCXX && Tok != "\"C\"")
1079     setError("Unknown language");
1080   expect("{");
1081 
1082   std::vector<SymbolVersion> Ret;
1083   while (!Error && peek() != "}") {
1084     StringRef Tok = next();
1085     bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok);
1086     Ret.push_back({unquote(Tok), IsCXX, HasWildcard});
1087     expect(";");
1088   }
1089 
1090   expect("}");
1091   return Ret;
1092 }
1093 
1094 uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2,
1095                                             StringRef S3) {
1096   if (!consume(S1) && !consume(S2) && !consume(S3)) {
1097     setError("expected one of: " + S1 + ", " + S2 + ", or " + S3);
1098     return 0;
1099   }
1100   expect("=");
1101 
1102   // TODO: Fully support constant expressions.
1103   if (Optional<uint64_t> Val = parseInt(next()))
1104     return *Val;
1105   setError("nonconstant expression for " + S1);
1106   return 0;
1107 }
1108 
1109 // Parse the MEMORY command as specified in:
1110 // https://sourceware.org/binutils/docs/ld/MEMORY.html
1111 //
1112 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... }
1113 void ScriptParser::readMemory() {
1114   expect("{");
1115   while (!Error && !consume("}")) {
1116     StringRef Name = next();
1117 
1118     uint32_t Flags = 0;
1119     uint32_t NegFlags = 0;
1120     if (consume("(")) {
1121       std::tie(Flags, NegFlags) = readMemoryAttributes();
1122       expect(")");
1123     }
1124     expect(":");
1125 
1126     uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o");
1127     expect(",");
1128     uint64_t Length = readMemoryAssignment("LENGTH", "len", "l");
1129 
1130     // Add the memory region to the region map (if it doesn't already exist).
1131     auto It = Script->Opt.MemoryRegions.find(Name);
1132     if (It != Script->Opt.MemoryRegions.end())
1133       setError("region '" + Name + "' already defined");
1134     else
1135       Script->Opt.MemoryRegions[Name] = {Name,   Origin, Length,
1136                                          Origin, Flags,  NegFlags};
1137   }
1138 }
1139 
1140 // This function parses the attributes used to match against section
1141 // flags when placing output sections in a memory region. These flags
1142 // are only used when an explicit memory region name is not used.
1143 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() {
1144   uint32_t Flags = 0;
1145   uint32_t NegFlags = 0;
1146   bool Invert = false;
1147 
1148   for (char C : next().lower()) {
1149     uint32_t Flag = 0;
1150     if (C == '!')
1151       Invert = !Invert;
1152     else if (C == 'w')
1153       Flag = SHF_WRITE;
1154     else if (C == 'x')
1155       Flag = SHF_EXECINSTR;
1156     else if (C == 'a')
1157       Flag = SHF_ALLOC;
1158     else if (C != 'r')
1159       setError("invalid memory region attribute");
1160 
1161     if (Invert)
1162       NegFlags |= Flag;
1163     else
1164       Flags |= Flag;
1165   }
1166   return {Flags, NegFlags};
1167 }
1168 
1169 void elf::readLinkerScript(MemoryBufferRef MB) {
1170   ScriptParser(MB).readLinkerScript();
1171 }
1172 
1173 void elf::readVersionScript(MemoryBufferRef MB) {
1174   ScriptParser(MB).readVersionScript();
1175 }
1176 
1177 void elf::readDynamicList(MemoryBufferRef MB) {
1178   ScriptParser(MB).readDynamicList();
1179 }
1180