1 //===- ScriptParser.cpp ---------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// This file contains a recursive-descent parser for linker scripts.
11 // Parsed results are stored to Config and Script global objects.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ScriptParser.h"
16 #include "Config.h"
17 #include "Driver.h"
18 #include "InputSection.h"
19 #include "LinkerScript.h"
20 #include "Memory.h"
21 #include "OutputSections.h"
22 #include "ScriptLexer.h"
23 #include "Symbols.h"
24 #include "Target.h"
25 #include "llvm/ADT/SmallString.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/ADT/StringSet.h"
28 #include "llvm/ADT/StringSwitch.h"
29 #include "llvm/BinaryFormat/ELF.h"
30 #include "llvm/Support/Casting.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/FileSystem.h"
33 #include "llvm/Support/Path.h"
34 #include <cassert>
35 #include <limits>
36 #include <vector>
37 
38 using namespace llvm;
39 using namespace llvm::ELF;
40 using namespace llvm::support::endian;
41 using namespace lld;
42 using namespace lld::elf;
43 
44 static bool isUnderSysroot(StringRef Path);
45 
46 namespace {
47 class ScriptParser final : ScriptLexer {
48 public:
49   ScriptParser(MemoryBufferRef MB)
50       : ScriptLexer(MB),
51         IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {}
52 
53   void readLinkerScript();
54   void readVersionScript();
55   void readDynamicList();
56 
57 private:
58   void addFile(StringRef Path);
59   OutputSection *checkSection(OutputSection *Cmd, StringRef Loccation);
60 
61   void readAsNeeded();
62   void readEntry();
63   void readExtern();
64   void readGroup();
65   void readInclude();
66   void readMemory();
67   void readOutput();
68   void readOutputArch();
69   void readOutputFormat();
70   void readPhdrs();
71   void readRegionAlias();
72   void readSearchDir();
73   void readSections();
74   void readVersion();
75   void readVersionScriptCommand();
76 
77   SymbolAssignment *readAssignment(StringRef Name);
78   BytesDataCommand *readBytesDataCommand(StringRef Tok);
79   uint32_t readFill();
80   uint32_t parseFill(StringRef Tok);
81   void readSectionAddressType(OutputSection *Cmd);
82   OutputSection *readOutputSectionDescription(StringRef OutSec);
83   std::vector<StringRef> readOutputSectionPhdrs();
84   InputSectionDescription *readInputSectionDescription(StringRef Tok);
85   StringMatcher readFilePatterns();
86   std::vector<SectionPattern> readInputSectionsList();
87   InputSectionDescription *readInputSectionRules(StringRef FilePattern);
88   unsigned readPhdrType();
89   SortSectionPolicy readSortKind();
90   SymbolAssignment *readProvideHidden(bool Provide, bool Hidden);
91   SymbolAssignment *readProvideOrAssignment(StringRef Tok);
92   void readSort();
93   AssertCommand *readAssert();
94   Expr readAssertExpr();
95   Expr readConstant();
96   Expr getPageSize();
97 
98   uint64_t readMemoryAssignment(StringRef, StringRef, StringRef);
99   std::pair<uint32_t, uint32_t> readMemoryAttributes();
100 
101   Expr readExpr();
102   Expr readExpr1(Expr Lhs, int MinPrec);
103   StringRef readParenLiteral();
104   Expr readPrimary();
105   Expr readTernary(Expr Cond);
106   Expr readParenExpr();
107 
108   // For parsing version script.
109   std::vector<SymbolVersion> readVersionExtern();
110   void readAnonymousDeclaration();
111   void readVersionDeclaration(StringRef VerStr);
112 
113   std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>>
114   readSymbols();
115 
116   // True if a script being read is in a subdirectory specified by -sysroot.
117   bool IsUnderSysroot;
118 
119   // A set to detect an INCLUDE() cycle.
120   StringSet<> Seen;
121 };
122 } // namespace
123 
124 static StringRef unquote(StringRef S) {
125   if (S.startswith("\""))
126     return S.substr(1, S.size() - 2);
127   return S;
128 }
129 
130 static bool isUnderSysroot(StringRef Path) {
131   if (Config->Sysroot == "")
132     return false;
133   for (; !Path.empty(); Path = sys::path::parent_path(Path))
134     if (sys::fs::equivalent(Config->Sysroot, Path))
135       return true;
136   return false;
137 }
138 
139 // Some operations only support one non absolute value. Move the
140 // absolute one to the right hand side for convenience.
141 static void moveAbsRight(ExprValue &A, ExprValue &B) {
142   if (A.isAbsolute())
143     std::swap(A, B);
144   if (!B.isAbsolute())
145     error(A.Loc + ": at least one side of the expression must be absolute");
146 }
147 
148 static ExprValue add(ExprValue A, ExprValue B) {
149   moveAbsRight(A, B);
150   uint64_t Val = alignTo(A.Val, A.Alignment) + B.getValue();
151   return {A.Sec, A.ForceAbsolute, Val, A.Loc};
152 }
153 
154 static ExprValue sub(ExprValue A, ExprValue B) {
155   uint64_t Val = alignTo(A.Val, A.Alignment) - B.getValue();
156   return {A.Sec, Val, A.Loc};
157 }
158 
159 static ExprValue mul(ExprValue A, ExprValue B) {
160   return A.getValue() * B.getValue();
161 }
162 
163 static ExprValue div(ExprValue A, ExprValue B) {
164   if (uint64_t BV = B.getValue())
165     return A.getValue() / BV;
166   error("division by zero");
167   return 0;
168 }
169 
170 static ExprValue bitAnd(ExprValue A, ExprValue B) {
171   moveAbsRight(A, B);
172   return {A.Sec, A.ForceAbsolute,
173           (A.getValue() & B.getValue()) - A.getSecAddr(), A.Loc};
174 }
175 
176 static ExprValue bitOr(ExprValue A, ExprValue B) {
177   moveAbsRight(A, B);
178   return {A.Sec, A.ForceAbsolute,
179           (A.getValue() | B.getValue()) - A.getSecAddr(), A.Loc};
180 }
181 
182 void ScriptParser::readDynamicList() {
183   Config->HasDynamicList = true;
184   expect("{");
185   std::vector<SymbolVersion> Locals;
186   std::vector<SymbolVersion> Globals;
187   std::tie(Locals, Globals) = readSymbols();
188   expect(";");
189 
190   if (!atEOF()) {
191     setError("EOF expected, but got " + next());
192     return;
193   }
194   if (!Locals.empty()) {
195     setError("\"local:\" scope not supported in --dynamic-list");
196     return;
197   }
198 
199   for (SymbolVersion V : Globals)
200     Config->DynamicList.push_back(V);
201 }
202 
203 void ScriptParser::readVersionScript() {
204   readVersionScriptCommand();
205   if (!atEOF())
206     setError("EOF expected, but got " + next());
207 }
208 
209 void ScriptParser::readVersionScriptCommand() {
210   if (consume("{")) {
211     readAnonymousDeclaration();
212     return;
213   }
214 
215   while (!atEOF() && !ErrorCount && peek() != "}") {
216     StringRef VerStr = next();
217     if (VerStr == "{") {
218       setError("anonymous version definition is used in "
219                "combination with other version definitions");
220       return;
221     }
222     expect("{");
223     readVersionDeclaration(VerStr);
224   }
225 }
226 
227 void ScriptParser::readVersion() {
228   expect("{");
229   readVersionScriptCommand();
230   expect("}");
231 }
232 
233 void ScriptParser::readLinkerScript() {
234   while (!atEOF()) {
235     StringRef Tok = next();
236     if (Tok == ";")
237       continue;
238 
239     if (Tok == "ASSERT") {
240       Script->Opt.Commands.push_back(readAssert());
241     } else if (Tok == "ENTRY") {
242       readEntry();
243     } else if (Tok == "EXTERN") {
244       readExtern();
245     } else if (Tok == "GROUP" || Tok == "INPUT") {
246       readGroup();
247     } else if (Tok == "INCLUDE") {
248       readInclude();
249     } else if (Tok == "MEMORY") {
250       readMemory();
251     } else if (Tok == "OUTPUT") {
252       readOutput();
253     } else if (Tok == "OUTPUT_ARCH") {
254       readOutputArch();
255     } else if (Tok == "OUTPUT_FORMAT") {
256       readOutputFormat();
257     } else if (Tok == "PHDRS") {
258       readPhdrs();
259     } else if (Tok == "REGION_ALIAS") {
260       readRegionAlias();
261     } else if (Tok == "SEARCH_DIR") {
262       readSearchDir();
263     } else if (Tok == "SECTIONS") {
264       readSections();
265     } else if (Tok == "VERSION") {
266       readVersion();
267     } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) {
268       Script->Opt.Commands.push_back(Cmd);
269     } else {
270       setError("unknown directive: " + Tok);
271     }
272   }
273 }
274 
275 void ScriptParser::addFile(StringRef S) {
276   if (IsUnderSysroot && S.startswith("/")) {
277     SmallString<128> PathData;
278     StringRef Path = (Config->Sysroot + S).toStringRef(PathData);
279     if (sys::fs::exists(Path)) {
280       Driver->addFile(Saver.save(Path), /*WithLOption=*/false);
281       return;
282     }
283   }
284 
285   if (S.startswith("/")) {
286     Driver->addFile(S, /*WithLOption=*/false);
287   } else if (S.startswith("=")) {
288     if (Config->Sysroot.empty())
289       Driver->addFile(S.substr(1), /*WithLOption=*/false);
290     else
291       Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)),
292                       /*WithLOption=*/false);
293   } else if (S.startswith("-l")) {
294     Driver->addLibrary(S.substr(2));
295   } else if (sys::fs::exists(S)) {
296     Driver->addFile(S, /*WithLOption=*/false);
297   } else {
298     if (Optional<std::string> Path = findFromSearchPaths(S))
299       Driver->addFile(Saver.save(*Path), /*WithLOption=*/true);
300     else
301       setError("unable to find " + S);
302   }
303 }
304 
305 void ScriptParser::readAsNeeded() {
306   expect("(");
307   bool Orig = Config->AsNeeded;
308   Config->AsNeeded = true;
309   while (!ErrorCount && !consume(")"))
310     addFile(unquote(next()));
311   Config->AsNeeded = Orig;
312 }
313 
314 void ScriptParser::readEntry() {
315   // -e <symbol> takes predecence over ENTRY(<symbol>).
316   expect("(");
317   StringRef Tok = next();
318   if (Config->Entry.empty())
319     Config->Entry = Tok;
320   expect(")");
321 }
322 
323 void ScriptParser::readExtern() {
324   expect("(");
325   while (!ErrorCount && !consume(")"))
326     Config->Undefined.push_back(next());
327 }
328 
329 void ScriptParser::readGroup() {
330   expect("(");
331   while (!ErrorCount && !consume(")")) {
332     if (consume("AS_NEEDED"))
333       readAsNeeded();
334     else
335       addFile(unquote(next()));
336   }
337 }
338 
339 void ScriptParser::readInclude() {
340   StringRef Tok = unquote(next());
341 
342   if (!Seen.insert(Tok).second) {
343     setError("there is a cycle in linker script INCLUDEs");
344     return;
345   }
346 
347   // https://sourceware.org/binutils/docs/ld/File-Commands.html:
348   // The file will be searched for in the current directory, and in any
349   // directory specified with the -L option.
350   if (sys::fs::exists(Tok)) {
351     if (Optional<MemoryBufferRef> MB = readFile(Tok))
352       tokenize(*MB);
353     return;
354   }
355   if (Optional<std::string> Path = findFromSearchPaths(Tok)) {
356     if (Optional<MemoryBufferRef> MB = readFile(*Path))
357       tokenize(*MB);
358     return;
359   }
360   setError("cannot open " + Tok);
361 }
362 
363 void ScriptParser::readOutput() {
364   // -o <file> takes predecence over OUTPUT(<file>).
365   expect("(");
366   StringRef Tok = next();
367   if (Config->OutputFile.empty())
368     Config->OutputFile = unquote(Tok);
369   expect(")");
370 }
371 
372 void ScriptParser::readOutputArch() {
373   // OUTPUT_ARCH is ignored for now.
374   expect("(");
375   while (!ErrorCount && !consume(")"))
376     skip();
377 }
378 
379 void ScriptParser::readOutputFormat() {
380   // Error checking only for now.
381   expect("(");
382   skip();
383   if (consume(")"))
384     return;
385   expect(",");
386   skip();
387   expect(",");
388   skip();
389   expect(")");
390 }
391 
392 void ScriptParser::readPhdrs() {
393   expect("{");
394   while (!ErrorCount && !consume("}")) {
395     Script->Opt.PhdrsCommands.push_back(
396         {next(), PT_NULL, false, false, UINT_MAX, nullptr});
397 
398     PhdrsCommand &PhdrCmd = Script->Opt.PhdrsCommands.back();
399     PhdrCmd.Type = readPhdrType();
400 
401     while (!ErrorCount && !consume(";")) {
402       if (consume("FILEHDR"))
403         PhdrCmd.HasFilehdr = true;
404       else if (consume("PHDRS"))
405         PhdrCmd.HasPhdrs = true;
406       else if (consume("AT"))
407         PhdrCmd.LMAExpr = readParenExpr();
408       else if (consume("FLAGS"))
409         PhdrCmd.Flags = readParenExpr()().getValue();
410       else
411         setError("unexpected header attribute: " + next());
412     }
413   }
414 }
415 
416 void ScriptParser::readRegionAlias() {
417   expect("(");
418   StringRef Alias = unquote(next());
419   expect(",");
420   StringRef Name = next();
421   expect(")");
422 
423   if (Script->Opt.MemoryRegions.count(Alias))
424     setError("redefinition of memory region '" + Alias + "'");
425   if (!Script->Opt.MemoryRegions.count(Name))
426     setError("memory region '" + Name + "' is not defined");
427   Script->Opt.MemoryRegions[Alias] = Script->Opt.MemoryRegions[Name];
428 }
429 
430 void ScriptParser::readSearchDir() {
431   expect("(");
432   StringRef Tok = next();
433   if (!Config->Nostdlib)
434     Config->SearchPaths.push_back(unquote(Tok));
435   expect(")");
436 }
437 
438 void ScriptParser::readSections() {
439   Script->Opt.HasSections = true;
440 
441   // -no-rosegment is used to avoid placing read only non-executable sections in
442   // their own segment. We do the same if SECTIONS command is present in linker
443   // script. See comment for computeFlags().
444   Config->SingleRoRx = true;
445 
446   expect("{");
447   while (!ErrorCount && !consume("}")) {
448     StringRef Tok = next();
449     BaseCommand *Cmd = readProvideOrAssignment(Tok);
450     if (!Cmd) {
451       if (Tok == "ASSERT")
452         Cmd = readAssert();
453       else
454         Cmd = readOutputSectionDescription(Tok);
455     }
456     Script->Opt.Commands.push_back(Cmd);
457   }
458 }
459 
460 static int precedence(StringRef Op) {
461   return StringSwitch<int>(Op)
462       .Cases("*", "/", 5)
463       .Cases("+", "-", 4)
464       .Cases("<<", ">>", 3)
465       .Cases("<", "<=", ">", ">=", "==", "!=", 2)
466       .Cases("&", "|", 1)
467       .Default(-1);
468 }
469 
470 StringMatcher ScriptParser::readFilePatterns() {
471   std::vector<StringRef> V;
472   while (!ErrorCount && !consume(")"))
473     V.push_back(next());
474   return StringMatcher(V);
475 }
476 
477 SortSectionPolicy ScriptParser::readSortKind() {
478   if (consume("SORT") || consume("SORT_BY_NAME"))
479     return SortSectionPolicy::Name;
480   if (consume("SORT_BY_ALIGNMENT"))
481     return SortSectionPolicy::Alignment;
482   if (consume("SORT_BY_INIT_PRIORITY"))
483     return SortSectionPolicy::Priority;
484   if (consume("SORT_NONE"))
485     return SortSectionPolicy::None;
486   return SortSectionPolicy::Default;
487 }
488 
489 // Reads SECTIONS command contents in the following form:
490 //
491 // <contents> ::= <elem>*
492 // <elem>     ::= <exclude>? <glob-pattern>
493 // <exclude>  ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")"
494 //
495 // For example,
496 //
497 // *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz)
498 //
499 // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o".
500 // The semantics of that is section .foo in any file, section .bar in
501 // any file but a.o, and section .baz in any file but b.o.
502 std::vector<SectionPattern> ScriptParser::readInputSectionsList() {
503   std::vector<SectionPattern> Ret;
504   while (!ErrorCount && peek() != ")") {
505     StringMatcher ExcludeFilePat;
506     if (consume("EXCLUDE_FILE")) {
507       expect("(");
508       ExcludeFilePat = readFilePatterns();
509     }
510 
511     std::vector<StringRef> V;
512     while (!ErrorCount && peek() != ")" && peek() != "EXCLUDE_FILE")
513       V.push_back(next());
514 
515     if (!V.empty())
516       Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)});
517     else
518       setError("section pattern is expected");
519   }
520   return Ret;
521 }
522 
523 // Reads contents of "SECTIONS" directive. That directive contains a
524 // list of glob patterns for input sections. The grammar is as follows.
525 //
526 // <patterns> ::= <section-list>
527 //              | <sort> "(" <section-list> ")"
528 //              | <sort> "(" <sort> "(" <section-list> ")" ")"
529 //
530 // <sort>     ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT"
531 //              | "SORT_BY_INIT_PRIORITY" | "SORT_NONE"
532 //
533 // <section-list> is parsed by readInputSectionsList().
534 InputSectionDescription *
535 ScriptParser::readInputSectionRules(StringRef FilePattern) {
536   auto *Cmd = make<InputSectionDescription>(FilePattern);
537   expect("(");
538 
539   while (!ErrorCount && !consume(")")) {
540     SortSectionPolicy Outer = readSortKind();
541     SortSectionPolicy Inner = SortSectionPolicy::Default;
542     std::vector<SectionPattern> V;
543     if (Outer != SortSectionPolicy::Default) {
544       expect("(");
545       Inner = readSortKind();
546       if (Inner != SortSectionPolicy::Default) {
547         expect("(");
548         V = readInputSectionsList();
549         expect(")");
550       } else {
551         V = readInputSectionsList();
552       }
553       expect(")");
554     } else {
555       V = readInputSectionsList();
556     }
557 
558     for (SectionPattern &Pat : V) {
559       Pat.SortInner = Inner;
560       Pat.SortOuter = Outer;
561     }
562 
563     std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns));
564   }
565   return Cmd;
566 }
567 
568 InputSectionDescription *
569 ScriptParser::readInputSectionDescription(StringRef Tok) {
570   // Input section wildcard can be surrounded by KEEP.
571   // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep
572   if (Tok == "KEEP") {
573     expect("(");
574     StringRef FilePattern = next();
575     InputSectionDescription *Cmd = readInputSectionRules(FilePattern);
576     expect(")");
577     Script->Opt.KeptSections.push_back(Cmd);
578     return Cmd;
579   }
580   return readInputSectionRules(Tok);
581 }
582 
583 void ScriptParser::readSort() {
584   expect("(");
585   expect("CONSTRUCTORS");
586   expect(")");
587 }
588 
589 AssertCommand *ScriptParser::readAssert() {
590   return make<AssertCommand>(readAssertExpr());
591 }
592 
593 Expr ScriptParser::readAssertExpr() {
594   expect("(");
595   Expr E = readExpr();
596   expect(",");
597   StringRef Msg = unquote(next());
598   expect(")");
599 
600   return [=] {
601     if (!E().getValue())
602       error(Msg);
603     return Script->getDot();
604   };
605 }
606 
607 // Reads a FILL(expr) command. We handle the FILL command as an
608 // alias for =fillexp section attribute, which is different from
609 // what GNU linkers do.
610 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html
611 uint32_t ScriptParser::readFill() {
612   expect("(");
613   uint32_t V = parseFill(next());
614   expect(")");
615   return V;
616 }
617 
618 // Reads an expression and/or the special directive "(NOLOAD)" for an
619 // output section definition.
620 //
621 // An output section name can be followed by an address expression
622 // and/or by "(NOLOAD)". This grammar is not LL(1) because "(" can be
623 // interpreted as either the beginning of some expression or "(NOLOAD)".
624 //
625 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html
626 // https://sourceware.org/binutils/docs/ld/Output-Section-Type.html
627 void ScriptParser::readSectionAddressType(OutputSection *Cmd) {
628   if (consume("(")) {
629     if (consume("NOLOAD")) {
630       expect(")");
631       Cmd->Noload = true;
632       return;
633     }
634     Cmd->AddrExpr = readExpr();
635     expect(")");
636   } else {
637     Cmd->AddrExpr = readExpr();
638   }
639 
640   if (consume("(")) {
641     expect("NOLOAD");
642     expect(")");
643     Cmd->Noload = true;
644   }
645 }
646 
647 OutputSection *ScriptParser::readOutputSectionDescription(StringRef OutSec) {
648   OutputSection *Cmd =
649       Script->createOutputSection(OutSec, getCurrentLocation());
650 
651   if (peek() != ":")
652     readSectionAddressType(Cmd);
653   expect(":");
654 
655   if (consume("AT"))
656     Cmd->LMAExpr = readParenExpr();
657   if (consume("ALIGN"))
658     Cmd->AlignExpr = readParenExpr();
659   if (consume("SUBALIGN"))
660     Cmd->SubalignExpr = readParenExpr();
661 
662   // Parse constraints.
663   if (consume("ONLY_IF_RO"))
664     Cmd->Constraint = ConstraintKind::ReadOnly;
665   if (consume("ONLY_IF_RW"))
666     Cmd->Constraint = ConstraintKind::ReadWrite;
667   expect("{");
668 
669   while (!ErrorCount && !consume("}")) {
670     StringRef Tok = next();
671     if (Tok == ";") {
672       // Empty commands are allowed. Do nothing here.
673     } else if (SymbolAssignment *Assign = readProvideOrAssignment(Tok)) {
674       Cmd->Commands.push_back(Assign);
675     } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) {
676       Cmd->Commands.push_back(Data);
677     } else if (Tok == "ASSERT") {
678       Cmd->Commands.push_back(readAssert());
679       expect(";");
680     } else if (Tok == "CONSTRUCTORS") {
681       // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors
682       // by name. This is for very old file formats such as ECOFF/XCOFF.
683       // For ELF, we should ignore.
684     } else if (Tok == "FILL") {
685       Cmd->Filler = readFill();
686     } else if (Tok == "SORT") {
687       readSort();
688     } else if (peek() == "(") {
689       Cmd->Commands.push_back(readInputSectionDescription(Tok));
690     } else {
691       setError("unknown command " + Tok);
692     }
693   }
694 
695   if (consume(">"))
696     Cmd->MemoryRegionName = next();
697   else if (peek().startswith(">"))
698     Cmd->MemoryRegionName = next().drop_front();
699 
700   Cmd->Phdrs = readOutputSectionPhdrs();
701 
702   if (consume("="))
703     Cmd->Filler = parseFill(next());
704   else if (peek().startswith("="))
705     Cmd->Filler = parseFill(next().drop_front());
706 
707   // Consume optional comma following output section command.
708   consume(",");
709 
710   return Cmd;
711 }
712 
713 // Parses a given string as a octal/decimal/hexadecimal number and
714 // returns it as a big-endian number. Used for `=<fillexp>`.
715 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html
716 //
717 // When reading a hexstring, ld.bfd handles it as a blob of arbitrary
718 // size, while ld.gold always handles it as a 32-bit big-endian number.
719 // We are compatible with ld.gold because it's easier to implement.
720 uint32_t ScriptParser::parseFill(StringRef Tok) {
721   uint32_t V = 0;
722   if (!to_integer(Tok, V))
723     setError("invalid filler expression: " + Tok);
724 
725   uint32_t Buf;
726   write32be(&Buf, V);
727   return Buf;
728 }
729 
730 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) {
731   expect("(");
732   SymbolAssignment *Cmd = readAssignment(next());
733   Cmd->Provide = Provide;
734   Cmd->Hidden = Hidden;
735   expect(")");
736   expect(";");
737   return Cmd;
738 }
739 
740 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) {
741   SymbolAssignment *Cmd = nullptr;
742   if (peek() == "=" || peek() == "+=") {
743     Cmd = readAssignment(Tok);
744     expect(";");
745   } else if (Tok == "PROVIDE") {
746     Cmd = readProvideHidden(true, false);
747   } else if (Tok == "HIDDEN") {
748     Cmd = readProvideHidden(false, true);
749   } else if (Tok == "PROVIDE_HIDDEN") {
750     Cmd = readProvideHidden(true, true);
751   }
752   return Cmd;
753 }
754 
755 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) {
756   StringRef Op = next();
757   assert(Op == "=" || Op == "+=");
758   Expr E = readExpr();
759   if (Op == "+=") {
760     std::string Loc = getCurrentLocation();
761     E = [=] { return add(Script->getSymbolValue(Loc, Name), E()); };
762   }
763   return make<SymbolAssignment>(Name, E, getCurrentLocation());
764 }
765 
766 // This is an operator-precedence parser to parse a linker
767 // script expression.
768 Expr ScriptParser::readExpr() {
769   // Our lexer is context-aware. Set the in-expression bit so that
770   // they apply different tokenization rules.
771   bool Orig = InExpr;
772   InExpr = true;
773   Expr E = readExpr1(readPrimary(), 0);
774   InExpr = Orig;
775   return E;
776 }
777 
778 static Expr combine(StringRef Op, Expr L, Expr R) {
779   if (Op == "+")
780     return [=] { return add(L(), R()); };
781   if (Op == "-")
782     return [=] { return sub(L(), R()); };
783   if (Op == "*")
784     return [=] { return mul(L(), R()); };
785   if (Op == "/")
786     return [=] { return div(L(), R()); };
787   if (Op == "<<")
788     return [=] { return L().getValue() << R().getValue(); };
789   if (Op == ">>")
790     return [=] { return L().getValue() >> R().getValue(); };
791   if (Op == "<")
792     return [=] { return L().getValue() < R().getValue(); };
793   if (Op == ">")
794     return [=] { return L().getValue() > R().getValue(); };
795   if (Op == ">=")
796     return [=] { return L().getValue() >= R().getValue(); };
797   if (Op == "<=")
798     return [=] { return L().getValue() <= R().getValue(); };
799   if (Op == "==")
800     return [=] { return L().getValue() == R().getValue(); };
801   if (Op == "!=")
802     return [=] { return L().getValue() != R().getValue(); };
803   if (Op == "&")
804     return [=] { return bitAnd(L(), R()); };
805   if (Op == "|")
806     return [=] { return bitOr(L(), R()); };
807   llvm_unreachable("invalid operator");
808 }
809 
810 // This is a part of the operator-precedence parser. This function
811 // assumes that the remaining token stream starts with an operator.
812 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) {
813   while (!atEOF() && !ErrorCount) {
814     // Read an operator and an expression.
815     if (consume("?"))
816       return readTernary(Lhs);
817     StringRef Op1 = peek();
818     if (precedence(Op1) < MinPrec)
819       break;
820     skip();
821     Expr Rhs = readPrimary();
822 
823     // Evaluate the remaining part of the expression first if the
824     // next operator has greater precedence than the previous one.
825     // For example, if we have read "+" and "3", and if the next
826     // operator is "*", then we'll evaluate 3 * ... part first.
827     while (!atEOF()) {
828       StringRef Op2 = peek();
829       if (precedence(Op2) <= precedence(Op1))
830         break;
831       Rhs = readExpr1(Rhs, precedence(Op2));
832     }
833 
834     Lhs = combine(Op1, Lhs, Rhs);
835   }
836   return Lhs;
837 }
838 
839 Expr ScriptParser::getPageSize() {
840   std::string Location = getCurrentLocation();
841   return [=]() -> uint64_t {
842     if (Target)
843       return Target->PageSize;
844     error(Location + ": unable to calculate page size");
845     return 4096; // Return a dummy value.
846   };
847 }
848 
849 Expr ScriptParser::readConstant() {
850   StringRef S = readParenLiteral();
851   if (S == "COMMONPAGESIZE")
852     return getPageSize();
853   if (S == "MAXPAGESIZE")
854     return [] { return Config->MaxPageSize; };
855   setError("unknown constant: " + S);
856   return {};
857 }
858 
859 // Parses Tok as an integer. It recognizes hexadecimal (prefixed with
860 // "0x" or suffixed with "H") and decimal numbers. Decimal numbers may
861 // have "K" (Ki) or "M" (Mi) suffixes.
862 static Optional<uint64_t> parseInt(StringRef Tok) {
863   // Negative number
864   if (Tok.startswith("-")) {
865     if (Optional<uint64_t> Val = parseInt(Tok.substr(1)))
866       return -*Val;
867     return None;
868   }
869 
870   // Hexadecimal
871   uint64_t Val;
872   if (Tok.startswith_lower("0x") && to_integer(Tok.substr(2), Val, 16))
873     return Val;
874   if (Tok.endswith_lower("H") && to_integer(Tok.drop_back(), Val, 16))
875     return Val;
876 
877   // Decimal
878   if (Tok.endswith_lower("K")) {
879     if (!to_integer(Tok.drop_back(), Val, 10))
880       return None;
881     return Val * 1024;
882   }
883   if (Tok.endswith_lower("M")) {
884     if (!to_integer(Tok.drop_back(), Val, 10))
885       return None;
886     return Val * 1024 * 1024;
887   }
888   if (!to_integer(Tok, Val, 10))
889     return None;
890   return Val;
891 }
892 
893 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) {
894   int Size = StringSwitch<int>(Tok)
895                  .Case("BYTE", 1)
896                  .Case("SHORT", 2)
897                  .Case("LONG", 4)
898                  .Case("QUAD", 8)
899                  .Default(-1);
900   if (Size == -1)
901     return nullptr;
902 
903   return make<BytesDataCommand>(readParenExpr(), Size);
904 }
905 
906 StringRef ScriptParser::readParenLiteral() {
907   expect("(");
908   StringRef Tok = next();
909   expect(")");
910   return Tok;
911 }
912 
913 OutputSection *ScriptParser::checkSection(OutputSection *Cmd,
914                                           StringRef Location) {
915   if (Cmd->Location.empty() && Script->ErrorOnMissingSection)
916     error(Location + ": undefined section " + Cmd->Name);
917   return Cmd;
918 }
919 
920 Expr ScriptParser::readPrimary() {
921   if (peek() == "(")
922     return readParenExpr();
923 
924   if (consume("~")) {
925     Expr E = readPrimary();
926     return [=] { return ~E().getValue(); };
927   }
928   if (consume("!")) {
929     Expr E = readPrimary();
930     return [=] { return !E().getValue(); };
931   }
932   if (consume("-")) {
933     Expr E = readPrimary();
934     return [=] { return -E().getValue(); };
935   }
936 
937   StringRef Tok = next();
938   std::string Location = getCurrentLocation();
939 
940   // Built-in functions are parsed here.
941   // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html.
942   if (Tok == "ABSOLUTE") {
943     Expr Inner = readParenExpr();
944     return [=] {
945       ExprValue I = Inner();
946       I.ForceAbsolute = true;
947       return I;
948     };
949   }
950   if (Tok == "ADDR") {
951     StringRef Name = readParenLiteral();
952     OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
953     return [=]() -> ExprValue {
954       return {checkSection(Cmd, Location), 0, Location};
955     };
956   }
957   if (Tok == "ALIGN") {
958     expect("(");
959     Expr E = readExpr();
960     if (consume(")"))
961       return [=] {
962         return alignTo(Script->getDot(), std::max((uint64_t)1, E().getValue()));
963       };
964     expect(",");
965     Expr E2 = readExpr();
966     expect(")");
967     return [=] {
968       ExprValue V = E();
969       V.Alignment = std::max((uint64_t)1, E2().getValue());
970       return V;
971     };
972   }
973   if (Tok == "ALIGNOF") {
974     StringRef Name = readParenLiteral();
975     OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
976     return [=] { return checkSection(Cmd, Location)->Alignment; };
977   }
978   if (Tok == "ASSERT")
979     return readAssertExpr();
980   if (Tok == "CONSTANT")
981     return readConstant();
982   if (Tok == "DATA_SEGMENT_ALIGN") {
983     expect("(");
984     Expr E = readExpr();
985     expect(",");
986     readExpr();
987     expect(")");
988     return [=] {
989       return alignTo(Script->getDot(), std::max((uint64_t)1, E().getValue()));
990     };
991   }
992   if (Tok == "DATA_SEGMENT_END") {
993     expect("(");
994     expect(".");
995     expect(")");
996     return [] { return Script->getDot(); };
997   }
998   if (Tok == "DATA_SEGMENT_RELRO_END") {
    // GNU linkers implement more complicated logic to handle
    // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and
    // just align to the next page boundary for simplicity.
1002     expect("(");
1003     readExpr();
1004     expect(",");
1005     readExpr();
1006     expect(")");
1007     Expr E = getPageSize();
1008     return [=] { return alignTo(Script->getDot(), E().getValue()); };
1009   }
1010   if (Tok == "DEFINED") {
1011     StringRef Name = readParenLiteral();
1012     return [=] { return Script->isDefined(Name) ? 1 : 0; };
1013   }
1014   if (Tok == "LENGTH") {
1015     StringRef Name = readParenLiteral();
1016     if (Script->Opt.MemoryRegions.count(Name) == 0)
1017       setError("memory region not defined: " + Name);
1018     return [=] { return Script->Opt.MemoryRegions[Name]->Length; };
1019   }
1020   if (Tok == "LOADADDR") {
1021     StringRef Name = readParenLiteral();
1022     OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
1023     return [=] { return checkSection(Cmd, Location)->getLMA(); };
1024   }
1025   if (Tok == "ORIGIN") {
1026     StringRef Name = readParenLiteral();
1027     if (Script->Opt.MemoryRegions.count(Name) == 0)
1028       setError("memory region not defined: " + Name);
1029     return [=] { return Script->Opt.MemoryRegions[Name]->Origin; };
1030   }
1031   if (Tok == "SEGMENT_START") {
1032     expect("(");
1033     skip();
1034     expect(",");
1035     Expr E = readExpr();
1036     expect(")");
1037     return [=] { return E(); };
1038   }
1039   if (Tok == "SIZEOF") {
1040     StringRef Name = readParenLiteral();
1041     OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
1042     // Linker script does not create an output section if its content is empty.
1043     // We want to allow SIZEOF(.foo) where .foo is a section which happened to
1044     // be empty.
1045     return [=] { return Cmd->Size; };
1046   }
1047   if (Tok == "SIZEOF_HEADERS")
1048     return [=] { return elf::getHeaderSize(); };
1049 
1050   // Tok is the dot.
1051   if (Tok == ".")
1052     return [=] { return Script->getSymbolValue(Location, Tok); };
1053 
1054   // Tok is a literal number.
1055   if (Optional<uint64_t> Val = parseInt(Tok))
1056     return [=] { return *Val; };
1057 
1058   // Tok is a symbol name.
1059   if (!isValidCIdentifier(Tok))
1060     setError("malformed number: " + Tok);
1061   Script->Opt.ReferencedSymbols.push_back(Tok);
1062   return [=] { return Script->getSymbolValue(Location, Tok); };
1063 }
1064 
1065 Expr ScriptParser::readTernary(Expr Cond) {
1066   Expr L = readExpr();
1067   expect(":");
1068   Expr R = readExpr();
1069   return [=] { return Cond().getValue() ? L() : R(); };
1070 }
1071 
1072 Expr ScriptParser::readParenExpr() {
1073   expect("(");
1074   Expr E = readExpr();
1075   expect(")");
1076   return E;
1077 }
1078 
1079 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() {
1080   std::vector<StringRef> Phdrs;
1081   while (!ErrorCount && peek().startswith(":")) {
1082     StringRef Tok = next();
1083     Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1));
1084   }
1085   return Phdrs;
1086 }
1087 
1088 // Read a program header type name. The next token must be a
1089 // name of a program header type or a constant (e.g. "0x3").
1090 unsigned ScriptParser::readPhdrType() {
1091   StringRef Tok = next();
1092   if (Optional<uint64_t> Val = parseInt(Tok))
1093     return *Val;
1094 
1095   unsigned Ret = StringSwitch<unsigned>(Tok)
1096                      .Case("PT_NULL", PT_NULL)
1097                      .Case("PT_LOAD", PT_LOAD)
1098                      .Case("PT_DYNAMIC", PT_DYNAMIC)
1099                      .Case("PT_INTERP", PT_INTERP)
1100                      .Case("PT_NOTE", PT_NOTE)
1101                      .Case("PT_SHLIB", PT_SHLIB)
1102                      .Case("PT_PHDR", PT_PHDR)
1103                      .Case("PT_TLS", PT_TLS)
1104                      .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME)
1105                      .Case("PT_GNU_STACK", PT_GNU_STACK)
1106                      .Case("PT_GNU_RELRO", PT_GNU_RELRO)
1107                      .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE)
1108                      .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED)
1109                      .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA)
1110                      .Default(-1);
1111 
1112   if (Ret == (unsigned)-1) {
1113     setError("invalid program header type: " + Tok);
1114     return PT_NULL;
1115   }
1116   return Ret;
1117 }
1118 
1119 // Reads an anonymous version declaration.
1120 void ScriptParser::readAnonymousDeclaration() {
1121   std::vector<SymbolVersion> Locals;
1122   std::vector<SymbolVersion> Globals;
1123   std::tie(Locals, Globals) = readSymbols();
1124 
1125   for (SymbolVersion V : Locals) {
1126     if (V.Name == "*")
1127       Config->DefaultSymbolVersion = VER_NDX_LOCAL;
1128     else
1129       Config->VersionScriptLocals.push_back(V);
1130   }
1131 
1132   for (SymbolVersion V : Globals)
1133     Config->VersionScriptGlobals.push_back(V);
1134 
1135   expect(";");
1136 }
1137 
1138 // Reads a non-anonymous version definition,
1139 // e.g. "VerStr { global: foo; bar; local: *; };".
1140 void ScriptParser::readVersionDeclaration(StringRef VerStr) {
1141   // Read a symbol list.
1142   std::vector<SymbolVersion> Locals;
1143   std::vector<SymbolVersion> Globals;
1144   std::tie(Locals, Globals) = readSymbols();
1145 
1146   for (SymbolVersion V : Locals) {
1147     if (V.Name == "*")
1148       Config->DefaultSymbolVersion = VER_NDX_LOCAL;
1149     else
1150       Config->VersionScriptLocals.push_back(V);
1151   }
1152 
1153   // Create a new version definition and add that to the global symbols.
1154   VersionDefinition Ver;
1155   Ver.Name = VerStr;
1156   Ver.Globals = Globals;
1157 
1158   // User-defined version number starts from 2 because 0 and 1 are
1159   // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively.
1160   Ver.Id = Config->VersionDefinitions.size() + 2;
1161   Config->VersionDefinitions.push_back(Ver);
1162 
1163   // Each version may have a parent version. For example, "Ver2"
1164   // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1"
1165   // as a parent. This version hierarchy is, probably against your
1166   // instinct, purely for hint; the runtime doesn't care about it
1167   // at all. In LLD, we simply ignore it.
1168   if (peek() != ";")
1169     skip();
1170   expect(";");
1171 }
1172 
1173 static bool hasWildcard(StringRef S) {
1174   return S.find_first_of("?*[") != StringRef::npos;
1175 }
1176 
1177 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };".
1178 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>>
1179 ScriptParser::readSymbols() {
1180   std::vector<SymbolVersion> Locals;
1181   std::vector<SymbolVersion> Globals;
1182   std::vector<SymbolVersion> *V = &Globals;
1183 
1184   while (!ErrorCount) {
1185     if (consume("}"))
1186       break;
1187     if (consumeLabel("local")) {
1188       V = &Locals;
1189       continue;
1190     }
1191     if (consumeLabel("global")) {
1192       V = &Globals;
1193       continue;
1194     }
1195 
1196     if (consume("extern")) {
1197       std::vector<SymbolVersion> Ext = readVersionExtern();
1198       V->insert(V->end(), Ext.begin(), Ext.end());
1199     } else {
1200       StringRef Tok = next();
1201       V->push_back({unquote(Tok), false, hasWildcard(Tok)});
1202     }
1203     expect(";");
1204   }
1205   return {Locals, Globals};
1206 }
1207 
1208 // Reads an "extern C++" directive, e.g.,
1209 // "extern "C++" { ns::*; "f(int, double)"; };"
1210 std::vector<SymbolVersion> ScriptParser::readVersionExtern() {
1211   StringRef Tok = next();
1212   bool IsCXX = Tok == "\"C++\"";
1213   if (!IsCXX && Tok != "\"C\"")
1214     setError("Unknown language");
1215   expect("{");
1216 
1217   std::vector<SymbolVersion> Ret;
1218   while (!ErrorCount && peek() != "}") {
1219     StringRef Tok = next();
1220     bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok);
1221     Ret.push_back({unquote(Tok), IsCXX, HasWildcard});
1222     expect(";");
1223   }
1224 
1225   expect("}");
1226   return Ret;
1227 }
1228 
1229 uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2,
1230                                             StringRef S3) {
1231   if (!consume(S1) && !consume(S2) && !consume(S3)) {
1232     setError("expected one of: " + S1 + ", " + S2 + ", or " + S3);
1233     return 0;
1234   }
1235   expect("=");
1236   return readExpr()().getValue();
1237 }
1238 
1239 // Parse the MEMORY command as specified in:
1240 // https://sourceware.org/binutils/docs/ld/MEMORY.html
1241 //
1242 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... }
1243 void ScriptParser::readMemory() {
1244   expect("{");
1245   while (!ErrorCount && !consume("}")) {
1246     StringRef Name = next();
1247 
1248     uint32_t Flags = 0;
1249     uint32_t NegFlags = 0;
1250     if (consume("(")) {
1251       std::tie(Flags, NegFlags) = readMemoryAttributes();
1252       expect(")");
1253     }
1254     expect(":");
1255 
1256     uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o");
1257     expect(",");
1258     uint64_t Length = readMemoryAssignment("LENGTH", "len", "l");
1259 
1260     // Add the memory region to the region map.
1261     if (Script->Opt.MemoryRegions.count(Name))
1262       setError("region '" + Name + "' already defined");
1263     MemoryRegion *MR = make<MemoryRegion>();
1264     *MR = {Name, Origin, Length, Flags, NegFlags};
1265     Script->Opt.MemoryRegions[Name] = MR;
1266   }
1267 }
1268 
1269 // This function parses the attributes used to match against section
1270 // flags when placing output sections in a memory region. These flags
1271 // are only used when an explicit memory region name is not used.
1272 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() {
1273   uint32_t Flags = 0;
1274   uint32_t NegFlags = 0;
1275   bool Invert = false;
1276 
1277   for (char C : next().lower()) {
1278     uint32_t Flag = 0;
1279     if (C == '!')
1280       Invert = !Invert;
1281     else if (C == 'w')
1282       Flag = SHF_WRITE;
1283     else if (C == 'x')
1284       Flag = SHF_EXECINSTR;
1285     else if (C == 'a')
1286       Flag = SHF_ALLOC;
1287     else if (C != 'r')
1288       setError("invalid memory region attribute");
1289 
1290     if (Invert)
1291       NegFlags |= Flag;
1292     else
1293       Flags |= Flag;
1294   }
1295   return {Flags, NegFlags};
1296 }
1297 
1298 void elf::readLinkerScript(MemoryBufferRef MB) {
1299   ScriptParser(MB).readLinkerScript();
1300 }
1301 
1302 void elf::readVersionScript(MemoryBufferRef MB) {
1303   ScriptParser(MB).readVersionScript();
1304 }
1305 
1306 void elf::readDynamicList(MemoryBufferRef MB) {
1307   ScriptParser(MB).readDynamicList();
1308 }
1309