1 //===- ScriptParser.cpp ---------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This file contains a recursive-descent parser for linker scripts.
10 // Parsed results are stored to Config and Script global objects.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "ScriptParser.h"
15 #include "Config.h"
16 #include "Driver.h"
17 #include "InputSection.h"
18 #include "LinkerScript.h"
19 #include "OutputSections.h"
20 #include "ScriptLexer.h"
21 #include "Symbols.h"
22 #include "Target.h"
23 #include "lld/Common/Memory.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSet.h"
27 #include "llvm/ADT/StringSwitch.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include "llvm/Support/FileSystem.h"
32 #include "llvm/Support/MathExtras.h"
33 #include "llvm/Support/Path.h"
34 #include "llvm/Support/ScopedPrinter.h"
35 #include "llvm/Support/TimeProfiler.h"
36 #include <cassert>
37 #include <limits>
38 #include <vector>
39
40 using namespace llvm;
41 using namespace llvm::ELF;
42 using namespace llvm::support::endian;
43 using namespace lld;
44 using namespace lld::elf;
45
46 namespace {
// Parser for linker scripts, version scripts, dynamic lists and --defsym
// expressions. It inherits privately from ScriptLexer, which is constructed
// from the same memory buffer.
class ScriptParser final : ScriptLexer {
public:
  // Constructs the lexer for `mb` and computes isUnderSysroot.
  ScriptParser(MemoryBufferRef mb) : ScriptLexer(mb) {
    // Initialize IsUnderSysroot
    // Without a configured sysroot the flag keeps its default (false).
    if (config->sysroot == "")
      return;
    // Walk from the buffer identifier up through its parent paths; the flag
    // is set as soon as one of them is equivalent to config->sysroot.
    StringRef path = mb.getBufferIdentifier();
    for (; !path.empty(); path = sys::path::parent_path(path)) {
      if (!sys::fs::equivalent(config->sysroot, path))
        continue;
      isUnderSysroot = true;
      return;
    }
  }

  // Public entry points, one per kind of input.
  void readLinkerScript();
  void readVersionScript();
  void readDynamicList();
  void readDefsym(StringRef name);

private:
  // Hands an input file or library named in the script to the driver.
  void addFile(StringRef path);

  // Readers for the individual linker script commands.
  void readAsNeeded();
  void readEntry();
  void readExtern();
  void readGroup();
  void readInclude();
  void readInput();
  void readMemory();
  void readOutput();
  void readOutputArch();
  void readOutputFormat();
  void readOverwriteSections();
  void readPhdrs();
  void readRegionAlias();
  void readSearchDir();
  void readSections();
  void readTarget();
  void readVersion();
  void readVersionScriptCommand();

  // Helpers for symbol assignments, output sections and input section
  // descriptions.
  SymbolAssignment *readSymbolAssignment(StringRef name);
  ByteCommand *readByteCommand(StringRef tok);
  std::array<uint8_t, 4> readFill();
  bool readSectionDirective(OutputSection *cmd, StringRef tok1, StringRef tok2);
  void readSectionAddressType(OutputSection *cmd);
  OutputSection *readOverlaySectionDescription();
  OutputSection *readOutputSectionDescription(StringRef outSec);
  std::vector<BaseCommand *> readOverlay();
  std::vector<StringRef> readOutputSectionPhdrs();
  std::pair<uint64_t, uint64_t> readInputSectionFlags();
  InputSectionDescription *readInputSectionDescription(StringRef tok);
  StringMatcher readFilePatterns();
  std::vector<SectionPattern> readInputSectionsList();
  InputSectionDescription *readInputSectionRules(StringRef filePattern,
                                                 uint64_t withFlags,
                                                 uint64_t withoutFlags);
  unsigned readPhdrType();
  SortSectionPolicy peekSortKind();
  SortSectionPolicy readSortKind();
  SymbolAssignment *readProvideHidden(bool provide, bool hidden);
  SymbolAssignment *readAssignment(StringRef tok);
  void readSort();
  Expr readAssert();
  Expr readConstant();
  Expr getPageSize();

  // Helpers for the MEMORY command.
  Expr readMemoryAssignment(StringRef, StringRef, StringRef);
  std::pair<uint32_t, uint32_t> readMemoryAttributes();

  // Expression parsing.
  Expr combine(StringRef op, Expr l, Expr r);
  Expr readExpr();
  Expr readExpr1(Expr lhs, int minPrec);
  StringRef readParenLiteral();
  Expr readPrimary();
  Expr readTernary(Expr cond);
  Expr readParenExpr();

  // For parsing version script.
  std::vector<SymbolVersion> readVersionExtern();
  void readAnonymousDeclaration();
  void readVersionDeclaration(StringRef verStr);

  // The caller in readDynamicList() unpacks the pair as (locals, globals).
  std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>>
  readSymbols();

  // True if a script being read is in a subdirectory specified by -sysroot.
  bool isUnderSysroot = false;

  // A set to detect an INCLUDE() cycle.
  StringSet<> seen;
};
140 } // namespace
141
unquote(StringRef s)142 static StringRef unquote(StringRef s) {
143 if (s.startswith("\""))
144 return s.substr(1, s.size() - 2);
145 return s;
146 }
147
148 // Some operations only support one non absolute value. Move the
149 // absolute one to the right hand side for convenience.
moveAbsRight(ExprValue & a,ExprValue & b)150 static void moveAbsRight(ExprValue &a, ExprValue &b) {
151 if (a.sec == nullptr || (a.forceAbsolute && !b.isAbsolute()))
152 std::swap(a, b);
153 if (!b.isAbsolute())
154 error(a.loc + ": at least one side of the expression must be absolute");
155 }
156
add(ExprValue a,ExprValue b)157 static ExprValue add(ExprValue a, ExprValue b) {
158 moveAbsRight(a, b);
159 return {a.sec, a.forceAbsolute, a.getSectionOffset() + b.getValue(), a.loc};
160 }
161
sub(ExprValue a,ExprValue b)162 static ExprValue sub(ExprValue a, ExprValue b) {
163 // The distance between two symbols in sections is absolute.
164 if (!a.isAbsolute() && !b.isAbsolute())
165 return a.getValue() - b.getValue();
166 return {a.sec, false, a.getSectionOffset() - b.getValue(), a.loc};
167 }
168
bitAnd(ExprValue a,ExprValue b)169 static ExprValue bitAnd(ExprValue a, ExprValue b) {
170 moveAbsRight(a, b);
171 return {a.sec, a.forceAbsolute,
172 (a.getValue() & b.getValue()) - a.getSecAddr(), a.loc};
173 }
174
bitOr(ExprValue a,ExprValue b)175 static ExprValue bitOr(ExprValue a, ExprValue b) {
176 moveAbsRight(a, b);
177 return {a.sec, a.forceAbsolute,
178 (a.getValue() | b.getValue()) - a.getSecAddr(), a.loc};
179 }
180
readDynamicList()181 void ScriptParser::readDynamicList() {
182 expect("{");
183 std::vector<SymbolVersion> locals;
184 std::vector<SymbolVersion> globals;
185 std::tie(locals, globals) = readSymbols();
186 expect(";");
187
188 if (!atEOF()) {
189 setError("EOF expected, but got " + next());
190 return;
191 }
192 if (!locals.empty()) {
193 setError("\"local:\" scope not supported in --dynamic-list");
194 return;
195 }
196
197 for (SymbolVersion v : globals)
198 config->dynamicList.push_back(v);
199 }
200
readVersionScript()201 void ScriptParser::readVersionScript() {
202 readVersionScriptCommand();
203 if (!atEOF())
204 setError("EOF expected, but got " + next());
205 }
206
readVersionScriptCommand()207 void ScriptParser::readVersionScriptCommand() {
208 if (consume("{")) {
209 readAnonymousDeclaration();
210 return;
211 }
212
213 while (!atEOF() && !errorCount() && peek() != "}") {
214 StringRef verStr = next();
215 if (verStr == "{") {
216 setError("anonymous version definition is used in "
217 "combination with other version definitions");
218 return;
219 }
220 expect("{");
221 readVersionDeclaration(verStr);
222 }
223 }
224
// Parses the body of a VERSION command: version script commands wrapped in
// a pair of braces. The VERSION keyword itself has already been consumed by
// readLinkerScript().
void ScriptParser::readVersion() {
  expect("{");
  readVersionScriptCommand();
  expect("}");
}
230
readLinkerScript()231 void ScriptParser::readLinkerScript() {
232 while (!atEOF()) {
233 StringRef tok = next();
234 if (tok == ";")
235 continue;
236
237 if (tok == "ENTRY") {
238 readEntry();
239 } else if (tok == "EXTERN") {
240 readExtern();
241 } else if (tok == "GROUP") {
242 readGroup();
243 } else if (tok == "INCLUDE") {
244 readInclude();
245 } else if (tok == "INPUT") {
246 readInput();
247 } else if (tok == "MEMORY") {
248 readMemory();
249 } else if (tok == "OUTPUT") {
250 readOutput();
251 } else if (tok == "OUTPUT_ARCH") {
252 readOutputArch();
253 } else if (tok == "OUTPUT_FORMAT") {
254 readOutputFormat();
255 } else if (tok == "OVERWRITE_SECTIONS") {
256 readOverwriteSections();
257 } else if (tok == "PHDRS") {
258 readPhdrs();
259 } else if (tok == "REGION_ALIAS") {
260 readRegionAlias();
261 } else if (tok == "SEARCH_DIR") {
262 readSearchDir();
263 } else if (tok == "SECTIONS") {
264 readSections();
265 } else if (tok == "TARGET") {
266 readTarget();
267 } else if (tok == "VERSION") {
268 readVersion();
269 } else if (SymbolAssignment *cmd = readAssignment(tok)) {
270 script->sectionCommands.push_back(cmd);
271 } else {
272 setError("unknown directive: " + tok);
273 }
274 }
275 }
276
readDefsym(StringRef name)277 void ScriptParser::readDefsym(StringRef name) {
278 if (errorCount())
279 return;
280 Expr e = readExpr();
281 if (!atEOF())
282 setError("EOF expected, but got " + next());
283 SymbolAssignment *cmd = make<SymbolAssignment>(name, e, getCurrentLocation());
284 script->sectionCommands.push_back(cmd);
285 }
286
addFile(StringRef s)287 void ScriptParser::addFile(StringRef s) {
288 if (isUnderSysroot && s.startswith("/")) {
289 SmallString<128> pathData;
290 StringRef path = (config->sysroot + s).toStringRef(pathData);
291 if (sys::fs::exists(path))
292 driver->addFile(saver.save(path), /*withLOption=*/false);
293 else
294 setError("cannot find " + s + " inside " + config->sysroot);
295 return;
296 }
297
298 if (s.startswith("/")) {
299 // Case 1: s is an absolute path. Just open it.
300 driver->addFile(s, /*withLOption=*/false);
301 } else if (s.startswith("=")) {
302 // Case 2: relative to the sysroot.
303 if (config->sysroot.empty())
304 driver->addFile(s.substr(1), /*withLOption=*/false);
305 else
306 driver->addFile(saver.save(config->sysroot + "/" + s.substr(1)),
307 /*withLOption=*/false);
308 } else if (s.startswith("-l")) {
309 // Case 3: search in the list of library paths.
310 driver->addLibrary(s.substr(2));
311 } else {
312 // Case 4: s is a relative path. Search in the directory of the script file.
313 std::string filename = std::string(getCurrentMB().getBufferIdentifier());
314 StringRef directory = sys::path::parent_path(filename);
315 if (!directory.empty()) {
316 SmallString<0> path(directory);
317 sys::path::append(path, s);
318 if (sys::fs::exists(path)) {
319 driver->addFile(path, /*withLOption=*/false);
320 return;
321 }
322 }
323 // Then search in the current working directory.
324 if (sys::fs::exists(s)) {
325 driver->addFile(s, /*withLOption=*/false);
326 } else {
327 // Finally, search in the list of library paths.
328 if (Optional<std::string> path = findFromSearchPaths(s))
329 driver->addFile(saver.save(*path), /*withLOption=*/true);
330 else
331 setError("unable to find " + s);
332 }
333 }
334 }
335
readAsNeeded()336 void ScriptParser::readAsNeeded() {
337 expect("(");
338 bool orig = config->asNeeded;
339 config->asNeeded = true;
340 while (!errorCount() && !consume(")"))
341 addFile(unquote(next()));
342 config->asNeeded = orig;
343 }
344
readEntry()345 void ScriptParser::readEntry() {
346 // -e <symbol> takes predecence over ENTRY(<symbol>).
347 expect("(");
348 StringRef tok = next();
349 if (config->entry.empty())
350 config->entry = tok;
351 expect(")");
352 }
353
readExtern()354 void ScriptParser::readExtern() {
355 expect("(");
356 while (!errorCount() && !consume(")"))
357 config->undefined.push_back(unquote(next()));
358 }
359
readGroup()360 void ScriptParser::readGroup() {
361 bool orig = InputFile::isInGroup;
362 InputFile::isInGroup = true;
363 readInput();
364 InputFile::isInGroup = orig;
365 if (!orig)
366 ++InputFile::nextGroupId;
367 }
368
readInclude()369 void ScriptParser::readInclude() {
370 StringRef tok = unquote(next());
371
372 if (!seen.insert(tok).second) {
373 setError("there is a cycle in linker script INCLUDEs");
374 return;
375 }
376
377 if (Optional<std::string> path = searchScript(tok)) {
378 if (Optional<MemoryBufferRef> mb = readFile(*path))
379 tokenize(*mb);
380 return;
381 }
382 setError("cannot find linker script " + tok);
383 }
384
readInput()385 void ScriptParser::readInput() {
386 expect("(");
387 while (!errorCount() && !consume(")")) {
388 if (consume("AS_NEEDED"))
389 readAsNeeded();
390 else
391 addFile(unquote(next()));
392 }
393 }
394
readOutput()395 void ScriptParser::readOutput() {
396 // -o <file> takes predecence over OUTPUT(<file>).
397 expect("(");
398 StringRef tok = next();
399 if (config->outputFile.empty())
400 config->outputFile = unquote(tok);
401 expect(")");
402 }
403
readOutputArch()404 void ScriptParser::readOutputArch() {
405 // OUTPUT_ARCH is ignored for now.
406 expect("(");
407 while (!errorCount() && !consume(")"))
408 skip();
409 }
410
parseBfdName(StringRef s)411 static std::pair<ELFKind, uint16_t> parseBfdName(StringRef s) {
412 return StringSwitch<std::pair<ELFKind, uint16_t>>(s)
413 .Case("elf32-i386", {ELF32LEKind, EM_386})
414 .Case("elf32-iamcu", {ELF32LEKind, EM_IAMCU})
415 .Case("elf32-littlearm", {ELF32LEKind, EM_ARM})
416 .Case("elf32-x86-64", {ELF32LEKind, EM_X86_64})
417 .Case("elf64-aarch64", {ELF64LEKind, EM_AARCH64})
418 .Case("elf64-littleaarch64", {ELF64LEKind, EM_AARCH64})
419 .Case("elf64-bigaarch64", {ELF64BEKind, EM_AARCH64})
420 .Case("elf32-powerpc", {ELF32BEKind, EM_PPC})
421 .Case("elf32-powerpcle", {ELF32LEKind, EM_PPC})
422 .Case("elf64-powerpc", {ELF64BEKind, EM_PPC64})
423 .Case("elf64-powerpcle", {ELF64LEKind, EM_PPC64})
424 .Case("elf64-x86-64", {ELF64LEKind, EM_X86_64})
425 .Cases("elf32-tradbigmips", "elf32-bigmips", {ELF32BEKind, EM_MIPS})
426 .Case("elf32-ntradbigmips", {ELF32BEKind, EM_MIPS})
427 .Case("elf32-tradlittlemips", {ELF32LEKind, EM_MIPS})
428 .Case("elf32-ntradlittlemips", {ELF32LEKind, EM_MIPS})
429 .Case("elf64-tradbigmips", {ELF64BEKind, EM_MIPS})
430 .Case("elf64-tradlittlemips", {ELF64LEKind, EM_MIPS})
431 .Case("elf32-littleriscv", {ELF32LEKind, EM_RISCV})
432 .Case("elf64-littleriscv", {ELF64LEKind, EM_RISCV})
433 .Case("elf64-sparc", {ELF64BEKind, EM_SPARCV9})
434 .Case("elf32-msp430", {ELF32LEKind, EM_MSP430})
435 .Default({ELFNoneKind, EM_NONE});
436 }
437
438 // Parse OUTPUT_FORMAT(bfdname) or OUTPUT_FORMAT(default, big, little). Choose
439 // big if -EB is specified, little if -EL is specified, or default if neither is
440 // specified.
readOutputFormat()441 void ScriptParser::readOutputFormat() {
442 expect("(");
443
444 StringRef s;
445 config->bfdname = unquote(next());
446 if (!consume(")")) {
447 expect(",");
448 s = unquote(next());
449 if (config->optEB)
450 config->bfdname = s;
451 expect(",");
452 s = unquote(next());
453 if (config->optEL)
454 config->bfdname = s;
455 consume(")");
456 }
457 s = config->bfdname;
458 if (s.consume_back("-freebsd"))
459 config->osabi = ELFOSABI_FREEBSD;
460
461 std::tie(config->ekind, config->emachine) = parseBfdName(s);
462 if (config->emachine == EM_NONE)
463 setError("unknown output format name: " + config->bfdname);
464 if (s == "elf32-ntradlittlemips" || s == "elf32-ntradbigmips")
465 config->mipsN32Abi = true;
466 if (config->emachine == EM_MSP430)
467 config->osabi = ELFOSABI_STANDALONE;
468 }
469
readPhdrs()470 void ScriptParser::readPhdrs() {
471 expect("{");
472
473 while (!errorCount() && !consume("}")) {
474 PhdrsCommand cmd;
475 cmd.name = next();
476 cmd.type = readPhdrType();
477
478 while (!errorCount() && !consume(";")) {
479 if (consume("FILEHDR"))
480 cmd.hasFilehdr = true;
481 else if (consume("PHDRS"))
482 cmd.hasPhdrs = true;
483 else if (consume("AT"))
484 cmd.lmaExpr = readParenExpr();
485 else if (consume("FLAGS"))
486 cmd.flags = readParenExpr()().getValue();
487 else
488 setError("unexpected header attribute: " + next());
489 }
490
491 script->phdrsCommands.push_back(cmd);
492 }
493 }
494
readRegionAlias()495 void ScriptParser::readRegionAlias() {
496 expect("(");
497 StringRef alias = unquote(next());
498 expect(",");
499 StringRef name = next();
500 expect(")");
501
502 if (script->memoryRegions.count(alias))
503 setError("redefinition of memory region '" + alias + "'");
504 if (!script->memoryRegions.count(name))
505 setError("memory region '" + name + "' is not defined");
506 script->memoryRegions.insert({alias, script->memoryRegions[name]});
507 }
508
readSearchDir()509 void ScriptParser::readSearchDir() {
510 expect("(");
511 StringRef tok = next();
512 if (!config->nostdlib)
513 config->searchPaths.push_back(unquote(tok));
514 expect(")");
515 }
516
517 // This reads an overlay description. Overlays are used to describe output
518 // sections that use the same virtual memory range and normally would trigger
519 // linker's sections sanity check failures.
520 // https://sourceware.org/binutils/docs/ld/Overlay-Description.html#Overlay-Description
readOverlay()521 std::vector<BaseCommand *> ScriptParser::readOverlay() {
522 // VA and LMA expressions are optional, though for simplicity of
523 // implementation we assume they are not. That is what OVERLAY was designed
524 // for first of all: to allow sections with overlapping VAs at different LMAs.
525 Expr addrExpr = readExpr();
526 expect(":");
527 expect("AT");
528 Expr lmaExpr = readParenExpr();
529 expect("{");
530
531 std::vector<BaseCommand *> v;
532 OutputSection *prev = nullptr;
533 while (!errorCount() && !consume("}")) {
534 // VA is the same for all sections. The LMAs are consecutive in memory
535 // starting from the base load address specified.
536 OutputSection *os = readOverlaySectionDescription();
537 os->addrExpr = addrExpr;
538 if (prev)
539 os->lmaExpr = [=] { return prev->getLMA() + prev->size; };
540 else
541 os->lmaExpr = lmaExpr;
542 v.push_back(os);
543 prev = os;
544 }
545
546 // According to the specification, at the end of the overlay, the location
547 // counter should be equal to the overlay base address plus size of the
548 // largest section seen in the overlay.
549 // Here we want to create the Dot assignment command to achieve that.
550 Expr moveDot = [=] {
551 uint64_t max = 0;
552 for (BaseCommand *cmd : v)
553 max = std::max(max, cast<OutputSection>(cmd)->size);
554 return addrExpr().getValue() + max;
555 };
556 v.push_back(make<SymbolAssignment>(".", moveDot, getCurrentLocation()));
557 return v;
558 }
559
readOverwriteSections()560 void ScriptParser::readOverwriteSections() {
561 expect("{");
562 while (!errorCount() && !consume("}"))
563 script->overwriteSections.push_back(readOutputSectionDescription(next()));
564 }
565
readSections()566 void ScriptParser::readSections() {
567 expect("{");
568 std::vector<BaseCommand *> v;
569 while (!errorCount() && !consume("}")) {
570 StringRef tok = next();
571 if (tok == "OVERLAY") {
572 for (BaseCommand *cmd : readOverlay())
573 v.push_back(cmd);
574 continue;
575 } else if (tok == "INCLUDE") {
576 readInclude();
577 continue;
578 }
579
580 if (BaseCommand *cmd = readAssignment(tok))
581 v.push_back(cmd);
582 else
583 v.push_back(readOutputSectionDescription(tok));
584 }
585 script->sectionCommands.insert(script->sectionCommands.end(), v.begin(),
586 v.end());
587
588 if (atEOF() || !consume("INSERT")) {
589 script->hasSectionsCommand = true;
590 return;
591 }
592
593 bool isAfter = false;
594 if (consume("AFTER"))
595 isAfter = true;
596 else if (!consume("BEFORE"))
597 setError("expected AFTER/BEFORE, but got '" + next() + "'");
598 StringRef where = next();
599 std::vector<StringRef> names;
600 for (BaseCommand *cmd : v)
601 if (auto *os = dyn_cast<OutputSection>(cmd))
602 names.push_back(os->name);
603 if (!names.empty())
604 script->insertCommands.push_back({std::move(names), isAfter, where});
605 }
606
readTarget()607 void ScriptParser::readTarget() {
608 // TARGET(foo) is an alias for "--format foo". Unlike GNU linkers,
609 // we accept only a limited set of BFD names (i.e. "elf" or "binary")
610 // for --format. We recognize only /^elf/ and "binary" in the linker
611 // script as well.
612 expect("(");
613 StringRef tok = next();
614 expect(")");
615
616 if (tok.startswith("elf"))
617 config->formatBinary = false;
618 else if (tok == "binary")
619 config->formatBinary = true;
620 else
621 setError("unknown target: " + tok);
622 }
623
precedence(StringRef op)624 static int precedence(StringRef op) {
625 return StringSwitch<int>(op)
626 .Cases("*", "/", "%", 8)
627 .Cases("+", "-", 7)
628 .Cases("<<", ">>", 6)
629 .Cases("<", "<=", ">", ">=", "==", "!=", 5)
630 .Case("&", 4)
631 .Case("|", 3)
632 .Case("&&", 2)
633 .Case("||", 1)
634 .Default(-1);
635 }
636
readFilePatterns()637 StringMatcher ScriptParser::readFilePatterns() {
638 StringMatcher Matcher;
639
640 while (!errorCount() && !consume(")"))
641 Matcher.addPattern(SingleStringMatcher(next()));
642 return Matcher;
643 }
644
peekSortKind()645 SortSectionPolicy ScriptParser::peekSortKind() {
646 return StringSwitch<SortSectionPolicy>(peek())
647 .Cases("SORT", "SORT_BY_NAME", SortSectionPolicy::Name)
648 .Case("SORT_BY_ALIGNMENT", SortSectionPolicy::Alignment)
649 .Case("SORT_BY_INIT_PRIORITY", SortSectionPolicy::Priority)
650 .Case("SORT_NONE", SortSectionPolicy::None)
651 .Default(SortSectionPolicy::Default);
652 }
653
readSortKind()654 SortSectionPolicy ScriptParser::readSortKind() {
655 SortSectionPolicy ret = peekSortKind();
656 if (ret != SortSectionPolicy::Default)
657 skip();
658 return ret;
659 }
660
661 // Reads SECTIONS command contents in the following form:
662 //
663 // <contents> ::= <elem>*
664 // <elem> ::= <exclude>? <glob-pattern>
665 // <exclude> ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")"
666 //
667 // For example,
668 //
669 // *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz)
670 //
671 // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o".
672 // The semantics of that is section .foo in any file, section .bar in
673 // any file but a.o, and section .baz in any file but b.o.
readInputSectionsList()674 std::vector<SectionPattern> ScriptParser::readInputSectionsList() {
675 std::vector<SectionPattern> ret;
676 while (!errorCount() && peek() != ")") {
677 StringMatcher excludeFilePat;
678 if (consume("EXCLUDE_FILE")) {
679 expect("(");
680 excludeFilePat = readFilePatterns();
681 }
682
683 StringMatcher SectionMatcher;
684 // Break if the next token is ), EXCLUDE_FILE, or SORT*.
685 while (!errorCount() && peek() != ")" && peek() != "EXCLUDE_FILE" &&
686 peekSortKind() == SortSectionPolicy::Default)
687 SectionMatcher.addPattern(unquote(next()));
688
689 if (!SectionMatcher.empty())
690 ret.push_back({std::move(excludeFilePat), std::move(SectionMatcher)});
691 else if (excludeFilePat.empty())
692 break;
693 else
694 setError("section pattern is expected");
695 }
696 return ret;
697 }
698
699 // Reads contents of "SECTIONS" directive. That directive contains a
700 // list of glob patterns for input sections. The grammar is as follows.
701 //
702 // <patterns> ::= <section-list>
703 // | <sort> "(" <section-list> ")"
704 // | <sort> "(" <sort> "(" <section-list> ")" ")"
705 //
706 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT"
707 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE"
708 //
709 // <section-list> is parsed by readInputSectionsList().
710 InputSectionDescription *
readInputSectionRules(StringRef filePattern,uint64_t withFlags,uint64_t withoutFlags)711 ScriptParser::readInputSectionRules(StringRef filePattern, uint64_t withFlags,
712 uint64_t withoutFlags) {
713 auto *cmd =
714 make<InputSectionDescription>(filePattern, withFlags, withoutFlags);
715 expect("(");
716
717 while (!errorCount() && !consume(")")) {
718 SortSectionPolicy outer = readSortKind();
719 SortSectionPolicy inner = SortSectionPolicy::Default;
720 std::vector<SectionPattern> v;
721 if (outer != SortSectionPolicy::Default) {
722 expect("(");
723 inner = readSortKind();
724 if (inner != SortSectionPolicy::Default) {
725 expect("(");
726 v = readInputSectionsList();
727 expect(")");
728 } else {
729 v = readInputSectionsList();
730 }
731 expect(")");
732 } else {
733 v = readInputSectionsList();
734 }
735
736 for (SectionPattern &pat : v) {
737 pat.sortInner = inner;
738 pat.sortOuter = outer;
739 }
740
741 std::move(v.begin(), v.end(), std::back_inserter(cmd->sectionPatterns));
742 }
743 return cmd;
744 }
745
746 InputSectionDescription *
readInputSectionDescription(StringRef tok)747 ScriptParser::readInputSectionDescription(StringRef tok) {
748 // Input section wildcard can be surrounded by KEEP.
749 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep
750 uint64_t withFlags = 0;
751 uint64_t withoutFlags = 0;
752 if (tok == "KEEP") {
753 expect("(");
754 if (consume("INPUT_SECTION_FLAGS"))
755 std::tie(withFlags, withoutFlags) = readInputSectionFlags();
756 InputSectionDescription *cmd =
757 readInputSectionRules(next(), withFlags, withoutFlags);
758 expect(")");
759 script->keptSections.push_back(cmd);
760 return cmd;
761 }
762 if (tok == "INPUT_SECTION_FLAGS") {
763 std::tie(withFlags, withoutFlags) = readInputSectionFlags();
764 tok = next();
765 }
766 return readInputSectionRules(tok, withFlags, withoutFlags);
767 }
768
// Parses the remainder of "SORT(CONSTRUCTORS)" inside an output section
// description. The SORT keyword has already been consumed by the caller; the
// construct is accepted and has no further effect here.
void ScriptParser::readSort() {
  expect("(");
  expect("CONSTRUCTORS");
  expect(")");
}
774
readAssert()775 Expr ScriptParser::readAssert() {
776 expect("(");
777 Expr e = readExpr();
778 expect(",");
779 StringRef msg = unquote(next());
780 expect(")");
781
782 return [=] {
783 if (!e().getValue())
784 errorOrWarn(msg);
785 return script->getDot();
786 };
787 }
788
789 // Tries to read the special directive for an output section definition which
790 // can be one of following: "(NOLOAD)", "(COPY)", "(INFO)" or "(OVERLAY)".
791 // Tok1 and Tok2 are next 2 tokens peeked. See comment for readSectionAddressType below.
readSectionDirective(OutputSection * cmd,StringRef tok1,StringRef tok2)792 bool ScriptParser::readSectionDirective(OutputSection *cmd, StringRef tok1, StringRef tok2) {
793 if (tok1 != "(")
794 return false;
795 if (tok2 != "NOLOAD" && tok2 != "COPY" && tok2 != "INFO" && tok2 != "OVERLAY")
796 return false;
797
798 expect("(");
799 if (consume("NOLOAD")) {
800 cmd->noload = true;
801 cmd->type = SHT_NOBITS;
802 } else {
803 skip(); // This is "COPY", "INFO" or "OVERLAY".
804 cmd->nonAlloc = true;
805 }
806 expect(")");
807 return true;
808 }
809
810 // Reads an expression and/or the special directive for an output
811 // section definition. Directive is one of following: "(NOLOAD)",
812 // "(COPY)", "(INFO)" or "(OVERLAY)".
813 //
814 // An output section name can be followed by an address expression
815 // and/or directive. This grammar is not LL(1) because "(" can be
816 // interpreted as either the beginning of some expression or beginning
817 // of directive.
818 //
819 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html
820 // https://sourceware.org/binutils/docs/ld/Output-Section-Type.html
readSectionAddressType(OutputSection * cmd)821 void ScriptParser::readSectionAddressType(OutputSection *cmd) {
822 if (readSectionDirective(cmd, peek(), peek2()))
823 return;
824
825 cmd->addrExpr = readExpr();
826 if (peek() == "(" && !readSectionDirective(cmd, "(", peek2()))
827 setError("unknown section directive: " + peek2());
828 }
829
checkAlignment(Expr e,std::string & loc)830 static Expr checkAlignment(Expr e, std::string &loc) {
831 return [=] {
832 uint64_t alignment = std::max((uint64_t)1, e().getValue());
833 if (!isPowerOf2_64(alignment)) {
834 error(loc + ": alignment must be power of 2");
835 return (uint64_t)1; // Return a dummy value.
836 }
837 return alignment;
838 };
839 }
840
readOverlaySectionDescription()841 OutputSection *ScriptParser::readOverlaySectionDescription() {
842 OutputSection *cmd =
843 script->createOutputSection(next(), getCurrentLocation());
844 cmd->inOverlay = true;
845 expect("{");
846 while (!errorCount() && !consume("}")) {
847 uint64_t withFlags = 0;
848 uint64_t withoutFlags = 0;
849 if (consume("INPUT_SECTION_FLAGS"))
850 std::tie(withFlags, withoutFlags) = readInputSectionFlags();
851 cmd->sectionCommands.push_back(
852 readInputSectionRules(next(), withFlags, withoutFlags));
853 }
854 return cmd;
855 }
856
// Parses a full output section description for the section named `outSec`,
// whose name token has already been consumed by the caller. The parts are
// read in this order:
//
//   [address and/or type] ":" [AT(lma)] [ALIGN(n)] [SUBALIGN(n)]
//   [ONLY_IF_RO] [ONLY_IF_RW] "{" commands "}" [">" region] [AT ">" region]
//   [phdrs] ["=" fillexp] [","]
//
// Returns the OutputSection created for the description.
OutputSection *ScriptParser::readOutputSectionDescription(StringRef outSec) {
  OutputSection *cmd =
      script->createOutputSection(outSec, getCurrentLocation());

  // Remember how many symbols were referenced so far; the count is compared
  // again at the end to tell whether this description referenced any.
  size_t symbolsReferenced = script->referencedSymbols.size();

  // Anything between the name and ":" is an address expression and/or a
  // section type directive.
  if (peek() != ":")
    readSectionAddressType(cmd);
  expect(":");

  // `location` is used in alignment diagnostics.
  std::string location = getCurrentLocation();
  if (consume("AT"))
    cmd->lmaExpr = readParenExpr();
  if (consume("ALIGN"))
    cmd->alignExpr = checkAlignment(readParenExpr(), location);
  if (consume("SUBALIGN"))
    cmd->subalignExpr = checkAlignment(readParenExpr(), location);

  // Parse constraints.
  if (consume("ONLY_IF_RO"))
    cmd->constraint = ConstraintKind::ReadOnly;
  if (consume("ONLY_IF_RW"))
    cmd->constraint = ConstraintKind::ReadWrite;
  expect("{");

  // Body of the section: the branches are tried in order, so a token is only
  // treated as an input section description or a bare file name when nothing
  // earlier matched.
  while (!errorCount() && !consume("}")) {
    StringRef tok = next();
    if (tok == ";") {
      // Empty commands are allowed. Do nothing here.
    } else if (SymbolAssignment *assign = readAssignment(tok)) {
      cmd->sectionCommands.push_back(assign);
    } else if (ByteCommand *data = readByteCommand(tok)) {
      cmd->sectionCommands.push_back(data);
    } else if (tok == "CONSTRUCTORS") {
      // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors
      // by name. This is for very old file formats such as ECOFF/XCOFF.
      // For ELF, we should ignore.
    } else if (tok == "FILL") {
      // We handle the FILL command as an alias for =fillexp section attribute,
      // which is different from what GNU linkers do.
      // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html
      if (peek() != "(")
        setError("( expected, but got " + peek());
      cmd->filler = readFill();
    } else if (tok == "SORT") {
      readSort();
    } else if (tok == "INCLUDE") {
      readInclude();
    } else if (peek() == "(") {
      cmd->sectionCommands.push_back(readInputSectionDescription(tok));
    } else {
      // We have a file name and no input sections description. It is not a
      // commonly used syntax, but still acceptable. In that case, all sections
      // from the file will be included.
      // FIXME: GNU ld permits INPUT_SECTION_FLAGS to be used here. We do not
      // handle this case here as it will already have been matched by the
      // case above.
      auto *isd = make<InputSectionDescription>(tok);
      isd->sectionPatterns.push_back({{}, StringMatcher("*")});
      cmd->sectionCommands.push_back(isd);
    }
  }

  // Optional ">region": the memory region the section is assigned to.
  if (consume(">"))
    cmd->memoryRegionName = std::string(next());

  // Optional "AT>region": the load region.
  if (consume("AT")) {
    expect(">");
    cmd->lmaRegionName = std::string(next());
  }

  // AT(lma) and AT>region are mutually exclusive.
  if (cmd->lmaExpr && !cmd->lmaRegionName.empty())
    error("section can't have both LMA and a load region");

  cmd->phdrs = readOutputSectionPhdrs();

  // Optional "=fillexp". The "=" may be a token of its own or the first
  // character of a longer token. inExpr is set while the filler is read and
  // cleared again afterwards.
  if (peek() == "=" || peek().startswith("=")) {
    inExpr = true;
    consume("=");
    cmd->filler = readFill();
    inExpr = false;
  }

  // Consume optional comma following output section command.
  consume(",");

  // Record whether parsing this description added any referenced symbols.
  if (script->referencedSymbols.size() > symbolsReferenced)
    cmd->expressionsUseSymbols = true;
  return cmd;
}
947
948 // Reads a `=<fillexp>` expression and returns its value as a big-endian number.
949 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html
950 // We do not support using symbols in such expressions.
951 //
952 // When reading a hexstring, ld.bfd handles it as a blob of arbitrary
953 // size, while ld.gold always handles it as a 32-bit big-endian number.
954 // We are compatible with ld.gold because it's easier to implement.
955 // Also, we require that expressions with operators must be wrapped into
956 // round brackets. We did it to resolve the ambiguity when parsing scripts like:
957 // SECTIONS { .foo : { ... } =120+3 /DISCARD/ : { ... } }
readFill()958 std::array<uint8_t, 4> ScriptParser::readFill() {
959 uint64_t value = readPrimary()().val;
960 if (value > UINT32_MAX)
961 setError("filler expression result does not fit 32-bit: 0x" +
962 Twine::utohexstr(value));
963
964 std::array<uint8_t, 4> buf;
965 write32be(buf.data(), (uint32_t)value);
966 return buf;
967 }
968
readProvideHidden(bool provide,bool hidden)969 SymbolAssignment *ScriptParser::readProvideHidden(bool provide, bool hidden) {
970 expect("(");
971 SymbolAssignment *cmd = readSymbolAssignment(next());
972 cmd->provide = provide;
973 cmd->hidden = hidden;
974 expect(")");
975 return cmd;
976 }
977
readAssignment(StringRef tok)978 SymbolAssignment *ScriptParser::readAssignment(StringRef tok) {
979 // Assert expression returns Dot, so this is equal to ".=."
980 if (tok == "ASSERT")
981 return make<SymbolAssignment>(".", readAssert(), getCurrentLocation());
982
983 size_t oldPos = pos;
984 SymbolAssignment *cmd = nullptr;
985 if (peek() == "=" || peek() == "+=")
986 cmd = readSymbolAssignment(tok);
987 else if (tok == "PROVIDE")
988 cmd = readProvideHidden(true, false);
989 else if (tok == "HIDDEN")
990 cmd = readProvideHidden(false, true);
991 else if (tok == "PROVIDE_HIDDEN")
992 cmd = readProvideHidden(true, true);
993
994 if (cmd) {
995 cmd->commandString =
996 tok.str() + " " +
997 llvm::join(tokens.begin() + oldPos, tokens.begin() + pos, " ");
998 expect(";");
999 }
1000 return cmd;
1001 }
1002
readSymbolAssignment(StringRef name)1003 SymbolAssignment *ScriptParser::readSymbolAssignment(StringRef name) {
1004 name = unquote(name);
1005 StringRef op = next();
1006 assert(op == "=" || op == "+=");
1007 Expr e = readExpr();
1008 if (op == "+=") {
1009 std::string loc = getCurrentLocation();
1010 e = [=] { return add(script->getSymbolValue(name, loc), e()); };
1011 }
1012 return make<SymbolAssignment>(name, e, getCurrentLocation());
1013 }
1014
1015 // This is an operator-precedence parser to parse a linker
1016 // script expression.
readExpr()1017 Expr ScriptParser::readExpr() {
1018 // Our lexer is context-aware. Set the in-expression bit so that
1019 // they apply different tokenization rules.
1020 bool orig = inExpr;
1021 inExpr = true;
1022 Expr e = readExpr1(readPrimary(), 0);
1023 inExpr = orig;
1024 return e;
1025 }
1026
combine(StringRef op,Expr l,Expr r)1027 Expr ScriptParser::combine(StringRef op, Expr l, Expr r) {
1028 if (op == "+")
1029 return [=] { return add(l(), r()); };
1030 if (op == "-")
1031 return [=] { return sub(l(), r()); };
1032 if (op == "*")
1033 return [=] { return l().getValue() * r().getValue(); };
1034 if (op == "/") {
1035 std::string loc = getCurrentLocation();
1036 return [=]() -> uint64_t {
1037 if (uint64_t rv = r().getValue())
1038 return l().getValue() / rv;
1039 error(loc + ": division by zero");
1040 return 0;
1041 };
1042 }
1043 if (op == "%") {
1044 std::string loc = getCurrentLocation();
1045 return [=]() -> uint64_t {
1046 if (uint64_t rv = r().getValue())
1047 return l().getValue() % rv;
1048 error(loc + ": modulo by zero");
1049 return 0;
1050 };
1051 }
1052 if (op == "<<")
1053 return [=] { return l().getValue() << r().getValue(); };
1054 if (op == ">>")
1055 return [=] { return l().getValue() >> r().getValue(); };
1056 if (op == "<")
1057 return [=] { return l().getValue() < r().getValue(); };
1058 if (op == ">")
1059 return [=] { return l().getValue() > r().getValue(); };
1060 if (op == ">=")
1061 return [=] { return l().getValue() >= r().getValue(); };
1062 if (op == "<=")
1063 return [=] { return l().getValue() <= r().getValue(); };
1064 if (op == "==")
1065 return [=] { return l().getValue() == r().getValue(); };
1066 if (op == "!=")
1067 return [=] { return l().getValue() != r().getValue(); };
1068 if (op == "||")
1069 return [=] { return l().getValue() || r().getValue(); };
1070 if (op == "&&")
1071 return [=] { return l().getValue() && r().getValue(); };
1072 if (op == "&")
1073 return [=] { return bitAnd(l(), r()); };
1074 if (op == "|")
1075 return [=] { return bitOr(l(), r()); };
1076 llvm_unreachable("invalid operator");
1077 }
1078
1079 // This is a part of the operator-precedence parser. This function
1080 // assumes that the remaining token stream starts with an operator.
readExpr1(Expr lhs,int minPrec)1081 Expr ScriptParser::readExpr1(Expr lhs, int minPrec) {
1082 while (!atEOF() && !errorCount()) {
1083 // Read an operator and an expression.
1084 if (consume("?"))
1085 return readTernary(lhs);
1086 StringRef op1 = peek();
1087 if (precedence(op1) < minPrec)
1088 break;
1089 skip();
1090 Expr rhs = readPrimary();
1091
1092 // Evaluate the remaining part of the expression first if the
1093 // next operator has greater precedence than the previous one.
1094 // For example, if we have read "+" and "3", and if the next
1095 // operator is "*", then we'll evaluate 3 * ... part first.
1096 while (!atEOF()) {
1097 StringRef op2 = peek();
1098 if (precedence(op2) <= precedence(op1))
1099 break;
1100 rhs = readExpr1(rhs, precedence(op2));
1101 }
1102
1103 lhs = combine(op1, lhs, rhs);
1104 }
1105 return lhs;
1106 }
1107
getPageSize()1108 Expr ScriptParser::getPageSize() {
1109 std::string location = getCurrentLocation();
1110 return [=]() -> uint64_t {
1111 if (target)
1112 return config->commonPageSize;
1113 error(location + ": unable to calculate page size");
1114 return 4096; // Return a dummy value.
1115 };
1116 }
1117
readConstant()1118 Expr ScriptParser::readConstant() {
1119 StringRef s = readParenLiteral();
1120 if (s == "COMMONPAGESIZE")
1121 return getPageSize();
1122 if (s == "MAXPAGESIZE")
1123 return [] { return config->maxPageSize; };
1124 setError("unknown constant: " + s);
1125 return [] { return 0; };
1126 }
1127
1128 // Parses Tok as an integer. It recognizes hexadecimal (prefixed with
1129 // "0x" or suffixed with "H") and decimal numbers. Decimal numbers may
1130 // have "K" (Ki) or "M" (Mi) suffixes.
parseInt(StringRef tok)1131 static Optional<uint64_t> parseInt(StringRef tok) {
1132 // Hexadecimal
1133 uint64_t val;
1134 if (tok.startswith_insensitive("0x")) {
1135 if (!to_integer(tok.substr(2), val, 16))
1136 return None;
1137 return val;
1138 }
1139 if (tok.endswith_insensitive("H")) {
1140 if (!to_integer(tok.drop_back(), val, 16))
1141 return None;
1142 return val;
1143 }
1144
1145 // Decimal
1146 if (tok.endswith_insensitive("K")) {
1147 if (!to_integer(tok.drop_back(), val, 10))
1148 return None;
1149 return val * 1024;
1150 }
1151 if (tok.endswith_insensitive("M")) {
1152 if (!to_integer(tok.drop_back(), val, 10))
1153 return None;
1154 return val * 1024 * 1024;
1155 }
1156 if (!to_integer(tok, val, 10))
1157 return None;
1158 return val;
1159 }
1160
readByteCommand(StringRef tok)1161 ByteCommand *ScriptParser::readByteCommand(StringRef tok) {
1162 int size = StringSwitch<int>(tok)
1163 .Case("BYTE", 1)
1164 .Case("SHORT", 2)
1165 .Case("LONG", 4)
1166 .Case("QUAD", 8)
1167 .Default(-1);
1168 if (size == -1)
1169 return nullptr;
1170
1171 size_t oldPos = pos;
1172 Expr e = readParenExpr();
1173 std::string commandString =
1174 tok.str() + " " +
1175 llvm::join(tokens.begin() + oldPos, tokens.begin() + pos, " ");
1176 return make<ByteCommand>(e, size, commandString);
1177 }
1178
parseFlag(StringRef tok)1179 static llvm::Optional<uint64_t> parseFlag(StringRef tok) {
1180 if (llvm::Optional<uint64_t> asInt = parseInt(tok))
1181 return asInt;
1182 #define CASE_ENT(enum) #enum, ELF::enum
1183 return StringSwitch<llvm::Optional<uint64_t>>(tok)
1184 .Case(CASE_ENT(SHF_WRITE))
1185 .Case(CASE_ENT(SHF_ALLOC))
1186 .Case(CASE_ENT(SHF_EXECINSTR))
1187 .Case(CASE_ENT(SHF_MERGE))
1188 .Case(CASE_ENT(SHF_STRINGS))
1189 .Case(CASE_ENT(SHF_INFO_LINK))
1190 .Case(CASE_ENT(SHF_LINK_ORDER))
1191 .Case(CASE_ENT(SHF_OS_NONCONFORMING))
1192 .Case(CASE_ENT(SHF_GROUP))
1193 .Case(CASE_ENT(SHF_TLS))
1194 .Case(CASE_ENT(SHF_COMPRESSED))
1195 .Case(CASE_ENT(SHF_EXCLUDE))
1196 .Case(CASE_ENT(SHF_ARM_PURECODE))
1197 .Default(None);
1198 #undef CASE_ENT
1199 }
1200
1201 // Reads the '(' <flags> ')' list of section flags in
1202 // INPUT_SECTION_FLAGS '(' <flags> ')' in the
1203 // following form:
1204 // <flags> ::= <flag>
1205 // | <flags> & flag
1206 // <flag> ::= Recognized Flag Name, or Integer value of flag.
1207 // If the first character of <flag> is a ! then this means without flag,
1208 // otherwise with flag.
1209 // Example: SHF_EXECINSTR & !SHF_WRITE means with flag SHF_EXECINSTR and
1210 // without flag SHF_WRITE.
readInputSectionFlags()1211 std::pair<uint64_t, uint64_t> ScriptParser::readInputSectionFlags() {
1212 uint64_t withFlags = 0;
1213 uint64_t withoutFlags = 0;
1214 expect("(");
1215 while (!errorCount()) {
1216 StringRef tok = unquote(next());
1217 bool without = tok.consume_front("!");
1218 if (llvm::Optional<uint64_t> flag = parseFlag(tok)) {
1219 if (without)
1220 withoutFlags |= *flag;
1221 else
1222 withFlags |= *flag;
1223 } else {
1224 setError("unrecognised flag: " + tok);
1225 }
1226 if (consume(")"))
1227 break;
1228 if (!consume("&")) {
1229 next();
1230 setError("expected & or )");
1231 }
1232 }
1233 return std::make_pair(withFlags, withoutFlags);
1234 }
1235
readParenLiteral()1236 StringRef ScriptParser::readParenLiteral() {
1237 expect("(");
1238 bool orig = inExpr;
1239 inExpr = false;
1240 StringRef tok = next();
1241 inExpr = orig;
1242 expect(")");
1243 return tok;
1244 }
1245
checkIfExists(OutputSection * cmd,StringRef location)1246 static void checkIfExists(OutputSection *cmd, StringRef location) {
1247 if (cmd->location.empty() && script->errorOnMissingSection)
1248 error(location + ": undefined section " + cmd->name);
1249 }
1250
isValidSymbolName(StringRef s)1251 static bool isValidSymbolName(StringRef s) {
1252 auto valid = [](char c) {
1253 return isAlnum(c) || c == '$' || c == '.' || c == '_';
1254 };
1255 return !s.empty() && !isDigit(s[0]) && llvm::all_of(s, valid);
1256 }
1257
readPrimary()1258 Expr ScriptParser::readPrimary() {
1259 if (peek() == "(")
1260 return readParenExpr();
1261
1262 if (consume("~")) {
1263 Expr e = readPrimary();
1264 return [=] { return ~e().getValue(); };
1265 }
1266 if (consume("!")) {
1267 Expr e = readPrimary();
1268 return [=] { return !e().getValue(); };
1269 }
1270 if (consume("-")) {
1271 Expr e = readPrimary();
1272 return [=] { return -e().getValue(); };
1273 }
1274
1275 StringRef tok = next();
1276 std::string location = getCurrentLocation();
1277
1278 // Built-in functions are parsed here.
1279 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html.
1280 if (tok == "ABSOLUTE") {
1281 Expr inner = readParenExpr();
1282 return [=] {
1283 ExprValue i = inner();
1284 i.forceAbsolute = true;
1285 return i;
1286 };
1287 }
1288 if (tok == "ADDR") {
1289 StringRef name = readParenLiteral();
1290 OutputSection *sec = script->getOrCreateOutputSection(name);
1291 sec->usedInExpression = true;
1292 return [=]() -> ExprValue {
1293 checkIfExists(sec, location);
1294 return {sec, false, 0, location};
1295 };
1296 }
1297 if (tok == "ALIGN") {
1298 expect("(");
1299 Expr e = readExpr();
1300 if (consume(")")) {
1301 e = checkAlignment(e, location);
1302 return [=] { return alignTo(script->getDot(), e().getValue()); };
1303 }
1304 expect(",");
1305 Expr e2 = checkAlignment(readExpr(), location);
1306 expect(")");
1307 return [=] {
1308 ExprValue v = e();
1309 v.alignment = e2().getValue();
1310 return v;
1311 };
1312 }
1313 if (tok == "ALIGNOF") {
1314 StringRef name = readParenLiteral();
1315 OutputSection *cmd = script->getOrCreateOutputSection(name);
1316 return [=] {
1317 checkIfExists(cmd, location);
1318 return cmd->alignment;
1319 };
1320 }
1321 if (tok == "ASSERT")
1322 return readAssert();
1323 if (tok == "CONSTANT")
1324 return readConstant();
1325 if (tok == "DATA_SEGMENT_ALIGN") {
1326 expect("(");
1327 Expr e = readExpr();
1328 expect(",");
1329 readExpr();
1330 expect(")");
1331 return [=] {
1332 return alignTo(script->getDot(), std::max((uint64_t)1, e().getValue()));
1333 };
1334 }
1335 if (tok == "DATA_SEGMENT_END") {
1336 expect("(");
1337 expect(".");
1338 expect(")");
1339 return [] { return script->getDot(); };
1340 }
1341 if (tok == "DATA_SEGMENT_RELRO_END") {
1342 // GNU linkers implements more complicated logic to handle
1343 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and
1344 // just align to the next page boundary for simplicity.
1345 expect("(");
1346 readExpr();
1347 expect(",");
1348 readExpr();
1349 expect(")");
1350 Expr e = getPageSize();
1351 return [=] { return alignTo(script->getDot(), e().getValue()); };
1352 }
1353 if (tok == "DEFINED") {
1354 StringRef name = unquote(readParenLiteral());
1355 return [=] {
1356 Symbol *b = symtab->find(name);
1357 return (b && b->isDefined()) ? 1 : 0;
1358 };
1359 }
1360 if (tok == "LENGTH") {
1361 StringRef name = readParenLiteral();
1362 if (script->memoryRegions.count(name) == 0) {
1363 setError("memory region not defined: " + name);
1364 return [] { return 0; };
1365 }
1366 return script->memoryRegions[name]->length;
1367 }
1368 if (tok == "LOADADDR") {
1369 StringRef name = readParenLiteral();
1370 OutputSection *cmd = script->getOrCreateOutputSection(name);
1371 cmd->usedInExpression = true;
1372 return [=] {
1373 checkIfExists(cmd, location);
1374 return cmd->getLMA();
1375 };
1376 }
1377 if (tok == "LOG2CEIL") {
1378 expect("(");
1379 Expr a = readExpr();
1380 expect(")");
1381 return [=] {
1382 // LOG2CEIL(0) is defined to be 0.
1383 return llvm::Log2_64_Ceil(std::max(a().getValue(), UINT64_C(1)));
1384 };
1385 }
1386 if (tok == "MAX" || tok == "MIN") {
1387 expect("(");
1388 Expr a = readExpr();
1389 expect(",");
1390 Expr b = readExpr();
1391 expect(")");
1392 if (tok == "MIN")
1393 return [=] { return std::min(a().getValue(), b().getValue()); };
1394 return [=] { return std::max(a().getValue(), b().getValue()); };
1395 }
1396 if (tok == "ORIGIN") {
1397 StringRef name = readParenLiteral();
1398 if (script->memoryRegions.count(name) == 0) {
1399 setError("memory region not defined: " + name);
1400 return [] { return 0; };
1401 }
1402 return script->memoryRegions[name]->origin;
1403 }
1404 if (tok == "SEGMENT_START") {
1405 expect("(");
1406 skip();
1407 expect(",");
1408 Expr e = readExpr();
1409 expect(")");
1410 return [=] { return e(); };
1411 }
1412 if (tok == "SIZEOF") {
1413 StringRef name = readParenLiteral();
1414 OutputSection *cmd = script->getOrCreateOutputSection(name);
1415 // Linker script does not create an output section if its content is empty.
1416 // We want to allow SIZEOF(.foo) where .foo is a section which happened to
1417 // be empty.
1418 return [=] { return cmd->size; };
1419 }
1420 if (tok == "SIZEOF_HEADERS")
1421 return [=] { return elf::getHeaderSize(); };
1422
1423 // Tok is the dot.
1424 if (tok == ".")
1425 return [=] { return script->getSymbolValue(tok, location); };
1426
1427 // Tok is a literal number.
1428 if (Optional<uint64_t> val = parseInt(tok))
1429 return [=] { return *val; };
1430
1431 // Tok is a symbol name.
1432 tok = unquote(tok);
1433 if (!isValidSymbolName(tok))
1434 setError("malformed number: " + tok);
1435 script->referencedSymbols.push_back(tok);
1436 return [=] { return script->getSymbolValue(tok, location); };
1437 }
1438
readTernary(Expr cond)1439 Expr ScriptParser::readTernary(Expr cond) {
1440 Expr l = readExpr();
1441 expect(":");
1442 Expr r = readExpr();
1443 return [=] { return cond().getValue() ? l() : r(); };
1444 }
1445
readParenExpr()1446 Expr ScriptParser::readParenExpr() {
1447 expect("(");
1448 Expr e = readExpr();
1449 expect(")");
1450 return e;
1451 }
1452
readOutputSectionPhdrs()1453 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() {
1454 std::vector<StringRef> phdrs;
1455 while (!errorCount() && peek().startswith(":")) {
1456 StringRef tok = next();
1457 phdrs.push_back((tok.size() == 1) ? next() : tok.substr(1));
1458 }
1459 return phdrs;
1460 }
1461
1462 // Read a program header type name. The next token must be a
1463 // name of a program header type or a constant (e.g. "0x3").
readPhdrType()1464 unsigned ScriptParser::readPhdrType() {
1465 StringRef tok = next();
1466 if (Optional<uint64_t> val = parseInt(tok))
1467 return *val;
1468
1469 unsigned ret = StringSwitch<unsigned>(tok)
1470 .Case("PT_NULL", PT_NULL)
1471 .Case("PT_LOAD", PT_LOAD)
1472 .Case("PT_DYNAMIC", PT_DYNAMIC)
1473 .Case("PT_INTERP", PT_INTERP)
1474 .Case("PT_NOTE", PT_NOTE)
1475 .Case("PT_SHLIB", PT_SHLIB)
1476 .Case("PT_PHDR", PT_PHDR)
1477 .Case("PT_TLS", PT_TLS)
1478 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME)
1479 .Case("PT_GNU_STACK", PT_GNU_STACK)
1480 .Case("PT_GNU_RELRO", PT_GNU_RELRO)
1481 .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE)
1482 .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED)
1483 .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA)
1484 .Default(-1);
1485
1486 if (ret == (unsigned)-1) {
1487 setError("invalid program header type: " + tok);
1488 return PT_NULL;
1489 }
1490 return ret;
1491 }
1492
1493 // Reads an anonymous version declaration.
readAnonymousDeclaration()1494 void ScriptParser::readAnonymousDeclaration() {
1495 std::vector<SymbolVersion> locals;
1496 std::vector<SymbolVersion> globals;
1497 std::tie(locals, globals) = readSymbols();
1498 for (const SymbolVersion &pat : locals)
1499 config->versionDefinitions[VER_NDX_LOCAL].localPatterns.push_back(pat);
1500 for (const SymbolVersion &pat : globals)
1501 config->versionDefinitions[VER_NDX_GLOBAL].nonLocalPatterns.push_back(pat);
1502
1503 expect(";");
1504 }
1505
1506 // Reads a non-anonymous version definition,
1507 // e.g. "VerStr { global: foo; bar; local: *; };".
readVersionDeclaration(StringRef verStr)1508 void ScriptParser::readVersionDeclaration(StringRef verStr) {
1509 // Read a symbol list.
1510 std::vector<SymbolVersion> locals;
1511 std::vector<SymbolVersion> globals;
1512 std::tie(locals, globals) = readSymbols();
1513
1514 // Create a new version definition and add that to the global symbols.
1515 VersionDefinition ver;
1516 ver.name = verStr;
1517 ver.nonLocalPatterns = std::move(globals);
1518 ver.localPatterns = std::move(locals);
1519 ver.id = config->versionDefinitions.size();
1520 config->versionDefinitions.push_back(ver);
1521
1522 // Each version may have a parent version. For example, "Ver2"
1523 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1"
1524 // as a parent. This version hierarchy is, probably against your
1525 // instinct, purely for hint; the runtime doesn't care about it
1526 // at all. In LLD, we simply ignore it.
1527 if (next() != ";")
1528 expect(";");
1529 }
1530
hasWildcard(StringRef s)1531 bool elf::hasWildcard(StringRef s) {
1532 return s.find_first_of("?*[") != StringRef::npos;
1533 }
1534
1535 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };".
1536 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>>
readSymbols()1537 ScriptParser::readSymbols() {
1538 std::vector<SymbolVersion> locals;
1539 std::vector<SymbolVersion> globals;
1540 std::vector<SymbolVersion> *v = &globals;
1541
1542 while (!errorCount()) {
1543 if (consume("}"))
1544 break;
1545 if (consumeLabel("local")) {
1546 v = &locals;
1547 continue;
1548 }
1549 if (consumeLabel("global")) {
1550 v = &globals;
1551 continue;
1552 }
1553
1554 if (consume("extern")) {
1555 std::vector<SymbolVersion> ext = readVersionExtern();
1556 v->insert(v->end(), ext.begin(), ext.end());
1557 } else {
1558 StringRef tok = next();
1559 v->push_back({unquote(tok), false, hasWildcard(tok)});
1560 }
1561 expect(";");
1562 }
1563 return {locals, globals};
1564 }
1565
1566 // Reads an "extern C++" directive, e.g.,
1567 // "extern "C++" { ns::*; "f(int, double)"; };"
1568 //
1569 // The last semicolon is optional. E.g. this is OK:
1570 // "extern "C++" { ns::*; "f(int, double)" };"
readVersionExtern()1571 std::vector<SymbolVersion> ScriptParser::readVersionExtern() {
1572 StringRef tok = next();
1573 bool isCXX = tok == "\"C++\"";
1574 if (!isCXX && tok != "\"C\"")
1575 setError("Unknown language");
1576 expect("{");
1577
1578 std::vector<SymbolVersion> ret;
1579 while (!errorCount() && peek() != "}") {
1580 StringRef tok = next();
1581 ret.push_back(
1582 {unquote(tok), isCXX, !tok.startswith("\"") && hasWildcard(tok)});
1583 if (consume("}"))
1584 return ret;
1585 expect(";");
1586 }
1587
1588 expect("}");
1589 return ret;
1590 }
1591
readMemoryAssignment(StringRef s1,StringRef s2,StringRef s3)1592 Expr ScriptParser::readMemoryAssignment(StringRef s1, StringRef s2,
1593 StringRef s3) {
1594 if (!consume(s1) && !consume(s2) && !consume(s3)) {
1595 setError("expected one of: " + s1 + ", " + s2 + ", or " + s3);
1596 return [] { return 0; };
1597 }
1598 expect("=");
1599 return readExpr();
1600 }
1601
1602 // Parse the MEMORY command as specified in:
1603 // https://sourceware.org/binutils/docs/ld/MEMORY.html
1604 //
1605 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... }
readMemory()1606 void ScriptParser::readMemory() {
1607 expect("{");
1608 while (!errorCount() && !consume("}")) {
1609 StringRef tok = next();
1610 if (tok == "INCLUDE") {
1611 readInclude();
1612 continue;
1613 }
1614
1615 uint32_t flags = 0;
1616 uint32_t negFlags = 0;
1617 if (consume("(")) {
1618 std::tie(flags, negFlags) = readMemoryAttributes();
1619 expect(")");
1620 }
1621 expect(":");
1622
1623 Expr origin = readMemoryAssignment("ORIGIN", "org", "o");
1624 expect(",");
1625 Expr length = readMemoryAssignment("LENGTH", "len", "l");
1626
1627 // Add the memory region to the region map.
1628 MemoryRegion *mr = make<MemoryRegion>(tok, origin, length, flags, negFlags);
1629 if (!script->memoryRegions.insert({tok, mr}).second)
1630 setError("region '" + tok + "' already defined");
1631 }
1632 }
1633
1634 // This function parses the attributes used to match against section
1635 // flags when placing output sections in a memory region. These flags
1636 // are only used when an explicit memory region name is not used.
readMemoryAttributes()1637 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() {
1638 uint32_t flags = 0;
1639 uint32_t negFlags = 0;
1640 bool invert = false;
1641
1642 for (char c : next().lower()) {
1643 uint32_t flag = 0;
1644 if (c == '!')
1645 invert = !invert;
1646 else if (c == 'w')
1647 flag = SHF_WRITE;
1648 else if (c == 'x')
1649 flag = SHF_EXECINSTR;
1650 else if (c == 'a')
1651 flag = SHF_ALLOC;
1652 else if (c != 'r')
1653 setError("invalid memory region attribute");
1654
1655 if (invert)
1656 negFlags |= flag;
1657 else
1658 flags |= flag;
1659 }
1660 return {flags, negFlags};
1661 }
1662
readLinkerScript(MemoryBufferRef mb)1663 void elf::readLinkerScript(MemoryBufferRef mb) {
1664 llvm::TimeTraceScope timeScope("Read linker script",
1665 mb.getBufferIdentifier());
1666 ScriptParser(mb).readLinkerScript();
1667 }
1668
readVersionScript(MemoryBufferRef mb)1669 void elf::readVersionScript(MemoryBufferRef mb) {
1670 llvm::TimeTraceScope timeScope("Read version script",
1671 mb.getBufferIdentifier());
1672 ScriptParser(mb).readVersionScript();
1673 }
1674
readDynamicList(MemoryBufferRef mb)1675 void elf::readDynamicList(MemoryBufferRef mb) {
1676 llvm::TimeTraceScope timeScope("Read dynamic list", mb.getBufferIdentifier());
1677 ScriptParser(mb).readDynamicList();
1678 }
1679
readDefsym(StringRef name,MemoryBufferRef mb)1680 void elf::readDefsym(StringRef name, MemoryBufferRef mb) {
1681 llvm::TimeTraceScope timeScope("Read defsym input", name);
1682 ScriptParser(mb).readDefsym(name);
1683 }
1684