1 //===- ScriptParser.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains a recursive-descendent parser for linker scripts. 11 // Parsed results are stored to Config and Script global objects. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ScriptParser.h" 16 #include "Config.h" 17 #include "Driver.h" 18 #include "InputSection.h" 19 #include "LinkerScript.h" 20 #include "Memory.h" 21 #include "OutputSections.h" 22 #include "ScriptLexer.h" 23 #include "Symbols.h" 24 #include "Target.h" 25 #include "llvm/ADT/SmallString.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/ADT/StringSwitch.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/Support/Casting.h" 30 #include "llvm/Support/ErrorHandling.h" 31 #include "llvm/Support/FileSystem.h" 32 #include "llvm/Support/Path.h" 33 #include <cassert> 34 #include <limits> 35 #include <vector> 36 37 using namespace llvm; 38 using namespace llvm::ELF; 39 using namespace llvm::support::endian; 40 using namespace lld; 41 using namespace lld::elf; 42 43 static bool isUnderSysroot(StringRef Path); 44 45 namespace { 46 class ScriptParser final : ScriptLexer { 47 public: 48 ScriptParser(MemoryBufferRef MB) 49 : ScriptLexer(MB), 50 IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} 51 52 void readLinkerScript(); 53 void readVersionScript(); 54 void readDynamicList(); 55 56 private: 57 void addFile(StringRef Path); 58 OutputSection *checkSection(OutputSectionCommand *Cmd, StringRef Loccation); 59 60 void readAsNeeded(); 61 void readEntry(); 62 void readExtern(); 63 void readGroup(); 64 void readInclude(); 65 void readMemory(); 66 void readOutput(); 67 void readOutputArch(); 68 void readOutputFormat(); 69 void readPhdrs(); 70 void readSearchDir(); 71 void readSections(); 72 void readVersion(); 73 void readVersionScriptCommand(); 74 75 SymbolAssignment *readAssignment(StringRef Name); 76 BytesDataCommand *readBytesDataCommand(StringRef Tok); 77 uint32_t readFill(); 78 uint32_t parseFill(StringRef Tok); 79 void readSectionAddressType(OutputSectionCommand *Cmd); 80 OutputSectionCommand *readOutputSectionDescription(StringRef OutSec); 81 std::vector<StringRef> readOutputSectionPhdrs(); 82 InputSectionDescription *readInputSectionDescription(StringRef Tok); 83 StringMatcher readFilePatterns(); 84 std::vector<SectionPattern> readInputSectionsList(); 85 InputSectionDescription *readInputSectionRules(StringRef FilePattern); 86 unsigned readPhdrType(); 87 SortSectionPolicy readSortKind(); 88 SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); 89 SymbolAssignment *readProvideOrAssignment(StringRef Tok); 90 void readSort(); 91 AssertCommand *readAssert(); 92 Expr readAssertExpr(); 93 94 uint64_t readMemoryAssignment(StringRef, StringRef, StringRef); 95 std::pair<uint32_t, uint32_t> readMemoryAttributes(); 96 97 Expr readExpr(); 98 Expr readExpr1(Expr Lhs, int MinPrec); 99 StringRef readParenLiteral(); 100 Expr readPrimary(); 101 Expr readTernary(Expr Cond); 102 Expr readParenExpr(); 103 104 // For parsing version script. 105 std::vector<SymbolVersion> readVersionExtern(); 106 void readAnonymousDeclaration(); 107 void readVersionDeclaration(StringRef VerStr); 108 109 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 110 readSymbols(); 111 112 bool IsUnderSysroot; 113 }; 114 } // namespace 115 116 static StringRef unquote(StringRef S) { 117 if (S.startswith("\"")) 118 return S.substr(1, S.size() - 2); 119 return S; 120 } 121 122 static bool isUnderSysroot(StringRef Path) { 123 if (Config->Sysroot == "") 124 return false; 125 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 126 if (sys::fs::equivalent(Config->Sysroot, Path)) 127 return true; 128 return false; 129 } 130 131 // Some operations only support one non absolute value. Move the 132 // absolute one to the right hand side for convenience. 133 static void moveAbsRight(ExprValue &A, ExprValue &B) { 134 if (A.isAbsolute()) 135 std::swap(A, B); 136 if (!B.isAbsolute()) 137 error(A.Loc + ": at least one side of the expression must be absolute"); 138 } 139 140 static ExprValue add(ExprValue A, ExprValue B) { 141 moveAbsRight(A, B); 142 return {A.Sec, A.ForceAbsolute, A.Val + B.getValue(), A.Loc}; 143 } 144 145 static ExprValue sub(ExprValue A, ExprValue B) { 146 return {A.Sec, A.Val - B.getValue(), A.Loc}; 147 } 148 149 static ExprValue mul(ExprValue A, ExprValue B) { 150 return A.getValue() * B.getValue(); 151 } 152 153 static ExprValue div(ExprValue A, ExprValue B) { 154 if (uint64_t BV = B.getValue()) 155 return A.getValue() / BV; 156 error("division by zero"); 157 return 0; 158 } 159 160 static ExprValue bitAnd(ExprValue A, ExprValue B) { 161 moveAbsRight(A, B); 162 return {A.Sec, A.ForceAbsolute, 163 (A.getValue() & B.getValue()) - A.getSecAddr(), A.Loc}; 164 } 165 166 static ExprValue bitOr(ExprValue A, ExprValue B) { 167 moveAbsRight(A, B); 168 return {A.Sec, A.ForceAbsolute, 169 (A.getValue() | B.getValue()) - A.getSecAddr(), A.Loc}; 170 } 171 172 void ScriptParser::readDynamicList() { 173 expect("{"); 174 readAnonymousDeclaration(); 175 if (!atEOF()) 176 setError("EOF expected, but got " + next()); 177 } 178 179 void ScriptParser::readVersionScript() { 180 readVersionScriptCommand(); 181 if (!atEOF()) 182 setError("EOF expected, but got " + next()); 183 } 184 185 void ScriptParser::readVersionScriptCommand() { 186 if (consume("{")) { 187 readAnonymousDeclaration(); 188 return; 189 } 190 191 while (!atEOF() && !Error && peek() != "}") { 192 StringRef VerStr = next(); 193 if (VerStr == "{") { 194 setError("anonymous version definition is used in " 195 "combination with other version definitions"); 196 return; 197 } 198 expect("{"); 199 readVersionDeclaration(VerStr); 200 } 201 } 202 203 void ScriptParser::readVersion() { 204 expect("{"); 205 readVersionScriptCommand(); 206 expect("}"); 207 } 208 209 void ScriptParser::readLinkerScript() { 210 while (!atEOF()) { 211 StringRef Tok = next(); 212 if (Tok == ";") 213 continue; 214 215 if (Tok == "ASSERT") { 216 Script->Opt.Commands.push_back(readAssert()); 217 } else if (Tok == "ENTRY") { 218 readEntry(); 219 } else if (Tok == "EXTERN") { 220 readExtern(); 221 } else if (Tok == "GROUP" || Tok == "INPUT") { 222 readGroup(); 223 } else if (Tok == "INCLUDE") { 224 readInclude(); 225 } else if (Tok == "MEMORY") { 226 readMemory(); 227 } else if (Tok == "OUTPUT") { 228 readOutput(); 229 } else if (Tok == "OUTPUT_ARCH") { 230 readOutputArch(); 231 } else if (Tok == "OUTPUT_FORMAT") { 232 readOutputFormat(); 233 } else if (Tok == "PHDRS") { 234 readPhdrs(); 235 } else if (Tok == "SEARCH_DIR") { 236 readSearchDir(); 237 } else if (Tok == "SECTIONS") { 238 readSections(); 239 } else if (Tok == "VERSION") { 240 readVersion(); 241 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { 242 Script->Opt.Commands.push_back(Cmd); 243 } else { 244 setError("unknown directive: " + Tok); 245 } 246 } 247 } 248 249 void ScriptParser::addFile(StringRef S) { 250 if (IsUnderSysroot && S.startswith("/")) { 251 SmallString<128> PathData; 252 StringRef Path = (Config->Sysroot + S).toStringRef(PathData); 253 if (sys::fs::exists(Path)) { 254 Driver->addFile(Saver.save(Path), /*WithLOption=*/false); 255 return; 256 } 257 } 258 259 if (sys::path::is_absolute(S)) { 260 Driver->addFile(S, /*WithLOption=*/false); 261 } else if (S.startswith("=")) { 262 if (Config->Sysroot.empty()) 263 Driver->addFile(S.substr(1), /*WithLOption=*/false); 264 else 265 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)), 266 /*WithLOption=*/false); 267 } else if (S.startswith("-l")) { 268 Driver->addLibrary(S.substr(2)); 269 } else if (sys::fs::exists(S)) { 270 Driver->addFile(S, /*WithLOption=*/false); 271 } else { 272 if (Optional<std::string> Path = findFromSearchPaths(S)) 273 Driver->addFile(Saver.save(*Path), /*WithLOption=*/true); 274 else 275 setError("unable to find " + S); 276 } 277 } 278 279 void ScriptParser::readAsNeeded() { 280 expect("("); 281 bool Orig = Config->AsNeeded; 282 Config->AsNeeded = true; 283 while (!Error && !consume(")")) 284 addFile(unquote(next())); 285 Config->AsNeeded = Orig; 286 } 287 288 void ScriptParser::readEntry() { 289 // -e <symbol> takes predecence over ENTRY(<symbol>). 290 expect("("); 291 StringRef Tok = next(); 292 if (Config->Entry.empty()) 293 Config->Entry = Tok; 294 expect(")"); 295 } 296 297 void ScriptParser::readExtern() { 298 expect("("); 299 while (!Error && !consume(")")) 300 Config->Undefined.push_back(next()); 301 } 302 303 void ScriptParser::readGroup() { 304 expect("("); 305 while (!Error && !consume(")")) { 306 if (consume("AS_NEEDED")) 307 readAsNeeded(); 308 else 309 addFile(unquote(next())); 310 } 311 } 312 313 void ScriptParser::readInclude() { 314 StringRef Tok = unquote(next()); 315 316 // https://sourceware.org/binutils/docs/ld/File-Commands.html: 317 // The file will be searched for in the current directory, and in any 318 // directory specified with the -L option. 319 if (sys::fs::exists(Tok)) { 320 if (Optional<MemoryBufferRef> MB = readFile(Tok)) 321 tokenize(*MB); 322 return; 323 } 324 if (Optional<std::string> Path = findFromSearchPaths(Tok)) { 325 if (Optional<MemoryBufferRef> MB = readFile(*Path)) 326 tokenize(*MB); 327 return; 328 } 329 setError("cannot open " + Tok); 330 } 331 332 void ScriptParser::readOutput() { 333 // -o <file> takes predecence over OUTPUT(<file>). 334 expect("("); 335 StringRef Tok = next(); 336 if (Config->OutputFile.empty()) 337 Config->OutputFile = unquote(Tok); 338 expect(")"); 339 } 340 341 void ScriptParser::readOutputArch() { 342 // OUTPUT_ARCH is ignored for now. 343 expect("("); 344 while (!Error && !consume(")")) 345 skip(); 346 } 347 348 void ScriptParser::readOutputFormat() { 349 // Error checking only for now. 350 expect("("); 351 skip(); 352 if (consume(")")) 353 return; 354 expect(","); 355 skip(); 356 expect(","); 357 skip(); 358 expect(")"); 359 } 360 361 void ScriptParser::readPhdrs() { 362 expect("{"); 363 while (!Error && !consume("}")) { 364 Script->Opt.PhdrsCommands.push_back( 365 {next(), PT_NULL, false, false, UINT_MAX, nullptr}); 366 367 PhdrsCommand &PhdrCmd = Script->Opt.PhdrsCommands.back(); 368 PhdrCmd.Type = readPhdrType(); 369 370 while (!Error && !consume(";")) { 371 if (consume("FILEHDR")) 372 PhdrCmd.HasFilehdr = true; 373 else if (consume("PHDRS")) 374 PhdrCmd.HasPhdrs = true; 375 else if (consume("AT")) 376 PhdrCmd.LMAExpr = readParenExpr(); 377 else if (consume("FLAGS")) 378 PhdrCmd.Flags = readParenExpr()().getValue(); 379 else 380 setError("unexpected header attribute: " + next()); 381 } 382 } 383 } 384 385 void ScriptParser::readSearchDir() { 386 expect("("); 387 StringRef Tok = next(); 388 if (!Config->Nostdlib) 389 Config->SearchPaths.push_back(unquote(Tok)); 390 expect(")"); 391 } 392 393 void ScriptParser::readSections() { 394 Script->Opt.HasSections = true; 395 396 // -no-rosegment is used to avoid placing read only non-executable sections in 397 // their own segment. We do the same if SECTIONS command is present in linker 398 // script. See comment for computeFlags(). 399 Config->SingleRoRx = true; 400 401 expect("{"); 402 while (!Error && !consume("}")) { 403 StringRef Tok = next(); 404 BaseCommand *Cmd = readProvideOrAssignment(Tok); 405 if (!Cmd) { 406 if (Tok == "ASSERT") 407 Cmd = readAssert(); 408 else 409 Cmd = readOutputSectionDescription(Tok); 410 } 411 Script->Opt.Commands.push_back(Cmd); 412 } 413 } 414 415 static int precedence(StringRef Op) { 416 return StringSwitch<int>(Op) 417 .Cases("*", "/", 5) 418 .Cases("+", "-", 4) 419 .Cases("<<", ">>", 3) 420 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 421 .Cases("&", "|", 1) 422 .Default(-1); 423 } 424 425 StringMatcher ScriptParser::readFilePatterns() { 426 std::vector<StringRef> V; 427 while (!Error && !consume(")")) 428 V.push_back(next()); 429 return StringMatcher(V); 430 } 431 432 SortSectionPolicy ScriptParser::readSortKind() { 433 if (consume("SORT") || consume("SORT_BY_NAME")) 434 return SortSectionPolicy::Name; 435 if (consume("SORT_BY_ALIGNMENT")) 436 return SortSectionPolicy::Alignment; 437 if (consume("SORT_BY_INIT_PRIORITY")) 438 return SortSectionPolicy::Priority; 439 if (consume("SORT_NONE")) 440 return SortSectionPolicy::None; 441 return SortSectionPolicy::Default; 442 } 443 444 // Reads SECTIONS command contents in the following form: 445 // 446 // <contents> ::= <elem>* 447 // <elem> ::= <exclude>? <glob-pattern> 448 // <exclude> ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")" 449 // 450 // For example, 451 // 452 // *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz) 453 // 454 // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o". 455 // The semantics of that is section .foo in any file, section .bar in 456 // any file but a.o, and section .baz in any file but b.o. 457 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 458 std::vector<SectionPattern> Ret; 459 while (!Error && peek() != ")") { 460 StringMatcher ExcludeFilePat; 461 if (consume("EXCLUDE_FILE")) { 462 expect("("); 463 ExcludeFilePat = readFilePatterns(); 464 } 465 466 std::vector<StringRef> V; 467 while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") 468 V.push_back(next()); 469 470 if (!V.empty()) 471 Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); 472 else 473 setError("section pattern is expected"); 474 } 475 return Ret; 476 } 477 478 // Reads contents of "SECTIONS" directive. That directive contains a 479 // list of glob patterns for input sections. The grammar is as follows. 480 // 481 // <patterns> ::= <section-list> 482 // | <sort> "(" <section-list> ")" 483 // | <sort> "(" <sort> "(" <section-list> ")" ")" 484 // 485 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" 486 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" 487 // 488 // <section-list> is parsed by readInputSectionsList(). 489 InputSectionDescription * 490 ScriptParser::readInputSectionRules(StringRef FilePattern) { 491 auto *Cmd = make<InputSectionDescription>(FilePattern); 492 expect("("); 493 494 while (!Error && !consume(")")) { 495 SortSectionPolicy Outer = readSortKind(); 496 SortSectionPolicy Inner = SortSectionPolicy::Default; 497 std::vector<SectionPattern> V; 498 if (Outer != SortSectionPolicy::Default) { 499 expect("("); 500 Inner = readSortKind(); 501 if (Inner != SortSectionPolicy::Default) { 502 expect("("); 503 V = readInputSectionsList(); 504 expect(")"); 505 } else { 506 V = readInputSectionsList(); 507 } 508 expect(")"); 509 } else { 510 V = readInputSectionsList(); 511 } 512 513 for (SectionPattern &Pat : V) { 514 Pat.SortInner = Inner; 515 Pat.SortOuter = Outer; 516 } 517 518 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 519 } 520 return Cmd; 521 } 522 523 InputSectionDescription * 524 ScriptParser::readInputSectionDescription(StringRef Tok) { 525 // Input section wildcard can be surrounded by KEEP. 526 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 527 if (Tok == "KEEP") { 528 expect("("); 529 StringRef FilePattern = next(); 530 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 531 expect(")"); 532 Script->Opt.KeptSections.push_back(Cmd); 533 return Cmd; 534 } 535 return readInputSectionRules(Tok); 536 } 537 538 void ScriptParser::readSort() { 539 expect("("); 540 expect("CONSTRUCTORS"); 541 expect(")"); 542 } 543 544 AssertCommand *ScriptParser::readAssert() { 545 return make<AssertCommand>(readAssertExpr()); 546 } 547 548 Expr ScriptParser::readAssertExpr() { 549 expect("("); 550 Expr E = readExpr(); 551 expect(","); 552 StringRef Msg = unquote(next()); 553 expect(")"); 554 555 return [=] { 556 if (!E().getValue()) 557 error(Msg); 558 return Script->getDot(); 559 }; 560 } 561 562 // Reads a FILL(expr) command. We handle the FILL command as an 563 // alias for =fillexp section attribute, which is different from 564 // what GNU linkers do. 565 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 566 uint32_t ScriptParser::readFill() { 567 expect("("); 568 uint32_t V = parseFill(next()); 569 expect(")"); 570 return V; 571 } 572 573 // Reads an expression and/or the special directive "(NOLOAD)" for an 574 // output section definition. 575 // 576 // An output section name can be followed by an address expression 577 // and/or by "(NOLOAD)". This grammar is not LL(1) because "(" can be 578 // interpreted as either the beginning of some expression or "(NOLOAD)". 579 // 580 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html 581 // https://sourceware.org/binutils/docs/ld/Output-Section-Type.html 582 void ScriptParser::readSectionAddressType(OutputSectionCommand *Cmd) { 583 if (consume("(")) { 584 if (consume("NOLOAD")) { 585 expect(")"); 586 Cmd->Noload = true; 587 return; 588 } 589 Cmd->AddrExpr = readExpr(); 590 expect(")"); 591 } else { 592 Cmd->AddrExpr = readExpr(); 593 } 594 595 if (consume("(")) { 596 expect("NOLOAD"); 597 expect(")"); 598 Cmd->Noload = true; 599 } 600 } 601 602 OutputSectionCommand * 603 ScriptParser::readOutputSectionDescription(StringRef OutSec) { 604 OutputSectionCommand *Cmd = 605 Script->createOutputSectionCommand(OutSec, getCurrentLocation()); 606 607 if (peek() != ":") 608 readSectionAddressType(Cmd); 609 expect(":"); 610 611 if (consume("AT")) 612 Cmd->LMAExpr = readParenExpr(); 613 if (consume("ALIGN")) 614 Cmd->AlignExpr = readParenExpr(); 615 if (consume("SUBALIGN")) 616 Cmd->SubalignExpr = readParenExpr(); 617 618 // Parse constraints. 619 if (consume("ONLY_IF_RO")) 620 Cmd->Constraint = ConstraintKind::ReadOnly; 621 if (consume("ONLY_IF_RW")) 622 Cmd->Constraint = ConstraintKind::ReadWrite; 623 expect("{"); 624 625 while (!Error && !consume("}")) { 626 StringRef Tok = next(); 627 if (Tok == ";") { 628 // Empty commands are allowed. Do nothing here. 629 } else if (SymbolAssignment *Assign = readProvideOrAssignment(Tok)) { 630 Cmd->Commands.push_back(Assign); 631 } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) { 632 Cmd->Commands.push_back(Data); 633 } else if (Tok == "ASSERT") { 634 Cmd->Commands.push_back(readAssert()); 635 expect(";"); 636 } else if (Tok == "CONSTRUCTORS") { 637 // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors 638 // by name. This is for very old file formats such as ECOFF/XCOFF. 639 // For ELF, we should ignore. 640 } else if (Tok == "FILL") { 641 Cmd->Filler = readFill(); 642 } else if (Tok == "SORT") { 643 readSort(); 644 } else if (peek() == "(") { 645 Cmd->Commands.push_back(readInputSectionDescription(Tok)); 646 } else { 647 setError("unknown command " + Tok); 648 } 649 } 650 651 if (consume(">")) 652 Cmd->MemoryRegionName = next(); 653 654 Cmd->Phdrs = readOutputSectionPhdrs(); 655 656 if (consume("=")) 657 Cmd->Filler = parseFill(next()); 658 else if (peek().startswith("=")) 659 Cmd->Filler = parseFill(next().drop_front()); 660 661 // Consume optional comma following output section command. 662 consume(","); 663 664 return Cmd; 665 } 666 667 // Parses a given string as a octal/decimal/hexadecimal number and 668 // returns it as a big-endian number. Used for `=<fillexp>`. 669 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 670 // 671 // When reading a hexstring, ld.bfd handles it as a blob of arbitrary 672 // size, while ld.gold always handles it as a 32-bit big-endian number. 673 // We are compatible with ld.gold because it's easier to implement. 674 uint32_t ScriptParser::parseFill(StringRef Tok) { 675 uint32_t V = 0; 676 if (!to_integer(Tok, V)) 677 setError("invalid filler expression: " + Tok); 678 679 uint32_t Buf; 680 write32be(&Buf, V); 681 return Buf; 682 } 683 684 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { 685 expect("("); 686 SymbolAssignment *Cmd = readAssignment(next()); 687 Cmd->Provide = Provide; 688 Cmd->Hidden = Hidden; 689 expect(")"); 690 expect(";"); 691 return Cmd; 692 } 693 694 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) { 695 SymbolAssignment *Cmd = nullptr; 696 if (peek() == "=" || peek() == "+=") { 697 Cmd = readAssignment(Tok); 698 expect(";"); 699 } else if (Tok == "PROVIDE") { 700 Cmd = readProvideHidden(true, false); 701 } else if (Tok == "HIDDEN") { 702 Cmd = readProvideHidden(false, true); 703 } else if (Tok == "PROVIDE_HIDDEN") { 704 Cmd = readProvideHidden(true, true); 705 } 706 return Cmd; 707 } 708 709 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { 710 StringRef Op = next(); 711 assert(Op == "=" || Op == "+="); 712 Expr E = readExpr(); 713 if (Op == "+=") { 714 std::string Loc = getCurrentLocation(); 715 E = [=] { return add(Script->getSymbolValue(Loc, Name), E()); }; 716 } 717 return make<SymbolAssignment>(Name, E, getCurrentLocation()); 718 } 719 720 // This is an operator-precedence parser to parse a linker 721 // script expression. 722 Expr ScriptParser::readExpr() { 723 // Our lexer is context-aware. Set the in-expression bit so that 724 // they apply different tokenization rules. 725 bool Orig = InExpr; 726 InExpr = true; 727 Expr E = readExpr1(readPrimary(), 0); 728 InExpr = Orig; 729 return E; 730 } 731 732 static Expr combine(StringRef Op, Expr L, Expr R) { 733 if (Op == "+") 734 return [=] { return add(L(), R()); }; 735 if (Op == "-") 736 return [=] { return sub(L(), R()); }; 737 if (Op == "*") 738 return [=] { return mul(L(), R()); }; 739 if (Op == "/") 740 return [=] { return div(L(), R()); }; 741 if (Op == "<<") 742 return [=] { return L().getValue() << R().getValue(); }; 743 if (Op == ">>") 744 return [=] { return L().getValue() >> R().getValue(); }; 745 if (Op == "<") 746 return [=] { return L().getValue() < R().getValue(); }; 747 if (Op == ">") 748 return [=] { return L().getValue() > R().getValue(); }; 749 if (Op == ">=") 750 return [=] { return L().getValue() >= R().getValue(); }; 751 if (Op == "<=") 752 return [=] { return L().getValue() <= R().getValue(); }; 753 if (Op == "==") 754 return [=] { return L().getValue() == R().getValue(); }; 755 if (Op == "!=") 756 return [=] { return L().getValue() != R().getValue(); }; 757 if (Op == "&") 758 return [=] { return bitAnd(L(), R()); }; 759 if (Op == "|") 760 return [=] { return bitOr(L(), R()); }; 761 llvm_unreachable("invalid operator"); 762 } 763 764 // This is a part of the operator-precedence parser. This function 765 // assumes that the remaining token stream starts with an operator. 766 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { 767 while (!atEOF() && !Error) { 768 // Read an operator and an expression. 769 if (consume("?")) 770 return readTernary(Lhs); 771 StringRef Op1 = peek(); 772 if (precedence(Op1) < MinPrec) 773 break; 774 skip(); 775 Expr Rhs = readPrimary(); 776 777 // Evaluate the remaining part of the expression first if the 778 // next operator has greater precedence than the previous one. 779 // For example, if we have read "+" and "3", and if the next 780 // operator is "*", then we'll evaluate 3 * ... part first. 781 while (!atEOF()) { 782 StringRef Op2 = peek(); 783 if (precedence(Op2) <= precedence(Op1)) 784 break; 785 Rhs = readExpr1(Rhs, precedence(Op2)); 786 } 787 788 Lhs = combine(Op1, Lhs, Rhs); 789 } 790 return Lhs; 791 } 792 793 uint64_t static getConstant(StringRef S) { 794 if (S == "COMMONPAGESIZE") 795 return Target->PageSize; 796 if (S == "MAXPAGESIZE") 797 return Config->MaxPageSize; 798 error("unknown constant: " + S); 799 return 0; 800 } 801 802 // Parses Tok as an integer. It recognizes hexadecimal (prefixed with 803 // "0x" or suffixed with "H") and decimal numbers. Decimal numbers may 804 // have "K" (Ki) or "M" (Mi) suffixes. 805 static Optional<uint64_t> parseInt(StringRef Tok) { 806 // Negative number 807 if (Tok.startswith("-")) { 808 if (Optional<uint64_t> Val = parseInt(Tok.substr(1))) 809 return -*Val; 810 return None; 811 } 812 813 // Hexadecimal 814 uint64_t Val; 815 if (Tok.startswith_lower("0x") && to_integer(Tok.substr(2), Val, 16)) 816 return Val; 817 if (Tok.endswith_lower("H") && to_integer(Tok.drop_back(), Val, 16)) 818 return Val; 819 820 // Decimal 821 if (Tok.endswith_lower("K")) { 822 if (!to_integer(Tok.drop_back(), Val, 10)) 823 return None; 824 return Val * 1024; 825 } 826 if (Tok.endswith_lower("M")) { 827 if (!to_integer(Tok.drop_back(), Val, 10)) 828 return None; 829 return Val * 1024 * 1024; 830 } 831 if (!to_integer(Tok, Val, 10)) 832 return None; 833 return Val; 834 } 835 836 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { 837 int Size = StringSwitch<int>(Tok) 838 .Case("BYTE", 1) 839 .Case("SHORT", 2) 840 .Case("LONG", 4) 841 .Case("QUAD", 8) 842 .Default(-1); 843 if (Size == -1) 844 return nullptr; 845 846 return make<BytesDataCommand>(readParenExpr(), Size); 847 } 848 849 StringRef ScriptParser::readParenLiteral() { 850 expect("("); 851 StringRef Tok = next(); 852 expect(")"); 853 return Tok; 854 } 855 856 OutputSection *ScriptParser::checkSection(OutputSectionCommand *Cmd, 857 StringRef Location) { 858 if (Cmd->Location.empty() && Script->ErrorOnMissingSection) 859 error(Location + ": undefined section " + Cmd->Name); 860 if (Cmd->Sec) 861 return Cmd->Sec; 862 static OutputSection Dummy("", 0, 0); 863 return &Dummy; 864 } 865 866 Expr ScriptParser::readPrimary() { 867 if (peek() == "(") 868 return readParenExpr(); 869 870 if (consume("~")) { 871 Expr E = readPrimary(); 872 return [=] { return ~E().getValue(); }; 873 } 874 if (consume("-")) { 875 Expr E = readPrimary(); 876 return [=] { return -E().getValue(); }; 877 } 878 879 StringRef Tok = next(); 880 std::string Location = getCurrentLocation(); 881 882 // Built-in functions are parsed here. 883 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 884 if (Tok == "ABSOLUTE") { 885 Expr Inner = readParenExpr(); 886 return [=] { 887 ExprValue I = Inner(); 888 I.ForceAbsolute = true; 889 return I; 890 }; 891 } 892 if (Tok == "ADDR") { 893 StringRef Name = readParenLiteral(); 894 OutputSectionCommand *Cmd = Script->getOrCreateOutputSectionCommand(Name); 895 return [=]() -> ExprValue { 896 return {checkSection(Cmd, Location), 0, Location}; 897 }; 898 } 899 if (Tok == "ALIGN") { 900 expect("("); 901 Expr E = readExpr(); 902 if (consume(")")) 903 return [=] { return alignTo(Script->getDot(), E().getValue()); }; 904 expect(","); 905 Expr E2 = readExpr(); 906 expect(")"); 907 return [=] { 908 ExprValue V = E(); 909 V.Alignment = E2().getValue(); 910 return V; 911 }; 912 } 913 if (Tok == "ALIGNOF") { 914 StringRef Name = readParenLiteral(); 915 OutputSectionCommand *Cmd = Script->getOrCreateOutputSectionCommand(Name); 916 return [=] { return checkSection(Cmd, Location)->Alignment; }; 917 } 918 if (Tok == "ASSERT") 919 return readAssertExpr(); 920 if (Tok == "CONSTANT") { 921 StringRef Name = readParenLiteral(); 922 return [=] { return getConstant(Name); }; 923 } 924 if (Tok == "DATA_SEGMENT_ALIGN") { 925 expect("("); 926 Expr E = readExpr(); 927 expect(","); 928 readExpr(); 929 expect(")"); 930 return [=] { return alignTo(Script->getDot(), E().getValue()); }; 931 } 932 if (Tok == "DATA_SEGMENT_END") { 933 expect("("); 934 expect("."); 935 expect(")"); 936 return [] { return Script->getDot(); }; 937 } 938 if (Tok == "DATA_SEGMENT_RELRO_END") { 939 // GNU linkers implements more complicated logic to handle 940 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and 941 // just align to the next page boundary for simplicity. 942 expect("("); 943 readExpr(); 944 expect(","); 945 readExpr(); 946 expect(")"); 947 return [] { return alignTo(Script->getDot(), Target->PageSize); }; 948 } 949 if (Tok == "DEFINED") { 950 StringRef Name = readParenLiteral(); 951 return [=] { return Script->isDefined(Name) ? 1 : 0; }; 952 } 953 if (Tok == "LENGTH") { 954 StringRef Name = readParenLiteral(); 955 if (Script->Opt.MemoryRegions.count(Name) == 0) 956 setError("memory region not defined: " + Name); 957 return [=] { return Script->Opt.MemoryRegions[Name].Length; }; 958 } 959 if (Tok == "LOADADDR") { 960 StringRef Name = readParenLiteral(); 961 OutputSectionCommand *Cmd = Script->getOrCreateOutputSectionCommand(Name); 962 return [=] { return checkSection(Cmd, Location)->getLMA(); }; 963 } 964 if (Tok == "ORIGIN") { 965 StringRef Name = readParenLiteral(); 966 if (Script->Opt.MemoryRegions.count(Name) == 0) 967 setError("memory region not defined: " + Name); 968 return [=] { return Script->Opt.MemoryRegions[Name].Origin; }; 969 } 970 if (Tok == "SEGMENT_START") { 971 expect("("); 972 skip(); 973 expect(","); 974 Expr E = readExpr(); 975 expect(")"); 976 return [=] { return E(); }; 977 } 978 if (Tok == "SIZEOF") { 979 StringRef Name = readParenLiteral(); 980 OutputSectionCommand *Cmd = Script->getOrCreateOutputSectionCommand(Name); 981 // Linker script does not create an output section if its content is empty. 982 // We want to allow SIZEOF(.foo) where .foo is a section which happened to 983 // be empty. 984 return [=] { return Cmd->Sec ? Cmd->Sec->Size : 0; }; 985 } 986 if (Tok == "SIZEOF_HEADERS") 987 return [=] { return elf::getHeaderSize(); }; 988 989 // Tok is the dot. 990 if (Tok == ".") 991 return [=] { return Script->getSymbolValue(Location, Tok); }; 992 993 // Tok is a literal number. 994 if (Optional<uint64_t> Val = parseInt(Tok)) 995 return [=] { return *Val; }; 996 997 // Tok is a symbol name. 998 if (!isValidCIdentifier(Tok)) 999 setError("malformed number: " + Tok); 1000 Script->Opt.ReferencedSymbols.push_back(Tok); 1001 return [=] { return Script->getSymbolValue(Location, Tok); }; 1002 } 1003 1004 Expr ScriptParser::readTernary(Expr Cond) { 1005 Expr L = readExpr(); 1006 expect(":"); 1007 Expr R = readExpr(); 1008 return [=] { return Cond().getValue() ? L() : R(); }; 1009 } 1010 1011 Expr ScriptParser::readParenExpr() { 1012 expect("("); 1013 Expr E = readExpr(); 1014 expect(")"); 1015 return E; 1016 } 1017 1018 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 1019 std::vector<StringRef> Phdrs; 1020 while (!Error && peek().startswith(":")) { 1021 StringRef Tok = next(); 1022 Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); 1023 } 1024 return Phdrs; 1025 } 1026 1027 // Read a program header type name. The next token must be a 1028 // name of a program header type or a constant (e.g. "0x3"). 1029 unsigned ScriptParser::readPhdrType() { 1030 StringRef Tok = next(); 1031 if (Optional<uint64_t> Val = parseInt(Tok)) 1032 return *Val; 1033 1034 unsigned Ret = StringSwitch<unsigned>(Tok) 1035 .Case("PT_NULL", PT_NULL) 1036 .Case("PT_LOAD", PT_LOAD) 1037 .Case("PT_DYNAMIC", PT_DYNAMIC) 1038 .Case("PT_INTERP", PT_INTERP) 1039 .Case("PT_NOTE", PT_NOTE) 1040 .Case("PT_SHLIB", PT_SHLIB) 1041 .Case("PT_PHDR", PT_PHDR) 1042 .Case("PT_TLS", PT_TLS) 1043 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1044 .Case("PT_GNU_STACK", PT_GNU_STACK) 1045 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1046 .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) 1047 .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) 1048 .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) 1049 .Default(-1); 1050 1051 if (Ret == (unsigned)-1) { 1052 setError("invalid program header type: " + Tok); 1053 return PT_NULL; 1054 } 1055 return Ret; 1056 } 1057 1058 // Reads an anonymous version declaration. 1059 void ScriptParser::readAnonymousDeclaration() { 1060 std::vector<SymbolVersion> Locals; 1061 std::vector<SymbolVersion> Globals; 1062 std::tie(Locals, Globals) = readSymbols(); 1063 1064 for (SymbolVersion V : Locals) { 1065 if (V.Name == "*") 1066 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1067 else 1068 Config->VersionScriptLocals.push_back(V); 1069 } 1070 1071 for (SymbolVersion V : Globals) 1072 Config->VersionScriptGlobals.push_back(V); 1073 1074 expect(";"); 1075 } 1076 1077 // Reads a non-anonymous version definition, 1078 // e.g. "VerStr { global: foo; bar; local: *; };". 1079 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1080 // Read a symbol list. 1081 std::vector<SymbolVersion> Locals; 1082 std::vector<SymbolVersion> Globals; 1083 std::tie(Locals, Globals) = readSymbols(); 1084 1085 for (SymbolVersion V : Locals) { 1086 if (V.Name == "*") 1087 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1088 else 1089 Config->VersionScriptLocals.push_back(V); 1090 } 1091 1092 // Create a new version definition and add that to the global symbols. 1093 VersionDefinition Ver; 1094 Ver.Name = VerStr; 1095 Ver.Globals = Globals; 1096 1097 // User-defined version number starts from 2 because 0 and 1 are 1098 // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively. 1099 Ver.Id = Config->VersionDefinitions.size() + 2; 1100 Config->VersionDefinitions.push_back(Ver); 1101 1102 // Each version may have a parent version. For example, "Ver2" 1103 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" 1104 // as a parent. This version hierarchy is, probably against your 1105 // instinct, purely for hint; the runtime doesn't care about it 1106 // at all. In LLD, we simply ignore it. 1107 if (peek() != ";") 1108 skip(); 1109 expect(";"); 1110 } 1111 1112 static bool hasWildcard(StringRef S) { 1113 return S.find_first_of("?*[") != StringRef::npos; 1114 } 1115 1116 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 1117 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 1118 ScriptParser::readSymbols() { 1119 std::vector<SymbolVersion> Locals; 1120 std::vector<SymbolVersion> Globals; 1121 std::vector<SymbolVersion> *V = &Globals; 1122 1123 while (!Error) { 1124 if (consume("}")) 1125 break; 1126 if (consumeLabel("local")) { 1127 V = &Locals; 1128 continue; 1129 } 1130 if (consumeLabel("global")) { 1131 V = &Globals; 1132 continue; 1133 } 1134 1135 if (consume("extern")) { 1136 std::vector<SymbolVersion> Ext = readVersionExtern(); 1137 V->insert(V->end(), Ext.begin(), Ext.end()); 1138 } else { 1139 StringRef Tok = next(); 1140 V->push_back({unquote(Tok), false, hasWildcard(Tok)}); 1141 } 1142 expect(";"); 1143 } 1144 return {Locals, Globals}; 1145 } 1146 1147 // Reads an "extern C++" directive, e.g., 1148 // "extern "C++" { ns::*; "f(int, double)"; };" 1149 std::vector<SymbolVersion> ScriptParser::readVersionExtern() { 1150 StringRef Tok = next(); 1151 bool IsCXX = Tok == "\"C++\""; 1152 if (!IsCXX && Tok != "\"C\"") 1153 setError("Unknown language"); 1154 expect("{"); 1155 1156 std::vector<SymbolVersion> Ret; 1157 while (!Error && peek() != "}") { 1158 StringRef Tok = next(); 1159 bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); 1160 Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); 1161 expect(";"); 1162 } 1163 1164 expect("}"); 1165 return Ret; 1166 } 1167 1168 uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2, 1169 StringRef S3) { 1170 if (!consume(S1) && !consume(S2) && !consume(S3)) { 1171 setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); 1172 return 0; 1173 } 1174 expect("="); 1175 return readExpr()().getValue(); 1176 } 1177 1178 // Parse the MEMORY command as specified in: 1179 // https://sourceware.org/binutils/docs/ld/MEMORY.html 1180 // 1181 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... } 1182 void ScriptParser::readMemory() { 1183 expect("{"); 1184 while (!Error && !consume("}")) { 1185 StringRef Name = next(); 1186 1187 uint32_t Flags = 0; 1188 uint32_t NegFlags = 0; 1189 if (consume("(")) { 1190 std::tie(Flags, NegFlags) = readMemoryAttributes(); 1191 expect(")"); 1192 } 1193 expect(":"); 1194 1195 uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); 1196 expect(","); 1197 uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); 1198 1199 // Add the memory region to the region map (if it doesn't already exist). 1200 auto It = Script->Opt.MemoryRegions.find(Name); 1201 if (It != Script->Opt.MemoryRegions.end()) 1202 setError("region '" + Name + "' already defined"); 1203 else 1204 Script->Opt.MemoryRegions[Name] = {Name, Origin, Length, Flags, NegFlags}; 1205 } 1206 } 1207 1208 // This function parses the attributes used to match against section 1209 // flags when placing output sections in a memory region. These flags 1210 // are only used when an explicit memory region name is not used. 1211 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { 1212 uint32_t Flags = 0; 1213 uint32_t NegFlags = 0; 1214 bool Invert = false; 1215 1216 for (char C : next().lower()) { 1217 uint32_t Flag = 0; 1218 if (C == '!') 1219 Invert = !Invert; 1220 else if (C == 'w') 1221 Flag = SHF_WRITE; 1222 else if (C == 'x') 1223 Flag = SHF_EXECINSTR; 1224 else if (C == 'a') 1225 Flag = SHF_ALLOC; 1226 else if (C != 'r') 1227 setError("invalid memory region attribute"); 1228 1229 if (Invert) 1230 NegFlags |= Flag; 1231 else 1232 Flags |= Flag; 1233 } 1234 return {Flags, NegFlags}; 1235 } 1236 1237 void elf::readLinkerScript(MemoryBufferRef MB) { 1238 ScriptParser(MB).readLinkerScript(); 1239 } 1240 1241 void elf::readVersionScript(MemoryBufferRef MB) { 1242 ScriptParser(MB).readVersionScript(); 1243 } 1244 1245 void elf::readDynamicList(MemoryBufferRef MB) { 1246 ScriptParser(MB).readDynamicList(); 1247 } 1248