1 //===- ScriptParser.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains a recursive-descendent parser for linker scripts. 11 // Parsed results are stored to Config and Script global objects. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ScriptParser.h" 16 #include "Config.h" 17 #include "Driver.h" 18 #include "InputSection.h" 19 #include "LinkerScript.h" 20 #include "Memory.h" 21 #include "OutputSections.h" 22 #include "ScriptLexer.h" 23 #include "Symbols.h" 24 #include "Target.h" 25 #include "llvm/ADT/SmallString.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/ADT/StringSwitch.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/Support/Casting.h" 30 #include "llvm/Support/ErrorHandling.h" 31 #include "llvm/Support/FileSystem.h" 32 #include "llvm/Support/Path.h" 33 #include <cassert> 34 #include <limits> 35 #include <vector> 36 37 using namespace llvm; 38 using namespace llvm::ELF; 39 using namespace llvm::support::endian; 40 using namespace lld; 41 using namespace lld::elf; 42 43 static bool isUnderSysroot(StringRef Path); 44 45 namespace { 46 class ScriptParser final : ScriptLexer { 47 public: 48 ScriptParser(MemoryBufferRef MB) 49 : ScriptLexer(MB), 50 IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} 51 52 void readLinkerScript(); 53 void readVersionScript(); 54 void readDynamicList(); 55 56 private: 57 void addFile(StringRef Path); 58 OutputSection *checkSection(OutputSection *Cmd, StringRef Loccation); 59 60 void readAsNeeded(); 61 void readEntry(); 62 void readExtern(); 63 void readGroup(); 64 void readInclude(); 65 void readMemory(); 66 void readOutput(); 67 void readOutputArch(); 68 void readOutputFormat(); 69 void readPhdrs(); 70 void readSearchDir(); 71 void readSections(); 72 void readVersion(); 73 void readVersionScriptCommand(); 74 75 SymbolAssignment *readAssignment(StringRef Name); 76 BytesDataCommand *readBytesDataCommand(StringRef Tok); 77 uint32_t readFill(); 78 uint32_t parseFill(StringRef Tok); 79 void readSectionAddressType(OutputSection *Cmd); 80 OutputSection *readOutputSectionDescription(StringRef OutSec); 81 std::vector<StringRef> readOutputSectionPhdrs(); 82 InputSectionDescription *readInputSectionDescription(StringRef Tok); 83 StringMatcher readFilePatterns(); 84 std::vector<SectionPattern> readInputSectionsList(); 85 InputSectionDescription *readInputSectionRules(StringRef FilePattern); 86 unsigned readPhdrType(); 87 SortSectionPolicy readSortKind(); 88 SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); 89 SymbolAssignment *readProvideOrAssignment(StringRef Tok); 90 void readSort(); 91 AssertCommand *readAssert(); 92 Expr readAssertExpr(); 93 94 uint64_t readMemoryAssignment(StringRef, StringRef, StringRef); 95 std::pair<uint32_t, uint32_t> readMemoryAttributes(); 96 97 Expr readExpr(); 98 Expr readExpr1(Expr Lhs, int MinPrec); 99 StringRef readParenLiteral(); 100 Expr readPrimary(); 101 Expr readTernary(Expr Cond); 102 Expr readParenExpr(); 103 104 // For parsing version script. 105 std::vector<SymbolVersion> readVersionExtern(); 106 void readAnonymousDeclaration(); 107 void readVersionDeclaration(StringRef VerStr); 108 109 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 110 readSymbols(); 111 112 bool IsUnderSysroot; 113 }; 114 } // namespace 115 116 static StringRef unquote(StringRef S) { 117 if (S.startswith("\"")) 118 return S.substr(1, S.size() - 2); 119 return S; 120 } 121 122 static bool isUnderSysroot(StringRef Path) { 123 if (Config->Sysroot == "") 124 return false; 125 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 126 if (sys::fs::equivalent(Config->Sysroot, Path)) 127 return true; 128 return false; 129 } 130 131 // Some operations only support one non absolute value. Move the 132 // absolute one to the right hand side for convenience. 133 static void moveAbsRight(ExprValue &A, ExprValue &B) { 134 if (A.isAbsolute()) 135 std::swap(A, B); 136 if (!B.isAbsolute()) 137 error(A.Loc + ": at least one side of the expression must be absolute"); 138 } 139 140 static ExprValue add(ExprValue A, ExprValue B) { 141 moveAbsRight(A, B); 142 uint64_t Val = alignTo(A.Val, A.Alignment) + B.getValue(); 143 return {A.Sec, A.ForceAbsolute, Val, A.Loc}; 144 } 145 146 static ExprValue sub(ExprValue A, ExprValue B) { 147 uint64_t Val = alignTo(A.Val, A.Alignment) - B.getValue(); 148 return {A.Sec, Val, A.Loc}; 149 } 150 151 static ExprValue mul(ExprValue A, ExprValue B) { 152 return A.getValue() * B.getValue(); 153 } 154 155 static ExprValue div(ExprValue A, ExprValue B) { 156 if (uint64_t BV = B.getValue()) 157 return A.getValue() / BV; 158 error("division by zero"); 159 return 0; 160 } 161 162 static ExprValue bitAnd(ExprValue A, ExprValue B) { 163 moveAbsRight(A, B); 164 return {A.Sec, A.ForceAbsolute, 165 (A.getValue() & B.getValue()) - A.getSecAddr(), A.Loc}; 166 } 167 168 static ExprValue bitOr(ExprValue A, ExprValue B) { 169 moveAbsRight(A, B); 170 return {A.Sec, A.ForceAbsolute, 171 (A.getValue() | B.getValue()) - A.getSecAddr(), A.Loc}; 172 } 173 174 void ScriptParser::readDynamicList() { 175 expect("{"); 176 readAnonymousDeclaration(); 177 if (!atEOF()) 178 setError("EOF expected, but got " + next()); 179 } 180 181 void ScriptParser::readVersionScript() { 182 readVersionScriptCommand(); 183 if (!atEOF()) 184 setError("EOF expected, but got " + next()); 185 } 186 187 void ScriptParser::readVersionScriptCommand() { 188 if (consume("{")) { 189 readAnonymousDeclaration(); 190 return; 191 } 192 193 while (!atEOF() && !Error && peek() != "}") { 194 StringRef VerStr = next(); 195 if (VerStr == "{") { 196 setError("anonymous version definition is used in " 197 "combination with other version definitions"); 198 return; 199 } 200 expect("{"); 201 readVersionDeclaration(VerStr); 202 } 203 } 204 205 void ScriptParser::readVersion() { 206 expect("{"); 207 readVersionScriptCommand(); 208 expect("}"); 209 } 210 211 void ScriptParser::readLinkerScript() { 212 while (!atEOF()) { 213 StringRef Tok = next(); 214 if (Tok == ";") 215 continue; 216 217 if (Tok == "ASSERT") { 218 Script->Opt.Commands.push_back(readAssert()); 219 } else if (Tok == "ENTRY") { 220 readEntry(); 221 } else if (Tok == "EXTERN") { 222 readExtern(); 223 } else if (Tok == "GROUP" || Tok == "INPUT") { 224 readGroup(); 225 } else if (Tok == "INCLUDE") { 226 readInclude(); 227 } else if (Tok == "MEMORY") { 228 readMemory(); 229 } else if (Tok == "OUTPUT") { 230 readOutput(); 231 } else if (Tok == "OUTPUT_ARCH") { 232 readOutputArch(); 233 } else if (Tok == "OUTPUT_FORMAT") { 234 readOutputFormat(); 235 } else if (Tok == "PHDRS") { 236 readPhdrs(); 237 } else if (Tok == "SEARCH_DIR") { 238 readSearchDir(); 239 } else if (Tok == "SECTIONS") { 240 readSections(); 241 } else if (Tok == "VERSION") { 242 readVersion(); 243 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { 244 Script->Opt.Commands.push_back(Cmd); 245 } else { 246 setError("unknown directive: " + Tok); 247 } 248 } 249 } 250 251 void ScriptParser::addFile(StringRef S) { 252 if (IsUnderSysroot && S.startswith("/")) { 253 SmallString<128> PathData; 254 StringRef Path = (Config->Sysroot + S).toStringRef(PathData); 255 if (sys::fs::exists(Path)) { 256 Driver->addFile(Saver.save(Path), /*WithLOption=*/false); 257 return; 258 } 259 } 260 261 if (S.startswith("/")) { 262 Driver->addFile(S, /*WithLOption=*/false); 263 } else if (S.startswith("=")) { 264 if (Config->Sysroot.empty()) 265 Driver->addFile(S.substr(1), /*WithLOption=*/false); 266 else 267 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)), 268 /*WithLOption=*/false); 269 } else if (S.startswith("-l")) { 270 Driver->addLibrary(S.substr(2)); 271 } else if (sys::fs::exists(S)) { 272 Driver->addFile(S, /*WithLOption=*/false); 273 } else { 274 if (Optional<std::string> Path = findFromSearchPaths(S)) 275 Driver->addFile(Saver.save(*Path), /*WithLOption=*/true); 276 else 277 setError("unable to find " + S); 278 } 279 } 280 281 void ScriptParser::readAsNeeded() { 282 expect("("); 283 bool Orig = Config->AsNeeded; 284 Config->AsNeeded = true; 285 while (!Error && !consume(")")) 286 addFile(unquote(next())); 287 Config->AsNeeded = Orig; 288 } 289 290 void ScriptParser::readEntry() { 291 // -e <symbol> takes predecence over ENTRY(<symbol>). 292 expect("("); 293 StringRef Tok = next(); 294 if (Config->Entry.empty()) 295 Config->Entry = Tok; 296 expect(")"); 297 } 298 299 void ScriptParser::readExtern() { 300 expect("("); 301 while (!Error && !consume(")")) 302 Config->Undefined.push_back(next()); 303 } 304 305 void ScriptParser::readGroup() { 306 expect("("); 307 while (!Error && !consume(")")) { 308 if (consume("AS_NEEDED")) 309 readAsNeeded(); 310 else 311 addFile(unquote(next())); 312 } 313 } 314 315 void ScriptParser::readInclude() { 316 StringRef Tok = unquote(next()); 317 318 // https://sourceware.org/binutils/docs/ld/File-Commands.html: 319 // The file will be searched for in the current directory, and in any 320 // directory specified with the -L option. 321 if (sys::fs::exists(Tok)) { 322 if (Optional<MemoryBufferRef> MB = readFile(Tok)) 323 tokenize(*MB); 324 return; 325 } 326 if (Optional<std::string> Path = findFromSearchPaths(Tok)) { 327 if (Optional<MemoryBufferRef> MB = readFile(*Path)) 328 tokenize(*MB); 329 return; 330 } 331 setError("cannot open " + Tok); 332 } 333 334 void ScriptParser::readOutput() { 335 // -o <file> takes predecence over OUTPUT(<file>). 336 expect("("); 337 StringRef Tok = next(); 338 if (Config->OutputFile.empty()) 339 Config->OutputFile = unquote(Tok); 340 expect(")"); 341 } 342 343 void ScriptParser::readOutputArch() { 344 // OUTPUT_ARCH is ignored for now. 345 expect("("); 346 while (!Error && !consume(")")) 347 skip(); 348 } 349 350 void ScriptParser::readOutputFormat() { 351 // Error checking only for now. 352 expect("("); 353 skip(); 354 if (consume(")")) 355 return; 356 expect(","); 357 skip(); 358 expect(","); 359 skip(); 360 expect(")"); 361 } 362 363 void ScriptParser::readPhdrs() { 364 expect("{"); 365 while (!Error && !consume("}")) { 366 Script->Opt.PhdrsCommands.push_back( 367 {next(), PT_NULL, false, false, UINT_MAX, nullptr}); 368 369 PhdrsCommand &PhdrCmd = Script->Opt.PhdrsCommands.back(); 370 PhdrCmd.Type = readPhdrType(); 371 372 while (!Error && !consume(";")) { 373 if (consume("FILEHDR")) 374 PhdrCmd.HasFilehdr = true; 375 else if (consume("PHDRS")) 376 PhdrCmd.HasPhdrs = true; 377 else if (consume("AT")) 378 PhdrCmd.LMAExpr = readParenExpr(); 379 else if (consume("FLAGS")) 380 PhdrCmd.Flags = readParenExpr()().getValue(); 381 else 382 setError("unexpected header attribute: " + next()); 383 } 384 } 385 } 386 387 void ScriptParser::readSearchDir() { 388 expect("("); 389 StringRef Tok = next(); 390 if (!Config->Nostdlib) 391 Config->SearchPaths.push_back(unquote(Tok)); 392 expect(")"); 393 } 394 395 void ScriptParser::readSections() { 396 Script->Opt.HasSections = true; 397 398 // -no-rosegment is used to avoid placing read only non-executable sections in 399 // their own segment. We do the same if SECTIONS command is present in linker 400 // script. See comment for computeFlags(). 401 Config->SingleRoRx = true; 402 403 expect("{"); 404 while (!Error && !consume("}")) { 405 StringRef Tok = next(); 406 BaseCommand *Cmd = readProvideOrAssignment(Tok); 407 if (!Cmd) { 408 if (Tok == "ASSERT") 409 Cmd = readAssert(); 410 else 411 Cmd = readOutputSectionDescription(Tok); 412 } 413 Script->Opt.Commands.push_back(Cmd); 414 } 415 } 416 417 static int precedence(StringRef Op) { 418 return StringSwitch<int>(Op) 419 .Cases("*", "/", 5) 420 .Cases("+", "-", 4) 421 .Cases("<<", ">>", 3) 422 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 423 .Cases("&", "|", 1) 424 .Default(-1); 425 } 426 427 StringMatcher ScriptParser::readFilePatterns() { 428 std::vector<StringRef> V; 429 while (!Error && !consume(")")) 430 V.push_back(next()); 431 return StringMatcher(V); 432 } 433 434 SortSectionPolicy ScriptParser::readSortKind() { 435 if (consume("SORT") || consume("SORT_BY_NAME")) 436 return SortSectionPolicy::Name; 437 if (consume("SORT_BY_ALIGNMENT")) 438 return SortSectionPolicy::Alignment; 439 if (consume("SORT_BY_INIT_PRIORITY")) 440 return SortSectionPolicy::Priority; 441 if (consume("SORT_NONE")) 442 return SortSectionPolicy::None; 443 return SortSectionPolicy::Default; 444 } 445 446 // Reads SECTIONS command contents in the following form: 447 // 448 // <contents> ::= <elem>* 449 // <elem> ::= <exclude>? <glob-pattern> 450 // <exclude> ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")" 451 // 452 // For example, 453 // 454 // *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz) 455 // 456 // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o". 457 // The semantics of that is section .foo in any file, section .bar in 458 // any file but a.o, and section .baz in any file but b.o. 459 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 460 std::vector<SectionPattern> Ret; 461 while (!Error && peek() != ")") { 462 StringMatcher ExcludeFilePat; 463 if (consume("EXCLUDE_FILE")) { 464 expect("("); 465 ExcludeFilePat = readFilePatterns(); 466 } 467 468 std::vector<StringRef> V; 469 while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") 470 V.push_back(next()); 471 472 if (!V.empty()) 473 Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); 474 else 475 setError("section pattern is expected"); 476 } 477 return Ret; 478 } 479 480 // Reads contents of "SECTIONS" directive. That directive contains a 481 // list of glob patterns for input sections. The grammar is as follows. 482 // 483 // <patterns> ::= <section-list> 484 // | <sort> "(" <section-list> ")" 485 // | <sort> "(" <sort> "(" <section-list> ")" ")" 486 // 487 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" 488 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" 489 // 490 // <section-list> is parsed by readInputSectionsList(). 491 InputSectionDescription * 492 ScriptParser::readInputSectionRules(StringRef FilePattern) { 493 auto *Cmd = make<InputSectionDescription>(FilePattern); 494 expect("("); 495 496 while (!Error && !consume(")")) { 497 SortSectionPolicy Outer = readSortKind(); 498 SortSectionPolicy Inner = SortSectionPolicy::Default; 499 std::vector<SectionPattern> V; 500 if (Outer != SortSectionPolicy::Default) { 501 expect("("); 502 Inner = readSortKind(); 503 if (Inner != SortSectionPolicy::Default) { 504 expect("("); 505 V = readInputSectionsList(); 506 expect(")"); 507 } else { 508 V = readInputSectionsList(); 509 } 510 expect(")"); 511 } else { 512 V = readInputSectionsList(); 513 } 514 515 for (SectionPattern &Pat : V) { 516 Pat.SortInner = Inner; 517 Pat.SortOuter = Outer; 518 } 519 520 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 521 } 522 return Cmd; 523 } 524 525 InputSectionDescription * 526 ScriptParser::readInputSectionDescription(StringRef Tok) { 527 // Input section wildcard can be surrounded by KEEP. 528 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 529 if (Tok == "KEEP") { 530 expect("("); 531 StringRef FilePattern = next(); 532 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 533 expect(")"); 534 Script->Opt.KeptSections.push_back(Cmd); 535 return Cmd; 536 } 537 return readInputSectionRules(Tok); 538 } 539 540 void ScriptParser::readSort() { 541 expect("("); 542 expect("CONSTRUCTORS"); 543 expect(")"); 544 } 545 546 AssertCommand *ScriptParser::readAssert() { 547 return make<AssertCommand>(readAssertExpr()); 548 } 549 550 Expr ScriptParser::readAssertExpr() { 551 expect("("); 552 Expr E = readExpr(); 553 expect(","); 554 StringRef Msg = unquote(next()); 555 expect(")"); 556 557 return [=] { 558 if (!E().getValue()) 559 error(Msg); 560 return Script->getDot(); 561 }; 562 } 563 564 // Reads a FILL(expr) command. We handle the FILL command as an 565 // alias for =fillexp section attribute, which is different from 566 // what GNU linkers do. 567 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 568 uint32_t ScriptParser::readFill() { 569 expect("("); 570 uint32_t V = parseFill(next()); 571 expect(")"); 572 return V; 573 } 574 575 // Reads an expression and/or the special directive "(NOLOAD)" for an 576 // output section definition. 577 // 578 // An output section name can be followed by an address expression 579 // and/or by "(NOLOAD)". This grammar is not LL(1) because "(" can be 580 // interpreted as either the beginning of some expression or "(NOLOAD)". 581 // 582 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html 583 // https://sourceware.org/binutils/docs/ld/Output-Section-Type.html 584 void ScriptParser::readSectionAddressType(OutputSection *Cmd) { 585 if (consume("(")) { 586 if (consume("NOLOAD")) { 587 expect(")"); 588 Cmd->Noload = true; 589 return; 590 } 591 Cmd->AddrExpr = readExpr(); 592 expect(")"); 593 } else { 594 Cmd->AddrExpr = readExpr(); 595 } 596 597 if (consume("(")) { 598 expect("NOLOAD"); 599 expect(")"); 600 Cmd->Noload = true; 601 } 602 } 603 604 OutputSection *ScriptParser::readOutputSectionDescription(StringRef OutSec) { 605 OutputSection *Cmd = 606 Script->createOutputSection(OutSec, getCurrentLocation()); 607 608 if (peek() != ":") 609 readSectionAddressType(Cmd); 610 expect(":"); 611 612 if (consume("AT")) 613 Cmd->LMAExpr = readParenExpr(); 614 if (consume("ALIGN")) 615 Cmd->AlignExpr = readParenExpr(); 616 if (consume("SUBALIGN")) 617 Cmd->SubalignExpr = readParenExpr(); 618 619 // Parse constraints. 620 if (consume("ONLY_IF_RO")) 621 Cmd->Constraint = ConstraintKind::ReadOnly; 622 if (consume("ONLY_IF_RW")) 623 Cmd->Constraint = ConstraintKind::ReadWrite; 624 expect("{"); 625 626 while (!Error && !consume("}")) { 627 StringRef Tok = next(); 628 if (Tok == ";") { 629 // Empty commands are allowed. Do nothing here. 630 } else if (SymbolAssignment *Assign = readProvideOrAssignment(Tok)) { 631 Cmd->Commands.push_back(Assign); 632 } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) { 633 Cmd->Commands.push_back(Data); 634 } else if (Tok == "ASSERT") { 635 Cmd->Commands.push_back(readAssert()); 636 expect(";"); 637 } else if (Tok == "CONSTRUCTORS") { 638 // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors 639 // by name. This is for very old file formats such as ECOFF/XCOFF. 640 // For ELF, we should ignore. 641 } else if (Tok == "FILL") { 642 Cmd->Filler = readFill(); 643 } else if (Tok == "SORT") { 644 readSort(); 645 } else if (peek() == "(") { 646 Cmd->Commands.push_back(readInputSectionDescription(Tok)); 647 } else { 648 setError("unknown command " + Tok); 649 } 650 } 651 652 if (consume(">")) 653 Cmd->MemoryRegionName = next(); 654 else if (peek().startswith(">")) 655 Cmd->MemoryRegionName = next().drop_front(); 656 657 Cmd->Phdrs = readOutputSectionPhdrs(); 658 659 if (consume("=")) 660 Cmd->Filler = parseFill(next()); 661 else if (peek().startswith("=")) 662 Cmd->Filler = parseFill(next().drop_front()); 663 664 // Consume optional comma following output section command. 665 consume(","); 666 667 return Cmd; 668 } 669 670 // Parses a given string as a octal/decimal/hexadecimal number and 671 // returns it as a big-endian number. Used for `=<fillexp>`. 672 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 673 // 674 // When reading a hexstring, ld.bfd handles it as a blob of arbitrary 675 // size, while ld.gold always handles it as a 32-bit big-endian number. 676 // We are compatible with ld.gold because it's easier to implement. 677 uint32_t ScriptParser::parseFill(StringRef Tok) { 678 uint32_t V = 0; 679 if (!to_integer(Tok, V)) 680 setError("invalid filler expression: " + Tok); 681 682 uint32_t Buf; 683 write32be(&Buf, V); 684 return Buf; 685 } 686 687 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { 688 expect("("); 689 SymbolAssignment *Cmd = readAssignment(next()); 690 Cmd->Provide = Provide; 691 Cmd->Hidden = Hidden; 692 expect(")"); 693 expect(";"); 694 return Cmd; 695 } 696 697 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) { 698 SymbolAssignment *Cmd = nullptr; 699 if (peek() == "=" || peek() == "+=") { 700 Cmd = readAssignment(Tok); 701 expect(";"); 702 } else if (Tok == "PROVIDE") { 703 Cmd = readProvideHidden(true, false); 704 } else if (Tok == "HIDDEN") { 705 Cmd = readProvideHidden(false, true); 706 } else if (Tok == "PROVIDE_HIDDEN") { 707 Cmd = readProvideHidden(true, true); 708 } 709 return Cmd; 710 } 711 712 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { 713 StringRef Op = next(); 714 assert(Op == "=" || Op == "+="); 715 Expr E = readExpr(); 716 if (Op == "+=") { 717 std::string Loc = getCurrentLocation(); 718 E = [=] { return add(Script->getSymbolValue(Loc, Name), E()); }; 719 } 720 return make<SymbolAssignment>(Name, E, getCurrentLocation()); 721 } 722 723 // This is an operator-precedence parser to parse a linker 724 // script expression. 725 Expr ScriptParser::readExpr() { 726 // Our lexer is context-aware. Set the in-expression bit so that 727 // they apply different tokenization rules. 728 bool Orig = InExpr; 729 InExpr = true; 730 Expr E = readExpr1(readPrimary(), 0); 731 InExpr = Orig; 732 return E; 733 } 734 735 static Expr combine(StringRef Op, Expr L, Expr R) { 736 if (Op == "+") 737 return [=] { return add(L(), R()); }; 738 if (Op == "-") 739 return [=] { return sub(L(), R()); }; 740 if (Op == "*") 741 return [=] { return mul(L(), R()); }; 742 if (Op == "/") 743 return [=] { return div(L(), R()); }; 744 if (Op == "<<") 745 return [=] { return L().getValue() << R().getValue(); }; 746 if (Op == ">>") 747 return [=] { return L().getValue() >> R().getValue(); }; 748 if (Op == "<") 749 return [=] { return L().getValue() < R().getValue(); }; 750 if (Op == ">") 751 return [=] { return L().getValue() > R().getValue(); }; 752 if (Op == ">=") 753 return [=] { return L().getValue() >= R().getValue(); }; 754 if (Op == "<=") 755 return [=] { return L().getValue() <= R().getValue(); }; 756 if (Op == "==") 757 return [=] { return L().getValue() == R().getValue(); }; 758 if (Op == "!=") 759 return [=] { return L().getValue() != R().getValue(); }; 760 if (Op == "&") 761 return [=] { return bitAnd(L(), R()); }; 762 if (Op == "|") 763 return [=] { return bitOr(L(), R()); }; 764 llvm_unreachable("invalid operator"); 765 } 766 767 // This is a part of the operator-precedence parser. This function 768 // assumes that the remaining token stream starts with an operator. 769 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { 770 while (!atEOF() && !Error) { 771 // Read an operator and an expression. 772 if (consume("?")) 773 return readTernary(Lhs); 774 StringRef Op1 = peek(); 775 if (precedence(Op1) < MinPrec) 776 break; 777 skip(); 778 Expr Rhs = readPrimary(); 779 780 // Evaluate the remaining part of the expression first if the 781 // next operator has greater precedence than the previous one. 782 // For example, if we have read "+" and "3", and if the next 783 // operator is "*", then we'll evaluate 3 * ... part first. 784 while (!atEOF()) { 785 StringRef Op2 = peek(); 786 if (precedence(Op2) <= precedence(Op1)) 787 break; 788 Rhs = readExpr1(Rhs, precedence(Op2)); 789 } 790 791 Lhs = combine(Op1, Lhs, Rhs); 792 } 793 return Lhs; 794 } 795 796 uint64_t static getConstant(StringRef S) { 797 if (S == "COMMONPAGESIZE") 798 return Target->PageSize; 799 if (S == "MAXPAGESIZE") 800 return Config->MaxPageSize; 801 error("unknown constant: " + S); 802 return 0; 803 } 804 805 // Parses Tok as an integer. It recognizes hexadecimal (prefixed with 806 // "0x" or suffixed with "H") and decimal numbers. Decimal numbers may 807 // have "K" (Ki) or "M" (Mi) suffixes. 808 static Optional<uint64_t> parseInt(StringRef Tok) { 809 // Negative number 810 if (Tok.startswith("-")) { 811 if (Optional<uint64_t> Val = parseInt(Tok.substr(1))) 812 return -*Val; 813 return None; 814 } 815 816 // Hexadecimal 817 uint64_t Val; 818 if (Tok.startswith_lower("0x") && to_integer(Tok.substr(2), Val, 16)) 819 return Val; 820 if (Tok.endswith_lower("H") && to_integer(Tok.drop_back(), Val, 16)) 821 return Val; 822 823 // Decimal 824 if (Tok.endswith_lower("K")) { 825 if (!to_integer(Tok.drop_back(), Val, 10)) 826 return None; 827 return Val * 1024; 828 } 829 if (Tok.endswith_lower("M")) { 830 if (!to_integer(Tok.drop_back(), Val, 10)) 831 return None; 832 return Val * 1024 * 1024; 833 } 834 if (!to_integer(Tok, Val, 10)) 835 return None; 836 return Val; 837 } 838 839 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { 840 int Size = StringSwitch<int>(Tok) 841 .Case("BYTE", 1) 842 .Case("SHORT", 2) 843 .Case("LONG", 4) 844 .Case("QUAD", 8) 845 .Default(-1); 846 if (Size == -1) 847 return nullptr; 848 849 return make<BytesDataCommand>(readParenExpr(), Size); 850 } 851 852 StringRef ScriptParser::readParenLiteral() { 853 expect("("); 854 StringRef Tok = next(); 855 expect(")"); 856 return Tok; 857 } 858 859 OutputSection *ScriptParser::checkSection(OutputSection *Cmd, 860 StringRef Location) { 861 if (Cmd->Location.empty() && Script->ErrorOnMissingSection) 862 error(Location + ": undefined section " + Cmd->Name); 863 return Cmd; 864 } 865 866 Expr ScriptParser::readPrimary() { 867 if (peek() == "(") 868 return readParenExpr(); 869 870 if (consume("~")) { 871 Expr E = readPrimary(); 872 return [=] { return ~E().getValue(); }; 873 } 874 if (consume("-")) { 875 Expr E = readPrimary(); 876 return [=] { return -E().getValue(); }; 877 } 878 879 StringRef Tok = next(); 880 std::string Location = getCurrentLocation(); 881 882 // Built-in functions are parsed here. 883 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 884 if (Tok == "ABSOLUTE") { 885 Expr Inner = readParenExpr(); 886 return [=] { 887 ExprValue I = Inner(); 888 I.ForceAbsolute = true; 889 return I; 890 }; 891 } 892 if (Tok == "ADDR") { 893 StringRef Name = readParenLiteral(); 894 OutputSection *Cmd = Script->getOrCreateOutputSection(Name); 895 return [=]() -> ExprValue { 896 return {checkSection(Cmd, Location), 0, Location}; 897 }; 898 } 899 if (Tok == "ALIGN") { 900 expect("("); 901 Expr E = readExpr(); 902 if (consume(")")) 903 return [=] { 904 return alignTo(Script->getDot(), std::max((uint64_t)1, E().getValue())); 905 }; 906 expect(","); 907 Expr E2 = readExpr(); 908 expect(")"); 909 return [=] { 910 ExprValue V = E(); 911 V.Alignment = std::max((uint64_t)1, E2().getValue()); 912 return V; 913 }; 914 } 915 if (Tok == "ALIGNOF") { 916 StringRef Name = readParenLiteral(); 917 OutputSection *Cmd = Script->getOrCreateOutputSection(Name); 918 return [=] { return checkSection(Cmd, Location)->Alignment; }; 919 } 920 if (Tok == "ASSERT") 921 return readAssertExpr(); 922 if (Tok == "CONSTANT") { 923 StringRef Name = readParenLiteral(); 924 return [=] { return getConstant(Name); }; 925 } 926 if (Tok == "DATA_SEGMENT_ALIGN") { 927 expect("("); 928 Expr E = readExpr(); 929 expect(","); 930 readExpr(); 931 expect(")"); 932 return [=] { 933 return alignTo(Script->getDot(), std::max((uint64_t)1, E().getValue())); 934 }; 935 } 936 if (Tok == "DATA_SEGMENT_END") { 937 expect("("); 938 expect("."); 939 expect(")"); 940 return [] { return Script->getDot(); }; 941 } 942 if (Tok == "DATA_SEGMENT_RELRO_END") { 943 // GNU linkers implements more complicated logic to handle 944 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and 945 // just align to the next page boundary for simplicity. 946 expect("("); 947 readExpr(); 948 expect(","); 949 readExpr(); 950 expect(")"); 951 return [] { return alignTo(Script->getDot(), Target->PageSize); }; 952 } 953 if (Tok == "DEFINED") { 954 StringRef Name = readParenLiteral(); 955 return [=] { return Script->isDefined(Name) ? 1 : 0; }; 956 } 957 if (Tok == "LENGTH") { 958 StringRef Name = readParenLiteral(); 959 if (Script->Opt.MemoryRegions.count(Name) == 0) 960 setError("memory region not defined: " + Name); 961 return [=] { return Script->Opt.MemoryRegions[Name].Length; }; 962 } 963 if (Tok == "LOADADDR") { 964 StringRef Name = readParenLiteral(); 965 OutputSection *Cmd = Script->getOrCreateOutputSection(Name); 966 return [=] { return checkSection(Cmd, Location)->getLMA(); }; 967 } 968 if (Tok == "ORIGIN") { 969 StringRef Name = readParenLiteral(); 970 if (Script->Opt.MemoryRegions.count(Name) == 0) 971 setError("memory region not defined: " + Name); 972 return [=] { return Script->Opt.MemoryRegions[Name].Origin; }; 973 } 974 if (Tok == "SEGMENT_START") { 975 expect("("); 976 skip(); 977 expect(","); 978 Expr E = readExpr(); 979 expect(")"); 980 return [=] { return E(); }; 981 } 982 if (Tok == "SIZEOF") { 983 StringRef Name = readParenLiteral(); 984 OutputSection *Cmd = Script->getOrCreateOutputSection(Name); 985 // Linker script does not create an output section if its content is empty. 986 // We want to allow SIZEOF(.foo) where .foo is a section which happened to 987 // be empty. 988 return [=] { return Cmd->Size; }; 989 } 990 if (Tok == "SIZEOF_HEADERS") 991 return [=] { return elf::getHeaderSize(); }; 992 993 // Tok is the dot. 994 if (Tok == ".") 995 return [=] { return Script->getSymbolValue(Location, Tok); }; 996 997 // Tok is a literal number. 998 if (Optional<uint64_t> Val = parseInt(Tok)) 999 return [=] { return *Val; }; 1000 1001 // Tok is a symbol name. 1002 if (!isValidCIdentifier(Tok)) 1003 setError("malformed number: " + Tok); 1004 Script->Opt.ReferencedSymbols.push_back(Tok); 1005 return [=] { return Script->getSymbolValue(Location, Tok); }; 1006 } 1007 1008 Expr ScriptParser::readTernary(Expr Cond) { 1009 Expr L = readExpr(); 1010 expect(":"); 1011 Expr R = readExpr(); 1012 return [=] { return Cond().getValue() ? L() : R(); }; 1013 } 1014 1015 Expr ScriptParser::readParenExpr() { 1016 expect("("); 1017 Expr E = readExpr(); 1018 expect(")"); 1019 return E; 1020 } 1021 1022 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 1023 std::vector<StringRef> Phdrs; 1024 while (!Error && peek().startswith(":")) { 1025 StringRef Tok = next(); 1026 Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); 1027 } 1028 return Phdrs; 1029 } 1030 1031 // Read a program header type name. The next token must be a 1032 // name of a program header type or a constant (e.g. "0x3"). 1033 unsigned ScriptParser::readPhdrType() { 1034 StringRef Tok = next(); 1035 if (Optional<uint64_t> Val = parseInt(Tok)) 1036 return *Val; 1037 1038 unsigned Ret = StringSwitch<unsigned>(Tok) 1039 .Case("PT_NULL", PT_NULL) 1040 .Case("PT_LOAD", PT_LOAD) 1041 .Case("PT_DYNAMIC", PT_DYNAMIC) 1042 .Case("PT_INTERP", PT_INTERP) 1043 .Case("PT_NOTE", PT_NOTE) 1044 .Case("PT_SHLIB", PT_SHLIB) 1045 .Case("PT_PHDR", PT_PHDR) 1046 .Case("PT_TLS", PT_TLS) 1047 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1048 .Case("PT_GNU_STACK", PT_GNU_STACK) 1049 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1050 .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) 1051 .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) 1052 .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) 1053 .Default(-1); 1054 1055 if (Ret == (unsigned)-1) { 1056 setError("invalid program header type: " + Tok); 1057 return PT_NULL; 1058 } 1059 return Ret; 1060 } 1061 1062 // Reads an anonymous version declaration. 1063 void ScriptParser::readAnonymousDeclaration() { 1064 std::vector<SymbolVersion> Locals; 1065 std::vector<SymbolVersion> Globals; 1066 std::tie(Locals, Globals) = readSymbols(); 1067 1068 for (SymbolVersion V : Locals) { 1069 if (V.Name == "*") 1070 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1071 else 1072 Config->VersionScriptLocals.push_back(V); 1073 } 1074 1075 for (SymbolVersion V : Globals) 1076 Config->VersionScriptGlobals.push_back(V); 1077 1078 expect(";"); 1079 } 1080 1081 // Reads a non-anonymous version definition, 1082 // e.g. "VerStr { global: foo; bar; local: *; };". 1083 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1084 // Read a symbol list. 1085 std::vector<SymbolVersion> Locals; 1086 std::vector<SymbolVersion> Globals; 1087 std::tie(Locals, Globals) = readSymbols(); 1088 1089 for (SymbolVersion V : Locals) { 1090 if (V.Name == "*") 1091 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1092 else 1093 Config->VersionScriptLocals.push_back(V); 1094 } 1095 1096 // Create a new version definition and add that to the global symbols. 1097 VersionDefinition Ver; 1098 Ver.Name = VerStr; 1099 Ver.Globals = Globals; 1100 1101 // User-defined version number starts from 2 because 0 and 1 are 1102 // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively. 1103 Ver.Id = Config->VersionDefinitions.size() + 2; 1104 Config->VersionDefinitions.push_back(Ver); 1105 1106 // Each version may have a parent version. For example, "Ver2" 1107 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" 1108 // as a parent. This version hierarchy is, probably against your 1109 // instinct, purely for hint; the runtime doesn't care about it 1110 // at all. In LLD, we simply ignore it. 1111 if (peek() != ";") 1112 skip(); 1113 expect(";"); 1114 } 1115 1116 static bool hasWildcard(StringRef S) { 1117 return S.find_first_of("?*[") != StringRef::npos; 1118 } 1119 1120 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 1121 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 1122 ScriptParser::readSymbols() { 1123 std::vector<SymbolVersion> Locals; 1124 std::vector<SymbolVersion> Globals; 1125 std::vector<SymbolVersion> *V = &Globals; 1126 1127 while (!Error) { 1128 if (consume("}")) 1129 break; 1130 if (consumeLabel("local")) { 1131 V = &Locals; 1132 continue; 1133 } 1134 if (consumeLabel("global")) { 1135 V = &Globals; 1136 continue; 1137 } 1138 1139 if (consume("extern")) { 1140 std::vector<SymbolVersion> Ext = readVersionExtern(); 1141 V->insert(V->end(), Ext.begin(), Ext.end()); 1142 } else { 1143 StringRef Tok = next(); 1144 V->push_back({unquote(Tok), false, hasWildcard(Tok)}); 1145 } 1146 expect(";"); 1147 } 1148 return {Locals, Globals}; 1149 } 1150 1151 // Reads an "extern C++" directive, e.g., 1152 // "extern "C++" { ns::*; "f(int, double)"; };" 1153 std::vector<SymbolVersion> ScriptParser::readVersionExtern() { 1154 StringRef Tok = next(); 1155 bool IsCXX = Tok == "\"C++\""; 1156 if (!IsCXX && Tok != "\"C\"") 1157 setError("Unknown language"); 1158 expect("{"); 1159 1160 std::vector<SymbolVersion> Ret; 1161 while (!Error && peek() != "}") { 1162 StringRef Tok = next(); 1163 bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); 1164 Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); 1165 expect(";"); 1166 } 1167 1168 expect("}"); 1169 return Ret; 1170 } 1171 1172 uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2, 1173 StringRef S3) { 1174 if (!consume(S1) && !consume(S2) && !consume(S3)) { 1175 setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); 1176 return 0; 1177 } 1178 expect("="); 1179 return readExpr()().getValue(); 1180 } 1181 1182 // Parse the MEMORY command as specified in: 1183 // https://sourceware.org/binutils/docs/ld/MEMORY.html 1184 // 1185 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... } 1186 void ScriptParser::readMemory() { 1187 expect("{"); 1188 while (!Error && !consume("}")) { 1189 StringRef Name = next(); 1190 1191 uint32_t Flags = 0; 1192 uint32_t NegFlags = 0; 1193 if (consume("(")) { 1194 std::tie(Flags, NegFlags) = readMemoryAttributes(); 1195 expect(")"); 1196 } 1197 expect(":"); 1198 1199 uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); 1200 expect(","); 1201 uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); 1202 1203 // Add the memory region to the region map (if it doesn't already exist). 1204 auto It = Script->Opt.MemoryRegions.find(Name); 1205 if (It != Script->Opt.MemoryRegions.end()) 1206 setError("region '" + Name + "' already defined"); 1207 else 1208 Script->Opt.MemoryRegions[Name] = {Name, Origin, Length, Flags, NegFlags}; 1209 } 1210 } 1211 1212 // This function parses the attributes used to match against section 1213 // flags when placing output sections in a memory region. These flags 1214 // are only used when an explicit memory region name is not used. 1215 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { 1216 uint32_t Flags = 0; 1217 uint32_t NegFlags = 0; 1218 bool Invert = false; 1219 1220 for (char C : next().lower()) { 1221 uint32_t Flag = 0; 1222 if (C == '!') 1223 Invert = !Invert; 1224 else if (C == 'w') 1225 Flag = SHF_WRITE; 1226 else if (C == 'x') 1227 Flag = SHF_EXECINSTR; 1228 else if (C == 'a') 1229 Flag = SHF_ALLOC; 1230 else if (C != 'r') 1231 setError("invalid memory region attribute"); 1232 1233 if (Invert) 1234 NegFlags |= Flag; 1235 else 1236 Flags |= Flag; 1237 } 1238 return {Flags, NegFlags}; 1239 } 1240 1241 void elf::readLinkerScript(MemoryBufferRef MB) { 1242 ScriptParser(MB).readLinkerScript(); 1243 } 1244 1245 void elf::readVersionScript(MemoryBufferRef MB) { 1246 ScriptParser(MB).readVersionScript(); 1247 } 1248 1249 void elf::readDynamicList(MemoryBufferRef MB) { 1250 ScriptParser(MB).readDynamicList(); 1251 } 1252