1 //===- ScriptParser.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains a recursive-descendent parser for linker scripts. 11 // Parsed results are stored to Config and Script global objects. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ScriptParser.h" 16 #include "Config.h" 17 #include "Driver.h" 18 #include "InputSection.h" 19 #include "LinkerScript.h" 20 #include "Memory.h" 21 #include "OutputSections.h" 22 #include "ScriptLexer.h" 23 #include "Symbols.h" 24 #include "Target.h" 25 #include "llvm/ADT/SmallString.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/ADT/StringSwitch.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/Support/Casting.h" 30 #include "llvm/Support/ErrorHandling.h" 31 #include "llvm/Support/FileSystem.h" 32 #include "llvm/Support/Path.h" 33 #include <cassert> 34 #include <limits> 35 #include <vector> 36 37 using namespace llvm; 38 using namespace llvm::ELF; 39 using namespace llvm::support::endian; 40 using namespace lld; 41 using namespace lld::elf; 42 43 static bool isUnderSysroot(StringRef Path); 44 45 namespace { 46 class ScriptParser final : ScriptLexer { 47 public: 48 ScriptParser(MemoryBufferRef MB) 49 : ScriptLexer(MB), 50 IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} 51 52 void readLinkerScript(); 53 void readVersionScript(); 54 void readDynamicList(); 55 56 private: 57 void addFile(StringRef Path); 58 OutputSection *checkSection(OutputSection *Cmd, StringRef Loccation); 59 60 void readAsNeeded(); 61 void readEntry(); 62 void readExtern(); 63 void readGroup(); 64 void readInclude(); 65 void readMemory(); 66 void readOutput(); 67 void readOutputArch(); 68 void 
readOutputFormat(); 69 void readPhdrs(); 70 void readSearchDir(); 71 void readSections(); 72 void readVersion(); 73 void readVersionScriptCommand(); 74 75 SymbolAssignment *readAssignment(StringRef Name); 76 BytesDataCommand *readBytesDataCommand(StringRef Tok); 77 uint32_t readFill(); 78 uint32_t parseFill(StringRef Tok); 79 void readSectionAddressType(OutputSection *Cmd); 80 OutputSection *readOutputSectionDescription(StringRef OutSec); 81 std::vector<StringRef> readOutputSectionPhdrs(); 82 InputSectionDescription *readInputSectionDescription(StringRef Tok); 83 StringMatcher readFilePatterns(); 84 std::vector<SectionPattern> readInputSectionsList(); 85 InputSectionDescription *readInputSectionRules(StringRef FilePattern); 86 unsigned readPhdrType(); 87 SortSectionPolicy readSortKind(); 88 SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); 89 SymbolAssignment *readProvideOrAssignment(StringRef Tok); 90 void readSort(); 91 AssertCommand *readAssert(); 92 Expr readAssertExpr(); 93 Expr readConstant(); 94 Expr getPageSize(); 95 96 uint64_t readMemoryAssignment(StringRef, StringRef, StringRef); 97 std::pair<uint32_t, uint32_t> readMemoryAttributes(); 98 99 Expr readExpr(); 100 Expr readExpr1(Expr Lhs, int MinPrec); 101 StringRef readParenLiteral(); 102 Expr readPrimary(); 103 Expr readTernary(Expr Cond); 104 Expr readParenExpr(); 105 106 // For parsing version script. 
107 std::vector<SymbolVersion> readVersionExtern(); 108 void readAnonymousDeclaration(); 109 void readVersionDeclaration(StringRef VerStr); 110 111 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 112 readSymbols(); 113 114 bool IsUnderSysroot; 115 }; 116 } // namespace 117 118 static StringRef unquote(StringRef S) { 119 if (S.startswith("\"")) 120 return S.substr(1, S.size() - 2); 121 return S; 122 } 123 124 static bool isUnderSysroot(StringRef Path) { 125 if (Config->Sysroot == "") 126 return false; 127 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 128 if (sys::fs::equivalent(Config->Sysroot, Path)) 129 return true; 130 return false; 131 } 132 133 // Some operations only support one non absolute value. Move the 134 // absolute one to the right hand side for convenience. 135 static void moveAbsRight(ExprValue &A, ExprValue &B) { 136 if (A.isAbsolute()) 137 std::swap(A, B); 138 if (!B.isAbsolute()) 139 error(A.Loc + ": at least one side of the expression must be absolute"); 140 } 141 142 static ExprValue add(ExprValue A, ExprValue B) { 143 moveAbsRight(A, B); 144 uint64_t Val = alignTo(A.Val, A.Alignment) + B.getValue(); 145 return {A.Sec, A.ForceAbsolute, Val, A.Loc}; 146 } 147 148 static ExprValue sub(ExprValue A, ExprValue B) { 149 uint64_t Val = alignTo(A.Val, A.Alignment) - B.getValue(); 150 return {A.Sec, Val, A.Loc}; 151 } 152 153 static ExprValue mul(ExprValue A, ExprValue B) { 154 return A.getValue() * B.getValue(); 155 } 156 157 static ExprValue div(ExprValue A, ExprValue B) { 158 if (uint64_t BV = B.getValue()) 159 return A.getValue() / BV; 160 error("division by zero"); 161 return 0; 162 } 163 164 static ExprValue bitAnd(ExprValue A, ExprValue B) { 165 moveAbsRight(A, B); 166 return {A.Sec, A.ForceAbsolute, 167 (A.getValue() & B.getValue()) - A.getSecAddr(), A.Loc}; 168 } 169 170 static ExprValue bitOr(ExprValue A, ExprValue B) { 171 moveAbsRight(A, B); 172 return {A.Sec, A.ForceAbsolute, 173 (A.getValue() | B.getValue()) - 
A.getSecAddr(), A.Loc}; 174 } 175 176 void ScriptParser::readDynamicList() { 177 expect("{"); 178 readAnonymousDeclaration(); 179 if (!atEOF()) 180 setError("EOF expected, but got " + next()); 181 } 182 183 void ScriptParser::readVersionScript() { 184 readVersionScriptCommand(); 185 if (!atEOF()) 186 setError("EOF expected, but got " + next()); 187 } 188 189 void ScriptParser::readVersionScriptCommand() { 190 if (consume("{")) { 191 readAnonymousDeclaration(); 192 return; 193 } 194 195 while (!atEOF() && !ErrorCount && peek() != "}") { 196 StringRef VerStr = next(); 197 if (VerStr == "{") { 198 setError("anonymous version definition is used in " 199 "combination with other version definitions"); 200 return; 201 } 202 expect("{"); 203 readVersionDeclaration(VerStr); 204 } 205 } 206 207 void ScriptParser::readVersion() { 208 expect("{"); 209 readVersionScriptCommand(); 210 expect("}"); 211 } 212 213 void ScriptParser::readLinkerScript() { 214 while (!atEOF()) { 215 StringRef Tok = next(); 216 if (Tok == ";") 217 continue; 218 219 if (Tok == "ASSERT") { 220 Script->Opt.Commands.push_back(readAssert()); 221 } else if (Tok == "ENTRY") { 222 readEntry(); 223 } else if (Tok == "EXTERN") { 224 readExtern(); 225 } else if (Tok == "GROUP" || Tok == "INPUT") { 226 readGroup(); 227 } else if (Tok == "INCLUDE") { 228 readInclude(); 229 } else if (Tok == "MEMORY") { 230 readMemory(); 231 } else if (Tok == "OUTPUT") { 232 readOutput(); 233 } else if (Tok == "OUTPUT_ARCH") { 234 readOutputArch(); 235 } else if (Tok == "OUTPUT_FORMAT") { 236 readOutputFormat(); 237 } else if (Tok == "PHDRS") { 238 readPhdrs(); 239 } else if (Tok == "SEARCH_DIR") { 240 readSearchDir(); 241 } else if (Tok == "SECTIONS") { 242 readSections(); 243 } else if (Tok == "VERSION") { 244 readVersion(); 245 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { 246 Script->Opt.Commands.push_back(Cmd); 247 } else { 248 setError("unknown directive: " + Tok); 249 } 250 } 251 } 252 253 void 
ScriptParser::addFile(StringRef S) { 254 if (IsUnderSysroot && S.startswith("/")) { 255 SmallString<128> PathData; 256 StringRef Path = (Config->Sysroot + S).toStringRef(PathData); 257 if (sys::fs::exists(Path)) { 258 Driver->addFile(Saver.save(Path), /*WithLOption=*/false); 259 return; 260 } 261 } 262 263 if (S.startswith("/")) { 264 Driver->addFile(S, /*WithLOption=*/false); 265 } else if (S.startswith("=")) { 266 if (Config->Sysroot.empty()) 267 Driver->addFile(S.substr(1), /*WithLOption=*/false); 268 else 269 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)), 270 /*WithLOption=*/false); 271 } else if (S.startswith("-l")) { 272 Driver->addLibrary(S.substr(2)); 273 } else if (sys::fs::exists(S)) { 274 Driver->addFile(S, /*WithLOption=*/false); 275 } else { 276 if (Optional<std::string> Path = findFromSearchPaths(S)) 277 Driver->addFile(Saver.save(*Path), /*WithLOption=*/true); 278 else 279 setError("unable to find " + S); 280 } 281 } 282 283 void ScriptParser::readAsNeeded() { 284 expect("("); 285 bool Orig = Config->AsNeeded; 286 Config->AsNeeded = true; 287 while (!ErrorCount && !consume(")")) 288 addFile(unquote(next())); 289 Config->AsNeeded = Orig; 290 } 291 292 void ScriptParser::readEntry() { 293 // -e <symbol> takes predecence over ENTRY(<symbol>). 
294 expect("("); 295 StringRef Tok = next(); 296 if (Config->Entry.empty()) 297 Config->Entry = Tok; 298 expect(")"); 299 } 300 301 void ScriptParser::readExtern() { 302 expect("("); 303 while (!ErrorCount && !consume(")")) 304 Config->Undefined.push_back(next()); 305 } 306 307 void ScriptParser::readGroup() { 308 expect("("); 309 while (!ErrorCount && !consume(")")) { 310 if (consume("AS_NEEDED")) 311 readAsNeeded(); 312 else 313 addFile(unquote(next())); 314 } 315 } 316 317 void ScriptParser::readInclude() { 318 StringRef Tok = unquote(next()); 319 320 // https://sourceware.org/binutils/docs/ld/File-Commands.html: 321 // The file will be searched for in the current directory, and in any 322 // directory specified with the -L option. 323 if (sys::fs::exists(Tok)) { 324 if (Optional<MemoryBufferRef> MB = readFile(Tok)) 325 tokenize(*MB); 326 return; 327 } 328 if (Optional<std::string> Path = findFromSearchPaths(Tok)) { 329 if (Optional<MemoryBufferRef> MB = readFile(*Path)) 330 tokenize(*MB); 331 return; 332 } 333 setError("cannot open " + Tok); 334 } 335 336 void ScriptParser::readOutput() { 337 // -o <file> takes predecence over OUTPUT(<file>). 338 expect("("); 339 StringRef Tok = next(); 340 if (Config->OutputFile.empty()) 341 Config->OutputFile = unquote(Tok); 342 expect(")"); 343 } 344 345 void ScriptParser::readOutputArch() { 346 // OUTPUT_ARCH is ignored for now. 347 expect("("); 348 while (!ErrorCount && !consume(")")) 349 skip(); 350 } 351 352 void ScriptParser::readOutputFormat() { 353 // Error checking only for now. 
354 expect("("); 355 skip(); 356 if (consume(")")) 357 return; 358 expect(","); 359 skip(); 360 expect(","); 361 skip(); 362 expect(")"); 363 } 364 365 void ScriptParser::readPhdrs() { 366 expect("{"); 367 while (!ErrorCount && !consume("}")) { 368 Script->Opt.PhdrsCommands.push_back( 369 {next(), PT_NULL, false, false, UINT_MAX, nullptr}); 370 371 PhdrsCommand &PhdrCmd = Script->Opt.PhdrsCommands.back(); 372 PhdrCmd.Type = readPhdrType(); 373 374 while (!ErrorCount && !consume(";")) { 375 if (consume("FILEHDR")) 376 PhdrCmd.HasFilehdr = true; 377 else if (consume("PHDRS")) 378 PhdrCmd.HasPhdrs = true; 379 else if (consume("AT")) 380 PhdrCmd.LMAExpr = readParenExpr(); 381 else if (consume("FLAGS")) 382 PhdrCmd.Flags = readParenExpr()().getValue(); 383 else 384 setError("unexpected header attribute: " + next()); 385 } 386 } 387 } 388 389 void ScriptParser::readSearchDir() { 390 expect("("); 391 StringRef Tok = next(); 392 if (!Config->Nostdlib) 393 Config->SearchPaths.push_back(unquote(Tok)); 394 expect(")"); 395 } 396 397 void ScriptParser::readSections() { 398 Script->Opt.HasSections = true; 399 400 // -no-rosegment is used to avoid placing read only non-executable sections in 401 // their own segment. We do the same if SECTIONS command is present in linker 402 // script. See comment for computeFlags(). 
403 Config->SingleRoRx = true; 404 405 expect("{"); 406 while (!ErrorCount && !consume("}")) { 407 StringRef Tok = next(); 408 BaseCommand *Cmd = readProvideOrAssignment(Tok); 409 if (!Cmd) { 410 if (Tok == "ASSERT") 411 Cmd = readAssert(); 412 else 413 Cmd = readOutputSectionDescription(Tok); 414 } 415 Script->Opt.Commands.push_back(Cmd); 416 } 417 } 418 419 static int precedence(StringRef Op) { 420 return StringSwitch<int>(Op) 421 .Cases("*", "/", 5) 422 .Cases("+", "-", 4) 423 .Cases("<<", ">>", 3) 424 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 425 .Cases("&", "|", 1) 426 .Default(-1); 427 } 428 429 StringMatcher ScriptParser::readFilePatterns() { 430 std::vector<StringRef> V; 431 while (!ErrorCount && !consume(")")) 432 V.push_back(next()); 433 return StringMatcher(V); 434 } 435 436 SortSectionPolicy ScriptParser::readSortKind() { 437 if (consume("SORT") || consume("SORT_BY_NAME")) 438 return SortSectionPolicy::Name; 439 if (consume("SORT_BY_ALIGNMENT")) 440 return SortSectionPolicy::Alignment; 441 if (consume("SORT_BY_INIT_PRIORITY")) 442 return SortSectionPolicy::Priority; 443 if (consume("SORT_NONE")) 444 return SortSectionPolicy::None; 445 return SortSectionPolicy::Default; 446 } 447 448 // Reads SECTIONS command contents in the following form: 449 // 450 // <contents> ::= <elem>* 451 // <elem> ::= <exclude>? <glob-pattern> 452 // <exclude> ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")" 453 // 454 // For example, 455 // 456 // *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz) 457 // 458 // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o". 459 // The semantics of that is section .foo in any file, section .bar in 460 // any file but a.o, and section .baz in any file but b.o. 
461 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 462 std::vector<SectionPattern> Ret; 463 while (!ErrorCount && peek() != ")") { 464 StringMatcher ExcludeFilePat; 465 if (consume("EXCLUDE_FILE")) { 466 expect("("); 467 ExcludeFilePat = readFilePatterns(); 468 } 469 470 std::vector<StringRef> V; 471 while (!ErrorCount && peek() != ")" && peek() != "EXCLUDE_FILE") 472 V.push_back(next()); 473 474 if (!V.empty()) 475 Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); 476 else 477 setError("section pattern is expected"); 478 } 479 return Ret; 480 } 481 482 // Reads contents of "SECTIONS" directive. That directive contains a 483 // list of glob patterns for input sections. The grammar is as follows. 484 // 485 // <patterns> ::= <section-list> 486 // | <sort> "(" <section-list> ")" 487 // | <sort> "(" <sort> "(" <section-list> ")" ")" 488 // 489 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" 490 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" 491 // 492 // <section-list> is parsed by readInputSectionsList(). 
InputSectionDescription *
ScriptParser::readInputSectionRules(StringRef FilePattern) {
  auto *Cmd = make<InputSectionDescription>(FilePattern);
  expect("(");

  while (!ErrorCount && !consume(")")) {
    // Up to two nested sort keywords may wrap a section list.
    SortSectionPolicy Outer = readSortKind();
    SortSectionPolicy Inner = SortSectionPolicy::Default;
    std::vector<SectionPattern> V;
    if (Outer != SortSectionPolicy::Default) {
      expect("(");
      Inner = readSortKind();
      if (Inner != SortSectionPolicy::Default) {
        expect("(");
        V = readInputSectionsList();
        expect(")");
      } else {
        V = readInputSectionsList();
      }
      expect(")");
    } else {
      V = readInputSectionsList();
    }

    // Every pattern read in this round gets the same sort policies.
    for (SectionPattern &Pat : V) {
      Pat.SortInner = Inner;
      Pat.SortOuter = Outer;
    }

    std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns));
  }
  return Cmd;
}

// Reads an input section description whose first token is Tok. Descriptions
// wrapped in KEEP(...) are additionally recorded in Script->Opt.KeptSections.
InputSectionDescription *
ScriptParser::readInputSectionDescription(StringRef Tok) {
  // Input section wildcard can be surrounded by KEEP.
  // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep
  if (Tok == "KEEP") {
    expect("(");
    StringRef FilePattern = next();
    InputSectionDescription *Cmd = readInputSectionRules(FilePattern);
    expect(")");
    Script->Opt.KeptSections.push_back(Cmd);
    return Cmd;
  }
  return readInputSectionRules(Tok);
}

// Reads the "(CONSTRUCTORS)" that follows a SORT keyword inside an output
// section description. Nothing is recorded.
void ScriptParser::readSort() {
  expect("(");
  expect("CONSTRUCTORS");
  expect(")");
}

// Reads ASSERT(expr, message) and wraps it in an AssertCommand.
AssertCommand *ScriptParser::readAssert() {
  return make<AssertCommand>(readAssertExpr());
}

// Reads "(expr, message)" and returns an expression that, when evaluated,
// reports `message` as an error if `expr` is zero. The expression's own
// value is the current location counter.
Expr ScriptParser::readAssertExpr() {
  expect("(");
  Expr E = readExpr();
  expect(",");
  StringRef Msg = unquote(next());
  expect(")");

  return [=] {
    if (!E().getValue())
      error(Msg);
    return Script->getDot();
  };
}

// Reads a FILL(expr) command. We handle the FILL command as an
// alias for =fillexp section attribute, which is different from
// what GNU linkers do.
// https://sourceware.org/binutils/docs/ld/Output-Section-Data.html
uint32_t ScriptParser::readFill() {
  expect("(");
  uint32_t V = parseFill(next());
  expect(")");
  return V;
}

// Reads an expression and/or the special directive "(NOLOAD)" for an
// output section definition.
//
// An output section name can be followed by an address expression
// and/or by "(NOLOAD)". This grammar is not LL(1) because "(" can be
// interpreted as either the beginning of some expression or "(NOLOAD)".
//
// https://sourceware.org/binutils/docs/ld/Output-Section-Address.html
// https://sourceware.org/binutils/docs/ld/Output-Section-Type.html
void ScriptParser::readSectionAddressType(OutputSection *Cmd) {
  if (consume("(")) {
    // "(NOLOAD)" without a preceding address expression.
    if (consume("NOLOAD")) {
      expect(")");
      Cmd->Noload = true;
      return;
    }
    // Otherwise the paren opened a parenthesized address expression.
    Cmd->AddrExpr = readExpr();
    expect(")");
  } else {
    Cmd->AddrExpr = readExpr();
  }

  // An address expression may still be followed by "(NOLOAD)".
  if (consume("(")) {
    expect("NOLOAD");
    expect(")");
    Cmd->Noload = true;
  }
}

// Reads an output section description whose name is OutSec:
// the optional address/type, ":", optional AT/ALIGN/SUBALIGN and
// constraint, the "{ ... }" body, and the optional trailing
// memory region, program headers and fill expression.
OutputSection *ScriptParser::readOutputSectionDescription(StringRef OutSec) {
  OutputSection *Cmd =
      Script->createOutputSection(OutSec, getCurrentLocation());

  if (peek() != ":")
    readSectionAddressType(Cmd);
  expect(":");

  if (consume("AT"))
    Cmd->LMAExpr = readParenExpr();
  if (consume("ALIGN"))
    Cmd->AlignExpr = readParenExpr();
  if (consume("SUBALIGN"))
    Cmd->SubalignExpr = readParenExpr();

  // Parse constraints.
  if (consume("ONLY_IF_RO"))
    Cmd->Constraint = ConstraintKind::ReadOnly;
  if (consume("ONLY_IF_RW"))
    Cmd->Constraint = ConstraintKind::ReadWrite;
  expect("{");

  while (!ErrorCount && !consume("}")) {
    StringRef Tok = next();
    if (Tok == ";") {
      // Empty commands are allowed. Do nothing here.
    } else if (SymbolAssignment *Assign = readProvideOrAssignment(Tok)) {
      Cmd->Commands.push_back(Assign);
    } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) {
      Cmd->Commands.push_back(Data);
    } else if (Tok == "ASSERT") {
      Cmd->Commands.push_back(readAssert());
      expect(";");
    } else if (Tok == "CONSTRUCTORS") {
      // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors
      // by name. This is for very old file formats such as ECOFF/XCOFF.
      // For ELF, we should ignore.
    } else if (Tok == "FILL") {
      Cmd->Filler = readFill();
    } else if (Tok == "SORT") {
      readSort();
    } else if (peek() == "(") {
      // Anything else followed by "(" is an input section description.
      Cmd->Commands.push_back(readInputSectionDescription(Tok));
    } else {
      setError("unknown command " + Tok);
    }
  }

  // The memory region may be written as "> name" or ">name".
  if (consume(">"))
    Cmd->MemoryRegionName = next();
  else if (peek().startswith(">"))
    Cmd->MemoryRegionName = next().drop_front();

  Cmd->Phdrs = readOutputSectionPhdrs();

  // Likewise the fill expression may be written as "= value" or "=value".
  if (consume("="))
    Cmd->Filler = parseFill(next());
  else if (peek().startswith("="))
    Cmd->Filler = parseFill(next().drop_front());

  // Consume optional comma following output section command.
  consume(",");

  return Cmd;
}

// Parses a given string as an octal/decimal/hexadecimal number and
// returns it as a big-endian number. Used for `=<fillexp>`.
// https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html
//
// When reading a hexstring, ld.bfd handles it as a blob of arbitrary
// size, while ld.gold always handles it as a 32-bit big-endian number.
// We are compatible with ld.gold because it's easier to implement.
uint32_t ScriptParser::parseFill(StringRef Tok) {
  uint32_t V = 0;
  if (!to_integer(Tok, V))
    setError("invalid filler expression: " + Tok);

  // Store the value in big-endian byte order.
  uint32_t Buf;
  write32be(&Buf, V);
  return Buf;
}

// Reads the "(sym = expr);" part of PROVIDE, HIDDEN or PROVIDE_HIDDEN and
// sets the corresponding flags on the resulting assignment.
SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) {
  expect("(");
  SymbolAssignment *Cmd = readAssignment(next());
  Cmd->Provide = Provide;
  Cmd->Hidden = Hidden;
  expect(")");
  expect(";");
  return Cmd;
}

// If Tok begins a symbol assignment (plain, PROVIDE, HIDDEN or
// PROVIDE_HIDDEN), reads and returns it. Otherwise returns nullptr without
// consuming anything.
SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) {
  SymbolAssignment *Cmd = nullptr;
  if (peek() == "=" || peek() == "+=") {
    Cmd = readAssignment(Tok);
    expect(";");
  } else if (Tok == "PROVIDE") {
    Cmd = readProvideHidden(true, false);
  } else if (Tok == "HIDDEN") {
    Cmd = readProvideHidden(false, true);
  } else if (Tok == "PROVIDE_HIDDEN") {
    Cmd = readProvideHidden(true, true);
  }
  return Cmd;
}

// Reads "= expr" or "+= expr" for the symbol Name.
SymbolAssignment *ScriptParser::readAssignment(StringRef Name) {
  StringRef Op = next();
  // NOTE(review): readProvideHidden() calls this without first checking the
  // operator token, so this assert looks reachable from malformed input such
  // as "PROVIDE(foo)" -- confirm and consider a proper diagnostic.
  assert(Op == "=" || Op == "+=");
  Expr E = readExpr();
  if (Op == "+=") {
    // "sym += expr" is evaluated as "sym = sym + expr".
    std::string Loc = getCurrentLocation();
    E = [=] { return add(Script->getSymbolValue(Loc, Name), E()); };
  }
  return make<SymbolAssignment>(Name, E, getCurrentLocation());
}

// This is an operator-precedence parser to parse a linker
// script expression.
Expr ScriptParser::readExpr() {
  // Our lexer is context-aware. Set the in-expression bit so that
  // it applies different tokenization rules.
  bool Orig = InExpr;
  InExpr = true;
  Expr E = readExpr1(readPrimary(), 0);
  InExpr = Orig;
  return E;
}

// Builds the expression "L Op R". Op must be one of the binary operators
// known to precedence().
static Expr combine(StringRef Op, Expr L, Expr R) {
  if (Op == "+")
    return [=] { return add(L(), R()); };
  if (Op == "-")
    return [=] { return sub(L(), R()); };
  if (Op == "*")
    return [=] { return mul(L(), R()); };
  if (Op == "/")
    return [=] { return div(L(), R()); };
  if (Op == "<<")
    return [=] { return L().getValue() << R().getValue(); };
  if (Op == ">>")
    return [=] { return L().getValue() >> R().getValue(); };
  if (Op == "<")
    return [=] { return L().getValue() < R().getValue(); };
  if (Op == ">")
    return [=] { return L().getValue() > R().getValue(); };
  if (Op == ">=")
    return [=] { return L().getValue() >= R().getValue(); };
  if (Op == "<=")
    return [=] { return L().getValue() <= R().getValue(); };
  if (Op == "==")
    return [=] { return L().getValue() == R().getValue(); };
  if (Op == "!=")
    return [=] { return L().getValue() != R().getValue(); };
  if (Op == "&")
    return [=] { return bitAnd(L(), R()); };
  if (Op == "|")
    return [=] { return bitOr(L(), R()); };
  llvm_unreachable("invalid operator");
}

// This is a part of the operator-precedence parser. This function
// assumes that the remaining token stream starts with an operator.
Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) {
  while (!atEOF() && !ErrorCount) {
    // Read an operator and an expression.
    if (consume("?"))
      return readTernary(Lhs);
    StringRef Op1 = peek();
    // Stop at anything that is not an operator (precedence -1) or that
    // binds less tightly than the caller requires.
    if (precedence(Op1) < MinPrec)
      break;
    skip();
    Expr Rhs = readPrimary();

    // Evaluate the remaining part of the expression first if the
    // next operator has greater precedence than the previous one.
    // For example, if we have read "+" and "3", and if the next
    // operator is "*", then we'll evaluate 3 * ... part first.
    while (!atEOF()) {
      StringRef Op2 = peek();
      if (precedence(Op2) <= precedence(Op1))
        break;
      Rhs = readExpr1(Rhs, precedence(Op2));
    }

    Lhs = combine(Op1, Lhs, Rhs);
  }
  return Lhs;
}

// Returns an expression that evaluates to Target->PageSize. If no target is
// available at evaluation time, an error is reported and 4096 is used.
Expr ScriptParser::getPageSize() {
  std::string Location = getCurrentLocation();
  return [=]() -> uint64_t {
    if (Target)
      return Target->PageSize;
    error(Location + ": unable to calculate page size");
    return 4096; // Return a dummy value.
  };
}

// Reads the "(name)" operand of CONSTANT. Only COMMONPAGESIZE and
// MAXPAGESIZE are recognized; any other name is an error.
Expr ScriptParser::readConstant() {
  StringRef S = readParenLiteral();
  if (S == "COMMONPAGESIZE")
    return getPageSize();
  if (S == "MAXPAGESIZE")
    return [] { return Config->MaxPageSize; };
  setError("unknown constant: " + S);
  return {};
}

// Parses Tok as an integer. It recognizes hexadecimal (prefixed with
// "0x" or suffixed with "H") and decimal numbers. Decimal numbers may
// have "K" (Ki) or "M" (Mi) suffixes.
static Optional<uint64_t> parseInt(StringRef Tok) {
  // Negative number
  if (Tok.startswith("-")) {
    if (Optional<uint64_t> Val = parseInt(Tok.substr(1)))
      return -*Val;
    return None;
  }

  // Hexadecimal
  uint64_t Val;
  if (Tok.startswith_lower("0x") && to_integer(Tok.substr(2), Val, 16))
    return Val;
  if (Tok.endswith_lower("H") && to_integer(Tok.drop_back(), Val, 16))
    return Val;

  // Decimal
  if (Tok.endswith_lower("K")) {
    if (!to_integer(Tok.drop_back(), Val, 10))
      return None;
    return Val * 1024;
  }
  if (Tok.endswith_lower("M")) {
    if (!to_integer(Tok.drop_back(), Val, 10))
      return None;
    return Val * 1024 * 1024;
  }
  if (!to_integer(Tok, Val, 10))
    return None;
  return Val;
}

// If Tok is BYTE, SHORT, LONG or QUAD, reads the parenthesized expression
// that follows and returns a data command of 1, 2, 4 or 8 bytes
// respectively. Otherwise returns nullptr without consuming anything.
BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) {
  int Size = StringSwitch<int>(Tok)
                 .Case("BYTE", 1)
                 .Case("SHORT", 2)
                 .Case("LONG", 4)
                 .Case("QUAD", 8)
                 .Default(-1);
  if (Size == -1)
    return nullptr;

  return make<BytesDataCommand>(readParenExpr(), Size);
}

// Reads "(" <token> ")" and returns the token.
StringRef ScriptParser::readParenLiteral() {
  expect("(");
  StringRef Tok = next();
  expect(")");
  return Tok;
}

// Reports an error if Cmd has no recorded location and
// Script->ErrorOnMissingSection is set. Returns Cmd in either case.
OutputSection *ScriptParser::checkSection(OutputSection *Cmd,
                                          StringRef Location) {
  if (Cmd->Location.empty() && Script->ErrorOnMissingSection)
    error(Location + ": undefined section " + Cmd->Name);
  return Cmd;
}

// Reads a primary expression: a parenthesized expression, a unary operator
// applied to a primary, a built-in function call, ".", a number, or a
// symbol name.
Expr ScriptParser::readPrimary() {
  if (peek() == "(")
    return readParenExpr();

  if (consume("~")) {
    Expr E = readPrimary();
    return [=] { return ~E().getValue(); };
  }
  if (consume("!")) {
    Expr E = readPrimary();
    return [=] { return !E().getValue(); };
  }
  if (consume("-")) {
    Expr E = readPrimary();
    return [=] { return -E().getValue(); };
  }

  StringRef Tok = next();
  std::string Location = getCurrentLocation();

  // Built-in functions are parsed here.
  // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html.
  if (Tok == "ABSOLUTE") {
    Expr Inner = readParenExpr();
    return [=] {
      ExprValue I = Inner();
      I.ForceAbsolute = true;
      return I;
    };
  }
  if (Tok == "ADDR") {
    StringRef Name = readParenLiteral();
    OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
    return [=]() -> ExprValue {
      return {checkSection(Cmd, Location), 0, Location};
    };
  }
  if (Tok == "ALIGN") {
    expect("(");
    Expr E = readExpr();
    // ALIGN(align): align the location counter. An alignment of 0 is
    // treated as 1.
    if (consume(")"))
      return [=] {
        return alignTo(Script->getDot(), std::max((uint64_t)1, E().getValue()));
      };
    // ALIGN(expr, align): attach the alignment to the value of expr.
    expect(",");
    Expr E2 = readExpr();
    expect(")");
    return [=] {
      ExprValue V = E();
      V.Alignment = std::max((uint64_t)1, E2().getValue());
      return V;
    };
  }
  if (Tok == "ALIGNOF") {
    StringRef Name = readParenLiteral();
    OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
    return [=] { return checkSection(Cmd, Location)->Alignment; };
  }
  if (Tok == "ASSERT")
    return readAssertExpr();
  if (Tok == "CONSTANT")
    return readConstant();
  if (Tok == "DATA_SEGMENT_ALIGN") {
    // The second argument is parsed but ignored.
    expect("(");
    Expr E = readExpr();
    expect(",");
    readExpr();
    expect(")");
    return [=] {
      return alignTo(Script->getDot(), std::max((uint64_t)1, E().getValue()));
    };
  }
  if (Tok == "DATA_SEGMENT_END") {
    expect("(");
    expect(".");
    expect(")");
    return [] { return Script->getDot(); };
  }
  if (Tok == "DATA_SEGMENT_RELRO_END") {
    // GNU linkers implement more complicated logic to handle
    // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and
    // just align to the next page boundary for simplicity.
    expect("(");
    readExpr();
    expect(",");
    readExpr();
    expect(")");
    Expr E = getPageSize();
    return [=] { return alignTo(Script->getDot(), E().getValue()); };
  }
  if (Tok == "DEFINED") {
    StringRef Name = readParenLiteral();
    return [=] { return Script->isDefined(Name) ? 1 : 0; };
  }
  if (Tok == "LENGTH") {
    StringRef Name = readParenLiteral();
    if (Script->Opt.MemoryRegions.count(Name) == 0)
      setError("memory region not defined: " + Name);
    return [=] { return Script->Opt.MemoryRegions[Name].Length; };
  }
  if (Tok == "LOADADDR") {
    StringRef Name = readParenLiteral();
    OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
    return [=] { return checkSection(Cmd, Location)->getLMA(); };
  }
  if (Tok == "ORIGIN") {
    StringRef Name = readParenLiteral();
    if (Script->Opt.MemoryRegions.count(Name) == 0)
      setError("memory region not defined: " + Name);
    return [=] { return Script->Opt.MemoryRegions[Name].Origin; };
  }
  if (Tok == "SEGMENT_START") {
    // The segment name is skipped; only the default address is used.
    expect("(");
    skip();
    expect(",");
    Expr E = readExpr();
    expect(")");
    return [=] { return E(); };
  }
  if (Tok == "SIZEOF") {
    StringRef Name = readParenLiteral();
    OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
    // Linker script does not create an output section if its content is empty.
    // We want to allow SIZEOF(.foo) where .foo is a section which happened to
    // be empty.
    return [=] { return Cmd->Size; };
  }
  if (Tok == "SIZEOF_HEADERS")
    return [=] { return elf::getHeaderSize(); };

  // Tok is the dot.
  if (Tok == ".")
    return [=] { return Script->getSymbolValue(Location, Tok); };

  // Tok is a literal number.
  if (Optional<uint64_t> Val = parseInt(Tok))
    return [=] { return *Val; };

  // Tok is a symbol name.
  if (!isValidCIdentifier(Tok))
    setError("malformed number: " + Tok);
  Script->Opt.ReferencedSymbols.push_back(Tok);
  return [=] { return Script->getSymbolValue(Location, Tok); };
}

// Reads the "then : else" part of a ternary expression whose condition
// (and the "?") have already been consumed.
Expr ScriptParser::readTernary(Expr Cond) {
  Expr L = readExpr();
  expect(":");
  Expr R = readExpr();
  return [=] { return Cond().getValue() ? L() : R(); };
}

// Reads "(" <expr> ")".
Expr ScriptParser::readParenExpr() {
  expect("(");
  Expr E = readExpr();
  expect(")");
  return E;
}

// Reads the ":phdr" list that may follow an output section description.
// Both ": name" and ":name" spellings are accepted.
std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() {
  std::vector<StringRef> Phdrs;
  while (!ErrorCount && peek().startswith(":")) {
    StringRef Tok = next();
    Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1));
  }
  return Phdrs;
}

// Read a program header type name. The next token must be a
// name of a program header type or a constant (e.g. "0x3").
unsigned ScriptParser::readPhdrType() {
  StringRef Tok = next();
  if (Optional<uint64_t> Val = parseInt(Tok))
    return *Val;

  unsigned Ret = StringSwitch<unsigned>(Tok)
                     .Case("PT_NULL", PT_NULL)
                     .Case("PT_LOAD", PT_LOAD)
                     .Case("PT_DYNAMIC", PT_DYNAMIC)
                     .Case("PT_INTERP", PT_INTERP)
                     .Case("PT_NOTE", PT_NOTE)
                     .Case("PT_SHLIB", PT_SHLIB)
                     .Case("PT_PHDR", PT_PHDR)
                     .Case("PT_TLS", PT_TLS)
                     .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME)
                     .Case("PT_GNU_STACK", PT_GNU_STACK)
                     .Case("PT_GNU_RELRO", PT_GNU_RELRO)
                     .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE)
                     .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED)
                     .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA)
                     .Default(-1);

  // -1 is the sentinel for an unrecognized type name.
  if (Ret == (unsigned)-1) {
    setError("invalid program header type: " + Tok);
    return PT_NULL;
  }
  return Ret;
}

// Reads an anonymous version declaration.
1079 void ScriptParser::readAnonymousDeclaration() { 1080 std::vector<SymbolVersion> Locals; 1081 std::vector<SymbolVersion> Globals; 1082 std::tie(Locals, Globals) = readSymbols(); 1083 1084 for (SymbolVersion V : Locals) { 1085 if (V.Name == "*") 1086 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1087 else 1088 Config->VersionScriptLocals.push_back(V); 1089 } 1090 1091 for (SymbolVersion V : Globals) 1092 Config->VersionScriptGlobals.push_back(V); 1093 1094 expect(";"); 1095 } 1096 1097 // Reads a non-anonymous version definition, 1098 // e.g. "VerStr { global: foo; bar; local: *; };". 1099 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1100 // Read a symbol list. 1101 std::vector<SymbolVersion> Locals; 1102 std::vector<SymbolVersion> Globals; 1103 std::tie(Locals, Globals) = readSymbols(); 1104 1105 for (SymbolVersion V : Locals) { 1106 if (V.Name == "*") 1107 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1108 else 1109 Config->VersionScriptLocals.push_back(V); 1110 } 1111 1112 // Create a new version definition and add that to the global symbols. 1113 VersionDefinition Ver; 1114 Ver.Name = VerStr; 1115 Ver.Globals = Globals; 1116 1117 // User-defined version number starts from 2 because 0 and 1 are 1118 // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively. 1119 Ver.Id = Config->VersionDefinitions.size() + 2; 1120 Config->VersionDefinitions.push_back(Ver); 1121 1122 // Each version may have a parent version. For example, "Ver2" 1123 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" 1124 // as a parent. This version hierarchy is, probably against your 1125 // instinct, purely for hint; the runtime doesn't care about it 1126 // at all. In LLD, we simply ignore it. 1127 if (peek() != ";") 1128 skip(); 1129 expect(";"); 1130 } 1131 1132 static bool hasWildcard(StringRef S) { 1133 return S.find_first_of("?*[") != StringRef::npos; 1134 } 1135 1136 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 
1137 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 1138 ScriptParser::readSymbols() { 1139 std::vector<SymbolVersion> Locals; 1140 std::vector<SymbolVersion> Globals; 1141 std::vector<SymbolVersion> *V = &Globals; 1142 1143 while (!ErrorCount) { 1144 if (consume("}")) 1145 break; 1146 if (consumeLabel("local")) { 1147 V = &Locals; 1148 continue; 1149 } 1150 if (consumeLabel("global")) { 1151 V = &Globals; 1152 continue; 1153 } 1154 1155 if (consume("extern")) { 1156 std::vector<SymbolVersion> Ext = readVersionExtern(); 1157 V->insert(V->end(), Ext.begin(), Ext.end()); 1158 } else { 1159 StringRef Tok = next(); 1160 V->push_back({unquote(Tok), false, hasWildcard(Tok)}); 1161 } 1162 expect(";"); 1163 } 1164 return {Locals, Globals}; 1165 } 1166 1167 // Reads an "extern C++" directive, e.g., 1168 // "extern "C++" { ns::*; "f(int, double)"; };" 1169 std::vector<SymbolVersion> ScriptParser::readVersionExtern() { 1170 StringRef Tok = next(); 1171 bool IsCXX = Tok == "\"C++\""; 1172 if (!IsCXX && Tok != "\"C\"") 1173 setError("Unknown language"); 1174 expect("{"); 1175 1176 std::vector<SymbolVersion> Ret; 1177 while (!ErrorCount && peek() != "}") { 1178 StringRef Tok = next(); 1179 bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); 1180 Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); 1181 expect(";"); 1182 } 1183 1184 expect("}"); 1185 return Ret; 1186 } 1187 1188 uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2, 1189 StringRef S3) { 1190 if (!consume(S1) && !consume(S2) && !consume(S3)) { 1191 setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); 1192 return 0; 1193 } 1194 expect("="); 1195 return readExpr()().getValue(); 1196 } 1197 1198 // Parse the MEMORY command as specified in: 1199 // https://sourceware.org/binutils/docs/ld/MEMORY.html 1200 // 1201 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... 
} 1202 void ScriptParser::readMemory() { 1203 expect("{"); 1204 while (!ErrorCount && !consume("}")) { 1205 StringRef Name = next(); 1206 1207 uint32_t Flags = 0; 1208 uint32_t NegFlags = 0; 1209 if (consume("(")) { 1210 std::tie(Flags, NegFlags) = readMemoryAttributes(); 1211 expect(")"); 1212 } 1213 expect(":"); 1214 1215 uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); 1216 expect(","); 1217 uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); 1218 1219 // Add the memory region to the region map (if it doesn't already exist). 1220 auto It = Script->Opt.MemoryRegions.find(Name); 1221 if (It != Script->Opt.MemoryRegions.end()) 1222 setError("region '" + Name + "' already defined"); 1223 else 1224 Script->Opt.MemoryRegions[Name] = {Name, Origin, Length, Flags, NegFlags}; 1225 } 1226 } 1227 1228 // This function parses the attributes used to match against section 1229 // flags when placing output sections in a memory region. These flags 1230 // are only used when an explicit memory region name is not used. 1231 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { 1232 uint32_t Flags = 0; 1233 uint32_t NegFlags = 0; 1234 bool Invert = false; 1235 1236 for (char C : next().lower()) { 1237 uint32_t Flag = 0; 1238 if (C == '!') 1239 Invert = !Invert; 1240 else if (C == 'w') 1241 Flag = SHF_WRITE; 1242 else if (C == 'x') 1243 Flag = SHF_EXECINSTR; 1244 else if (C == 'a') 1245 Flag = SHF_ALLOC; 1246 else if (C != 'r') 1247 setError("invalid memory region attribute"); 1248 1249 if (Invert) 1250 NegFlags |= Flag; 1251 else 1252 Flags |= Flag; 1253 } 1254 return {Flags, NegFlags}; 1255 } 1256 1257 void elf::readLinkerScript(MemoryBufferRef MB) { 1258 ScriptParser(MB).readLinkerScript(); 1259 } 1260 1261 void elf::readVersionScript(MemoryBufferRef MB) { 1262 ScriptParser(MB).readVersionScript(); 1263 } 1264 1265 void elf::readDynamicList(MemoryBufferRef MB) { 1266 ScriptParser(MB).readDynamicList(); 1267 } 1268