1 //===- ScriptParser.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains a recursive-descendent parser for linker scripts. 11 // Parsed results are stored to Config and Script global objects. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ScriptParser.h" 16 #include "Config.h" 17 #include "Driver.h" 18 #include "InputSection.h" 19 #include "LinkerScript.h" 20 #include "Memory.h" 21 #include "OutputSections.h" 22 #include "ScriptLexer.h" 23 #include "Symbols.h" 24 #include "Target.h" 25 #include "llvm/ADT/SmallString.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/ADT/StringSwitch.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/Support/Casting.h" 30 #include "llvm/Support/ErrorHandling.h" 31 #include "llvm/Support/FileSystem.h" 32 #include "llvm/Support/Path.h" 33 #include <cassert> 34 #include <limits> 35 #include <vector> 36 37 using namespace llvm; 38 using namespace llvm::ELF; 39 using namespace llvm::support::endian; 40 using namespace lld; 41 using namespace lld::elf; 42 43 static bool isUnderSysroot(StringRef Path); 44 45 namespace { 46 class ScriptParser final : ScriptLexer { 47 public: 48 ScriptParser(MemoryBufferRef MB) 49 : ScriptLexer(MB), 50 IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} 51 52 void readLinkerScript(); 53 void readVersionScript(); 54 void readDynamicList(); 55 56 private: 57 void addFile(StringRef Path); 58 OutputSection *checkSection(OutputSectionCommand *Cmd, StringRef Loccation); 59 60 void readAsNeeded(); 61 void readEntry(); 62 void readExtern(); 63 void readGroup(); 64 void readInclude(); 65 void readMemory(); 66 void readOutput(); 67 void readOutputArch(); 68 void readOutputFormat(); 69 void readPhdrs(); 70 void readSearchDir(); 71 void readSections(); 72 void readVersion(); 73 void readVersionScriptCommand(); 74 75 SymbolAssignment *readAssignment(StringRef Name); 76 BytesDataCommand *readBytesDataCommand(StringRef Tok); 77 uint32_t readFill(); 78 uint32_t parseFill(StringRef Tok); 79 void readSectionAddressType(OutputSectionCommand *Cmd); 80 OutputSectionCommand *readOutputSectionDescription(StringRef OutSec); 81 std::vector<StringRef> readOutputSectionPhdrs(); 82 InputSectionDescription *readInputSectionDescription(StringRef Tok); 83 StringMatcher readFilePatterns(); 84 std::vector<SectionPattern> readInputSectionsList(); 85 InputSectionDescription *readInputSectionRules(StringRef FilePattern); 86 unsigned readPhdrType(); 87 SortSectionPolicy readSortKind(); 88 SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); 89 SymbolAssignment *readProvideOrAssignment(StringRef Tok); 90 void readSort(); 91 AssertCommand *readAssert(); 92 Expr readAssertExpr(); 93 94 uint64_t readMemoryAssignment(StringRef, StringRef, StringRef); 95 std::pair<uint32_t, uint32_t> readMemoryAttributes(); 96 97 Expr readExpr(); 98 Expr readExpr1(Expr Lhs, int MinPrec); 99 StringRef readParenLiteral(); 100 Expr readPrimary(); 101 Expr readTernary(Expr Cond); 102 Expr readParenExpr(); 103 104 // For parsing version script. 105 std::vector<SymbolVersion> readVersionExtern(); 106 void readAnonymousDeclaration(); 107 void readVersionDeclaration(StringRef VerStr); 108 109 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 110 readSymbols(); 111 112 bool IsUnderSysroot; 113 }; 114 } // namespace 115 116 static bool isUnderSysroot(StringRef Path) { 117 if (Config->Sysroot == "") 118 return false; 119 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 120 if (sys::fs::equivalent(Config->Sysroot, Path)) 121 return true; 122 return false; 123 } 124 125 // Some operations only support one non absolute value. Move the 126 // absolute one to the right hand side for convenience. 127 static void moveAbsRight(ExprValue &A, ExprValue &B) { 128 if (A.isAbsolute()) 129 std::swap(A, B); 130 if (!B.isAbsolute()) 131 error(A.Loc + ": at least one side of the expression must be absolute"); 132 } 133 134 static ExprValue add(ExprValue A, ExprValue B) { 135 moveAbsRight(A, B); 136 return {A.Sec, A.ForceAbsolute, A.Val + B.getValue(), A.Loc}; 137 } 138 139 static ExprValue sub(ExprValue A, ExprValue B) { 140 return {A.Sec, A.Val - B.getValue(), A.Loc}; 141 } 142 143 static ExprValue mul(ExprValue A, ExprValue B) { 144 return A.getValue() * B.getValue(); 145 } 146 147 static ExprValue div(ExprValue A, ExprValue B) { 148 if (uint64_t BV = B.getValue()) 149 return A.getValue() / BV; 150 error("division by zero"); 151 return 0; 152 } 153 154 static ExprValue bitAnd(ExprValue A, ExprValue B) { 155 moveAbsRight(A, B); 156 return {A.Sec, A.ForceAbsolute, 157 (A.getValue() & B.getValue()) - A.getSecAddr(), A.Loc}; 158 } 159 160 static ExprValue bitOr(ExprValue A, ExprValue B) { 161 moveAbsRight(A, B); 162 return {A.Sec, A.ForceAbsolute, 163 (A.getValue() | B.getValue()) - A.getSecAddr(), A.Loc}; 164 } 165 166 void ScriptParser::readDynamicList() { 167 expect("{"); 168 readAnonymousDeclaration(); 169 if (!atEOF()) 170 setError("EOF expected, but got " + next()); 171 } 172 173 void ScriptParser::readVersionScript() { 174 readVersionScriptCommand(); 175 if (!atEOF()) 176 setError("EOF expected, but got " + next()); 177 } 178 179 void ScriptParser::readVersionScriptCommand() { 180 if (consume("{")) { 181 readAnonymousDeclaration(); 182 return; 183 } 184 185 while (!atEOF() && !Error && peek() != "}") { 186 StringRef VerStr = next(); 187 if (VerStr == "{") { 188 setError("anonymous version definition is used in " 189 "combination with other version definitions"); 190 return; 191 } 192 expect("{"); 193 readVersionDeclaration(VerStr); 194 } 195 } 196 197 void ScriptParser::readVersion() { 198 expect("{"); 199 readVersionScriptCommand(); 200 expect("}"); 201 } 202 203 void ScriptParser::readLinkerScript() { 204 while (!atEOF()) { 205 StringRef Tok = next(); 206 if (Tok == ";") 207 continue; 208 209 if (Tok == "ASSERT") { 210 Script->Opt.Commands.push_back(readAssert()); 211 } else if (Tok == "ENTRY") { 212 readEntry(); 213 } else if (Tok == "EXTERN") { 214 readExtern(); 215 } else if (Tok == "GROUP" || Tok == "INPUT") { 216 readGroup(); 217 } else if (Tok == "INCLUDE") { 218 readInclude(); 219 } else if (Tok == "MEMORY") { 220 readMemory(); 221 } else if (Tok == "OUTPUT") { 222 readOutput(); 223 } else if (Tok == "OUTPUT_ARCH") { 224 readOutputArch(); 225 } else if (Tok == "OUTPUT_FORMAT") { 226 readOutputFormat(); 227 } else if (Tok == "PHDRS") { 228 readPhdrs(); 229 } else if (Tok == "SEARCH_DIR") { 230 readSearchDir(); 231 } else if (Tok == "SECTIONS") { 232 readSections(); 233 } else if (Tok == "VERSION") { 234 readVersion(); 235 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { 236 Script->Opt.Commands.push_back(Cmd); 237 } else { 238 setError("unknown directive: " + Tok); 239 } 240 } 241 } 242 243 void ScriptParser::addFile(StringRef S) { 244 if (IsUnderSysroot && S.startswith("/")) { 245 SmallString<128> PathData; 246 StringRef Path = (Config->Sysroot + S).toStringRef(PathData); 247 if (sys::fs::exists(Path)) { 248 Driver->addFile(Saver.save(Path), /*WithLOption=*/false); 249 return; 250 } 251 } 252 253 if (sys::path::is_absolute(S)) { 254 Driver->addFile(S, /*WithLOption=*/false); 255 } else if (S.startswith("=")) { 256 if (Config->Sysroot.empty()) 257 Driver->addFile(S.substr(1), /*WithLOption=*/false); 258 else 259 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)), 260 /*WithLOption=*/false); 261 } else if (S.startswith("-l")) { 262 Driver->addLibrary(S.substr(2)); 263 } else if (sys::fs::exists(S)) { 264 Driver->addFile(S, /*WithLOption=*/false); 265 } else { 266 if (Optional<std::string> Path = findFromSearchPaths(S)) 267 Driver->addFile(Saver.save(*Path), /*WithLOption=*/true); 268 else 269 setError("unable to find " + S); 270 } 271 } 272 273 void ScriptParser::readAsNeeded() { 274 expect("("); 275 bool Orig = Config->AsNeeded; 276 Config->AsNeeded = true; 277 while (!Error && !consume(")")) 278 addFile(unquote(next())); 279 Config->AsNeeded = Orig; 280 } 281 282 void ScriptParser::readEntry() { 283 // -e <symbol> takes predecence over ENTRY(<symbol>). 284 expect("("); 285 StringRef Tok = next(); 286 if (Config->Entry.empty()) 287 Config->Entry = Tok; 288 expect(")"); 289 } 290 291 void ScriptParser::readExtern() { 292 expect("("); 293 while (!Error && !consume(")")) 294 Config->Undefined.push_back(next()); 295 } 296 297 void ScriptParser::readGroup() { 298 expect("("); 299 while (!Error && !consume(")")) { 300 if (consume("AS_NEEDED")) 301 readAsNeeded(); 302 else 303 addFile(unquote(next())); 304 } 305 } 306 307 void ScriptParser::readInclude() { 308 StringRef Tok = unquote(next()); 309 310 // https://sourceware.org/binutils/docs/ld/File-Commands.html: 311 // The file will be searched for in the current directory, and in any 312 // directory specified with the -L option. 313 if (sys::fs::exists(Tok)) { 314 if (Optional<MemoryBufferRef> MB = readFile(Tok)) 315 tokenize(*MB); 316 return; 317 } 318 if (Optional<std::string> Path = findFromSearchPaths(Tok)) { 319 if (Optional<MemoryBufferRef> MB = readFile(*Path)) 320 tokenize(*MB); 321 return; 322 } 323 setError("cannot open " + Tok); 324 } 325 326 void ScriptParser::readOutput() { 327 // -o <file> takes predecence over OUTPUT(<file>). 328 expect("("); 329 StringRef Tok = next(); 330 if (Config->OutputFile.empty()) 331 Config->OutputFile = unquote(Tok); 332 expect(")"); 333 } 334 335 void ScriptParser::readOutputArch() { 336 // OUTPUT_ARCH is ignored for now. 337 expect("("); 338 while (!Error && !consume(")")) 339 skip(); 340 } 341 342 void ScriptParser::readOutputFormat() { 343 // Error checking only for now. 344 expect("("); 345 skip(); 346 if (consume(")")) 347 return; 348 expect(","); 349 skip(); 350 expect(","); 351 skip(); 352 expect(")"); 353 } 354 355 void ScriptParser::readPhdrs() { 356 expect("{"); 357 while (!Error && !consume("}")) { 358 Script->Opt.PhdrsCommands.push_back( 359 {next(), PT_NULL, false, false, UINT_MAX, nullptr}); 360 361 PhdrsCommand &PhdrCmd = Script->Opt.PhdrsCommands.back(); 362 PhdrCmd.Type = readPhdrType(); 363 364 while (!Error && !consume(";")) { 365 if (consume("FILEHDR")) 366 PhdrCmd.HasFilehdr = true; 367 else if (consume("PHDRS")) 368 PhdrCmd.HasPhdrs = true; 369 else if (consume("AT")) 370 PhdrCmd.LMAExpr = readParenExpr(); 371 else if (consume("FLAGS")) 372 PhdrCmd.Flags = readParenExpr()().getValue(); 373 else 374 setError("unexpected header attribute: " + next()); 375 } 376 } 377 } 378 379 void ScriptParser::readSearchDir() { 380 expect("("); 381 StringRef Tok = next(); 382 if (!Config->Nostdlib) 383 Config->SearchPaths.push_back(unquote(Tok)); 384 expect(")"); 385 } 386 387 void ScriptParser::readSections() { 388 Script->Opt.HasSections = true; 389 390 // -no-rosegment is used to avoid placing read only non-executable sections in 391 // their own segment. We do the same if SECTIONS command is present in linker 392 // script. See comment for computeFlags(). 393 Config->SingleRoRx = true; 394 395 expect("{"); 396 while (!Error && !consume("}")) { 397 StringRef Tok = next(); 398 BaseCommand *Cmd = readProvideOrAssignment(Tok); 399 if (!Cmd) { 400 if (Tok == "ASSERT") 401 Cmd = readAssert(); 402 else 403 Cmd = readOutputSectionDescription(Tok); 404 } 405 Script->Opt.Commands.push_back(Cmd); 406 } 407 } 408 409 static int precedence(StringRef Op) { 410 return StringSwitch<int>(Op) 411 .Cases("*", "/", 5) 412 .Cases("+", "-", 4) 413 .Cases("<<", ">>", 3) 414 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 415 .Cases("&", "|", 1) 416 .Default(-1); 417 } 418 419 StringMatcher ScriptParser::readFilePatterns() { 420 std::vector<StringRef> V; 421 while (!Error && !consume(")")) 422 V.push_back(next()); 423 return StringMatcher(V); 424 } 425 426 SortSectionPolicy ScriptParser::readSortKind() { 427 if (consume("SORT") || consume("SORT_BY_NAME")) 428 return SortSectionPolicy::Name; 429 if (consume("SORT_BY_ALIGNMENT")) 430 return SortSectionPolicy::Alignment; 431 if (consume("SORT_BY_INIT_PRIORITY")) 432 return SortSectionPolicy::Priority; 433 if (consume("SORT_NONE")) 434 return SortSectionPolicy::None; 435 return SortSectionPolicy::Default; 436 } 437 438 // Reads SECTIONS command contents in the following form: 439 // 440 // <contents> ::= <elem>* 441 // <elem> ::= <exclude>? <glob-pattern> 442 // <exclude> ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")" 443 // 444 // For example, 445 // 446 // *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz) 447 // 448 // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o". 449 // The semantics of that is section .foo in any file, section .bar in 450 // any file but a.o, and section .baz in any file but b.o. 451 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 452 std::vector<SectionPattern> Ret; 453 while (!Error && peek() != ")") { 454 StringMatcher ExcludeFilePat; 455 if (consume("EXCLUDE_FILE")) { 456 expect("("); 457 ExcludeFilePat = readFilePatterns(); 458 } 459 460 std::vector<StringRef> V; 461 while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") 462 V.push_back(next()); 463 464 if (!V.empty()) 465 Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); 466 else 467 setError("section pattern is expected"); 468 } 469 return Ret; 470 } 471 472 // Reads contents of "SECTIONS" directive. That directive contains a 473 // list of glob patterns for input sections. The grammar is as follows. 474 // 475 // <patterns> ::= <section-list> 476 // | <sort> "(" <section-list> ")" 477 // | <sort> "(" <sort> "(" <section-list> ")" ")" 478 // 479 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" 480 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" 481 // 482 // <section-list> is parsed by readInputSectionsList(). 483 InputSectionDescription * 484 ScriptParser::readInputSectionRules(StringRef FilePattern) { 485 auto *Cmd = make<InputSectionDescription>(FilePattern); 486 expect("("); 487 488 while (!Error && !consume(")")) { 489 SortSectionPolicy Outer = readSortKind(); 490 SortSectionPolicy Inner = SortSectionPolicy::Default; 491 std::vector<SectionPattern> V; 492 if (Outer != SortSectionPolicy::Default) { 493 expect("("); 494 Inner = readSortKind(); 495 if (Inner != SortSectionPolicy::Default) { 496 expect("("); 497 V = readInputSectionsList(); 498 expect(")"); 499 } else { 500 V = readInputSectionsList(); 501 } 502 expect(")"); 503 } else { 504 V = readInputSectionsList(); 505 } 506 507 for (SectionPattern &Pat : V) { 508 Pat.SortInner = Inner; 509 Pat.SortOuter = Outer; 510 } 511 512 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 513 } 514 return Cmd; 515 } 516 517 InputSectionDescription * 518 ScriptParser::readInputSectionDescription(StringRef Tok) { 519 // Input section wildcard can be surrounded by KEEP. 520 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 521 if (Tok == "KEEP") { 522 expect("("); 523 StringRef FilePattern = next(); 524 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 525 expect(")"); 526 Script->Opt.KeptSections.push_back(Cmd); 527 return Cmd; 528 } 529 return readInputSectionRules(Tok); 530 } 531 532 void ScriptParser::readSort() { 533 expect("("); 534 expect("CONSTRUCTORS"); 535 expect(")"); 536 } 537 538 AssertCommand *ScriptParser::readAssert() { 539 return make<AssertCommand>(readAssertExpr()); 540 } 541 542 Expr ScriptParser::readAssertExpr() { 543 expect("("); 544 Expr E = readExpr(); 545 expect(","); 546 StringRef Msg = unquote(next()); 547 expect(")"); 548 549 return [=] { 550 if (!E().getValue()) 551 error(Msg); 552 return Script->getDot(); 553 }; 554 } 555 556 // Reads a FILL(expr) command. We handle the FILL command as an 557 // alias for =fillexp section attribute, which is different from 558 // what GNU linkers do. 559 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 560 uint32_t ScriptParser::readFill() { 561 expect("("); 562 uint32_t V = parseFill(next()); 563 expect(")"); 564 return V; 565 } 566 567 // Reads an expression and/or the special directive "(NOLOAD)" for an 568 // output section definition. 569 // 570 // An output section name can be followed by an address expression 571 // and/or by "(NOLOAD)". This grammar is not LL(1) because "(" can be 572 // interpreted as either the beginning of some expression or "(NOLOAD)". 573 // 574 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html 575 // https://sourceware.org/binutils/docs/ld/Output-Section-Type.html 576 void ScriptParser::readSectionAddressType(OutputSectionCommand *Cmd) { 577 if (consume("(")) { 578 if (consume("NOLOAD")) { 579 expect(")"); 580 Cmd->Noload = true; 581 return; 582 } 583 Cmd->AddrExpr = readExpr(); 584 expect(")"); 585 } else { 586 Cmd->AddrExpr = readExpr(); 587 } 588 589 if (consume("(")) { 590 expect("NOLOAD"); 591 expect(")"); 592 Cmd->Noload = true; 593 } 594 } 595 596 OutputSectionCommand * 597 ScriptParser::readOutputSectionDescription(StringRef OutSec) { 598 OutputSectionCommand *Cmd = 599 Script->createOutputSectionCommand(OutSec, getCurrentLocation()); 600 601 if (peek() != ":") 602 readSectionAddressType(Cmd); 603 expect(":"); 604 605 if (consume("AT")) 606 Cmd->LMAExpr = readParenExpr(); 607 if (consume("ALIGN")) 608 Cmd->AlignExpr = readParenExpr(); 609 if (consume("SUBALIGN")) 610 Cmd->SubalignExpr = readParenExpr(); 611 612 // Parse constraints. 613 if (consume("ONLY_IF_RO")) 614 Cmd->Constraint = ConstraintKind::ReadOnly; 615 if (consume("ONLY_IF_RW")) 616 Cmd->Constraint = ConstraintKind::ReadWrite; 617 expect("{"); 618 619 while (!Error && !consume("}")) { 620 StringRef Tok = next(); 621 if (Tok == ";") { 622 // Empty commands are allowed. Do nothing here. 623 } else if (SymbolAssignment *Assign = readProvideOrAssignment(Tok)) { 624 Cmd->Commands.push_back(Assign); 625 } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) { 626 Cmd->Commands.push_back(Data); 627 } else if (Tok == "ASSERT") { 628 Cmd->Commands.push_back(readAssert()); 629 expect(";"); 630 } else if (Tok == "CONSTRUCTORS") { 631 // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors 632 // by name. This is for very old file formats such as ECOFF/XCOFF. 633 // For ELF, we should ignore. 634 } else if (Tok == "FILL") { 635 Cmd->Filler = readFill(); 636 } else if (Tok == "SORT") { 637 readSort(); 638 } else if (peek() == "(") { 639 Cmd->Commands.push_back(readInputSectionDescription(Tok)); 640 } else { 641 setError("unknown command " + Tok); 642 } 643 } 644 645 if (consume(">")) 646 Cmd->MemoryRegionName = next(); 647 648 Cmd->Phdrs = readOutputSectionPhdrs(); 649 650 if (consume("=")) 651 Cmd->Filler = parseFill(next()); 652 else if (peek().startswith("=")) 653 Cmd->Filler = parseFill(next().drop_front()); 654 655 // Consume optional comma following output section command. 656 consume(","); 657 658 return Cmd; 659 } 660 661 // Parses a given string as a octal/decimal/hexadecimal number and 662 // returns it as a big-endian number. Used for `=<fillexp>`. 663 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 664 // 665 // When reading a hexstring, ld.bfd handles it as a blob of arbitrary 666 // size, while ld.gold always handles it as a 32-bit big-endian number. 667 // We are compatible with ld.gold because it's easier to implement. 668 uint32_t ScriptParser::parseFill(StringRef Tok) { 669 uint32_t V = 0; 670 if (!to_integer(Tok, V)) 671 setError("invalid filler expression: " + Tok); 672 673 uint32_t Buf; 674 write32be(&Buf, V); 675 return Buf; 676 } 677 678 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { 679 expect("("); 680 SymbolAssignment *Cmd = readAssignment(next()); 681 Cmd->Provide = Provide; 682 Cmd->Hidden = Hidden; 683 expect(")"); 684 expect(";"); 685 return Cmd; 686 } 687 688 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) { 689 SymbolAssignment *Cmd = nullptr; 690 if (peek() == "=" || peek() == "+=") { 691 Cmd = readAssignment(Tok); 692 expect(";"); 693 } else if (Tok == "PROVIDE") { 694 Cmd = readProvideHidden(true, false); 695 } else if (Tok == "HIDDEN") { 696 Cmd = readProvideHidden(false, true); 697 } else if (Tok == "PROVIDE_HIDDEN") { 698 Cmd = readProvideHidden(true, true); 699 } 700 return Cmd; 701 } 702 703 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { 704 StringRef Op = next(); 705 assert(Op == "=" || Op == "+="); 706 Expr E = readExpr(); 707 if (Op == "+=") { 708 std::string Loc = getCurrentLocation(); 709 E = [=] { return add(Script->getSymbolValue(Loc, Name), E()); }; 710 } 711 return make<SymbolAssignment>(Name, E, getCurrentLocation()); 712 } 713 714 // This is an operator-precedence parser to parse a linker 715 // script expression. 716 Expr ScriptParser::readExpr() { 717 // Our lexer is context-aware. Set the in-expression bit so that 718 // they apply different tokenization rules. 719 bool Orig = InExpr; 720 InExpr = true; 721 Expr E = readExpr1(readPrimary(), 0); 722 InExpr = Orig; 723 return E; 724 } 725 726 static Expr combine(StringRef Op, Expr L, Expr R) { 727 if (Op == "+") 728 return [=] { return add(L(), R()); }; 729 if (Op == "-") 730 return [=] { return sub(L(), R()); }; 731 if (Op == "*") 732 return [=] { return mul(L(), R()); }; 733 if (Op == "/") 734 return [=] { return div(L(), R()); }; 735 if (Op == "<<") 736 return [=] { return L().getValue() << R().getValue(); }; 737 if (Op == ">>") 738 return [=] { return L().getValue() >> R().getValue(); }; 739 if (Op == "<") 740 return [=] { return L().getValue() < R().getValue(); }; 741 if (Op == ">") 742 return [=] { return L().getValue() > R().getValue(); }; 743 if (Op == ">=") 744 return [=] { return L().getValue() >= R().getValue(); }; 745 if (Op == "<=") 746 return [=] { return L().getValue() <= R().getValue(); }; 747 if (Op == "==") 748 return [=] { return L().getValue() == R().getValue(); }; 749 if (Op == "!=") 750 return [=] { return L().getValue() != R().getValue(); }; 751 if (Op == "&") 752 return [=] { return bitAnd(L(), R()); }; 753 if (Op == "|") 754 return [=] { return bitOr(L(), R()); }; 755 llvm_unreachable("invalid operator"); 756 } 757 758 // This is a part of the operator-precedence parser. This function 759 // assumes that the remaining token stream starts with an operator. 760 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { 761 while (!atEOF() && !Error) { 762 // Read an operator and an expression. 763 if (consume("?")) 764 return readTernary(Lhs); 765 StringRef Op1 = peek(); 766 if (precedence(Op1) < MinPrec) 767 break; 768 skip(); 769 Expr Rhs = readPrimary(); 770 771 // Evaluate the remaining part of the expression first if the 772 // next operator has greater precedence than the previous one. 773 // For example, if we have read "+" and "3", and if the next 774 // operator is "*", then we'll evaluate 3 * ... part first. 775 while (!atEOF()) { 776 StringRef Op2 = peek(); 777 if (precedence(Op2) <= precedence(Op1)) 778 break; 779 Rhs = readExpr1(Rhs, precedence(Op2)); 780 } 781 782 Lhs = combine(Op1, Lhs, Rhs); 783 } 784 return Lhs; 785 } 786 787 uint64_t static getConstant(StringRef S) { 788 if (S == "COMMONPAGESIZE") 789 return Target->PageSize; 790 if (S == "MAXPAGESIZE") 791 return Config->MaxPageSize; 792 error("unknown constant: " + S); 793 return 0; 794 } 795 796 // Parses Tok as an integer. It recognizes hexadecimal (prefixed with 797 // "0x" or suffixed with "H") and decimal numbers. Decimal numbers may 798 // have "K" (Ki) or "M" (Mi) suffixes. 799 static Optional<uint64_t> parseInt(StringRef Tok) { 800 // Negative number 801 if (Tok.startswith("-")) { 802 if (Optional<uint64_t> Val = parseInt(Tok.substr(1))) 803 return -*Val; 804 return None; 805 } 806 807 // Hexadecimal 808 uint64_t Val; 809 if (Tok.startswith_lower("0x") && to_integer(Tok.substr(2), Val, 16)) 810 return Val; 811 if (Tok.endswith_lower("H") && to_integer(Tok.drop_back(), Val, 16)) 812 return Val; 813 814 // Decimal 815 if (Tok.endswith_lower("K")) { 816 if (!to_integer(Tok.drop_back(), Val, 10)) 817 return None; 818 return Val * 1024; 819 } 820 if (Tok.endswith_lower("M")) { 821 if (!to_integer(Tok.drop_back(), Val, 10)) 822 return None; 823 return Val * 1024 * 1024; 824 } 825 if (!to_integer(Tok, Val, 10)) 826 return None; 827 return Val; 828 } 829 830 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { 831 int Size = StringSwitch<int>(Tok) 832 .Case("BYTE", 1) 833 .Case("SHORT", 2) 834 .Case("LONG", 4) 835 .Case("QUAD", 8) 836 .Default(-1); 837 if (Size == -1) 838 return nullptr; 839 840 return make<BytesDataCommand>(readParenExpr(), Size); 841 } 842 843 StringRef ScriptParser::readParenLiteral() { 844 expect("("); 845 StringRef Tok = next(); 846 expect(")"); 847 return Tok; 848 } 849 850 OutputSection *ScriptParser::checkSection(OutputSectionCommand *Cmd, 851 StringRef Location) { 852 if (Cmd->Location.empty() && Script->ErrorOnMissingSection) 853 error(Location + ": undefined section " + Cmd->Name); 854 if (Cmd->Sec) 855 return Cmd->Sec; 856 static OutputSection Dummy("", 0, 0); 857 return &Dummy; 858 } 859 860 Expr ScriptParser::readPrimary() { 861 if (peek() == "(") 862 return readParenExpr(); 863 864 if (consume("~")) { 865 Expr E = readPrimary(); 866 return [=] { return ~E().getValue(); }; 867 } 868 if (consume("-")) { 869 Expr E = readPrimary(); 870 return [=] { return -E().getValue(); }; 871 } 872 873 StringRef Tok = next(); 874 std::string Location = getCurrentLocation(); 875 876 // Built-in functions are parsed here. 877 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 878 if (Tok == "ABSOLUTE") { 879 Expr Inner = readParenExpr(); 880 return [=] { 881 ExprValue I = Inner(); 882 I.ForceAbsolute = true; 883 return I; 884 }; 885 } 886 if (Tok == "ADDR") { 887 StringRef Name = readParenLiteral(); 888 OutputSectionCommand *Cmd = Script->getOrCreateOutputSectionCommand(Name); 889 return [=]() -> ExprValue { 890 return {checkSection(Cmd, Location), 0, Location}; 891 }; 892 } 893 if (Tok == "ALIGN") { 894 expect("("); 895 Expr E = readExpr(); 896 if (consume(")")) 897 return [=] { return alignTo(Script->getDot(), E().getValue()); }; 898 expect(","); 899 Expr E2 = readExpr(); 900 expect(")"); 901 return [=] { 902 ExprValue V = E(); 903 V.Alignment = E2().getValue(); 904 return V; 905 }; 906 } 907 if (Tok == "ALIGNOF") { 908 StringRef Name = readParenLiteral(); 909 OutputSectionCommand *Cmd = Script->getOrCreateOutputSectionCommand(Name); 910 return [=] { return checkSection(Cmd, Location)->Alignment; }; 911 } 912 if (Tok == "ASSERT") 913 return readAssertExpr(); 914 if (Tok == "CONSTANT") { 915 StringRef Name = readParenLiteral(); 916 return [=] { return getConstant(Name); }; 917 } 918 if (Tok == "DATA_SEGMENT_ALIGN") { 919 expect("("); 920 Expr E = readExpr(); 921 expect(","); 922 readExpr(); 923 expect(")"); 924 return [=] { return alignTo(Script->getDot(), E().getValue()); }; 925 } 926 if (Tok == "DATA_SEGMENT_END") { 927 expect("("); 928 expect("."); 929 expect(")"); 930 return [] { return Script->getDot(); }; 931 } 932 if (Tok == "DATA_SEGMENT_RELRO_END") { 933 // GNU linkers implements more complicated logic to handle 934 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and 935 // just align to the next page boundary for simplicity. 936 expect("("); 937 readExpr(); 938 expect(","); 939 readExpr(); 940 expect(")"); 941 return [] { return alignTo(Script->getDot(), Target->PageSize); }; 942 } 943 if (Tok == "DEFINED") { 944 StringRef Name = readParenLiteral(); 945 return [=] { return Script->isDefined(Name) ? 1 : 0; }; 946 } 947 if (Tok == "LENGTH") { 948 StringRef Name = readParenLiteral(); 949 if (Script->Opt.MemoryRegions.count(Name) == 0) 950 setError("memory region not defined: " + Name); 951 return [=] { return Script->Opt.MemoryRegions[Name].Length; }; 952 } 953 if (Tok == "LOADADDR") { 954 StringRef Name = readParenLiteral(); 955 OutputSectionCommand *Cmd = Script->getOrCreateOutputSectionCommand(Name); 956 return [=] { return checkSection(Cmd, Location)->getLMA(); }; 957 } 958 if (Tok == "ORIGIN") { 959 StringRef Name = readParenLiteral(); 960 if (Script->Opt.MemoryRegions.count(Name) == 0) 961 setError("memory region not defined: " + Name); 962 return [=] { return Script->Opt.MemoryRegions[Name].Origin; }; 963 } 964 if (Tok == "SEGMENT_START") { 965 expect("("); 966 skip(); 967 expect(","); 968 Expr E = readExpr(); 969 expect(")"); 970 return [=] { return E(); }; 971 } 972 if (Tok == "SIZEOF") { 973 StringRef Name = readParenLiteral(); 974 OutputSectionCommand *Cmd = Script->getOrCreateOutputSectionCommand(Name); 975 // Linker script does not create an output section if its content is empty. 976 // We want to allow SIZEOF(.foo) where .foo is a section which happened to 977 // be empty. 978 return [=] { return Cmd->Sec ? Cmd->Sec->Size : 0; }; 979 } 980 if (Tok == "SIZEOF_HEADERS") 981 return [=] { return elf::getHeaderSize(); }; 982 983 // Tok is the dot. 984 if (Tok == ".") 985 return [=] { return Script->getSymbolValue(Location, Tok); }; 986 987 // Tok is a literal number. 988 if (Optional<uint64_t> Val = parseInt(Tok)) 989 return [=] { return *Val; }; 990 991 // Tok is a symbol name. 992 if (!isValidCIdentifier(Tok)) 993 setError("malformed number: " + Tok); 994 Script->Opt.ReferencedSymbols.push_back(Tok); 995 return [=] { return Script->getSymbolValue(Location, Tok); }; 996 } 997 998 Expr ScriptParser::readTernary(Expr Cond) { 999 Expr L = readExpr(); 1000 expect(":"); 1001 Expr R = readExpr(); 1002 return [=] { return Cond().getValue() ? L() : R(); }; 1003 } 1004 1005 Expr ScriptParser::readParenExpr() { 1006 expect("("); 1007 Expr E = readExpr(); 1008 expect(")"); 1009 return E; 1010 } 1011 1012 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 1013 std::vector<StringRef> Phdrs; 1014 while (!Error && peek().startswith(":")) { 1015 StringRef Tok = next(); 1016 Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); 1017 } 1018 return Phdrs; 1019 } 1020 1021 // Read a program header type name. The next token must be a 1022 // name of a program header type or a constant (e.g. "0x3"). 1023 unsigned ScriptParser::readPhdrType() { 1024 StringRef Tok = next(); 1025 if (Optional<uint64_t> Val = parseInt(Tok)) 1026 return *Val; 1027 1028 unsigned Ret = StringSwitch<unsigned>(Tok) 1029 .Case("PT_NULL", PT_NULL) 1030 .Case("PT_LOAD", PT_LOAD) 1031 .Case("PT_DYNAMIC", PT_DYNAMIC) 1032 .Case("PT_INTERP", PT_INTERP) 1033 .Case("PT_NOTE", PT_NOTE) 1034 .Case("PT_SHLIB", PT_SHLIB) 1035 .Case("PT_PHDR", PT_PHDR) 1036 .Case("PT_TLS", PT_TLS) 1037 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1038 .Case("PT_GNU_STACK", PT_GNU_STACK) 1039 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1040 .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) 1041 .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) 1042 .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) 1043 .Default(-1); 1044 1045 if (Ret == (unsigned)-1) { 1046 setError("invalid program header type: " + Tok); 1047 return PT_NULL; 1048 } 1049 return Ret; 1050 } 1051 1052 // Reads an anonymous version declaration. 1053 void ScriptParser::readAnonymousDeclaration() { 1054 std::vector<SymbolVersion> Locals; 1055 std::vector<SymbolVersion> Globals; 1056 std::tie(Locals, Globals) = readSymbols(); 1057 1058 for (SymbolVersion V : Locals) { 1059 if (V.Name == "*") 1060 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1061 else 1062 Config->VersionScriptLocals.push_back(V); 1063 } 1064 1065 for (SymbolVersion V : Globals) 1066 Config->VersionScriptGlobals.push_back(V); 1067 1068 expect(";"); 1069 } 1070 1071 // Reads a non-anonymous version definition, 1072 // e.g. "VerStr { global: foo; bar; local: *; };". 1073 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1074 // Read a symbol list. 1075 std::vector<SymbolVersion> Locals; 1076 std::vector<SymbolVersion> Globals; 1077 std::tie(Locals, Globals) = readSymbols(); 1078 1079 for (SymbolVersion V : Locals) { 1080 if (V.Name == "*") 1081 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1082 else 1083 Config->VersionScriptLocals.push_back(V); 1084 } 1085 1086 // Create a new version definition and add that to the global symbols. 1087 VersionDefinition Ver; 1088 Ver.Name = VerStr; 1089 Ver.Globals = Globals; 1090 1091 // User-defined version number starts from 2 because 0 and 1 are 1092 // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively. 1093 Ver.Id = Config->VersionDefinitions.size() + 2; 1094 Config->VersionDefinitions.push_back(Ver); 1095 1096 // Each version may have a parent version. For example, "Ver2" 1097 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" 1098 // as a parent. This version hierarchy is, probably against your 1099 // instinct, purely for hint; the runtime doesn't care about it 1100 // at all. In LLD, we simply ignore it. 1101 if (peek() != ";") 1102 skip(); 1103 expect(";"); 1104 } 1105 1106 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 1107 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 1108 ScriptParser::readSymbols() { 1109 std::vector<SymbolVersion> Locals; 1110 std::vector<SymbolVersion> Globals; 1111 std::vector<SymbolVersion> *V = &Globals; 1112 1113 while (!Error) { 1114 if (consume("}")) 1115 break; 1116 if (consumeLabel("local")) { 1117 V = &Locals; 1118 continue; 1119 } 1120 if (consumeLabel("global")) { 1121 V = &Globals; 1122 continue; 1123 } 1124 1125 if (consume("extern")) { 1126 std::vector<SymbolVersion> Ext = readVersionExtern(); 1127 V->insert(V->end(), Ext.begin(), Ext.end()); 1128 } else { 1129 StringRef Tok = next(); 1130 V->push_back({unquote(Tok), false, hasWildcard(Tok)}); 1131 } 1132 expect(";"); 1133 } 1134 return {Locals, Globals}; 1135 } 1136 1137 // Reads an "extern C++" directive, e.g., 1138 // "extern "C++" { ns::*; "f(int, double)"; };" 1139 std::vector<SymbolVersion> ScriptParser::readVersionExtern() { 1140 StringRef Tok = next(); 1141 bool IsCXX = Tok == "\"C++\""; 1142 if (!IsCXX && Tok != "\"C\"") 1143 setError("Unknown language"); 1144 expect("{"); 1145 1146 std::vector<SymbolVersion> Ret; 1147 while (!Error && peek() != "}") { 1148 StringRef Tok = next(); 1149 bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); 1150 Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); 1151 expect(";"); 1152 } 1153 1154 expect("}"); 1155 return Ret; 1156 } 1157 1158 uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2, 1159 StringRef S3) { 1160 if (!consume(S1) && !consume(S2) && !consume(S3)) { 1161 setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); 1162 return 0; 1163 } 1164 expect("="); 1165 return readExpr()().getValue(); 1166 } 1167 1168 // Parse the MEMORY command as specified in: 1169 // https://sourceware.org/binutils/docs/ld/MEMORY.html 1170 // 1171 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... } 1172 void ScriptParser::readMemory() { 1173 expect("{"); 1174 while (!Error && !consume("}")) { 1175 StringRef Name = next(); 1176 1177 uint32_t Flags = 0; 1178 uint32_t NegFlags = 0; 1179 if (consume("(")) { 1180 std::tie(Flags, NegFlags) = readMemoryAttributes(); 1181 expect(")"); 1182 } 1183 expect(":"); 1184 1185 uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); 1186 expect(","); 1187 uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); 1188 1189 // Add the memory region to the region map (if it doesn't already exist). 1190 auto It = Script->Opt.MemoryRegions.find(Name); 1191 if (It != Script->Opt.MemoryRegions.end()) 1192 setError("region '" + Name + "' already defined"); 1193 else 1194 Script->Opt.MemoryRegions[Name] = {Name, Origin, Length, 1195 Origin, Flags, NegFlags}; 1196 } 1197 } 1198 1199 // This function parses the attributes used to match against section 1200 // flags when placing output sections in a memory region. These flags 1201 // are only used when an explicit memory region name is not used. 1202 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { 1203 uint32_t Flags = 0; 1204 uint32_t NegFlags = 0; 1205 bool Invert = false; 1206 1207 for (char C : next().lower()) { 1208 uint32_t Flag = 0; 1209 if (C == '!') 1210 Invert = !Invert; 1211 else if (C == 'w') 1212 Flag = SHF_WRITE; 1213 else if (C == 'x') 1214 Flag = SHF_EXECINSTR; 1215 else if (C == 'a') 1216 Flag = SHF_ALLOC; 1217 else if (C != 'r') 1218 setError("invalid memory region attribute"); 1219 1220 if (Invert) 1221 NegFlags |= Flag; 1222 else 1223 Flags |= Flag; 1224 } 1225 return {Flags, NegFlags}; 1226 } 1227 1228 void elf::readLinkerScript(MemoryBufferRef MB) { 1229 ScriptParser(MB).readLinkerScript(); 1230 } 1231 1232 void elf::readVersionScript(MemoryBufferRef MB) { 1233 ScriptParser(MB).readVersionScript(); 1234 } 1235 1236 void elf::readDynamicList(MemoryBufferRef MB) { 1237 ScriptParser(MB).readDynamicList(); 1238 } 1239