1 //===- ScriptParser.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains a recursive-descendent parser for linker scripts. 11 // Parsed results are stored to Config and Script global objects. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ScriptParser.h" 16 #include "Config.h" 17 #include "Driver.h" 18 #include "InputSection.h" 19 #include "LinkerScript.h" 20 #include "Memory.h" 21 #include "OutputSections.h" 22 #include "ScriptLexer.h" 23 #include "Symbols.h" 24 #include "Target.h" 25 #include "llvm/ADT/SmallString.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/ADT/StringSwitch.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/Support/Casting.h" 30 #include "llvm/Support/ErrorHandling.h" 31 #include "llvm/Support/FileSystem.h" 32 #include "llvm/Support/Path.h" 33 #include <cassert> 34 #include <limits> 35 #include <vector> 36 37 using namespace llvm; 38 using namespace llvm::ELF; 39 using namespace llvm::support::endian; 40 using namespace lld; 41 using namespace lld::elf; 42 43 static bool isUnderSysroot(StringRef Path); 44 45 namespace { 46 class ScriptParser final : ScriptLexer { 47 public: 48 ScriptParser(MemoryBufferRef MB) 49 : ScriptLexer(MB), 50 IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} 51 52 void readLinkerScript(); 53 void readVersionScript(); 54 void readDynamicList(); 55 56 private: 57 void addFile(StringRef Path); 58 OutputSection *checkSection(OutputSectionCommand *Cmd, StringRef Loccation); 59 60 void readAsNeeded(); 61 void readEntry(); 62 void readExtern(); 63 void readGroup(); 64 void readInclude(); 65 void readMemory(); 66 void readOutput(); 67 void readOutputArch(); 68 void 
readOutputFormat(); 69 void readPhdrs(); 70 void readSearchDir(); 71 void readSections(); 72 void readVersion(); 73 void readVersionScriptCommand(); 74 75 SymbolAssignment *readAssignment(StringRef Name); 76 BytesDataCommand *readBytesDataCommand(StringRef Tok); 77 uint32_t readFill(); 78 uint32_t parseFill(StringRef Tok); 79 OutputSectionCommand *readOutputSectionDescription(StringRef OutSec); 80 std::vector<StringRef> readOutputSectionPhdrs(); 81 InputSectionDescription *readInputSectionDescription(StringRef Tok); 82 StringMatcher readFilePatterns(); 83 std::vector<SectionPattern> readInputSectionsList(); 84 InputSectionDescription *readInputSectionRules(StringRef FilePattern); 85 unsigned readPhdrType(); 86 SortSectionPolicy readSortKind(); 87 SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); 88 SymbolAssignment *readProvideOrAssignment(StringRef Tok); 89 void readSort(); 90 AssertCommand *readAssert(); 91 Expr readAssertExpr(); 92 93 uint64_t readMemoryAssignment(StringRef, StringRef, StringRef); 94 std::pair<uint32_t, uint32_t> readMemoryAttributes(); 95 96 Expr readExpr(); 97 Expr readExpr1(Expr Lhs, int MinPrec); 98 StringRef readParenLiteral(); 99 Expr readPrimary(); 100 Expr readTernary(Expr Cond); 101 Expr readParenExpr(); 102 103 // For parsing version script. 104 std::vector<SymbolVersion> readVersionExtern(); 105 void readAnonymousDeclaration(); 106 void readVersionDeclaration(StringRef VerStr); 107 108 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 109 readSymbols(); 110 111 bool IsUnderSysroot; 112 }; 113 } // namespace 114 115 static bool isUnderSysroot(StringRef Path) { 116 if (Config->Sysroot == "") 117 return false; 118 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 119 if (sys::fs::equivalent(Config->Sysroot, Path)) 120 return true; 121 return false; 122 } 123 124 // Some operations only support one non absolute value. Move the 125 // absolute one to the right hand side for convenience. 
126 static void moveAbsRight(ExprValue &A, ExprValue &B) { 127 if (A.isAbsolute()) 128 std::swap(A, B); 129 if (!B.isAbsolute()) 130 error(A.Loc + ": at least one side of the expression must be absolute"); 131 } 132 133 static ExprValue add(ExprValue A, ExprValue B) { 134 moveAbsRight(A, B); 135 return {A.Sec, A.ForceAbsolute, A.Val + B.getValue(), A.Loc}; 136 } 137 138 static ExprValue sub(ExprValue A, ExprValue B) { 139 return {A.Sec, A.Val - B.getValue(), A.Loc}; 140 } 141 142 static ExprValue mul(ExprValue A, ExprValue B) { 143 return A.getValue() * B.getValue(); 144 } 145 146 static ExprValue div(ExprValue A, ExprValue B) { 147 if (uint64_t BV = B.getValue()) 148 return A.getValue() / BV; 149 error("division by zero"); 150 return 0; 151 } 152 153 static ExprValue bitAnd(ExprValue A, ExprValue B) { 154 moveAbsRight(A, B); 155 return {A.Sec, A.ForceAbsolute, 156 (A.getValue() & B.getValue()) - A.getSecAddr(), A.Loc}; 157 } 158 159 static ExprValue bitOr(ExprValue A, ExprValue B) { 160 moveAbsRight(A, B); 161 return {A.Sec, A.ForceAbsolute, 162 (A.getValue() | B.getValue()) - A.getSecAddr(), A.Loc}; 163 } 164 165 void ScriptParser::readDynamicList() { 166 expect("{"); 167 readAnonymousDeclaration(); 168 if (!atEOF()) 169 setError("EOF expected, but got " + next()); 170 } 171 172 void ScriptParser::readVersionScript() { 173 readVersionScriptCommand(); 174 if (!atEOF()) 175 setError("EOF expected, but got " + next()); 176 } 177 178 void ScriptParser::readVersionScriptCommand() { 179 if (consume("{")) { 180 readAnonymousDeclaration(); 181 return; 182 } 183 184 while (!atEOF() && !Error && peek() != "}") { 185 StringRef VerStr = next(); 186 if (VerStr == "{") { 187 setError("anonymous version definition is used in " 188 "combination with other version definitions"); 189 return; 190 } 191 expect("{"); 192 readVersionDeclaration(VerStr); 193 } 194 } 195 196 void ScriptParser::readVersion() { 197 expect("{"); 198 readVersionScriptCommand(); 199 expect("}"); 200 } 201 
202 void ScriptParser::readLinkerScript() { 203 while (!atEOF()) { 204 StringRef Tok = next(); 205 if (Tok == ";") 206 continue; 207 208 if (Tok == "ASSERT") { 209 Script->Opt.Commands.push_back(readAssert()); 210 } else if (Tok == "ENTRY") { 211 readEntry(); 212 } else if (Tok == "EXTERN") { 213 readExtern(); 214 } else if (Tok == "GROUP" || Tok == "INPUT") { 215 readGroup(); 216 } else if (Tok == "INCLUDE") { 217 readInclude(); 218 } else if (Tok == "MEMORY") { 219 readMemory(); 220 } else if (Tok == "OUTPUT") { 221 readOutput(); 222 } else if (Tok == "OUTPUT_ARCH") { 223 readOutputArch(); 224 } else if (Tok == "OUTPUT_FORMAT") { 225 readOutputFormat(); 226 } else if (Tok == "PHDRS") { 227 readPhdrs(); 228 } else if (Tok == "SEARCH_DIR") { 229 readSearchDir(); 230 } else if (Tok == "SECTIONS") { 231 readSections(); 232 } else if (Tok == "VERSION") { 233 readVersion(); 234 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { 235 Script->Opt.Commands.push_back(Cmd); 236 } else { 237 setError("unknown directive: " + Tok); 238 } 239 } 240 } 241 242 void ScriptParser::addFile(StringRef S) { 243 if (IsUnderSysroot && S.startswith("/")) { 244 SmallString<128> PathData; 245 StringRef Path = (Config->Sysroot + S).toStringRef(PathData); 246 if (sys::fs::exists(Path)) { 247 Driver->addFile(Saver.save(Path), /*WithLOption=*/false); 248 return; 249 } 250 } 251 252 if (sys::path::is_absolute(S)) { 253 Driver->addFile(S, /*WithLOption=*/false); 254 } else if (S.startswith("=")) { 255 if (Config->Sysroot.empty()) 256 Driver->addFile(S.substr(1), /*WithLOption=*/false); 257 else 258 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)), 259 /*WithLOption=*/false); 260 } else if (S.startswith("-l")) { 261 Driver->addLibrary(S.substr(2)); 262 } else if (sys::fs::exists(S)) { 263 Driver->addFile(S, /*WithLOption=*/false); 264 } else { 265 if (Optional<std::string> Path = findFromSearchPaths(S)) 266 Driver->addFile(Saver.save(*Path), /*WithLOption=*/true); 267 
else 268 setError("unable to find " + S); 269 } 270 } 271 272 void ScriptParser::readAsNeeded() { 273 expect("("); 274 bool Orig = Config->AsNeeded; 275 Config->AsNeeded = true; 276 while (!Error && !consume(")")) 277 addFile(unquote(next())); 278 Config->AsNeeded = Orig; 279 } 280 281 void ScriptParser::readEntry() { 282 // -e <symbol> takes predecence over ENTRY(<symbol>). 283 expect("("); 284 StringRef Tok = next(); 285 if (Config->Entry.empty()) 286 Config->Entry = Tok; 287 expect(")"); 288 } 289 290 void ScriptParser::readExtern() { 291 expect("("); 292 while (!Error && !consume(")")) 293 Config->Undefined.push_back(next()); 294 } 295 296 void ScriptParser::readGroup() { 297 expect("("); 298 while (!Error && !consume(")")) { 299 if (consume("AS_NEEDED")) 300 readAsNeeded(); 301 else 302 addFile(unquote(next())); 303 } 304 } 305 306 void ScriptParser::readInclude() { 307 StringRef Tok = unquote(next()); 308 309 // https://sourceware.org/binutils/docs/ld/File-Commands.html: 310 // The file will be searched for in the current directory, and in any 311 // directory specified with the -L option. 312 if (sys::fs::exists(Tok)) { 313 if (Optional<MemoryBufferRef> MB = readFile(Tok)) 314 tokenize(*MB); 315 return; 316 } 317 if (Optional<std::string> Path = findFromSearchPaths(Tok)) { 318 if (Optional<MemoryBufferRef> MB = readFile(*Path)) 319 tokenize(*MB); 320 return; 321 } 322 setError("cannot open " + Tok); 323 } 324 325 void ScriptParser::readOutput() { 326 // -o <file> takes predecence over OUTPUT(<file>). 327 expect("("); 328 StringRef Tok = next(); 329 if (Config->OutputFile.empty()) 330 Config->OutputFile = unquote(Tok); 331 expect(")"); 332 } 333 334 void ScriptParser::readOutputArch() { 335 // OUTPUT_ARCH is ignored for now. 336 expect("("); 337 while (!Error && !consume(")")) 338 skip(); 339 } 340 341 void ScriptParser::readOutputFormat() { 342 // Error checking only for now. 
343 expect("("); 344 skip(); 345 if (consume(")")) 346 return; 347 expect(","); 348 skip(); 349 expect(","); 350 skip(); 351 expect(")"); 352 } 353 354 void ScriptParser::readPhdrs() { 355 expect("{"); 356 while (!Error && !consume("}")) { 357 Script->Opt.PhdrsCommands.push_back( 358 {next(), PT_NULL, false, false, UINT_MAX, nullptr}); 359 360 PhdrsCommand &PhdrCmd = Script->Opt.PhdrsCommands.back(); 361 PhdrCmd.Type = readPhdrType(); 362 363 while (!Error && !consume(";")) { 364 if (consume("FILEHDR")) 365 PhdrCmd.HasFilehdr = true; 366 else if (consume("PHDRS")) 367 PhdrCmd.HasPhdrs = true; 368 else if (consume("AT")) 369 PhdrCmd.LMAExpr = readParenExpr(); 370 else if (consume("FLAGS")) 371 PhdrCmd.Flags = readParenExpr()().getValue(); 372 else 373 setError("unexpected header attribute: " + next()); 374 } 375 } 376 } 377 378 void ScriptParser::readSearchDir() { 379 expect("("); 380 StringRef Tok = next(); 381 if (!Config->Nostdlib) 382 Config->SearchPaths.push_back(unquote(Tok)); 383 expect(")"); 384 } 385 386 void ScriptParser::readSections() { 387 Script->Opt.HasSections = true; 388 389 // -no-rosegment is used to avoid placing read only non-executable sections in 390 // their own segment. We do the same if SECTIONS command is present in linker 391 // script. See comment for computeFlags(). 
392 Config->SingleRoRx = true; 393 394 expect("{"); 395 while (!Error && !consume("}")) { 396 StringRef Tok = next(); 397 BaseCommand *Cmd = readProvideOrAssignment(Tok); 398 if (!Cmd) { 399 if (Tok == "ASSERT") 400 Cmd = readAssert(); 401 else 402 Cmd = readOutputSectionDescription(Tok); 403 } 404 Script->Opt.Commands.push_back(Cmd); 405 } 406 } 407 408 static int precedence(StringRef Op) { 409 return StringSwitch<int>(Op) 410 .Cases("*", "/", 5) 411 .Cases("+", "-", 4) 412 .Cases("<<", ">>", 3) 413 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 414 .Cases("&", "|", 1) 415 .Default(-1); 416 } 417 418 StringMatcher ScriptParser::readFilePatterns() { 419 std::vector<StringRef> V; 420 while (!Error && !consume(")")) 421 V.push_back(next()); 422 return StringMatcher(V); 423 } 424 425 SortSectionPolicy ScriptParser::readSortKind() { 426 if (consume("SORT") || consume("SORT_BY_NAME")) 427 return SortSectionPolicy::Name; 428 if (consume("SORT_BY_ALIGNMENT")) 429 return SortSectionPolicy::Alignment; 430 if (consume("SORT_BY_INIT_PRIORITY")) 431 return SortSectionPolicy::Priority; 432 if (consume("SORT_NONE")) 433 return SortSectionPolicy::None; 434 return SortSectionPolicy::Default; 435 } 436 437 // Reads SECTIONS command contents in the following form: 438 // 439 // <contents> ::= <elem>* 440 // <elem> ::= <exclude>? <glob-pattern> 441 // <exclude> ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")" 442 // 443 // For example, 444 // 445 // *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz) 446 // 447 // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o". 448 // The semantics of that is section .foo in any file, section .bar in 449 // any file but a.o, and section .baz in any file but b.o. 
450 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 451 std::vector<SectionPattern> Ret; 452 while (!Error && peek() != ")") { 453 StringMatcher ExcludeFilePat; 454 if (consume("EXCLUDE_FILE")) { 455 expect("("); 456 ExcludeFilePat = readFilePatterns(); 457 } 458 459 std::vector<StringRef> V; 460 while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") 461 V.push_back(next()); 462 463 if (!V.empty()) 464 Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); 465 else 466 setError("section pattern is expected"); 467 } 468 return Ret; 469 } 470 471 // Reads contents of "SECTIONS" directive. That directive contains a 472 // list of glob patterns for input sections. The grammar is as follows. 473 // 474 // <patterns> ::= <section-list> 475 // | <sort> "(" <section-list> ")" 476 // | <sort> "(" <sort> "(" <section-list> ")" ")" 477 // 478 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" 479 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" 480 // 481 // <section-list> is parsed by readInputSectionsList(). 
482 InputSectionDescription * 483 ScriptParser::readInputSectionRules(StringRef FilePattern) { 484 auto *Cmd = make<InputSectionDescription>(FilePattern); 485 expect("("); 486 487 while (!Error && !consume(")")) { 488 SortSectionPolicy Outer = readSortKind(); 489 SortSectionPolicy Inner = SortSectionPolicy::Default; 490 std::vector<SectionPattern> V; 491 if (Outer != SortSectionPolicy::Default) { 492 expect("("); 493 Inner = readSortKind(); 494 if (Inner != SortSectionPolicy::Default) { 495 expect("("); 496 V = readInputSectionsList(); 497 expect(")"); 498 } else { 499 V = readInputSectionsList(); 500 } 501 expect(")"); 502 } else { 503 V = readInputSectionsList(); 504 } 505 506 for (SectionPattern &Pat : V) { 507 Pat.SortInner = Inner; 508 Pat.SortOuter = Outer; 509 } 510 511 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 512 } 513 return Cmd; 514 } 515 516 InputSectionDescription * 517 ScriptParser::readInputSectionDescription(StringRef Tok) { 518 // Input section wildcard can be surrounded by KEEP. 519 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 520 if (Tok == "KEEP") { 521 expect("("); 522 StringRef FilePattern = next(); 523 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 524 expect(")"); 525 Script->Opt.KeptSections.push_back(Cmd); 526 return Cmd; 527 } 528 return readInputSectionRules(Tok); 529 } 530 531 void ScriptParser::readSort() { 532 expect("("); 533 expect("CONSTRUCTORS"); 534 expect(")"); 535 } 536 537 AssertCommand *ScriptParser::readAssert() { 538 return make<AssertCommand>(readAssertExpr()); 539 } 540 541 Expr ScriptParser::readAssertExpr() { 542 expect("("); 543 Expr E = readExpr(); 544 expect(","); 545 StringRef Msg = unquote(next()); 546 expect(")"); 547 548 return [=] { 549 if (!E().getValue()) 550 error(Msg); 551 return Script->getDot(); 552 }; 553 } 554 555 // Reads a FILL(expr) command. 
We handle the FILL command as an 556 // alias for =fillexp section attribute, which is different from 557 // what GNU linkers do. 558 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 559 uint32_t ScriptParser::readFill() { 560 expect("("); 561 uint32_t V = parseFill(next()); 562 expect(")"); 563 return V; 564 } 565 566 OutputSectionCommand * 567 ScriptParser::readOutputSectionDescription(StringRef OutSec) { 568 OutputSectionCommand *Cmd = 569 Script->createOutputSectionCommand(OutSec, getCurrentLocation()); 570 571 if (peek() != ":") { 572 // Read an address expression. 573 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html 574 if (peek() != "(") 575 Cmd->AddrExpr = readExpr(); 576 577 // Read a section type. Currently, only NOLOAD is supported. 578 // https://sourceware.org/binutils/docs/ld/Output-Section-Type.html 579 if (consume("(")) { 580 expect("NOLOAD"); 581 expect(")"); 582 Cmd->Noload = true; 583 } 584 } 585 586 expect(":"); 587 588 if (consume("AT")) 589 Cmd->LMAExpr = readParenExpr(); 590 if (consume("ALIGN")) 591 Cmd->AlignExpr = readParenExpr(); 592 if (consume("SUBALIGN")) 593 Cmd->SubalignExpr = readParenExpr(); 594 595 // Parse constraints. 596 if (consume("ONLY_IF_RO")) 597 Cmd->Constraint = ConstraintKind::ReadOnly; 598 if (consume("ONLY_IF_RW")) 599 Cmd->Constraint = ConstraintKind::ReadWrite; 600 expect("{"); 601 602 while (!Error && !consume("}")) { 603 StringRef Tok = next(); 604 if (Tok == ";") { 605 // Empty commands are allowed. Do nothing here. 606 } else if (SymbolAssignment *Assign = readProvideOrAssignment(Tok)) { 607 Cmd->Commands.push_back(Assign); 608 } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) { 609 Cmd->Commands.push_back(Data); 610 } else if (Tok == "ASSERT") { 611 Cmd->Commands.push_back(readAssert()); 612 expect(";"); 613 } else if (Tok == "CONSTRUCTORS") { 614 // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors 615 // by name. 
This is for very old file formats such as ECOFF/XCOFF. 616 // For ELF, we should ignore. 617 } else if (Tok == "FILL") { 618 Cmd->Filler = readFill(); 619 } else if (Tok == "SORT") { 620 readSort(); 621 } else if (peek() == "(") { 622 Cmd->Commands.push_back(readInputSectionDescription(Tok)); 623 } else { 624 setError("unknown command " + Tok); 625 } 626 } 627 628 if (consume(">")) 629 Cmd->MemoryRegionName = next(); 630 631 Cmd->Phdrs = readOutputSectionPhdrs(); 632 633 if (consume("=")) 634 Cmd->Filler = parseFill(next()); 635 else if (peek().startswith("=")) 636 Cmd->Filler = parseFill(next().drop_front()); 637 638 // Consume optional comma following output section command. 639 consume(","); 640 641 return Cmd; 642 } 643 644 // Parses a given string as a octal/decimal/hexadecimal number and 645 // returns it as a big-endian number. Used for `=<fillexp>`. 646 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 647 // 648 // When reading a hexstring, ld.bfd handles it as a blob of arbitrary 649 // size, while ld.gold always handles it as a 32-bit big-endian number. 650 // We are compatible with ld.gold because it's easier to implement. 
uint32_t ScriptParser::parseFill(StringRef Tok) {
  // On a parse failure an error is recorded and V keeps its initial 0.
  uint32_t V = 0;
  if (!to_integer(Tok, V))
    setError("invalid filler expression: " + Tok);

  // Store the value through write32be and return the resulting 32-bit word.
  uint32_t Buf;
  write32be(&Buf, V);
  return Buf;
}

// Reads the "(sym = expr);" part of PROVIDE, HIDDEN and PROVIDE_HIDDEN and
// marks the resulting assignment with the given Provide/Hidden flags.
SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) {
  expect("(");
  SymbolAssignment *Cmd = readAssignment(next());
  Cmd->Provide = Provide;
  Cmd->Hidden = Hidden;
  expect(")");
  expect(";");
  return Cmd;
}

// Tries to read a symbol assignment ("sym = expr;" or "sym += expr;") or one
// of the PROVIDE-like commands. Tok is the token already consumed by the
// caller. Returns nullptr if Tok starts neither of them.
SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) {
  SymbolAssignment *Cmd = nullptr;
  // The lookahead is checked before the keywords, so any token followed by
  // "=" or "+=" is treated as the left-hand side of an assignment.
  if (peek() == "=" || peek() == "+=") {
    Cmd = readAssignment(Tok);
    expect(";");
  } else if (Tok == "PROVIDE") {
    Cmd = readProvideHidden(true, false);
  } else if (Tok == "HIDDEN") {
    Cmd = readProvideHidden(false, true);
  } else if (Tok == "PROVIDE_HIDDEN") {
    Cmd = readProvideHidden(true, true);
  }
  return Cmd;
}

// Reads "= expr" or "+= expr" for the symbol Name. The next token must be
// one of these two operators (asserted).
SymbolAssignment *ScriptParser::readAssignment(StringRef Name) {
  StringRef Op = next();
  assert(Op == "=" || Op == "+=");
  Expr E = readExpr();
  if (Op == "+=") {
    // "sym += expr" becomes "sym = sym + expr". The lambda captures the
    // previous E by copy, so the new E wraps the old one.
    std::string Loc = getCurrentLocation();
    E = [=] { return add(Script->getSymbolValue(Loc, Name), E()); };
  }
  return make<SymbolAssignment>(Name, E, getCurrentLocation());
}

// This is an operator-precedence parser to parse a linker
// script expression.
Expr ScriptParser::readExpr() {
  // Our lexer is context-aware. Set the in-expression bit so that
  // it applies different tokenization rules. The previous value is restored
  // afterwards so that nested calls behave correctly.
  bool Orig = InExpr;
  InExpr = true;
  Expr E = readExpr1(readPrimary(), 0);
  InExpr = Orig;
  return E;
}

// Builds the expression "L Op R" for a binary operator Op. Op must be one of
// the operators known to precedence(); anything else is unreachable.
static Expr combine(StringRef Op, Expr L, Expr R) {
  if (Op == "+")
    return [=] { return add(L(), R()); };
  if (Op == "-")
    return [=] { return sub(L(), R()); };
  if (Op == "*")
    return [=] { return mul(L(), R()); };
  if (Op == "/")
    return [=] { return div(L(), R()); };
  if (Op == "<<")
    return [=] { return L().getValue() << R().getValue(); };
  if (Op == ">>")
    return [=] { return L().getValue() >> R().getValue(); };
  if (Op == "<")
    return [=] { return L().getValue() < R().getValue(); };
  if (Op == ">")
    return [=] { return L().getValue() > R().getValue(); };
  if (Op == ">=")
    return [=] { return L().getValue() >= R().getValue(); };
  if (Op == "<=")
    return [=] { return L().getValue() <= R().getValue(); };
  if (Op == "==")
    return [=] { return L().getValue() == R().getValue(); };
  if (Op == "!=")
    return [=] { return L().getValue() != R().getValue(); };
  if (Op == "&")
    return [=] { return bitAnd(L(), R()); };
  if (Op == "|")
    return [=] { return bitOr(L(), R()); };
  llvm_unreachable("invalid operator");
}

// This is a part of the operator-precedence parser. This function
// assumes that the remaining token stream starts with an operator.
// Lhs is the operand read so far; operators binding less tightly than
// MinPrec are left for the caller.
Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) {
  while (!atEOF() && !Error) {
    // Read an operator and an expression.
    if (consume("?"))
      return readTernary(Lhs);
    StringRef Op1 = peek();
    // A token that is not a binary operator has precedence -1 and also
    // ends the loop here.
    if (precedence(Op1) < MinPrec)
      break;
    skip();
    Expr Rhs = readPrimary();

    // Evaluate the remaining part of the expression first if the
    // next operator has greater precedence than the previous one.
    // For example, if we have read "+" and "3", and if the next
    // operator is "*", then we'll evaluate 3 * ... part first.
    while (!atEOF()) {
      StringRef Op2 = peek();
      if (precedence(Op2) <= precedence(Op1))
        break;
      Rhs = readExpr1(Rhs, precedence(Op2));
    }

    Lhs = combine(Op1, Lhs, Rhs);
  }
  return Lhs;
}

// Returns the value of a CONSTANT(name) operand. Unknown names are reported
// as an error and evaluate to 0.
uint64_t static getConstant(StringRef S) {
  if (S == "COMMONPAGESIZE")
    return Target->PageSize;
  if (S == "MAXPAGESIZE")
    return Config->MaxPageSize;
  error("unknown constant: " + S);
  return 0;
}

// Parses Tok as an integer. It recognizes hexadecimal (prefixed with
// "0x" or suffixed with "H") and decimal numbers. Decimal numbers may
// have "K" (Ki) or "M" (Mi) suffixes. A leading "-" negates the rest of
// the token. Returns None if Tok is not a number in any of these forms.
static Optional<uint64_t> parseInt(StringRef Tok) {
  // Negative number: parse the remainder and negate it (the result is an
  // unsigned value, so the negation wraps).
  if (Tok.startswith("-")) {
    if (Optional<uint64_t> Val = parseInt(Tok.substr(1)))
      return -*Val;
    return None;
  }

  // Hexadecimal. If the prefix/suffix matches but the digits do not parse,
  // fall through to the checks below.
  uint64_t Val;
  if (Tok.startswith_lower("0x") && to_integer(Tok.substr(2), Val, 16))
    return Val;
  if (Tok.endswith_lower("H") && to_integer(Tok.drop_back(), Val, 16))
    return Val;

  // Decimal, optionally scaled by a K or M suffix.
  if (Tok.endswith_lower("K")) {
    if (!to_integer(Tok.drop_back(), Val, 10))
      return None;
    return Val * 1024;
  }
  if (Tok.endswith_lower("M")) {
    if (!to_integer(Tok.drop_back(), Val, 10))
      return None;
    return Val * 1024 * 1024;
  }
  if (!to_integer(Tok, Val, 10))
    return None;
  return Val;
}

// Reads BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr). Tok is the
// keyword already consumed by the caller. Returns nullptr, without consuming
// anything, if Tok is not one of these keywords.
BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) {
  // Map the keyword to the data size in bytes.
  int Size = StringSwitch<int>(Tok)
                 .Case("BYTE", 1)
                 .Case("SHORT", 2)
                 .Case("LONG", 4)
                 .Case("QUAD", 8)
                 .Default(-1);
  if (Size == -1)
    return nullptr;

  return make<BytesDataCommand>(readParenExpr(), Size);
}

// Reads "(" <token> ")" and returns the token verbatim.
StringRef ScriptParser::readParenLiteral() {
  expect("(");
  StringRef Tok = next();
  expect(")");
  return Tok;
}

// Returns the output section behind Cmd for use by built-in functions such
// as ADDR or ALIGNOF. A command with an empty Location is reported as an
// undefined section when Script->ErrorOnMissingSection is set. If Cmd has no
// section attached, a shared dummy section is returned so that callers never
// receive a null pointer.
OutputSection *ScriptParser::checkSection(OutputSectionCommand *Cmd,
                                          StringRef Location) {
  if (Cmd->Location.empty() && Script->ErrorOnMissingSection)
    error(Location + ": undefined section " + Cmd->Name);
  if (Cmd->Sec)
    return Cmd->Sec;
  static OutputSection Dummy("", 0, 0);
  return &Dummy;
}

// Reads a primary expression: a parenthesized expression, a unary "~" or
// "-" applied to a primary, a built-in function call, the dot, a number, or
// a symbol name. The returned Expr is a closure that is evaluated later.
Expr ScriptParser::readPrimary() {
  if (peek() == "(")
    return readParenExpr();

  // Unary operators.
  if (consume("~")) {
    Expr E = readPrimary();
    return [=] { return ~E().getValue(); };
  }
  if (consume("-")) {
    Expr E = readPrimary();
    return [=] { return -E().getValue(); };
  }

  StringRef Tok = next();
  // Captured by the closures below for use in diagnostics.
  std::string Location = getCurrentLocation();

  // Built-in functions are parsed here.
  // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html.
  if (Tok == "ABSOLUTE") {
    Expr Inner = readParenExpr();
    return [=] {
      ExprValue I = Inner();
      I.ForceAbsolute = true;
      return I;
    };
  }
  if (Tok == "ADDR") {
    StringRef Name = readParenLiteral();
    OutputSectionCommand *Cmd = Script->getOrCreateOutputSectionCommand(Name);
    return [=]() -> ExprValue {
      return {checkSection(Cmd, Location), 0, Location};
    };
  }
  if (Tok == "ALIGN") {
    expect("(");
    Expr E = readExpr();
    // One-argument form: align the current dot to E.
    if (consume(")"))
      return [=] { return alignTo(Script->getDot(), E().getValue()); };
    // Two-argument form: the value of E tagged with alignment E2.
    expect(",");
    Expr E2 = readExpr();
    expect(")");
    return [=] {
      ExprValue V = E();
      V.Alignment = E2().getValue();
      return V;
    };
  }
  if (Tok == "ALIGNOF") {
    StringRef Name = readParenLiteral();
    OutputSectionCommand *Cmd = Script->getOrCreateOutputSectionCommand(Name);
    return [=] { return checkSection(Cmd, Location)->Alignment; };
  }
  if (Tok == "ASSERT")
    return readAssertExpr();
  if (Tok == "CONSTANT") {
    StringRef Name = readParenLiteral();
    return [=] { return getConstant(Name); };
  }
  if (Tok == "DATA_SEGMENT_ALIGN") {
    // The second argument is parsed but not used.
    expect("(");
    Expr E = readExpr();
    expect(",");
    readExpr();
    expect(")");
    return [=] { return alignTo(Script->getDot(), E().getValue()); };
  }
  if (Tok == "DATA_SEGMENT_END") {
    expect("(");
    expect(".");
    expect(")");
    return [] { return Script->getDot(); };
  }
  if (Tok == "DATA_SEGMENT_RELRO_END") {
    // GNU linkers implement more complicated logic to handle
    // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and
    // just align to the next page boundary for simplicity.
    expect("(");
    readExpr();
    expect(",");
    readExpr();
    expect(")");
    return [] { return alignTo(Script->getDot(), Target->PageSize); };
  }
  if (Tok == "DEFINED") {
    StringRef Name = readParenLiteral();
    return [=] { return Script->isDefined(Name) ? 1 : 0; };
  }
  if (Tok == "LENGTH") {
    // The region must already be known at parse time.
    StringRef Name = readParenLiteral();
    if (Script->Opt.MemoryRegions.count(Name) == 0)
      setError("memory region not defined: " + Name);
    return [=] { return Script->Opt.MemoryRegions[Name].Length; };
  }
  if (Tok == "LOADADDR") {
    StringRef Name = readParenLiteral();
    OutputSectionCommand *Cmd = Script->getOrCreateOutputSectionCommand(Name);
    return [=] { return checkSection(Cmd, Location)->getLMA(); };
  }
  if (Tok == "ORIGIN") {
    // The region must already be known at parse time.
    StringRef Name = readParenLiteral();
    if (Script->Opt.MemoryRegions.count(Name) == 0)
      setError("memory region not defined: " + Name);
    return [=] { return Script->Opt.MemoryRegions[Name].Origin; };
  }
  if (Tok == "SEGMENT_START") {
    // The segment name (first argument) is skipped; the result is the value
    // of the second argument.
    expect("(");
    skip();
    expect(",");
    Expr E = readExpr();
    expect(")");
    return [=] { return E(); };
  }
  if (Tok == "SIZEOF") {
    StringRef Name = readParenLiteral();
    OutputSectionCommand *Cmd = Script->getOrCreateOutputSectionCommand(Name);
    // Linker script does not create an output section if its content is empty.
    // We want to allow SIZEOF(.foo) where .foo is a section which happened to
    // be empty.
    return [=] { return Cmd->Sec ? Cmd->Sec->Size : 0; };
  }
  if (Tok == "SIZEOF_HEADERS")
    return [=] { return elf::getHeaderSize(); };

  // Tok is the dot.
  if (Tok == ".")
    return [=] { return Script->getSymbolValue(Location, Tok); };

  // Tok is a literal number.
  if (Optional<uint64_t> Val = parseInt(Tok))
    return [=] { return *Val; };

  // Tok is a symbol name. Anything that is not a valid C identifier is
  // reported as a malformed number.
  if (!isValidCIdentifier(Tok))
    setError("malformed number: " + Tok);
  Script->Opt.ReferencedSymbols.push_back(Tok);
  return [=] { return Script->getSymbolValue(Location, Tok); };
}

// Reads the "L : R" part of a ternary expression whose condition Cond and
// "?" have already been consumed.
Expr ScriptParser::readTernary(Expr Cond) {
  Expr L = readExpr();
  expect(":");
  Expr R = readExpr();
  return [=] { return Cond().getValue() ? L() : R(); };
}

// Reads "(" <expr> ")".
Expr ScriptParser::readParenExpr() {
  expect("(");
  Expr E = readExpr();
  expect(")");
  return E;
}

// Reads the ":phdr" list that may follow an output section description.
// Both ":name" as a single token and ":" followed by a separate name token
// are accepted.
std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() {
  std::vector<StringRef> Phdrs;
  while (!Error && peek().startswith(":")) {
    StringRef Tok = next();
    // A lone ":" means the name is the next token; otherwise strip the ":".
    Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1));
  }
  return Phdrs;
}

// Read a program header type name. The next token must be a
// name of a program header type or a constant (e.g. "0x3").
1006 unsigned ScriptParser::readPhdrType() { 1007 StringRef Tok = next(); 1008 if (Optional<uint64_t> Val = parseInt(Tok)) 1009 return *Val; 1010 1011 unsigned Ret = StringSwitch<unsigned>(Tok) 1012 .Case("PT_NULL", PT_NULL) 1013 .Case("PT_LOAD", PT_LOAD) 1014 .Case("PT_DYNAMIC", PT_DYNAMIC) 1015 .Case("PT_INTERP", PT_INTERP) 1016 .Case("PT_NOTE", PT_NOTE) 1017 .Case("PT_SHLIB", PT_SHLIB) 1018 .Case("PT_PHDR", PT_PHDR) 1019 .Case("PT_TLS", PT_TLS) 1020 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1021 .Case("PT_GNU_STACK", PT_GNU_STACK) 1022 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1023 .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) 1024 .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) 1025 .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) 1026 .Default(-1); 1027 1028 if (Ret == (unsigned)-1) { 1029 setError("invalid program header type: " + Tok); 1030 return PT_NULL; 1031 } 1032 return Ret; 1033 } 1034 1035 // Reads an anonymous version declaration. 1036 void ScriptParser::readAnonymousDeclaration() { 1037 std::vector<SymbolVersion> Locals; 1038 std::vector<SymbolVersion> Globals; 1039 std::tie(Locals, Globals) = readSymbols(); 1040 1041 for (SymbolVersion V : Locals) { 1042 if (V.Name == "*") 1043 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1044 else 1045 Config->VersionScriptLocals.push_back(V); 1046 } 1047 1048 for (SymbolVersion V : Globals) 1049 Config->VersionScriptGlobals.push_back(V); 1050 1051 expect(";"); 1052 } 1053 1054 // Reads a non-anonymous version definition, 1055 // e.g. "VerStr { global: foo; bar; local: *; };". 1056 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1057 // Read a symbol list. 
1058 std::vector<SymbolVersion> Locals; 1059 std::vector<SymbolVersion> Globals; 1060 std::tie(Locals, Globals) = readSymbols(); 1061 1062 for (SymbolVersion V : Locals) { 1063 if (V.Name == "*") 1064 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1065 else 1066 Config->VersionScriptLocals.push_back(V); 1067 } 1068 1069 // Create a new version definition and add that to the global symbols. 1070 VersionDefinition Ver; 1071 Ver.Name = VerStr; 1072 Ver.Globals = Globals; 1073 1074 // User-defined version number starts from 2 because 0 and 1 are 1075 // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively. 1076 Ver.Id = Config->VersionDefinitions.size() + 2; 1077 Config->VersionDefinitions.push_back(Ver); 1078 1079 // Each version may have a parent version. For example, "Ver2" 1080 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" 1081 // as a parent. This version hierarchy is, probably against your 1082 // instinct, purely for hint; the runtime doesn't care about it 1083 // at all. In LLD, we simply ignore it. 1084 if (peek() != ";") 1085 skip(); 1086 expect(";"); 1087 } 1088 1089 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 
1090 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 1091 ScriptParser::readSymbols() { 1092 std::vector<SymbolVersion> Locals; 1093 std::vector<SymbolVersion> Globals; 1094 std::vector<SymbolVersion> *V = &Globals; 1095 1096 while (!Error) { 1097 if (consume("}")) 1098 break; 1099 if (consumeLabel("local")) { 1100 V = &Locals; 1101 continue; 1102 } 1103 if (consumeLabel("global")) { 1104 V = &Globals; 1105 continue; 1106 } 1107 1108 if (consume("extern")) { 1109 std::vector<SymbolVersion> Ext = readVersionExtern(); 1110 V->insert(V->end(), Ext.begin(), Ext.end()); 1111 } else { 1112 StringRef Tok = next(); 1113 V->push_back({unquote(Tok), false, hasWildcard(Tok)}); 1114 } 1115 expect(";"); 1116 } 1117 return {Locals, Globals}; 1118 } 1119 1120 // Reads an "extern C++" directive, e.g., 1121 // "extern "C++" { ns::*; "f(int, double)"; };" 1122 std::vector<SymbolVersion> ScriptParser::readVersionExtern() { 1123 StringRef Tok = next(); 1124 bool IsCXX = Tok == "\"C++\""; 1125 if (!IsCXX && Tok != "\"C\"") 1126 setError("Unknown language"); 1127 expect("{"); 1128 1129 std::vector<SymbolVersion> Ret; 1130 while (!Error && peek() != "}") { 1131 StringRef Tok = next(); 1132 bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); 1133 Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); 1134 expect(";"); 1135 } 1136 1137 expect("}"); 1138 return Ret; 1139 } 1140 1141 uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2, 1142 StringRef S3) { 1143 if (!consume(S1) && !consume(S2) && !consume(S3)) { 1144 setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); 1145 return 0; 1146 } 1147 expect("="); 1148 return readExpr()().getValue(); 1149 } 1150 1151 // Parse the MEMORY command as specified in: 1152 // https://sourceware.org/binutils/docs/ld/MEMORY.html 1153 // 1154 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... 
} 1155 void ScriptParser::readMemory() { 1156 expect("{"); 1157 while (!Error && !consume("}")) { 1158 StringRef Name = next(); 1159 1160 uint32_t Flags = 0; 1161 uint32_t NegFlags = 0; 1162 if (consume("(")) { 1163 std::tie(Flags, NegFlags) = readMemoryAttributes(); 1164 expect(")"); 1165 } 1166 expect(":"); 1167 1168 uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); 1169 expect(","); 1170 uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); 1171 1172 // Add the memory region to the region map (if it doesn't already exist). 1173 auto It = Script->Opt.MemoryRegions.find(Name); 1174 if (It != Script->Opt.MemoryRegions.end()) 1175 setError("region '" + Name + "' already defined"); 1176 else 1177 Script->Opt.MemoryRegions[Name] = {Name, Origin, Length, 1178 Origin, Flags, NegFlags}; 1179 } 1180 } 1181 1182 // This function parses the attributes used to match against section 1183 // flags when placing output sections in a memory region. These flags 1184 // are only used when an explicit memory region name is not used. 1185 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { 1186 uint32_t Flags = 0; 1187 uint32_t NegFlags = 0; 1188 bool Invert = false; 1189 1190 for (char C : next().lower()) { 1191 uint32_t Flag = 0; 1192 if (C == '!') 1193 Invert = !Invert; 1194 else if (C == 'w') 1195 Flag = SHF_WRITE; 1196 else if (C == 'x') 1197 Flag = SHF_EXECINSTR; 1198 else if (C == 'a') 1199 Flag = SHF_ALLOC; 1200 else if (C != 'r') 1201 setError("invalid memory region attribute"); 1202 1203 if (Invert) 1204 NegFlags |= Flag; 1205 else 1206 Flags |= Flag; 1207 } 1208 return {Flags, NegFlags}; 1209 } 1210 1211 void elf::readLinkerScript(MemoryBufferRef MB) { 1212 ScriptParser(MB).readLinkerScript(); 1213 } 1214 1215 void elf::readVersionScript(MemoryBufferRef MB) { 1216 ScriptParser(MB).readVersionScript(); 1217 } 1218 1219 void elf::readDynamicList(MemoryBufferRef MB) { 1220 ScriptParser(MB).readDynamicList(); 1221 } 1222