1 //===- ScriptParser.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains a recursive-descendent parser for linker scripts. 11 // Parsed results are stored to Config and Script global objects. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ScriptParser.h" 16 #include "Config.h" 17 #include "Driver.h" 18 #include "InputSection.h" 19 #include "LinkerScript.h" 20 #include "Memory.h" 21 #include "OutputSections.h" 22 #include "ScriptLexer.h" 23 #include "Symbols.h" 24 #include "Target.h" 25 #include "llvm/ADT/SmallString.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/ADT/StringSwitch.h" 28 #include "llvm/Support/Casting.h" 29 #include "llvm/Support/ELF.h" 30 #include "llvm/Support/ErrorHandling.h" 31 #include "llvm/Support/FileSystem.h" 32 #include "llvm/Support/Path.h" 33 #include <cassert> 34 #include <limits> 35 #include <vector> 36 37 using namespace llvm; 38 using namespace llvm::ELF; 39 using namespace llvm::support::endian; 40 using namespace lld; 41 using namespace lld::elf; 42 43 static bool isUnderSysroot(StringRef Path); 44 45 namespace { 46 class ScriptParser final : ScriptLexer { 47 public: 48 ScriptParser(MemoryBufferRef MB) 49 : ScriptLexer(MB), 50 IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} 51 52 void readLinkerScript(); 53 void readVersionScript(); 54 void readDynamicList(); 55 56 private: 57 void addFile(StringRef Path); 58 OutputSection *checkSection(OutputSectionCommand *Cmd, StringRef Loccation); 59 60 void readAsNeeded(); 61 void readEntry(); 62 void readExtern(); 63 void readGroup(); 64 void readInclude(); 65 void readMemory(); 66 void readOutput(); 67 void readOutputArch(); 68 void readOutputFormat(); 69 void readPhdrs(); 70 void readSearchDir(); 71 void readSections(); 72 void readVersion(); 73 void readVersionScriptCommand(); 74 75 SymbolAssignment *readAssignment(StringRef Name); 76 BytesDataCommand *readBytesDataCommand(StringRef Tok); 77 uint32_t readFill(); 78 uint32_t parseFill(StringRef Tok); 79 OutputSectionCommand *readOutputSectionDescription(StringRef OutSec); 80 std::vector<StringRef> readOutputSectionPhdrs(); 81 InputSectionDescription *readInputSectionDescription(StringRef Tok); 82 StringMatcher readFilePatterns(); 83 std::vector<SectionPattern> readInputSectionsList(); 84 InputSectionDescription *readInputSectionRules(StringRef FilePattern); 85 unsigned readPhdrType(); 86 SortSectionPolicy readSortKind(); 87 SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); 88 SymbolAssignment *readProvideOrAssignment(StringRef Tok); 89 void readSort(); 90 AssertCommand *readAssert(); 91 Expr readAssertExpr(); 92 93 uint64_t readMemoryAssignment(StringRef, StringRef, StringRef); 94 std::pair<uint32_t, uint32_t> readMemoryAttributes(); 95 96 Expr readExpr(); 97 Expr readExpr1(Expr Lhs, int MinPrec); 98 StringRef readParenLiteral(); 99 Expr readPrimary(); 100 Expr readTernary(Expr Cond); 101 Expr readParenExpr(); 102 103 // For parsing version script. 104 std::vector<SymbolVersion> readVersionExtern(); 105 void readAnonymousDeclaration(); 106 void readVersionDeclaration(StringRef VerStr); 107 108 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 109 readSymbols(); 110 111 bool IsUnderSysroot; 112 }; 113 } // namespace 114 115 static bool isUnderSysroot(StringRef Path) { 116 if (Config->Sysroot == "") 117 return false; 118 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 119 if (sys::fs::equivalent(Config->Sysroot, Path)) 120 return true; 121 return false; 122 } 123 124 // Some operations only support one non absolute value. Move the 125 // absolute one to the right hand side for convenience. 126 static void moveAbsRight(ExprValue &A, ExprValue &B) { 127 if (A.isAbsolute()) 128 std::swap(A, B); 129 if (!B.isAbsolute()) 130 error("At least one side of the expression must be absolute"); 131 } 132 133 static ExprValue add(ExprValue A, ExprValue B) { 134 moveAbsRight(A, B); 135 return {A.Sec, A.ForceAbsolute, A.Val + B.getValue()}; 136 } 137 138 static ExprValue sub(ExprValue A, ExprValue B) { 139 return {A.Sec, A.Val - B.getValue()}; 140 } 141 142 static ExprValue mul(ExprValue A, ExprValue B) { 143 return A.getValue() * B.getValue(); 144 } 145 146 static ExprValue div(ExprValue A, ExprValue B) { 147 if (uint64_t BV = B.getValue()) 148 return A.getValue() / BV; 149 error("division by zero"); 150 return 0; 151 } 152 153 static ExprValue bitAnd(ExprValue A, ExprValue B) { 154 moveAbsRight(A, B); 155 return {A.Sec, A.ForceAbsolute, 156 (A.getValue() & B.getValue()) - A.getSecAddr()}; 157 } 158 159 static ExprValue bitOr(ExprValue A, ExprValue B) { 160 moveAbsRight(A, B); 161 return {A.Sec, A.ForceAbsolute, 162 (A.getValue() | B.getValue()) - A.getSecAddr()}; 163 } 164 165 void ScriptParser::readDynamicList() { 166 expect("{"); 167 readAnonymousDeclaration(); 168 if (!atEOF()) 169 setError("EOF expected, but got " + next()); 170 } 171 172 void ScriptParser::readVersionScript() { 173 readVersionScriptCommand(); 174 if (!atEOF()) 175 setError("EOF expected, but got " + next()); 176 } 177 178 void ScriptParser::readVersionScriptCommand() { 179 if (consume("{")) { 180 readAnonymousDeclaration(); 181 return; 182 } 183 184 while (!atEOF() && !Error && peek() != "}") { 185 StringRef VerStr = next(); 186 if (VerStr == "{") { 187 setError("anonymous version definition is used in " 188 "combination with other version definitions"); 189 return; 190 } 191 expect("{"); 192 readVersionDeclaration(VerStr); 193 } 194 } 195 196 void ScriptParser::readVersion() { 197 expect("{"); 198 readVersionScriptCommand(); 199 expect("}"); 200 } 201 202 void ScriptParser::readLinkerScript() { 203 while (!atEOF()) { 204 StringRef Tok = next(); 205 if (Tok == ";") 206 continue; 207 208 if (Tok == "ASSERT") { 209 Script->Opt.Commands.push_back(readAssert()); 210 } else if (Tok == "ENTRY") { 211 readEntry(); 212 } else if (Tok == "EXTERN") { 213 readExtern(); 214 } else if (Tok == "GROUP" || Tok == "INPUT") { 215 readGroup(); 216 } else if (Tok == "INCLUDE") { 217 readInclude(); 218 } else if (Tok == "MEMORY") { 219 readMemory(); 220 } else if (Tok == "OUTPUT") { 221 readOutput(); 222 } else if (Tok == "OUTPUT_ARCH") { 223 readOutputArch(); 224 } else if (Tok == "OUTPUT_FORMAT") { 225 readOutputFormat(); 226 } else if (Tok == "PHDRS") { 227 readPhdrs(); 228 } else if (Tok == "SEARCH_DIR") { 229 readSearchDir(); 230 } else if (Tok == "SECTIONS") { 231 readSections(); 232 } else if (Tok == "VERSION") { 233 readVersion(); 234 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { 235 Script->Opt.Commands.push_back(Cmd); 236 } else { 237 setError("unknown directive: " + Tok); 238 } 239 } 240 } 241 242 void ScriptParser::addFile(StringRef S) { 243 if (IsUnderSysroot && S.startswith("/")) { 244 SmallString<128> PathData; 245 StringRef Path = (Config->Sysroot + S).toStringRef(PathData); 246 if (sys::fs::exists(Path)) { 247 Driver->addFile(Saver.save(Path), /*WithLOption=*/false); 248 return; 249 } 250 } 251 252 if (sys::path::is_absolute(S)) { 253 Driver->addFile(S, /*WithLOption=*/false); 254 } else if (S.startswith("=")) { 255 if (Config->Sysroot.empty()) 256 Driver->addFile(S.substr(1), /*WithLOption=*/false); 257 else 258 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)), 259 /*WithLOption=*/false); 260 } else if (S.startswith("-l")) { 261 Driver->addLibrary(S.substr(2)); 262 } else if (sys::fs::exists(S)) { 263 Driver->addFile(S, /*WithLOption=*/false); 264 } else { 265 if (Optional<std::string> Path = findFromSearchPaths(S)) 266 Driver->addFile(Saver.save(*Path), /*WithLOption=*/true); 267 else 268 setError("unable to find " + S); 269 } 270 } 271 272 void ScriptParser::readAsNeeded() { 273 expect("("); 274 bool Orig = Config->AsNeeded; 275 Config->AsNeeded = true; 276 while (!Error && !consume(")")) 277 addFile(unquote(next())); 278 Config->AsNeeded = Orig; 279 } 280 281 void ScriptParser::readEntry() { 282 // -e <symbol> takes predecence over ENTRY(<symbol>). 283 expect("("); 284 StringRef Tok = next(); 285 if (Config->Entry.empty()) 286 Config->Entry = Tok; 287 expect(")"); 288 } 289 290 void ScriptParser::readExtern() { 291 expect("("); 292 while (!Error && !consume(")")) 293 Config->Undefined.push_back(next()); 294 } 295 296 void ScriptParser::readGroup() { 297 expect("("); 298 while (!Error && !consume(")")) { 299 if (consume("AS_NEEDED")) 300 readAsNeeded(); 301 else 302 addFile(unquote(next())); 303 } 304 } 305 306 void ScriptParser::readInclude() { 307 StringRef Tok = unquote(next()); 308 309 // https://sourceware.org/binutils/docs/ld/File-Commands.html: 310 // The file will be searched for in the current directory, and in any 311 // directory specified with the -L option. 312 if (sys::fs::exists(Tok)) { 313 if (Optional<MemoryBufferRef> MB = readFile(Tok)) 314 tokenize(*MB); 315 return; 316 } 317 if (Optional<std::string> Path = findFromSearchPaths(Tok)) { 318 if (Optional<MemoryBufferRef> MB = readFile(*Path)) 319 tokenize(*MB); 320 return; 321 } 322 setError("cannot open " + Tok); 323 } 324 325 void ScriptParser::readOutput() { 326 // -o <file> takes predecence over OUTPUT(<file>). 327 expect("("); 328 StringRef Tok = next(); 329 if (Config->OutputFile.empty()) 330 Config->OutputFile = unquote(Tok); 331 expect(")"); 332 } 333 334 void ScriptParser::readOutputArch() { 335 // OUTPUT_ARCH is ignored for now. 336 expect("("); 337 while (!Error && !consume(")")) 338 skip(); 339 } 340 341 void ScriptParser::readOutputFormat() { 342 // Error checking only for now. 343 expect("("); 344 skip(); 345 if (consume(")")) 346 return; 347 expect(","); 348 skip(); 349 expect(","); 350 skip(); 351 expect(")"); 352 } 353 354 void ScriptParser::readPhdrs() { 355 expect("{"); 356 while (!Error && !consume("}")) { 357 Script->Opt.PhdrsCommands.push_back( 358 {next(), PT_NULL, false, false, UINT_MAX, nullptr}); 359 360 PhdrsCommand &PhdrCmd = Script->Opt.PhdrsCommands.back(); 361 PhdrCmd.Type = readPhdrType(); 362 363 while (!Error && !consume(";")) { 364 if (consume("FILEHDR")) 365 PhdrCmd.HasFilehdr = true; 366 else if (consume("PHDRS")) 367 PhdrCmd.HasPhdrs = true; 368 else if (consume("AT")) 369 PhdrCmd.LMAExpr = readParenExpr(); 370 else if (consume("FLAGS")) 371 PhdrCmd.Flags = readParenExpr()().getValue(); 372 else 373 setError("unexpected header attribute: " + next()); 374 } 375 } 376 } 377 378 void ScriptParser::readSearchDir() { 379 expect("("); 380 StringRef Tok = next(); 381 if (!Config->Nostdlib) 382 Config->SearchPaths.push_back(unquote(Tok)); 383 expect(")"); 384 } 385 386 void ScriptParser::readSections() { 387 Script->Opt.HasSections = true; 388 389 // -no-rosegment is used to avoid placing read only non-executable sections in 390 // their own segment. We do the same if SECTIONS command is present in linker 391 // script. See comment for computeFlags(). 392 Config->SingleRoRx = true; 393 394 expect("{"); 395 while (!Error && !consume("}")) { 396 StringRef Tok = next(); 397 BaseCommand *Cmd = readProvideOrAssignment(Tok); 398 if (!Cmd) { 399 if (Tok == "ASSERT") 400 Cmd = readAssert(); 401 else 402 Cmd = readOutputSectionDescription(Tok); 403 } 404 Script->Opt.Commands.push_back(Cmd); 405 } 406 } 407 408 static int precedence(StringRef Op) { 409 return StringSwitch<int>(Op) 410 .Cases("*", "/", 5) 411 .Cases("+", "-", 4) 412 .Cases("<<", ">>", 3) 413 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 414 .Cases("&", "|", 1) 415 .Default(-1); 416 } 417 418 StringMatcher ScriptParser::readFilePatterns() { 419 std::vector<StringRef> V; 420 while (!Error && !consume(")")) 421 V.push_back(next()); 422 return StringMatcher(V); 423 } 424 425 SortSectionPolicy ScriptParser::readSortKind() { 426 if (consume("SORT") || consume("SORT_BY_NAME")) 427 return SortSectionPolicy::Name; 428 if (consume("SORT_BY_ALIGNMENT")) 429 return SortSectionPolicy::Alignment; 430 if (consume("SORT_BY_INIT_PRIORITY")) 431 return SortSectionPolicy::Priority; 432 if (consume("SORT_NONE")) 433 return SortSectionPolicy::None; 434 return SortSectionPolicy::Default; 435 } 436 437 // Reads SECTIONS command contents in the following form: 438 // 439 // <contents> ::= <elem>* 440 // <elem> ::= <exclude>? <glob-pattern> 441 // <exclude> ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")" 442 // 443 // For example, 444 // 445 // *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz) 446 // 447 // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o". 448 // The semantics of that is section .foo in any file, section .bar in 449 // any file but a.o, and section .baz in any file but b.o. 450 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 451 std::vector<SectionPattern> Ret; 452 while (!Error && peek() != ")") { 453 StringMatcher ExcludeFilePat; 454 if (consume("EXCLUDE_FILE")) { 455 expect("("); 456 ExcludeFilePat = readFilePatterns(); 457 } 458 459 std::vector<StringRef> V; 460 while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") 461 V.push_back(next()); 462 463 if (!V.empty()) 464 Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); 465 else 466 setError("section pattern is expected"); 467 } 468 return Ret; 469 } 470 471 // Reads contents of "SECTIONS" directive. That directive contains a 472 // list of glob patterns for input sections. The grammar is as follows. 473 // 474 // <patterns> ::= <section-list> 475 // | <sort> "(" <section-list> ")" 476 // | <sort> "(" <sort> "(" <section-list> ")" ")" 477 // 478 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" 479 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" 480 // 481 // <section-list> is parsed by readInputSectionsList(). 482 InputSectionDescription * 483 ScriptParser::readInputSectionRules(StringRef FilePattern) { 484 auto *Cmd = make<InputSectionDescription>(FilePattern); 485 expect("("); 486 487 while (!Error && !consume(")")) { 488 SortSectionPolicy Outer = readSortKind(); 489 SortSectionPolicy Inner = SortSectionPolicy::Default; 490 std::vector<SectionPattern> V; 491 if (Outer != SortSectionPolicy::Default) { 492 expect("("); 493 Inner = readSortKind(); 494 if (Inner != SortSectionPolicy::Default) { 495 expect("("); 496 V = readInputSectionsList(); 497 expect(")"); 498 } else { 499 V = readInputSectionsList(); 500 } 501 expect(")"); 502 } else { 503 V = readInputSectionsList(); 504 } 505 506 for (SectionPattern &Pat : V) { 507 Pat.SortInner = Inner; 508 Pat.SortOuter = Outer; 509 } 510 511 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 512 } 513 return Cmd; 514 } 515 516 InputSectionDescription * 517 ScriptParser::readInputSectionDescription(StringRef Tok) { 518 // Input section wildcard can be surrounded by KEEP. 519 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 520 if (Tok == "KEEP") { 521 expect("("); 522 StringRef FilePattern = next(); 523 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 524 expect(")"); 525 Script->Opt.KeptSections.push_back(Cmd); 526 return Cmd; 527 } 528 return readInputSectionRules(Tok); 529 } 530 531 void ScriptParser::readSort() { 532 expect("("); 533 expect("CONSTRUCTORS"); 534 expect(")"); 535 } 536 537 AssertCommand *ScriptParser::readAssert() { 538 return make<AssertCommand>(readAssertExpr()); 539 } 540 541 Expr ScriptParser::readAssertExpr() { 542 expect("("); 543 Expr E = readExpr(); 544 expect(","); 545 StringRef Msg = unquote(next()); 546 expect(")"); 547 548 return [=] { 549 if (!E().getValue()) 550 error(Msg); 551 return Script->getDot(); 552 }; 553 } 554 555 // Reads a FILL(expr) command. We handle the FILL command as an 556 // alias for =fillexp section attribute, which is different from 557 // what GNU linkers do. 558 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 559 uint32_t ScriptParser::readFill() { 560 expect("("); 561 uint32_t V = parseFill(next()); 562 expect(")"); 563 return V; 564 } 565 566 OutputSectionCommand * 567 ScriptParser::readOutputSectionDescription(StringRef OutSec) { 568 OutputSectionCommand *Cmd = 569 Script->createOutputSectionCommand(OutSec, getCurrentLocation()); 570 571 // Read an address expression. 572 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html 573 if (peek() != ":") 574 Cmd->AddrExpr = readExpr(); 575 576 expect(":"); 577 578 if (consume("AT")) 579 Cmd->LMAExpr = readParenExpr(); 580 if (consume("ALIGN")) 581 Cmd->AlignExpr = readParenExpr(); 582 if (consume("SUBALIGN")) 583 Cmd->SubalignExpr = readParenExpr(); 584 585 // Parse constraints. 586 if (consume("ONLY_IF_RO")) 587 Cmd->Constraint = ConstraintKind::ReadOnly; 588 if (consume("ONLY_IF_RW")) 589 Cmd->Constraint = ConstraintKind::ReadWrite; 590 expect("{"); 591 592 while (!Error && !consume("}")) { 593 StringRef Tok = next(); 594 if (Tok == ";") { 595 // Empty commands are allowed. Do nothing here. 596 } else if (SymbolAssignment *Assign = readProvideOrAssignment(Tok)) { 597 Cmd->Commands.push_back(Assign); 598 } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) { 599 Cmd->Commands.push_back(Data); 600 } else if (Tok == "ASSERT") { 601 Cmd->Commands.push_back(readAssert()); 602 expect(";"); 603 } else if (Tok == "CONSTRUCTORS") { 604 // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors 605 // by name. This is for very old file formats such as ECOFF/XCOFF. 606 // For ELF, we should ignore. 607 } else if (Tok == "FILL") { 608 Cmd->Filler = readFill(); 609 } else if (Tok == "SORT") { 610 readSort(); 611 } else if (peek() == "(") { 612 Cmd->Commands.push_back(readInputSectionDescription(Tok)); 613 } else { 614 setError("unknown command " + Tok); 615 } 616 } 617 618 if (consume(">")) 619 Cmd->MemoryRegionName = next(); 620 621 Cmd->Phdrs = readOutputSectionPhdrs(); 622 623 if (consume("=")) 624 Cmd->Filler = parseFill(next()); 625 else if (peek().startswith("=")) 626 Cmd->Filler = parseFill(next().drop_front()); 627 628 // Consume optional comma following output section command. 629 consume(","); 630 631 return Cmd; 632 } 633 634 // Parses a given string as a octal/decimal/hexadecimal number and 635 // returns it as a big-endian number. Used for `=<fillexp>`. 636 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 637 // 638 // When reading a hexstring, ld.bfd handles it as a blob of arbitrary 639 // size, while ld.gold always handles it as a 32-bit big-endian number. 640 // We are compatible with ld.gold because it's easier to implement. 641 uint32_t ScriptParser::parseFill(StringRef Tok) { 642 uint32_t V = 0; 643 if (!to_integer(Tok, V)) 644 setError("invalid filler expression: " + Tok); 645 646 uint32_t Buf; 647 write32be(&Buf, V); 648 return Buf; 649 } 650 651 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { 652 expect("("); 653 SymbolAssignment *Cmd = readAssignment(next()); 654 Cmd->Provide = Provide; 655 Cmd->Hidden = Hidden; 656 expect(")"); 657 expect(";"); 658 return Cmd; 659 } 660 661 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) { 662 SymbolAssignment *Cmd = nullptr; 663 if (peek() == "=" || peek() == "+=") { 664 Cmd = readAssignment(Tok); 665 expect(";"); 666 } else if (Tok == "PROVIDE") { 667 Cmd = readProvideHidden(true, false); 668 } else if (Tok == "HIDDEN") { 669 Cmd = readProvideHidden(false, true); 670 } else if (Tok == "PROVIDE_HIDDEN") { 671 Cmd = readProvideHidden(true, true); 672 } 673 return Cmd; 674 } 675 676 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { 677 StringRef Op = next(); 678 assert(Op == "=" || Op == "+="); 679 Expr E = readExpr(); 680 if (Op == "+=") { 681 std::string Loc = getCurrentLocation(); 682 E = [=] { return add(Script->getSymbolValue(Loc, Name), E()); }; 683 } 684 return make<SymbolAssignment>(Name, E, getCurrentLocation()); 685 } 686 687 // This is an operator-precedence parser to parse a linker 688 // script expression. 689 Expr ScriptParser::readExpr() { 690 // Our lexer is context-aware. Set the in-expression bit so that 691 // they apply different tokenization rules. 692 bool Orig = InExpr; 693 InExpr = true; 694 Expr E = readExpr1(readPrimary(), 0); 695 InExpr = Orig; 696 return E; 697 } 698 699 static Expr combine(StringRef Op, Expr L, Expr R) { 700 if (Op == "+") 701 return [=] { return add(L(), R()); }; 702 if (Op == "-") 703 return [=] { return sub(L(), R()); }; 704 if (Op == "*") 705 return [=] { return mul(L(), R()); }; 706 if (Op == "/") 707 return [=] { return div(L(), R()); }; 708 if (Op == "<<") 709 return [=] { return L().getValue() << R().getValue(); }; 710 if (Op == ">>") 711 return [=] { return L().getValue() >> R().getValue(); }; 712 if (Op == "<") 713 return [=] { return L().getValue() < R().getValue(); }; 714 if (Op == ">") 715 return [=] { return L().getValue() > R().getValue(); }; 716 if (Op == ">=") 717 return [=] { return L().getValue() >= R().getValue(); }; 718 if (Op == "<=") 719 return [=] { return L().getValue() <= R().getValue(); }; 720 if (Op == "==") 721 return [=] { return L().getValue() == R().getValue(); }; 722 if (Op == "!=") 723 return [=] { return L().getValue() != R().getValue(); }; 724 if (Op == "&") 725 return [=] { return bitAnd(L(), R()); }; 726 if (Op == "|") 727 return [=] { return bitOr(L(), R()); }; 728 llvm_unreachable("invalid operator"); 729 } 730 731 // This is a part of the operator-precedence parser. This function 732 // assumes that the remaining token stream starts with an operator. 733 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { 734 while (!atEOF() && !Error) { 735 // Read an operator and an expression. 736 if (consume("?")) 737 return readTernary(Lhs); 738 StringRef Op1 = peek(); 739 if (precedence(Op1) < MinPrec) 740 break; 741 skip(); 742 Expr Rhs = readPrimary(); 743 744 // Evaluate the remaining part of the expression first if the 745 // next operator has greater precedence than the previous one. 746 // For example, if we have read "+" and "3", and if the next 747 // operator is "*", then we'll evaluate 3 * ... part first. 748 while (!atEOF()) { 749 StringRef Op2 = peek(); 750 if (precedence(Op2) <= precedence(Op1)) 751 break; 752 Rhs = readExpr1(Rhs, precedence(Op2)); 753 } 754 755 Lhs = combine(Op1, Lhs, Rhs); 756 } 757 return Lhs; 758 } 759 760 uint64_t static getConstant(StringRef S) { 761 if (S == "COMMONPAGESIZE") 762 return Target->PageSize; 763 if (S == "MAXPAGESIZE") 764 return Config->MaxPageSize; 765 error("unknown constant: " + S); 766 return 0; 767 } 768 769 // Parses Tok as an integer. It recognizes hexadecimal (prefixed with 770 // "0x" or suffixed with "H") and decimal numbers. Decimal numbers may 771 // have "K" (Ki) or "M" (Mi) suffixes. 772 static Optional<uint64_t> parseInt(StringRef Tok) { 773 // Negative number 774 if (Tok.startswith("-")) { 775 if (Optional<uint64_t> Val = parseInt(Tok.substr(1))) 776 return -*Val; 777 return None; 778 } 779 780 // Hexadecimal 781 uint64_t Val; 782 if (Tok.startswith_lower("0x") && to_integer(Tok.substr(2), Val, 16)) 783 return Val; 784 if (Tok.endswith_lower("H") && to_integer(Tok.drop_back(), Val, 16)) 785 return Val; 786 787 // Decimal 788 if (Tok.endswith_lower("K")) { 789 if (!to_integer(Tok.drop_back(), Val, 10)) 790 return None; 791 return Val * 1024; 792 } 793 if (Tok.endswith_lower("M")) { 794 if (!to_integer(Tok.drop_back(), Val, 10)) 795 return None; 796 return Val * 1024 * 1024; 797 } 798 if (!to_integer(Tok, Val, 10)) 799 return None; 800 return Val; 801 } 802 803 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { 804 int Size = StringSwitch<int>(Tok) 805 .Case("BYTE", 1) 806 .Case("SHORT", 2) 807 .Case("LONG", 4) 808 .Case("QUAD", 8) 809 .Default(-1); 810 if (Size == -1) 811 return nullptr; 812 813 return make<BytesDataCommand>(readParenExpr(), Size); 814 } 815 816 StringRef ScriptParser::readParenLiteral() { 817 expect("("); 818 StringRef Tok = next(); 819 expect(")"); 820 return Tok; 821 } 822 823 OutputSection *ScriptParser::checkSection(OutputSectionCommand *Cmd, 824 StringRef Location) { 825 if (Cmd->Location.empty() && Script->ErrorOnMissingSection) 826 error(Location + ": undefined section " + Cmd->Name); 827 if (Cmd->Sec) 828 return Cmd->Sec; 829 static OutputSection Dummy("", 0, 0); 830 return &Dummy; 831 } 832 833 Expr ScriptParser::readPrimary() { 834 if (peek() == "(") 835 return readParenExpr(); 836 837 if (consume("~")) { 838 Expr E = readPrimary(); 839 return [=] { return ~E().getValue(); }; 840 } 841 if (consume("-")) { 842 Expr E = readPrimary(); 843 return [=] { return -E().getValue(); }; 844 } 845 846 StringRef Tok = next(); 847 std::string Location = getCurrentLocation(); 848 849 // Built-in functions are parsed here. 850 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 851 if (Tok == "ABSOLUTE") { 852 Expr Inner = readParenExpr(); 853 return [=] { 854 ExprValue I = Inner(); 855 I.ForceAbsolute = true; 856 return I; 857 }; 858 } 859 if (Tok == "ADDR") { 860 StringRef Name = readParenLiteral(); 861 OutputSectionCommand *Cmd = Script->getOrCreateOutputSectionCommand(Name); 862 return [=]() -> ExprValue { return {checkSection(Cmd, Location), 0}; }; 863 } 864 if (Tok == "ALIGN") { 865 expect("("); 866 Expr E = readExpr(); 867 if (consume(")")) 868 return [=] { return alignTo(Script->getDot(), E().getValue()); }; 869 expect(","); 870 Expr E2 = readExpr(); 871 expect(")"); 872 return [=] { 873 ExprValue V = E(); 874 V.Alignment = E2().getValue(); 875 return V; 876 }; 877 } 878 if (Tok == "ALIGNOF") { 879 StringRef Name = readParenLiteral(); 880 OutputSectionCommand *Cmd = Script->getOrCreateOutputSectionCommand(Name); 881 return [=] { return checkSection(Cmd, Location)->Alignment; }; 882 } 883 if (Tok == "ASSERT") 884 return readAssertExpr(); 885 if (Tok == "CONSTANT") { 886 StringRef Name = readParenLiteral(); 887 return [=] { return getConstant(Name); }; 888 } 889 if (Tok == "DATA_SEGMENT_ALIGN") { 890 expect("("); 891 Expr E = readExpr(); 892 expect(","); 893 readExpr(); 894 expect(")"); 895 return [=] { return alignTo(Script->getDot(), E().getValue()); }; 896 } 897 if (Tok == "DATA_SEGMENT_END") { 898 expect("("); 899 expect("."); 900 expect(")"); 901 return [] { return Script->getDot(); }; 902 } 903 if (Tok == "DATA_SEGMENT_RELRO_END") { 904 // GNU linkers implements more complicated logic to handle 905 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and 906 // just align to the next page boundary for simplicity. 907 expect("("); 908 readExpr(); 909 expect(","); 910 readExpr(); 911 expect(")"); 912 return [] { return alignTo(Script->getDot(), Target->PageSize); }; 913 } 914 if (Tok == "DEFINED") { 915 StringRef Name = readParenLiteral(); 916 return [=] { return Script->isDefined(Name) ? 1 : 0; }; 917 } 918 if (Tok == "LENGTH") { 919 StringRef Name = readParenLiteral(); 920 if (Script->Opt.MemoryRegions.count(Name) == 0) 921 setError("memory region not defined: " + Name); 922 return [=] { return Script->Opt.MemoryRegions[Name].Length; }; 923 } 924 if (Tok == "LOADADDR") { 925 StringRef Name = readParenLiteral(); 926 OutputSectionCommand *Cmd = Script->getOrCreateOutputSectionCommand(Name); 927 return [=] { return checkSection(Cmd, Location)->getLMA(); }; 928 } 929 if (Tok == "ORIGIN") { 930 StringRef Name = readParenLiteral(); 931 if (Script->Opt.MemoryRegions.count(Name) == 0) 932 setError("memory region not defined: " + Name); 933 return [=] { return Script->Opt.MemoryRegions[Name].Origin; }; 934 } 935 if (Tok == "SEGMENT_START") { 936 expect("("); 937 skip(); 938 expect(","); 939 Expr E = readExpr(); 940 expect(")"); 941 return [=] { return E(); }; 942 } 943 if (Tok == "SIZEOF") { 944 StringRef Name = readParenLiteral(); 945 OutputSectionCommand *Cmd = Script->getOrCreateOutputSectionCommand(Name); 946 // Linker script does not create an output section if its content is empty. 947 // We want to allow SIZEOF(.foo) where .foo is a section which happened to 948 // be empty. 949 return [=] { return Cmd->Sec ? Cmd->Sec->Size : 0; }; 950 } 951 if (Tok == "SIZEOF_HEADERS") 952 return [=] { return elf::getHeaderSize(); }; 953 954 // Tok is the dot. 955 if (Tok == ".") 956 return [=] { return Script->getSymbolValue(Location, Tok); }; 957 958 // Tok is a literal number. 959 if (Optional<uint64_t> Val = parseInt(Tok)) 960 return [=] { return *Val; }; 961 962 // Tok is a symbol name. 963 if (!isValidCIdentifier(Tok)) 964 setError("malformed number: " + Tok); 965 Script->Opt.ReferencedSymbols.push_back(Tok); 966 return [=] { return Script->getSymbolValue(Location, Tok); }; 967 } 968 969 Expr ScriptParser::readTernary(Expr Cond) { 970 Expr L = readExpr(); 971 expect(":"); 972 Expr R = readExpr(); 973 return [=] { return Cond().getValue() ? L() : R(); }; 974 } 975 976 Expr ScriptParser::readParenExpr() { 977 expect("("); 978 Expr E = readExpr(); 979 expect(")"); 980 return E; 981 } 982 983 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 984 std::vector<StringRef> Phdrs; 985 while (!Error && peek().startswith(":")) { 986 StringRef Tok = next(); 987 Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); 988 } 989 return Phdrs; 990 } 991 992 // Read a program header type name. The next token must be a 993 // name of a program header type or a constant (e.g. "0x3"). 994 unsigned ScriptParser::readPhdrType() { 995 StringRef Tok = next(); 996 if (Optional<uint64_t> Val = parseInt(Tok)) 997 return *Val; 998 999 unsigned Ret = StringSwitch<unsigned>(Tok) 1000 .Case("PT_NULL", PT_NULL) 1001 .Case("PT_LOAD", PT_LOAD) 1002 .Case("PT_DYNAMIC", PT_DYNAMIC) 1003 .Case("PT_INTERP", PT_INTERP) 1004 .Case("PT_NOTE", PT_NOTE) 1005 .Case("PT_SHLIB", PT_SHLIB) 1006 .Case("PT_PHDR", PT_PHDR) 1007 .Case("PT_TLS", PT_TLS) 1008 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1009 .Case("PT_GNU_STACK", PT_GNU_STACK) 1010 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1011 .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) 1012 .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) 1013 .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) 1014 .Default(-1); 1015 1016 if (Ret == (unsigned)-1) { 1017 setError("invalid program header type: " + Tok); 1018 return PT_NULL; 1019 } 1020 return Ret; 1021 } 1022 1023 // Reads an anonymous version declaration. 1024 void ScriptParser::readAnonymousDeclaration() { 1025 std::vector<SymbolVersion> Locals; 1026 std::vector<SymbolVersion> Globals; 1027 std::tie(Locals, Globals) = readSymbols(); 1028 1029 for (SymbolVersion V : Locals) { 1030 if (V.Name == "*") 1031 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1032 else 1033 Config->VersionScriptLocals.push_back(V); 1034 } 1035 1036 for (SymbolVersion V : Globals) 1037 Config->VersionScriptGlobals.push_back(V); 1038 1039 expect(";"); 1040 } 1041 1042 // Reads a non-anonymous version definition, 1043 // e.g. "VerStr { global: foo; bar; local: *; };". 1044 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1045 // Read a symbol list. 1046 std::vector<SymbolVersion> Locals; 1047 std::vector<SymbolVersion> Globals; 1048 std::tie(Locals, Globals) = readSymbols(); 1049 1050 for (SymbolVersion V : Locals) { 1051 if (V.Name == "*") 1052 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1053 else 1054 Config->VersionScriptLocals.push_back(V); 1055 } 1056 1057 // Create a new version definition and add that to the global symbols. 1058 VersionDefinition Ver; 1059 Ver.Name = VerStr; 1060 Ver.Globals = Globals; 1061 1062 // User-defined version number starts from 2 because 0 and 1 are 1063 // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively. 1064 Ver.Id = Config->VersionDefinitions.size() + 2; 1065 Config->VersionDefinitions.push_back(Ver); 1066 1067 // Each version may have a parent version. For example, "Ver2" 1068 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" 1069 // as a parent. This version hierarchy is, probably against your 1070 // instinct, purely for hint; the runtime doesn't care about it 1071 // at all. In LLD, we simply ignore it. 1072 if (peek() != ";") 1073 skip(); 1074 expect(";"); 1075 } 1076 1077 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 1078 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 1079 ScriptParser::readSymbols() { 1080 std::vector<SymbolVersion> Locals; 1081 std::vector<SymbolVersion> Globals; 1082 std::vector<SymbolVersion> *V = &Globals; 1083 1084 while (!Error) { 1085 if (consume("}")) 1086 break; 1087 if (consumeLabel("local")) { 1088 V = &Locals; 1089 continue; 1090 } 1091 if (consumeLabel("global")) { 1092 V = &Globals; 1093 continue; 1094 } 1095 1096 if (consume("extern")) { 1097 std::vector<SymbolVersion> Ext = readVersionExtern(); 1098 V->insert(V->end(), Ext.begin(), Ext.end()); 1099 } else { 1100 StringRef Tok = next(); 1101 V->push_back({unquote(Tok), false, hasWildcard(Tok)}); 1102 } 1103 expect(";"); 1104 } 1105 return {Locals, Globals}; 1106 } 1107 1108 // Reads an "extern C++" directive, e.g., 1109 // "extern "C++" { ns::*; "f(int, double)"; };" 1110 std::vector<SymbolVersion> ScriptParser::readVersionExtern() { 1111 StringRef Tok = next(); 1112 bool IsCXX = Tok == "\"C++\""; 1113 if (!IsCXX && Tok != "\"C\"") 1114 setError("Unknown language"); 1115 expect("{"); 1116 1117 std::vector<SymbolVersion> Ret; 1118 while (!Error && peek() != "}") { 1119 StringRef Tok = next(); 1120 bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); 1121 Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); 1122 expect(";"); 1123 } 1124 1125 expect("}"); 1126 return Ret; 1127 } 1128 1129 uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2, 1130 StringRef S3) { 1131 if (!consume(S1) && !consume(S2) && !consume(S3)) { 1132 setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); 1133 return 0; 1134 } 1135 expect("="); 1136 return readExpr()().getValue(); 1137 } 1138 1139 // Parse the MEMORY command as specified in: 1140 // https://sourceware.org/binutils/docs/ld/MEMORY.html 1141 // 1142 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... } 1143 void ScriptParser::readMemory() { 1144 expect("{"); 1145 while (!Error && !consume("}")) { 1146 StringRef Name = next(); 1147 1148 uint32_t Flags = 0; 1149 uint32_t NegFlags = 0; 1150 if (consume("(")) { 1151 std::tie(Flags, NegFlags) = readMemoryAttributes(); 1152 expect(")"); 1153 } 1154 expect(":"); 1155 1156 uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); 1157 expect(","); 1158 uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); 1159 1160 // Add the memory region to the region map (if it doesn't already exist). 1161 auto It = Script->Opt.MemoryRegions.find(Name); 1162 if (It != Script->Opt.MemoryRegions.end()) 1163 setError("region '" + Name + "' already defined"); 1164 else 1165 Script->Opt.MemoryRegions[Name] = {Name, Origin, Length, 1166 Origin, Flags, NegFlags}; 1167 } 1168 } 1169 1170 // This function parses the attributes used to match against section 1171 // flags when placing output sections in a memory region. These flags 1172 // are only used when an explicit memory region name is not used. 1173 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { 1174 uint32_t Flags = 0; 1175 uint32_t NegFlags = 0; 1176 bool Invert = false; 1177 1178 for (char C : next().lower()) { 1179 uint32_t Flag = 0; 1180 if (C == '!') 1181 Invert = !Invert; 1182 else if (C == 'w') 1183 Flag = SHF_WRITE; 1184 else if (C == 'x') 1185 Flag = SHF_EXECINSTR; 1186 else if (C == 'a') 1187 Flag = SHF_ALLOC; 1188 else if (C != 'r') 1189 setError("invalid memory region attribute"); 1190 1191 if (Invert) 1192 NegFlags |= Flag; 1193 else 1194 Flags |= Flag; 1195 } 1196 return {Flags, NegFlags}; 1197 } 1198 1199 void elf::readLinkerScript(MemoryBufferRef MB) { 1200 ScriptParser(MB).readLinkerScript(); 1201 } 1202 1203 void elf::readVersionScript(MemoryBufferRef MB) { 1204 ScriptParser(MB).readVersionScript(); 1205 } 1206 1207 void elf::readDynamicList(MemoryBufferRef MB) { 1208 ScriptParser(MB).readDynamicList(); 1209 } 1210