1 //===- ScriptParser.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains a recursive-descendent parser for linker scripts. 11 // Parsed results are stored to Config and Script global objects. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ScriptParser.h" 16 #include "Config.h" 17 #include "Driver.h" 18 #include "InputSection.h" 19 #include "LinkerScript.h" 20 #include "Memory.h" 21 #include "OutputSections.h" 22 #include "ScriptLexer.h" 23 #include "Symbols.h" 24 #include "Target.h" 25 #include "llvm/ADT/SmallString.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/ADT/StringSwitch.h" 28 #include "llvm/Support/Casting.h" 29 #include "llvm/Support/ELF.h" 30 #include "llvm/Support/ErrorHandling.h" 31 #include "llvm/Support/FileSystem.h" 32 #include "llvm/Support/Path.h" 33 #include <cassert> 34 #include <limits> 35 #include <vector> 36 37 using namespace llvm; 38 using namespace llvm::ELF; 39 using namespace llvm::support::endian; 40 using namespace lld; 41 using namespace lld::elf; 42 43 static bool isUnderSysroot(StringRef Path); 44 45 namespace { 46 class ScriptParser final : ScriptLexer { 47 public: 48 ScriptParser(MemoryBufferRef MB) 49 : ScriptLexer(MB), 50 IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} 51 52 void readLinkerScript(); 53 void readVersionScript(); 54 void readDynamicList(); 55 56 private: 57 void addFile(StringRef Path); 58 59 void readAsNeeded(); 60 void readEntry(); 61 void readExtern(); 62 void readGroup(); 63 void readInclude(); 64 void readMemory(); 65 void readOutput(); 66 void readOutputArch(); 67 void readOutputFormat(); 68 void readPhdrs(); 69 void readSearchDir(); 70 void readSections(); 
71 void readVersion(); 72 void readVersionScriptCommand(); 73 74 SymbolAssignment *readAssignment(StringRef Name); 75 BytesDataCommand *readBytesDataCommand(StringRef Tok); 76 uint32_t readFill(); 77 uint32_t parseFill(StringRef Tok); 78 OutputSectionCommand *readOutputSectionDescription(StringRef OutSec); 79 std::vector<StringRef> readOutputSectionPhdrs(); 80 InputSectionDescription *readInputSectionDescription(StringRef Tok); 81 StringMatcher readFilePatterns(); 82 std::vector<SectionPattern> readInputSectionsList(); 83 InputSectionDescription *readInputSectionRules(StringRef FilePattern); 84 unsigned readPhdrType(); 85 SortSectionPolicy readSortKind(); 86 SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); 87 SymbolAssignment *readProvideOrAssignment(StringRef Tok); 88 void readSort(); 89 AssertCommand *readAssert(); 90 Expr readAssertExpr(); 91 92 uint64_t readMemoryAssignment(StringRef, StringRef, StringRef); 93 std::pair<uint32_t, uint32_t> readMemoryAttributes(); 94 95 Expr readExpr(); 96 Expr readExpr1(Expr Lhs, int MinPrec); 97 StringRef readParenLiteral(); 98 Expr readPrimary(); 99 Expr readTernary(Expr Cond); 100 Expr readParenExpr(); 101 102 // For parsing version script. 103 std::vector<SymbolVersion> readVersionExtern(); 104 void readAnonymousDeclaration(); 105 void readVersionDeclaration(StringRef VerStr); 106 107 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 108 readSymbols(); 109 110 bool IsUnderSysroot; 111 }; 112 } // namespace 113 114 static bool isUnderSysroot(StringRef Path) { 115 if (Config->Sysroot == "") 116 return false; 117 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 118 if (sys::fs::equivalent(Config->Sysroot, Path)) 119 return true; 120 return false; 121 } 122 123 // Some operations only support one non absolute value. Move the 124 // absolute one to the right hand side for convenience. 
125 static void moveAbsRight(ExprValue &A, ExprValue &B) { 126 if (A.isAbsolute()) 127 std::swap(A, B); 128 if (!B.isAbsolute()) 129 error("At least one side of the expression must be absolute"); 130 } 131 132 static ExprValue add(ExprValue A, ExprValue B) { 133 moveAbsRight(A, B); 134 return {A.Sec, A.ForceAbsolute, A.Val + B.getValue()}; 135 } 136 137 static ExprValue sub(ExprValue A, ExprValue B) { 138 return {A.Sec, A.Val - B.getValue()}; 139 } 140 141 static ExprValue mul(ExprValue A, ExprValue B) { 142 return A.getValue() * B.getValue(); 143 } 144 145 static ExprValue div(ExprValue A, ExprValue B) { 146 if (uint64_t BV = B.getValue()) 147 return A.getValue() / BV; 148 error("division by zero"); 149 return 0; 150 } 151 152 static ExprValue bitAnd(ExprValue A, ExprValue B) { 153 moveAbsRight(A, B); 154 return {A.Sec, A.ForceAbsolute, 155 (A.getValue() & B.getValue()) - A.getSecAddr()}; 156 } 157 158 static ExprValue bitOr(ExprValue A, ExprValue B) { 159 moveAbsRight(A, B); 160 return {A.Sec, A.ForceAbsolute, 161 (A.getValue() | B.getValue()) - A.getSecAddr()}; 162 } 163 164 void ScriptParser::readDynamicList() { 165 expect("{"); 166 readAnonymousDeclaration(); 167 if (!atEOF()) 168 setError("EOF expected, but got " + next()); 169 } 170 171 void ScriptParser::readVersionScript() { 172 readVersionScriptCommand(); 173 if (!atEOF()) 174 setError("EOF expected, but got " + next()); 175 } 176 177 void ScriptParser::readVersionScriptCommand() { 178 if (consume("{")) { 179 readAnonymousDeclaration(); 180 return; 181 } 182 183 while (!atEOF() && !Error && peek() != "}") { 184 StringRef VerStr = next(); 185 if (VerStr == "{") { 186 setError("anonymous version definition is used in " 187 "combination with other version definitions"); 188 return; 189 } 190 expect("{"); 191 readVersionDeclaration(VerStr); 192 } 193 } 194 195 void ScriptParser::readVersion() { 196 expect("{"); 197 readVersionScriptCommand(); 198 expect("}"); 199 } 200 201 void 
ScriptParser::readLinkerScript() { 202 while (!atEOF()) { 203 StringRef Tok = next(); 204 if (Tok == ";") 205 continue; 206 207 if (Tok == "ASSERT") { 208 Script->Opt.Commands.push_back(readAssert()); 209 } else if (Tok == "ENTRY") { 210 readEntry(); 211 } else if (Tok == "EXTERN") { 212 readExtern(); 213 } else if (Tok == "GROUP" || Tok == "INPUT") { 214 readGroup(); 215 } else if (Tok == "INCLUDE") { 216 readInclude(); 217 } else if (Tok == "MEMORY") { 218 readMemory(); 219 } else if (Tok == "OUTPUT") { 220 readOutput(); 221 } else if (Tok == "OUTPUT_ARCH") { 222 readOutputArch(); 223 } else if (Tok == "OUTPUT_FORMAT") { 224 readOutputFormat(); 225 } else if (Tok == "PHDRS") { 226 readPhdrs(); 227 } else if (Tok == "SEARCH_DIR") { 228 readSearchDir(); 229 } else if (Tok == "SECTIONS") { 230 readSections(); 231 } else if (Tok == "VERSION") { 232 readVersion(); 233 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { 234 Script->Opt.Commands.push_back(Cmd); 235 } else { 236 setError("unknown directive: " + Tok); 237 } 238 } 239 } 240 241 void ScriptParser::addFile(StringRef S) { 242 if (IsUnderSysroot && S.startswith("/")) { 243 SmallString<128> PathData; 244 StringRef Path = (Config->Sysroot + S).toStringRef(PathData); 245 if (sys::fs::exists(Path)) { 246 Driver->addFile(Saver.save(Path), /*WithLOption=*/false); 247 return; 248 } 249 } 250 251 if (sys::path::is_absolute(S)) { 252 Driver->addFile(S, /*WithLOption=*/false); 253 } else if (S.startswith("=")) { 254 if (Config->Sysroot.empty()) 255 Driver->addFile(S.substr(1), /*WithLOption=*/false); 256 else 257 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)), 258 /*WithLOption=*/false); 259 } else if (S.startswith("-l")) { 260 Driver->addLibrary(S.substr(2)); 261 } else if (sys::fs::exists(S)) { 262 Driver->addFile(S, /*WithLOption=*/false); 263 } else { 264 if (Optional<std::string> Path = findFromSearchPaths(S)) 265 Driver->addFile(Saver.save(*Path), /*WithLOption=*/true); 266 else 267 
setError("unable to find " + S); 268 } 269 } 270 271 void ScriptParser::readAsNeeded() { 272 expect("("); 273 bool Orig = Config->AsNeeded; 274 Config->AsNeeded = true; 275 while (!Error && !consume(")")) 276 addFile(unquote(next())); 277 Config->AsNeeded = Orig; 278 } 279 280 void ScriptParser::readEntry() { 281 // -e <symbol> takes predecence over ENTRY(<symbol>). 282 expect("("); 283 StringRef Tok = next(); 284 if (Config->Entry.empty()) 285 Config->Entry = Tok; 286 expect(")"); 287 } 288 289 void ScriptParser::readExtern() { 290 expect("("); 291 while (!Error && !consume(")")) 292 Config->Undefined.push_back(next()); 293 } 294 295 void ScriptParser::readGroup() { 296 expect("("); 297 while (!Error && !consume(")")) { 298 if (consume("AS_NEEDED")) 299 readAsNeeded(); 300 else 301 addFile(unquote(next())); 302 } 303 } 304 305 void ScriptParser::readInclude() { 306 StringRef Tok = unquote(next()); 307 308 // https://sourceware.org/binutils/docs/ld/File-Commands.html: 309 // The file will be searched for in the current directory, and in any 310 // directory specified with the -L option. 311 if (sys::fs::exists(Tok)) { 312 if (Optional<MemoryBufferRef> MB = readFile(Tok)) 313 tokenize(*MB); 314 return; 315 } 316 if (Optional<std::string> Path = findFromSearchPaths(Tok)) { 317 if (Optional<MemoryBufferRef> MB = readFile(*Path)) 318 tokenize(*MB); 319 return; 320 } 321 setError("cannot open " + Tok); 322 } 323 324 void ScriptParser::readOutput() { 325 // -o <file> takes predecence over OUTPUT(<file>). 326 expect("("); 327 StringRef Tok = next(); 328 if (Config->OutputFile.empty()) 329 Config->OutputFile = unquote(Tok); 330 expect(")"); 331 } 332 333 void ScriptParser::readOutputArch() { 334 // OUTPUT_ARCH is ignored for now. 335 expect("("); 336 while (!Error && !consume(")")) 337 skip(); 338 } 339 340 void ScriptParser::readOutputFormat() { 341 // Error checking only for now. 
342 expect("("); 343 skip(); 344 if (consume(")")) 345 return; 346 expect(","); 347 skip(); 348 expect(","); 349 skip(); 350 expect(")"); 351 } 352 353 void ScriptParser::readPhdrs() { 354 expect("{"); 355 while (!Error && !consume("}")) { 356 Script->Opt.PhdrsCommands.push_back( 357 {next(), PT_NULL, false, false, UINT_MAX, nullptr}); 358 359 PhdrsCommand &PhdrCmd = Script->Opt.PhdrsCommands.back(); 360 PhdrCmd.Type = readPhdrType(); 361 362 while (!Error && !consume(";")) { 363 if (consume("FILEHDR")) 364 PhdrCmd.HasFilehdr = true; 365 else if (consume("PHDRS")) 366 PhdrCmd.HasPhdrs = true; 367 else if (consume("AT")) 368 PhdrCmd.LMAExpr = readParenExpr(); 369 else if (consume("FLAGS")) 370 PhdrCmd.Flags = readParenExpr()().getValue(); 371 else 372 setError("unexpected header attribute: " + next()); 373 } 374 } 375 } 376 377 void ScriptParser::readSearchDir() { 378 expect("("); 379 StringRef Tok = next(); 380 if (!Config->Nostdlib) 381 Config->SearchPaths.push_back(unquote(Tok)); 382 expect(")"); 383 } 384 385 void ScriptParser::readSections() { 386 Script->Opt.HasSections = true; 387 388 // -no-rosegment is used to avoid placing read only non-executable sections in 389 // their own segment. We do the same if SECTIONS command is present in linker 390 // script. See comment for computeFlags(). 
391 Config->SingleRoRx = true; 392 393 expect("{"); 394 while (!Error && !consume("}")) { 395 StringRef Tok = next(); 396 BaseCommand *Cmd = readProvideOrAssignment(Tok); 397 if (!Cmd) { 398 if (Tok == "ASSERT") 399 Cmd = readAssert(); 400 else 401 Cmd = readOutputSectionDescription(Tok); 402 } 403 Script->Opt.Commands.push_back(Cmd); 404 } 405 } 406 407 static int precedence(StringRef Op) { 408 return StringSwitch<int>(Op) 409 .Cases("*", "/", 5) 410 .Cases("+", "-", 4) 411 .Cases("<<", ">>", 3) 412 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 413 .Cases("&", "|", 1) 414 .Default(-1); 415 } 416 417 StringMatcher ScriptParser::readFilePatterns() { 418 std::vector<StringRef> V; 419 while (!Error && !consume(")")) 420 V.push_back(next()); 421 return StringMatcher(V); 422 } 423 424 SortSectionPolicy ScriptParser::readSortKind() { 425 if (consume("SORT") || consume("SORT_BY_NAME")) 426 return SortSectionPolicy::Name; 427 if (consume("SORT_BY_ALIGNMENT")) 428 return SortSectionPolicy::Alignment; 429 if (consume("SORT_BY_INIT_PRIORITY")) 430 return SortSectionPolicy::Priority; 431 if (consume("SORT_NONE")) 432 return SortSectionPolicy::None; 433 return SortSectionPolicy::Default; 434 } 435 436 // Reads SECTIONS command contents in the following form: 437 // 438 // <contents> ::= <elem>* 439 // <elem> ::= <exclude>? <glob-pattern> 440 // <exclude> ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")" 441 // 442 // For example, 443 // 444 // *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz) 445 // 446 // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o". 447 // The semantics of that is section .foo in any file, section .bar in 448 // any file but a.o, and section .baz in any file but b.o. 
449 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 450 std::vector<SectionPattern> Ret; 451 while (!Error && peek() != ")") { 452 StringMatcher ExcludeFilePat; 453 if (consume("EXCLUDE_FILE")) { 454 expect("("); 455 ExcludeFilePat = readFilePatterns(); 456 } 457 458 std::vector<StringRef> V; 459 while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") 460 V.push_back(next()); 461 462 if (!V.empty()) 463 Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); 464 else 465 setError("section pattern is expected"); 466 } 467 return Ret; 468 } 469 470 // Reads contents of "SECTIONS" directive. That directive contains a 471 // list of glob patterns for input sections. The grammar is as follows. 472 // 473 // <patterns> ::= <section-list> 474 // | <sort> "(" <section-list> ")" 475 // | <sort> "(" <sort> "(" <section-list> ")" ")" 476 // 477 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" 478 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" 479 // 480 // <section-list> is parsed by readInputSectionsList(). 
481 InputSectionDescription * 482 ScriptParser::readInputSectionRules(StringRef FilePattern) { 483 auto *Cmd = make<InputSectionDescription>(FilePattern); 484 expect("("); 485 486 while (!Error && !consume(")")) { 487 SortSectionPolicy Outer = readSortKind(); 488 SortSectionPolicy Inner = SortSectionPolicy::Default; 489 std::vector<SectionPattern> V; 490 if (Outer != SortSectionPolicy::Default) { 491 expect("("); 492 Inner = readSortKind(); 493 if (Inner != SortSectionPolicy::Default) { 494 expect("("); 495 V = readInputSectionsList(); 496 expect(")"); 497 } else { 498 V = readInputSectionsList(); 499 } 500 expect(")"); 501 } else { 502 V = readInputSectionsList(); 503 } 504 505 for (SectionPattern &Pat : V) { 506 Pat.SortInner = Inner; 507 Pat.SortOuter = Outer; 508 } 509 510 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 511 } 512 return Cmd; 513 } 514 515 InputSectionDescription * 516 ScriptParser::readInputSectionDescription(StringRef Tok) { 517 // Input section wildcard can be surrounded by KEEP. 518 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 519 if (Tok == "KEEP") { 520 expect("("); 521 StringRef FilePattern = next(); 522 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 523 expect(")"); 524 Script->Opt.KeptSections.push_back(Cmd); 525 return Cmd; 526 } 527 return readInputSectionRules(Tok); 528 } 529 530 void ScriptParser::readSort() { 531 expect("("); 532 expect("CONSTRUCTORS"); 533 expect(")"); 534 } 535 536 AssertCommand *ScriptParser::readAssert() { 537 return make<AssertCommand>(readAssertExpr()); 538 } 539 540 Expr ScriptParser::readAssertExpr() { 541 expect("("); 542 Expr E = readExpr(); 543 expect(","); 544 StringRef Msg = unquote(next()); 545 expect(")"); 546 547 return [=] { 548 if (!E().getValue()) 549 error(Msg); 550 return Script->getDot(); 551 }; 552 } 553 554 // Reads a FILL(expr) command. 
We handle the FILL command as an 555 // alias for =fillexp section attribute, which is different from 556 // what GNU linkers do. 557 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 558 uint32_t ScriptParser::readFill() { 559 expect("("); 560 uint32_t V = parseFill(next()); 561 expect(")"); 562 return V; 563 } 564 565 OutputSectionCommand * 566 ScriptParser::readOutputSectionDescription(StringRef OutSec) { 567 OutputSectionCommand *Cmd = make<OutputSectionCommand>(OutSec); 568 Cmd->Location = getCurrentLocation(); 569 570 // Read an address expression. 571 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html 572 if (peek() != ":") 573 Cmd->AddrExpr = readExpr(); 574 575 expect(":"); 576 577 if (consume("AT")) 578 Cmd->LMAExpr = readParenExpr(); 579 if (consume("ALIGN")) 580 Cmd->AlignExpr = readParenExpr(); 581 if (consume("SUBALIGN")) 582 Cmd->SubalignExpr = readParenExpr(); 583 584 // Parse constraints. 585 if (consume("ONLY_IF_RO")) 586 Cmd->Constraint = ConstraintKind::ReadOnly; 587 if (consume("ONLY_IF_RW")) 588 Cmd->Constraint = ConstraintKind::ReadWrite; 589 expect("{"); 590 591 while (!Error && !consume("}")) { 592 StringRef Tok = next(); 593 if (Tok == ";") { 594 // Empty commands are allowed. Do nothing here. 595 } else if (SymbolAssignment *Assign = readProvideOrAssignment(Tok)) { 596 Cmd->Commands.push_back(Assign); 597 } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) { 598 Cmd->Commands.push_back(Data); 599 } else if (Tok == "ASSERT") { 600 Cmd->Commands.push_back(readAssert()); 601 expect(";"); 602 } else if (Tok == "CONSTRUCTORS") { 603 // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors 604 // by name. This is for very old file formats such as ECOFF/XCOFF. 605 // For ELF, we should ignore. 
606 } else if (Tok == "FILL") { 607 Cmd->Filler = readFill(); 608 } else if (Tok == "SORT") { 609 readSort(); 610 } else if (peek() == "(") { 611 Cmd->Commands.push_back(readInputSectionDescription(Tok)); 612 } else { 613 setError("unknown command " + Tok); 614 } 615 } 616 617 if (consume(">")) 618 Cmd->MemoryRegionName = next(); 619 620 Cmd->Phdrs = readOutputSectionPhdrs(); 621 622 if (consume("=")) 623 Cmd->Filler = parseFill(next()); 624 else if (peek().startswith("=")) 625 Cmd->Filler = parseFill(next().drop_front()); 626 627 // Consume optional comma following output section command. 628 consume(","); 629 630 return Cmd; 631 } 632 633 // Parses a given string as a octal/decimal/hexadecimal number and 634 // returns it as a big-endian number. Used for `=<fillexp>`. 635 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 636 // 637 // When reading a hexstring, ld.bfd handles it as a blob of arbitrary 638 // size, while ld.gold always handles it as a 32-bit big-endian number. 639 // We are compatible with ld.gold because it's easier to implement. 
uint32_t ScriptParser::parseFill(StringRef Tok) {
  // On a parse failure the error is recorded and V keeps its initial 0.
  uint32_t V = 0;
  if (Tok.getAsInteger(0, V))
    setError("invalid filler expression: " + Tok);

  // Store V in big-endian byte order and return those four bytes
  // reinterpreted as a host uint32_t.
  uint32_t Buf;
  write32be(&Buf, V);
  return Buf;
}

// Parses "(sym = expr);" following PROVIDE, HIDDEN or PROVIDE_HIDDEN (the
// keyword itself has already been consumed by the caller) and tags the
// resulting assignment with the given Provide/Hidden flags.
SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) {
  expect("(");
  SymbolAssignment *Cmd = readAssignment(next());
  Cmd->Provide = Provide;
  Cmd->Hidden = Hidden;
  expect(")");
  expect(";");
  return Cmd;
}

// Tries to parse a symbol assignment whose first token is Tok: either
// "Tok = expr;" / "Tok += expr;", or one of the PROVIDE/HIDDEN/
// PROVIDE_HIDDEN forms. Returns nullptr, consuming nothing further, if Tok
// does not start such a command.
SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) {
  SymbolAssignment *Cmd = nullptr;
  // The assignment check comes first, so a token followed by "=" or "+="
  // is always treated as the assigned name, even if it spells a keyword.
  if (peek() == "=" || peek() == "+=") {
    Cmd = readAssignment(Tok);
    expect(";");
  } else if (Tok == "PROVIDE") {
    Cmd = readProvideHidden(true, false);
  } else if (Tok == "HIDDEN") {
    Cmd = readProvideHidden(false, true);
  } else if (Tok == "PROVIDE_HIDDEN") {
    Cmd = readProvideHidden(true, true);
  }
  return Cmd;
}

// Parses "= expr" or "+= expr" for the symbol Name. The next token must be
// one of those two operators (asserted). The terminating ";" is left for
// the caller.
SymbolAssignment *ScriptParser::readAssignment(StringRef Name) {
  StringRef Op = next();
  assert(Op == "=" || Op == "+=");
  Expr E = readExpr();
  if (Op == "+=") {
    // Rewrite "Name += E" as "Name = Name + E". The lambda captures the
    // original E by copy before E is overwritten with the new expression.
    std::string Loc = getCurrentLocation();
    E = [=] { return add(Script->getSymbolValue(Loc, Name), E()); };
  }
  return make<SymbolAssignment>(Name, E, getCurrentLocation());
}

// This is an operator-precedence parser to parse a linker
// script expression.
Expr ScriptParser::readExpr() {
  // Our lexer is context-aware. Set the in-expression bit so that
  // they apply different tokenization rules.
  // The previous value is restored afterwards so nested calls behave.
  bool Orig = InExpr;
  InExpr = true;
  Expr E = readExpr1(readPrimary(), 0);
  InExpr = Orig;
  return E;
}

// Builds the expression "L Op R" for a binary operator Op. The operands
// are captured by copy and evaluated each time the result is invoked. Op
// must be an operator known to precedence(); anything else is unreachable.
static Expr combine(StringRef Op, Expr L, Expr R) {
  // Arithmetic goes through the helpers that deal with section-relative
  // operands.
  if (Op == "+")
    return [=] { return add(L(), R()); };
  if (Op == "-")
    return [=] { return sub(L(), R()); };
  if (Op == "*")
    return [=] { return mul(L(), R()); };
  if (Op == "/")
    return [=] { return div(L(), R()); };
  // Shifts and comparisons work directly on the operands' getValue().
  if (Op == "<<")
    return [=] { return L().getValue() << R().getValue(); };
  if (Op == ">>")
    return [=] { return L().getValue() >> R().getValue(); };
  if (Op == "<")
    return [=] { return L().getValue() < R().getValue(); };
  if (Op == ">")
    return [=] { return L().getValue() > R().getValue(); };
  if (Op == ">=")
    return [=] { return L().getValue() >= R().getValue(); };
  if (Op == "<=")
    return [=] { return L().getValue() <= R().getValue(); };
  if (Op == "==")
    return [=] { return L().getValue() == R().getValue(); };
  if (Op == "!=")
    return [=] { return L().getValue() != R().getValue(); };
  if (Op == "&")
    return [=] { return bitAnd(L(), R()); };
  if (Op == "|")
    return [=] { return bitOr(L(), R()); };
  llvm_unreachable("invalid operator");
}

// This is a part of the operator-precedence parser. This function
// assumes that the remaining token stream starts with an operator.
//
// Lhs is the operand parsed so far. Only operators whose precedence is at
// least MinPrec are consumed; the first token with a lower precedence
// (including any non-operator, which has precedence -1) ends the loop.
Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) {
  while (!atEOF() && !Error) {
    // Read an operator and an expression.
    // "?" hands everything parsed so far to readTernary as the condition.
    if (consume("?"))
      return readTernary(Lhs);
    StringRef Op1 = peek();
    if (precedence(Op1) < MinPrec)
      break;
    skip();
    Expr Rhs = readPrimary();

    // Evaluate the remaining part of the expression first if the
    // next operator has greater precedence than the previous one.
    // For example, if we have read "+" and "3", and if the next
    // operator is "*", then we'll evaluate 3 * ... part first.
    while (!atEOF()) {
      StringRef Op2 = peek();
      if (precedence(Op2) <= precedence(Op1))
        break;
      Rhs = readExpr1(Rhs, precedence(Op2));
    }

    Lhs = combine(Op1, Lhs, Rhs);
  }
  return Lhs;
}

// Returns the value of the named constant used by CONSTANT(name). An
// unknown name is reported through error() and yields 0.
uint64_t static getConstant(StringRef S) {
  if (S == "COMMONPAGESIZE")
    return Target->PageSize;
  if (S == "MAXPAGESIZE")
    return Config->MaxPageSize;
  error("unknown constant: " + S);
  return 0;
}

// Parses Tok as an integer. It recognizes hexadecimal (prefixed with
// "0x" or suffixed with "H") and decimal numbers. Decimal numbers may
// have "K" (Ki) or "M" (Mi) suffixes.
// Prefix and suffix matching is case-insensitive. Returns None if Tok is
// not a number in any of these forms.
static Optional<uint64_t> parseInt(StringRef Tok) {
  // Negative number
  // The magnitude is parsed recursively and negated in unsigned arithmetic.
  if (Tok.startswith("-")) {
    if (Optional<uint64_t> Val = parseInt(Tok.substr(1)))
      return -*Val;
    return None;
  }

  // Hexadecimal
  // If the hex parse fails, control falls through to the decimal forms.
  uint64_t Val;
  if (Tok.startswith_lower("0x") && !Tok.substr(2).getAsInteger(16, Val))
    return Val;
  if (Tok.endswith_lower("H") && !Tok.drop_back().getAsInteger(16, Val))
    return Val;

  // Decimal
  if (Tok.endswith_lower("K")) {
    if (Tok.drop_back().getAsInteger(10, Val))
      return None;
    return Val * 1024;
  }
  if (Tok.endswith_lower("M")) {
    if (Tok.drop_back().getAsInteger(10, Val))
      return None;
    return Val * 1024 * 1024;
  }
  if (Tok.getAsInteger(10, Val))
    return None;
  return Val;
}

// Parses "(expr)" following BYTE, SHORT, LONG or QUAD and returns a
// command carrying the expression and the corresponding size (1, 2, 4 or
// 8). Returns nullptr, consuming nothing, if Tok is none of these keywords.
BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) {
  int Size = StringSwitch<int>(Tok)
                 .Case("BYTE", 1)
                 .Case("SHORT", 2)
                 .Case("LONG", 4)
                 .Case("QUAD", 8)
                 .Default(-1);
  if (Size == -1)
    return nullptr;

  return make<BytesDataCommand>(readParenExpr(), Size);
}

// Parses "(token)" and returns the single token between the parentheses
// without interpreting it.
StringRef ScriptParser::readParenLiteral() {
  expect("(");
  StringRef Tok = next();
  expect(")");
  return Tok;
}

// Parses a primary expression: a parenthesized expression, a unary "~" or
// "-" applied to a primary, a built-in function call, the location counter
// ".", a number, or a symbol name. The returned Expr is evaluated lazily;
// lookups of sections, symbols and the location counter happen when it is
// invoked, not while parsing.
Expr ScriptParser::readPrimary() {
  if (peek() == "(")
    return readParenExpr();

  if (consume("~")) {
    Expr E = readPrimary();
    return [=] { return ~E().getValue(); };
  }
  if (consume("-")) {
    Expr E = readPrimary();
    return [=] { return -E().getValue(); };
  }

  // The location is captured right after the token is read and is passed
  // to the lookups made by the closures below.
  StringRef Tok = next();
  std::string Location = getCurrentLocation();

  // Built-in functions are parsed here.
  // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html.
  if (Tok == "ABSOLUTE") {
    Expr Inner = readParenExpr();
    return [=] {
      ExprValue I = Inner();
      I.ForceAbsolute = true;
      return I;
    };
  }
  if (Tok == "ADDR") {
    StringRef Name = readParenLiteral();
    // Value 0 within the named output section.
    return [=]() -> ExprValue {
      return {Script->getOutputSection(Location, Name), 0};
    };
  }
  if (Tok == "ALIGN") {
    expect("(");
    Expr E = readExpr();
    // One-argument form: align the location counter to E.
    if (consume(")"))
      return [=] { return alignTo(Script->getDot(), E().getValue()); };
    // Two-argument form: align E to E2.
    expect(",");
    Expr E2 = readExpr();
    expect(")");
    return [=] { return alignTo(E().getValue(), E2().getValue()); };
  }
  if (Tok == "ALIGNOF") {
    StringRef Name = readParenLiteral();
    return [=] { return Script->getOutputSection(Location, Name)->Alignment; };
  }
  if (Tok == "ASSERT")
    return readAssertExpr();
  if (Tok == "CONSTANT") {
    StringRef Name = readParenLiteral();
    return [=] { return getConstant(Name); };
  }
  if (Tok == "DATA_SEGMENT_ALIGN") {
    expect("(");
    Expr E = readExpr();
    expect(",");
    // The second argument is parsed for syntax only and then discarded.
    readExpr();
    expect(")");
    return [=] { return alignTo(Script->getDot(), E().getValue()); };
  }
  if (Tok == "DATA_SEGMENT_END") {
    // Only the literal form "DATA_SEGMENT_END(.)" is accepted.
    expect("(");
    expect(".");
    expect(")");
    return [] { return Script->getDot(); };
  }
  if (Tok == "DATA_SEGMENT_RELRO_END") {
    // GNU linkers implements more complicated logic to handle
    // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and
    // just align to the next page boundary for simplicity.
    expect("(");
    readExpr();
    expect(",");
    readExpr();
    expect(")");
    return [] { return alignTo(Script->getDot(), Target->PageSize); };
  }
  if (Tok == "DEFINED") {
    StringRef Name = readParenLiteral();
    return [=] { return Script->isDefined(Name) ? 1 : 0; };
  }
  if (Tok == "LENGTH") {
    StringRef Name = readParenLiteral();
    // The region must already be known at parse time.
    if (Script->Opt.MemoryRegions.count(Name) == 0)
      setError("memory region not defined: " + Name);
    return [=] { return Script->Opt.MemoryRegions[Name].Length; };
  }
  if (Tok == "LOADADDR") {
    StringRef Name = readParenLiteral();
    return [=] { return Script->getOutputSection(Location, Name)->getLMA(); };
  }
  if (Tok == "ORIGIN") {
    StringRef Name = readParenLiteral();
    // The region must already be known at parse time.
    if (Script->Opt.MemoryRegions.count(Name) == 0)
      setError("memory region not defined: " + Name);
    return [=] { return Script->Opt.MemoryRegions[Name].Origin; };
  }
  if (Tok == "SEGMENT_START") {
    expect("(");
    // The first argument is skipped; the result is always the second
    // argument.
    skip();
    expect(",");
    Expr E = readExpr();
    expect(")");
    return [=] { return E(); };
  }
  if (Tok == "SIZEOF") {
    StringRef Name = readParenLiteral();
    return [=] { return Script->getOutputSectionSize(Name); };
  }
  if (Tok == "SIZEOF_HEADERS")
    return [=] { return elf::getHeaderSize(); };

  // Tok is the dot.
  if (Tok == ".")
    return [=] { return Script->getSymbolValue(Location, Tok); };

  // Tok is a literal number.
  if (Optional<uint64_t> Val = parseInt(Tok))
    return [=] { return *Val; };

  // Tok is a symbol name.
  // An error is recorded for a token that is neither a number nor a valid
  // identifier, but parsing continues and the symbol is still registered.
  if (!isValidCIdentifier(Tok))
    setError("malformed number: " + Tok);
  Script->Opt.ReferencedSymbols.push_back(Tok);
  return [=] { return Script->getSymbolValue(Location, Tok); };
}

// Parses "L : R" after the "?" of a ternary has been consumed. Cond is the
// already-parsed condition. Only the selected branch is evaluated when the
// result is invoked.
Expr ScriptParser::readTernary(Expr Cond) {
  Expr L = readExpr();
  expect(":");
  Expr R = readExpr();
  return [=] { return Cond().getValue() ? L() : R(); };
}

// Parses "(expr)".
Expr ScriptParser::readParenExpr() {
  expect("(");
  Expr E = readExpr();
  expect(")");
  return E;
}

// Reads the ":phdr" items that may follow an output section description
// and returns the program header names in order of appearance.
std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() {
  std::vector<StringRef> Phdrs;
  while (!Error && peek().startswith(":")) {
    StringRef Tok = next();
    // A lone ":" means the name is the following token; otherwise the name
    // is whatever follows the ":" within the same token.
    Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1));
  }
  return Phdrs;
}

// Read a program header type name. The next token must be a
// name of a program header type or a constant (e.g. "0x3").
// An unrecognized name records an error and yields PT_NULL.
unsigned ScriptParser::readPhdrType() {
  StringRef Tok = next();
  if (Optional<uint64_t> Val = parseInt(Tok))
    return *Val;

  unsigned Ret = StringSwitch<unsigned>(Tok)
                     .Case("PT_NULL", PT_NULL)
                     .Case("PT_LOAD", PT_LOAD)
                     .Case("PT_DYNAMIC", PT_DYNAMIC)
                     .Case("PT_INTERP", PT_INTERP)
                     .Case("PT_NOTE", PT_NOTE)
                     .Case("PT_SHLIB", PT_SHLIB)
                     .Case("PT_PHDR", PT_PHDR)
                     .Case("PT_TLS", PT_TLS)
                     .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME)
                     .Case("PT_GNU_STACK", PT_GNU_STACK)
                     .Case("PT_GNU_RELRO", PT_GNU_RELRO)
                     .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE)
                     .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED)
                     .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA)
                     .Default(-1);

  // (unsigned)-1 is the "no match" sentinel produced by Default(-1) above.
  if (Ret == (unsigned)-1) {
    setError("invalid program header type: " + Tok);
    return PT_NULL;
  }
  return Ret;
}

// Reads an anonymous version declaration.
1004 void ScriptParser::readAnonymousDeclaration() { 1005 std::vector<SymbolVersion> Locals; 1006 std::vector<SymbolVersion> Globals; 1007 std::tie(Locals, Globals) = readSymbols(); 1008 1009 for (SymbolVersion V : Locals) { 1010 if (V.Name == "*") 1011 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1012 else 1013 Config->VersionScriptLocals.push_back(V); 1014 } 1015 1016 for (SymbolVersion V : Globals) 1017 Config->VersionScriptGlobals.push_back(V); 1018 1019 expect(";"); 1020 } 1021 1022 // Reads a non-anonymous version definition, 1023 // e.g. "VerStr { global: foo; bar; local: *; };". 1024 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1025 // Read a symbol list. 1026 std::vector<SymbolVersion> Locals; 1027 std::vector<SymbolVersion> Globals; 1028 std::tie(Locals, Globals) = readSymbols(); 1029 1030 for (SymbolVersion V : Locals) { 1031 if (V.Name == "*") 1032 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1033 else 1034 Config->VersionScriptLocals.push_back(V); 1035 } 1036 1037 // Create a new version definition and add that to the global symbols. 1038 VersionDefinition Ver; 1039 Ver.Name = VerStr; 1040 Ver.Globals = Globals; 1041 1042 // User-defined version number starts from 2 because 0 and 1 are 1043 // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively. 1044 Ver.Id = Config->VersionDefinitions.size() + 2; 1045 Config->VersionDefinitions.push_back(Ver); 1046 1047 // Each version may have a parent version. For example, "Ver2" 1048 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" 1049 // as a parent. This version hierarchy is, probably against your 1050 // instinct, purely for hint; the runtime doesn't care about it 1051 // at all. In LLD, we simply ignore it. 1052 if (peek() != ";") 1053 skip(); 1054 expect(";"); 1055 } 1056 1057 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 
1058 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 1059 ScriptParser::readSymbols() { 1060 std::vector<SymbolVersion> Locals; 1061 std::vector<SymbolVersion> Globals; 1062 std::vector<SymbolVersion> *V = &Globals; 1063 1064 while (!Error) { 1065 if (consume("}")) 1066 break; 1067 if (consumeLabel("local")) { 1068 V = &Locals; 1069 continue; 1070 } 1071 if (consumeLabel("global")) { 1072 V = &Globals; 1073 continue; 1074 } 1075 1076 if (consume("extern")) { 1077 std::vector<SymbolVersion> Ext = readVersionExtern(); 1078 V->insert(V->end(), Ext.begin(), Ext.end()); 1079 } else { 1080 StringRef Tok = next(); 1081 V->push_back({unquote(Tok), false, hasWildcard(Tok)}); 1082 } 1083 expect(";"); 1084 } 1085 return {Locals, Globals}; 1086 } 1087 1088 // Reads an "extern C++" directive, e.g., 1089 // "extern "C++" { ns::*; "f(int, double)"; };" 1090 std::vector<SymbolVersion> ScriptParser::readVersionExtern() { 1091 StringRef Tok = next(); 1092 bool IsCXX = Tok == "\"C++\""; 1093 if (!IsCXX && Tok != "\"C\"") 1094 setError("Unknown language"); 1095 expect("{"); 1096 1097 std::vector<SymbolVersion> Ret; 1098 while (!Error && peek() != "}") { 1099 StringRef Tok = next(); 1100 bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); 1101 Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); 1102 expect(";"); 1103 } 1104 1105 expect("}"); 1106 return Ret; 1107 } 1108 1109 uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2, 1110 StringRef S3) { 1111 if (!consume(S1) && !consume(S2) && !consume(S3)) { 1112 setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); 1113 return 0; 1114 } 1115 expect("="); 1116 return readExpr()().getValue(); 1117 } 1118 1119 // Parse the MEMORY command as specified in: 1120 // https://sourceware.org/binutils/docs/ld/MEMORY.html 1121 // 1122 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... 
} 1123 void ScriptParser::readMemory() { 1124 expect("{"); 1125 while (!Error && !consume("}")) { 1126 StringRef Name = next(); 1127 1128 uint32_t Flags = 0; 1129 uint32_t NegFlags = 0; 1130 if (consume("(")) { 1131 std::tie(Flags, NegFlags) = readMemoryAttributes(); 1132 expect(")"); 1133 } 1134 expect(":"); 1135 1136 uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); 1137 expect(","); 1138 uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); 1139 1140 // Add the memory region to the region map (if it doesn't already exist). 1141 auto It = Script->Opt.MemoryRegions.find(Name); 1142 if (It != Script->Opt.MemoryRegions.end()) 1143 setError("region '" + Name + "' already defined"); 1144 else 1145 Script->Opt.MemoryRegions[Name] = {Name, Origin, Length, 1146 Origin, Flags, NegFlags}; 1147 } 1148 } 1149 1150 // This function parses the attributes used to match against section 1151 // flags when placing output sections in a memory region. These flags 1152 // are only used when an explicit memory region name is not used. 1153 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { 1154 uint32_t Flags = 0; 1155 uint32_t NegFlags = 0; 1156 bool Invert = false; 1157 1158 for (char C : next().lower()) { 1159 uint32_t Flag = 0; 1160 if (C == '!') 1161 Invert = !Invert; 1162 else if (C == 'w') 1163 Flag = SHF_WRITE; 1164 else if (C == 'x') 1165 Flag = SHF_EXECINSTR; 1166 else if (C == 'a') 1167 Flag = SHF_ALLOC; 1168 else if (C != 'r') 1169 setError("invalid memory region attribute"); 1170 1171 if (Invert) 1172 NegFlags |= Flag; 1173 else 1174 Flags |= Flag; 1175 } 1176 return {Flags, NegFlags}; 1177 } 1178 1179 void elf::readLinkerScript(MemoryBufferRef MB) { 1180 ScriptParser(MB).readLinkerScript(); 1181 } 1182 1183 void elf::readVersionScript(MemoryBufferRef MB) { 1184 ScriptParser(MB).readVersionScript(); 1185 } 1186 1187 void elf::readDynamicList(MemoryBufferRef MB) { 1188 ScriptParser(MB).readDynamicList(); 1189 } 1190