1 //===- ScriptParser.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains a recursive-descendent parser for linker scripts. 11 // Parsed results are stored to Config and Script global objects. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ScriptParser.h" 16 #include "Config.h" 17 #include "Driver.h" 18 #include "InputSection.h" 19 #include "LinkerScript.h" 20 #include "Memory.h" 21 #include "OutputSections.h" 22 #include "ScriptLexer.h" 23 #include "Symbols.h" 24 #include "Target.h" 25 #include "llvm/ADT/SmallString.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/ADT/StringSwitch.h" 28 #include "llvm/Support/Casting.h" 29 #include "llvm/Support/ELF.h" 30 #include "llvm/Support/ErrorHandling.h" 31 #include "llvm/Support/FileSystem.h" 32 #include "llvm/Support/Path.h" 33 #include <cassert> 34 #include <limits> 35 #include <vector> 36 37 using namespace llvm; 38 using namespace llvm::ELF; 39 using namespace llvm::support::endian; 40 using namespace lld; 41 using namespace lld::elf; 42 43 static bool isUnderSysroot(StringRef Path); 44 45 namespace { 46 class ScriptParser final : ScriptLexer { 47 public: 48 ScriptParser(MemoryBufferRef MB) 49 : ScriptLexer(MB), 50 IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} 51 52 void readLinkerScript(); 53 void readVersionScript(); 54 void readDynamicList(); 55 56 private: 57 void addFile(StringRef Path); 58 59 void readAsNeeded(); 60 void readEntry(); 61 void readExtern(); 62 void readGroup(); 63 void readInclude(); 64 void readMemory(); 65 void readOutput(); 66 void readOutputArch(); 67 void readOutputFormat(); 68 void readPhdrs(); 69 void readSearchDir(); 70 void readSections(); 71 void readVersion(); 72 void readVersionScriptCommand(); 73 74 SymbolAssignment *readAssignment(StringRef Name); 75 BytesDataCommand *readBytesDataCommand(StringRef Tok); 76 uint32_t readFill(); 77 uint32_t parseFill(StringRef Tok); 78 OutputSectionCommand *readOutputSectionDescription(StringRef OutSec); 79 std::vector<StringRef> readOutputSectionPhdrs(); 80 InputSectionDescription *readInputSectionDescription(StringRef Tok); 81 StringMatcher readFilePatterns(); 82 std::vector<SectionPattern> readInputSectionsList(); 83 InputSectionDescription *readInputSectionRules(StringRef FilePattern); 84 unsigned readPhdrType(); 85 SortSectionPolicy readSortKind(); 86 SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); 87 SymbolAssignment *readProvideOrAssignment(StringRef Tok); 88 void readSort(); 89 AssertCommand *readAssert(); 90 Expr readAssertExpr(); 91 92 uint64_t readMemoryAssignment(StringRef, StringRef, StringRef); 93 std::pair<uint32_t, uint32_t> readMemoryAttributes(); 94 95 Expr readExpr(); 96 Expr readExpr1(Expr Lhs, int MinPrec); 97 StringRef readParenLiteral(); 98 Expr readPrimary(); 99 Expr readTernary(Expr Cond); 100 Expr readParenExpr(); 101 102 // For parsing version script. 103 std::vector<SymbolVersion> readVersionExtern(); 104 void readAnonymousDeclaration(); 105 void readVersionDeclaration(StringRef VerStr); 106 107 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 108 readSymbols(); 109 110 bool IsUnderSysroot; 111 }; 112 } // namespace 113 114 static bool isUnderSysroot(StringRef Path) { 115 if (Config->Sysroot == "") 116 return false; 117 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 118 if (sys::fs::equivalent(Config->Sysroot, Path)) 119 return true; 120 return false; 121 } 122 123 // Some operations only support one non absolute value. Move the 124 // absolute one to the right hand side for convenience. 125 static void moveAbsRight(ExprValue &A, ExprValue &B) { 126 if (A.isAbsolute()) 127 std::swap(A, B); 128 if (!B.isAbsolute()) 129 error("At least one side of the expression must be absolute"); 130 } 131 132 static ExprValue add(ExprValue A, ExprValue B) { 133 moveAbsRight(A, B); 134 return {A.Sec, A.ForceAbsolute, A.Val + B.getValue()}; 135 } 136 137 static ExprValue sub(ExprValue A, ExprValue B) { 138 return {A.Sec, A.Val - B.getValue()}; 139 } 140 141 static ExprValue mul(ExprValue A, ExprValue B) { 142 return A.getValue() * B.getValue(); 143 } 144 145 static ExprValue div(ExprValue A, ExprValue B) { 146 if (uint64_t BV = B.getValue()) 147 return A.getValue() / BV; 148 error("division by zero"); 149 return 0; 150 } 151 152 static ExprValue bitAnd(ExprValue A, ExprValue B) { 153 moveAbsRight(A, B); 154 return {A.Sec, A.ForceAbsolute, 155 (A.getValue() & B.getValue()) - A.getSecAddr()}; 156 } 157 158 static ExprValue bitOr(ExprValue A, ExprValue B) { 159 moveAbsRight(A, B); 160 return {A.Sec, A.ForceAbsolute, 161 (A.getValue() | B.getValue()) - A.getSecAddr()}; 162 } 163 164 void ScriptParser::readDynamicList() { 165 expect("{"); 166 readAnonymousDeclaration(); 167 if (!atEOF()) 168 setError("EOF expected, but got " + next()); 169 } 170 171 void ScriptParser::readVersionScript() { 172 readVersionScriptCommand(); 173 if (!atEOF()) 174 setError("EOF expected, but got " + next()); 175 } 176 177 void ScriptParser::readVersionScriptCommand() { 178 if (consume("{")) { 179 readAnonymousDeclaration(); 180 return; 181 } 182 183 while (!atEOF() && !Error && peek() != "}") { 184 StringRef VerStr = next(); 185 if (VerStr == "{") { 186 setError("anonymous version definition is used in " 187 "combination with other version definitions"); 188 return; 189 } 190 expect("{"); 191 readVersionDeclaration(VerStr); 192 } 193 } 194 195 void ScriptParser::readVersion() { 196 expect("{"); 197 readVersionScriptCommand(); 198 expect("}"); 199 } 200 201 void ScriptParser::readLinkerScript() { 202 while (!atEOF()) { 203 StringRef Tok = next(); 204 if (Tok == ";") 205 continue; 206 207 if (Tok == "ASSERT") { 208 Script->Opt.Commands.push_back(readAssert()); 209 } else if (Tok == "ENTRY") { 210 readEntry(); 211 } else if (Tok == "EXTERN") { 212 readExtern(); 213 } else if (Tok == "GROUP" || Tok == "INPUT") { 214 readGroup(); 215 } else if (Tok == "INCLUDE") { 216 readInclude(); 217 } else if (Tok == "MEMORY") { 218 readMemory(); 219 } else if (Tok == "OUTPUT") { 220 readOutput(); 221 } else if (Tok == "OUTPUT_ARCH") { 222 readOutputArch(); 223 } else if (Tok == "OUTPUT_FORMAT") { 224 readOutputFormat(); 225 } else if (Tok == "PHDRS") { 226 readPhdrs(); 227 } else if (Tok == "SEARCH_DIR") { 228 readSearchDir(); 229 } else if (Tok == "SECTIONS") { 230 readSections(); 231 } else if (Tok == "VERSION") { 232 readVersion(); 233 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { 234 Script->Opt.Commands.push_back(Cmd); 235 } else { 236 setError("unknown directive: " + Tok); 237 } 238 } 239 } 240 241 void ScriptParser::addFile(StringRef S) { 242 if (IsUnderSysroot && S.startswith("/")) { 243 SmallString<128> PathData; 244 StringRef Path = (Config->Sysroot + S).toStringRef(PathData); 245 if (sys::fs::exists(Path)) { 246 Driver->addFile(Saver.save(Path), /*WithLOption=*/false); 247 return; 248 } 249 } 250 251 if (sys::path::is_absolute(S)) { 252 Driver->addFile(S, /*WithLOption=*/false); 253 } else if (S.startswith("=")) { 254 if (Config->Sysroot.empty()) 255 Driver->addFile(S.substr(1), /*WithLOption=*/false); 256 else 257 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)), 258 /*WithLOption=*/false); 259 } else if (S.startswith("-l")) { 260 Driver->addLibrary(S.substr(2)); 261 } else if (sys::fs::exists(S)) { 262 Driver->addFile(S, /*WithLOption=*/false); 263 } else { 264 if (Optional<std::string> Path = findFromSearchPaths(S)) 265 Driver->addFile(Saver.save(*Path), /*WithLOption=*/true); 266 else 267 setError("unable to find " + S); 268 } 269 } 270 271 void ScriptParser::readAsNeeded() { 272 expect("("); 273 bool Orig = Config->AsNeeded; 274 Config->AsNeeded = true; 275 while (!Error && !consume(")")) 276 addFile(unquote(next())); 277 Config->AsNeeded = Orig; 278 } 279 280 void ScriptParser::readEntry() { 281 // -e <symbol> takes predecence over ENTRY(<symbol>). 282 expect("("); 283 StringRef Tok = next(); 284 if (Config->Entry.empty()) 285 Config->Entry = Tok; 286 expect(")"); 287 } 288 289 void ScriptParser::readExtern() { 290 expect("("); 291 while (!Error && !consume(")")) 292 Config->Undefined.push_back(next()); 293 } 294 295 void ScriptParser::readGroup() { 296 expect("("); 297 while (!Error && !consume(")")) { 298 if (consume("AS_NEEDED")) 299 readAsNeeded(); 300 else 301 addFile(unquote(next())); 302 } 303 } 304 305 void ScriptParser::readInclude() { 306 StringRef Tok = unquote(next()); 307 308 // https://sourceware.org/binutils/docs/ld/File-Commands.html: 309 // The file will be searched for in the current directory, and in any 310 // directory specified with the -L option. 311 if (sys::fs::exists(Tok)) { 312 if (Optional<MemoryBufferRef> MB = readFile(Tok)) 313 tokenize(*MB); 314 return; 315 } 316 if (Optional<std::string> Path = findFromSearchPaths(Tok)) { 317 if (Optional<MemoryBufferRef> MB = readFile(*Path)) 318 tokenize(*MB); 319 return; 320 } 321 setError("cannot open " + Tok); 322 } 323 324 void ScriptParser::readOutput() { 325 // -o <file> takes predecence over OUTPUT(<file>). 326 expect("("); 327 StringRef Tok = next(); 328 if (Config->OutputFile.empty()) 329 Config->OutputFile = unquote(Tok); 330 expect(")"); 331 } 332 333 void ScriptParser::readOutputArch() { 334 // OUTPUT_ARCH is ignored for now. 335 expect("("); 336 while (!Error && !consume(")")) 337 skip(); 338 } 339 340 void ScriptParser::readOutputFormat() { 341 // Error checking only for now. 342 expect("("); 343 skip(); 344 if (consume(")")) 345 return; 346 expect(","); 347 skip(); 348 expect(","); 349 skip(); 350 expect(")"); 351 } 352 353 void ScriptParser::readPhdrs() { 354 expect("{"); 355 while (!Error && !consume("}")) { 356 Script->Opt.PhdrsCommands.push_back( 357 {next(), PT_NULL, false, false, UINT_MAX, nullptr}); 358 359 PhdrsCommand &PhdrCmd = Script->Opt.PhdrsCommands.back(); 360 PhdrCmd.Type = readPhdrType(); 361 362 while (!Error && !consume(";")) { 363 if (consume("FILEHDR")) 364 PhdrCmd.HasFilehdr = true; 365 else if (consume("PHDRS")) 366 PhdrCmd.HasPhdrs = true; 367 else if (consume("AT")) 368 PhdrCmd.LMAExpr = readParenExpr(); 369 else if (consume("FLAGS")) 370 PhdrCmd.Flags = readParenExpr()().getValue(); 371 else 372 setError("unexpected header attribute: " + next()); 373 } 374 } 375 } 376 377 void ScriptParser::readSearchDir() { 378 expect("("); 379 StringRef Tok = next(); 380 if (!Config->Nostdlib) 381 Config->SearchPaths.push_back(unquote(Tok)); 382 expect(")"); 383 } 384 385 void ScriptParser::readSections() { 386 Script->Opt.HasSections = true; 387 388 // -no-rosegment is used to avoid placing read only non-executable sections in 389 // their own segment. We do the same if SECTIONS command is present in linker 390 // script. See comment for computeFlags(). 391 Config->SingleRoRx = true; 392 393 expect("{"); 394 while (!Error && !consume("}")) { 395 StringRef Tok = next(); 396 BaseCommand *Cmd = readProvideOrAssignment(Tok); 397 if (!Cmd) { 398 if (Tok == "ASSERT") 399 Cmd = readAssert(); 400 else 401 Cmd = readOutputSectionDescription(Tok); 402 } 403 Script->Opt.Commands.push_back(Cmd); 404 } 405 } 406 407 static int precedence(StringRef Op) { 408 return StringSwitch<int>(Op) 409 .Cases("*", "/", 5) 410 .Cases("+", "-", 4) 411 .Cases("<<", ">>", 3) 412 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 413 .Cases("&", "|", 1) 414 .Default(-1); 415 } 416 417 StringMatcher ScriptParser::readFilePatterns() { 418 std::vector<StringRef> V; 419 while (!Error && !consume(")")) 420 V.push_back(next()); 421 return StringMatcher(V); 422 } 423 424 SortSectionPolicy ScriptParser::readSortKind() { 425 if (consume("SORT") || consume("SORT_BY_NAME")) 426 return SortSectionPolicy::Name; 427 if (consume("SORT_BY_ALIGNMENT")) 428 return SortSectionPolicy::Alignment; 429 if (consume("SORT_BY_INIT_PRIORITY")) 430 return SortSectionPolicy::Priority; 431 if (consume("SORT_NONE")) 432 return SortSectionPolicy::None; 433 return SortSectionPolicy::Default; 434 } 435 436 // Reads SECTIONS command contents in the following form: 437 // 438 // <contents> ::= <elem>* 439 // <elem> ::= <exclude>? <glob-pattern> 440 // <exclude> ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")" 441 // 442 // For example, 443 // 444 // *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz) 445 // 446 // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o". 447 // The semantics of that is section .foo in any file, section .bar in 448 // any file but a.o, and section .baz in any file but b.o. 449 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 450 std::vector<SectionPattern> Ret; 451 while (!Error && peek() != ")") { 452 StringMatcher ExcludeFilePat; 453 if (consume("EXCLUDE_FILE")) { 454 expect("("); 455 ExcludeFilePat = readFilePatterns(); 456 } 457 458 std::vector<StringRef> V; 459 while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") 460 V.push_back(next()); 461 462 if (!V.empty()) 463 Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); 464 else 465 setError("section pattern is expected"); 466 } 467 return Ret; 468 } 469 470 // Reads contents of "SECTIONS" directive. That directive contains a 471 // list of glob patterns for input sections. The grammar is as follows. 472 // 473 // <patterns> ::= <section-list> 474 // | <sort> "(" <section-list> ")" 475 // | <sort> "(" <sort> "(" <section-list> ")" ")" 476 // 477 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" 478 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" 479 // 480 // <section-list> is parsed by readInputSectionsList(). 481 InputSectionDescription * 482 ScriptParser::readInputSectionRules(StringRef FilePattern) { 483 auto *Cmd = make<InputSectionDescription>(FilePattern); 484 expect("("); 485 486 while (!Error && !consume(")")) { 487 SortSectionPolicy Outer = readSortKind(); 488 SortSectionPolicy Inner = SortSectionPolicy::Default; 489 std::vector<SectionPattern> V; 490 if (Outer != SortSectionPolicy::Default) { 491 expect("("); 492 Inner = readSortKind(); 493 if (Inner != SortSectionPolicy::Default) { 494 expect("("); 495 V = readInputSectionsList(); 496 expect(")"); 497 } else { 498 V = readInputSectionsList(); 499 } 500 expect(")"); 501 } else { 502 V = readInputSectionsList(); 503 } 504 505 for (SectionPattern &Pat : V) { 506 Pat.SortInner = Inner; 507 Pat.SortOuter = Outer; 508 } 509 510 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 511 } 512 return Cmd; 513 } 514 515 InputSectionDescription * 516 ScriptParser::readInputSectionDescription(StringRef Tok) { 517 // Input section wildcard can be surrounded by KEEP. 518 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 519 if (Tok == "KEEP") { 520 expect("("); 521 StringRef FilePattern = next(); 522 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 523 expect(")"); 524 Script->Opt.KeptSections.push_back(Cmd); 525 return Cmd; 526 } 527 return readInputSectionRules(Tok); 528 } 529 530 void ScriptParser::readSort() { 531 expect("("); 532 expect("CONSTRUCTORS"); 533 expect(")"); 534 } 535 536 AssertCommand *ScriptParser::readAssert() { 537 return make<AssertCommand>(readAssertExpr()); 538 } 539 540 Expr ScriptParser::readAssertExpr() { 541 expect("("); 542 Expr E = readExpr(); 543 expect(","); 544 StringRef Msg = unquote(next()); 545 expect(")"); 546 547 return [=] { 548 if (!E().getValue()) 549 error(Msg); 550 return Script->getDot(); 551 }; 552 } 553 554 // Reads a FILL(expr) command. We handle the FILL command as an 555 // alias for =fillexp section attribute, which is different from 556 // what GNU linkers do. 557 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 558 uint32_t ScriptParser::readFill() { 559 expect("("); 560 uint32_t V = parseFill(next()); 561 expect(")"); 562 return V; 563 } 564 565 OutputSectionCommand * 566 ScriptParser::readOutputSectionDescription(StringRef OutSec) { 567 OutputSectionCommand *Cmd = make<OutputSectionCommand>(OutSec); 568 Cmd->Location = getCurrentLocation(); 569 570 // Read an address expression. 571 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html 572 if (peek() != ":") 573 Cmd->AddrExpr = readExpr(); 574 575 expect(":"); 576 577 if (consume("AT")) 578 Cmd->LMAExpr = readParenExpr(); 579 if (consume("ALIGN")) 580 Cmd->AlignExpr = readParenExpr(); 581 if (consume("SUBALIGN")) 582 Cmd->SubalignExpr = readParenExpr(); 583 584 // Parse constraints. 585 if (consume("ONLY_IF_RO")) 586 Cmd->Constraint = ConstraintKind::ReadOnly; 587 if (consume("ONLY_IF_RW")) 588 Cmd->Constraint = ConstraintKind::ReadWrite; 589 expect("{"); 590 591 while (!Error && !consume("}")) { 592 StringRef Tok = next(); 593 if (Tok == ";") { 594 // Empty commands are allowed. Do nothing here. 595 } else if (SymbolAssignment *Assign = readProvideOrAssignment(Tok)) { 596 Cmd->Commands.push_back(Assign); 597 } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) { 598 Cmd->Commands.push_back(Data); 599 } else if (Tok == "ASSERT") { 600 Cmd->Commands.push_back(readAssert()); 601 expect(";"); 602 } else if (Tok == "CONSTRUCTORS") { 603 // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors 604 // by name. This is for very old file formats such as ECOFF/XCOFF. 605 // For ELF, we should ignore. 606 } else if (Tok == "FILL") { 607 Cmd->Filler = readFill(); 608 } else if (Tok == "SORT") { 609 readSort(); 610 } else if (peek() == "(") { 611 Cmd->Commands.push_back(readInputSectionDescription(Tok)); 612 } else { 613 setError("unknown command " + Tok); 614 } 615 } 616 617 if (consume(">")) 618 Cmd->MemoryRegionName = next(); 619 620 Cmd->Phdrs = readOutputSectionPhdrs(); 621 622 if (consume("=")) 623 Cmd->Filler = parseFill(next()); 624 else if (peek().startswith("=")) 625 Cmd->Filler = parseFill(next().drop_front()); 626 627 // Consume optional comma following output section command. 628 consume(","); 629 630 return Cmd; 631 } 632 633 // Parses a given string as a octal/decimal/hexadecimal number and 634 // returns it as a big-endian number. Used for `=<fillexp>`. 635 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 636 // 637 // When reading a hexstring, ld.bfd handles it as a blob of arbitrary 638 // size, while ld.gold always handles it as a 32-bit big-endian number. 639 // We are compatible with ld.gold because it's easier to implement. 640 uint32_t ScriptParser::parseFill(StringRef Tok) { 641 uint32_t V = 0; 642 if (Tok.getAsInteger(0, V)) 643 setError("invalid filler expression: " + Tok); 644 645 uint32_t Buf; 646 write32be(&Buf, V); 647 return Buf; 648 } 649 650 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { 651 expect("("); 652 SymbolAssignment *Cmd = readAssignment(next()); 653 Cmd->Provide = Provide; 654 Cmd->Hidden = Hidden; 655 expect(")"); 656 expect(";"); 657 return Cmd; 658 } 659 660 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) { 661 SymbolAssignment *Cmd = nullptr; 662 if (peek() == "=" || peek() == "+=") { 663 Cmd = readAssignment(Tok); 664 expect(";"); 665 } else if (Tok == "PROVIDE") { 666 Cmd = readProvideHidden(true, false); 667 } else if (Tok == "HIDDEN") { 668 Cmd = readProvideHidden(false, true); 669 } else if (Tok == "PROVIDE_HIDDEN") { 670 Cmd = readProvideHidden(true, true); 671 } 672 return Cmd; 673 } 674 675 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { 676 StringRef Op = next(); 677 assert(Op == "=" || Op == "+="); 678 Expr E = readExpr(); 679 if (Op == "+=") { 680 std::string Loc = getCurrentLocation(); 681 E = [=] { return add(Script->getSymbolValue(Loc, Name), E()); }; 682 } 683 return make<SymbolAssignment>(Name, E, getCurrentLocation()); 684 } 685 686 // This is an operator-precedence parser to parse a linker 687 // script expression. 688 Expr ScriptParser::readExpr() { 689 // Our lexer is context-aware. Set the in-expression bit so that 690 // they apply different tokenization rules. 691 bool Orig = InExpr; 692 InExpr = true; 693 Expr E = readExpr1(readPrimary(), 0); 694 InExpr = Orig; 695 return E; 696 } 697 698 static Expr combine(StringRef Op, Expr L, Expr R) { 699 if (Op == "+") 700 return [=] { return add(L(), R()); }; 701 if (Op == "-") 702 return [=] { return sub(L(), R()); }; 703 if (Op == "*") 704 return [=] { return mul(L(), R()); }; 705 if (Op == "/") 706 return [=] { return div(L(), R()); }; 707 if (Op == "<<") 708 return [=] { return L().getValue() << R().getValue(); }; 709 if (Op == ">>") 710 return [=] { return L().getValue() >> R().getValue(); }; 711 if (Op == "<") 712 return [=] { return L().getValue() < R().getValue(); }; 713 if (Op == ">") 714 return [=] { return L().getValue() > R().getValue(); }; 715 if (Op == ">=") 716 return [=] { return L().getValue() >= R().getValue(); }; 717 if (Op == "<=") 718 return [=] { return L().getValue() <= R().getValue(); }; 719 if (Op == "==") 720 return [=] { return L().getValue() == R().getValue(); }; 721 if (Op == "!=") 722 return [=] { return L().getValue() != R().getValue(); }; 723 if (Op == "&") 724 return [=] { return bitAnd(L(), R()); }; 725 if (Op == "|") 726 return [=] { return bitOr(L(), R()); }; 727 llvm_unreachable("invalid operator"); 728 } 729 730 // This is a part of the operator-precedence parser. This function 731 // assumes that the remaining token stream starts with an operator. 732 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { 733 while (!atEOF() && !Error) { 734 // Read an operator and an expression. 735 if (consume("?")) 736 return readTernary(Lhs); 737 StringRef Op1 = peek(); 738 if (precedence(Op1) < MinPrec) 739 break; 740 skip(); 741 Expr Rhs = readPrimary(); 742 743 // Evaluate the remaining part of the expression first if the 744 // next operator has greater precedence than the previous one. 745 // For example, if we have read "+" and "3", and if the next 746 // operator is "*", then we'll evaluate 3 * ... part first. 747 while (!atEOF()) { 748 StringRef Op2 = peek(); 749 if (precedence(Op2) <= precedence(Op1)) 750 break; 751 Rhs = readExpr1(Rhs, precedence(Op2)); 752 } 753 754 Lhs = combine(Op1, Lhs, Rhs); 755 } 756 return Lhs; 757 } 758 759 uint64_t static getConstant(StringRef S) { 760 if (S == "COMMONPAGESIZE") 761 return Target->PageSize; 762 if (S == "MAXPAGESIZE") 763 return Config->MaxPageSize; 764 error("unknown constant: " + S); 765 return 0; 766 } 767 768 // Parses Tok as an integer. It recognizes hexadecimal (prefixed with 769 // "0x" or suffixed with "H") and decimal numbers. Decimal numbers may 770 // have "K" (Ki) or "M" (Mi) suffixes. 771 static Optional<uint64_t> parseInt(StringRef Tok) { 772 // Negative number 773 if (Tok.startswith("-")) { 774 if (Optional<uint64_t> Val = parseInt(Tok.substr(1))) 775 return -*Val; 776 return None; 777 } 778 779 // Hexadecimal 780 uint64_t Val; 781 if (Tok.startswith_lower("0x") && !Tok.substr(2).getAsInteger(16, Val)) 782 return Val; 783 if (Tok.endswith_lower("H") && !Tok.drop_back().getAsInteger(16, Val)) 784 return Val; 785 786 // Decimal 787 if (Tok.endswith_lower("K")) { 788 if (Tok.drop_back().getAsInteger(10, Val)) 789 return None; 790 return Val * 1024; 791 } 792 if (Tok.endswith_lower("M")) { 793 if (Tok.drop_back().getAsInteger(10, Val)) 794 return None; 795 return Val * 1024 * 1024; 796 } 797 if (Tok.getAsInteger(10, Val)) 798 return None; 799 return Val; 800 } 801 802 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { 803 int Size = StringSwitch<int>(Tok) 804 .Case("BYTE", 1) 805 .Case("SHORT", 2) 806 .Case("LONG", 4) 807 .Case("QUAD", 8) 808 .Default(-1); 809 if (Size == -1) 810 return nullptr; 811 812 return make<BytesDataCommand>(readParenExpr(), Size); 813 } 814 815 StringRef ScriptParser::readParenLiteral() { 816 expect("("); 817 StringRef Tok = next(); 818 expect(")"); 819 return Tok; 820 } 821 822 Expr ScriptParser::readPrimary() { 823 if (peek() == "(") 824 return readParenExpr(); 825 826 if (consume("~")) { 827 Expr E = readPrimary(); 828 return [=] { return ~E().getValue(); }; 829 } 830 if (consume("-")) { 831 Expr E = readPrimary(); 832 return [=] { return -E().getValue(); }; 833 } 834 835 StringRef Tok = next(); 836 std::string Location = getCurrentLocation(); 837 838 // Built-in functions are parsed here. 839 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 840 if (Tok == "ABSOLUTE") { 841 Expr Inner = readParenExpr(); 842 return [=] { 843 ExprValue I = Inner(); 844 I.ForceAbsolute = true; 845 return I; 846 }; 847 } 848 if (Tok == "ADDR") { 849 StringRef Name = readParenLiteral(); 850 return [=]() -> ExprValue { 851 return {Script->getOutputSection(Location, Name), 0}; 852 }; 853 } 854 if (Tok == "ALIGN") { 855 expect("("); 856 Expr E = readExpr(); 857 if (consume(")")) 858 return [=] { return alignTo(Script->getDot(), E().getValue()); }; 859 expect(","); 860 Expr E2 = readExpr(); 861 expect(")"); 862 return [=] { return alignTo(E().getValue(), E2().getValue()); }; 863 } 864 if (Tok == "ALIGNOF") { 865 StringRef Name = readParenLiteral(); 866 return [=] { return Script->getOutputSection(Location, Name)->Alignment; }; 867 } 868 if (Tok == "ASSERT") 869 return readAssertExpr(); 870 if (Tok == "CONSTANT") { 871 StringRef Name = readParenLiteral(); 872 return [=] { return getConstant(Name); }; 873 } 874 if (Tok == "DATA_SEGMENT_ALIGN") { 875 expect("("); 876 Expr E = readExpr(); 877 expect(","); 878 readExpr(); 879 expect(")"); 880 return [=] { return alignTo(Script->getDot(), E().getValue()); }; 881 } 882 if (Tok == "DATA_SEGMENT_END") { 883 expect("("); 884 expect("."); 885 expect(")"); 886 return [] { return Script->getDot(); }; 887 } 888 if (Tok == "DATA_SEGMENT_RELRO_END") { 889 // GNU linkers implements more complicated logic to handle 890 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and 891 // just align to the next page boundary for simplicity. 892 expect("("); 893 readExpr(); 894 expect(","); 895 readExpr(); 896 expect(")"); 897 return [] { return alignTo(Script->getDot(), Target->PageSize); }; 898 } 899 if (Tok == "DEFINED") { 900 StringRef Name = readParenLiteral(); 901 return [=] { return Script->isDefined(Name) ? 1 : 0; }; 902 } 903 if (Tok == "LOADADDR") { 904 StringRef Name = readParenLiteral(); 905 return [=] { return Script->getOutputSection(Location, Name)->getLMA(); }; 906 } 907 if (Tok == "SEGMENT_START") { 908 expect("("); 909 skip(); 910 expect(","); 911 Expr E = readExpr(); 912 expect(")"); 913 return [=] { return E(); }; 914 } 915 if (Tok == "SIZEOF") { 916 StringRef Name = readParenLiteral(); 917 return [=] { return Script->getOutputSectionSize(Name); }; 918 } 919 if (Tok == "SIZEOF_HEADERS") 920 return [=] { return elf::getHeaderSize(); }; 921 922 // Tok is the dot. 923 if (Tok == ".") 924 return [=] { return Script->getSymbolValue(Location, Tok); }; 925 926 // Tok is a literal number. 927 if (Optional<uint64_t> Val = parseInt(Tok)) 928 return [=] { return *Val; }; 929 930 // Tok is a symbol name. 931 if (!isValidCIdentifier(Tok)) 932 setError("malformed number: " + Tok); 933 Script->Opt.ReferencedSymbols.push_back(Tok); 934 return [=] { return Script->getSymbolValue(Location, Tok); }; 935 } 936 937 Expr ScriptParser::readTernary(Expr Cond) { 938 Expr L = readExpr(); 939 expect(":"); 940 Expr R = readExpr(); 941 return [=] { return Cond().getValue() ? L() : R(); }; 942 } 943 944 Expr ScriptParser::readParenExpr() { 945 expect("("); 946 Expr E = readExpr(); 947 expect(")"); 948 return E; 949 } 950 951 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 952 std::vector<StringRef> Phdrs; 953 while (!Error && peek().startswith(":")) { 954 StringRef Tok = next(); 955 Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); 956 } 957 return Phdrs; 958 } 959 960 // Read a program header type name. The next token must be a 961 // name of a program header type or a constant (e.g. "0x3"). 962 unsigned ScriptParser::readPhdrType() { 963 StringRef Tok = next(); 964 if (Optional<uint64_t> Val = parseInt(Tok)) 965 return *Val; 966 967 unsigned Ret = StringSwitch<unsigned>(Tok) 968 .Case("PT_NULL", PT_NULL) 969 .Case("PT_LOAD", PT_LOAD) 970 .Case("PT_DYNAMIC", PT_DYNAMIC) 971 .Case("PT_INTERP", PT_INTERP) 972 .Case("PT_NOTE", PT_NOTE) 973 .Case("PT_SHLIB", PT_SHLIB) 974 .Case("PT_PHDR", PT_PHDR) 975 .Case("PT_TLS", PT_TLS) 976 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 977 .Case("PT_GNU_STACK", PT_GNU_STACK) 978 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 979 .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) 980 .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) 981 .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) 982 .Default(-1); 983 984 if (Ret == (unsigned)-1) { 985 setError("invalid program header type: " + Tok); 986 return PT_NULL; 987 } 988 return Ret; 989 } 990 991 // Reads an anonymous version declaration. 992 void ScriptParser::readAnonymousDeclaration() { 993 std::vector<SymbolVersion> Locals; 994 std::vector<SymbolVersion> Globals; 995 std::tie(Locals, Globals) = readSymbols(); 996 997 for (SymbolVersion V : Locals) { 998 if (V.Name == "*") 999 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1000 else 1001 Config->VersionScriptLocals.push_back(V); 1002 } 1003 1004 for (SymbolVersion V : Globals) 1005 Config->VersionScriptGlobals.push_back(V); 1006 1007 expect(";"); 1008 } 1009 1010 // Reads a non-anonymous version definition, 1011 // e.g. "VerStr { global: foo; bar; local: *; };". 1012 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1013 // Read a symbol list. 1014 std::vector<SymbolVersion> Locals; 1015 std::vector<SymbolVersion> Globals; 1016 std::tie(Locals, Globals) = readSymbols(); 1017 1018 for (SymbolVersion V : Locals) { 1019 if (V.Name == "*") 1020 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1021 else 1022 Config->VersionScriptLocals.push_back(V); 1023 } 1024 1025 // Create a new version definition and add that to the global symbols. 1026 VersionDefinition Ver; 1027 Ver.Name = VerStr; 1028 Ver.Globals = Globals; 1029 1030 // User-defined version number starts from 2 because 0 and 1 are 1031 // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively. 1032 Ver.Id = Config->VersionDefinitions.size() + 2; 1033 Config->VersionDefinitions.push_back(Ver); 1034 1035 // Each version may have a parent version. For example, "Ver2" 1036 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" 1037 // as a parent. This version hierarchy is, probably against your 1038 // instinct, purely for hint; the runtime doesn't care about it 1039 // at all. In LLD, we simply ignore it. 1040 if (peek() != ";") 1041 skip(); 1042 expect(";"); 1043 } 1044 1045 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 1046 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 1047 ScriptParser::readSymbols() { 1048 std::vector<SymbolVersion> Locals; 1049 std::vector<SymbolVersion> Globals; 1050 std::vector<SymbolVersion> *V = &Globals; 1051 1052 while (!Error) { 1053 if (consume("}")) 1054 break; 1055 if (consumeLabel("local")) { 1056 V = &Locals; 1057 continue; 1058 } 1059 if (consumeLabel("global")) { 1060 V = &Globals; 1061 continue; 1062 } 1063 1064 if (consume("extern")) { 1065 std::vector<SymbolVersion> Ext = readVersionExtern(); 1066 V->insert(V->end(), Ext.begin(), Ext.end()); 1067 } else { 1068 StringRef Tok = next(); 1069 V->push_back({unquote(Tok), false, hasWildcard(Tok)}); 1070 } 1071 expect(";"); 1072 } 1073 return {Locals, Globals}; 1074 } 1075 1076 // Reads an "extern C++" directive, e.g., 1077 // "extern "C++" { ns::*; "f(int, double)"; };" 1078 std::vector<SymbolVersion> ScriptParser::readVersionExtern() { 1079 StringRef Tok = next(); 1080 bool IsCXX = Tok == "\"C++\""; 1081 if (!IsCXX && Tok != "\"C\"") 1082 setError("Unknown language"); 1083 expect("{"); 1084 1085 std::vector<SymbolVersion> Ret; 1086 while (!Error && peek() != "}") { 1087 StringRef Tok = next(); 1088 bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); 1089 Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); 1090 expect(";"); 1091 } 1092 1093 expect("}"); 1094 return Ret; 1095 } 1096 1097 uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2, 1098 StringRef S3) { 1099 if (!consume(S1) && !consume(S2) && !consume(S3)) { 1100 setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); 1101 return 0; 1102 } 1103 expect("="); 1104 return readExpr()().getValue(); 1105 } 1106 1107 // Parse the MEMORY command as specified in: 1108 // https://sourceware.org/binutils/docs/ld/MEMORY.html 1109 // 1110 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... } 1111 void ScriptParser::readMemory() { 1112 expect("{"); 1113 while (!Error && !consume("}")) { 1114 StringRef Name = next(); 1115 1116 uint32_t Flags = 0; 1117 uint32_t NegFlags = 0; 1118 if (consume("(")) { 1119 std::tie(Flags, NegFlags) = readMemoryAttributes(); 1120 expect(")"); 1121 } 1122 expect(":"); 1123 1124 uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); 1125 expect(","); 1126 uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); 1127 1128 // Add the memory region to the region map (if it doesn't already exist). 1129 auto It = Script->Opt.MemoryRegions.find(Name); 1130 if (It != Script->Opt.MemoryRegions.end()) 1131 setError("region '" + Name + "' already defined"); 1132 else 1133 Script->Opt.MemoryRegions[Name] = {Name, Origin, Length, 1134 Origin, Flags, NegFlags}; 1135 } 1136 } 1137 1138 // This function parses the attributes used to match against section 1139 // flags when placing output sections in a memory region. These flags 1140 // are only used when an explicit memory region name is not used. 1141 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { 1142 uint32_t Flags = 0; 1143 uint32_t NegFlags = 0; 1144 bool Invert = false; 1145 1146 for (char C : next().lower()) { 1147 uint32_t Flag = 0; 1148 if (C == '!') 1149 Invert = !Invert; 1150 else if (C == 'w') 1151 Flag = SHF_WRITE; 1152 else if (C == 'x') 1153 Flag = SHF_EXECINSTR; 1154 else if (C == 'a') 1155 Flag = SHF_ALLOC; 1156 else if (C != 'r') 1157 setError("invalid memory region attribute"); 1158 1159 if (Invert) 1160 NegFlags |= Flag; 1161 else 1162 Flags |= Flag; 1163 } 1164 return {Flags, NegFlags}; 1165 } 1166 1167 void elf::readLinkerScript(MemoryBufferRef MB) { 1168 ScriptParser(MB).readLinkerScript(); 1169 } 1170 1171 void elf::readVersionScript(MemoryBufferRef MB) { 1172 ScriptParser(MB).readVersionScript(); 1173 } 1174 1175 void elf::readDynamicList(MemoryBufferRef MB) { 1176 ScriptParser(MB).readDynamicList(); 1177 } 1178