1 //===- ScriptParser.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains a recursive-descendent parser for linker scripts. 11 // Parsed results are stored to Config and Script global objects. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ScriptParser.h" 16 #include "Config.h" 17 #include "Driver.h" 18 #include "InputSection.h" 19 #include "LinkerScript.h" 20 #include "Memory.h" 21 #include "OutputSections.h" 22 #include "ScriptLexer.h" 23 #include "Symbols.h" 24 #include "Target.h" 25 #include "llvm/ADT/SmallString.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/ADT/StringSwitch.h" 28 #include "llvm/Support/Casting.h" 29 #include "llvm/Support/ELF.h" 30 #include "llvm/Support/ErrorHandling.h" 31 #include "llvm/Support/FileSystem.h" 32 #include "llvm/Support/Path.h" 33 #include <cassert> 34 #include <limits> 35 #include <vector> 36 37 using namespace llvm; 38 using namespace llvm::ELF; 39 using namespace llvm::support::endian; 40 using namespace lld; 41 using namespace lld::elf; 42 43 static bool isUnderSysroot(StringRef Path); 44 45 namespace { 46 class ScriptParser final : ScriptLexer { 47 public: 48 ScriptParser(MemoryBufferRef MB) 49 : ScriptLexer(MB), 50 IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} 51 52 void readLinkerScript(); 53 void readVersionScript(); 54 void readDynamicList(); 55 56 private: 57 void addFile(StringRef Path); 58 59 void readAsNeeded(); 60 void readEntry(); 61 void readExtern(); 62 void readGroup(); 63 void readInclude(); 64 void readMemory(); 65 void readOutput(); 66 void readOutputArch(); 67 void readOutputFormat(); 68 void readPhdrs(); 69 void readSearchDir(); 70 void readSections(); 71 void readVersion(); 72 void readVersionScriptCommand(); 73 74 SymbolAssignment *readAssignment(StringRef Name); 75 BytesDataCommand *readBytesDataCommand(StringRef Tok); 76 uint32_t readFill(); 77 OutputSectionCommand *readOutputSectionDescription(StringRef OutSec); 78 uint32_t readOutputSectionFiller(StringRef Tok); 79 std::vector<StringRef> readOutputSectionPhdrs(); 80 InputSectionDescription *readInputSectionDescription(StringRef Tok); 81 StringMatcher readFilePatterns(); 82 std::vector<SectionPattern> readInputSectionsList(); 83 InputSectionDescription *readInputSectionRules(StringRef FilePattern); 84 unsigned readPhdrType(); 85 SortSectionPolicy readSortKind(); 86 SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); 87 SymbolAssignment *readProvideOrAssignment(StringRef Tok); 88 void readSort(); 89 AssertCommand *readAssert(); 90 Expr readAssertExpr(); 91 92 uint64_t readMemoryAssignment(StringRef, StringRef, StringRef); 93 std::pair<uint32_t, uint32_t> readMemoryAttributes(); 94 95 Expr readExpr(); 96 Expr readExpr1(Expr Lhs, int MinPrec); 97 StringRef readParenLiteral(); 98 Expr readPrimary(); 99 Expr readTernary(Expr Cond); 100 Expr readParenExpr(); 101 102 // For parsing version script. 103 std::vector<SymbolVersion> readVersionExtern(); 104 void readAnonymousDeclaration(); 105 void readVersionDeclaration(StringRef VerStr); 106 107 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 108 readSymbols(); 109 110 bool IsUnderSysroot; 111 }; 112 } // namespace 113 114 static bool isUnderSysroot(StringRef Path) { 115 if (Config->Sysroot == "") 116 return false; 117 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 118 if (sys::fs::equivalent(Config->Sysroot, Path)) 119 return true; 120 return false; 121 } 122 123 // Some operations only support one non absolute value. Move the 124 // absolute one to the right hand side for convenience. 125 static void moveAbsRight(ExprValue &A, ExprValue &B) { 126 if (A.isAbsolute()) 127 std::swap(A, B); 128 if (!B.isAbsolute()) 129 error("At least one side of the expression must be absolute"); 130 } 131 132 static ExprValue add(ExprValue A, ExprValue B) { 133 moveAbsRight(A, B); 134 return {A.Sec, A.ForceAbsolute, A.Val + B.getValue()}; 135 } 136 137 static ExprValue sub(ExprValue A, ExprValue B) { 138 return {A.Sec, A.Val - B.getValue()}; 139 } 140 141 static ExprValue mul(ExprValue A, ExprValue B) { 142 return A.getValue() * B.getValue(); 143 } 144 145 static ExprValue div(ExprValue A, ExprValue B) { 146 if (uint64_t BV = B.getValue()) 147 return A.getValue() / BV; 148 error("division by zero"); 149 return 0; 150 } 151 152 static ExprValue bitAnd(ExprValue A, ExprValue B) { 153 moveAbsRight(A, B); 154 return {A.Sec, A.ForceAbsolute, 155 (A.getValue() & B.getValue()) - A.getSecAddr()}; 156 } 157 158 static ExprValue bitOr(ExprValue A, ExprValue B) { 159 moveAbsRight(A, B); 160 return {A.Sec, A.ForceAbsolute, 161 (A.getValue() | B.getValue()) - A.getSecAddr()}; 162 } 163 164 void ScriptParser::readDynamicList() { 165 expect("{"); 166 readAnonymousDeclaration(); 167 if (!atEOF()) 168 setError("EOF expected, but got " + next()); 169 } 170 171 void ScriptParser::readVersionScript() { 172 readVersionScriptCommand(); 173 if (!atEOF()) 174 setError("EOF expected, but got " + next()); 175 } 176 177 void ScriptParser::readVersionScriptCommand() { 178 if (consume("{")) { 179 readAnonymousDeclaration(); 180 return; 181 } 182 183 while (!atEOF() && !Error && peek() != "}") { 184 StringRef VerStr = next(); 185 if (VerStr == "{") { 186 setError("anonymous version definition is used in " 187 "combination with other version definitions"); 188 return; 189 } 190 expect("{"); 191 readVersionDeclaration(VerStr); 192 } 193 } 194 195 void ScriptParser::readVersion() { 196 expect("{"); 197 readVersionScriptCommand(); 198 expect("}"); 199 } 200 201 void ScriptParser::readLinkerScript() { 202 while (!atEOF()) { 203 StringRef Tok = next(); 204 if (Tok == ";") 205 continue; 206 207 if (Tok == "ASSERT") { 208 Script->Opt.Commands.push_back(readAssert()); 209 } else if (Tok == "ENTRY") { 210 readEntry(); 211 } else if (Tok == "EXTERN") { 212 readExtern(); 213 } else if (Tok == "GROUP" || Tok == "INPUT") { 214 readGroup(); 215 } else if (Tok == "INCLUDE") { 216 readInclude(); 217 } else if (Tok == "MEMORY") { 218 readMemory(); 219 } else if (Tok == "OUTPUT") { 220 readOutput(); 221 } else if (Tok == "OUTPUT_ARCH") { 222 readOutputArch(); 223 } else if (Tok == "OUTPUT_FORMAT") { 224 readOutputFormat(); 225 } else if (Tok == "PHDRS") { 226 readPhdrs(); 227 } else if (Tok == "SEARCH_DIR") { 228 readSearchDir(); 229 } else if (Tok == "SECTIONS") { 230 readSections(); 231 } else if (Tok == "VERSION") { 232 readVersion(); 233 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { 234 Script->Opt.Commands.push_back(Cmd); 235 } else { 236 setError("unknown directive: " + Tok); 237 } 238 } 239 } 240 241 void ScriptParser::addFile(StringRef S) { 242 if (IsUnderSysroot && S.startswith("/")) { 243 SmallString<128> PathData; 244 StringRef Path = (Config->Sysroot + S).toStringRef(PathData); 245 if (sys::fs::exists(Path)) { 246 Driver->addFile(Saver.save(Path), /*WithLOption=*/false); 247 return; 248 } 249 } 250 251 if (sys::path::is_absolute(S)) { 252 Driver->addFile(S, /*WithLOption=*/false); 253 } else if (S.startswith("=")) { 254 if (Config->Sysroot.empty()) 255 Driver->addFile(S.substr(1), /*WithLOption=*/false); 256 else 257 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)), 258 /*WithLOption=*/false); 259 } else if (S.startswith("-l")) { 260 Driver->addLibrary(S.substr(2)); 261 } else if (sys::fs::exists(S)) { 262 Driver->addFile(S, /*WithLOption=*/false); 263 } else { 264 if (Optional<std::string> Path = findFromSearchPaths(S)) 265 Driver->addFile(Saver.save(*Path), /*WithLOption=*/true); 266 else 267 setError("unable to find " + S); 268 } 269 } 270 271 void ScriptParser::readAsNeeded() { 272 expect("("); 273 bool Orig = Config->AsNeeded; 274 Config->AsNeeded = true; 275 while (!Error && !consume(")")) 276 addFile(unquote(next())); 277 Config->AsNeeded = Orig; 278 } 279 280 void ScriptParser::readEntry() { 281 // -e <symbol> takes predecence over ENTRY(<symbol>). 282 expect("("); 283 StringRef Tok = next(); 284 if (Config->Entry.empty()) 285 Config->Entry = Tok; 286 expect(")"); 287 } 288 289 void ScriptParser::readExtern() { 290 expect("("); 291 while (!Error && !consume(")")) 292 Config->Undefined.push_back(next()); 293 } 294 295 void ScriptParser::readGroup() { 296 expect("("); 297 while (!Error && !consume(")")) { 298 if (consume("AS_NEEDED")) 299 readAsNeeded(); 300 else 301 addFile(unquote(next())); 302 } 303 } 304 305 void ScriptParser::readInclude() { 306 StringRef Tok = unquote(next()); 307 308 // https://sourceware.org/binutils/docs/ld/File-Commands.html: 309 // The file will be searched for in the current directory, and in any 310 // directory specified with the -L option. 311 if (sys::fs::exists(Tok)) { 312 if (Optional<MemoryBufferRef> MB = readFile(Tok)) 313 tokenize(*MB); 314 return; 315 } 316 if (Optional<std::string> Path = findFromSearchPaths(Tok)) { 317 if (Optional<MemoryBufferRef> MB = readFile(*Path)) 318 tokenize(*MB); 319 return; 320 } 321 setError("cannot open " + Tok); 322 } 323 324 void ScriptParser::readOutput() { 325 // -o <file> takes predecence over OUTPUT(<file>). 326 expect("("); 327 StringRef Tok = next(); 328 if (Config->OutputFile.empty()) 329 Config->OutputFile = unquote(Tok); 330 expect(")"); 331 } 332 333 void ScriptParser::readOutputArch() { 334 // OUTPUT_ARCH is ignored for now. 335 expect("("); 336 while (!Error && !consume(")")) 337 skip(); 338 } 339 340 void ScriptParser::readOutputFormat() { 341 // Error checking only for now. 342 expect("("); 343 skip(); 344 if (consume(")")) 345 return; 346 expect(","); 347 skip(); 348 expect(","); 349 skip(); 350 expect(")"); 351 } 352 353 void ScriptParser::readPhdrs() { 354 expect("{"); 355 while (!Error && !consume("}")) { 356 Script->Opt.PhdrsCommands.push_back( 357 {next(), PT_NULL, false, false, UINT_MAX, nullptr}); 358 359 PhdrsCommand &PhdrCmd = Script->Opt.PhdrsCommands.back(); 360 PhdrCmd.Type = readPhdrType(); 361 362 while (!Error && !consume(";")) { 363 if (consume("FILEHDR")) 364 PhdrCmd.HasFilehdr = true; 365 else if (consume("PHDRS")) 366 PhdrCmd.HasPhdrs = true; 367 else if (consume("AT")) 368 PhdrCmd.LMAExpr = readParenExpr(); 369 else if (consume("FLAGS")) 370 PhdrCmd.Flags = readParenExpr()().getValue(); 371 else 372 setError("unexpected header attribute: " + next()); 373 } 374 } 375 } 376 377 void ScriptParser::readSearchDir() { 378 expect("("); 379 StringRef Tok = next(); 380 if (!Config->Nostdlib) 381 Config->SearchPaths.push_back(unquote(Tok)); 382 expect(")"); 383 } 384 385 void ScriptParser::readSections() { 386 Script->Opt.HasSections = true; 387 388 // -no-rosegment is used to avoid placing read only non-executable sections in 389 // their own segment. We do the same if SECTIONS command is present in linker 390 // script. See comment for computeFlags(). 391 Config->SingleRoRx = true; 392 393 expect("{"); 394 while (!Error && !consume("}")) { 395 StringRef Tok = next(); 396 BaseCommand *Cmd = readProvideOrAssignment(Tok); 397 if (!Cmd) { 398 if (Tok == "ASSERT") 399 Cmd = readAssert(); 400 else 401 Cmd = readOutputSectionDescription(Tok); 402 } 403 Script->Opt.Commands.push_back(Cmd); 404 } 405 } 406 407 static int precedence(StringRef Op) { 408 return StringSwitch<int>(Op) 409 .Cases("*", "/", 5) 410 .Cases("+", "-", 4) 411 .Cases("<<", ">>", 3) 412 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 413 .Cases("&", "|", 1) 414 .Default(-1); 415 } 416 417 StringMatcher ScriptParser::readFilePatterns() { 418 std::vector<StringRef> V; 419 while (!Error && !consume(")")) 420 V.push_back(next()); 421 return StringMatcher(V); 422 } 423 424 SortSectionPolicy ScriptParser::readSortKind() { 425 if (consume("SORT") || consume("SORT_BY_NAME")) 426 return SortSectionPolicy::Name; 427 if (consume("SORT_BY_ALIGNMENT")) 428 return SortSectionPolicy::Alignment; 429 if (consume("SORT_BY_INIT_PRIORITY")) 430 return SortSectionPolicy::Priority; 431 if (consume("SORT_NONE")) 432 return SortSectionPolicy::None; 433 return SortSectionPolicy::Default; 434 } 435 436 // Reads SECTIONS command contents in the following form: 437 // 438 // <contents> ::= <elem>* 439 // <elem> ::= <exclude>? <glob-pattern> 440 // <exclude> ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")" 441 // 442 // For example, 443 // 444 // *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz) 445 // 446 // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o". 447 // The semantics of that is section .foo in any file, section .bar in 448 // any file but a.o, and section .baz in any file but b.o. 449 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 450 std::vector<SectionPattern> Ret; 451 while (!Error && peek() != ")") { 452 StringMatcher ExcludeFilePat; 453 if (consume("EXCLUDE_FILE")) { 454 expect("("); 455 ExcludeFilePat = readFilePatterns(); 456 } 457 458 std::vector<StringRef> V; 459 while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") 460 V.push_back(next()); 461 462 if (!V.empty()) 463 Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); 464 else 465 setError("section pattern is expected"); 466 } 467 return Ret; 468 } 469 470 // Reads contents of "SECTIONS" directive. That directive contains a 471 // list of glob patterns for input sections. The grammar is as follows. 472 // 473 // <patterns> ::= <section-list> 474 // | <sort> "(" <section-list> ")" 475 // | <sort> "(" <sort> "(" <section-list> ")" ")" 476 // 477 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" 478 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" 479 // 480 // <section-list> is parsed by readInputSectionsList(). 481 InputSectionDescription * 482 ScriptParser::readInputSectionRules(StringRef FilePattern) { 483 auto *Cmd = make<InputSectionDescription>(FilePattern); 484 expect("("); 485 486 while (!Error && !consume(")")) { 487 SortSectionPolicy Outer = readSortKind(); 488 SortSectionPolicy Inner = SortSectionPolicy::Default; 489 std::vector<SectionPattern> V; 490 if (Outer != SortSectionPolicy::Default) { 491 expect("("); 492 Inner = readSortKind(); 493 if (Inner != SortSectionPolicy::Default) { 494 expect("("); 495 V = readInputSectionsList(); 496 expect(")"); 497 } else { 498 V = readInputSectionsList(); 499 } 500 expect(")"); 501 } else { 502 V = readInputSectionsList(); 503 } 504 505 for (SectionPattern &Pat : V) { 506 Pat.SortInner = Inner; 507 Pat.SortOuter = Outer; 508 } 509 510 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 511 } 512 return Cmd; 513 } 514 515 InputSectionDescription * 516 ScriptParser::readInputSectionDescription(StringRef Tok) { 517 // Input section wildcard can be surrounded by KEEP. 518 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 519 if (Tok == "KEEP") { 520 expect("("); 521 StringRef FilePattern = next(); 522 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 523 expect(")"); 524 Script->Opt.KeptSections.push_back(Cmd); 525 return Cmd; 526 } 527 return readInputSectionRules(Tok); 528 } 529 530 void ScriptParser::readSort() { 531 expect("("); 532 expect("CONSTRUCTORS"); 533 expect(")"); 534 } 535 536 AssertCommand *ScriptParser::readAssert() { 537 return make<AssertCommand>(readAssertExpr()); 538 } 539 540 Expr ScriptParser::readAssertExpr() { 541 expect("("); 542 Expr E = readExpr(); 543 expect(","); 544 StringRef Msg = unquote(next()); 545 expect(")"); 546 547 return [=] { 548 if (!E().getValue()) 549 error(Msg); 550 return Script->getDot(); 551 }; 552 } 553 554 // Reads a FILL(expr) command. We handle the FILL command as an 555 // alias for =fillexp section attribute, which is different from 556 // what GNU linkers do. 557 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 558 uint32_t ScriptParser::readFill() { 559 expect("("); 560 uint32_t V = readOutputSectionFiller(next()); 561 expect(")"); 562 expect(";"); 563 return V; 564 } 565 566 OutputSectionCommand * 567 ScriptParser::readOutputSectionDescription(StringRef OutSec) { 568 OutputSectionCommand *Cmd = make<OutputSectionCommand>(OutSec); 569 Cmd->Location = getCurrentLocation(); 570 571 // Read an address expression. 572 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html 573 if (peek() != ":") 574 Cmd->AddrExpr = readExpr(); 575 576 expect(":"); 577 578 if (consume("AT")) 579 Cmd->LMAExpr = readParenExpr(); 580 if (consume("ALIGN")) 581 Cmd->AlignExpr = readParenExpr(); 582 if (consume("SUBALIGN")) 583 Cmd->SubalignExpr = readParenExpr(); 584 585 // Parse constraints. 586 if (consume("ONLY_IF_RO")) 587 Cmd->Constraint = ConstraintKind::ReadOnly; 588 if (consume("ONLY_IF_RW")) 589 Cmd->Constraint = ConstraintKind::ReadWrite; 590 expect("{"); 591 592 while (!Error && !consume("}")) { 593 StringRef Tok = next(); 594 if (Tok == ";") { 595 // Empty commands are allowed. Do nothing here. 596 } else if (SymbolAssignment *Assign = readProvideOrAssignment(Tok)) { 597 Cmd->Commands.push_back(Assign); 598 } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) { 599 Cmd->Commands.push_back(Data); 600 } else if (Tok == "ASSERT") { 601 Cmd->Commands.push_back(readAssert()); 602 expect(";"); 603 } else if (Tok == "CONSTRUCTORS") { 604 // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors 605 // by name. This is for very old file formats such as ECOFF/XCOFF. 606 // For ELF, we should ignore. 607 } else if (Tok == "FILL") { 608 Cmd->Filler = readFill(); 609 } else if (Tok == "SORT") { 610 readSort(); 611 } else if (peek() == "(") { 612 Cmd->Commands.push_back(readInputSectionDescription(Tok)); 613 } else { 614 setError("unknown command " + Tok); 615 } 616 } 617 618 if (consume(">")) 619 Cmd->MemoryRegionName = next(); 620 621 Cmd->Phdrs = readOutputSectionPhdrs(); 622 623 if (consume("=")) 624 Cmd->Filler = readOutputSectionFiller(next()); 625 else if (peek().startswith("=")) 626 Cmd->Filler = readOutputSectionFiller(next().drop_front()); 627 628 // Consume optional comma following output section command. 629 consume(","); 630 631 return Cmd; 632 } 633 634 // Read "=<number>" where <number> is an octal/decimal/hexadecimal number. 635 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 636 // 637 // ld.gold is not fully compatible with ld.bfd. ld.bfd handles 638 // hexstrings as blobs of arbitrary sizes, while ld.gold handles them 639 // as 32-bit big-endian values. We will do the same as ld.gold does 640 // because it's simpler than what ld.bfd does. 641 uint32_t ScriptParser::readOutputSectionFiller(StringRef Tok) { 642 uint32_t V = 0; 643 if (Tok.getAsInteger(0, V)) 644 setError("invalid filler expression: " + Tok); 645 646 uint32_t Buf; 647 write32be(&Buf, V); 648 return Buf; 649 } 650 651 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { 652 expect("("); 653 SymbolAssignment *Cmd = readAssignment(next()); 654 Cmd->Provide = Provide; 655 Cmd->Hidden = Hidden; 656 expect(")"); 657 expect(";"); 658 return Cmd; 659 } 660 661 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) { 662 SymbolAssignment *Cmd = nullptr; 663 if (peek() == "=" || peek() == "+=") { 664 Cmd = readAssignment(Tok); 665 expect(";"); 666 } else if (Tok == "PROVIDE") { 667 Cmd = readProvideHidden(true, false); 668 } else if (Tok == "HIDDEN") { 669 Cmd = readProvideHidden(false, true); 670 } else if (Tok == "PROVIDE_HIDDEN") { 671 Cmd = readProvideHidden(true, true); 672 } 673 return Cmd; 674 } 675 676 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { 677 StringRef Op = next(); 678 assert(Op == "=" || Op == "+="); 679 Expr E = readExpr(); 680 if (Op == "+=") { 681 std::string Loc = getCurrentLocation(); 682 E = [=] { return add(Script->getSymbolValue(Loc, Name), E()); }; 683 } 684 return make<SymbolAssignment>(Name, E, getCurrentLocation()); 685 } 686 687 // This is an operator-precedence parser to parse a linker 688 // script expression. 689 Expr ScriptParser::readExpr() { 690 // Our lexer is context-aware. Set the in-expression bit so that 691 // they apply different tokenization rules. 692 bool Orig = InExpr; 693 InExpr = true; 694 Expr E = readExpr1(readPrimary(), 0); 695 InExpr = Orig; 696 return E; 697 } 698 699 static Expr combine(StringRef Op, Expr L, Expr R) { 700 if (Op == "+") 701 return [=] { return add(L(), R()); }; 702 if (Op == "-") 703 return [=] { return sub(L(), R()); }; 704 if (Op == "*") 705 return [=] { return mul(L(), R()); }; 706 if (Op == "/") 707 return [=] { return div(L(), R()); }; 708 if (Op == "<<") 709 return [=] { return L().getValue() << R().getValue(); }; 710 if (Op == ">>") 711 return [=] { return L().getValue() >> R().getValue(); }; 712 if (Op == "<") 713 return [=] { return L().getValue() < R().getValue(); }; 714 if (Op == ">") 715 return [=] { return L().getValue() > R().getValue(); }; 716 if (Op == ">=") 717 return [=] { return L().getValue() >= R().getValue(); }; 718 if (Op == "<=") 719 return [=] { return L().getValue() <= R().getValue(); }; 720 if (Op == "==") 721 return [=] { return L().getValue() == R().getValue(); }; 722 if (Op == "!=") 723 return [=] { return L().getValue() != R().getValue(); }; 724 if (Op == "&") 725 return [=] { return bitAnd(L(), R()); }; 726 if (Op == "|") 727 return [=] { return bitOr(L(), R()); }; 728 llvm_unreachable("invalid operator"); 729 } 730 731 // This is a part of the operator-precedence parser. This function 732 // assumes that the remaining token stream starts with an operator. 733 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { 734 while (!atEOF() && !Error) { 735 // Read an operator and an expression. 736 if (consume("?")) 737 return readTernary(Lhs); 738 StringRef Op1 = peek(); 739 if (precedence(Op1) < MinPrec) 740 break; 741 skip(); 742 Expr Rhs = readPrimary(); 743 744 // Evaluate the remaining part of the expression first if the 745 // next operator has greater precedence than the previous one. 746 // For example, if we have read "+" and "3", and if the next 747 // operator is "*", then we'll evaluate 3 * ... part first. 748 while (!atEOF()) { 749 StringRef Op2 = peek(); 750 if (precedence(Op2) <= precedence(Op1)) 751 break; 752 Rhs = readExpr1(Rhs, precedence(Op2)); 753 } 754 755 Lhs = combine(Op1, Lhs, Rhs); 756 } 757 return Lhs; 758 } 759 760 uint64_t static getConstant(StringRef S) { 761 if (S == "COMMONPAGESIZE") 762 return Target->PageSize; 763 if (S == "MAXPAGESIZE") 764 return Config->MaxPageSize; 765 error("unknown constant: " + S); 766 return 0; 767 } 768 769 // Parses Tok as an integer. It recognizes hexadecimal (prefixed with 770 // "0x" or suffixed with "H") and decimal numbers. Decimal numbers may 771 // have "K" (Ki) or "M" (Mi) suffixes. 772 static Optional<uint64_t> parseInt(StringRef Tok) { 773 // Negative number 774 if (Tok.startswith("-")) { 775 if (Optional<uint64_t> Val = parseInt(Tok.substr(1))) 776 return -*Val; 777 return None; 778 } 779 780 // Hexadecimal 781 uint64_t Val; 782 if (Tok.startswith_lower("0x") && !Tok.substr(2).getAsInteger(16, Val)) 783 return Val; 784 if (Tok.endswith_lower("H") && !Tok.drop_back().getAsInteger(16, Val)) 785 return Val; 786 787 // Decimal 788 if (Tok.endswith_lower("K")) { 789 if (Tok.drop_back().getAsInteger(10, Val)) 790 return None; 791 return Val * 1024; 792 } 793 if (Tok.endswith_lower("M")) { 794 if (Tok.drop_back().getAsInteger(10, Val)) 795 return None; 796 return Val * 1024 * 1024; 797 } 798 if (Tok.getAsInteger(10, Val)) 799 return None; 800 return Val; 801 } 802 803 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { 804 int Size = StringSwitch<int>(Tok) 805 .Case("BYTE", 1) 806 .Case("SHORT", 2) 807 .Case("LONG", 4) 808 .Case("QUAD", 8) 809 .Default(-1); 810 if (Size == -1) 811 return nullptr; 812 813 return make<BytesDataCommand>(readParenExpr(), Size); 814 } 815 816 StringRef ScriptParser::readParenLiteral() { 817 expect("("); 818 StringRef Tok = next(); 819 expect(")"); 820 return Tok; 821 } 822 823 Expr ScriptParser::readPrimary() { 824 if (peek() == "(") 825 return readParenExpr(); 826 827 if (consume("~")) { 828 Expr E = readPrimary(); 829 return [=] { return ~E().getValue(); }; 830 } 831 if (consume("-")) { 832 Expr E = readPrimary(); 833 return [=] { return -E().getValue(); }; 834 } 835 836 StringRef Tok = next(); 837 std::string Location = getCurrentLocation(); 838 839 // Built-in functions are parsed here. 840 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 841 if (Tok == "ABSOLUTE") { 842 Expr Inner = readParenExpr(); 843 return [=] { 844 ExprValue I = Inner(); 845 I.ForceAbsolute = true; 846 return I; 847 }; 848 } 849 if (Tok == "ADDR") { 850 StringRef Name = readParenLiteral(); 851 return [=]() -> ExprValue { 852 return {Script->getOutputSection(Location, Name), 0}; 853 }; 854 } 855 if (Tok == "ALIGN") { 856 expect("("); 857 Expr E = readExpr(); 858 if (consume(")")) 859 return [=] { return alignTo(Script->getDot(), E().getValue()); }; 860 expect(","); 861 Expr E2 = readExpr(); 862 expect(")"); 863 return [=] { return alignTo(E().getValue(), E2().getValue()); }; 864 } 865 if (Tok == "ALIGNOF") { 866 StringRef Name = readParenLiteral(); 867 return [=] { return Script->getOutputSection(Location, Name)->Alignment; }; 868 } 869 if (Tok == "ASSERT") 870 return readAssertExpr(); 871 if (Tok == "CONSTANT") { 872 StringRef Name = readParenLiteral(); 873 return [=] { return getConstant(Name); }; 874 } 875 if (Tok == "DATA_SEGMENT_ALIGN") { 876 expect("("); 877 Expr E = readExpr(); 878 expect(","); 879 readExpr(); 880 expect(")"); 881 return [=] { return alignTo(Script->getDot(), E().getValue()); }; 882 } 883 if (Tok == "DATA_SEGMENT_END") { 884 expect("("); 885 expect("."); 886 expect(")"); 887 return [] { return Script->getDot(); }; 888 } 889 if (Tok == "DATA_SEGMENT_RELRO_END") { 890 // GNU linkers implements more complicated logic to handle 891 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and 892 // just align to the next page boundary for simplicity. 893 expect("("); 894 readExpr(); 895 expect(","); 896 readExpr(); 897 expect(")"); 898 return [] { return alignTo(Script->getDot(), Target->PageSize); }; 899 } 900 if (Tok == "DEFINED") { 901 StringRef Name = readParenLiteral(); 902 return [=] { return Script->isDefined(Name) ? 1 : 0; }; 903 } 904 if (Tok == "LOADADDR") { 905 StringRef Name = readParenLiteral(); 906 return [=] { return Script->getOutputSection(Location, Name)->getLMA(); }; 907 } 908 if (Tok == "SEGMENT_START") { 909 expect("("); 910 skip(); 911 expect(","); 912 Expr E = readExpr(); 913 expect(")"); 914 return [=] { return E(); }; 915 } 916 if (Tok == "SIZEOF") { 917 StringRef Name = readParenLiteral(); 918 return [=] { return Script->getOutputSectionSize(Name); }; 919 } 920 if (Tok == "SIZEOF_HEADERS") 921 return [=] { return elf::getHeaderSize(); }; 922 923 // Tok is the dot. 924 if (Tok == ".") 925 return [=] { return Script->getSymbolValue(Location, Tok); }; 926 927 // Tok is a literal number. 928 if (Optional<uint64_t> Val = parseInt(Tok)) 929 return [=] { return *Val; }; 930 931 // Tok is a symbol name. 932 if (!isValidCIdentifier(Tok)) 933 setError("malformed number: " + Tok); 934 Script->Opt.ReferencedSymbols.push_back(Tok); 935 return [=] { return Script->getSymbolValue(Location, Tok); }; 936 } 937 938 Expr ScriptParser::readTernary(Expr Cond) { 939 Expr L = readExpr(); 940 expect(":"); 941 Expr R = readExpr(); 942 return [=] { return Cond().getValue() ? L() : R(); }; 943 } 944 945 Expr ScriptParser::readParenExpr() { 946 expect("("); 947 Expr E = readExpr(); 948 expect(")"); 949 return E; 950 } 951 952 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 953 std::vector<StringRef> Phdrs; 954 while (!Error && peek().startswith(":")) { 955 StringRef Tok = next(); 956 Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); 957 } 958 return Phdrs; 959 } 960 961 // Read a program header type name. The next token must be a 962 // name of a program header type or a constant (e.g. "0x3"). 963 unsigned ScriptParser::readPhdrType() { 964 StringRef Tok = next(); 965 if (Optional<uint64_t> Val = parseInt(Tok)) 966 return *Val; 967 968 unsigned Ret = StringSwitch<unsigned>(Tok) 969 .Case("PT_NULL", PT_NULL) 970 .Case("PT_LOAD", PT_LOAD) 971 .Case("PT_DYNAMIC", PT_DYNAMIC) 972 .Case("PT_INTERP", PT_INTERP) 973 .Case("PT_NOTE", PT_NOTE) 974 .Case("PT_SHLIB", PT_SHLIB) 975 .Case("PT_PHDR", PT_PHDR) 976 .Case("PT_TLS", PT_TLS) 977 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 978 .Case("PT_GNU_STACK", PT_GNU_STACK) 979 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 980 .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) 981 .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) 982 .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) 983 .Default(-1); 984 985 if (Ret == (unsigned)-1) { 986 setError("invalid program header type: " + Tok); 987 return PT_NULL; 988 } 989 return Ret; 990 } 991 992 // Reads an anonymous version declaration. 993 void ScriptParser::readAnonymousDeclaration() { 994 std::vector<SymbolVersion> Locals; 995 std::vector<SymbolVersion> Globals; 996 std::tie(Locals, Globals) = readSymbols(); 997 998 for (SymbolVersion V : Locals) { 999 if (V.Name == "*") 1000 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1001 else 1002 Config->VersionScriptLocals.push_back(V); 1003 } 1004 1005 for (SymbolVersion V : Globals) 1006 Config->VersionScriptGlobals.push_back(V); 1007 1008 expect(";"); 1009 } 1010 1011 // Reads a non-anonymous version definition, 1012 // e.g. "VerStr { global: foo; bar; local: *; };". 1013 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1014 // Read a symbol list. 1015 std::vector<SymbolVersion> Locals; 1016 std::vector<SymbolVersion> Globals; 1017 std::tie(Locals, Globals) = readSymbols(); 1018 1019 for (SymbolVersion V : Locals) { 1020 if (V.Name == "*") 1021 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1022 else 1023 Config->VersionScriptLocals.push_back(V); 1024 } 1025 1026 // Create a new version definition and add that to the global symbols. 1027 VersionDefinition Ver; 1028 Ver.Name = VerStr; 1029 Ver.Globals = Globals; 1030 1031 // User-defined version number starts from 2 because 0 and 1 are 1032 // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively. 1033 Ver.Id = Config->VersionDefinitions.size() + 2; 1034 Config->VersionDefinitions.push_back(Ver); 1035 1036 // Each version may have a parent version. For example, "Ver2" 1037 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" 1038 // as a parent. This version hierarchy is, probably against your 1039 // instinct, purely for hint; the runtime doesn't care about it 1040 // at all. In LLD, we simply ignore it. 1041 if (peek() != ";") 1042 skip(); 1043 expect(";"); 1044 } 1045 1046 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 1047 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 1048 ScriptParser::readSymbols() { 1049 std::vector<SymbolVersion> Locals; 1050 std::vector<SymbolVersion> Globals; 1051 std::vector<SymbolVersion> *V = &Globals; 1052 1053 while (!Error) { 1054 if (consume("}")) 1055 break; 1056 if (consumeLabel("local")) { 1057 V = &Locals; 1058 continue; 1059 } 1060 if (consumeLabel("global")) { 1061 V = &Globals; 1062 continue; 1063 } 1064 1065 if (consume("extern")) { 1066 std::vector<SymbolVersion> Ext = readVersionExtern(); 1067 V->insert(V->end(), Ext.begin(), Ext.end()); 1068 } else { 1069 StringRef Tok = next(); 1070 V->push_back({unquote(Tok), false, hasWildcard(Tok)}); 1071 } 1072 expect(";"); 1073 } 1074 return {Locals, Globals}; 1075 } 1076 1077 // Reads an "extern C++" directive, e.g., 1078 // "extern "C++" { ns::*; "f(int, double)"; };" 1079 std::vector<SymbolVersion> ScriptParser::readVersionExtern() { 1080 StringRef Tok = next(); 1081 bool IsCXX = Tok == "\"C++\""; 1082 if (!IsCXX && Tok != "\"C\"") 1083 setError("Unknown language"); 1084 expect("{"); 1085 1086 std::vector<SymbolVersion> Ret; 1087 while (!Error && peek() != "}") { 1088 StringRef Tok = next(); 1089 bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); 1090 Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); 1091 expect(";"); 1092 } 1093 1094 expect("}"); 1095 return Ret; 1096 } 1097 1098 uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2, 1099 StringRef S3) { 1100 if (!consume(S1) && !consume(S2) && !consume(S3)) { 1101 setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); 1102 return 0; 1103 } 1104 expect("="); 1105 1106 // TODO: Fully support constant expressions. 1107 if (Optional<uint64_t> Val = parseInt(next())) 1108 return *Val; 1109 setError("nonconstant expression for " + S1); 1110 return 0; 1111 } 1112 1113 // Parse the MEMORY command as specified in: 1114 // https://sourceware.org/binutils/docs/ld/MEMORY.html 1115 // 1116 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... } 1117 void ScriptParser::readMemory() { 1118 expect("{"); 1119 while (!Error && !consume("}")) { 1120 StringRef Name = next(); 1121 1122 uint32_t Flags = 0; 1123 uint32_t NegFlags = 0; 1124 if (consume("(")) { 1125 std::tie(Flags, NegFlags) = readMemoryAttributes(); 1126 expect(")"); 1127 } 1128 expect(":"); 1129 1130 uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); 1131 expect(","); 1132 uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); 1133 1134 // Add the memory region to the region map (if it doesn't already exist). 1135 auto It = Script->Opt.MemoryRegions.find(Name); 1136 if (It != Script->Opt.MemoryRegions.end()) 1137 setError("region '" + Name + "' already defined"); 1138 else 1139 Script->Opt.MemoryRegions[Name] = {Name, Origin, Length, 1140 Origin, Flags, NegFlags}; 1141 } 1142 } 1143 1144 // This function parses the attributes used to match against section 1145 // flags when placing output sections in a memory region. These flags 1146 // are only used when an explicit memory region name is not used. 1147 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { 1148 uint32_t Flags = 0; 1149 uint32_t NegFlags = 0; 1150 bool Invert = false; 1151 1152 for (char C : next().lower()) { 1153 uint32_t Flag = 0; 1154 if (C == '!') 1155 Invert = !Invert; 1156 else if (C == 'w') 1157 Flag = SHF_WRITE; 1158 else if (C == 'x') 1159 Flag = SHF_EXECINSTR; 1160 else if (C == 'a') 1161 Flag = SHF_ALLOC; 1162 else if (C != 'r') 1163 setError("invalid memory region attribute"); 1164 1165 if (Invert) 1166 NegFlags |= Flag; 1167 else 1168 Flags |= Flag; 1169 } 1170 return {Flags, NegFlags}; 1171 } 1172 1173 void elf::readLinkerScript(MemoryBufferRef MB) { 1174 ScriptParser(MB).readLinkerScript(); 1175 } 1176 1177 void elf::readVersionScript(MemoryBufferRef MB) { 1178 ScriptParser(MB).readVersionScript(); 1179 } 1180 1181 void elf::readDynamicList(MemoryBufferRef MB) { 1182 ScriptParser(MB).readDynamicList(); 1183 } 1184