//===- ScriptParser.cpp ---------------------------------------------------===//
//
//                             The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a recursive-descent parser for linker scripts.
// Parsed results are stored to Config and Script global objects.
//
//===----------------------------------------------------------------------===//

#include "ScriptParser.h"
#include "Config.h"
#include "Driver.h"
#include "InputSection.h"
#include "LinkerScript.h"
#include "Memory.h"
#include "OutputSections.h"
#include "ScriptLexer.h"
#include "Symbols.h"
#include "Target.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include <cassert>
#include <limits>
#include <vector>

using namespace llvm;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;

// Defined below the class; declared here because the ScriptParser
// constructor calls it.
static bool isUnderSysroot(StringRef Path);

namespace {
// Parser for linker scripts, version scripts and dynamic lists. It
// privately inherits the tokenizer from ScriptLexer. The three public
// entry points each parse one kind of input; everything else is a helper
// for one grammar production.
class ScriptParser final : ScriptLexer {
public:
  // Tokenizes MB via the ScriptLexer base and records whether the buffer's
  // identifier names a path located under Config->Sysroot.
  ScriptParser(MemoryBufferRef MB)
      : ScriptLexer(MB),
        IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {}

  void readLinkerScript();
  void readVersionScript();
  void readDynamicList();

private:
  // Hands a file named in the script to the driver (see the definition for
  // the lookup order).
  void addFile(StringRef Path);

  // Top-level linker script commands.
  void readAsNeeded();
  void readEntry();
  void readExtern();
  void readGroup();
  void readInclude();
  void readMemory();
  void readOutput();
  void readOutputArch();
  void readOutputFormat();
  void readPhdrs();
  void readSearchDir();
  void readSections();
  void readVersion();
  void readVersionScriptCommand();

  // Pieces of the SECTIONS and PHDRS commands, and symbol assignments.
  SymbolAssignment *readAssignment(StringRef Name);
  BytesDataCommand *readBytesDataCommand(StringRef Tok);
  uint32_t readFill();
  OutputSectionCommand *readOutputSectionDescription(StringRef OutSec);
  uint32_t readOutputSectionFiller(StringRef Tok);
  std::vector<StringRef> readOutputSectionPhdrs();
  InputSectionDescription *readInputSectionDescription(StringRef Tok);
  StringMatcher readFilePatterns();
  std::vector<SectionPattern> readInputSectionsList();
  InputSectionDescription *readInputSectionRules(StringRef FilePattern);
  unsigned readPhdrType();
  SortSectionPolicy readSortKind();
  SymbolAssignment *readProvideHidden(bool Provide, bool Hidden);
  SymbolAssignment *readProvideOrAssignment(StringRef Tok);
  void readSort();
  AssertCommand *readAssert();
  Expr readAssertExpr();

  // Pieces of the MEMORY command.
  uint64_t readMemoryAssignment(StringRef, StringRef, StringRef);
  std::pair<uint32_t, uint32_t> readMemoryAttributes();

  // Expression parsing.
  Expr readExpr();
  Expr readExpr1(Expr Lhs, int MinPrec);
  StringRef readParenLiteral();
  Expr readPrimary();
  Expr readTernary(Expr Cond);
  Expr readParenExpr();

  // For parsing version script.
  std::vector<SymbolVersion> readVersionExtern();
  void readAnonymousDeclaration();
  void readVersionDeclaration(StringRef VerStr);

  // Returns the (locals, globals) pair read from one symbol list.
  std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>>
  readSymbols();

  // True if the script being parsed lives under Config->Sysroot; consulted
  // by addFile() when it sees a path starting with "/".
  bool IsUnderSysroot;
};
} // namespace

// Returns true if Path, or any of its parent directories, is equivalent to
// Config->Sysroot. Always false when no sysroot is configured.
static bool isUnderSysroot(StringRef Path) {
  if (Config->Sysroot == "")
    return false;
  // Walk up the directory chain until the path becomes empty.
  for (; !Path.empty(); Path = sys::path::parent_path(Path))
    if (sys::fs::equivalent(Config->Sysroot, Path))
      return true;
  return false;
}

// Some operations only support one non absolute value. Move the
// absolute one to the right hand side for convenience.
124 static void moveAbsRight(ExprValue &A, ExprValue &B) { 125 if (A.isAbsolute()) 126 std::swap(A, B); 127 if (!B.isAbsolute()) 128 error("At least one side of the expression must be absolute"); 129 } 130 131 static ExprValue add(ExprValue A, ExprValue B) { 132 moveAbsRight(A, B); 133 return {A.Sec, A.ForceAbsolute, A.Val + B.getValue()}; 134 } 135 136 static ExprValue sub(ExprValue A, ExprValue B) { 137 return {A.Sec, A.Val - B.getValue()}; 138 } 139 140 static ExprValue mul(ExprValue A, ExprValue B) { 141 return A.getValue() * B.getValue(); 142 } 143 144 static ExprValue div(ExprValue A, ExprValue B) { 145 if (uint64_t BV = B.getValue()) 146 return A.getValue() / BV; 147 error("division by zero"); 148 return 0; 149 } 150 151 static ExprValue bitAnd(ExprValue A, ExprValue B) { 152 moveAbsRight(A, B); 153 return {A.Sec, A.ForceAbsolute, 154 (A.getValue() & B.getValue()) - A.getSecAddr()}; 155 } 156 157 static ExprValue bitOr(ExprValue A, ExprValue B) { 158 moveAbsRight(A, B); 159 return {A.Sec, A.ForceAbsolute, 160 (A.getValue() | B.getValue()) - A.getSecAddr()}; 161 } 162 163 void ScriptParser::readDynamicList() { 164 expect("{"); 165 readAnonymousDeclaration(); 166 if (!atEOF()) 167 setError("EOF expected, but got " + next()); 168 } 169 170 void ScriptParser::readVersionScript() { 171 readVersionScriptCommand(); 172 if (!atEOF()) 173 setError("EOF expected, but got " + next()); 174 } 175 176 void ScriptParser::readVersionScriptCommand() { 177 if (consume("{")) { 178 readAnonymousDeclaration(); 179 return; 180 } 181 182 while (!atEOF() && !Error && peek() != "}") { 183 StringRef VerStr = next(); 184 if (VerStr == "{") { 185 setError("anonymous version definition is used in " 186 "combination with other version definitions"); 187 return; 188 } 189 expect("{"); 190 readVersionDeclaration(VerStr); 191 } 192 } 193 194 void ScriptParser::readVersion() { 195 expect("{"); 196 readVersionScriptCommand(); 197 expect("}"); 198 } 199 200 void 
ScriptParser::readLinkerScript() { 201 while (!atEOF()) { 202 StringRef Tok = next(); 203 if (Tok == ";") 204 continue; 205 206 if (Tok == "ASSERT") { 207 Script->Opt.Commands.push_back(readAssert()); 208 } else if (Tok == "ENTRY") { 209 readEntry(); 210 } else if (Tok == "EXTERN") { 211 readExtern(); 212 } else if (Tok == "GROUP" || Tok == "INPUT") { 213 readGroup(); 214 } else if (Tok == "INCLUDE") { 215 readInclude(); 216 } else if (Tok == "MEMORY") { 217 readMemory(); 218 } else if (Tok == "OUTPUT") { 219 readOutput(); 220 } else if (Tok == "OUTPUT_ARCH") { 221 readOutputArch(); 222 } else if (Tok == "OUTPUT_FORMAT") { 223 readOutputFormat(); 224 } else if (Tok == "PHDRS") { 225 readPhdrs(); 226 } else if (Tok == "SEARCH_DIR") { 227 readSearchDir(); 228 } else if (Tok == "SECTIONS") { 229 readSections(); 230 } else if (Tok == "VERSION") { 231 readVersion(); 232 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { 233 Script->Opt.Commands.push_back(Cmd); 234 } else { 235 setError("unknown directive: " + Tok); 236 } 237 } 238 } 239 240 void ScriptParser::addFile(StringRef S) { 241 if (IsUnderSysroot && S.startswith("/")) { 242 SmallString<128> PathData; 243 StringRef Path = (Config->Sysroot + S).toStringRef(PathData); 244 if (sys::fs::exists(Path)) { 245 Driver->addFile(Saver.save(Path)); 246 return; 247 } 248 } 249 250 if (sys::path::is_absolute(S)) { 251 Driver->addFile(S); 252 } else if (S.startswith("=")) { 253 if (Config->Sysroot.empty()) 254 Driver->addFile(S.substr(1)); 255 else 256 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1))); 257 } else if (S.startswith("-l")) { 258 Driver->addLibrary(S.substr(2)); 259 } else if (sys::fs::exists(S)) { 260 Driver->addFile(S); 261 } else { 262 if (Optional<std::string> Path = findFromSearchPaths(S)) 263 Driver->addFile(Saver.save(*Path)); 264 else 265 setError("unable to find " + S); 266 } 267 } 268 269 void ScriptParser::readAsNeeded() { 270 expect("("); 271 bool Orig = Config->AsNeeded; 
272 Config->AsNeeded = true; 273 while (!Error && !consume(")")) 274 addFile(unquote(next())); 275 Config->AsNeeded = Orig; 276 } 277 278 void ScriptParser::readEntry() { 279 // -e <symbol> takes predecence over ENTRY(<symbol>). 280 expect("("); 281 StringRef Tok = next(); 282 if (Config->Entry.empty()) 283 Config->Entry = Tok; 284 expect(")"); 285 } 286 287 void ScriptParser::readExtern() { 288 expect("("); 289 while (!Error && !consume(")")) 290 Config->Undefined.push_back(next()); 291 } 292 293 void ScriptParser::readGroup() { 294 expect("("); 295 while (!Error && !consume(")")) { 296 if (consume("AS_NEEDED")) 297 readAsNeeded(); 298 else 299 addFile(unquote(next())); 300 } 301 } 302 303 void ScriptParser::readInclude() { 304 StringRef Tok = unquote(next()); 305 306 // https://sourceware.org/binutils/docs/ld/File-Commands.html: 307 // The file will be searched for in the current directory, and in any 308 // directory specified with the -L option. 309 if (sys::fs::exists(Tok)) { 310 if (Optional<MemoryBufferRef> MB = readFile(Tok)) 311 tokenize(*MB); 312 return; 313 } 314 if (Optional<std::string> Path = findFromSearchPaths(Tok)) { 315 if (Optional<MemoryBufferRef> MB = readFile(*Path)) 316 tokenize(*MB); 317 return; 318 } 319 setError("cannot open " + Tok); 320 } 321 322 void ScriptParser::readOutput() { 323 // -o <file> takes predecence over OUTPUT(<file>). 324 expect("("); 325 StringRef Tok = next(); 326 if (Config->OutputFile.empty()) 327 Config->OutputFile = unquote(Tok); 328 expect(")"); 329 } 330 331 void ScriptParser::readOutputArch() { 332 // OUTPUT_ARCH is ignored for now. 333 expect("("); 334 while (!Error && !consume(")")) 335 skip(); 336 } 337 338 void ScriptParser::readOutputFormat() { 339 // Error checking only for now. 
340 expect("("); 341 skip(); 342 if (consume(")")) 343 return; 344 expect(","); 345 skip(); 346 expect(","); 347 skip(); 348 expect(")"); 349 } 350 351 void ScriptParser::readPhdrs() { 352 expect("{"); 353 while (!Error && !consume("}")) { 354 Script->Opt.PhdrsCommands.push_back( 355 {next(), PT_NULL, false, false, UINT_MAX, nullptr}); 356 357 PhdrsCommand &PhdrCmd = Script->Opt.PhdrsCommands.back(); 358 PhdrCmd.Type = readPhdrType(); 359 360 while (!Error && !consume(";")) { 361 if (consume("FILEHDR")) 362 PhdrCmd.HasFilehdr = true; 363 else if (consume("PHDRS")) 364 PhdrCmd.HasPhdrs = true; 365 else if (consume("AT")) 366 PhdrCmd.LMAExpr = readParenExpr(); 367 else if (consume("FLAGS")) 368 PhdrCmd.Flags = readParenExpr()().getValue(); 369 else 370 setError("unexpected header attribute: " + next()); 371 } 372 } 373 } 374 375 void ScriptParser::readSearchDir() { 376 expect("("); 377 StringRef Tok = next(); 378 if (!Config->Nostdlib) 379 Config->SearchPaths.push_back(unquote(Tok)); 380 expect(")"); 381 } 382 383 void ScriptParser::readSections() { 384 Script->Opt.HasSections = true; 385 386 // -no-rosegment is used to avoid placing read only non-executable sections in 387 // their own segment. We do the same if SECTIONS command is present in linker 388 // script. See comment for computeFlags(). 
389 Config->SingleRoRx = true; 390 391 expect("{"); 392 while (!Error && !consume("}")) { 393 StringRef Tok = next(); 394 BaseCommand *Cmd = readProvideOrAssignment(Tok); 395 if (!Cmd) { 396 if (Tok == "ASSERT") 397 Cmd = readAssert(); 398 else 399 Cmd = readOutputSectionDescription(Tok); 400 } 401 Script->Opt.Commands.push_back(Cmd); 402 } 403 } 404 405 static int precedence(StringRef Op) { 406 return StringSwitch<int>(Op) 407 .Cases("*", "/", 5) 408 .Cases("+", "-", 4) 409 .Cases("<<", ">>", 3) 410 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 411 .Cases("&", "|", 1) 412 .Default(-1); 413 } 414 415 StringMatcher ScriptParser::readFilePatterns() { 416 std::vector<StringRef> V; 417 while (!Error && !consume(")")) 418 V.push_back(next()); 419 return StringMatcher(V); 420 } 421 422 SortSectionPolicy ScriptParser::readSortKind() { 423 if (consume("SORT") || consume("SORT_BY_NAME")) 424 return SortSectionPolicy::Name; 425 if (consume("SORT_BY_ALIGNMENT")) 426 return SortSectionPolicy::Alignment; 427 if (consume("SORT_BY_INIT_PRIORITY")) 428 return SortSectionPolicy::Priority; 429 if (consume("SORT_NONE")) 430 return SortSectionPolicy::None; 431 return SortSectionPolicy::Default; 432 } 433 434 // Reads SECTIONS command contents in the following form: 435 // 436 // <contents> ::= <elem>* 437 // <elem> ::= <exclude>? <glob-pattern> 438 // <exclude> ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")" 439 // 440 // For example, 441 // 442 // *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz) 443 // 444 // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o". 445 // The semantics of that is section .foo in any file, section .bar in 446 // any file but a.o, and section .baz in any file but b.o. 
447 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 448 std::vector<SectionPattern> Ret; 449 while (!Error && peek() != ")") { 450 StringMatcher ExcludeFilePat; 451 if (consume("EXCLUDE_FILE")) { 452 expect("("); 453 ExcludeFilePat = readFilePatterns(); 454 } 455 456 std::vector<StringRef> V; 457 while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") 458 V.push_back(next()); 459 460 if (!V.empty()) 461 Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); 462 else 463 setError("section pattern is expected"); 464 } 465 return Ret; 466 } 467 468 // Reads contents of "SECTIONS" directive. That directive contains a 469 // list of glob patterns for input sections. The grammar is as follows. 470 // 471 // <patterns> ::= <section-list> 472 // | <sort> "(" <section-list> ")" 473 // | <sort> "(" <sort> "(" <section-list> ")" ")" 474 // 475 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" 476 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" 477 // 478 // <section-list> is parsed by readInputSectionsList(). 
479 InputSectionDescription * 480 ScriptParser::readInputSectionRules(StringRef FilePattern) { 481 auto *Cmd = make<InputSectionDescription>(FilePattern); 482 expect("("); 483 484 while (!Error && !consume(")")) { 485 SortSectionPolicy Outer = readSortKind(); 486 SortSectionPolicy Inner = SortSectionPolicy::Default; 487 std::vector<SectionPattern> V; 488 if (Outer != SortSectionPolicy::Default) { 489 expect("("); 490 Inner = readSortKind(); 491 if (Inner != SortSectionPolicy::Default) { 492 expect("("); 493 V = readInputSectionsList(); 494 expect(")"); 495 } else { 496 V = readInputSectionsList(); 497 } 498 expect(")"); 499 } else { 500 V = readInputSectionsList(); 501 } 502 503 for (SectionPattern &Pat : V) { 504 Pat.SortInner = Inner; 505 Pat.SortOuter = Outer; 506 } 507 508 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 509 } 510 return Cmd; 511 } 512 513 InputSectionDescription * 514 ScriptParser::readInputSectionDescription(StringRef Tok) { 515 // Input section wildcard can be surrounded by KEEP. 516 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 517 if (Tok == "KEEP") { 518 expect("("); 519 StringRef FilePattern = next(); 520 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 521 expect(")"); 522 Script->Opt.KeptSections.push_back(Cmd); 523 return Cmd; 524 } 525 return readInputSectionRules(Tok); 526 } 527 528 void ScriptParser::readSort() { 529 expect("("); 530 expect("CONSTRUCTORS"); 531 expect(")"); 532 } 533 534 AssertCommand *ScriptParser::readAssert() { 535 return make<AssertCommand>(readAssertExpr()); 536 } 537 538 Expr ScriptParser::readAssertExpr() { 539 expect("("); 540 Expr E = readExpr(); 541 expect(","); 542 StringRef Msg = unquote(next()); 543 expect(")"); 544 545 return [=] { 546 if (!E().getValue()) 547 error(Msg); 548 return Script->getDot(); 549 }; 550 } 551 552 // Reads a FILL(expr) command. 
We handle the FILL command as an 553 // alias for =fillexp section attribute, which is different from 554 // what GNU linkers do. 555 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 556 uint32_t ScriptParser::readFill() { 557 expect("("); 558 uint32_t V = readOutputSectionFiller(next()); 559 expect(")"); 560 expect(";"); 561 return V; 562 } 563 564 OutputSectionCommand * 565 ScriptParser::readOutputSectionDescription(StringRef OutSec) { 566 OutputSectionCommand *Cmd = make<OutputSectionCommand>(OutSec); 567 Cmd->Location = getCurrentLocation(); 568 569 // Read an address expression. 570 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html 571 if (peek() != ":") 572 Cmd->AddrExpr = readExpr(); 573 574 expect(":"); 575 576 if (consume("AT")) 577 Cmd->LMAExpr = readParenExpr(); 578 if (consume("ALIGN")) 579 Cmd->AlignExpr = readParenExpr(); 580 if (consume("SUBALIGN")) 581 Cmd->SubalignExpr = readParenExpr(); 582 583 // Parse constraints. 584 if (consume("ONLY_IF_RO")) 585 Cmd->Constraint = ConstraintKind::ReadOnly; 586 if (consume("ONLY_IF_RW")) 587 Cmd->Constraint = ConstraintKind::ReadWrite; 588 expect("{"); 589 590 while (!Error && !consume("}")) { 591 StringRef Tok = next(); 592 if (Tok == ";") { 593 // Empty commands are allowed. Do nothing here. 594 } else if (SymbolAssignment *Assign = readProvideOrAssignment(Tok)) { 595 Cmd->Commands.push_back(Assign); 596 } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) { 597 Cmd->Commands.push_back(Data); 598 } else if (Tok == "ASSERT") { 599 Cmd->Commands.push_back(readAssert()); 600 expect(";"); 601 } else if (Tok == "CONSTRUCTORS") { 602 // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors 603 // by name. This is for very old file formats such as ECOFF/XCOFF. 604 // For ELF, we should ignore. 
605 } else if (Tok == "FILL") { 606 Cmd->Filler = readFill(); 607 } else if (Tok == "SORT") { 608 readSort(); 609 } else if (peek() == "(") { 610 Cmd->Commands.push_back(readInputSectionDescription(Tok)); 611 } else { 612 setError("unknown command " + Tok); 613 } 614 } 615 616 if (consume(">")) 617 Cmd->MemoryRegionName = next(); 618 619 Cmd->Phdrs = readOutputSectionPhdrs(); 620 621 if (consume("=")) 622 Cmd->Filler = readOutputSectionFiller(next()); 623 else if (peek().startswith("=")) 624 Cmd->Filler = readOutputSectionFiller(next().drop_front()); 625 626 // Consume optional comma following output section command. 627 consume(","); 628 629 return Cmd; 630 } 631 632 // Read "=<number>" where <number> is an octal/decimal/hexadecimal number. 633 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 634 // 635 // ld.gold is not fully compatible with ld.bfd. ld.bfd handles 636 // hexstrings as blobs of arbitrary sizes, while ld.gold handles them 637 // as 32-bit big-endian values. We will do the same as ld.gold does 638 // because it's simpler than what ld.bfd does. 
uint32_t ScriptParser::readOutputSectionFiller(StringRef Tok) {
  // Radix 0 lets getAsInteger pick the base from the token's prefix.
  // getAsInteger returns false on success.
  uint32_t V;
  if (!Tok.getAsInteger(0, V))
    return V;
  setError("invalid filler expression: " + Tok);
  return 0;
}

// Reads the "(sym = expr);" part that follows PROVIDE, HIDDEN or
// PROVIDE_HIDDEN and stamps the given flags on the resulting assignment.
SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) {
  expect("(");
  SymbolAssignment *Cmd = readAssignment(next());
  Cmd->Provide = Provide;
  Cmd->Hidden = Hidden;
  expect(")");
  expect(";");
  return Cmd;
}

// Tries to read a symbol assignment that starts at the already-consumed
// token Tok. Returns nullptr, without consuming anything further, if Tok
// is followed by neither "=" nor "+=" and is not PROVIDE, HIDDEN or
// PROVIDE_HIDDEN.
SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) {
  SymbolAssignment *Cmd = nullptr;
  if (peek() == "=" || peek() == "+=") {
    Cmd = readAssignment(Tok);
    expect(";");
  } else if (Tok == "PROVIDE") {
    Cmd = readProvideHidden(true, false);
  } else if (Tok == "HIDDEN") {
    Cmd = readProvideHidden(false, true);
  } else if (Tok == "PROVIDE_HIDDEN") {
    Cmd = readProvideHidden(true, true);
  }
  return Cmd;
}

// Reads "= expr" or "+= expr" for the symbol Name. The caller must have
// checked that the next token is one of the two operators.
SymbolAssignment *ScriptParser::readAssignment(StringRef Name) {
  StringRef Op = next();
  assert(Op == "=" || Op == "+=");
  Expr E = readExpr();
  if (Op == "+=") {
    // Rewrite "Name += E" as "Name = Name + E". The location is captured
    // by value so the closure can pass it to getSymbolValue() later.
    std::string Loc = getCurrentLocation();
    E = [=] { return add(Script->getSymbolValue(Loc, Name), E()); };
  }
  return make<SymbolAssignment>(Name, E, getCurrentLocation());
}

// This is an operator-precedence parser to parse a linker
// script expression.
Expr ScriptParser::readExpr() {
  // Our lexer is context-aware. Set the in-expression bit so that
  // it applies different tokenization rules.
  bool Orig = InExpr;
  InExpr = true;
  Expr E = readExpr1(readPrimary(), 0);
  // Restore the previous state rather than clearing it; readExpr() can be
  // re-entered from readPrimary().
  InExpr = Orig;
  return E;
}

// Builds the closure that applies binary operator Op to L and R when it
// is evaluated. Arithmetic and bitwise operators go through the helpers
// above; shifts and comparisons work on the operands' getValue() results.
// Op must be one of the operators known to precedence().
static Expr combine(StringRef Op, Expr L, Expr R) {
  if (Op == "+")
    return [=] { return add(L(), R()); };
  if (Op == "-")
    return [=] { return sub(L(), R()); };
  if (Op == "*")
    return [=] { return mul(L(), R()); };
  if (Op == "/")
    return [=] { return div(L(), R()); };
  if (Op == "<<")
    return [=] { return L().getValue() << R().getValue(); };
  if (Op == ">>")
    return [=] { return L().getValue() >> R().getValue(); };
  if (Op == "<")
    return [=] { return L().getValue() < R().getValue(); };
  if (Op == ">")
    return [=] { return L().getValue() > R().getValue(); };
  if (Op == ">=")
    return [=] { return L().getValue() >= R().getValue(); };
  if (Op == "<=")
    return [=] { return L().getValue() <= R().getValue(); };
  if (Op == "==")
    return [=] { return L().getValue() == R().getValue(); };
  if (Op == "!=")
    return [=] { return L().getValue() != R().getValue(); };
  if (Op == "&")
    return [=] { return bitAnd(L(), R()); };
  if (Op == "|")
    return [=] { return bitOr(L(), R()); };
  llvm_unreachable("invalid operator");
}

// This is a part of the operator-precedence parser. This function
// assumes that the remaining token stream starts with an operator.
Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) {
  while (!atEOF() && !Error) {
    // Read an operator and an expression.
    if (consume("?"))
      return readTernary(Lhs);
    StringRef Op1 = peek();
    // Tokens that are not binary operators have precedence -1, so they
    // also end the expression here.
    if (precedence(Op1) < MinPrec)
      break;
    skip();
    Expr Rhs = readPrimary();

    // Evaluate the remaining part of the expression first if the
    // next operator has greater precedence than the previous one.
    // For example, if we have read "+" and "3", and if the next
    // operator is "*", then we'll evaluate 3 * ... part first.
    while (!atEOF()) {
      StringRef Op2 = peek();
      if (precedence(Op2) <= precedence(Op1))
        break;
      Rhs = readExpr1(Rhs, precedence(Op2));
    }

    Lhs = combine(Op1, Lhs, Rhs);
  }
  return Lhs;
}

// Resolves the argument of CONSTANT(...). Unknown names are reported
// through error() and evaluate to 0.
uint64_t static getConstant(StringRef S) {
  if (S == "COMMONPAGESIZE")
    return Target->PageSize;
  if (S == "MAXPAGESIZE")
    return Config->MaxPageSize;
  error("unknown constant: " + S);
  return 0;
}

// Parses Tok as an integer. It recognizes hexadecimal (prefixed with
// "0x" or suffixed with "H") and decimal numbers. Decimal numbers may
// have "K" (Ki) or "M" (Mi) suffixes.
// Returns None if Tok is not a number in any of these forms.
static Optional<uint64_t> parseInt(StringRef Tok) {
  // Negative number
  if (Tok.startswith("-")) {
    // The magnitude is negated as an unsigned 64-bit value.
    if (Optional<uint64_t> Val = parseInt(Tok.substr(1)))
      return -*Val;
    return None;
  }

  // Hexadecimal
  // getAsInteger returns false on success. If neither hexadecimal form
  // parses, fall through to the decimal forms below.
  uint64_t Val;
  if (Tok.startswith_lower("0x") && !Tok.substr(2).getAsInteger(16, Val))
    return Val;
  if (Tok.endswith_lower("H") && !Tok.drop_back().getAsInteger(16, Val))
    return Val;

  // Decimal
  if (Tok.endswith_lower("K")) {
    if (Tok.drop_back().getAsInteger(10, Val))
      return None;
    return Val * 1024;
  }
  if (Tok.endswith_lower("M")) {
    if (Tok.drop_back().getAsInteger(10, Val))
      return None;
    return Val * 1024 * 1024;
  }
  if (Tok.getAsInteger(10, Val))
    return None;
  return Val;
}

// Reads BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr). Tok is the
// already-consumed keyword. Returns nullptr, without consuming anything,
// if Tok is none of these keywords.
BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) {
  // Size in bytes of the data item selected by the keyword.
  int Size = StringSwitch<int>(Tok)
                 .Case("BYTE", 1)
                 .Case("SHORT", 2)
                 .Case("LONG", 4)
                 .Case("QUAD", 8)
                 .Default(-1);
  if (Size == -1)
    return nullptr;

  return make<BytesDataCommand>(readParenExpr(), Size);
}

// Reads "(" <token> ")" and returns the token as is.
StringRef ScriptParser::readParenLiteral() {
  expect("(");
  StringRef Tok = next();
  expect(")");
  return Tok;
}

// Reads a primary expression: a parenthesized expression, a unary "~" or
// "-" applied to a primary, a built-in function call, the location
// counter ".", a number, or a symbol name. The result is a closure that
// is evaluated later.
Expr ScriptParser::readPrimary() {
  if (peek() == "(")
    return readParenExpr();

  if (consume("~")) {
    Expr E = readPrimary();
    return [=] { return ~E().getValue(); };
  }
  if (consume("-")) {
    Expr E = readPrimary();
    return [=] { return -E().getValue(); };
  }

  StringRef Tok = next();
  // Captured by the closures below that take a location argument.
  std::string Location = getCurrentLocation();

  // Built-in functions are parsed here.
  // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html.
  if (Tok == "ABSOLUTE") {
    Expr Inner = readParenExpr();
    return [=] {
      ExprValue I = Inner();
      I.ForceAbsolute = true;
      return I;
    };
  }
  if (Tok == "ADDR") {
    StringRef Name = readParenLiteral();
    // Offset 0 within the named output section.
    return [=]() -> ExprValue {
      return {Script->getOutputSection(Location, Name), 0};
    };
  }
  if (Tok == "ALIGN") {
    expect("(");
    Expr E = readExpr();
    // ALIGN(align) aligns the location counter; ALIGN(exp, align) aligns
    // the value of exp.
    if (consume(")"))
      return [=] { return alignTo(Script->getDot(), E().getValue()); };
    expect(",");
    Expr E2 = readExpr();
    expect(")");
    return [=] { return alignTo(E().getValue(), E2().getValue()); };
  }
  if (Tok == "ALIGNOF") {
    StringRef Name = readParenLiteral();
    return [=] { return Script->getOutputSection(Location, Name)->Alignment; };
  }
  if (Tok == "ASSERT")
    return readAssertExpr();
  if (Tok == "CONSTANT") {
    StringRef Name = readParenLiteral();
    return [=] { return getConstant(Name); };
  }
  if (Tok == "DATA_SEGMENT_ALIGN") {
    expect("(");
    Expr E = readExpr();
    expect(",");
    // The second argument is parsed but not used.
    readExpr();
    expect(")");
    return [=] { return alignTo(Script->getDot(), E().getValue()); };
  }
  if (Tok == "DATA_SEGMENT_END") {
    // Only the form DATA_SEGMENT_END(.) is accepted.
    expect("(");
    expect(".");
    expect(")");
    return [] { return Script->getDot(); };
  }
  if (Tok == "DATA_SEGMENT_RELRO_END") {
    // GNU linkers implement more complicated logic to handle
    // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and
    // just align to the next page boundary for simplicity.
    expect("(");
    readExpr();
    expect(",");
    readExpr();
    expect(")");
    return [] { return alignTo(Script->getDot(), Target->PageSize); };
  }
  if (Tok == "DEFINED") {
    StringRef Name = readParenLiteral();
    return [=] { return Script->isDefined(Name) ? 1 : 0; };
  }
  if (Tok == "LOADADDR") {
    StringRef Name = readParenLiteral();
    return [=] { return Script->getOutputSection(Location, Name)->getLMA(); };
  }
  if (Tok == "SEGMENT_START") {
    expect("(");
    // The segment name is skipped; only the default expression is used.
    skip();
    expect(",");
    Expr E = readExpr();
    expect(")");
    return [=] { return E(); };
  }
  if (Tok == "SIZEOF") {
    StringRef Name = readParenLiteral();
    return [=] { return Script->getOutputSectionSize(Name); };
  }
  if (Tok == "SIZEOF_HEADERS")
    return [=] { return elf::getHeaderSize(); };

  // Tok is the dot.
  if (Tok == ".")
    return [=] { return Script->getSymbolValue(Location, Tok); };

  // Tok is a literal number.
  if (Optional<uint64_t> Val = parseInt(Tok))
    return [=] { return *Val; };

  // Tok is a symbol name.
  // A token that is neither a number nor a valid identifier is reported
  // as a malformed number; parsing still records and returns the symbol.
  if (!isValidCIdentifier(Tok))
    setError("malformed number: " + Tok);
  Script->Opt.ReferencedSymbols.push_back(Tok);
  return [=] { return Script->getSymbolValue(Location, Tok); };
}

// Reads the "L : R" part of "Cond ? L : R"; the "?" has already been
// consumed by readExpr1(). Only the selected branch is evaluated.
Expr ScriptParser::readTernary(Expr Cond) {
  Expr L = readExpr();
  expect(":");
  Expr R = readExpr();
  return [=] { return Cond().getValue() ? L() : R(); };
}

// Reads "(" <expr> ")".
Expr ScriptParser::readParenExpr() {
  expect("(");
  Expr E = readExpr();
  expect(")");
  return E;
}

// Reads the ":phdr" list that may follow an output section description.
// Each entry may be written as one token (":name") or as two (":" "name").
std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() {
  std::vector<StringRef> Phdrs;
  while (!Error && peek().startswith(":")) {
    StringRef Tok = next();
    Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1));
  }
  return Phdrs;
}

// Read a program header type name. The next token must be a
// name of a program header type or a constant (e.g. "0x3").
unsigned ScriptParser::readPhdrType() {
  StringRef Tok = next();
  // A numeric type is used as is.
  if (Optional<uint64_t> Val = parseInt(Tok))
    return *Val;

  unsigned Ret = StringSwitch<unsigned>(Tok)
                     .Case("PT_NULL", PT_NULL)
                     .Case("PT_LOAD", PT_LOAD)
                     .Case("PT_DYNAMIC", PT_DYNAMIC)
                     .Case("PT_INTERP", PT_INTERP)
                     .Case("PT_NOTE", PT_NOTE)
                     .Case("PT_SHLIB", PT_SHLIB)
                     .Case("PT_PHDR", PT_PHDR)
                     .Case("PT_TLS", PT_TLS)
                     .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME)
                     .Case("PT_GNU_STACK", PT_GNU_STACK)
                     .Case("PT_GNU_RELRO", PT_GNU_RELRO)
                     .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE)
                     .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED)
                     .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA)
                     .Default(-1);

  // (unsigned)-1 is the "no match" marker from the switch above.
  if (Ret == (unsigned)-1) {
    setError("invalid program header type: " + Tok);
    return PT_NULL;
  }
  return Ret;
}

// Reads an anonymous version declaration.
// The opening "{" has already been consumed by the caller.
void ScriptParser::readAnonymousDeclaration() {
  std::vector<SymbolVersion> Locals;
  std::vector<SymbolVersion> Globals;
  std::tie(Locals, Globals) = readSymbols();

  // "local: *" changes the default symbol version instead of being
  // recorded as a pattern.
  for (SymbolVersion V : Locals) {
    if (V.Name == "*")
      Config->DefaultSymbolVersion = VER_NDX_LOCAL;
    else
      Config->VersionScriptLocals.push_back(V);
  }

  for (SymbolVersion V : Globals)
    Config->VersionScriptGlobals.push_back(V);

  expect(";");
}

// Reads a non-anonymous version definition,
// e.g. "VerStr { global: foo; bar; local: *; };".
// VerStr and the opening "{" have already been consumed by the caller.
void ScriptParser::readVersionDeclaration(StringRef VerStr) {
  // Read a symbol list.
  std::vector<SymbolVersion> Locals;
  std::vector<SymbolVersion> Globals;
  std::tie(Locals, Globals) = readSymbols();

  // Locals are handled exactly as in readAnonymousDeclaration().
  for (SymbolVersion V : Locals) {
    if (V.Name == "*")
      Config->DefaultSymbolVersion = VER_NDX_LOCAL;
    else
      Config->VersionScriptLocals.push_back(V);
  }

  // Create a new version definition and add that to the global symbols.
  VersionDefinition Ver;
  Ver.Name = VerStr;
  Ver.Globals = Globals;

  // User-defined version number starts from 2 because 0 and 1 are
  // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively.
  Ver.Id = Config->VersionDefinitions.size() + 2;
  Config->VersionDefinitions.push_back(Ver);

  // Each version may have a parent version. For example, "Ver2"
  // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1"
  // as a parent. This version hierarchy is, probably against your
  // instinct, purely for hint; the runtime doesn't care about it
  // at all. In LLD, we simply ignore it.
  if (peek() != ";")
    skip();
  expect(";");
}

// Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };".
1043 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 1044 ScriptParser::readSymbols() { 1045 std::vector<SymbolVersion> Locals; 1046 std::vector<SymbolVersion> Globals; 1047 std::vector<SymbolVersion> *V = &Globals; 1048 1049 while (!Error) { 1050 if (consume("}")) 1051 break; 1052 if (consumeLabel("local")) { 1053 V = &Locals; 1054 continue; 1055 } 1056 if (consumeLabel("global")) { 1057 V = &Globals; 1058 continue; 1059 } 1060 1061 if (consume("extern")) { 1062 std::vector<SymbolVersion> Ext = readVersionExtern(); 1063 V->insert(V->end(), Ext.begin(), Ext.end()); 1064 } else { 1065 StringRef Tok = next(); 1066 V->push_back({unquote(Tok), false, hasWildcard(Tok)}); 1067 } 1068 expect(";"); 1069 } 1070 return {Locals, Globals}; 1071 } 1072 1073 // Reads an "extern C++" directive, e.g., 1074 // "extern "C++" { ns::*; "f(int, double)"; };" 1075 std::vector<SymbolVersion> ScriptParser::readVersionExtern() { 1076 StringRef Tok = next(); 1077 bool IsCXX = Tok == "\"C++\""; 1078 if (!IsCXX && Tok != "\"C\"") 1079 setError("Unknown language"); 1080 expect("{"); 1081 1082 std::vector<SymbolVersion> Ret; 1083 while (!Error && peek() != "}") { 1084 StringRef Tok = next(); 1085 bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); 1086 Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); 1087 expect(";"); 1088 } 1089 1090 expect("}"); 1091 return Ret; 1092 } 1093 1094 uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2, 1095 StringRef S3) { 1096 if (!consume(S1) && !consume(S2) && !consume(S3)) { 1097 setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); 1098 return 0; 1099 } 1100 expect("="); 1101 1102 // TODO: Fully support constant expressions. 
1103 if (Optional<uint64_t> Val = parseInt(next())) 1104 return *Val; 1105 setError("nonconstant expression for " + S1); 1106 return 0; 1107 } 1108 1109 // Parse the MEMORY command as specified in: 1110 // https://sourceware.org/binutils/docs/ld/MEMORY.html 1111 // 1112 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... } 1113 void ScriptParser::readMemory() { 1114 expect("{"); 1115 while (!Error && !consume("}")) { 1116 StringRef Name = next(); 1117 1118 uint32_t Flags = 0; 1119 uint32_t NegFlags = 0; 1120 if (consume("(")) { 1121 std::tie(Flags, NegFlags) = readMemoryAttributes(); 1122 expect(")"); 1123 } 1124 expect(":"); 1125 1126 uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); 1127 expect(","); 1128 uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); 1129 1130 // Add the memory region to the region map (if it doesn't already exist). 1131 auto It = Script->Opt.MemoryRegions.find(Name); 1132 if (It != Script->Opt.MemoryRegions.end()) 1133 setError("region '" + Name + "' already defined"); 1134 else 1135 Script->Opt.MemoryRegions[Name] = {Name, Origin, Length, 1136 Origin, Flags, NegFlags}; 1137 } 1138 } 1139 1140 // This function parses the attributes used to match against section 1141 // flags when placing output sections in a memory region. These flags 1142 // are only used when an explicit memory region name is not used. 
1143 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { 1144 uint32_t Flags = 0; 1145 uint32_t NegFlags = 0; 1146 bool Invert = false; 1147 1148 for (char C : next().lower()) { 1149 uint32_t Flag = 0; 1150 if (C == '!') 1151 Invert = !Invert; 1152 else if (C == 'w') 1153 Flag = SHF_WRITE; 1154 else if (C == 'x') 1155 Flag = SHF_EXECINSTR; 1156 else if (C == 'a') 1157 Flag = SHF_ALLOC; 1158 else if (C != 'r') 1159 setError("invalid memory region attribute"); 1160 1161 if (Invert) 1162 NegFlags |= Flag; 1163 else 1164 Flags |= Flag; 1165 } 1166 return {Flags, NegFlags}; 1167 } 1168 1169 void elf::readLinkerScript(MemoryBufferRef MB) { 1170 ScriptParser(MB).readLinkerScript(); 1171 } 1172 1173 void elf::readVersionScript(MemoryBufferRef MB) { 1174 ScriptParser(MB).readVersionScript(); 1175 } 1176 1177 void elf::readDynamicList(MemoryBufferRef MB) { 1178 ScriptParser(MB).readDynamicList(); 1179 } 1180