1 //===- ScriptParser.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains a recursive-descendent parser for linker scripts. 11 // Parsed results are stored to Config and Script global objects. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ScriptParser.h" 16 #include "Config.h" 17 #include "Driver.h" 18 #include "InputSection.h" 19 #include "LinkerScript.h" 20 #include "Memory.h" 21 #include "OutputSections.h" 22 #include "ScriptLexer.h" 23 #include "Symbols.h" 24 #include "Target.h" 25 #include "llvm/ADT/SmallString.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/ADT/StringSet.h" 28 #include "llvm/ADT/StringSwitch.h" 29 #include "llvm/BinaryFormat/ELF.h" 30 #include "llvm/Support/Casting.h" 31 #include "llvm/Support/ErrorHandling.h" 32 #include "llvm/Support/FileSystem.h" 33 #include "llvm/Support/Path.h" 34 #include <cassert> 35 #include <limits> 36 #include <vector> 37 38 using namespace llvm; 39 using namespace llvm::ELF; 40 using namespace llvm::support::endian; 41 using namespace lld; 42 using namespace lld::elf; 43 44 static bool isUnderSysroot(StringRef Path); 45 46 namespace { 47 class ScriptParser final : ScriptLexer { 48 public: 49 ScriptParser(MemoryBufferRef MB) 50 : ScriptLexer(MB), 51 IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} 52 53 void readLinkerScript(); 54 void readVersionScript(); 55 void readDynamicList(); 56 57 private: 58 void addFile(StringRef Path); 59 OutputSection *checkSection(OutputSection *Cmd, StringRef Loccation); 60 61 void readAsNeeded(); 62 void readEntry(); 63 void readExtern(); 64 void readGroup(); 65 void readInclude(); 66 void readMemory(); 67 void readOutput(); 68 void readOutputArch(); 69 void readOutputFormat(); 70 void readPhdrs(); 71 void readSearchDir(); 72 void readSections(); 73 void readVersion(); 74 void readVersionScriptCommand(); 75 76 SymbolAssignment *readAssignment(StringRef Name); 77 BytesDataCommand *readBytesDataCommand(StringRef Tok); 78 uint32_t readFill(); 79 uint32_t parseFill(StringRef Tok); 80 void readSectionAddressType(OutputSection *Cmd); 81 OutputSection *readOutputSectionDescription(StringRef OutSec); 82 std::vector<StringRef> readOutputSectionPhdrs(); 83 InputSectionDescription *readInputSectionDescription(StringRef Tok); 84 StringMatcher readFilePatterns(); 85 std::vector<SectionPattern> readInputSectionsList(); 86 InputSectionDescription *readInputSectionRules(StringRef FilePattern); 87 unsigned readPhdrType(); 88 SortSectionPolicy readSortKind(); 89 SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); 90 SymbolAssignment *readProvideOrAssignment(StringRef Tok); 91 void readSort(); 92 AssertCommand *readAssert(); 93 Expr readAssertExpr(); 94 Expr readConstant(); 95 Expr getPageSize(); 96 97 uint64_t readMemoryAssignment(StringRef, StringRef, StringRef); 98 std::pair<uint32_t, uint32_t> readMemoryAttributes(); 99 100 Expr readExpr(); 101 Expr readExpr1(Expr Lhs, int MinPrec); 102 StringRef readParenLiteral(); 103 Expr readPrimary(); 104 Expr readTernary(Expr Cond); 105 Expr readParenExpr(); 106 107 // For parsing version script. 108 std::vector<SymbolVersion> readVersionExtern(); 109 void readAnonymousDeclaration(); 110 void readVersionDeclaration(StringRef VerStr); 111 112 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 113 readSymbols(); 114 115 // True if a script being read is in a subdirectory specified by -sysroot. 116 bool IsUnderSysroot; 117 118 // A set to detect an INCLUDE() cycle. 119 StringSet<> Seen; 120 }; 121 } // namespace 122 123 static StringRef unquote(StringRef S) { 124 if (S.startswith("\"")) 125 return S.substr(1, S.size() - 2); 126 return S; 127 } 128 129 static bool isUnderSysroot(StringRef Path) { 130 if (Config->Sysroot == "") 131 return false; 132 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 133 if (sys::fs::equivalent(Config->Sysroot, Path)) 134 return true; 135 return false; 136 } 137 138 // Some operations only support one non absolute value. Move the 139 // absolute one to the right hand side for convenience. 140 static void moveAbsRight(ExprValue &A, ExprValue &B) { 141 if (A.isAbsolute()) 142 std::swap(A, B); 143 if (!B.isAbsolute()) 144 error(A.Loc + ": at least one side of the expression must be absolute"); 145 } 146 147 static ExprValue add(ExprValue A, ExprValue B) { 148 moveAbsRight(A, B); 149 uint64_t Val = alignTo(A.Val, A.Alignment) + B.getValue(); 150 return {A.Sec, A.ForceAbsolute, Val, A.Loc}; 151 } 152 153 static ExprValue sub(ExprValue A, ExprValue B) { 154 uint64_t Val = alignTo(A.Val, A.Alignment) - B.getValue(); 155 return {A.Sec, Val, A.Loc}; 156 } 157 158 static ExprValue mul(ExprValue A, ExprValue B) { 159 return A.getValue() * B.getValue(); 160 } 161 162 static ExprValue div(ExprValue A, ExprValue B) { 163 if (uint64_t BV = B.getValue()) 164 return A.getValue() / BV; 165 error("division by zero"); 166 return 0; 167 } 168 169 static ExprValue bitAnd(ExprValue A, ExprValue B) { 170 moveAbsRight(A, B); 171 return {A.Sec, A.ForceAbsolute, 172 (A.getValue() & B.getValue()) - A.getSecAddr(), A.Loc}; 173 } 174 175 static ExprValue bitOr(ExprValue A, ExprValue B) { 176 moveAbsRight(A, B); 177 return {A.Sec, A.ForceAbsolute, 178 (A.getValue() | B.getValue()) - A.getSecAddr(), A.Loc}; 179 } 180 181 void ScriptParser::readDynamicList() { 182 expect("{"); 183 readAnonymousDeclaration(); 184 if (!atEOF()) 185 setError("EOF expected, but got " + next()); 186 } 187 188 void ScriptParser::readVersionScript() { 189 readVersionScriptCommand(); 190 if (!atEOF()) 191 setError("EOF expected, but got " + next()); 192 } 193 194 void ScriptParser::readVersionScriptCommand() { 195 if (consume("{")) { 196 readAnonymousDeclaration(); 197 return; 198 } 199 200 while (!atEOF() && !ErrorCount && peek() != "}") { 201 StringRef VerStr = next(); 202 if (VerStr == "{") { 203 setError("anonymous version definition is used in " 204 "combination with other version definitions"); 205 return; 206 } 207 expect("{"); 208 readVersionDeclaration(VerStr); 209 } 210 } 211 212 void ScriptParser::readVersion() { 213 expect("{"); 214 readVersionScriptCommand(); 215 expect("}"); 216 } 217 218 void ScriptParser::readLinkerScript() { 219 while (!atEOF()) { 220 StringRef Tok = next(); 221 if (Tok == ";") 222 continue; 223 224 if (Tok == "ASSERT") { 225 Script->Opt.Commands.push_back(readAssert()); 226 } else if (Tok == "ENTRY") { 227 readEntry(); 228 } else if (Tok == "EXTERN") { 229 readExtern(); 230 } else if (Tok == "GROUP" || Tok == "INPUT") { 231 readGroup(); 232 } else if (Tok == "INCLUDE") { 233 readInclude(); 234 } else if (Tok == "MEMORY") { 235 readMemory(); 236 } else if (Tok == "OUTPUT") { 237 readOutput(); 238 } else if (Tok == "OUTPUT_ARCH") { 239 readOutputArch(); 240 } else if (Tok == "OUTPUT_FORMAT") { 241 readOutputFormat(); 242 } else if (Tok == "PHDRS") { 243 readPhdrs(); 244 } else if (Tok == "SEARCH_DIR") { 245 readSearchDir(); 246 } else if (Tok == "SECTIONS") { 247 readSections(); 248 } else if (Tok == "VERSION") { 249 readVersion(); 250 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { 251 Script->Opt.Commands.push_back(Cmd); 252 } else { 253 setError("unknown directive: " + Tok); 254 } 255 } 256 } 257 258 void ScriptParser::addFile(StringRef S) { 259 if (IsUnderSysroot && S.startswith("/")) { 260 SmallString<128> PathData; 261 StringRef Path = (Config->Sysroot + S).toStringRef(PathData); 262 if (sys::fs::exists(Path)) { 263 Driver->addFile(Saver.save(Path), /*WithLOption=*/false); 264 return; 265 } 266 } 267 268 if (S.startswith("/")) { 269 Driver->addFile(S, /*WithLOption=*/false); 270 } else if (S.startswith("=")) { 271 if (Config->Sysroot.empty()) 272 Driver->addFile(S.substr(1), /*WithLOption=*/false); 273 else 274 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)), 275 /*WithLOption=*/false); 276 } else if (S.startswith("-l")) { 277 Driver->addLibrary(S.substr(2)); 278 } else if (sys::fs::exists(S)) { 279 Driver->addFile(S, /*WithLOption=*/false); 280 } else { 281 if (Optional<std::string> Path = findFromSearchPaths(S)) 282 Driver->addFile(Saver.save(*Path), /*WithLOption=*/true); 283 else 284 setError("unable to find " + S); 285 } 286 } 287 288 void ScriptParser::readAsNeeded() { 289 expect("("); 290 bool Orig = Config->AsNeeded; 291 Config->AsNeeded = true; 292 while (!ErrorCount && !consume(")")) 293 addFile(unquote(next())); 294 Config->AsNeeded = Orig; 295 } 296 297 void ScriptParser::readEntry() { 298 // -e <symbol> takes predecence over ENTRY(<symbol>). 299 expect("("); 300 StringRef Tok = next(); 301 if (Config->Entry.empty()) 302 Config->Entry = Tok; 303 expect(")"); 304 } 305 306 void ScriptParser::readExtern() { 307 expect("("); 308 while (!ErrorCount && !consume(")")) 309 Config->Undefined.push_back(next()); 310 } 311 312 void ScriptParser::readGroup() { 313 expect("("); 314 while (!ErrorCount && !consume(")")) { 315 if (consume("AS_NEEDED")) 316 readAsNeeded(); 317 else 318 addFile(unquote(next())); 319 } 320 } 321 322 void ScriptParser::readInclude() { 323 StringRef Tok = unquote(next()); 324 325 if (!Seen.insert(Tok).second) { 326 setError("there is a cycle in linker script INCLUDEs"); 327 return; 328 } 329 330 // https://sourceware.org/binutils/docs/ld/File-Commands.html: 331 // The file will be searched for in the current directory, and in any 332 // directory specified with the -L option. 333 if (sys::fs::exists(Tok)) { 334 if (Optional<MemoryBufferRef> MB = readFile(Tok)) 335 tokenize(*MB); 336 return; 337 } 338 if (Optional<std::string> Path = findFromSearchPaths(Tok)) { 339 if (Optional<MemoryBufferRef> MB = readFile(*Path)) 340 tokenize(*MB); 341 return; 342 } 343 setError("cannot open " + Tok); 344 } 345 346 void ScriptParser::readOutput() { 347 // -o <file> takes predecence over OUTPUT(<file>). 348 expect("("); 349 StringRef Tok = next(); 350 if (Config->OutputFile.empty()) 351 Config->OutputFile = unquote(Tok); 352 expect(")"); 353 } 354 355 void ScriptParser::readOutputArch() { 356 // OUTPUT_ARCH is ignored for now. 357 expect("("); 358 while (!ErrorCount && !consume(")")) 359 skip(); 360 } 361 362 void ScriptParser::readOutputFormat() { 363 // Error checking only for now. 364 expect("("); 365 skip(); 366 if (consume(")")) 367 return; 368 expect(","); 369 skip(); 370 expect(","); 371 skip(); 372 expect(")"); 373 } 374 375 void ScriptParser::readPhdrs() { 376 expect("{"); 377 while (!ErrorCount && !consume("}")) { 378 Script->Opt.PhdrsCommands.push_back( 379 {next(), PT_NULL, false, false, UINT_MAX, nullptr}); 380 381 PhdrsCommand &PhdrCmd = Script->Opt.PhdrsCommands.back(); 382 PhdrCmd.Type = readPhdrType(); 383 384 while (!ErrorCount && !consume(";")) { 385 if (consume("FILEHDR")) 386 PhdrCmd.HasFilehdr = true; 387 else if (consume("PHDRS")) 388 PhdrCmd.HasPhdrs = true; 389 else if (consume("AT")) 390 PhdrCmd.LMAExpr = readParenExpr(); 391 else if (consume("FLAGS")) 392 PhdrCmd.Flags = readParenExpr()().getValue(); 393 else 394 setError("unexpected header attribute: " + next()); 395 } 396 } 397 } 398 399 void ScriptParser::readSearchDir() { 400 expect("("); 401 StringRef Tok = next(); 402 if (!Config->Nostdlib) 403 Config->SearchPaths.push_back(unquote(Tok)); 404 expect(")"); 405 } 406 407 void ScriptParser::readSections() { 408 Script->Opt.HasSections = true; 409 410 // -no-rosegment is used to avoid placing read only non-executable sections in 411 // their own segment. We do the same if SECTIONS command is present in linker 412 // script. See comment for computeFlags(). 413 Config->SingleRoRx = true; 414 415 expect("{"); 416 while (!ErrorCount && !consume("}")) { 417 StringRef Tok = next(); 418 BaseCommand *Cmd = readProvideOrAssignment(Tok); 419 if (!Cmd) { 420 if (Tok == "ASSERT") 421 Cmd = readAssert(); 422 else 423 Cmd = readOutputSectionDescription(Tok); 424 } 425 Script->Opt.Commands.push_back(Cmd); 426 } 427 } 428 429 static int precedence(StringRef Op) { 430 return StringSwitch<int>(Op) 431 .Cases("*", "/", 5) 432 .Cases("+", "-", 4) 433 .Cases("<<", ">>", 3) 434 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 435 .Cases("&", "|", 1) 436 .Default(-1); 437 } 438 439 StringMatcher ScriptParser::readFilePatterns() { 440 std::vector<StringRef> V; 441 while (!ErrorCount && !consume(")")) 442 V.push_back(next()); 443 return StringMatcher(V); 444 } 445 446 SortSectionPolicy ScriptParser::readSortKind() { 447 if (consume("SORT") || consume("SORT_BY_NAME")) 448 return SortSectionPolicy::Name; 449 if (consume("SORT_BY_ALIGNMENT")) 450 return SortSectionPolicy::Alignment; 451 if (consume("SORT_BY_INIT_PRIORITY")) 452 return SortSectionPolicy::Priority; 453 if (consume("SORT_NONE")) 454 return SortSectionPolicy::None; 455 return SortSectionPolicy::Default; 456 } 457 458 // Reads SECTIONS command contents in the following form: 459 // 460 // <contents> ::= <elem>* 461 // <elem> ::= <exclude>? <glob-pattern> 462 // <exclude> ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")" 463 // 464 // For example, 465 // 466 // *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz) 467 // 468 // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o". 469 // The semantics of that is section .foo in any file, section .bar in 470 // any file but a.o, and section .baz in any file but b.o. 471 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 472 std::vector<SectionPattern> Ret; 473 while (!ErrorCount && peek() != ")") { 474 StringMatcher ExcludeFilePat; 475 if (consume("EXCLUDE_FILE")) { 476 expect("("); 477 ExcludeFilePat = readFilePatterns(); 478 } 479 480 std::vector<StringRef> V; 481 while (!ErrorCount && peek() != ")" && peek() != "EXCLUDE_FILE") 482 V.push_back(next()); 483 484 if (!V.empty()) 485 Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); 486 else 487 setError("section pattern is expected"); 488 } 489 return Ret; 490 } 491 492 // Reads contents of "SECTIONS" directive. That directive contains a 493 // list of glob patterns for input sections. The grammar is as follows. 494 // 495 // <patterns> ::= <section-list> 496 // | <sort> "(" <section-list> ")" 497 // | <sort> "(" <sort> "(" <section-list> ")" ")" 498 // 499 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" 500 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" 501 // 502 // <section-list> is parsed by readInputSectionsList(). 503 InputSectionDescription * 504 ScriptParser::readInputSectionRules(StringRef FilePattern) { 505 auto *Cmd = make<InputSectionDescription>(FilePattern); 506 expect("("); 507 508 while (!ErrorCount && !consume(")")) { 509 SortSectionPolicy Outer = readSortKind(); 510 SortSectionPolicy Inner = SortSectionPolicy::Default; 511 std::vector<SectionPattern> V; 512 if (Outer != SortSectionPolicy::Default) { 513 expect("("); 514 Inner = readSortKind(); 515 if (Inner != SortSectionPolicy::Default) { 516 expect("("); 517 V = readInputSectionsList(); 518 expect(")"); 519 } else { 520 V = readInputSectionsList(); 521 } 522 expect(")"); 523 } else { 524 V = readInputSectionsList(); 525 } 526 527 for (SectionPattern &Pat : V) { 528 Pat.SortInner = Inner; 529 Pat.SortOuter = Outer; 530 } 531 532 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 533 } 534 return Cmd; 535 } 536 537 InputSectionDescription * 538 ScriptParser::readInputSectionDescription(StringRef Tok) { 539 // Input section wildcard can be surrounded by KEEP. 540 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 541 if (Tok == "KEEP") { 542 expect("("); 543 StringRef FilePattern = next(); 544 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 545 expect(")"); 546 Script->Opt.KeptSections.push_back(Cmd); 547 return Cmd; 548 } 549 return readInputSectionRules(Tok); 550 } 551 552 void ScriptParser::readSort() { 553 expect("("); 554 expect("CONSTRUCTORS"); 555 expect(")"); 556 } 557 558 AssertCommand *ScriptParser::readAssert() { 559 return make<AssertCommand>(readAssertExpr()); 560 } 561 562 Expr ScriptParser::readAssertExpr() { 563 expect("("); 564 Expr E = readExpr(); 565 expect(","); 566 StringRef Msg = unquote(next()); 567 expect(")"); 568 569 return [=] { 570 if (!E().getValue()) 571 error(Msg); 572 return Script->getDot(); 573 }; 574 } 575 576 // Reads a FILL(expr) command. We handle the FILL command as an 577 // alias for =fillexp section attribute, which is different from 578 // what GNU linkers do. 579 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 580 uint32_t ScriptParser::readFill() { 581 expect("("); 582 uint32_t V = parseFill(next()); 583 expect(")"); 584 return V; 585 } 586 587 // Reads an expression and/or the special directive "(NOLOAD)" for an 588 // output section definition. 589 // 590 // An output section name can be followed by an address expression 591 // and/or by "(NOLOAD)". This grammar is not LL(1) because "(" can be 592 // interpreted as either the beginning of some expression or "(NOLOAD)". 593 // 594 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html 595 // https://sourceware.org/binutils/docs/ld/Output-Section-Type.html 596 void ScriptParser::readSectionAddressType(OutputSection *Cmd) { 597 if (consume("(")) { 598 if (consume("NOLOAD")) { 599 expect(")"); 600 Cmd->Noload = true; 601 return; 602 } 603 Cmd->AddrExpr = readExpr(); 604 expect(")"); 605 } else { 606 Cmd->AddrExpr = readExpr(); 607 } 608 609 if (consume("(")) { 610 expect("NOLOAD"); 611 expect(")"); 612 Cmd->Noload = true; 613 } 614 } 615 616 OutputSection *ScriptParser::readOutputSectionDescription(StringRef OutSec) { 617 OutputSection *Cmd = 618 Script->createOutputSection(OutSec, getCurrentLocation()); 619 620 if (peek() != ":") 621 readSectionAddressType(Cmd); 622 expect(":"); 623 624 if (consume("AT")) 625 Cmd->LMAExpr = readParenExpr(); 626 if (consume("ALIGN")) 627 Cmd->AlignExpr = readParenExpr(); 628 if (consume("SUBALIGN")) 629 Cmd->SubalignExpr = readParenExpr(); 630 631 // Parse constraints. 632 if (consume("ONLY_IF_RO")) 633 Cmd->Constraint = ConstraintKind::ReadOnly; 634 if (consume("ONLY_IF_RW")) 635 Cmd->Constraint = ConstraintKind::ReadWrite; 636 expect("{"); 637 638 while (!ErrorCount && !consume("}")) { 639 StringRef Tok = next(); 640 if (Tok == ";") { 641 // Empty commands are allowed. Do nothing here. 642 } else if (SymbolAssignment *Assign = readProvideOrAssignment(Tok)) { 643 Cmd->Commands.push_back(Assign); 644 } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) { 645 Cmd->Commands.push_back(Data); 646 } else if (Tok == "ASSERT") { 647 Cmd->Commands.push_back(readAssert()); 648 expect(";"); 649 } else if (Tok == "CONSTRUCTORS") { 650 // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors 651 // by name. This is for very old file formats such as ECOFF/XCOFF. 652 // For ELF, we should ignore. 653 } else if (Tok == "FILL") { 654 Cmd->Filler = readFill(); 655 } else if (Tok == "SORT") { 656 readSort(); 657 } else if (peek() == "(") { 658 Cmd->Commands.push_back(readInputSectionDescription(Tok)); 659 } else { 660 setError("unknown command " + Tok); 661 } 662 } 663 664 if (consume(">")) 665 Cmd->MemoryRegionName = next(); 666 else if (peek().startswith(">")) 667 Cmd->MemoryRegionName = next().drop_front(); 668 669 Cmd->Phdrs = readOutputSectionPhdrs(); 670 671 if (consume("=")) 672 Cmd->Filler = parseFill(next()); 673 else if (peek().startswith("=")) 674 Cmd->Filler = parseFill(next().drop_front()); 675 676 // Consume optional comma following output section command. 677 consume(","); 678 679 return Cmd; 680 } 681 682 // Parses a given string as a octal/decimal/hexadecimal number and 683 // returns it as a big-endian number. Used for `=<fillexp>`. 684 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 685 // 686 // When reading a hexstring, ld.bfd handles it as a blob of arbitrary 687 // size, while ld.gold always handles it as a 32-bit big-endian number. 688 // We are compatible with ld.gold because it's easier to implement. 689 uint32_t ScriptParser::parseFill(StringRef Tok) { 690 uint32_t V = 0; 691 if (!to_integer(Tok, V)) 692 setError("invalid filler expression: " + Tok); 693 694 uint32_t Buf; 695 write32be(&Buf, V); 696 return Buf; 697 } 698 699 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { 700 expect("("); 701 SymbolAssignment *Cmd = readAssignment(next()); 702 Cmd->Provide = Provide; 703 Cmd->Hidden = Hidden; 704 expect(")"); 705 expect(";"); 706 return Cmd; 707 } 708 709 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) { 710 SymbolAssignment *Cmd = nullptr; 711 if (peek() == "=" || peek() == "+=") { 712 Cmd = readAssignment(Tok); 713 expect(";"); 714 } else if (Tok == "PROVIDE") { 715 Cmd = readProvideHidden(true, false); 716 } else if (Tok == "HIDDEN") { 717 Cmd = readProvideHidden(false, true); 718 } else if (Tok == "PROVIDE_HIDDEN") { 719 Cmd = readProvideHidden(true, true); 720 } 721 return Cmd; 722 } 723 724 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { 725 StringRef Op = next(); 726 assert(Op == "=" || Op == "+="); 727 Expr E = readExpr(); 728 if (Op == "+=") { 729 std::string Loc = getCurrentLocation(); 730 E = [=] { return add(Script->getSymbolValue(Loc, Name), E()); }; 731 } 732 return make<SymbolAssignment>(Name, E, getCurrentLocation()); 733 } 734 735 // This is an operator-precedence parser to parse a linker 736 // script expression. 737 Expr ScriptParser::readExpr() { 738 // Our lexer is context-aware. Set the in-expression bit so that 739 // they apply different tokenization rules. 740 bool Orig = InExpr; 741 InExpr = true; 742 Expr E = readExpr1(readPrimary(), 0); 743 InExpr = Orig; 744 return E; 745 } 746 747 static Expr combine(StringRef Op, Expr L, Expr R) { 748 if (Op == "+") 749 return [=] { return add(L(), R()); }; 750 if (Op == "-") 751 return [=] { return sub(L(), R()); }; 752 if (Op == "*") 753 return [=] { return mul(L(), R()); }; 754 if (Op == "/") 755 return [=] { return div(L(), R()); }; 756 if (Op == "<<") 757 return [=] { return L().getValue() << R().getValue(); }; 758 if (Op == ">>") 759 return [=] { return L().getValue() >> R().getValue(); }; 760 if (Op == "<") 761 return [=] { return L().getValue() < R().getValue(); }; 762 if (Op == ">") 763 return [=] { return L().getValue() > R().getValue(); }; 764 if (Op == ">=") 765 return [=] { return L().getValue() >= R().getValue(); }; 766 if (Op == "<=") 767 return [=] { return L().getValue() <= R().getValue(); }; 768 if (Op == "==") 769 return [=] { return L().getValue() == R().getValue(); }; 770 if (Op == "!=") 771 return [=] { return L().getValue() != R().getValue(); }; 772 if (Op == "&") 773 return [=] { return bitAnd(L(), R()); }; 774 if (Op == "|") 775 return [=] { return bitOr(L(), R()); }; 776 llvm_unreachable("invalid operator"); 777 } 778 779 // This is a part of the operator-precedence parser. This function 780 // assumes that the remaining token stream starts with an operator. 781 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { 782 while (!atEOF() && !ErrorCount) { 783 // Read an operator and an expression. 784 if (consume("?")) 785 return readTernary(Lhs); 786 StringRef Op1 = peek(); 787 if (precedence(Op1) < MinPrec) 788 break; 789 skip(); 790 Expr Rhs = readPrimary(); 791 792 // Evaluate the remaining part of the expression first if the 793 // next operator has greater precedence than the previous one. 794 // For example, if we have read "+" and "3", and if the next 795 // operator is "*", then we'll evaluate 3 * ... part first. 796 while (!atEOF()) { 797 StringRef Op2 = peek(); 798 if (precedence(Op2) <= precedence(Op1)) 799 break; 800 Rhs = readExpr1(Rhs, precedence(Op2)); 801 } 802 803 Lhs = combine(Op1, Lhs, Rhs); 804 } 805 return Lhs; 806 } 807 808 Expr ScriptParser::getPageSize() { 809 std::string Location = getCurrentLocation(); 810 return [=]() -> uint64_t { 811 if (Target) 812 return Target->PageSize; 813 error(Location + ": unable to calculate page size"); 814 return 4096; // Return a dummy value. 815 }; 816 } 817 818 Expr ScriptParser::readConstant() { 819 StringRef S = readParenLiteral(); 820 if (S == "COMMONPAGESIZE") 821 return getPageSize(); 822 if (S == "MAXPAGESIZE") 823 return [] { return Config->MaxPageSize; }; 824 setError("unknown constant: " + S); 825 return {}; 826 } 827 828 // Parses Tok as an integer. It recognizes hexadecimal (prefixed with 829 // "0x" or suffixed with "H") and decimal numbers. Decimal numbers may 830 // have "K" (Ki) or "M" (Mi) suffixes. 831 static Optional<uint64_t> parseInt(StringRef Tok) { 832 // Negative number 833 if (Tok.startswith("-")) { 834 if (Optional<uint64_t> Val = parseInt(Tok.substr(1))) 835 return -*Val; 836 return None; 837 } 838 839 // Hexadecimal 840 uint64_t Val; 841 if (Tok.startswith_lower("0x") && to_integer(Tok.substr(2), Val, 16)) 842 return Val; 843 if (Tok.endswith_lower("H") && to_integer(Tok.drop_back(), Val, 16)) 844 return Val; 845 846 // Decimal 847 if (Tok.endswith_lower("K")) { 848 if (!to_integer(Tok.drop_back(), Val, 10)) 849 return None; 850 return Val * 1024; 851 } 852 if (Tok.endswith_lower("M")) { 853 if (!to_integer(Tok.drop_back(), Val, 10)) 854 return None; 855 return Val * 1024 * 1024; 856 } 857 if (!to_integer(Tok, Val, 10)) 858 return None; 859 return Val; 860 } 861 862 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { 863 int Size = StringSwitch<int>(Tok) 864 .Case("BYTE", 1) 865 .Case("SHORT", 2) 866 .Case("LONG", 4) 867 .Case("QUAD", 8) 868 .Default(-1); 869 if (Size == -1) 870 return nullptr; 871 872 return make<BytesDataCommand>(readParenExpr(), Size); 873 } 874 875 StringRef ScriptParser::readParenLiteral() { 876 expect("("); 877 StringRef Tok = next(); 878 expect(")"); 879 return Tok; 880 } 881 882 OutputSection *ScriptParser::checkSection(OutputSection *Cmd, 883 StringRef Location) { 884 if (Cmd->Location.empty() && Script->ErrorOnMissingSection) 885 error(Location + ": undefined section " + Cmd->Name); 886 return Cmd; 887 } 888 889 Expr ScriptParser::readPrimary() { 890 if (peek() == "(") 891 return readParenExpr(); 892 893 if (consume("~")) { 894 Expr E = readPrimary(); 895 return [=] { return ~E().getValue(); }; 896 } 897 if (consume("!")) { 898 Expr E = readPrimary(); 899 return [=] { return !E().getValue(); }; 900 } 901 if (consume("-")) { 902 Expr E = readPrimary(); 903 return [=] { return -E().getValue(); }; 904 } 905 906 StringRef Tok = next(); 907 std::string Location = getCurrentLocation(); 908 909 // Built-in functions are parsed here. 910 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 911 if (Tok == "ABSOLUTE") { 912 Expr Inner = readParenExpr(); 913 return [=] { 914 ExprValue I = Inner(); 915 I.ForceAbsolute = true; 916 return I; 917 }; 918 } 919 if (Tok == "ADDR") { 920 StringRef Name = readParenLiteral(); 921 OutputSection *Cmd = Script->getOrCreateOutputSection(Name); 922 return [=]() -> ExprValue { 923 return {checkSection(Cmd, Location), 0, Location}; 924 }; 925 } 926 if (Tok == "ALIGN") { 927 expect("("); 928 Expr E = readExpr(); 929 if (consume(")")) 930 return [=] { 931 return alignTo(Script->getDot(), std::max((uint64_t)1, E().getValue())); 932 }; 933 expect(","); 934 Expr E2 = readExpr(); 935 expect(")"); 936 return [=] { 937 ExprValue V = E(); 938 V.Alignment = std::max((uint64_t)1, E2().getValue()); 939 return V; 940 }; 941 } 942 if (Tok == "ALIGNOF") { 943 StringRef Name = readParenLiteral(); 944 OutputSection *Cmd = Script->getOrCreateOutputSection(Name); 945 return [=] { return checkSection(Cmd, Location)->Alignment; }; 946 } 947 if (Tok == "ASSERT") 948 return readAssertExpr(); 949 if (Tok == "CONSTANT") 950 return readConstant(); 951 if (Tok == "DATA_SEGMENT_ALIGN") { 952 expect("("); 953 Expr E = readExpr(); 954 expect(","); 955 readExpr(); 956 expect(")"); 957 return [=] { 958 return alignTo(Script->getDot(), std::max((uint64_t)1, E().getValue())); 959 }; 960 } 961 if (Tok == "DATA_SEGMENT_END") { 962 expect("("); 963 expect("."); 964 expect(")"); 965 return [] { return Script->getDot(); }; 966 } 967 if (Tok == "DATA_SEGMENT_RELRO_END") { 968 // GNU linkers implements more complicated logic to handle 969 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and 970 // just align to the next page boundary for simplicity. 971 expect("("); 972 readExpr(); 973 expect(","); 974 readExpr(); 975 expect(")"); 976 Expr E = getPageSize(); 977 return [=] { return alignTo(Script->getDot(), E().getValue()); }; 978 } 979 if (Tok == "DEFINED") { 980 StringRef Name = readParenLiteral(); 981 return [=] { return Script->isDefined(Name) ? 1 : 0; }; 982 } 983 if (Tok == "LENGTH") { 984 StringRef Name = readParenLiteral(); 985 if (Script->Opt.MemoryRegions.count(Name) == 0) 986 setError("memory region not defined: " + Name); 987 return [=] { return Script->Opt.MemoryRegions[Name].Length; }; 988 } 989 if (Tok == "LOADADDR") { 990 StringRef Name = readParenLiteral(); 991 OutputSection *Cmd = Script->getOrCreateOutputSection(Name); 992 return [=] { return checkSection(Cmd, Location)->getLMA(); }; 993 } 994 if (Tok == "ORIGIN") { 995 StringRef Name = readParenLiteral(); 996 if (Script->Opt.MemoryRegions.count(Name) == 0) 997 setError("memory region not defined: " + Name); 998 return [=] { return Script->Opt.MemoryRegions[Name].Origin; }; 999 } 1000 if (Tok == "SEGMENT_START") { 1001 expect("("); 1002 skip(); 1003 expect(","); 1004 Expr E = readExpr(); 1005 expect(")"); 1006 return [=] { return E(); }; 1007 } 1008 if (Tok == "SIZEOF") { 1009 StringRef Name = readParenLiteral(); 1010 OutputSection *Cmd = Script->getOrCreateOutputSection(Name); 1011 // Linker script does not create an output section if its content is empty. 1012 // We want to allow SIZEOF(.foo) where .foo is a section which happened to 1013 // be empty. 1014 return [=] { return Cmd->Size; }; 1015 } 1016 if (Tok == "SIZEOF_HEADERS") 1017 return [=] { return elf::getHeaderSize(); }; 1018 1019 // Tok is the dot. 1020 if (Tok == ".") 1021 return [=] { return Script->getSymbolValue(Location, Tok); }; 1022 1023 // Tok is a literal number. 1024 if (Optional<uint64_t> Val = parseInt(Tok)) 1025 return [=] { return *Val; }; 1026 1027 // Tok is a symbol name. 1028 if (!isValidCIdentifier(Tok)) 1029 setError("malformed number: " + Tok); 1030 Script->Opt.ReferencedSymbols.push_back(Tok); 1031 return [=] { return Script->getSymbolValue(Location, Tok); }; 1032 } 1033 1034 Expr ScriptParser::readTernary(Expr Cond) { 1035 Expr L = readExpr(); 1036 expect(":"); 1037 Expr R = readExpr(); 1038 return [=] { return Cond().getValue() ? L() : R(); }; 1039 } 1040 1041 Expr ScriptParser::readParenExpr() { 1042 expect("("); 1043 Expr E = readExpr(); 1044 expect(")"); 1045 return E; 1046 } 1047 1048 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 1049 std::vector<StringRef> Phdrs; 1050 while (!ErrorCount && peek().startswith(":")) { 1051 StringRef Tok = next(); 1052 Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); 1053 } 1054 return Phdrs; 1055 } 1056 1057 // Read a program header type name. The next token must be a 1058 // name of a program header type or a constant (e.g. "0x3"). 1059 unsigned ScriptParser::readPhdrType() { 1060 StringRef Tok = next(); 1061 if (Optional<uint64_t> Val = parseInt(Tok)) 1062 return *Val; 1063 1064 unsigned Ret = StringSwitch<unsigned>(Tok) 1065 .Case("PT_NULL", PT_NULL) 1066 .Case("PT_LOAD", PT_LOAD) 1067 .Case("PT_DYNAMIC", PT_DYNAMIC) 1068 .Case("PT_INTERP", PT_INTERP) 1069 .Case("PT_NOTE", PT_NOTE) 1070 .Case("PT_SHLIB", PT_SHLIB) 1071 .Case("PT_PHDR", PT_PHDR) 1072 .Case("PT_TLS", PT_TLS) 1073 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1074 .Case("PT_GNU_STACK", PT_GNU_STACK) 1075 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1076 .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) 1077 .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) 1078 .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) 1079 .Default(-1); 1080 1081 if (Ret == (unsigned)-1) { 1082 setError("invalid program header type: " + Tok); 1083 return PT_NULL; 1084 } 1085 return Ret; 1086 } 1087 1088 // Reads an anonymous version declaration. 1089 void ScriptParser::readAnonymousDeclaration() { 1090 std::vector<SymbolVersion> Locals; 1091 std::vector<SymbolVersion> Globals; 1092 std::tie(Locals, Globals) = readSymbols(); 1093 1094 for (SymbolVersion V : Locals) { 1095 if (V.Name == "*") 1096 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1097 else 1098 Config->VersionScriptLocals.push_back(V); 1099 } 1100 1101 for (SymbolVersion V : Globals) 1102 Config->VersionScriptGlobals.push_back(V); 1103 1104 expect(";"); 1105 } 1106 1107 // Reads a non-anonymous version definition, 1108 // e.g. "VerStr { global: foo; bar; local: *; };". 1109 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1110 // Read a symbol list. 1111 std::vector<SymbolVersion> Locals; 1112 std::vector<SymbolVersion> Globals; 1113 std::tie(Locals, Globals) = readSymbols(); 1114 1115 for (SymbolVersion V : Locals) { 1116 if (V.Name == "*") 1117 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1118 else 1119 Config->VersionScriptLocals.push_back(V); 1120 } 1121 1122 // Create a new version definition and add that to the global symbols. 1123 VersionDefinition Ver; 1124 Ver.Name = VerStr; 1125 Ver.Globals = Globals; 1126 1127 // User-defined version number starts from 2 because 0 and 1 are 1128 // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively. 1129 Ver.Id = Config->VersionDefinitions.size() + 2; 1130 Config->VersionDefinitions.push_back(Ver); 1131 1132 // Each version may have a parent version. For example, "Ver2" 1133 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" 1134 // as a parent. This version hierarchy is, probably against your 1135 // instinct, purely for hint; the runtime doesn't care about it 1136 // at all. In LLD, we simply ignore it. 1137 if (peek() != ";") 1138 skip(); 1139 expect(";"); 1140 } 1141 1142 static bool hasWildcard(StringRef S) { 1143 return S.find_first_of("?*[") != StringRef::npos; 1144 } 1145 1146 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 1147 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 1148 ScriptParser::readSymbols() { 1149 std::vector<SymbolVersion> Locals; 1150 std::vector<SymbolVersion> Globals; 1151 std::vector<SymbolVersion> *V = &Globals; 1152 1153 while (!ErrorCount) { 1154 if (consume("}")) 1155 break; 1156 if (consumeLabel("local")) { 1157 V = &Locals; 1158 continue; 1159 } 1160 if (consumeLabel("global")) { 1161 V = &Globals; 1162 continue; 1163 } 1164 1165 if (consume("extern")) { 1166 std::vector<SymbolVersion> Ext = readVersionExtern(); 1167 V->insert(V->end(), Ext.begin(), Ext.end()); 1168 } else { 1169 StringRef Tok = next(); 1170 V->push_back({unquote(Tok), false, hasWildcard(Tok)}); 1171 } 1172 expect(";"); 1173 } 1174 return {Locals, Globals}; 1175 } 1176 1177 // Reads an "extern C++" directive, e.g., 1178 // "extern "C++" { ns::*; "f(int, double)"; };" 1179 std::vector<SymbolVersion> ScriptParser::readVersionExtern() { 1180 StringRef Tok = next(); 1181 bool IsCXX = Tok == "\"C++\""; 1182 if (!IsCXX && Tok != "\"C\"") 1183 setError("Unknown language"); 1184 expect("{"); 1185 1186 std::vector<SymbolVersion> Ret; 1187 while (!ErrorCount && peek() != "}") { 1188 StringRef Tok = next(); 1189 bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); 1190 Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); 1191 expect(";"); 1192 } 1193 1194 expect("}"); 1195 return Ret; 1196 } 1197 1198 uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2, 1199 StringRef S3) { 1200 if (!consume(S1) && !consume(S2) && !consume(S3)) { 1201 setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); 1202 return 0; 1203 } 1204 expect("="); 1205 return readExpr()().getValue(); 1206 } 1207 1208 // Parse the MEMORY command as specified in: 1209 // https://sourceware.org/binutils/docs/ld/MEMORY.html 1210 // 1211 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... } 1212 void ScriptParser::readMemory() { 1213 expect("{"); 1214 while (!ErrorCount && !consume("}")) { 1215 StringRef Name = next(); 1216 1217 uint32_t Flags = 0; 1218 uint32_t NegFlags = 0; 1219 if (consume("(")) { 1220 std::tie(Flags, NegFlags) = readMemoryAttributes(); 1221 expect(")"); 1222 } 1223 expect(":"); 1224 1225 uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); 1226 expect(","); 1227 uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); 1228 1229 // Add the memory region to the region map (if it doesn't already exist). 1230 auto It = Script->Opt.MemoryRegions.find(Name); 1231 if (It != Script->Opt.MemoryRegions.end()) 1232 setError("region '" + Name + "' already defined"); 1233 else 1234 Script->Opt.MemoryRegions[Name] = {Name, Origin, Length, Flags, NegFlags}; 1235 } 1236 } 1237 1238 // This function parses the attributes used to match against section 1239 // flags when placing output sections in a memory region. These flags 1240 // are only used when an explicit memory region name is not used. 1241 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { 1242 uint32_t Flags = 0; 1243 uint32_t NegFlags = 0; 1244 bool Invert = false; 1245 1246 for (char C : next().lower()) { 1247 uint32_t Flag = 0; 1248 if (C == '!') 1249 Invert = !Invert; 1250 else if (C == 'w') 1251 Flag = SHF_WRITE; 1252 else if (C == 'x') 1253 Flag = SHF_EXECINSTR; 1254 else if (C == 'a') 1255 Flag = SHF_ALLOC; 1256 else if (C != 'r') 1257 setError("invalid memory region attribute"); 1258 1259 if (Invert) 1260 NegFlags |= Flag; 1261 else 1262 Flags |= Flag; 1263 } 1264 return {Flags, NegFlags}; 1265 } 1266 1267 void elf::readLinkerScript(MemoryBufferRef MB) { 1268 ScriptParser(MB).readLinkerScript(); 1269 } 1270 1271 void elf::readVersionScript(MemoryBufferRef MB) { 1272 ScriptParser(MB).readVersionScript(); 1273 } 1274 1275 void elf::readDynamicList(MemoryBufferRef MB) { 1276 ScriptParser(MB).readDynamicList(); 1277 } 1278