1 //===- ScriptParser.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains a recursive-descendent parser for linker scripts. 11 // Parsed results are stored to Config and Script global objects. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ScriptParser.h" 16 #include "Config.h" 17 #include "Driver.h" 18 #include "InputSection.h" 19 #include "LinkerScript.h" 20 #include "Memory.h" 21 #include "OutputSections.h" 22 #include "ScriptLexer.h" 23 #include "Symbols.h" 24 #include "Target.h" 25 #include "llvm/ADT/SmallString.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/ADT/StringSet.h" 28 #include "llvm/ADT/StringSwitch.h" 29 #include "llvm/BinaryFormat/ELF.h" 30 #include "llvm/Support/Casting.h" 31 #include "llvm/Support/ErrorHandling.h" 32 #include "llvm/Support/FileSystem.h" 33 #include "llvm/Support/Path.h" 34 #include <cassert> 35 #include <limits> 36 #include <vector> 37 38 using namespace llvm; 39 using namespace llvm::ELF; 40 using namespace llvm::support::endian; 41 using namespace lld; 42 using namespace lld::elf; 43 44 static bool isUnderSysroot(StringRef Path); 45 46 namespace { 47 class ScriptParser final : ScriptLexer { 48 public: 49 ScriptParser(MemoryBufferRef MB) 50 : ScriptLexer(MB), 51 IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} 52 53 void readLinkerScript(); 54 void readVersionScript(); 55 void readDynamicList(); 56 57 private: 58 void addFile(StringRef Path); 59 OutputSection *checkSection(OutputSection *Cmd, StringRef Loccation); 60 61 void readAsNeeded(); 62 void readEntry(); 63 void readExtern(); 64 void readGroup(); 65 void readInclude(); 66 void readMemory(); 67 void readOutput(); 68 void readOutputArch(); 69 void readOutputFormat(); 70 void readPhdrs(); 71 void readRegionAlias(); 72 void readSearchDir(); 73 void readSections(); 74 void readVersion(); 75 void readVersionScriptCommand(); 76 77 SymbolAssignment *readAssignment(StringRef Name); 78 BytesDataCommand *readBytesDataCommand(StringRef Tok); 79 uint32_t readFill(); 80 uint32_t parseFill(StringRef Tok); 81 void readSectionAddressType(OutputSection *Cmd); 82 OutputSection *readOutputSectionDescription(StringRef OutSec); 83 std::vector<StringRef> readOutputSectionPhdrs(); 84 InputSectionDescription *readInputSectionDescription(StringRef Tok); 85 StringMatcher readFilePatterns(); 86 std::vector<SectionPattern> readInputSectionsList(); 87 InputSectionDescription *readInputSectionRules(StringRef FilePattern); 88 unsigned readPhdrType(); 89 SortSectionPolicy readSortKind(); 90 SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); 91 SymbolAssignment *readProvideOrAssignment(StringRef Tok); 92 void readSort(); 93 AssertCommand *readAssert(); 94 Expr readAssertExpr(); 95 Expr readConstant(); 96 Expr getPageSize(); 97 98 uint64_t readMemoryAssignment(StringRef, StringRef, StringRef); 99 std::pair<uint32_t, uint32_t> readMemoryAttributes(); 100 101 Expr readExpr(); 102 Expr readExpr1(Expr Lhs, int MinPrec); 103 StringRef readParenLiteral(); 104 Expr readPrimary(); 105 Expr readTernary(Expr Cond); 106 Expr readParenExpr(); 107 108 // For parsing version script. 109 std::vector<SymbolVersion> readVersionExtern(); 110 void readAnonymousDeclaration(); 111 void readVersionDeclaration(StringRef VerStr); 112 113 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 114 readSymbols(); 115 116 // True if a script being read is in a subdirectory specified by -sysroot. 117 bool IsUnderSysroot; 118 119 // A set to detect an INCLUDE() cycle. 120 StringSet<> Seen; 121 }; 122 } // namespace 123 124 static StringRef unquote(StringRef S) { 125 if (S.startswith("\"")) 126 return S.substr(1, S.size() - 2); 127 return S; 128 } 129 130 static bool isUnderSysroot(StringRef Path) { 131 if (Config->Sysroot == "") 132 return false; 133 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 134 if (sys::fs::equivalent(Config->Sysroot, Path)) 135 return true; 136 return false; 137 } 138 139 // Some operations only support one non absolute value. Move the 140 // absolute one to the right hand side for convenience. 141 static void moveAbsRight(ExprValue &A, ExprValue &B) { 142 if (A.isAbsolute()) 143 std::swap(A, B); 144 if (!B.isAbsolute()) 145 error(A.Loc + ": at least one side of the expression must be absolute"); 146 } 147 148 static ExprValue add(ExprValue A, ExprValue B) { 149 moveAbsRight(A, B); 150 uint64_t Val = alignTo(A.Val, A.Alignment) + B.getValue(); 151 return {A.Sec, A.ForceAbsolute, Val, A.Loc}; 152 } 153 154 static ExprValue sub(ExprValue A, ExprValue B) { 155 uint64_t Val = alignTo(A.Val, A.Alignment) - B.getValue(); 156 return {A.Sec, Val, A.Loc}; 157 } 158 159 static ExprValue mul(ExprValue A, ExprValue B) { 160 return A.getValue() * B.getValue(); 161 } 162 163 static ExprValue div(ExprValue A, ExprValue B) { 164 if (uint64_t BV = B.getValue()) 165 return A.getValue() / BV; 166 error("division by zero"); 167 return 0; 168 } 169 170 static ExprValue bitAnd(ExprValue A, ExprValue B) { 171 moveAbsRight(A, B); 172 return {A.Sec, A.ForceAbsolute, 173 (A.getValue() & B.getValue()) - A.getSecAddr(), A.Loc}; 174 } 175 176 static ExprValue bitOr(ExprValue A, ExprValue B) { 177 moveAbsRight(A, B); 178 return {A.Sec, A.ForceAbsolute, 179 (A.getValue() | B.getValue()) - A.getSecAddr(), A.Loc}; 180 } 181 182 void ScriptParser::readDynamicList() { 183 Config->HasDynamicList = true; 184 expect("{"); 185 std::vector<SymbolVersion> Locals; 186 std::vector<SymbolVersion> Globals; 187 std::tie(Locals, Globals) = readSymbols(); 188 expect(";"); 189 190 if (!atEOF()) { 191 setError("EOF expected, but got " + next()); 192 return; 193 } 194 if (!Locals.empty()) { 195 setError("\"local:\" scope not supported in --dynamic-list"); 196 return; 197 } 198 199 for (SymbolVersion V : Globals) 200 Config->DynamicList.push_back(V); 201 } 202 203 void ScriptParser::readVersionScript() { 204 readVersionScriptCommand(); 205 if (!atEOF()) 206 setError("EOF expected, but got " + next()); 207 } 208 209 void ScriptParser::readVersionScriptCommand() { 210 if (consume("{")) { 211 readAnonymousDeclaration(); 212 return; 213 } 214 215 while (!atEOF() && !ErrorCount && peek() != "}") { 216 StringRef VerStr = next(); 217 if (VerStr == "{") { 218 setError("anonymous version definition is used in " 219 "combination with other version definitions"); 220 return; 221 } 222 expect("{"); 223 readVersionDeclaration(VerStr); 224 } 225 } 226 227 void ScriptParser::readVersion() { 228 expect("{"); 229 readVersionScriptCommand(); 230 expect("}"); 231 } 232 233 void ScriptParser::readLinkerScript() { 234 while (!atEOF()) { 235 StringRef Tok = next(); 236 if (Tok == ";") 237 continue; 238 239 if (Tok == "ASSERT") { 240 Script->Opt.Commands.push_back(readAssert()); 241 } else if (Tok == "ENTRY") { 242 readEntry(); 243 } else if (Tok == "EXTERN") { 244 readExtern(); 245 } else if (Tok == "GROUP" || Tok == "INPUT") { 246 readGroup(); 247 } else if (Tok == "INCLUDE") { 248 readInclude(); 249 } else if (Tok == "MEMORY") { 250 readMemory(); 251 } else if (Tok == "OUTPUT") { 252 readOutput(); 253 } else if (Tok == "OUTPUT_ARCH") { 254 readOutputArch(); 255 } else if (Tok == "OUTPUT_FORMAT") { 256 readOutputFormat(); 257 } else if (Tok == "PHDRS") { 258 readPhdrs(); 259 } else if (Tok == "REGION_ALIAS") { 260 readRegionAlias(); 261 } else if (Tok == "SEARCH_DIR") { 262 readSearchDir(); 263 } else if (Tok == "SECTIONS") { 264 readSections(); 265 } else if (Tok == "VERSION") { 266 readVersion(); 267 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { 268 Script->Opt.Commands.push_back(Cmd); 269 } else { 270 setError("unknown directive: " + Tok); 271 } 272 } 273 } 274 275 void ScriptParser::addFile(StringRef S) { 276 if (IsUnderSysroot && S.startswith("/")) { 277 SmallString<128> PathData; 278 StringRef Path = (Config->Sysroot + S).toStringRef(PathData); 279 if (sys::fs::exists(Path)) { 280 Driver->addFile(Saver.save(Path), /*WithLOption=*/false); 281 return; 282 } 283 } 284 285 if (S.startswith("/")) { 286 Driver->addFile(S, /*WithLOption=*/false); 287 } else if (S.startswith("=")) { 288 if (Config->Sysroot.empty()) 289 Driver->addFile(S.substr(1), /*WithLOption=*/false); 290 else 291 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)), 292 /*WithLOption=*/false); 293 } else if (S.startswith("-l")) { 294 Driver->addLibrary(S.substr(2)); 295 } else if (sys::fs::exists(S)) { 296 Driver->addFile(S, /*WithLOption=*/false); 297 } else { 298 if (Optional<std::string> Path = findFromSearchPaths(S)) 299 Driver->addFile(Saver.save(*Path), /*WithLOption=*/true); 300 else 301 setError("unable to find " + S); 302 } 303 } 304 305 void ScriptParser::readAsNeeded() { 306 expect("("); 307 bool Orig = Config->AsNeeded; 308 Config->AsNeeded = true; 309 while (!ErrorCount && !consume(")")) 310 addFile(unquote(next())); 311 Config->AsNeeded = Orig; 312 } 313 314 void ScriptParser::readEntry() { 315 // -e <symbol> takes predecence over ENTRY(<symbol>). 316 expect("("); 317 StringRef Tok = next(); 318 if (Config->Entry.empty()) 319 Config->Entry = Tok; 320 expect(")"); 321 } 322 323 void ScriptParser::readExtern() { 324 expect("("); 325 while (!ErrorCount && !consume(")")) 326 Config->Undefined.push_back(next()); 327 } 328 329 void ScriptParser::readGroup() { 330 expect("("); 331 while (!ErrorCount && !consume(")")) { 332 if (consume("AS_NEEDED")) 333 readAsNeeded(); 334 else 335 addFile(unquote(next())); 336 } 337 } 338 339 void ScriptParser::readInclude() { 340 StringRef Tok = unquote(next()); 341 342 if (!Seen.insert(Tok).second) { 343 setError("there is a cycle in linker script INCLUDEs"); 344 return; 345 } 346 347 // https://sourceware.org/binutils/docs/ld/File-Commands.html: 348 // The file will be searched for in the current directory, and in any 349 // directory specified with the -L option. 350 if (sys::fs::exists(Tok)) { 351 if (Optional<MemoryBufferRef> MB = readFile(Tok)) 352 tokenize(*MB); 353 return; 354 } 355 if (Optional<std::string> Path = findFromSearchPaths(Tok)) { 356 if (Optional<MemoryBufferRef> MB = readFile(*Path)) 357 tokenize(*MB); 358 return; 359 } 360 setError("cannot open " + Tok); 361 } 362 363 void ScriptParser::readOutput() { 364 // -o <file> takes predecence over OUTPUT(<file>). 365 expect("("); 366 StringRef Tok = next(); 367 if (Config->OutputFile.empty()) 368 Config->OutputFile = unquote(Tok); 369 expect(")"); 370 } 371 372 void ScriptParser::readOutputArch() { 373 // OUTPUT_ARCH is ignored for now. 374 expect("("); 375 while (!ErrorCount && !consume(")")) 376 skip(); 377 } 378 379 void ScriptParser::readOutputFormat() { 380 // Error checking only for now. 381 expect("("); 382 skip(); 383 if (consume(")")) 384 return; 385 expect(","); 386 skip(); 387 expect(","); 388 skip(); 389 expect(")"); 390 } 391 392 void ScriptParser::readPhdrs() { 393 expect("{"); 394 while (!ErrorCount && !consume("}")) { 395 Script->Opt.PhdrsCommands.push_back( 396 {next(), PT_NULL, false, false, UINT_MAX, nullptr}); 397 398 PhdrsCommand &PhdrCmd = Script->Opt.PhdrsCommands.back(); 399 PhdrCmd.Type = readPhdrType(); 400 401 while (!ErrorCount && !consume(";")) { 402 if (consume("FILEHDR")) 403 PhdrCmd.HasFilehdr = true; 404 else if (consume("PHDRS")) 405 PhdrCmd.HasPhdrs = true; 406 else if (consume("AT")) 407 PhdrCmd.LMAExpr = readParenExpr(); 408 else if (consume("FLAGS")) 409 PhdrCmd.Flags = readParenExpr()().getValue(); 410 else 411 setError("unexpected header attribute: " + next()); 412 } 413 } 414 } 415 416 void ScriptParser::readRegionAlias() { 417 expect("("); 418 StringRef Alias = unquote(next()); 419 expect(","); 420 StringRef Name = next(); 421 expect(")"); 422 423 if (Script->Opt.MemoryRegions.count(Alias)) 424 setError("redefinition of memory region '" + Alias + "'"); 425 if (!Script->Opt.MemoryRegions.count(Name)) 426 setError("memory region '" + Name + "' is not defined"); 427 Script->Opt.MemoryRegions[Alias] = Script->Opt.MemoryRegions[Name]; 428 } 429 430 void ScriptParser::readSearchDir() { 431 expect("("); 432 StringRef Tok = next(); 433 if (!Config->Nostdlib) 434 Config->SearchPaths.push_back(unquote(Tok)); 435 expect(")"); 436 } 437 438 void ScriptParser::readSections() { 439 Script->Opt.HasSections = true; 440 441 // -no-rosegment is used to avoid placing read only non-executable sections in 442 // their own segment. We do the same if SECTIONS command is present in linker 443 // script. See comment for computeFlags(). 444 Config->SingleRoRx = true; 445 446 expect("{"); 447 while (!ErrorCount && !consume("}")) { 448 StringRef Tok = next(); 449 BaseCommand *Cmd = readProvideOrAssignment(Tok); 450 if (!Cmd) { 451 if (Tok == "ASSERT") 452 Cmd = readAssert(); 453 else 454 Cmd = readOutputSectionDescription(Tok); 455 } 456 Script->Opt.Commands.push_back(Cmd); 457 } 458 } 459 460 static int precedence(StringRef Op) { 461 return StringSwitch<int>(Op) 462 .Cases("*", "/", 5) 463 .Cases("+", "-", 4) 464 .Cases("<<", ">>", 3) 465 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 466 .Cases("&", "|", 1) 467 .Default(-1); 468 } 469 470 StringMatcher ScriptParser::readFilePatterns() { 471 std::vector<StringRef> V; 472 while (!ErrorCount && !consume(")")) 473 V.push_back(next()); 474 return StringMatcher(V); 475 } 476 477 SortSectionPolicy ScriptParser::readSortKind() { 478 if (consume("SORT") || consume("SORT_BY_NAME")) 479 return SortSectionPolicy::Name; 480 if (consume("SORT_BY_ALIGNMENT")) 481 return SortSectionPolicy::Alignment; 482 if (consume("SORT_BY_INIT_PRIORITY")) 483 return SortSectionPolicy::Priority; 484 if (consume("SORT_NONE")) 485 return SortSectionPolicy::None; 486 return SortSectionPolicy::Default; 487 } 488 489 // Reads SECTIONS command contents in the following form: 490 // 491 // <contents> ::= <elem>* 492 // <elem> ::= <exclude>? <glob-pattern> 493 // <exclude> ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")" 494 // 495 // For example, 496 // 497 // *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz) 498 // 499 // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o". 500 // The semantics of that is section .foo in any file, section .bar in 501 // any file but a.o, and section .baz in any file but b.o. 502 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 503 std::vector<SectionPattern> Ret; 504 while (!ErrorCount && peek() != ")") { 505 StringMatcher ExcludeFilePat; 506 if (consume("EXCLUDE_FILE")) { 507 expect("("); 508 ExcludeFilePat = readFilePatterns(); 509 } 510 511 std::vector<StringRef> V; 512 while (!ErrorCount && peek() != ")" && peek() != "EXCLUDE_FILE") 513 V.push_back(next()); 514 515 if (!V.empty()) 516 Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); 517 else 518 setError("section pattern is expected"); 519 } 520 return Ret; 521 } 522 523 // Reads contents of "SECTIONS" directive. That directive contains a 524 // list of glob patterns for input sections. The grammar is as follows. 525 // 526 // <patterns> ::= <section-list> 527 // | <sort> "(" <section-list> ")" 528 // | <sort> "(" <sort> "(" <section-list> ")" ")" 529 // 530 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" 531 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" 532 // 533 // <section-list> is parsed by readInputSectionsList(). 534 InputSectionDescription * 535 ScriptParser::readInputSectionRules(StringRef FilePattern) { 536 auto *Cmd = make<InputSectionDescription>(FilePattern); 537 expect("("); 538 539 while (!ErrorCount && !consume(")")) { 540 SortSectionPolicy Outer = readSortKind(); 541 SortSectionPolicy Inner = SortSectionPolicy::Default; 542 std::vector<SectionPattern> V; 543 if (Outer != SortSectionPolicy::Default) { 544 expect("("); 545 Inner = readSortKind(); 546 if (Inner != SortSectionPolicy::Default) { 547 expect("("); 548 V = readInputSectionsList(); 549 expect(")"); 550 } else { 551 V = readInputSectionsList(); 552 } 553 expect(")"); 554 } else { 555 V = readInputSectionsList(); 556 } 557 558 for (SectionPattern &Pat : V) { 559 Pat.SortInner = Inner; 560 Pat.SortOuter = Outer; 561 } 562 563 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 564 } 565 return Cmd; 566 } 567 568 InputSectionDescription * 569 ScriptParser::readInputSectionDescription(StringRef Tok) { 570 // Input section wildcard can be surrounded by KEEP. 571 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 572 if (Tok == "KEEP") { 573 expect("("); 574 StringRef FilePattern = next(); 575 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 576 expect(")"); 577 Script->Opt.KeptSections.push_back(Cmd); 578 return Cmd; 579 } 580 return readInputSectionRules(Tok); 581 } 582 583 void ScriptParser::readSort() { 584 expect("("); 585 expect("CONSTRUCTORS"); 586 expect(")"); 587 } 588 589 AssertCommand *ScriptParser::readAssert() { 590 return make<AssertCommand>(readAssertExpr()); 591 } 592 593 Expr ScriptParser::readAssertExpr() { 594 expect("("); 595 Expr E = readExpr(); 596 expect(","); 597 StringRef Msg = unquote(next()); 598 expect(")"); 599 600 return [=] { 601 if (!E().getValue()) 602 error(Msg); 603 return Script->getDot(); 604 }; 605 } 606 607 // Reads a FILL(expr) command. We handle the FILL command as an 608 // alias for =fillexp section attribute, which is different from 609 // what GNU linkers do. 610 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 611 uint32_t ScriptParser::readFill() { 612 expect("("); 613 uint32_t V = parseFill(next()); 614 expect(")"); 615 return V; 616 } 617 618 // Reads an expression and/or the special directive "(NOLOAD)" for an 619 // output section definition. 620 // 621 // An output section name can be followed by an address expression 622 // and/or by "(NOLOAD)". This grammar is not LL(1) because "(" can be 623 // interpreted as either the beginning of some expression or "(NOLOAD)". 624 // 625 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html 626 // https://sourceware.org/binutils/docs/ld/Output-Section-Type.html 627 void ScriptParser::readSectionAddressType(OutputSection *Cmd) { 628 if (consume("(")) { 629 if (consume("NOLOAD")) { 630 expect(")"); 631 Cmd->Noload = true; 632 return; 633 } 634 Cmd->AddrExpr = readExpr(); 635 expect(")"); 636 } else { 637 Cmd->AddrExpr = readExpr(); 638 } 639 640 if (consume("(")) { 641 expect("NOLOAD"); 642 expect(")"); 643 Cmd->Noload = true; 644 } 645 } 646 647 OutputSection *ScriptParser::readOutputSectionDescription(StringRef OutSec) { 648 OutputSection *Cmd = 649 Script->createOutputSection(OutSec, getCurrentLocation()); 650 651 if (peek() != ":") 652 readSectionAddressType(Cmd); 653 expect(":"); 654 655 if (consume("AT")) 656 Cmd->LMAExpr = readParenExpr(); 657 if (consume("ALIGN")) 658 Cmd->AlignExpr = readParenExpr(); 659 if (consume("SUBALIGN")) 660 Cmd->SubalignExpr = readParenExpr(); 661 662 // Parse constraints. 663 if (consume("ONLY_IF_RO")) 664 Cmd->Constraint = ConstraintKind::ReadOnly; 665 if (consume("ONLY_IF_RW")) 666 Cmd->Constraint = ConstraintKind::ReadWrite; 667 expect("{"); 668 669 while (!ErrorCount && !consume("}")) { 670 StringRef Tok = next(); 671 if (Tok == ";") { 672 // Empty commands are allowed. Do nothing here. 673 } else if (SymbolAssignment *Assign = readProvideOrAssignment(Tok)) { 674 Cmd->Commands.push_back(Assign); 675 } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) { 676 Cmd->Commands.push_back(Data); 677 } else if (Tok == "ASSERT") { 678 Cmd->Commands.push_back(readAssert()); 679 expect(";"); 680 } else if (Tok == "CONSTRUCTORS") { 681 // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors 682 // by name. This is for very old file formats such as ECOFF/XCOFF. 683 // For ELF, we should ignore. 684 } else if (Tok == "FILL") { 685 Cmd->Filler = readFill(); 686 } else if (Tok == "SORT") { 687 readSort(); 688 } else if (peek() == "(") { 689 Cmd->Commands.push_back(readInputSectionDescription(Tok)); 690 } else { 691 setError("unknown command " + Tok); 692 } 693 } 694 695 if (consume(">")) 696 Cmd->MemoryRegionName = next(); 697 else if (peek().startswith(">")) 698 Cmd->MemoryRegionName = next().drop_front(); 699 700 Cmd->Phdrs = readOutputSectionPhdrs(); 701 702 if (consume("=")) 703 Cmd->Filler = parseFill(next()); 704 else if (peek().startswith("=")) 705 Cmd->Filler = parseFill(next().drop_front()); 706 707 // Consume optional comma following output section command. 708 consume(","); 709 710 return Cmd; 711 } 712 713 // Parses a given string as a octal/decimal/hexadecimal number and 714 // returns it as a big-endian number. Used for `=<fillexp>`. 715 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 716 // 717 // When reading a hexstring, ld.bfd handles it as a blob of arbitrary 718 // size, while ld.gold always handles it as a 32-bit big-endian number. 719 // We are compatible with ld.gold because it's easier to implement. 720 uint32_t ScriptParser::parseFill(StringRef Tok) { 721 uint32_t V = 0; 722 if (!to_integer(Tok, V)) 723 setError("invalid filler expression: " + Tok); 724 725 uint32_t Buf; 726 write32be(&Buf, V); 727 return Buf; 728 } 729 730 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { 731 expect("("); 732 SymbolAssignment *Cmd = readAssignment(next()); 733 Cmd->Provide = Provide; 734 Cmd->Hidden = Hidden; 735 expect(")"); 736 expect(";"); 737 return Cmd; 738 } 739 740 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) { 741 SymbolAssignment *Cmd = nullptr; 742 if (peek() == "=" || peek() == "+=") { 743 Cmd = readAssignment(Tok); 744 expect(";"); 745 } else if (Tok == "PROVIDE") { 746 Cmd = readProvideHidden(true, false); 747 } else if (Tok == "HIDDEN") { 748 Cmd = readProvideHidden(false, true); 749 } else if (Tok == "PROVIDE_HIDDEN") { 750 Cmd = readProvideHidden(true, true); 751 } 752 return Cmd; 753 } 754 755 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { 756 StringRef Op = next(); 757 assert(Op == "=" || Op == "+="); 758 Expr E = readExpr(); 759 if (Op == "+=") { 760 std::string Loc = getCurrentLocation(); 761 E = [=] { return add(Script->getSymbolValue(Loc, Name), E()); }; 762 } 763 return make<SymbolAssignment>(Name, E, getCurrentLocation()); 764 } 765 766 // This is an operator-precedence parser to parse a linker 767 // script expression. 768 Expr ScriptParser::readExpr() { 769 // Our lexer is context-aware. Set the in-expression bit so that 770 // they apply different tokenization rules. 771 bool Orig = InExpr; 772 InExpr = true; 773 Expr E = readExpr1(readPrimary(), 0); 774 InExpr = Orig; 775 return E; 776 } 777 778 static Expr combine(StringRef Op, Expr L, Expr R) { 779 if (Op == "+") 780 return [=] { return add(L(), R()); }; 781 if (Op == "-") 782 return [=] { return sub(L(), R()); }; 783 if (Op == "*") 784 return [=] { return mul(L(), R()); }; 785 if (Op == "/") 786 return [=] { return div(L(), R()); }; 787 if (Op == "<<") 788 return [=] { return L().getValue() << R().getValue(); }; 789 if (Op == ">>") 790 return [=] { return L().getValue() >> R().getValue(); }; 791 if (Op == "<") 792 return [=] { return L().getValue() < R().getValue(); }; 793 if (Op == ">") 794 return [=] { return L().getValue() > R().getValue(); }; 795 if (Op == ">=") 796 return [=] { return L().getValue() >= R().getValue(); }; 797 if (Op == "<=") 798 return [=] { return L().getValue() <= R().getValue(); }; 799 if (Op == "==") 800 return [=] { return L().getValue() == R().getValue(); }; 801 if (Op == "!=") 802 return [=] { return L().getValue() != R().getValue(); }; 803 if (Op == "&") 804 return [=] { return bitAnd(L(), R()); }; 805 if (Op == "|") 806 return [=] { return bitOr(L(), R()); }; 807 llvm_unreachable("invalid operator"); 808 } 809 810 // This is a part of the operator-precedence parser. This function 811 // assumes that the remaining token stream starts with an operator. 812 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { 813 while (!atEOF() && !ErrorCount) { 814 // Read an operator and an expression. 815 if (consume("?")) 816 return readTernary(Lhs); 817 StringRef Op1 = peek(); 818 if (precedence(Op1) < MinPrec) 819 break; 820 skip(); 821 Expr Rhs = readPrimary(); 822 823 // Evaluate the remaining part of the expression first if the 824 // next operator has greater precedence than the previous one. 825 // For example, if we have read "+" and "3", and if the next 826 // operator is "*", then we'll evaluate 3 * ... part first. 827 while (!atEOF()) { 828 StringRef Op2 = peek(); 829 if (precedence(Op2) <= precedence(Op1)) 830 break; 831 Rhs = readExpr1(Rhs, precedence(Op2)); 832 } 833 834 Lhs = combine(Op1, Lhs, Rhs); 835 } 836 return Lhs; 837 } 838 839 Expr ScriptParser::getPageSize() { 840 std::string Location = getCurrentLocation(); 841 return [=]() -> uint64_t { 842 if (Target) 843 return Target->PageSize; 844 error(Location + ": unable to calculate page size"); 845 return 4096; // Return a dummy value. 846 }; 847 } 848 849 Expr ScriptParser::readConstant() { 850 StringRef S = readParenLiteral(); 851 if (S == "COMMONPAGESIZE") 852 return getPageSize(); 853 if (S == "MAXPAGESIZE") 854 return [] { return Config->MaxPageSize; }; 855 setError("unknown constant: " + S); 856 return {}; 857 } 858 859 // Parses Tok as an integer. It recognizes hexadecimal (prefixed with 860 // "0x" or suffixed with "H") and decimal numbers. Decimal numbers may 861 // have "K" (Ki) or "M" (Mi) suffixes. 862 static Optional<uint64_t> parseInt(StringRef Tok) { 863 // Negative number 864 if (Tok.startswith("-")) { 865 if (Optional<uint64_t> Val = parseInt(Tok.substr(1))) 866 return -*Val; 867 return None; 868 } 869 870 // Hexadecimal 871 uint64_t Val; 872 if (Tok.startswith_lower("0x") && to_integer(Tok.substr(2), Val, 16)) 873 return Val; 874 if (Tok.endswith_lower("H") && to_integer(Tok.drop_back(), Val, 16)) 875 return Val; 876 877 // Decimal 878 if (Tok.endswith_lower("K")) { 879 if (!to_integer(Tok.drop_back(), Val, 10)) 880 return None; 881 return Val * 1024; 882 } 883 if (Tok.endswith_lower("M")) { 884 if (!to_integer(Tok.drop_back(), Val, 10)) 885 return None; 886 return Val * 1024 * 1024; 887 } 888 if (!to_integer(Tok, Val, 10)) 889 return None; 890 return Val; 891 } 892 893 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { 894 int Size = StringSwitch<int>(Tok) 895 .Case("BYTE", 1) 896 .Case("SHORT", 2) 897 .Case("LONG", 4) 898 .Case("QUAD", 8) 899 .Default(-1); 900 if (Size == -1) 901 return nullptr; 902 903 return make<BytesDataCommand>(readParenExpr(), Size); 904 } 905 906 StringRef ScriptParser::readParenLiteral() { 907 expect("("); 908 StringRef Tok = next(); 909 expect(")"); 910 return Tok; 911 } 912 913 OutputSection *ScriptParser::checkSection(OutputSection *Cmd, 914 StringRef Location) { 915 if (Cmd->Location.empty() && Script->ErrorOnMissingSection) 916 error(Location + ": undefined section " + Cmd->Name); 917 return Cmd; 918 } 919 920 Expr ScriptParser::readPrimary() { 921 if (peek() == "(") 922 return readParenExpr(); 923 924 if (consume("~")) { 925 Expr E = readPrimary(); 926 return [=] { return ~E().getValue(); }; 927 } 928 if (consume("!")) { 929 Expr E = readPrimary(); 930 return [=] { return !E().getValue(); }; 931 } 932 if (consume("-")) { 933 Expr E = readPrimary(); 934 return [=] { return -E().getValue(); }; 935 } 936 937 StringRef Tok = next(); 938 std::string Location = getCurrentLocation(); 939 940 // Built-in functions are parsed here. 941 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 942 if (Tok == "ABSOLUTE") { 943 Expr Inner = readParenExpr(); 944 return [=] { 945 ExprValue I = Inner(); 946 I.ForceAbsolute = true; 947 return I; 948 }; 949 } 950 if (Tok == "ADDR") { 951 StringRef Name = readParenLiteral(); 952 OutputSection *Cmd = Script->getOrCreateOutputSection(Name); 953 return [=]() -> ExprValue { 954 return {checkSection(Cmd, Location), 0, Location}; 955 }; 956 } 957 if (Tok == "ALIGN") { 958 expect("("); 959 Expr E = readExpr(); 960 if (consume(")")) 961 return [=] { 962 return alignTo(Script->getDot(), std::max((uint64_t)1, E().getValue())); 963 }; 964 expect(","); 965 Expr E2 = readExpr(); 966 expect(")"); 967 return [=] { 968 ExprValue V = E(); 969 V.Alignment = std::max((uint64_t)1, E2().getValue()); 970 return V; 971 }; 972 } 973 if (Tok == "ALIGNOF") { 974 StringRef Name = readParenLiteral(); 975 OutputSection *Cmd = Script->getOrCreateOutputSection(Name); 976 return [=] { return checkSection(Cmd, Location)->Alignment; }; 977 } 978 if (Tok == "ASSERT") 979 return readAssertExpr(); 980 if (Tok == "CONSTANT") 981 return readConstant(); 982 if (Tok == "DATA_SEGMENT_ALIGN") { 983 expect("("); 984 Expr E = readExpr(); 985 expect(","); 986 readExpr(); 987 expect(")"); 988 return [=] { 989 return alignTo(Script->getDot(), std::max((uint64_t)1, E().getValue())); 990 }; 991 } 992 if (Tok == "DATA_SEGMENT_END") { 993 expect("("); 994 expect("."); 995 expect(")"); 996 return [] { return Script->getDot(); }; 997 } 998 if (Tok == "DATA_SEGMENT_RELRO_END") { 999 // GNU linkers implements more complicated logic to handle 1000 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and 1001 // just align to the next page boundary for simplicity. 1002 expect("("); 1003 readExpr(); 1004 expect(","); 1005 readExpr(); 1006 expect(")"); 1007 Expr E = getPageSize(); 1008 return [=] { return alignTo(Script->getDot(), E().getValue()); }; 1009 } 1010 if (Tok == "DEFINED") { 1011 StringRef Name = readParenLiteral(); 1012 return [=] { return Script->isDefined(Name) ? 1 : 0; }; 1013 } 1014 if (Tok == "LENGTH") { 1015 StringRef Name = readParenLiteral(); 1016 if (Script->Opt.MemoryRegions.count(Name) == 0) 1017 setError("memory region not defined: " + Name); 1018 return [=] { return Script->Opt.MemoryRegions[Name]->Length; }; 1019 } 1020 if (Tok == "LOADADDR") { 1021 StringRef Name = readParenLiteral(); 1022 OutputSection *Cmd = Script->getOrCreateOutputSection(Name); 1023 return [=] { return checkSection(Cmd, Location)->getLMA(); }; 1024 } 1025 if (Tok == "ORIGIN") { 1026 StringRef Name = readParenLiteral(); 1027 if (Script->Opt.MemoryRegions.count(Name) == 0) 1028 setError("memory region not defined: " + Name); 1029 return [=] { return Script->Opt.MemoryRegions[Name]->Origin; }; 1030 } 1031 if (Tok == "SEGMENT_START") { 1032 expect("("); 1033 skip(); 1034 expect(","); 1035 Expr E = readExpr(); 1036 expect(")"); 1037 return [=] { return E(); }; 1038 } 1039 if (Tok == "SIZEOF") { 1040 StringRef Name = readParenLiteral(); 1041 OutputSection *Cmd = Script->getOrCreateOutputSection(Name); 1042 // Linker script does not create an output section if its content is empty. 1043 // We want to allow SIZEOF(.foo) where .foo is a section which happened to 1044 // be empty. 1045 return [=] { return Cmd->Size; }; 1046 } 1047 if (Tok == "SIZEOF_HEADERS") 1048 return [=] { return elf::getHeaderSize(); }; 1049 1050 // Tok is the dot. 1051 if (Tok == ".") 1052 return [=] { return Script->getSymbolValue(Location, Tok); }; 1053 1054 // Tok is a literal number. 1055 if (Optional<uint64_t> Val = parseInt(Tok)) 1056 return [=] { return *Val; }; 1057 1058 // Tok is a symbol name. 1059 if (!isValidCIdentifier(Tok)) 1060 setError("malformed number: " + Tok); 1061 Script->Opt.ReferencedSymbols.push_back(Tok); 1062 return [=] { return Script->getSymbolValue(Location, Tok); }; 1063 } 1064 1065 Expr ScriptParser::readTernary(Expr Cond) { 1066 Expr L = readExpr(); 1067 expect(":"); 1068 Expr R = readExpr(); 1069 return [=] { return Cond().getValue() ? L() : R(); }; 1070 } 1071 1072 Expr ScriptParser::readParenExpr() { 1073 expect("("); 1074 Expr E = readExpr(); 1075 expect(")"); 1076 return E; 1077 } 1078 1079 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 1080 std::vector<StringRef> Phdrs; 1081 while (!ErrorCount && peek().startswith(":")) { 1082 StringRef Tok = next(); 1083 Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); 1084 } 1085 return Phdrs; 1086 } 1087 1088 // Read a program header type name. The next token must be a 1089 // name of a program header type or a constant (e.g. "0x3"). 1090 unsigned ScriptParser::readPhdrType() { 1091 StringRef Tok = next(); 1092 if (Optional<uint64_t> Val = parseInt(Tok)) 1093 return *Val; 1094 1095 unsigned Ret = StringSwitch<unsigned>(Tok) 1096 .Case("PT_NULL", PT_NULL) 1097 .Case("PT_LOAD", PT_LOAD) 1098 .Case("PT_DYNAMIC", PT_DYNAMIC) 1099 .Case("PT_INTERP", PT_INTERP) 1100 .Case("PT_NOTE", PT_NOTE) 1101 .Case("PT_SHLIB", PT_SHLIB) 1102 .Case("PT_PHDR", PT_PHDR) 1103 .Case("PT_TLS", PT_TLS) 1104 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1105 .Case("PT_GNU_STACK", PT_GNU_STACK) 1106 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1107 .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) 1108 .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) 1109 .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) 1110 .Default(-1); 1111 1112 if (Ret == (unsigned)-1) { 1113 setError("invalid program header type: " + Tok); 1114 return PT_NULL; 1115 } 1116 return Ret; 1117 } 1118 1119 // Reads an anonymous version declaration. 1120 void ScriptParser::readAnonymousDeclaration() { 1121 std::vector<SymbolVersion> Locals; 1122 std::vector<SymbolVersion> Globals; 1123 std::tie(Locals, Globals) = readSymbols(); 1124 1125 for (SymbolVersion V : Locals) { 1126 if (V.Name == "*") 1127 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1128 else 1129 Config->VersionScriptLocals.push_back(V); 1130 } 1131 1132 for (SymbolVersion V : Globals) 1133 Config->VersionScriptGlobals.push_back(V); 1134 1135 expect(";"); 1136 } 1137 1138 // Reads a non-anonymous version definition, 1139 // e.g. "VerStr { global: foo; bar; local: *; };". 1140 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1141 // Read a symbol list. 1142 std::vector<SymbolVersion> Locals; 1143 std::vector<SymbolVersion> Globals; 1144 std::tie(Locals, Globals) = readSymbols(); 1145 1146 for (SymbolVersion V : Locals) { 1147 if (V.Name == "*") 1148 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1149 else 1150 Config->VersionScriptLocals.push_back(V); 1151 } 1152 1153 // Create a new version definition and add that to the global symbols. 1154 VersionDefinition Ver; 1155 Ver.Name = VerStr; 1156 Ver.Globals = Globals; 1157 1158 // User-defined version number starts from 2 because 0 and 1 are 1159 // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively. 1160 Ver.Id = Config->VersionDefinitions.size() + 2; 1161 Config->VersionDefinitions.push_back(Ver); 1162 1163 // Each version may have a parent version. For example, "Ver2" 1164 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" 1165 // as a parent. This version hierarchy is, probably against your 1166 // instinct, purely for hint; the runtime doesn't care about it 1167 // at all. In LLD, we simply ignore it. 1168 if (peek() != ";") 1169 skip(); 1170 expect(";"); 1171 } 1172 1173 static bool hasWildcard(StringRef S) { 1174 return S.find_first_of("?*[") != StringRef::npos; 1175 } 1176 1177 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 1178 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 1179 ScriptParser::readSymbols() { 1180 std::vector<SymbolVersion> Locals; 1181 std::vector<SymbolVersion> Globals; 1182 std::vector<SymbolVersion> *V = &Globals; 1183 1184 while (!ErrorCount) { 1185 if (consume("}")) 1186 break; 1187 if (consumeLabel("local")) { 1188 V = &Locals; 1189 continue; 1190 } 1191 if (consumeLabel("global")) { 1192 V = &Globals; 1193 continue; 1194 } 1195 1196 if (consume("extern")) { 1197 std::vector<SymbolVersion> Ext = readVersionExtern(); 1198 V->insert(V->end(), Ext.begin(), Ext.end()); 1199 } else { 1200 StringRef Tok = next(); 1201 V->push_back({unquote(Tok), false, hasWildcard(Tok)}); 1202 } 1203 expect(";"); 1204 } 1205 return {Locals, Globals}; 1206 } 1207 1208 // Reads an "extern C++" directive, e.g., 1209 // "extern "C++" { ns::*; "f(int, double)"; };" 1210 std::vector<SymbolVersion> ScriptParser::readVersionExtern() { 1211 StringRef Tok = next(); 1212 bool IsCXX = Tok == "\"C++\""; 1213 if (!IsCXX && Tok != "\"C\"") 1214 setError("Unknown language"); 1215 expect("{"); 1216 1217 std::vector<SymbolVersion> Ret; 1218 while (!ErrorCount && peek() != "}") { 1219 StringRef Tok = next(); 1220 bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); 1221 Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); 1222 expect(";"); 1223 } 1224 1225 expect("}"); 1226 return Ret; 1227 } 1228 1229 uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2, 1230 StringRef S3) { 1231 if (!consume(S1) && !consume(S2) && !consume(S3)) { 1232 setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); 1233 return 0; 1234 } 1235 expect("="); 1236 return readExpr()().getValue(); 1237 } 1238 1239 // Parse the MEMORY command as specified in: 1240 // https://sourceware.org/binutils/docs/ld/MEMORY.html 1241 // 1242 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... } 1243 void ScriptParser::readMemory() { 1244 expect("{"); 1245 while (!ErrorCount && !consume("}")) { 1246 StringRef Name = next(); 1247 1248 uint32_t Flags = 0; 1249 uint32_t NegFlags = 0; 1250 if (consume("(")) { 1251 std::tie(Flags, NegFlags) = readMemoryAttributes(); 1252 expect(")"); 1253 } 1254 expect(":"); 1255 1256 uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); 1257 expect(","); 1258 uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); 1259 1260 // Add the memory region to the region map. 1261 if (Script->Opt.MemoryRegions.count(Name)) 1262 setError("region '" + Name + "' already defined"); 1263 MemoryRegion *MR = make<MemoryRegion>(); 1264 *MR = {Name, Origin, Length, Flags, NegFlags}; 1265 Script->Opt.MemoryRegions[Name] = MR; 1266 } 1267 } 1268 1269 // This function parses the attributes used to match against section 1270 // flags when placing output sections in a memory region. These flags 1271 // are only used when an explicit memory region name is not used. 1272 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { 1273 uint32_t Flags = 0; 1274 uint32_t NegFlags = 0; 1275 bool Invert = false; 1276 1277 for (char C : next().lower()) { 1278 uint32_t Flag = 0; 1279 if (C == '!') 1280 Invert = !Invert; 1281 else if (C == 'w') 1282 Flag = SHF_WRITE; 1283 else if (C == 'x') 1284 Flag = SHF_EXECINSTR; 1285 else if (C == 'a') 1286 Flag = SHF_ALLOC; 1287 else if (C != 'r') 1288 setError("invalid memory region attribute"); 1289 1290 if (Invert) 1291 NegFlags |= Flag; 1292 else 1293 Flags |= Flag; 1294 } 1295 return {Flags, NegFlags}; 1296 } 1297 1298 void elf::readLinkerScript(MemoryBufferRef MB) { 1299 ScriptParser(MB).readLinkerScript(); 1300 } 1301 1302 void elf::readVersionScript(MemoryBufferRef MB) { 1303 ScriptParser(MB).readVersionScript(); 1304 } 1305 1306 void elf::readDynamicList(MemoryBufferRef MB) { 1307 ScriptParser(MB).readDynamicList(); 1308 } 1309