1 //===- ScriptParser.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains a recursive-descendent parser for linker scripts. 11 // Parsed results are stored to Config and Script global objects. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ScriptParser.h" 16 #include "Config.h" 17 #include "Driver.h" 18 #include "InputSection.h" 19 #include "LinkerScript.h" 20 #include "OutputSections.h" 21 #include "ScriptLexer.h" 22 #include "Symbols.h" 23 #include "Target.h" 24 #include "lld/Common/Memory.h" 25 #include "llvm/ADT/SmallString.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/ADT/StringSet.h" 28 #include "llvm/ADT/StringSwitch.h" 29 #include "llvm/BinaryFormat/ELF.h" 30 #include "llvm/Support/Casting.h" 31 #include "llvm/Support/ErrorHandling.h" 32 #include "llvm/Support/FileSystem.h" 33 #include "llvm/Support/Path.h" 34 #include <cassert> 35 #include <limits> 36 #include <vector> 37 38 using namespace llvm; 39 using namespace llvm::ELF; 40 using namespace llvm::support::endian; 41 using namespace lld; 42 using namespace lld::elf; 43 44 static bool isUnderSysroot(StringRef Path); 45 46 namespace { 47 class ScriptParser final : ScriptLexer { 48 public: 49 ScriptParser(MemoryBufferRef MB) 50 : ScriptLexer(MB), 51 IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} 52 53 void readLinkerScript(); 54 void readVersionScript(); 55 void readDynamicList(); 56 void readDefsym(StringRef Name); 57 58 private: 59 void addFile(StringRef Path); 60 61 void readAsNeeded(); 62 void readEntry(); 63 void readExtern(); 64 void readGroup(); 65 void readInclude(); 66 void readMemory(); 67 void readOutput(); 68 void readOutputArch(); 69 void readOutputFormat(); 70 void readPhdrs(); 71 void readRegionAlias(); 72 void readSearchDir(); 73 void readSections(); 74 void readVersion(); 75 void readVersionScriptCommand(); 76 77 SymbolAssignment *readAssignment(StringRef Name); 78 ByteCommand *readByteCommand(StringRef Tok); 79 uint32_t readFill(); 80 uint32_t parseFill(StringRef Tok); 81 void readSectionAddressType(OutputSection *Cmd); 82 OutputSection *readOutputSectionDescription(StringRef OutSec); 83 std::vector<StringRef> readOutputSectionPhdrs(); 84 InputSectionDescription *readInputSectionDescription(StringRef Tok); 85 StringMatcher readFilePatterns(); 86 std::vector<SectionPattern> readInputSectionsList(); 87 InputSectionDescription *readInputSectionRules(StringRef FilePattern); 88 unsigned readPhdrType(); 89 SortSectionPolicy readSortKind(); 90 SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); 91 SymbolAssignment *readProvideOrAssignment(StringRef Tok); 92 void readSort(); 93 AssertCommand *readAssert(); 94 Expr readAssertExpr(); 95 Expr readConstant(); 96 Expr getPageSize(); 97 98 uint64_t readMemoryAssignment(StringRef, StringRef, StringRef); 99 std::pair<uint32_t, uint32_t> readMemoryAttributes(); 100 101 Expr readExpr(); 102 Expr readExpr1(Expr Lhs, int MinPrec); 103 StringRef readParenLiteral(); 104 Expr readPrimary(); 105 Expr readTernary(Expr Cond); 106 Expr readParenExpr(); 107 108 // For parsing version script. 109 std::vector<SymbolVersion> readVersionExtern(); 110 void readAnonymousDeclaration(); 111 void readVersionDeclaration(StringRef VerStr); 112 113 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 114 readSymbols(); 115 116 // True if a script being read is in a subdirectory specified by -sysroot. 117 bool IsUnderSysroot; 118 119 // A set to detect an INCLUDE() cycle. 120 StringSet<> Seen; 121 }; 122 } // namespace 123 124 static StringRef unquote(StringRef S) { 125 if (S.startswith("\"")) 126 return S.substr(1, S.size() - 2); 127 return S; 128 } 129 130 static bool isUnderSysroot(StringRef Path) { 131 if (Config->Sysroot == "") 132 return false; 133 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 134 if (sys::fs::equivalent(Config->Sysroot, Path)) 135 return true; 136 return false; 137 } 138 139 // Some operations only support one non absolute value. Move the 140 // absolute one to the right hand side for convenience. 141 static void moveAbsRight(ExprValue &A, ExprValue &B) { 142 if (A.Sec == nullptr || (A.ForceAbsolute && !B.isAbsolute())) 143 std::swap(A, B); 144 if (!B.isAbsolute()) 145 error(A.Loc + ": at least one side of the expression must be absolute"); 146 } 147 148 static ExprValue add(ExprValue A, ExprValue B) { 149 moveAbsRight(A, B); 150 return {A.Sec, A.ForceAbsolute, A.getSectionOffset() + B.getValue(), A.Loc}; 151 } 152 153 static ExprValue sub(ExprValue A, ExprValue B) { 154 // The distance between two symbols in sections is absolute. 155 if (!A.isAbsolute() && !B.isAbsolute()) 156 return A.getValue() - B.getValue(); 157 return {A.Sec, false, A.getSectionOffset() - B.getValue(), A.Loc}; 158 } 159 160 static ExprValue mul(ExprValue A, ExprValue B) { 161 return A.getValue() * B.getValue(); 162 } 163 164 static ExprValue div(ExprValue A, ExprValue B) { 165 if (uint64_t BV = B.getValue()) 166 return A.getValue() / BV; 167 error("division by zero"); 168 return 0; 169 } 170 171 static ExprValue bitAnd(ExprValue A, ExprValue B) { 172 moveAbsRight(A, B); 173 return {A.Sec, A.ForceAbsolute, 174 (A.getValue() & B.getValue()) - A.getSecAddr(), A.Loc}; 175 } 176 177 static ExprValue bitOr(ExprValue A, ExprValue B) { 178 moveAbsRight(A, B); 179 return {A.Sec, A.ForceAbsolute, 180 (A.getValue() | B.getValue()) - A.getSecAddr(), A.Loc}; 181 } 182 183 void ScriptParser::readDynamicList() { 184 Config->HasDynamicList = true; 185 expect("{"); 186 std::vector<SymbolVersion> Locals; 187 std::vector<SymbolVersion> Globals; 188 std::tie(Locals, Globals) = readSymbols(); 189 expect(";"); 190 191 if (!atEOF()) { 192 setError("EOF expected, but got " + next()); 193 return; 194 } 195 if (!Locals.empty()) { 196 setError("\"local:\" scope not supported in --dynamic-list"); 197 return; 198 } 199 200 for (SymbolVersion V : Globals) 201 Config->DynamicList.push_back(V); 202 } 203 204 void ScriptParser::readVersionScript() { 205 readVersionScriptCommand(); 206 if (!atEOF()) 207 setError("EOF expected, but got " + next()); 208 } 209 210 void ScriptParser::readVersionScriptCommand() { 211 if (consume("{")) { 212 readAnonymousDeclaration(); 213 return; 214 } 215 216 while (!atEOF() && !errorCount() && peek() != "}") { 217 StringRef VerStr = next(); 218 if (VerStr == "{") { 219 setError("anonymous version definition is used in " 220 "combination with other version definitions"); 221 return; 222 } 223 expect("{"); 224 readVersionDeclaration(VerStr); 225 } 226 } 227 228 void ScriptParser::readVersion() { 229 expect("{"); 230 readVersionScriptCommand(); 231 expect("}"); 232 } 233 234 void ScriptParser::readLinkerScript() { 235 while (!atEOF()) { 236 StringRef Tok = next(); 237 if (Tok == ";") 238 continue; 239 240 if (Tok == "ASSERT") { 241 Script->SectionCommands.push_back(readAssert()); 242 } else if (Tok == "ENTRY") { 243 readEntry(); 244 } else if (Tok == "EXTERN") { 245 readExtern(); 246 } else if (Tok == "GROUP" || Tok == "INPUT") { 247 readGroup(); 248 } else if (Tok == "INCLUDE") { 249 readInclude(); 250 } else if (Tok == "MEMORY") { 251 readMemory(); 252 } else if (Tok == "OUTPUT") { 253 readOutput(); 254 } else if (Tok == "OUTPUT_ARCH") { 255 readOutputArch(); 256 } else if (Tok == "OUTPUT_FORMAT") { 257 readOutputFormat(); 258 } else if (Tok == "PHDRS") { 259 readPhdrs(); 260 } else if (Tok == "REGION_ALIAS") { 261 readRegionAlias(); 262 } else if (Tok == "SEARCH_DIR") { 263 readSearchDir(); 264 } else if (Tok == "SECTIONS") { 265 readSections(); 266 } else if (Tok == "VERSION") { 267 readVersion(); 268 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { 269 Script->SectionCommands.push_back(Cmd); 270 } else { 271 setError("unknown directive: " + Tok); 272 } 273 } 274 } 275 276 void ScriptParser::readDefsym(StringRef Name) { 277 Expr E = readExpr(); 278 if (!atEOF()) 279 setError("EOF expected, but got " + next()); 280 SymbolAssignment *Cmd = make<SymbolAssignment>(Name, E, getCurrentLocation()); 281 Script->SectionCommands.push_back(Cmd); 282 } 283 284 void ScriptParser::addFile(StringRef S) { 285 if (IsUnderSysroot && S.startswith("/")) { 286 SmallString<128> PathData; 287 StringRef Path = (Config->Sysroot + S).toStringRef(PathData); 288 if (sys::fs::exists(Path)) { 289 Driver->addFile(Saver.save(Path), /*WithLOption=*/false); 290 return; 291 } 292 } 293 294 if (S.startswith("/")) { 295 Driver->addFile(S, /*WithLOption=*/false); 296 } else if (S.startswith("=")) { 297 if (Config->Sysroot.empty()) 298 Driver->addFile(S.substr(1), /*WithLOption=*/false); 299 else 300 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)), 301 /*WithLOption=*/false); 302 } else if (S.startswith("-l")) { 303 Driver->addLibrary(S.substr(2)); 304 } else if (sys::fs::exists(S)) { 305 Driver->addFile(S, /*WithLOption=*/false); 306 } else { 307 if (Optional<std::string> Path = findFromSearchPaths(S)) 308 Driver->addFile(Saver.save(*Path), /*WithLOption=*/true); 309 else 310 setError("unable to find " + S); 311 } 312 } 313 314 void ScriptParser::readAsNeeded() { 315 expect("("); 316 bool Orig = Config->AsNeeded; 317 Config->AsNeeded = true; 318 while (!errorCount() && !consume(")")) 319 addFile(unquote(next())); 320 Config->AsNeeded = Orig; 321 } 322 323 void ScriptParser::readEntry() { 324 // -e <symbol> takes predecence over ENTRY(<symbol>). 325 expect("("); 326 StringRef Tok = next(); 327 if (Config->Entry.empty()) 328 Config->Entry = Tok; 329 expect(")"); 330 } 331 332 void ScriptParser::readExtern() { 333 expect("("); 334 while (!errorCount() && !consume(")")) 335 Config->Undefined.push_back(next()); 336 } 337 338 void ScriptParser::readGroup() { 339 expect("("); 340 while (!errorCount() && !consume(")")) { 341 if (consume("AS_NEEDED")) 342 readAsNeeded(); 343 else 344 addFile(unquote(next())); 345 } 346 } 347 348 void ScriptParser::readInclude() { 349 StringRef Tok = unquote(next()); 350 351 if (!Seen.insert(Tok).second) { 352 setError("there is a cycle in linker script INCLUDEs"); 353 return; 354 } 355 356 if (Optional<std::string> Path = searchLinkerScript(Tok)) { 357 if (Optional<MemoryBufferRef> MB = readFile(*Path)) 358 tokenize(*MB); 359 return; 360 } 361 setError("cannot find linker script " + Tok); 362 } 363 364 void ScriptParser::readOutput() { 365 // -o <file> takes predecence over OUTPUT(<file>). 366 expect("("); 367 StringRef Tok = next(); 368 if (Config->OutputFile.empty()) 369 Config->OutputFile = unquote(Tok); 370 expect(")"); 371 } 372 373 void ScriptParser::readOutputArch() { 374 // OUTPUT_ARCH is ignored for now. 375 expect("("); 376 while (!errorCount() && !consume(")")) 377 skip(); 378 } 379 380 void ScriptParser::readOutputFormat() { 381 // Error checking only for now. 382 expect("("); 383 skip(); 384 if (consume(")")) 385 return; 386 expect(","); 387 skip(); 388 expect(","); 389 skip(); 390 expect(")"); 391 } 392 393 void ScriptParser::readPhdrs() { 394 expect("{"); 395 396 while (!errorCount() && !consume("}")) { 397 PhdrsCommand Cmd; 398 Cmd.Name = next(); 399 Cmd.Type = readPhdrType(); 400 401 while (!errorCount() && !consume(";")) { 402 if (consume("FILEHDR")) 403 Cmd.HasFilehdr = true; 404 else if (consume("PHDRS")) 405 Cmd.HasPhdrs = true; 406 else if (consume("AT")) 407 Cmd.LMAExpr = readParenExpr(); 408 else if (consume("FLAGS")) 409 Cmd.Flags = readParenExpr()().getValue(); 410 else 411 setError("unexpected header attribute: " + next()); 412 } 413 414 Script->PhdrsCommands.push_back(Cmd); 415 } 416 } 417 418 void ScriptParser::readRegionAlias() { 419 expect("("); 420 StringRef Alias = unquote(next()); 421 expect(","); 422 StringRef Name = next(); 423 expect(")"); 424 425 if (Script->MemoryRegions.count(Alias)) 426 setError("redefinition of memory region '" + Alias + "'"); 427 if (!Script->MemoryRegions.count(Name)) 428 setError("memory region '" + Name + "' is not defined"); 429 Script->MemoryRegions.insert({Alias, Script->MemoryRegions[Name]}); 430 } 431 432 void ScriptParser::readSearchDir() { 433 expect("("); 434 StringRef Tok = next(); 435 if (!Config->Nostdlib) 436 Config->SearchPaths.push_back(unquote(Tok)); 437 expect(")"); 438 } 439 440 void ScriptParser::readSections() { 441 Script->HasSectionsCommand = true; 442 443 // -no-rosegment is used to avoid placing read only non-executable sections in 444 // their own segment. We do the same if SECTIONS command is present in linker 445 // script. See comment for computeFlags(). 446 Config->SingleRoRx = true; 447 448 expect("{"); 449 while (!errorCount() && !consume("}")) { 450 StringRef Tok = next(); 451 BaseCommand *Cmd = readProvideOrAssignment(Tok); 452 if (!Cmd) { 453 if (Tok == "ASSERT") 454 Cmd = readAssert(); 455 else 456 Cmd = readOutputSectionDescription(Tok); 457 } 458 Script->SectionCommands.push_back(Cmd); 459 } 460 } 461 462 static int precedence(StringRef Op) { 463 return StringSwitch<int>(Op) 464 .Cases("*", "/", 5) 465 .Cases("+", "-", 4) 466 .Cases("<<", ">>", 3) 467 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 468 .Cases("&", "|", 1) 469 .Default(-1); 470 } 471 472 StringMatcher ScriptParser::readFilePatterns() { 473 std::vector<StringRef> V; 474 while (!errorCount() && !consume(")")) 475 V.push_back(next()); 476 return StringMatcher(V); 477 } 478 479 SortSectionPolicy ScriptParser::readSortKind() { 480 if (consume("SORT") || consume("SORT_BY_NAME")) 481 return SortSectionPolicy::Name; 482 if (consume("SORT_BY_ALIGNMENT")) 483 return SortSectionPolicy::Alignment; 484 if (consume("SORT_BY_INIT_PRIORITY")) 485 return SortSectionPolicy::Priority; 486 if (consume("SORT_NONE")) 487 return SortSectionPolicy::None; 488 return SortSectionPolicy::Default; 489 } 490 491 // Reads SECTIONS command contents in the following form: 492 // 493 // <contents> ::= <elem>* 494 // <elem> ::= <exclude>? <glob-pattern> 495 // <exclude> ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")" 496 // 497 // For example, 498 // 499 // *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz) 500 // 501 // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o". 502 // The semantics of that is section .foo in any file, section .bar in 503 // any file but a.o, and section .baz in any file but b.o. 504 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 505 std::vector<SectionPattern> Ret; 506 while (!errorCount() && peek() != ")") { 507 StringMatcher ExcludeFilePat; 508 if (consume("EXCLUDE_FILE")) { 509 expect("("); 510 ExcludeFilePat = readFilePatterns(); 511 } 512 513 std::vector<StringRef> V; 514 while (!errorCount() && peek() != ")" && peek() != "EXCLUDE_FILE") 515 V.push_back(next()); 516 517 if (!V.empty()) 518 Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); 519 else 520 setError("section pattern is expected"); 521 } 522 return Ret; 523 } 524 525 // Reads contents of "SECTIONS" directive. That directive contains a 526 // list of glob patterns for input sections. The grammar is as follows. 527 // 528 // <patterns> ::= <section-list> 529 // | <sort> "(" <section-list> ")" 530 // | <sort> "(" <sort> "(" <section-list> ")" ")" 531 // 532 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" 533 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" 534 // 535 // <section-list> is parsed by readInputSectionsList(). 536 InputSectionDescription * 537 ScriptParser::readInputSectionRules(StringRef FilePattern) { 538 auto *Cmd = make<InputSectionDescription>(FilePattern); 539 expect("("); 540 541 while (!errorCount() && !consume(")")) { 542 SortSectionPolicy Outer = readSortKind(); 543 SortSectionPolicy Inner = SortSectionPolicy::Default; 544 std::vector<SectionPattern> V; 545 if (Outer != SortSectionPolicy::Default) { 546 expect("("); 547 Inner = readSortKind(); 548 if (Inner != SortSectionPolicy::Default) { 549 expect("("); 550 V = readInputSectionsList(); 551 expect(")"); 552 } else { 553 V = readInputSectionsList(); 554 } 555 expect(")"); 556 } else { 557 V = readInputSectionsList(); 558 } 559 560 for (SectionPattern &Pat : V) { 561 Pat.SortInner = Inner; 562 Pat.SortOuter = Outer; 563 } 564 565 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 566 } 567 return Cmd; 568 } 569 570 InputSectionDescription * 571 ScriptParser::readInputSectionDescription(StringRef Tok) { 572 // Input section wildcard can be surrounded by KEEP. 573 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 574 if (Tok == "KEEP") { 575 expect("("); 576 StringRef FilePattern = next(); 577 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 578 expect(")"); 579 Script->KeptSections.push_back(Cmd); 580 return Cmd; 581 } 582 return readInputSectionRules(Tok); 583 } 584 585 void ScriptParser::readSort() { 586 expect("("); 587 expect("CONSTRUCTORS"); 588 expect(")"); 589 } 590 591 AssertCommand *ScriptParser::readAssert() { 592 return make<AssertCommand>(readAssertExpr()); 593 } 594 595 Expr ScriptParser::readAssertExpr() { 596 expect("("); 597 Expr E = readExpr(); 598 expect(","); 599 StringRef Msg = unquote(next()); 600 expect(")"); 601 602 return [=] { 603 if (!E().getValue()) 604 error(Msg); 605 return Script->getDot(); 606 }; 607 } 608 609 // Reads a FILL(expr) command. We handle the FILL command as an 610 // alias for =fillexp section attribute, which is different from 611 // what GNU linkers do. 612 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 613 uint32_t ScriptParser::readFill() { 614 expect("("); 615 uint32_t V = parseFill(next()); 616 expect(")"); 617 return V; 618 } 619 620 // Reads an expression and/or the special directive "(NOLOAD)" for an 621 // output section definition. 622 // 623 // An output section name can be followed by an address expression 624 // and/or by "(NOLOAD)". This grammar is not LL(1) because "(" can be 625 // interpreted as either the beginning of some expression or "(NOLOAD)". 626 // 627 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html 628 // https://sourceware.org/binutils/docs/ld/Output-Section-Type.html 629 void ScriptParser::readSectionAddressType(OutputSection *Cmd) { 630 if (consume("(")) { 631 if (consume("NOLOAD")) { 632 expect(")"); 633 Cmd->Noload = true; 634 return; 635 } 636 Cmd->AddrExpr = readExpr(); 637 expect(")"); 638 } else { 639 Cmd->AddrExpr = readExpr(); 640 } 641 642 if (consume("(")) { 643 expect("NOLOAD"); 644 expect(")"); 645 Cmd->Noload = true; 646 } 647 } 648 649 static Expr checkAlignment(Expr E, std::string &Loc) { 650 return [=] { 651 uint64_t Alignment = std::max((uint64_t)1, E().getValue()); 652 if (!isPowerOf2_64(Alignment)) { 653 error(Loc + ": alignment must be power of 2"); 654 return (uint64_t)1; // Return a dummy value. 655 } 656 return Alignment; 657 }; 658 } 659 660 OutputSection *ScriptParser::readOutputSectionDescription(StringRef OutSec) { 661 OutputSection *Cmd = 662 Script->createOutputSection(OutSec, getCurrentLocation()); 663 664 if (peek() != ":") 665 readSectionAddressType(Cmd); 666 expect(":"); 667 668 std::string Location = getCurrentLocation(); 669 if (consume("AT")) 670 Cmd->LMAExpr = readParenExpr(); 671 if (consume("ALIGN")) 672 Cmd->AlignExpr = checkAlignment(readParenExpr(), Location); 673 if (consume("SUBALIGN")) 674 Cmd->SubalignExpr = checkAlignment(readParenExpr(), Location); 675 676 // Parse constraints. 677 if (consume("ONLY_IF_RO")) 678 Cmd->Constraint = ConstraintKind::ReadOnly; 679 if (consume("ONLY_IF_RW")) 680 Cmd->Constraint = ConstraintKind::ReadWrite; 681 expect("{"); 682 683 while (!errorCount() && !consume("}")) { 684 StringRef Tok = next(); 685 if (Tok == ";") { 686 // Empty commands are allowed. Do nothing here. 687 } else if (SymbolAssignment *Assign = readProvideOrAssignment(Tok)) { 688 Cmd->SectionCommands.push_back(Assign); 689 } else if (ByteCommand *Data = readByteCommand(Tok)) { 690 Cmd->SectionCommands.push_back(Data); 691 } else if (Tok == "ASSERT") { 692 Cmd->SectionCommands.push_back(readAssert()); 693 expect(";"); 694 } else if (Tok == "CONSTRUCTORS") { 695 // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors 696 // by name. This is for very old file formats such as ECOFF/XCOFF. 697 // For ELF, we should ignore. 698 } else if (Tok == "FILL") { 699 Cmd->Filler = readFill(); 700 } else if (Tok == "SORT") { 701 readSort(); 702 } else if (peek() == "(") { 703 Cmd->SectionCommands.push_back(readInputSectionDescription(Tok)); 704 } else { 705 setError("unknown command " + Tok); 706 } 707 } 708 709 if (consume(">")) 710 Cmd->MemoryRegionName = next(); 711 712 if (consume("AT")) { 713 expect(">"); 714 Cmd->LMARegionName = next(); 715 } 716 717 if (Cmd->LMAExpr && !Cmd->LMARegionName.empty()) 718 error("section can't have both LMA and a load region"); 719 720 Cmd->Phdrs = readOutputSectionPhdrs(); 721 722 if (consume("=")) 723 Cmd->Filler = parseFill(next()); 724 else if (peek().startswith("=")) 725 Cmd->Filler = parseFill(next().drop_front()); 726 727 // Consume optional comma following output section command. 728 consume(","); 729 730 return Cmd; 731 } 732 733 // Parses a given string as a octal/decimal/hexadecimal number and 734 // returns it as a big-endian number. Used for `=<fillexp>`. 735 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 736 // 737 // When reading a hexstring, ld.bfd handles it as a blob of arbitrary 738 // size, while ld.gold always handles it as a 32-bit big-endian number. 739 // We are compatible with ld.gold because it's easier to implement. 740 uint32_t ScriptParser::parseFill(StringRef Tok) { 741 uint32_t V = 0; 742 if (!to_integer(Tok, V)) 743 setError("invalid filler expression: " + Tok); 744 745 uint32_t Buf; 746 write32be(&Buf, V); 747 return Buf; 748 } 749 750 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { 751 expect("("); 752 SymbolAssignment *Cmd = readAssignment(next()); 753 Cmd->Provide = Provide; 754 Cmd->Hidden = Hidden; 755 expect(")"); 756 expect(";"); 757 return Cmd; 758 } 759 760 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) { 761 SymbolAssignment *Cmd = nullptr; 762 if (peek() == "=" || peek() == "+=") { 763 Cmd = readAssignment(Tok); 764 expect(";"); 765 } else if (Tok == "PROVIDE") { 766 Cmd = readProvideHidden(true, false); 767 } else if (Tok == "HIDDEN") { 768 Cmd = readProvideHidden(false, true); 769 } else if (Tok == "PROVIDE_HIDDEN") { 770 Cmd = readProvideHidden(true, true); 771 } 772 return Cmd; 773 } 774 775 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { 776 StringRef Op = next(); 777 assert(Op == "=" || Op == "+="); 778 Expr E = readExpr(); 779 if (Op == "+=") { 780 std::string Loc = getCurrentLocation(); 781 E = [=] { return add(Script->getSymbolValue(Name, Loc), E()); }; 782 } 783 return make<SymbolAssignment>(Name, E, getCurrentLocation()); 784 } 785 786 // This is an operator-precedence parser to parse a linker 787 // script expression. 788 Expr ScriptParser::readExpr() { 789 // Our lexer is context-aware. Set the in-expression bit so that 790 // they apply different tokenization rules. 791 bool Orig = InExpr; 792 InExpr = true; 793 Expr E = readExpr1(readPrimary(), 0); 794 InExpr = Orig; 795 return E; 796 } 797 798 static Expr combine(StringRef Op, Expr L, Expr R) { 799 if (Op == "+") 800 return [=] { return add(L(), R()); }; 801 if (Op == "-") 802 return [=] { return sub(L(), R()); }; 803 if (Op == "*") 804 return [=] { return mul(L(), R()); }; 805 if (Op == "/") 806 return [=] { return div(L(), R()); }; 807 if (Op == "<<") 808 return [=] { return L().getValue() << R().getValue(); }; 809 if (Op == ">>") 810 return [=] { return L().getValue() >> R().getValue(); }; 811 if (Op == "<") 812 return [=] { return L().getValue() < R().getValue(); }; 813 if (Op == ">") 814 return [=] { return L().getValue() > R().getValue(); }; 815 if (Op == ">=") 816 return [=] { return L().getValue() >= R().getValue(); }; 817 if (Op == "<=") 818 return [=] { return L().getValue() <= R().getValue(); }; 819 if (Op == "==") 820 return [=] { return L().getValue() == R().getValue(); }; 821 if (Op == "!=") 822 return [=] { return L().getValue() != R().getValue(); }; 823 if (Op == "&") 824 return [=] { return bitAnd(L(), R()); }; 825 if (Op == "|") 826 return [=] { return bitOr(L(), R()); }; 827 llvm_unreachable("invalid operator"); 828 } 829 830 // This is a part of the operator-precedence parser. This function 831 // assumes that the remaining token stream starts with an operator. 832 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { 833 while (!atEOF() && !errorCount()) { 834 // Read an operator and an expression. 835 if (consume("?")) 836 return readTernary(Lhs); 837 StringRef Op1 = peek(); 838 if (precedence(Op1) < MinPrec) 839 break; 840 skip(); 841 Expr Rhs = readPrimary(); 842 843 // Evaluate the remaining part of the expression first if the 844 // next operator has greater precedence than the previous one. 845 // For example, if we have read "+" and "3", and if the next 846 // operator is "*", then we'll evaluate 3 * ... part first. 847 while (!atEOF()) { 848 StringRef Op2 = peek(); 849 if (precedence(Op2) <= precedence(Op1)) 850 break; 851 Rhs = readExpr1(Rhs, precedence(Op2)); 852 } 853 854 Lhs = combine(Op1, Lhs, Rhs); 855 } 856 return Lhs; 857 } 858 859 Expr ScriptParser::getPageSize() { 860 std::string Location = getCurrentLocation(); 861 return [=]() -> uint64_t { 862 if (Target) 863 return Target->PageSize; 864 error(Location + ": unable to calculate page size"); 865 return 4096; // Return a dummy value. 866 }; 867 } 868 869 Expr ScriptParser::readConstant() { 870 StringRef S = readParenLiteral(); 871 if (S == "COMMONPAGESIZE") 872 return getPageSize(); 873 if (S == "MAXPAGESIZE") 874 return [] { return Config->MaxPageSize; }; 875 setError("unknown constant: " + S); 876 return {}; 877 } 878 879 // Parses Tok as an integer. It recognizes hexadecimal (prefixed with 880 // "0x" or suffixed with "H") and decimal numbers. Decimal numbers may 881 // have "K" (Ki) or "M" (Mi) suffixes. 882 static Optional<uint64_t> parseInt(StringRef Tok) { 883 // Hexadecimal 884 uint64_t Val; 885 if (Tok.startswith_lower("0x")) { 886 if (!to_integer(Tok.substr(2), Val, 16)) 887 return None; 888 return Val; 889 } 890 if (Tok.endswith_lower("H")) { 891 if (!to_integer(Tok.drop_back(), Val, 16)) 892 return None; 893 return Val; 894 } 895 896 // Decimal 897 if (Tok.endswith_lower("K")) { 898 if (!to_integer(Tok.drop_back(), Val, 10)) 899 return None; 900 return Val * 1024; 901 } 902 if (Tok.endswith_lower("M")) { 903 if (!to_integer(Tok.drop_back(), Val, 10)) 904 return None; 905 return Val * 1024 * 1024; 906 } 907 if (!to_integer(Tok, Val, 10)) 908 return None; 909 return Val; 910 } 911 912 ByteCommand *ScriptParser::readByteCommand(StringRef Tok) { 913 int Size = StringSwitch<int>(Tok) 914 .Case("BYTE", 1) 915 .Case("SHORT", 2) 916 .Case("LONG", 4) 917 .Case("QUAD", 8) 918 .Default(-1); 919 if (Size == -1) 920 return nullptr; 921 return make<ByteCommand>(readParenExpr(), Size); 922 } 923 924 StringRef ScriptParser::readParenLiteral() { 925 expect("("); 926 bool Orig = InExpr; 927 InExpr = false; 928 StringRef Tok = next(); 929 InExpr = Orig; 930 expect(")"); 931 return Tok; 932 } 933 934 static void checkIfExists(OutputSection *Cmd, StringRef Location) { 935 if (Cmd->Location.empty() && Script->ErrorOnMissingSection) 936 error(Location + ": undefined section " + Cmd->Name); 937 } 938 939 Expr ScriptParser::readPrimary() { 940 if (peek() == "(") 941 return readParenExpr(); 942 943 if (consume("~")) { 944 Expr E = readPrimary(); 945 return [=] { return ~E().getValue(); }; 946 } 947 if (consume("!")) { 948 Expr E = readPrimary(); 949 return [=] { return !E().getValue(); }; 950 } 951 if (consume("-")) { 952 Expr E = readPrimary(); 953 return [=] { return -E().getValue(); }; 954 } 955 956 StringRef Tok = next(); 957 std::string Location = getCurrentLocation(); 958 959 // Built-in functions are parsed here. 960 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 961 if (Tok == "ABSOLUTE") { 962 Expr Inner = readParenExpr(); 963 return [=] { 964 ExprValue I = Inner(); 965 I.ForceAbsolute = true; 966 return I; 967 }; 968 } 969 if (Tok == "ADDR") { 970 StringRef Name = readParenLiteral(); 971 OutputSection *Sec = Script->getOrCreateOutputSection(Name); 972 return [=]() -> ExprValue { 973 checkIfExists(Sec, Location); 974 return {Sec, false, 0, Location}; 975 }; 976 } 977 if (Tok == "ALIGN") { 978 expect("("); 979 Expr E = readExpr(); 980 if (consume(")")) { 981 E = checkAlignment(E, Location); 982 return [=] { return alignTo(Script->getDot(), E().getValue()); }; 983 } 984 expect(","); 985 Expr E2 = checkAlignment(readExpr(), Location); 986 expect(")"); 987 return [=] { 988 ExprValue V = E(); 989 V.Alignment = E2().getValue(); 990 return V; 991 }; 992 } 993 if (Tok == "ALIGNOF") { 994 StringRef Name = readParenLiteral(); 995 OutputSection *Cmd = Script->getOrCreateOutputSection(Name); 996 return [=] { 997 checkIfExists(Cmd, Location); 998 return Cmd->Alignment; 999 }; 1000 } 1001 if (Tok == "ASSERT") 1002 return readAssertExpr(); 1003 if (Tok == "CONSTANT") 1004 return readConstant(); 1005 if (Tok == "DATA_SEGMENT_ALIGN") { 1006 expect("("); 1007 Expr E = readExpr(); 1008 expect(","); 1009 readExpr(); 1010 expect(")"); 1011 return [=] { 1012 return alignTo(Script->getDot(), std::max((uint64_t)1, E().getValue())); 1013 }; 1014 } 1015 if (Tok == "DATA_SEGMENT_END") { 1016 expect("("); 1017 expect("."); 1018 expect(")"); 1019 return [] { return Script->getDot(); }; 1020 } 1021 if (Tok == "DATA_SEGMENT_RELRO_END") { 1022 // GNU linkers implements more complicated logic to handle 1023 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and 1024 // just align to the next page boundary for simplicity. 1025 expect("("); 1026 readExpr(); 1027 expect(","); 1028 readExpr(); 1029 expect(")"); 1030 Expr E = getPageSize(); 1031 return [=] { return alignTo(Script->getDot(), E().getValue()); }; 1032 } 1033 if (Tok == "DEFINED") { 1034 StringRef Name = readParenLiteral(); 1035 return [=] { return Symtab->find(Name) ? 1 : 0; }; 1036 } 1037 if (Tok == "LENGTH") { 1038 StringRef Name = readParenLiteral(); 1039 if (Script->MemoryRegions.count(Name) == 0) 1040 setError("memory region not defined: " + Name); 1041 return [=] { return Script->MemoryRegions[Name]->Length; }; 1042 } 1043 if (Tok == "LOADADDR") { 1044 StringRef Name = readParenLiteral(); 1045 OutputSection *Cmd = Script->getOrCreateOutputSection(Name); 1046 return [=] { 1047 checkIfExists(Cmd, Location); 1048 return Cmd->getLMA(); 1049 }; 1050 } 1051 if (Tok == "ORIGIN") { 1052 StringRef Name = readParenLiteral(); 1053 if (Script->MemoryRegions.count(Name) == 0) 1054 setError("memory region not defined: " + Name); 1055 return [=] { return Script->MemoryRegions[Name]->Origin; }; 1056 } 1057 if (Tok == "SEGMENT_START") { 1058 expect("("); 1059 skip(); 1060 expect(","); 1061 Expr E = readExpr(); 1062 expect(")"); 1063 return [=] { return E(); }; 1064 } 1065 if (Tok == "SIZEOF") { 1066 StringRef Name = readParenLiteral(); 1067 OutputSection *Cmd = Script->getOrCreateOutputSection(Name); 1068 // Linker script does not create an output section if its content is empty. 1069 // We want to allow SIZEOF(.foo) where .foo is a section which happened to 1070 // be empty. 1071 return [=] { return Cmd->Size; }; 1072 } 1073 if (Tok == "SIZEOF_HEADERS") 1074 return [=] { return elf::getHeaderSize(); }; 1075 1076 // Tok is the dot. 1077 if (Tok == ".") 1078 return [=] { return Script->getSymbolValue(Tok, Location); }; 1079 1080 // Tok is a literal number. 1081 if (Optional<uint64_t> Val = parseInt(Tok)) 1082 return [=] { return *Val; }; 1083 1084 // Tok is a symbol name. 1085 if (!isValidCIdentifier(Tok)) 1086 setError("malformed number: " + Tok); 1087 Script->ReferencedSymbols.push_back(Tok); 1088 return [=] { return Script->getSymbolValue(Tok, Location); }; 1089 } 1090 1091 Expr ScriptParser::readTernary(Expr Cond) { 1092 Expr L = readExpr(); 1093 expect(":"); 1094 Expr R = readExpr(); 1095 return [=] { return Cond().getValue() ? L() : R(); }; 1096 } 1097 1098 Expr ScriptParser::readParenExpr() { 1099 expect("("); 1100 Expr E = readExpr(); 1101 expect(")"); 1102 return E; 1103 } 1104 1105 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 1106 std::vector<StringRef> Phdrs; 1107 while (!errorCount() && peek().startswith(":")) { 1108 StringRef Tok = next(); 1109 Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); 1110 } 1111 return Phdrs; 1112 } 1113 1114 // Read a program header type name. The next token must be a 1115 // name of a program header type or a constant (e.g. "0x3"). 1116 unsigned ScriptParser::readPhdrType() { 1117 StringRef Tok = next(); 1118 if (Optional<uint64_t> Val = parseInt(Tok)) 1119 return *Val; 1120 1121 unsigned Ret = StringSwitch<unsigned>(Tok) 1122 .Case("PT_NULL", PT_NULL) 1123 .Case("PT_LOAD", PT_LOAD) 1124 .Case("PT_DYNAMIC", PT_DYNAMIC) 1125 .Case("PT_INTERP", PT_INTERP) 1126 .Case("PT_NOTE", PT_NOTE) 1127 .Case("PT_SHLIB", PT_SHLIB) 1128 .Case("PT_PHDR", PT_PHDR) 1129 .Case("PT_TLS", PT_TLS) 1130 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1131 .Case("PT_GNU_STACK", PT_GNU_STACK) 1132 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1133 .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) 1134 .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) 1135 .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) 1136 .Default(-1); 1137 1138 if (Ret == (unsigned)-1) { 1139 setError("invalid program header type: " + Tok); 1140 return PT_NULL; 1141 } 1142 return Ret; 1143 } 1144 1145 // Reads an anonymous version declaration. 1146 void ScriptParser::readAnonymousDeclaration() { 1147 std::vector<SymbolVersion> Locals; 1148 std::vector<SymbolVersion> Globals; 1149 std::tie(Locals, Globals) = readSymbols(); 1150 1151 for (SymbolVersion V : Locals) { 1152 if (V.Name == "*") 1153 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1154 else 1155 Config->VersionScriptLocals.push_back(V); 1156 } 1157 1158 for (SymbolVersion V : Globals) 1159 Config->VersionScriptGlobals.push_back(V); 1160 1161 expect(";"); 1162 } 1163 1164 // Reads a non-anonymous version definition, 1165 // e.g. "VerStr { global: foo; bar; local: *; };". 1166 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1167 // Read a symbol list. 1168 std::vector<SymbolVersion> Locals; 1169 std::vector<SymbolVersion> Globals; 1170 std::tie(Locals, Globals) = readSymbols(); 1171 1172 for (SymbolVersion V : Locals) { 1173 if (V.Name == "*") 1174 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1175 else 1176 Config->VersionScriptLocals.push_back(V); 1177 } 1178 1179 // Create a new version definition and add that to the global symbols. 1180 VersionDefinition Ver; 1181 Ver.Name = VerStr; 1182 Ver.Globals = Globals; 1183 1184 // User-defined version number starts from 2 because 0 and 1 are 1185 // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively. 1186 Ver.Id = Config->VersionDefinitions.size() + 2; 1187 Config->VersionDefinitions.push_back(Ver); 1188 1189 // Each version may have a parent version. For example, "Ver2" 1190 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" 1191 // as a parent. This version hierarchy is, probably against your 1192 // instinct, purely for hint; the runtime doesn't care about it 1193 // at all. In LLD, we simply ignore it. 1194 if (peek() != ";") 1195 skip(); 1196 expect(";"); 1197 } 1198 1199 static bool hasWildcard(StringRef S) { 1200 return S.find_first_of("?*[") != StringRef::npos; 1201 } 1202 1203 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 1204 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 1205 ScriptParser::readSymbols() { 1206 std::vector<SymbolVersion> Locals; 1207 std::vector<SymbolVersion> Globals; 1208 std::vector<SymbolVersion> *V = &Globals; 1209 1210 while (!errorCount()) { 1211 if (consume("}")) 1212 break; 1213 if (consumeLabel("local")) { 1214 V = &Locals; 1215 continue; 1216 } 1217 if (consumeLabel("global")) { 1218 V = &Globals; 1219 continue; 1220 } 1221 1222 if (consume("extern")) { 1223 std::vector<SymbolVersion> Ext = readVersionExtern(); 1224 V->insert(V->end(), Ext.begin(), Ext.end()); 1225 } else { 1226 StringRef Tok = next(); 1227 V->push_back({unquote(Tok), false, hasWildcard(Tok)}); 1228 } 1229 expect(";"); 1230 } 1231 return {Locals, Globals}; 1232 } 1233 1234 // Reads an "extern C++" directive, e.g., 1235 // "extern "C++" { ns::*; "f(int, double)"; };" 1236 // 1237 // The last semicolon is optional. E.g. this is OK: 1238 // "extern "C++" { ns::*; "f(int, double)" };" 1239 std::vector<SymbolVersion> ScriptParser::readVersionExtern() { 1240 StringRef Tok = next(); 1241 bool IsCXX = Tok == "\"C++\""; 1242 if (!IsCXX && Tok != "\"C\"") 1243 setError("Unknown language"); 1244 expect("{"); 1245 1246 std::vector<SymbolVersion> Ret; 1247 while (!errorCount() && peek() != "}") { 1248 StringRef Tok = next(); 1249 bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); 1250 Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); 1251 if (consume("}")) 1252 return Ret; 1253 expect(";"); 1254 } 1255 1256 expect("}"); 1257 return Ret; 1258 } 1259 1260 uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2, 1261 StringRef S3) { 1262 if (!consume(S1) && !consume(S2) && !consume(S3)) { 1263 setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); 1264 return 0; 1265 } 1266 expect("="); 1267 return readExpr()().getValue(); 1268 } 1269 1270 // Parse the MEMORY command as specified in: 1271 // https://sourceware.org/binutils/docs/ld/MEMORY.html 1272 // 1273 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... } 1274 void ScriptParser::readMemory() { 1275 expect("{"); 1276 while (!errorCount() && !consume("}")) { 1277 StringRef Name = next(); 1278 1279 uint32_t Flags = 0; 1280 uint32_t NegFlags = 0; 1281 if (consume("(")) { 1282 std::tie(Flags, NegFlags) = readMemoryAttributes(); 1283 expect(")"); 1284 } 1285 expect(":"); 1286 1287 uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); 1288 expect(","); 1289 uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); 1290 1291 // Add the memory region to the region map. 1292 if (Script->MemoryRegions.count(Name)) 1293 setError("region '" + Name + "' already defined"); 1294 MemoryRegion *MR = 1295 make<MemoryRegion>(Name, Origin, Length, Flags, NegFlags); 1296 Script->MemoryRegions[Name] = MR; 1297 } 1298 } 1299 1300 // This function parses the attributes used to match against section 1301 // flags when placing output sections in a memory region. These flags 1302 // are only used when an explicit memory region name is not used. 1303 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { 1304 uint32_t Flags = 0; 1305 uint32_t NegFlags = 0; 1306 bool Invert = false; 1307 1308 for (char C : next().lower()) { 1309 uint32_t Flag = 0; 1310 if (C == '!') 1311 Invert = !Invert; 1312 else if (C == 'w') 1313 Flag = SHF_WRITE; 1314 else if (C == 'x') 1315 Flag = SHF_EXECINSTR; 1316 else if (C == 'a') 1317 Flag = SHF_ALLOC; 1318 else if (C != 'r') 1319 setError("invalid memory region attribute"); 1320 1321 if (Invert) 1322 NegFlags |= Flag; 1323 else 1324 Flags |= Flag; 1325 } 1326 return {Flags, NegFlags}; 1327 } 1328 1329 void elf::readLinkerScript(MemoryBufferRef MB) { 1330 ScriptParser(MB).readLinkerScript(); 1331 } 1332 1333 void elf::readVersionScript(MemoryBufferRef MB) { 1334 ScriptParser(MB).readVersionScript(); 1335 } 1336 1337 void elf::readDynamicList(MemoryBufferRef MB) { 1338 ScriptParser(MB).readDynamicList(); 1339 } 1340 1341 void elf::readDefsym(StringRef Name, MemoryBufferRef MB) { 1342 ScriptParser(MB).readDefsym(Name); 1343 } 1344