1 //===- ScriptParser.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains a recursive-descendent parser for linker scripts. 11 // Parsed results are stored to Config and Script global objects. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ScriptParser.h" 16 #include "Config.h" 17 #include "Driver.h" 18 #include "InputSection.h" 19 #include "LinkerScript.h" 20 #include "OutputSections.h" 21 #include "ScriptLexer.h" 22 #include "Symbols.h" 23 #include "Target.h" 24 #include "lld/Common/Memory.h" 25 #include "llvm/ADT/SmallString.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/ADT/StringSet.h" 28 #include "llvm/ADT/StringSwitch.h" 29 #include "llvm/BinaryFormat/ELF.h" 30 #include "llvm/Support/Casting.h" 31 #include "llvm/Support/ErrorHandling.h" 32 #include "llvm/Support/FileSystem.h" 33 #include "llvm/Support/Path.h" 34 #include <cassert> 35 #include <limits> 36 #include <vector> 37 38 using namespace llvm; 39 using namespace llvm::ELF; 40 using namespace llvm::support::endian; 41 using namespace lld; 42 using namespace lld::elf; 43 44 static bool isUnderSysroot(StringRef Path); 45 46 namespace { 47 class ScriptParser final : ScriptLexer { 48 public: 49 ScriptParser(MemoryBufferRef MB) 50 : ScriptLexer(MB), 51 IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} 52 53 void readLinkerScript(); 54 void readVersionScript(); 55 void readDynamicList(); 56 void readDefsym(StringRef Name); 57 58 private: 59 void addFile(StringRef Path); 60 61 void readAsNeeded(); 62 void readEntry(); 63 void readExtern(); 64 void readGroup(); 65 void readInclude(); 66 void readMemory(); 67 void readOutput(); 68 void readOutputArch(); 69 void readOutputFormat(); 70 void readPhdrs(); 71 void readRegionAlias(); 72 void readSearchDir(); 73 void readSections(); 74 void readVersion(); 75 void readVersionScriptCommand(); 76 77 SymbolAssignment *readAssignment(StringRef Name); 78 ByteCommand *readByteCommand(StringRef Tok); 79 uint32_t readFill(); 80 uint32_t parseFill(StringRef Tok); 81 void readSectionAddressType(OutputSection *Cmd); 82 OutputSection *readOutputSectionDescription(StringRef OutSec); 83 std::vector<StringRef> readOutputSectionPhdrs(); 84 InputSectionDescription *readInputSectionDescription(StringRef Tok); 85 StringMatcher readFilePatterns(); 86 std::vector<SectionPattern> readInputSectionsList(); 87 InputSectionDescription *readInputSectionRules(StringRef FilePattern); 88 unsigned readPhdrType(); 89 SortSectionPolicy readSortKind(); 90 SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); 91 SymbolAssignment *readProvideOrAssignment(StringRef Tok); 92 void readSort(); 93 AssertCommand *readAssert(); 94 Expr readAssertExpr(); 95 Expr readConstant(); 96 Expr getPageSize(); 97 98 uint64_t readMemoryAssignment(StringRef, StringRef, StringRef); 99 std::pair<uint32_t, uint32_t> readMemoryAttributes(); 100 101 Expr combine(StringRef Op, Expr L, Expr R); 102 Expr readExpr(); 103 Expr readExpr1(Expr Lhs, int MinPrec); 104 StringRef readParenLiteral(); 105 Expr readPrimary(); 106 Expr readTernary(Expr Cond); 107 Expr readParenExpr(); 108 109 // For parsing version script. 110 std::vector<SymbolVersion> readVersionExtern(); 111 void readAnonymousDeclaration(); 112 void readVersionDeclaration(StringRef VerStr); 113 114 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 115 readSymbols(); 116 117 // True if a script being read is in a subdirectory specified by -sysroot. 118 bool IsUnderSysroot; 119 120 // A set to detect an INCLUDE() cycle. 121 StringSet<> Seen; 122 }; 123 } // namespace 124 125 static StringRef unquote(StringRef S) { 126 if (S.startswith("\"")) 127 return S.substr(1, S.size() - 2); 128 return S; 129 } 130 131 static bool isUnderSysroot(StringRef Path) { 132 if (Config->Sysroot == "") 133 return false; 134 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 135 if (sys::fs::equivalent(Config->Sysroot, Path)) 136 return true; 137 return false; 138 } 139 140 // Some operations only support one non absolute value. Move the 141 // absolute one to the right hand side for convenience. 142 static void moveAbsRight(ExprValue &A, ExprValue &B) { 143 if (A.Sec == nullptr || (A.ForceAbsolute && !B.isAbsolute())) 144 std::swap(A, B); 145 if (!B.isAbsolute()) 146 error(A.Loc + ": at least one side of the expression must be absolute"); 147 } 148 149 static ExprValue add(ExprValue A, ExprValue B) { 150 moveAbsRight(A, B); 151 return {A.Sec, A.ForceAbsolute, A.getSectionOffset() + B.getValue(), A.Loc}; 152 } 153 154 static ExprValue sub(ExprValue A, ExprValue B) { 155 // The distance between two symbols in sections is absolute. 156 if (!A.isAbsolute() && !B.isAbsolute()) 157 return A.getValue() - B.getValue(); 158 return {A.Sec, false, A.getSectionOffset() - B.getValue(), A.Loc}; 159 } 160 161 static ExprValue bitAnd(ExprValue A, ExprValue B) { 162 moveAbsRight(A, B); 163 return {A.Sec, A.ForceAbsolute, 164 (A.getValue() & B.getValue()) - A.getSecAddr(), A.Loc}; 165 } 166 167 static ExprValue bitOr(ExprValue A, ExprValue B) { 168 moveAbsRight(A, B); 169 return {A.Sec, A.ForceAbsolute, 170 (A.getValue() | B.getValue()) - A.getSecAddr(), A.Loc}; 171 } 172 173 void ScriptParser::readDynamicList() { 174 Config->HasDynamicList = true; 175 expect("{"); 176 std::vector<SymbolVersion> Locals; 177 std::vector<SymbolVersion> Globals; 178 std::tie(Locals, Globals) = readSymbols(); 179 expect(";"); 180 181 if (!atEOF()) { 182 setError("EOF expected, but got " + next()); 183 return; 184 } 185 if (!Locals.empty()) { 186 setError("\"local:\" scope not supported in --dynamic-list"); 187 return; 188 } 189 190 for (SymbolVersion V : Globals) 191 Config->DynamicList.push_back(V); 192 } 193 194 void ScriptParser::readVersionScript() { 195 readVersionScriptCommand(); 196 if (!atEOF()) 197 setError("EOF expected, but got " + next()); 198 } 199 200 void ScriptParser::readVersionScriptCommand() { 201 if (consume("{")) { 202 readAnonymousDeclaration(); 203 return; 204 } 205 206 while (!atEOF() && !errorCount() && peek() != "}") { 207 StringRef VerStr = next(); 208 if (VerStr == "{") { 209 setError("anonymous version definition is used in " 210 "combination with other version definitions"); 211 return; 212 } 213 expect("{"); 214 readVersionDeclaration(VerStr); 215 } 216 } 217 218 void ScriptParser::readVersion() { 219 expect("{"); 220 readVersionScriptCommand(); 221 expect("}"); 222 } 223 224 void ScriptParser::readLinkerScript() { 225 while (!atEOF()) { 226 StringRef Tok = next(); 227 if (Tok == ";") 228 continue; 229 230 if (Tok == "ASSERT") { 231 Script->SectionCommands.push_back(readAssert()); 232 } else if (Tok == "ENTRY") { 233 readEntry(); 234 } else if (Tok == "EXTERN") { 235 readExtern(); 236 } else if (Tok == "GROUP" || Tok == "INPUT") { 237 readGroup(); 238 } else if (Tok == "INCLUDE") { 239 readInclude(); 240 } else if (Tok == "MEMORY") { 241 readMemory(); 242 } else if (Tok == "OUTPUT") { 243 readOutput(); 244 } else if (Tok == "OUTPUT_ARCH") { 245 readOutputArch(); 246 } else if (Tok == "OUTPUT_FORMAT") { 247 readOutputFormat(); 248 } else if (Tok == "PHDRS") { 249 readPhdrs(); 250 } else if (Tok == "REGION_ALIAS") { 251 readRegionAlias(); 252 } else if (Tok == "SEARCH_DIR") { 253 readSearchDir(); 254 } else if (Tok == "SECTIONS") { 255 readSections(); 256 } else if (Tok == "VERSION") { 257 readVersion(); 258 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { 259 Script->SectionCommands.push_back(Cmd); 260 } else { 261 setError("unknown directive: " + Tok); 262 } 263 } 264 } 265 266 void ScriptParser::readDefsym(StringRef Name) { 267 Expr E = readExpr(); 268 if (!atEOF()) 269 setError("EOF expected, but got " + next()); 270 SymbolAssignment *Cmd = make<SymbolAssignment>(Name, E, getCurrentLocation()); 271 Script->SectionCommands.push_back(Cmd); 272 } 273 274 void ScriptParser::addFile(StringRef S) { 275 if (IsUnderSysroot && S.startswith("/")) { 276 SmallString<128> PathData; 277 StringRef Path = (Config->Sysroot + S).toStringRef(PathData); 278 if (sys::fs::exists(Path)) { 279 Driver->addFile(Saver.save(Path), /*WithLOption=*/false); 280 return; 281 } 282 } 283 284 if (S.startswith("/")) { 285 Driver->addFile(S, /*WithLOption=*/false); 286 } else if (S.startswith("=")) { 287 if (Config->Sysroot.empty()) 288 Driver->addFile(S.substr(1), /*WithLOption=*/false); 289 else 290 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)), 291 /*WithLOption=*/false); 292 } else if (S.startswith("-l")) { 293 Driver->addLibrary(S.substr(2)); 294 } else if (sys::fs::exists(S)) { 295 Driver->addFile(S, /*WithLOption=*/false); 296 } else { 297 if (Optional<std::string> Path = findFromSearchPaths(S)) 298 Driver->addFile(Saver.save(*Path), /*WithLOption=*/true); 299 else 300 setError("unable to find " + S); 301 } 302 } 303 304 void ScriptParser::readAsNeeded() { 305 expect("("); 306 bool Orig = Config->AsNeeded; 307 Config->AsNeeded = true; 308 while (!errorCount() && !consume(")")) 309 addFile(unquote(next())); 310 Config->AsNeeded = Orig; 311 } 312 313 void ScriptParser::readEntry() { 314 // -e <symbol> takes predecence over ENTRY(<symbol>). 315 expect("("); 316 StringRef Tok = next(); 317 if (Config->Entry.empty()) 318 Config->Entry = Tok; 319 expect(")"); 320 } 321 322 void ScriptParser::readExtern() { 323 expect("("); 324 while (!errorCount() && !consume(")")) 325 Config->Undefined.push_back(next()); 326 } 327 328 void ScriptParser::readGroup() { 329 expect("("); 330 while (!errorCount() && !consume(")")) { 331 if (consume("AS_NEEDED")) 332 readAsNeeded(); 333 else 334 addFile(unquote(next())); 335 } 336 } 337 338 void ScriptParser::readInclude() { 339 StringRef Tok = unquote(next()); 340 341 if (!Seen.insert(Tok).second) { 342 setError("there is a cycle in linker script INCLUDEs"); 343 return; 344 } 345 346 if (Optional<std::string> Path = searchLinkerScript(Tok)) { 347 if (Optional<MemoryBufferRef> MB = readFile(*Path)) 348 tokenize(*MB); 349 return; 350 } 351 setError("cannot find linker script " + Tok); 352 } 353 354 void ScriptParser::readOutput() { 355 // -o <file> takes predecence over OUTPUT(<file>). 356 expect("("); 357 StringRef Tok = next(); 358 if (Config->OutputFile.empty()) 359 Config->OutputFile = unquote(Tok); 360 expect(")"); 361 } 362 363 void ScriptParser::readOutputArch() { 364 // OUTPUT_ARCH is ignored for now. 365 expect("("); 366 while (!errorCount() && !consume(")")) 367 skip(); 368 } 369 370 void ScriptParser::readOutputFormat() { 371 // Error checking only for now. 372 expect("("); 373 skip(); 374 if (consume(")")) 375 return; 376 expect(","); 377 skip(); 378 expect(","); 379 skip(); 380 expect(")"); 381 } 382 383 void ScriptParser::readPhdrs() { 384 expect("{"); 385 386 while (!errorCount() && !consume("}")) { 387 PhdrsCommand Cmd; 388 Cmd.Name = next(); 389 Cmd.Type = readPhdrType(); 390 391 while (!errorCount() && !consume(";")) { 392 if (consume("FILEHDR")) 393 Cmd.HasFilehdr = true; 394 else if (consume("PHDRS")) 395 Cmd.HasPhdrs = true; 396 else if (consume("AT")) 397 Cmd.LMAExpr = readParenExpr(); 398 else if (consume("FLAGS")) 399 Cmd.Flags = readParenExpr()().getValue(); 400 else 401 setError("unexpected header attribute: " + next()); 402 } 403 404 Script->PhdrsCommands.push_back(Cmd); 405 } 406 } 407 408 void ScriptParser::readRegionAlias() { 409 expect("("); 410 StringRef Alias = unquote(next()); 411 expect(","); 412 StringRef Name = next(); 413 expect(")"); 414 415 if (Script->MemoryRegions.count(Alias)) 416 setError("redefinition of memory region '" + Alias + "'"); 417 if (!Script->MemoryRegions.count(Name)) 418 setError("memory region '" + Name + "' is not defined"); 419 Script->MemoryRegions.insert({Alias, Script->MemoryRegions[Name]}); 420 } 421 422 void ScriptParser::readSearchDir() { 423 expect("("); 424 StringRef Tok = next(); 425 if (!Config->Nostdlib) 426 Config->SearchPaths.push_back(unquote(Tok)); 427 expect(")"); 428 } 429 430 void ScriptParser::readSections() { 431 Script->HasSectionsCommand = true; 432 433 // -no-rosegment is used to avoid placing read only non-executable sections in 434 // their own segment. We do the same if SECTIONS command is present in linker 435 // script. See comment for computeFlags(). 436 Config->SingleRoRx = true; 437 438 expect("{"); 439 while (!errorCount() && !consume("}")) { 440 StringRef Tok = next(); 441 BaseCommand *Cmd = readProvideOrAssignment(Tok); 442 if (!Cmd) { 443 if (Tok == "ASSERT") 444 Cmd = readAssert(); 445 else 446 Cmd = readOutputSectionDescription(Tok); 447 } 448 Script->SectionCommands.push_back(Cmd); 449 } 450 } 451 452 static int precedence(StringRef Op) { 453 return StringSwitch<int>(Op) 454 .Cases("*", "/", "%", 5) 455 .Cases("+", "-", 4) 456 .Cases("<<", ">>", 3) 457 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 458 .Cases("&", "|", 1) 459 .Default(-1); 460 } 461 462 StringMatcher ScriptParser::readFilePatterns() { 463 std::vector<StringRef> V; 464 while (!errorCount() && !consume(")")) 465 V.push_back(next()); 466 return StringMatcher(V); 467 } 468 469 SortSectionPolicy ScriptParser::readSortKind() { 470 if (consume("SORT") || consume("SORT_BY_NAME")) 471 return SortSectionPolicy::Name; 472 if (consume("SORT_BY_ALIGNMENT")) 473 return SortSectionPolicy::Alignment; 474 if (consume("SORT_BY_INIT_PRIORITY")) 475 return SortSectionPolicy::Priority; 476 if (consume("SORT_NONE")) 477 return SortSectionPolicy::None; 478 return SortSectionPolicy::Default; 479 } 480 481 // Reads SECTIONS command contents in the following form: 482 // 483 // <contents> ::= <elem>* 484 // <elem> ::= <exclude>? <glob-pattern> 485 // <exclude> ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")" 486 // 487 // For example, 488 // 489 // *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz) 490 // 491 // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o". 492 // The semantics of that is section .foo in any file, section .bar in 493 // any file but a.o, and section .baz in any file but b.o. 494 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 495 std::vector<SectionPattern> Ret; 496 while (!errorCount() && peek() != ")") { 497 StringMatcher ExcludeFilePat; 498 if (consume("EXCLUDE_FILE")) { 499 expect("("); 500 ExcludeFilePat = readFilePatterns(); 501 } 502 503 std::vector<StringRef> V; 504 while (!errorCount() && peek() != ")" && peek() != "EXCLUDE_FILE") 505 V.push_back(next()); 506 507 if (!V.empty()) 508 Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); 509 else 510 setError("section pattern is expected"); 511 } 512 return Ret; 513 } 514 515 // Reads contents of "SECTIONS" directive. That directive contains a 516 // list of glob patterns for input sections. The grammar is as follows. 517 // 518 // <patterns> ::= <section-list> 519 // | <sort> "(" <section-list> ")" 520 // | <sort> "(" <sort> "(" <section-list> ")" ")" 521 // 522 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" 523 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" 524 // 525 // <section-list> is parsed by readInputSectionsList(). 526 InputSectionDescription * 527 ScriptParser::readInputSectionRules(StringRef FilePattern) { 528 auto *Cmd = make<InputSectionDescription>(FilePattern); 529 expect("("); 530 531 while (!errorCount() && !consume(")")) { 532 SortSectionPolicy Outer = readSortKind(); 533 SortSectionPolicy Inner = SortSectionPolicy::Default; 534 std::vector<SectionPattern> V; 535 if (Outer != SortSectionPolicy::Default) { 536 expect("("); 537 Inner = readSortKind(); 538 if (Inner != SortSectionPolicy::Default) { 539 expect("("); 540 V = readInputSectionsList(); 541 expect(")"); 542 } else { 543 V = readInputSectionsList(); 544 } 545 expect(")"); 546 } else { 547 V = readInputSectionsList(); 548 } 549 550 for (SectionPattern &Pat : V) { 551 Pat.SortInner = Inner; 552 Pat.SortOuter = Outer; 553 } 554 555 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 556 } 557 return Cmd; 558 } 559 560 InputSectionDescription * 561 ScriptParser::readInputSectionDescription(StringRef Tok) { 562 // Input section wildcard can be surrounded by KEEP. 563 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 564 if (Tok == "KEEP") { 565 expect("("); 566 StringRef FilePattern = next(); 567 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 568 expect(")"); 569 Script->KeptSections.push_back(Cmd); 570 return Cmd; 571 } 572 return readInputSectionRules(Tok); 573 } 574 575 void ScriptParser::readSort() { 576 expect("("); 577 expect("CONSTRUCTORS"); 578 expect(")"); 579 } 580 581 AssertCommand *ScriptParser::readAssert() { 582 return make<AssertCommand>(readAssertExpr()); 583 } 584 585 Expr ScriptParser::readAssertExpr() { 586 expect("("); 587 Expr E = readExpr(); 588 expect(","); 589 StringRef Msg = unquote(next()); 590 expect(")"); 591 592 return [=] { 593 if (!E().getValue()) 594 error(Msg); 595 return Script->getDot(); 596 }; 597 } 598 599 // Reads a FILL(expr) command. We handle the FILL command as an 600 // alias for =fillexp section attribute, which is different from 601 // what GNU linkers do. 602 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 603 uint32_t ScriptParser::readFill() { 604 expect("("); 605 uint32_t V = parseFill(next()); 606 expect(")"); 607 return V; 608 } 609 610 // Reads an expression and/or the special directive for an output 611 // section definition. Directive is one of following: "(NOLOAD)", 612 // "(COPY)", "(INFO)" or "(OVERLAY)". 613 // 614 // An output section name can be followed by an address expression 615 // and/or directive. This grammar is not LL(1) because "(" can be 616 // interpreted as either the beginning of some expression or beginning 617 // of directive. 618 // 619 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html 620 // https://sourceware.org/binutils/docs/ld/Output-Section-Type.html 621 void ScriptParser::readSectionAddressType(OutputSection *Cmd) { 622 if (consume("(")) { 623 if (consume("NOLOAD")) { 624 expect(")"); 625 Cmd->Noload = true; 626 return; 627 } 628 if (consume("COPY") || consume("INFO") || consume("OVERLAY")) { 629 expect(")"); 630 Cmd->NonAlloc = true; 631 return; 632 } 633 Cmd->AddrExpr = readExpr(); 634 expect(")"); 635 } else { 636 Cmd->AddrExpr = readExpr(); 637 } 638 639 if (consume("(")) { 640 expect("NOLOAD"); 641 expect(")"); 642 Cmd->Noload = true; 643 } 644 } 645 646 static Expr checkAlignment(Expr E, std::string &Loc) { 647 return [=] { 648 uint64_t Alignment = std::max((uint64_t)1, E().getValue()); 649 if (!isPowerOf2_64(Alignment)) { 650 error(Loc + ": alignment must be power of 2"); 651 return (uint64_t)1; // Return a dummy value. 652 } 653 return Alignment; 654 }; 655 } 656 657 OutputSection *ScriptParser::readOutputSectionDescription(StringRef OutSec) { 658 OutputSection *Cmd = 659 Script->createOutputSection(OutSec, getCurrentLocation()); 660 661 size_t SymbolsReferenced = Script->ReferencedSymbols.size(); 662 663 if (peek() != ":") 664 readSectionAddressType(Cmd); 665 expect(":"); 666 667 std::string Location = getCurrentLocation(); 668 if (consume("AT")) 669 Cmd->LMAExpr = readParenExpr(); 670 if (consume("ALIGN")) 671 Cmd->AlignExpr = checkAlignment(readParenExpr(), Location); 672 if (consume("SUBALIGN")) 673 Cmd->SubalignExpr = checkAlignment(readParenExpr(), Location); 674 675 // Parse constraints. 676 if (consume("ONLY_IF_RO")) 677 Cmd->Constraint = ConstraintKind::ReadOnly; 678 if (consume("ONLY_IF_RW")) 679 Cmd->Constraint = ConstraintKind::ReadWrite; 680 expect("{"); 681 682 while (!errorCount() && !consume("}")) { 683 StringRef Tok = next(); 684 if (Tok == ";") { 685 // Empty commands are allowed. Do nothing here. 686 } else if (SymbolAssignment *Assign = readProvideOrAssignment(Tok)) { 687 Cmd->SectionCommands.push_back(Assign); 688 } else if (ByteCommand *Data = readByteCommand(Tok)) { 689 Cmd->SectionCommands.push_back(Data); 690 } else if (Tok == "ASSERT") { 691 Cmd->SectionCommands.push_back(readAssert()); 692 expect(";"); 693 } else if (Tok == "CONSTRUCTORS") { 694 // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors 695 // by name. This is for very old file formats such as ECOFF/XCOFF. 696 // For ELF, we should ignore. 697 } else if (Tok == "FILL") { 698 Cmd->Filler = readFill(); 699 } else if (Tok == "SORT") { 700 readSort(); 701 } else if (peek() == "(") { 702 Cmd->SectionCommands.push_back(readInputSectionDescription(Tok)); 703 } else { 704 setError("unknown command " + Tok); 705 } 706 } 707 708 if (consume(">")) 709 Cmd->MemoryRegionName = next(); 710 711 if (consume("AT")) { 712 expect(">"); 713 Cmd->LMARegionName = next(); 714 } 715 716 if (Cmd->LMAExpr && !Cmd->LMARegionName.empty()) 717 error("section can't have both LMA and a load region"); 718 719 Cmd->Phdrs = readOutputSectionPhdrs(); 720 721 if (consume("=")) 722 Cmd->Filler = parseFill(next()); 723 else if (peek().startswith("=")) 724 Cmd->Filler = parseFill(next().drop_front()); 725 726 // Consume optional comma following output section command. 727 consume(","); 728 729 if (Script->ReferencedSymbols.size() > SymbolsReferenced) 730 Cmd->ExpressionsUseSymbols = true; 731 return Cmd; 732 } 733 734 // Parses a given string as a octal/decimal/hexadecimal number and 735 // returns it as a big-endian number. Used for `=<fillexp>`. 736 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 737 // 738 // When reading a hexstring, ld.bfd handles it as a blob of arbitrary 739 // size, while ld.gold always handles it as a 32-bit big-endian number. 740 // We are compatible with ld.gold because it's easier to implement. 741 uint32_t ScriptParser::parseFill(StringRef Tok) { 742 uint32_t V = 0; 743 if (!to_integer(Tok, V)) 744 setError("invalid filler expression: " + Tok); 745 746 uint32_t Buf; 747 write32be(&Buf, V); 748 return Buf; 749 } 750 751 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { 752 expect("("); 753 SymbolAssignment *Cmd = readAssignment(next()); 754 Cmd->Provide = Provide; 755 Cmd->Hidden = Hidden; 756 expect(")"); 757 expect(";"); 758 return Cmd; 759 } 760 761 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) { 762 SymbolAssignment *Cmd = nullptr; 763 if (peek() == "=" || peek() == "+=") { 764 Cmd = readAssignment(Tok); 765 expect(";"); 766 } else if (Tok == "PROVIDE") { 767 Cmd = readProvideHidden(true, false); 768 } else if (Tok == "HIDDEN") { 769 Cmd = readProvideHidden(false, true); 770 } else if (Tok == "PROVIDE_HIDDEN") { 771 Cmd = readProvideHidden(true, true); 772 } 773 return Cmd; 774 } 775 776 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { 777 StringRef Op = next(); 778 assert(Op == "=" || Op == "+="); 779 Expr E = readExpr(); 780 if (Op == "+=") { 781 std::string Loc = getCurrentLocation(); 782 E = [=] { return add(Script->getSymbolValue(Name, Loc), E()); }; 783 } 784 return make<SymbolAssignment>(Name, E, getCurrentLocation()); 785 } 786 787 // This is an operator-precedence parser to parse a linker 788 // script expression. 789 Expr ScriptParser::readExpr() { 790 // Our lexer is context-aware. Set the in-expression bit so that 791 // they apply different tokenization rules. 792 bool Orig = InExpr; 793 InExpr = true; 794 Expr E = readExpr1(readPrimary(), 0); 795 InExpr = Orig; 796 return E; 797 } 798 799 Expr ScriptParser::combine(StringRef Op, Expr L, Expr R) { 800 if (Op == "+") 801 return [=] { return add(L(), R()); }; 802 if (Op == "-") 803 return [=] { return sub(L(), R()); }; 804 if (Op == "*") 805 return [=] { return L().getValue() * R().getValue(); }; 806 if (Op == "/") { 807 std::string Loc = getCurrentLocation(); 808 return [=]() -> uint64_t { 809 if (uint64_t RV = R().getValue()) 810 return L().getValue() / RV; 811 error(Loc + ": division by zero"); 812 return 0; 813 }; 814 } 815 if (Op == "%") { 816 std::string Loc = getCurrentLocation(); 817 return [=]() -> uint64_t { 818 if (uint64_t RV = R().getValue()) 819 return L().getValue() % RV; 820 error(Loc + ": modulo by zero"); 821 return 0; 822 }; 823 } 824 if (Op == "<<") 825 return [=] { return L().getValue() << R().getValue(); }; 826 if (Op == ">>") 827 return [=] { return L().getValue() >> R().getValue(); }; 828 if (Op == "<") 829 return [=] { return L().getValue() < R().getValue(); }; 830 if (Op == ">") 831 return [=] { return L().getValue() > R().getValue(); }; 832 if (Op == ">=") 833 return [=] { return L().getValue() >= R().getValue(); }; 834 if (Op == "<=") 835 return [=] { return L().getValue() <= R().getValue(); }; 836 if (Op == "==") 837 return [=] { return L().getValue() == R().getValue(); }; 838 if (Op == "!=") 839 return [=] { return L().getValue() != R().getValue(); }; 840 if (Op == "&") 841 return [=] { return bitAnd(L(), R()); }; 842 if (Op == "|") 843 return [=] { return bitOr(L(), R()); }; 844 llvm_unreachable("invalid operator"); 845 } 846 847 // This is a part of the operator-precedence parser. This function 848 // assumes that the remaining token stream starts with an operator. 849 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { 850 while (!atEOF() && !errorCount()) { 851 // Read an operator and an expression. 852 if (consume("?")) 853 return readTernary(Lhs); 854 StringRef Op1 = peek(); 855 if (precedence(Op1) < MinPrec) 856 break; 857 skip(); 858 Expr Rhs = readPrimary(); 859 860 // Evaluate the remaining part of the expression first if the 861 // next operator has greater precedence than the previous one. 862 // For example, if we have read "+" and "3", and if the next 863 // operator is "*", then we'll evaluate 3 * ... part first. 864 while (!atEOF()) { 865 StringRef Op2 = peek(); 866 if (precedence(Op2) <= precedence(Op1)) 867 break; 868 Rhs = readExpr1(Rhs, precedence(Op2)); 869 } 870 871 Lhs = combine(Op1, Lhs, Rhs); 872 } 873 return Lhs; 874 } 875 876 Expr ScriptParser::getPageSize() { 877 std::string Location = getCurrentLocation(); 878 return [=]() -> uint64_t { 879 if (Target) 880 return Target->PageSize; 881 error(Location + ": unable to calculate page size"); 882 return 4096; // Return a dummy value. 883 }; 884 } 885 886 Expr ScriptParser::readConstant() { 887 StringRef S = readParenLiteral(); 888 if (S == "COMMONPAGESIZE") 889 return getPageSize(); 890 if (S == "MAXPAGESIZE") 891 return [] { return Config->MaxPageSize; }; 892 setError("unknown constant: " + S); 893 return [] { return 0; }; 894 } 895 896 // Parses Tok as an integer. It recognizes hexadecimal (prefixed with 897 // "0x" or suffixed with "H") and decimal numbers. Decimal numbers may 898 // have "K" (Ki) or "M" (Mi) suffixes. 899 static Optional<uint64_t> parseInt(StringRef Tok) { 900 // Hexadecimal 901 uint64_t Val; 902 if (Tok.startswith_lower("0x")) { 903 if (!to_integer(Tok.substr(2), Val, 16)) 904 return None; 905 return Val; 906 } 907 if (Tok.endswith_lower("H")) { 908 if (!to_integer(Tok.drop_back(), Val, 16)) 909 return None; 910 return Val; 911 } 912 913 // Decimal 914 if (Tok.endswith_lower("K")) { 915 if (!to_integer(Tok.drop_back(), Val, 10)) 916 return None; 917 return Val * 1024; 918 } 919 if (Tok.endswith_lower("M")) { 920 if (!to_integer(Tok.drop_back(), Val, 10)) 921 return None; 922 return Val * 1024 * 1024; 923 } 924 if (!to_integer(Tok, Val, 10)) 925 return None; 926 return Val; 927 } 928 929 ByteCommand *ScriptParser::readByteCommand(StringRef Tok) { 930 int Size = StringSwitch<int>(Tok) 931 .Case("BYTE", 1) 932 .Case("SHORT", 2) 933 .Case("LONG", 4) 934 .Case("QUAD", 8) 935 .Default(-1); 936 if (Size == -1) 937 return nullptr; 938 return make<ByteCommand>(readParenExpr(), Size); 939 } 940 941 StringRef ScriptParser::readParenLiteral() { 942 expect("("); 943 bool Orig = InExpr; 944 InExpr = false; 945 StringRef Tok = next(); 946 InExpr = Orig; 947 expect(")"); 948 return Tok; 949 } 950 951 static void checkIfExists(OutputSection *Cmd, StringRef Location) { 952 if (Cmd->Location.empty() && Script->ErrorOnMissingSection) 953 error(Location + ": undefined section " + Cmd->Name); 954 } 955 956 Expr ScriptParser::readPrimary() { 957 if (peek() == "(") 958 return readParenExpr(); 959 960 if (consume("~")) { 961 Expr E = readPrimary(); 962 return [=] { return ~E().getValue(); }; 963 } 964 if (consume("!")) { 965 Expr E = readPrimary(); 966 return [=] { return !E().getValue(); }; 967 } 968 if (consume("-")) { 969 Expr E = readPrimary(); 970 return [=] { return -E().getValue(); }; 971 } 972 973 StringRef Tok = next(); 974 std::string Location = getCurrentLocation(); 975 976 // Built-in functions are parsed here. 977 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 978 if (Tok == "ABSOLUTE") { 979 Expr Inner = readParenExpr(); 980 return [=] { 981 ExprValue I = Inner(); 982 I.ForceAbsolute = true; 983 return I; 984 }; 985 } 986 if (Tok == "ADDR") { 987 StringRef Name = readParenLiteral(); 988 OutputSection *Sec = Script->getOrCreateOutputSection(Name); 989 return [=]() -> ExprValue { 990 checkIfExists(Sec, Location); 991 return {Sec, false, 0, Location}; 992 }; 993 } 994 if (Tok == "ALIGN") { 995 expect("("); 996 Expr E = readExpr(); 997 if (consume(")")) { 998 E = checkAlignment(E, Location); 999 return [=] { return alignTo(Script->getDot(), E().getValue()); }; 1000 } 1001 expect(","); 1002 Expr E2 = checkAlignment(readExpr(), Location); 1003 expect(")"); 1004 return [=] { 1005 ExprValue V = E(); 1006 V.Alignment = E2().getValue(); 1007 return V; 1008 }; 1009 } 1010 if (Tok == "ALIGNOF") { 1011 StringRef Name = readParenLiteral(); 1012 OutputSection *Cmd = Script->getOrCreateOutputSection(Name); 1013 return [=] { 1014 checkIfExists(Cmd, Location); 1015 return Cmd->Alignment; 1016 }; 1017 } 1018 if (Tok == "ASSERT") 1019 return readAssertExpr(); 1020 if (Tok == "CONSTANT") 1021 return readConstant(); 1022 if (Tok == "DATA_SEGMENT_ALIGN") { 1023 expect("("); 1024 Expr E = readExpr(); 1025 expect(","); 1026 readExpr(); 1027 expect(")"); 1028 return [=] { 1029 return alignTo(Script->getDot(), std::max((uint64_t)1, E().getValue())); 1030 }; 1031 } 1032 if (Tok == "DATA_SEGMENT_END") { 1033 expect("("); 1034 expect("."); 1035 expect(")"); 1036 return [] { return Script->getDot(); }; 1037 } 1038 if (Tok == "DATA_SEGMENT_RELRO_END") { 1039 // GNU linkers implements more complicated logic to handle 1040 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and 1041 // just align to the next page boundary for simplicity. 1042 expect("("); 1043 readExpr(); 1044 expect(","); 1045 readExpr(); 1046 expect(")"); 1047 Expr E = getPageSize(); 1048 return [=] { return alignTo(Script->getDot(), E().getValue()); }; 1049 } 1050 if (Tok == "DEFINED") { 1051 StringRef Name = readParenLiteral(); 1052 return [=] { return Symtab->find(Name) ? 1 : 0; }; 1053 } 1054 if (Tok == "LENGTH") { 1055 StringRef Name = readParenLiteral(); 1056 if (Script->MemoryRegions.count(Name) == 0) { 1057 setError("memory region not defined: " + Name); 1058 return [] { return 0; }; 1059 } 1060 return [=] { return Script->MemoryRegions[Name]->Length; }; 1061 } 1062 if (Tok == "LOADADDR") { 1063 StringRef Name = readParenLiteral(); 1064 OutputSection *Cmd = Script->getOrCreateOutputSection(Name); 1065 return [=] { 1066 checkIfExists(Cmd, Location); 1067 return Cmd->getLMA(); 1068 }; 1069 } 1070 if (Tok == "ORIGIN") { 1071 StringRef Name = readParenLiteral(); 1072 if (Script->MemoryRegions.count(Name) == 0) { 1073 setError("memory region not defined: " + Name); 1074 return [] { return 0; }; 1075 } 1076 return [=] { return Script->MemoryRegions[Name]->Origin; }; 1077 } 1078 if (Tok == "SEGMENT_START") { 1079 expect("("); 1080 skip(); 1081 expect(","); 1082 Expr E = readExpr(); 1083 expect(")"); 1084 return [=] { return E(); }; 1085 } 1086 if (Tok == "SIZEOF") { 1087 StringRef Name = readParenLiteral(); 1088 OutputSection *Cmd = Script->getOrCreateOutputSection(Name); 1089 // Linker script does not create an output section if its content is empty. 1090 // We want to allow SIZEOF(.foo) where .foo is a section which happened to 1091 // be empty. 1092 return [=] { return Cmd->Size; }; 1093 } 1094 if (Tok == "SIZEOF_HEADERS") 1095 return [=] { return elf::getHeaderSize(); }; 1096 1097 // Tok is the dot. 1098 if (Tok == ".") 1099 return [=] { return Script->getSymbolValue(Tok, Location); }; 1100 1101 // Tok is a literal number. 1102 if (Optional<uint64_t> Val = parseInt(Tok)) 1103 return [=] { return *Val; }; 1104 1105 // Tok is a symbol name. 1106 if (!isValidCIdentifier(Tok)) 1107 setError("malformed number: " + Tok); 1108 Script->ReferencedSymbols.push_back(Tok); 1109 return [=] { return Script->getSymbolValue(Tok, Location); }; 1110 } 1111 1112 Expr ScriptParser::readTernary(Expr Cond) { 1113 Expr L = readExpr(); 1114 expect(":"); 1115 Expr R = readExpr(); 1116 return [=] { return Cond().getValue() ? L() : R(); }; 1117 } 1118 1119 Expr ScriptParser::readParenExpr() { 1120 expect("("); 1121 Expr E = readExpr(); 1122 expect(")"); 1123 return E; 1124 } 1125 1126 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 1127 std::vector<StringRef> Phdrs; 1128 while (!errorCount() && peek().startswith(":")) { 1129 StringRef Tok = next(); 1130 Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); 1131 } 1132 return Phdrs; 1133 } 1134 1135 // Read a program header type name. The next token must be a 1136 // name of a program header type or a constant (e.g. "0x3"). 1137 unsigned ScriptParser::readPhdrType() { 1138 StringRef Tok = next(); 1139 if (Optional<uint64_t> Val = parseInt(Tok)) 1140 return *Val; 1141 1142 unsigned Ret = StringSwitch<unsigned>(Tok) 1143 .Case("PT_NULL", PT_NULL) 1144 .Case("PT_LOAD", PT_LOAD) 1145 .Case("PT_DYNAMIC", PT_DYNAMIC) 1146 .Case("PT_INTERP", PT_INTERP) 1147 .Case("PT_NOTE", PT_NOTE) 1148 .Case("PT_SHLIB", PT_SHLIB) 1149 .Case("PT_PHDR", PT_PHDR) 1150 .Case("PT_TLS", PT_TLS) 1151 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1152 .Case("PT_GNU_STACK", PT_GNU_STACK) 1153 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1154 .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) 1155 .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) 1156 .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) 1157 .Default(-1); 1158 1159 if (Ret == (unsigned)-1) { 1160 setError("invalid program header type: " + Tok); 1161 return PT_NULL; 1162 } 1163 return Ret; 1164 } 1165 1166 // Reads an anonymous version declaration. 1167 void ScriptParser::readAnonymousDeclaration() { 1168 std::vector<SymbolVersion> Locals; 1169 std::vector<SymbolVersion> Globals; 1170 std::tie(Locals, Globals) = readSymbols(); 1171 1172 for (SymbolVersion V : Locals) { 1173 if (V.Name == "*") 1174 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1175 else 1176 Config->VersionScriptLocals.push_back(V); 1177 } 1178 1179 for (SymbolVersion V : Globals) 1180 Config->VersionScriptGlobals.push_back(V); 1181 1182 expect(";"); 1183 } 1184 1185 // Reads a non-anonymous version definition, 1186 // e.g. "VerStr { global: foo; bar; local: *; };". 1187 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1188 // Read a symbol list. 1189 std::vector<SymbolVersion> Locals; 1190 std::vector<SymbolVersion> Globals; 1191 std::tie(Locals, Globals) = readSymbols(); 1192 1193 for (SymbolVersion V : Locals) { 1194 if (V.Name == "*") 1195 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1196 else 1197 Config->VersionScriptLocals.push_back(V); 1198 } 1199 1200 // Create a new version definition and add that to the global symbols. 1201 VersionDefinition Ver; 1202 Ver.Name = VerStr; 1203 Ver.Globals = Globals; 1204 1205 // User-defined version number starts from 2 because 0 and 1 are 1206 // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively. 1207 Ver.Id = Config->VersionDefinitions.size() + 2; 1208 Config->VersionDefinitions.push_back(Ver); 1209 1210 // Each version may have a parent version. For example, "Ver2" 1211 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" 1212 // as a parent. This version hierarchy is, probably against your 1213 // instinct, purely for hint; the runtime doesn't care about it 1214 // at all. In LLD, we simply ignore it. 1215 if (peek() != ";") 1216 skip(); 1217 expect(";"); 1218 } 1219 1220 static bool hasWildcard(StringRef S) { 1221 return S.find_first_of("?*[") != StringRef::npos; 1222 } 1223 1224 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 1225 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 1226 ScriptParser::readSymbols() { 1227 std::vector<SymbolVersion> Locals; 1228 std::vector<SymbolVersion> Globals; 1229 std::vector<SymbolVersion> *V = &Globals; 1230 1231 while (!errorCount()) { 1232 if (consume("}")) 1233 break; 1234 if (consumeLabel("local")) { 1235 V = &Locals; 1236 continue; 1237 } 1238 if (consumeLabel("global")) { 1239 V = &Globals; 1240 continue; 1241 } 1242 1243 if (consume("extern")) { 1244 std::vector<SymbolVersion> Ext = readVersionExtern(); 1245 V->insert(V->end(), Ext.begin(), Ext.end()); 1246 } else { 1247 StringRef Tok = next(); 1248 V->push_back({unquote(Tok), false, hasWildcard(Tok)}); 1249 } 1250 expect(";"); 1251 } 1252 return {Locals, Globals}; 1253 } 1254 1255 // Reads an "extern C++" directive, e.g., 1256 // "extern "C++" { ns::*; "f(int, double)"; };" 1257 // 1258 // The last semicolon is optional. E.g. this is OK: 1259 // "extern "C++" { ns::*; "f(int, double)" };" 1260 std::vector<SymbolVersion> ScriptParser::readVersionExtern() { 1261 StringRef Tok = next(); 1262 bool IsCXX = Tok == "\"C++\""; 1263 if (!IsCXX && Tok != "\"C\"") 1264 setError("Unknown language"); 1265 expect("{"); 1266 1267 std::vector<SymbolVersion> Ret; 1268 while (!errorCount() && peek() != "}") { 1269 StringRef Tok = next(); 1270 bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); 1271 Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); 1272 if (consume("}")) 1273 return Ret; 1274 expect(";"); 1275 } 1276 1277 expect("}"); 1278 return Ret; 1279 } 1280 1281 uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2, 1282 StringRef S3) { 1283 if (!consume(S1) && !consume(S2) && !consume(S3)) { 1284 setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); 1285 return 0; 1286 } 1287 expect("="); 1288 return readExpr()().getValue(); 1289 } 1290 1291 // Parse the MEMORY command as specified in: 1292 // https://sourceware.org/binutils/docs/ld/MEMORY.html 1293 // 1294 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... } 1295 void ScriptParser::readMemory() { 1296 expect("{"); 1297 while (!errorCount() && !consume("}")) { 1298 StringRef Name = next(); 1299 1300 uint32_t Flags = 0; 1301 uint32_t NegFlags = 0; 1302 if (consume("(")) { 1303 std::tie(Flags, NegFlags) = readMemoryAttributes(); 1304 expect(")"); 1305 } 1306 expect(":"); 1307 1308 uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); 1309 expect(","); 1310 uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); 1311 1312 // Add the memory region to the region map. 1313 if (Script->MemoryRegions.count(Name)) 1314 setError("region '" + Name + "' already defined"); 1315 MemoryRegion *MR = 1316 make<MemoryRegion>(Name, Origin, Length, Flags, NegFlags); 1317 Script->MemoryRegions[Name] = MR; 1318 } 1319 } 1320 1321 // This function parses the attributes used to match against section 1322 // flags when placing output sections in a memory region. These flags 1323 // are only used when an explicit memory region name is not used. 1324 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { 1325 uint32_t Flags = 0; 1326 uint32_t NegFlags = 0; 1327 bool Invert = false; 1328 1329 for (char C : next().lower()) { 1330 uint32_t Flag = 0; 1331 if (C == '!') 1332 Invert = !Invert; 1333 else if (C == 'w') 1334 Flag = SHF_WRITE; 1335 else if (C == 'x') 1336 Flag = SHF_EXECINSTR; 1337 else if (C == 'a') 1338 Flag = SHF_ALLOC; 1339 else if (C != 'r') 1340 setError("invalid memory region attribute"); 1341 1342 if (Invert) 1343 NegFlags |= Flag; 1344 else 1345 Flags |= Flag; 1346 } 1347 return {Flags, NegFlags}; 1348 } 1349 1350 void elf::readLinkerScript(MemoryBufferRef MB) { 1351 ScriptParser(MB).readLinkerScript(); 1352 } 1353 1354 void elf::readVersionScript(MemoryBufferRef MB) { 1355 ScriptParser(MB).readVersionScript(); 1356 } 1357 1358 void elf::readDynamicList(MemoryBufferRef MB) { 1359 ScriptParser(MB).readDynamicList(); 1360 } 1361 1362 void elf::readDefsym(StringRef Name, MemoryBufferRef MB) { 1363 ScriptParser(MB).readDefsym(Name); 1364 } 1365