1 //===- ScriptParser.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains a recursive-descendent parser for linker scripts. 11 // Parsed results are stored to Config and Script global objects. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ScriptParser.h" 16 #include "Config.h" 17 #include "Driver.h" 18 #include "InputSection.h" 19 #include "LinkerScript.h" 20 #include "OutputSections.h" 21 #include "ScriptLexer.h" 22 #include "Symbols.h" 23 #include "Target.h" 24 #include "lld/Common/Memory.h" 25 #include "llvm/ADT/SmallString.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/ADT/StringSet.h" 28 #include "llvm/ADT/StringSwitch.h" 29 #include "llvm/BinaryFormat/ELF.h" 30 #include "llvm/Support/Casting.h" 31 #include "llvm/Support/ErrorHandling.h" 32 #include "llvm/Support/FileSystem.h" 33 #include "llvm/Support/Path.h" 34 #include <cassert> 35 #include <limits> 36 #include <vector> 37 38 using namespace llvm; 39 using namespace llvm::ELF; 40 using namespace llvm::support::endian; 41 using namespace lld; 42 using namespace lld::elf; 43 44 static bool isUnderSysroot(StringRef Path); 45 46 namespace { 47 class ScriptParser final : ScriptLexer { 48 public: 49 ScriptParser(MemoryBufferRef MB) 50 : ScriptLexer(MB), 51 IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} 52 53 void readLinkerScript(); 54 void readVersionScript(); 55 void readDynamicList(); 56 void readDefsym(StringRef Name); 57 58 private: 59 void addFile(StringRef Path); 60 61 void readAsNeeded(); 62 void readEntry(); 63 void readExtern(); 64 void readGroup(); 65 void readInclude(); 66 void readInput(); 67 void readMemory(); 68 void readOutput(); 69 void readOutputArch(); 70 void readOutputFormat(); 71 void readPhdrs(); 72 void readRegionAlias(); 73 void readSearchDir(); 74 void readSections(); 75 void readTarget(); 76 void readVersion(); 77 void readVersionScriptCommand(); 78 79 SymbolAssignment *readSymbolAssignment(StringRef Name); 80 ByteCommand *readByteCommand(StringRef Tok); 81 uint32_t readFill(); 82 uint32_t parseFill(StringRef Tok); 83 bool readSectionDirective(OutputSection *Cmd, StringRef Tok1, StringRef Tok2); 84 void readSectionAddressType(OutputSection *Cmd); 85 OutputSection *readOverlaySectionDescription(); 86 OutputSection *readOutputSectionDescription(StringRef OutSec); 87 std::vector<BaseCommand *> readOverlay(); 88 std::vector<StringRef> readOutputSectionPhdrs(); 89 InputSectionDescription *readInputSectionDescription(StringRef Tok); 90 StringMatcher readFilePatterns(); 91 std::vector<SectionPattern> readInputSectionsList(); 92 InputSectionDescription *readInputSectionRules(StringRef FilePattern); 93 unsigned readPhdrType(); 94 SortSectionPolicy readSortKind(); 95 SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); 96 SymbolAssignment *readAssignment(StringRef Tok); 97 std::pair<ELFKind, uint16_t> readBfdName(); 98 void readSort(); 99 Expr readAssert(); 100 Expr readConstant(); 101 Expr getPageSize(); 102 103 uint64_t readMemoryAssignment(StringRef, StringRef, StringRef); 104 std::pair<uint32_t, uint32_t> readMemoryAttributes(); 105 106 Expr combine(StringRef Op, Expr L, Expr R); 107 Expr readExpr(); 108 Expr readExpr1(Expr Lhs, int MinPrec); 109 StringRef readParenLiteral(); 110 Expr readPrimary(); 111 Expr readTernary(Expr Cond); 112 Expr readParenExpr(); 113 114 // For parsing version script. 115 std::vector<SymbolVersion> readVersionExtern(); 116 void readAnonymousDeclaration(); 117 void readVersionDeclaration(StringRef VerStr); 118 119 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 120 readSymbols(); 121 122 // True if a script being read is in a subdirectory specified by -sysroot. 123 bool IsUnderSysroot; 124 125 // A set to detect an INCLUDE() cycle. 126 StringSet<> Seen; 127 }; 128 } // namespace 129 130 static StringRef unquote(StringRef S) { 131 if (S.startswith("\"")) 132 return S.substr(1, S.size() - 2); 133 return S; 134 } 135 136 static bool isUnderSysroot(StringRef Path) { 137 if (Config->Sysroot == "") 138 return false; 139 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 140 if (sys::fs::equivalent(Config->Sysroot, Path)) 141 return true; 142 return false; 143 } 144 145 // Some operations only support one non absolute value. Move the 146 // absolute one to the right hand side for convenience. 147 static void moveAbsRight(ExprValue &A, ExprValue &B) { 148 if (A.Sec == nullptr || (A.ForceAbsolute && !B.isAbsolute())) 149 std::swap(A, B); 150 if (!B.isAbsolute()) 151 error(A.Loc + ": at least one side of the expression must be absolute"); 152 } 153 154 static ExprValue add(ExprValue A, ExprValue B) { 155 moveAbsRight(A, B); 156 return {A.Sec, A.ForceAbsolute, A.getSectionOffset() + B.getValue(), A.Loc}; 157 } 158 159 static ExprValue sub(ExprValue A, ExprValue B) { 160 // The distance between two symbols in sections is absolute. 161 if (!A.isAbsolute() && !B.isAbsolute()) 162 return A.getValue() - B.getValue(); 163 return {A.Sec, false, A.getSectionOffset() - B.getValue(), A.Loc}; 164 } 165 166 static ExprValue bitAnd(ExprValue A, ExprValue B) { 167 moveAbsRight(A, B); 168 return {A.Sec, A.ForceAbsolute, 169 (A.getValue() & B.getValue()) - A.getSecAddr(), A.Loc}; 170 } 171 172 static ExprValue bitOr(ExprValue A, ExprValue B) { 173 moveAbsRight(A, B); 174 return {A.Sec, A.ForceAbsolute, 175 (A.getValue() | B.getValue()) - A.getSecAddr(), A.Loc}; 176 } 177 178 void ScriptParser::readDynamicList() { 179 Config->HasDynamicList = true; 180 expect("{"); 181 std::vector<SymbolVersion> Locals; 182 std::vector<SymbolVersion> Globals; 183 std::tie(Locals, Globals) = readSymbols(); 184 expect(";"); 185 186 if (!atEOF()) { 187 setError("EOF expected, but got " + next()); 188 return; 189 } 190 if (!Locals.empty()) { 191 setError("\"local:\" scope not supported in --dynamic-list"); 192 return; 193 } 194 195 for (SymbolVersion V : Globals) 196 Config->DynamicList.push_back(V); 197 } 198 199 void ScriptParser::readVersionScript() { 200 readVersionScriptCommand(); 201 if (!atEOF()) 202 setError("EOF expected, but got " + next()); 203 } 204 205 void ScriptParser::readVersionScriptCommand() { 206 if (consume("{")) { 207 readAnonymousDeclaration(); 208 return; 209 } 210 211 while (!atEOF() && !errorCount() && peek() != "}") { 212 StringRef VerStr = next(); 213 if (VerStr == "{") { 214 setError("anonymous version definition is used in " 215 "combination with other version definitions"); 216 return; 217 } 218 expect("{"); 219 readVersionDeclaration(VerStr); 220 } 221 } 222 223 void ScriptParser::readVersion() { 224 expect("{"); 225 readVersionScriptCommand(); 226 expect("}"); 227 } 228 229 void ScriptParser::readLinkerScript() { 230 while (!atEOF()) { 231 StringRef Tok = next(); 232 if (Tok == ";") 233 continue; 234 235 if (Tok == "ENTRY") { 236 readEntry(); 237 } else if (Tok == "EXTERN") { 238 readExtern(); 239 } else if (Tok == "GROUP") { 240 readGroup(); 241 } else if (Tok == "INCLUDE") { 242 readInclude(); 243 } else if (Tok == "INPUT") { 244 readInput(); 245 } else if (Tok == "MEMORY") { 246 readMemory(); 247 } else if (Tok == "OUTPUT") { 248 readOutput(); 249 } else if (Tok == "OUTPUT_ARCH") { 250 readOutputArch(); 251 } else if (Tok == "OUTPUT_FORMAT") { 252 readOutputFormat(); 253 } else if (Tok == "PHDRS") { 254 readPhdrs(); 255 } else if (Tok == "REGION_ALIAS") { 256 readRegionAlias(); 257 } else if (Tok == "SEARCH_DIR") { 258 readSearchDir(); 259 } else if (Tok == "SECTIONS") { 260 readSections(); 261 } else if (Tok == "TARGET") { 262 readTarget(); 263 } else if (Tok == "VERSION") { 264 readVersion(); 265 } else if (SymbolAssignment *Cmd = readAssignment(Tok)) { 266 Script->SectionCommands.push_back(Cmd); 267 } else { 268 setError("unknown directive: " + Tok); 269 } 270 } 271 } 272 273 void ScriptParser::readDefsym(StringRef Name) { 274 Expr E = readExpr(); 275 if (!atEOF()) 276 setError("EOF expected, but got " + next()); 277 SymbolAssignment *Cmd = make<SymbolAssignment>(Name, E, getCurrentLocation()); 278 Script->SectionCommands.push_back(Cmd); 279 } 280 281 void ScriptParser::addFile(StringRef S) { 282 if (IsUnderSysroot && S.startswith("/")) { 283 SmallString<128> PathData; 284 StringRef Path = (Config->Sysroot + S).toStringRef(PathData); 285 if (sys::fs::exists(Path)) { 286 Driver->addFile(Saver.save(Path), /*WithLOption=*/false); 287 return; 288 } 289 } 290 291 if (S.startswith("/")) { 292 Driver->addFile(S, /*WithLOption=*/false); 293 } else if (S.startswith("=")) { 294 if (Config->Sysroot.empty()) 295 Driver->addFile(S.substr(1), /*WithLOption=*/false); 296 else 297 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)), 298 /*WithLOption=*/false); 299 } else if (S.startswith("-l")) { 300 Driver->addLibrary(S.substr(2)); 301 } else if (sys::fs::exists(S)) { 302 Driver->addFile(S, /*WithLOption=*/false); 303 } else { 304 if (Optional<std::string> Path = findFromSearchPaths(S)) 305 Driver->addFile(Saver.save(*Path), /*WithLOption=*/true); 306 else 307 setError("unable to find " + S); 308 } 309 } 310 311 void ScriptParser::readAsNeeded() { 312 expect("("); 313 bool Orig = Config->AsNeeded; 314 Config->AsNeeded = true; 315 while (!errorCount() && !consume(")")) 316 addFile(unquote(next())); 317 Config->AsNeeded = Orig; 318 } 319 320 void ScriptParser::readEntry() { 321 // -e <symbol> takes predecence over ENTRY(<symbol>). 322 expect("("); 323 StringRef Tok = next(); 324 if (Config->Entry.empty()) 325 Config->Entry = Tok; 326 expect(")"); 327 } 328 329 void ScriptParser::readExtern() { 330 expect("("); 331 while (!errorCount() && !consume(")")) 332 Config->Undefined.push_back(next()); 333 } 334 335 void ScriptParser::readGroup() { 336 bool Orig = InputFile::IsInGroup; 337 InputFile::IsInGroup = true; 338 readInput(); 339 InputFile::IsInGroup = Orig; 340 if (!Orig) 341 ++InputFile::NextGroupId; 342 } 343 344 void ScriptParser::readInclude() { 345 StringRef Tok = unquote(next()); 346 347 if (!Seen.insert(Tok).second) { 348 setError("there is a cycle in linker script INCLUDEs"); 349 return; 350 } 351 352 if (Optional<std::string> Path = searchScript(Tok)) { 353 if (Optional<MemoryBufferRef> MB = readFile(*Path)) 354 tokenize(*MB); 355 return; 356 } 357 setError("cannot find linker script " + Tok); 358 } 359 360 void ScriptParser::readInput() { 361 expect("("); 362 while (!errorCount() && !consume(")")) { 363 if (consume("AS_NEEDED")) 364 readAsNeeded(); 365 else 366 addFile(unquote(next())); 367 } 368 } 369 370 void ScriptParser::readOutput() { 371 // -o <file> takes predecence over OUTPUT(<file>). 372 expect("("); 373 StringRef Tok = next(); 374 if (Config->OutputFile.empty()) 375 Config->OutputFile = unquote(Tok); 376 expect(")"); 377 } 378 379 void ScriptParser::readOutputArch() { 380 // OUTPUT_ARCH is ignored for now. 381 expect("("); 382 while (!errorCount() && !consume(")")) 383 skip(); 384 } 385 386 std::pair<ELFKind, uint16_t> ScriptParser::readBfdName() { 387 StringRef S = next(); 388 if (S == "elf32-i386") 389 return {ELF32LEKind, EM_386}; 390 if (S == "elf32-iamcu") 391 return {ELF32LEKind, EM_IAMCU}; 392 if (S == "elf32-x86-64") 393 return {ELF32LEKind, EM_X86_64}; 394 if (S == "elf64-littleaarch64") 395 return {ELF64LEKind, EM_AARCH64}; 396 if (S == "elf64-x86-64") 397 return {ELF64LEKind, EM_X86_64}; 398 399 setError("unknown output format name: " + S); 400 return {ELFNoneKind, EM_NONE}; 401 } 402 403 // Parse OUTPUT_FORMAT(bfdname) or OUTPUT_FORMAT(bfdname, big, little). 404 // Currently we ignore big and little parameters. 405 void ScriptParser::readOutputFormat() { 406 expect("("); 407 408 std::pair<ELFKind, uint16_t> P = readBfdName(); 409 if (Config->EKind == ELFNoneKind) { 410 Config->EKind = P.first; 411 Config->EMachine = P.second; 412 } 413 414 if (consume(")")) 415 return; 416 expect(","); 417 skip(); 418 expect(","); 419 skip(); 420 expect(")"); 421 } 422 423 void ScriptParser::readPhdrs() { 424 expect("{"); 425 426 while (!errorCount() && !consume("}")) { 427 PhdrsCommand Cmd; 428 Cmd.Name = next(); 429 Cmd.Type = readPhdrType(); 430 431 while (!errorCount() && !consume(";")) { 432 if (consume("FILEHDR")) 433 Cmd.HasFilehdr = true; 434 else if (consume("PHDRS")) 435 Cmd.HasPhdrs = true; 436 else if (consume("AT")) 437 Cmd.LMAExpr = readParenExpr(); 438 else if (consume("FLAGS")) 439 Cmd.Flags = readParenExpr()().getValue(); 440 else 441 setError("unexpected header attribute: " + next()); 442 } 443 444 Script->PhdrsCommands.push_back(Cmd); 445 } 446 } 447 448 void ScriptParser::readRegionAlias() { 449 expect("("); 450 StringRef Alias = unquote(next()); 451 expect(","); 452 StringRef Name = next(); 453 expect(")"); 454 455 if (Script->MemoryRegions.count(Alias)) 456 setError("redefinition of memory region '" + Alias + "'"); 457 if (!Script->MemoryRegions.count(Name)) 458 setError("memory region '" + Name + "' is not defined"); 459 Script->MemoryRegions.insert({Alias, Script->MemoryRegions[Name]}); 460 } 461 462 void ScriptParser::readSearchDir() { 463 expect("("); 464 StringRef Tok = next(); 465 if (!Config->Nostdlib) 466 Config->SearchPaths.push_back(unquote(Tok)); 467 expect(")"); 468 } 469 470 // This reads an overlay description. Overlays are used to describe output 471 // sections that use the same virtual memory range and normally would trigger 472 // linker's sections sanity check failures. 473 // https://sourceware.org/binutils/docs/ld/Overlay-Description.html#Overlay-Description 474 std::vector<BaseCommand *> ScriptParser::readOverlay() { 475 // VA and LMA expressions are optional, though for simplicity of 476 // implementation we assume they are not. That is what OVERLAY was designed 477 // for first of all: to allow sections with overlapping VAs at different LMAs. 478 Expr AddrExpr = readExpr(); 479 expect(":"); 480 expect("AT"); 481 Expr LMAExpr = readParenExpr(); 482 expect("{"); 483 484 std::vector<BaseCommand *> V; 485 OutputSection *Prev = nullptr; 486 while (!errorCount() && !consume("}")) { 487 // VA is the same for all sections. The LMAs are consecutive in memory 488 // starting from the base load address specified. 489 OutputSection *OS = readOverlaySectionDescription(); 490 OS->AddrExpr = AddrExpr; 491 if (Prev) 492 OS->LMAExpr = [=] { return Prev->getLMA() + Prev->Size; }; 493 else 494 OS->LMAExpr = LMAExpr; 495 V.push_back(OS); 496 Prev = OS; 497 } 498 499 // According to the specification, at the end of the overlay, the location 500 // counter should be equal to the overlay base address plus size of the 501 // largest section seen in the overlay. 502 // Here we want to create the Dot assignment command to achieve that. 503 Expr MoveDot = [=] { 504 uint64_t Max = 0; 505 for (BaseCommand *Cmd : V) 506 Max = std::max(Max, cast<OutputSection>(Cmd)->Size); 507 return AddrExpr().getValue() + Max; 508 }; 509 V.push_back(make<SymbolAssignment>(".", MoveDot, getCurrentLocation())); 510 return V; 511 } 512 513 void ScriptParser::readSections() { 514 Script->HasSectionsCommand = true; 515 516 // -no-rosegment is used to avoid placing read only non-executable sections in 517 // their own segment. We do the same if SECTIONS command is present in linker 518 // script. See comment for computeFlags(). 519 Config->SingleRoRx = true; 520 521 expect("{"); 522 std::vector<BaseCommand *> V; 523 while (!errorCount() && !consume("}")) { 524 StringRef Tok = next(); 525 if (Tok == "OVERLAY") { 526 for (BaseCommand *Cmd : readOverlay()) 527 V.push_back(Cmd); 528 continue; 529 } else if (Tok == "INCLUDE") { 530 readInclude(); 531 continue; 532 } 533 534 if (BaseCommand *Cmd = readAssignment(Tok)) 535 V.push_back(Cmd); 536 else 537 V.push_back(readOutputSectionDescription(Tok)); 538 } 539 540 if (!atEOF() && consume("INSERT")) { 541 std::vector<BaseCommand *> *Dest = nullptr; 542 if (consume("AFTER")) 543 Dest = &Script->InsertAfterCommands[next()]; 544 else if (consume("BEFORE")) 545 Dest = &Script->InsertBeforeCommands[next()]; 546 else 547 setError("expected AFTER/BEFORE, but got '" + next() + "'"); 548 if (Dest) 549 Dest->insert(Dest->end(), V.begin(), V.end()); 550 return; 551 } 552 553 Script->SectionCommands.insert(Script->SectionCommands.end(), V.begin(), 554 V.end()); 555 } 556 557 void ScriptParser::readTarget() { 558 // TARGET(foo) is an alias for "--format foo". Unlike GNU linkers, 559 // we accept only a limited set of BFD names (i.e. "elf" or "binary") 560 // for --format. We recognize only /^elf/ and "binary" in the linker 561 // script as well. 562 expect("("); 563 StringRef Tok = next(); 564 expect(")"); 565 566 if (Tok.startswith("elf")) 567 Config->FormatBinary = false; 568 else if (Tok == "binary") 569 Config->FormatBinary = true; 570 else 571 setError("unknown target: " + Tok); 572 } 573 574 static int precedence(StringRef Op) { 575 return StringSwitch<int>(Op) 576 .Cases("*", "/", "%", 8) 577 .Cases("+", "-", 7) 578 .Cases("<<", ">>", 6) 579 .Cases("<", "<=", ">", ">=", "==", "!=", 5) 580 .Case("&", 4) 581 .Case("|", 3) 582 .Case("&&", 2) 583 .Case("||", 1) 584 .Default(-1); 585 } 586 587 StringMatcher ScriptParser::readFilePatterns() { 588 std::vector<StringRef> V; 589 while (!errorCount() && !consume(")")) 590 V.push_back(next()); 591 return StringMatcher(V); 592 } 593 594 SortSectionPolicy ScriptParser::readSortKind() { 595 if (consume("SORT") || consume("SORT_BY_NAME")) 596 return SortSectionPolicy::Name; 597 if (consume("SORT_BY_ALIGNMENT")) 598 return SortSectionPolicy::Alignment; 599 if (consume("SORT_BY_INIT_PRIORITY")) 600 return SortSectionPolicy::Priority; 601 if (consume("SORT_NONE")) 602 return SortSectionPolicy::None; 603 return SortSectionPolicy::Default; 604 } 605 606 // Reads SECTIONS command contents in the following form: 607 // 608 // <contents> ::= <elem>* 609 // <elem> ::= <exclude>? <glob-pattern> 610 // <exclude> ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")" 611 // 612 // For example, 613 // 614 // *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz) 615 // 616 // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o". 617 // The semantics of that is section .foo in any file, section .bar in 618 // any file but a.o, and section .baz in any file but b.o. 619 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 620 std::vector<SectionPattern> Ret; 621 while (!errorCount() && peek() != ")") { 622 StringMatcher ExcludeFilePat; 623 if (consume("EXCLUDE_FILE")) { 624 expect("("); 625 ExcludeFilePat = readFilePatterns(); 626 } 627 628 std::vector<StringRef> V; 629 while (!errorCount() && peek() != ")" && peek() != "EXCLUDE_FILE") 630 V.push_back(next()); 631 632 if (!V.empty()) 633 Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); 634 else 635 setError("section pattern is expected"); 636 } 637 return Ret; 638 } 639 640 // Reads contents of "SECTIONS" directive. That directive contains a 641 // list of glob patterns for input sections. The grammar is as follows. 642 // 643 // <patterns> ::= <section-list> 644 // | <sort> "(" <section-list> ")" 645 // | <sort> "(" <sort> "(" <section-list> ")" ")" 646 // 647 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" 648 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" 649 // 650 // <section-list> is parsed by readInputSectionsList(). 651 InputSectionDescription * 652 ScriptParser::readInputSectionRules(StringRef FilePattern) { 653 auto *Cmd = make<InputSectionDescription>(FilePattern); 654 expect("("); 655 656 while (!errorCount() && !consume(")")) { 657 SortSectionPolicy Outer = readSortKind(); 658 SortSectionPolicy Inner = SortSectionPolicy::Default; 659 std::vector<SectionPattern> V; 660 if (Outer != SortSectionPolicy::Default) { 661 expect("("); 662 Inner = readSortKind(); 663 if (Inner != SortSectionPolicy::Default) { 664 expect("("); 665 V = readInputSectionsList(); 666 expect(")"); 667 } else { 668 V = readInputSectionsList(); 669 } 670 expect(")"); 671 } else { 672 V = readInputSectionsList(); 673 } 674 675 for (SectionPattern &Pat : V) { 676 Pat.SortInner = Inner; 677 Pat.SortOuter = Outer; 678 } 679 680 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 681 } 682 return Cmd; 683 } 684 685 InputSectionDescription * 686 ScriptParser::readInputSectionDescription(StringRef Tok) { 687 // Input section wildcard can be surrounded by KEEP. 688 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 689 if (Tok == "KEEP") { 690 expect("("); 691 StringRef FilePattern = next(); 692 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 693 expect(")"); 694 Script->KeptSections.push_back(Cmd); 695 return Cmd; 696 } 697 return readInputSectionRules(Tok); 698 } 699 700 void ScriptParser::readSort() { 701 expect("("); 702 expect("CONSTRUCTORS"); 703 expect(")"); 704 } 705 706 Expr ScriptParser::readAssert() { 707 expect("("); 708 Expr E = readExpr(); 709 expect(","); 710 StringRef Msg = unquote(next()); 711 expect(")"); 712 713 return [=] { 714 if (!E().getValue()) 715 error(Msg); 716 return Script->getDot(); 717 }; 718 } 719 720 // Reads a FILL(expr) command. We handle the FILL command as an 721 // alias for =fillexp section attribute, which is different from 722 // what GNU linkers do. 723 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 724 uint32_t ScriptParser::readFill() { 725 expect("("); 726 uint32_t V = parseFill(next()); 727 expect(")"); 728 return V; 729 } 730 731 // Tries to read the special directive for an output section definition which 732 // can be one of following: "(NOLOAD)", "(COPY)", "(INFO)" or "(OVERLAY)". 733 // Tok1 and Tok2 are next 2 tokens peeked. See comment for readSectionAddressType below. 734 bool ScriptParser::readSectionDirective(OutputSection *Cmd, StringRef Tok1, StringRef Tok2) { 735 if (Tok1 != "(") 736 return false; 737 if (Tok2 != "NOLOAD" && Tok2 != "COPY" && Tok2 != "INFO" && Tok2 != "OVERLAY") 738 return false; 739 740 expect("("); 741 if (consume("NOLOAD")) { 742 Cmd->Noload = true; 743 } else { 744 skip(); // This is "COPY", "INFO" or "OVERLAY". 745 Cmd->NonAlloc = true; 746 } 747 expect(")"); 748 return true; 749 } 750 751 // Reads an expression and/or the special directive for an output 752 // section definition. Directive is one of following: "(NOLOAD)", 753 // "(COPY)", "(INFO)" or "(OVERLAY)". 754 // 755 // An output section name can be followed by an address expression 756 // and/or directive. This grammar is not LL(1) because "(" can be 757 // interpreted as either the beginning of some expression or beginning 758 // of directive. 759 // 760 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html 761 // https://sourceware.org/binutils/docs/ld/Output-Section-Type.html 762 void ScriptParser::readSectionAddressType(OutputSection *Cmd) { 763 if (readSectionDirective(Cmd, peek(), peek2())) 764 return; 765 766 Cmd->AddrExpr = readExpr(); 767 if (peek() == "(" && !readSectionDirective(Cmd, "(", peek2())) 768 setError("unknown section directive: " + peek2()); 769 } 770 771 static Expr checkAlignment(Expr E, std::string &Loc) { 772 return [=] { 773 uint64_t Alignment = std::max((uint64_t)1, E().getValue()); 774 if (!isPowerOf2_64(Alignment)) { 775 error(Loc + ": alignment must be power of 2"); 776 return (uint64_t)1; // Return a dummy value. 777 } 778 return Alignment; 779 }; 780 } 781 782 OutputSection *ScriptParser::readOverlaySectionDescription() { 783 OutputSection *Cmd = 784 Script->createOutputSection(next(), getCurrentLocation()); 785 Cmd->InOverlay = true; 786 expect("{"); 787 while (!errorCount() && !consume("}")) 788 Cmd->SectionCommands.push_back(readInputSectionRules(next())); 789 Cmd->Phdrs = readOutputSectionPhdrs(); 790 return Cmd; 791 } 792 793 OutputSection *ScriptParser::readOutputSectionDescription(StringRef OutSec) { 794 OutputSection *Cmd = 795 Script->createOutputSection(OutSec, getCurrentLocation()); 796 797 size_t SymbolsReferenced = Script->ReferencedSymbols.size(); 798 799 if (peek() != ":") 800 readSectionAddressType(Cmd); 801 expect(":"); 802 803 std::string Location = getCurrentLocation(); 804 if (consume("AT")) 805 Cmd->LMAExpr = readParenExpr(); 806 if (consume("ALIGN")) 807 Cmd->AlignExpr = checkAlignment(readParenExpr(), Location); 808 if (consume("SUBALIGN")) 809 Cmd->SubalignExpr = checkAlignment(readParenExpr(), Location); 810 811 // Parse constraints. 812 if (consume("ONLY_IF_RO")) 813 Cmd->Constraint = ConstraintKind::ReadOnly; 814 if (consume("ONLY_IF_RW")) 815 Cmd->Constraint = ConstraintKind::ReadWrite; 816 expect("{"); 817 818 while (!errorCount() && !consume("}")) { 819 StringRef Tok = next(); 820 if (Tok == ";") { 821 // Empty commands are allowed. Do nothing here. 822 } else if (SymbolAssignment *Assign = readAssignment(Tok)) { 823 Cmd->SectionCommands.push_back(Assign); 824 } else if (ByteCommand *Data = readByteCommand(Tok)) { 825 Cmd->SectionCommands.push_back(Data); 826 } else if (Tok == "CONSTRUCTORS") { 827 // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors 828 // by name. This is for very old file formats such as ECOFF/XCOFF. 829 // For ELF, we should ignore. 830 } else if (Tok == "FILL") { 831 Cmd->Filler = readFill(); 832 } else if (Tok == "SORT") { 833 readSort(); 834 } else if (Tok == "INCLUDE") { 835 readInclude(); 836 } else if (peek() == "(") { 837 Cmd->SectionCommands.push_back(readInputSectionDescription(Tok)); 838 } else { 839 setError("unknown command " + Tok); 840 } 841 } 842 843 if (consume(">")) 844 Cmd->MemoryRegionName = next(); 845 846 if (consume("AT")) { 847 expect(">"); 848 Cmd->LMARegionName = next(); 849 } 850 851 if (Cmd->LMAExpr && !Cmd->LMARegionName.empty()) 852 error("section can't have both LMA and a load region"); 853 854 Cmd->Phdrs = readOutputSectionPhdrs(); 855 856 if (consume("=")) 857 Cmd->Filler = parseFill(next()); 858 else if (peek().startswith("=")) 859 Cmd->Filler = parseFill(next().drop_front()); 860 861 // Consume optional comma following output section command. 862 consume(","); 863 864 if (Script->ReferencedSymbols.size() > SymbolsReferenced) 865 Cmd->ExpressionsUseSymbols = true; 866 return Cmd; 867 } 868 869 // Parses a given string as a octal/decimal/hexadecimal number and 870 // returns it as a big-endian number. Used for `=<fillexp>`. 871 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 872 // 873 // When reading a hexstring, ld.bfd handles it as a blob of arbitrary 874 // size, while ld.gold always handles it as a 32-bit big-endian number. 875 // We are compatible with ld.gold because it's easier to implement. 876 uint32_t ScriptParser::parseFill(StringRef Tok) { 877 uint32_t V = 0; 878 if (!to_integer(Tok, V)) 879 setError("invalid filler expression: " + Tok); 880 881 uint32_t Buf; 882 write32be(&Buf, V); 883 return Buf; 884 } 885 886 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { 887 expect("("); 888 SymbolAssignment *Cmd = readSymbolAssignment(next()); 889 Cmd->Provide = Provide; 890 Cmd->Hidden = Hidden; 891 expect(")"); 892 return Cmd; 893 } 894 895 SymbolAssignment *ScriptParser::readAssignment(StringRef Tok) { 896 // Assert expression returns Dot, so this is equal to ".=." 897 if (Tok == "ASSERT") 898 return make<SymbolAssignment>(".", readAssert(), getCurrentLocation()); 899 900 size_t OldPos = Pos; 901 SymbolAssignment *Cmd = nullptr; 902 if (peek() == "=" || peek() == "+=") 903 Cmd = readSymbolAssignment(Tok); 904 else if (Tok == "PROVIDE") 905 Cmd = readProvideHidden(true, false); 906 else if (Tok == "HIDDEN") 907 Cmd = readProvideHidden(false, true); 908 else if (Tok == "PROVIDE_HIDDEN") 909 Cmd = readProvideHidden(true, true); 910 911 if (Cmd) { 912 Cmd->CommandString = 913 Tok.str() + " " + 914 llvm::join(Tokens.begin() + OldPos, Tokens.begin() + Pos, " "); 915 expect(";"); 916 } 917 return Cmd; 918 } 919 920 SymbolAssignment *ScriptParser::readSymbolAssignment(StringRef Name) { 921 StringRef Op = next(); 922 assert(Op == "=" || Op == "+="); 923 Expr E = readExpr(); 924 if (Op == "+=") { 925 std::string Loc = getCurrentLocation(); 926 E = [=] { return add(Script->getSymbolValue(Name, Loc), E()); }; 927 } 928 return make<SymbolAssignment>(Name, E, getCurrentLocation()); 929 } 930 931 // This is an operator-precedence parser to parse a linker 932 // script expression. 933 Expr ScriptParser::readExpr() { 934 // Our lexer is context-aware. Set the in-expression bit so that 935 // they apply different tokenization rules. 936 bool Orig = InExpr; 937 InExpr = true; 938 Expr E = readExpr1(readPrimary(), 0); 939 InExpr = Orig; 940 return E; 941 } 942 943 Expr ScriptParser::combine(StringRef Op, Expr L, Expr R) { 944 if (Op == "+") 945 return [=] { return add(L(), R()); }; 946 if (Op == "-") 947 return [=] { return sub(L(), R()); }; 948 if (Op == "*") 949 return [=] { return L().getValue() * R().getValue(); }; 950 if (Op == "/") { 951 std::string Loc = getCurrentLocation(); 952 return [=]() -> uint64_t { 953 if (uint64_t RV = R().getValue()) 954 return L().getValue() / RV; 955 error(Loc + ": division by zero"); 956 return 0; 957 }; 958 } 959 if (Op == "%") { 960 std::string Loc = getCurrentLocation(); 961 return [=]() -> uint64_t { 962 if (uint64_t RV = R().getValue()) 963 return L().getValue() % RV; 964 error(Loc + ": modulo by zero"); 965 return 0; 966 }; 967 } 968 if (Op == "<<") 969 return [=] { return L().getValue() << R().getValue(); }; 970 if (Op == ">>") 971 return [=] { return L().getValue() >> R().getValue(); }; 972 if (Op == "<") 973 return [=] { return L().getValue() < R().getValue(); }; 974 if (Op == ">") 975 return [=] { return L().getValue() > R().getValue(); }; 976 if (Op == ">=") 977 return [=] { return L().getValue() >= R().getValue(); }; 978 if (Op == "<=") 979 return [=] { return L().getValue() <= R().getValue(); }; 980 if (Op == "==") 981 return [=] { return L().getValue() == R().getValue(); }; 982 if (Op == "!=") 983 return [=] { return L().getValue() != R().getValue(); }; 984 if (Op == "||") 985 return [=] { return L().getValue() || R().getValue(); }; 986 if (Op == "&&") 987 return [=] { return L().getValue() && R().getValue(); }; 988 if (Op == "&") 989 return [=] { return bitAnd(L(), R()); }; 990 if (Op == "|") 991 return [=] { return bitOr(L(), R()); }; 992 llvm_unreachable("invalid operator"); 993 } 994 995 // This is a part of the operator-precedence parser. This function 996 // assumes that the remaining token stream starts with an operator. 997 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { 998 while (!atEOF() && !errorCount()) { 999 // Read an operator and an expression. 1000 if (consume("?")) 1001 return readTernary(Lhs); 1002 StringRef Op1 = peek(); 1003 if (precedence(Op1) < MinPrec) 1004 break; 1005 skip(); 1006 Expr Rhs = readPrimary(); 1007 1008 // Evaluate the remaining part of the expression first if the 1009 // next operator has greater precedence than the previous one. 1010 // For example, if we have read "+" and "3", and if the next 1011 // operator is "*", then we'll evaluate 3 * ... part first. 1012 while (!atEOF()) { 1013 StringRef Op2 = peek(); 1014 if (precedence(Op2) <= precedence(Op1)) 1015 break; 1016 Rhs = readExpr1(Rhs, precedence(Op2)); 1017 } 1018 1019 Lhs = combine(Op1, Lhs, Rhs); 1020 } 1021 return Lhs; 1022 } 1023 1024 Expr ScriptParser::getPageSize() { 1025 std::string Location = getCurrentLocation(); 1026 return [=]() -> uint64_t { 1027 if (Target) 1028 return Target->PageSize; 1029 error(Location + ": unable to calculate page size"); 1030 return 4096; // Return a dummy value. 1031 }; 1032 } 1033 1034 Expr ScriptParser::readConstant() { 1035 StringRef S = readParenLiteral(); 1036 if (S == "COMMONPAGESIZE") 1037 return getPageSize(); 1038 if (S == "MAXPAGESIZE") 1039 return [] { return Config->MaxPageSize; }; 1040 setError("unknown constant: " + S); 1041 return [] { return 0; }; 1042 } 1043 1044 // Parses Tok as an integer. It recognizes hexadecimal (prefixed with 1045 // "0x" or suffixed with "H") and decimal numbers. Decimal numbers may 1046 // have "K" (Ki) or "M" (Mi) suffixes. 1047 static Optional<uint64_t> parseInt(StringRef Tok) { 1048 // Hexadecimal 1049 uint64_t Val; 1050 if (Tok.startswith_lower("0x")) { 1051 if (!to_integer(Tok.substr(2), Val, 16)) 1052 return None; 1053 return Val; 1054 } 1055 if (Tok.endswith_lower("H")) { 1056 if (!to_integer(Tok.drop_back(), Val, 16)) 1057 return None; 1058 return Val; 1059 } 1060 1061 // Decimal 1062 if (Tok.endswith_lower("K")) { 1063 if (!to_integer(Tok.drop_back(), Val, 10)) 1064 return None; 1065 return Val * 1024; 1066 } 1067 if (Tok.endswith_lower("M")) { 1068 if (!to_integer(Tok.drop_back(), Val, 10)) 1069 return None; 1070 return Val * 1024 * 1024; 1071 } 1072 if (!to_integer(Tok, Val, 10)) 1073 return None; 1074 return Val; 1075 } 1076 1077 ByteCommand *ScriptParser::readByteCommand(StringRef Tok) { 1078 int Size = StringSwitch<int>(Tok) 1079 .Case("BYTE", 1) 1080 .Case("SHORT", 2) 1081 .Case("LONG", 4) 1082 .Case("QUAD", 8) 1083 .Default(-1); 1084 if (Size == -1) 1085 return nullptr; 1086 1087 size_t OldPos = Pos; 1088 Expr E = readParenExpr(); 1089 std::string CommandString = 1090 Tok.str() + " " + 1091 llvm::join(Tokens.begin() + OldPos, Tokens.begin() + Pos, " "); 1092 return make<ByteCommand>(E, Size, CommandString); 1093 } 1094 1095 StringRef ScriptParser::readParenLiteral() { 1096 expect("("); 1097 bool Orig = InExpr; 1098 InExpr = false; 1099 StringRef Tok = next(); 1100 InExpr = Orig; 1101 expect(")"); 1102 return Tok; 1103 } 1104 1105 static void checkIfExists(OutputSection *Cmd, StringRef Location) { 1106 if (Cmd->Location.empty() && Script->ErrorOnMissingSection) 1107 error(Location + ": undefined section " + Cmd->Name); 1108 } 1109 1110 Expr ScriptParser::readPrimary() { 1111 if (peek() == "(") 1112 return readParenExpr(); 1113 1114 if (consume("~")) { 1115 Expr E = readPrimary(); 1116 return [=] { return ~E().getValue(); }; 1117 } 1118 if (consume("!")) { 1119 Expr E = readPrimary(); 1120 return [=] { return !E().getValue(); }; 1121 } 1122 if (consume("-")) { 1123 Expr E = readPrimary(); 1124 return [=] { return -E().getValue(); }; 1125 } 1126 1127 StringRef Tok = next(); 1128 std::string Location = getCurrentLocation(); 1129 1130 // Built-in functions are parsed here. 1131 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 1132 if (Tok == "ABSOLUTE") { 1133 Expr Inner = readParenExpr(); 1134 return [=] { 1135 ExprValue I = Inner(); 1136 I.ForceAbsolute = true; 1137 return I; 1138 }; 1139 } 1140 if (Tok == "ADDR") { 1141 StringRef Name = readParenLiteral(); 1142 OutputSection *Sec = Script->getOrCreateOutputSection(Name); 1143 return [=]() -> ExprValue { 1144 checkIfExists(Sec, Location); 1145 return {Sec, false, 0, Location}; 1146 }; 1147 } 1148 if (Tok == "ALIGN") { 1149 expect("("); 1150 Expr E = readExpr(); 1151 if (consume(")")) { 1152 E = checkAlignment(E, Location); 1153 return [=] { return alignTo(Script->getDot(), E().getValue()); }; 1154 } 1155 expect(","); 1156 Expr E2 = checkAlignment(readExpr(), Location); 1157 expect(")"); 1158 return [=] { 1159 ExprValue V = E(); 1160 V.Alignment = E2().getValue(); 1161 return V; 1162 }; 1163 } 1164 if (Tok == "ALIGNOF") { 1165 StringRef Name = readParenLiteral(); 1166 OutputSection *Cmd = Script->getOrCreateOutputSection(Name); 1167 return [=] { 1168 checkIfExists(Cmd, Location); 1169 return Cmd->Alignment; 1170 }; 1171 } 1172 if (Tok == "ASSERT") 1173 return readAssert(); 1174 if (Tok == "CONSTANT") 1175 return readConstant(); 1176 if (Tok == "DATA_SEGMENT_ALIGN") { 1177 expect("("); 1178 Expr E = readExpr(); 1179 expect(","); 1180 readExpr(); 1181 expect(")"); 1182 return [=] { 1183 return alignTo(Script->getDot(), std::max((uint64_t)1, E().getValue())); 1184 }; 1185 } 1186 if (Tok == "DATA_SEGMENT_END") { 1187 expect("("); 1188 expect("."); 1189 expect(")"); 1190 return [] { return Script->getDot(); }; 1191 } 1192 if (Tok == "DATA_SEGMENT_RELRO_END") { 1193 // GNU linkers implements more complicated logic to handle 1194 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and 1195 // just align to the next page boundary for simplicity. 1196 expect("("); 1197 readExpr(); 1198 expect(","); 1199 readExpr(); 1200 expect(")"); 1201 Expr E = getPageSize(); 1202 return [=] { return alignTo(Script->getDot(), E().getValue()); }; 1203 } 1204 if (Tok == "DEFINED") { 1205 StringRef Name = readParenLiteral(); 1206 return [=] { return Symtab->find(Name) ? 1 : 0; }; 1207 } 1208 if (Tok == "LENGTH") { 1209 StringRef Name = readParenLiteral(); 1210 if (Script->MemoryRegions.count(Name) == 0) { 1211 setError("memory region not defined: " + Name); 1212 return [] { return 0; }; 1213 } 1214 return [=] { return Script->MemoryRegions[Name]->Length; }; 1215 } 1216 if (Tok == "LOADADDR") { 1217 StringRef Name = readParenLiteral(); 1218 OutputSection *Cmd = Script->getOrCreateOutputSection(Name); 1219 return [=] { 1220 checkIfExists(Cmd, Location); 1221 return Cmd->getLMA(); 1222 }; 1223 } 1224 if (Tok == "MAX" || Tok == "MIN") { 1225 expect("("); 1226 Expr A = readExpr(); 1227 expect(","); 1228 Expr B = readExpr(); 1229 expect(")"); 1230 if (Tok == "MIN") 1231 return [=] { return std::min(A().getValue(), B().getValue()); }; 1232 return [=] { return std::max(A().getValue(), B().getValue()); }; 1233 } 1234 if (Tok == "ORIGIN") { 1235 StringRef Name = readParenLiteral(); 1236 if (Script->MemoryRegions.count(Name) == 0) { 1237 setError("memory region not defined: " + Name); 1238 return [] { return 0; }; 1239 } 1240 return [=] { return Script->MemoryRegions[Name]->Origin; }; 1241 } 1242 if (Tok == "SEGMENT_START") { 1243 expect("("); 1244 skip(); 1245 expect(","); 1246 Expr E = readExpr(); 1247 expect(")"); 1248 return [=] { return E(); }; 1249 } 1250 if (Tok == "SIZEOF") { 1251 StringRef Name = readParenLiteral(); 1252 OutputSection *Cmd = Script->getOrCreateOutputSection(Name); 1253 // Linker script does not create an output section if its content is empty. 1254 // We want to allow SIZEOF(.foo) where .foo is a section which happened to 1255 // be empty. 1256 return [=] { return Cmd->Size; }; 1257 } 1258 if (Tok == "SIZEOF_HEADERS") 1259 return [=] { return elf::getHeaderSize(); }; 1260 1261 // Tok is the dot. 1262 if (Tok == ".") 1263 return [=] { return Script->getSymbolValue(Tok, Location); }; 1264 1265 // Tok is a literal number. 1266 if (Optional<uint64_t> Val = parseInt(Tok)) 1267 return [=] { return *Val; }; 1268 1269 // Tok is a symbol name. 1270 if (!isValidCIdentifier(Tok)) 1271 setError("malformed number: " + Tok); 1272 Script->ReferencedSymbols.push_back(Tok); 1273 return [=] { return Script->getSymbolValue(Tok, Location); }; 1274 } 1275 1276 Expr ScriptParser::readTernary(Expr Cond) { 1277 Expr L = readExpr(); 1278 expect(":"); 1279 Expr R = readExpr(); 1280 return [=] { return Cond().getValue() ? L() : R(); }; 1281 } 1282 1283 Expr ScriptParser::readParenExpr() { 1284 expect("("); 1285 Expr E = readExpr(); 1286 expect(")"); 1287 return E; 1288 } 1289 1290 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 1291 std::vector<StringRef> Phdrs; 1292 while (!errorCount() && peek().startswith(":")) { 1293 StringRef Tok = next(); 1294 Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); 1295 } 1296 return Phdrs; 1297 } 1298 1299 // Read a program header type name. The next token must be a 1300 // name of a program header type or a constant (e.g. "0x3"). 1301 unsigned ScriptParser::readPhdrType() { 1302 StringRef Tok = next(); 1303 if (Optional<uint64_t> Val = parseInt(Tok)) 1304 return *Val; 1305 1306 unsigned Ret = StringSwitch<unsigned>(Tok) 1307 .Case("PT_NULL", PT_NULL) 1308 .Case("PT_LOAD", PT_LOAD) 1309 .Case("PT_DYNAMIC", PT_DYNAMIC) 1310 .Case("PT_INTERP", PT_INTERP) 1311 .Case("PT_NOTE", PT_NOTE) 1312 .Case("PT_SHLIB", PT_SHLIB) 1313 .Case("PT_PHDR", PT_PHDR) 1314 .Case("PT_TLS", PT_TLS) 1315 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1316 .Case("PT_GNU_STACK", PT_GNU_STACK) 1317 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1318 .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) 1319 .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) 1320 .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) 1321 .Default(-1); 1322 1323 if (Ret == (unsigned)-1) { 1324 setError("invalid program header type: " + Tok); 1325 return PT_NULL; 1326 } 1327 return Ret; 1328 } 1329 1330 // Reads an anonymous version declaration. 1331 void ScriptParser::readAnonymousDeclaration() { 1332 std::vector<SymbolVersion> Locals; 1333 std::vector<SymbolVersion> Globals; 1334 std::tie(Locals, Globals) = readSymbols(); 1335 1336 for (SymbolVersion V : Locals) { 1337 if (V.Name == "*") 1338 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1339 else 1340 Config->VersionScriptLocals.push_back(V); 1341 } 1342 1343 for (SymbolVersion V : Globals) 1344 Config->VersionScriptGlobals.push_back(V); 1345 1346 expect(";"); 1347 } 1348 1349 // Reads a non-anonymous version definition, 1350 // e.g. "VerStr { global: foo; bar; local: *; };". 1351 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1352 // Read a symbol list. 1353 std::vector<SymbolVersion> Locals; 1354 std::vector<SymbolVersion> Globals; 1355 std::tie(Locals, Globals) = readSymbols(); 1356 1357 for (SymbolVersion V : Locals) { 1358 if (V.Name == "*") 1359 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1360 else 1361 Config->VersionScriptLocals.push_back(V); 1362 } 1363 1364 // Create a new version definition and add that to the global symbols. 1365 VersionDefinition Ver; 1366 Ver.Name = VerStr; 1367 Ver.Globals = Globals; 1368 1369 // User-defined version number starts from 2 because 0 and 1 are 1370 // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively. 1371 Ver.Id = Config->VersionDefinitions.size() + 2; 1372 Config->VersionDefinitions.push_back(Ver); 1373 1374 // Each version may have a parent version. For example, "Ver2" 1375 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" 1376 // as a parent. This version hierarchy is, probably against your 1377 // instinct, purely for hint; the runtime doesn't care about it 1378 // at all. In LLD, we simply ignore it. 1379 if (peek() != ";") 1380 skip(); 1381 expect(";"); 1382 } 1383 1384 static bool hasWildcard(StringRef S) { 1385 return S.find_first_of("?*[") != StringRef::npos; 1386 } 1387 1388 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 1389 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 1390 ScriptParser::readSymbols() { 1391 std::vector<SymbolVersion> Locals; 1392 std::vector<SymbolVersion> Globals; 1393 std::vector<SymbolVersion> *V = &Globals; 1394 1395 while (!errorCount()) { 1396 if (consume("}")) 1397 break; 1398 if (consumeLabel("local")) { 1399 V = &Locals; 1400 continue; 1401 } 1402 if (consumeLabel("global")) { 1403 V = &Globals; 1404 continue; 1405 } 1406 1407 if (consume("extern")) { 1408 std::vector<SymbolVersion> Ext = readVersionExtern(); 1409 V->insert(V->end(), Ext.begin(), Ext.end()); 1410 } else { 1411 StringRef Tok = next(); 1412 V->push_back({unquote(Tok), false, hasWildcard(Tok)}); 1413 } 1414 expect(";"); 1415 } 1416 return {Locals, Globals}; 1417 } 1418 1419 // Reads an "extern C++" directive, e.g., 1420 // "extern "C++" { ns::*; "f(int, double)"; };" 1421 // 1422 // The last semicolon is optional. E.g. this is OK: 1423 // "extern "C++" { ns::*; "f(int, double)" };" 1424 std::vector<SymbolVersion> ScriptParser::readVersionExtern() { 1425 StringRef Tok = next(); 1426 bool IsCXX = Tok == "\"C++\""; 1427 if (!IsCXX && Tok != "\"C\"") 1428 setError("Unknown language"); 1429 expect("{"); 1430 1431 std::vector<SymbolVersion> Ret; 1432 while (!errorCount() && peek() != "}") { 1433 StringRef Tok = next(); 1434 bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); 1435 Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); 1436 if (consume("}")) 1437 return Ret; 1438 expect(";"); 1439 } 1440 1441 expect("}"); 1442 return Ret; 1443 } 1444 1445 uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2, 1446 StringRef S3) { 1447 if (!consume(S1) && !consume(S2) && !consume(S3)) { 1448 setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); 1449 return 0; 1450 } 1451 expect("="); 1452 return readExpr()().getValue(); 1453 } 1454 1455 // Parse the MEMORY command as specified in: 1456 // https://sourceware.org/binutils/docs/ld/MEMORY.html 1457 // 1458 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... } 1459 void ScriptParser::readMemory() { 1460 expect("{"); 1461 while (!errorCount() && !consume("}")) { 1462 StringRef Tok = next(); 1463 if (Tok == "INCLUDE") { 1464 readInclude(); 1465 continue; 1466 } 1467 1468 uint32_t Flags = 0; 1469 uint32_t NegFlags = 0; 1470 if (consume("(")) { 1471 std::tie(Flags, NegFlags) = readMemoryAttributes(); 1472 expect(")"); 1473 } 1474 expect(":"); 1475 1476 uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); 1477 expect(","); 1478 uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); 1479 1480 // Add the memory region to the region map. 1481 MemoryRegion *MR = make<MemoryRegion>(Tok, Origin, Length, Flags, NegFlags); 1482 if (!Script->MemoryRegions.insert({Tok, MR}).second) 1483 setError("region '" + Tok + "' already defined"); 1484 } 1485 } 1486 1487 // This function parses the attributes used to match against section 1488 // flags when placing output sections in a memory region. These flags 1489 // are only used when an explicit memory region name is not used. 1490 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { 1491 uint32_t Flags = 0; 1492 uint32_t NegFlags = 0; 1493 bool Invert = false; 1494 1495 for (char C : next().lower()) { 1496 uint32_t Flag = 0; 1497 if (C == '!') 1498 Invert = !Invert; 1499 else if (C == 'w') 1500 Flag = SHF_WRITE; 1501 else if (C == 'x') 1502 Flag = SHF_EXECINSTR; 1503 else if (C == 'a') 1504 Flag = SHF_ALLOC; 1505 else if (C != 'r') 1506 setError("invalid memory region attribute"); 1507 1508 if (Invert) 1509 NegFlags |= Flag; 1510 else 1511 Flags |= Flag; 1512 } 1513 return {Flags, NegFlags}; 1514 } 1515 1516 void elf::readLinkerScript(MemoryBufferRef MB) { 1517 ScriptParser(MB).readLinkerScript(); 1518 } 1519 1520 void elf::readVersionScript(MemoryBufferRef MB) { 1521 ScriptParser(MB).readVersionScript(); 1522 } 1523 1524 void elf::readDynamicList(MemoryBufferRef MB) { 1525 ScriptParser(MB).readDynamicList(); 1526 } 1527 1528 void elf::readDefsym(StringRef Name, MemoryBufferRef MB) { 1529 ScriptParser(MB).readDefsym(Name); 1530 } 1531