1 //===- LinkerScript.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the parser/evaluator of the linker script. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "LinkerScript.h" 15 #include "Config.h" 16 #include "Driver.h" 17 #include "InputSection.h" 18 #include "Memory.h" 19 #include "OutputSections.h" 20 #include "ScriptLexer.h" 21 #include "Strings.h" 22 #include "SymbolTable.h" 23 #include "Symbols.h" 24 #include "SyntheticSections.h" 25 #include "Target.h" 26 #include "Writer.h" 27 #include "llvm/ADT/STLExtras.h" 28 #include "llvm/ADT/SmallString.h" 29 #include "llvm/ADT/StringRef.h" 30 #include "llvm/ADT/StringSwitch.h" 31 #include "llvm/Support/Casting.h" 32 #include "llvm/Support/ELF.h" 33 #include "llvm/Support/Endian.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/FileSystem.h" 36 #include "llvm/Support/MathExtras.h" 37 #include "llvm/Support/Path.h" 38 #include <algorithm> 39 #include <cassert> 40 #include <cstddef> 41 #include <cstdint> 42 #include <iterator> 43 #include <limits> 44 #include <memory> 45 #include <string> 46 #include <tuple> 47 #include <vector> 48 49 using namespace llvm; 50 using namespace llvm::ELF; 51 using namespace llvm::object; 52 using namespace llvm::support::endian; 53 using namespace lld; 54 using namespace lld::elf; 55 56 LinkerScript *elf::Script; 57 58 uint64_t ExprValue::getValue() const { 59 if (Sec) 60 return Sec->getOffset(Val) + Sec->getOutputSection()->Addr; 61 return Val; 62 } 63 64 uint64_t ExprValue::getSecAddr() const { 65 if (Sec) 66 return Sec->getOffset(0) + Sec->getOutputSection()->Addr; 67 return 0; 68 } 69 70 // Some operations only support one non absolute value. Move the 71 // absolute one to the right hand side for convenience. 72 static void moveAbsRight(ExprValue &A, ExprValue &B) { 73 if (A.isAbsolute()) 74 std::swap(A, B); 75 if (!B.isAbsolute()) 76 error("At least one side of the expression must be absolute"); 77 } 78 79 static ExprValue add(ExprValue A, ExprValue B) { 80 moveAbsRight(A, B); 81 return {A.Sec, A.ForceAbsolute, A.Val + B.getValue()}; 82 } 83 static ExprValue sub(ExprValue A, ExprValue B) { 84 return {A.Sec, A.Val - B.getValue()}; 85 } 86 static ExprValue mul(ExprValue A, ExprValue B) { 87 return A.getValue() * B.getValue(); 88 } 89 static ExprValue div(ExprValue A, ExprValue B) { 90 if (uint64_t BV = B.getValue()) 91 return A.getValue() / BV; 92 error("division by zero"); 93 return 0; 94 } 95 static ExprValue leftShift(ExprValue A, ExprValue B) { 96 return A.getValue() << B.getValue(); 97 } 98 static ExprValue rightShift(ExprValue A, ExprValue B) { 99 return A.getValue() >> B.getValue(); 100 } 101 static ExprValue bitAnd(ExprValue A, ExprValue B) { 102 moveAbsRight(A, B); 103 return {A.Sec, A.ForceAbsolute, 104 (A.getValue() & B.getValue()) - A.getSecAddr()}; 105 } 106 static ExprValue bitOr(ExprValue A, ExprValue B) { 107 moveAbsRight(A, B); 108 return {A.Sec, A.ForceAbsolute, 109 (A.getValue() | B.getValue()) - A.getSecAddr()}; 110 } 111 static ExprValue bitNot(ExprValue A) { return ~A.getValue(); } 112 static ExprValue minus(ExprValue A) { return -A.getValue(); } 113 114 template <class ELFT> static SymbolBody *addRegular(SymbolAssignment *Cmd) { 115 Symbol *Sym; 116 uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT; 117 std::tie(Sym, std::ignore) = Symtab<ELFT>::X->insert( 118 Cmd->Name, /*Type*/ 0, Visibility, /*CanOmitFromDynSym*/ false, 119 /*File*/ nullptr); 120 Sym->Binding = STB_GLOBAL; 121 ExprValue Value = Cmd->Expression(); 122 SectionBase *Sec = Value.isAbsolute() ? nullptr : Value.Sec; 123 replaceBody<DefinedRegular>(Sym, Cmd->Name, /*IsLocal=*/false, Visibility, 124 STT_NOTYPE, 0, 0, Sec, nullptr); 125 return Sym->body(); 126 } 127 128 static bool isUnderSysroot(StringRef Path) { 129 if (Config->Sysroot == "") 130 return false; 131 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 132 if (sys::fs::equivalent(Config->Sysroot, Path)) 133 return true; 134 return false; 135 } 136 137 OutputSection *LinkerScript::getOutputSection(const Twine &Loc, 138 StringRef Name) { 139 static OutputSection FakeSec("", 0, 0); 140 141 for (OutputSection *Sec : *OutputSections) 142 if (Sec->Name == Name) 143 return Sec; 144 145 if (ErrorOnMissingSection) 146 error(Loc + ": undefined section " + Name); 147 return &FakeSec; 148 } 149 150 // This function is essentially the same as getOutputSection(Name)->Size, 151 // but it won't print out an error message if a given section is not found. 152 // 153 // Linker script does not create an output section if its content is empty. 154 // We want to allow SIZEOF(.foo) where .foo is a section which happened to 155 // be empty. That is why this function is different from getOutputSection(). 156 uint64_t LinkerScript::getOutputSectionSize(StringRef Name) { 157 for (OutputSection *Sec : *OutputSections) 158 if (Sec->Name == Name) 159 return Sec->Size; 160 return 0; 161 } 162 163 void LinkerScript::setDot(Expr E, const Twine &Loc, bool InSec) { 164 uint64_t Val = E().getValue(); 165 if (Val < Dot) { 166 if (InSec) 167 error(Loc + ": unable to move location counter backward for: " + 168 CurOutSec->Name); 169 else 170 error(Loc + ": unable to move location counter backward"); 171 } 172 Dot = Val; 173 // Update to location counter means update to section size. 174 if (InSec) 175 CurOutSec->Size = Dot - CurOutSec->Addr; 176 } 177 178 // Sets value of a symbol. Two kinds of symbols are processed: synthetic 179 // symbols, whose value is an offset from beginning of section and regular 180 // symbols whose value is absolute. 181 void LinkerScript::assignSymbol(SymbolAssignment *Cmd, bool InSec) { 182 if (Cmd->Name == ".") { 183 setDot(Cmd->Expression, Cmd->Location, InSec); 184 return; 185 } 186 187 if (!Cmd->Sym) 188 return; 189 190 auto *Sym = cast<DefinedRegular>(Cmd->Sym); 191 ExprValue V = Cmd->Expression(); 192 if (V.isAbsolute()) { 193 Sym->Value = V.getValue(); 194 } else { 195 Sym->Section = V.Sec; 196 if (Sym->Section->Flags & SHF_ALLOC) 197 Sym->Value = V.Val; 198 else 199 Sym->Value = V.getValue(); 200 } 201 } 202 203 static SymbolBody *findSymbol(StringRef S) { 204 switch (Config->EKind) { 205 case ELF32LEKind: 206 return Symtab<ELF32LE>::X->find(S); 207 case ELF32BEKind: 208 return Symtab<ELF32BE>::X->find(S); 209 case ELF64LEKind: 210 return Symtab<ELF64LE>::X->find(S); 211 case ELF64BEKind: 212 return Symtab<ELF64BE>::X->find(S); 213 default: 214 llvm_unreachable("unknown Config->EKind"); 215 } 216 } 217 218 static SymbolBody *addRegularSymbol(SymbolAssignment *Cmd) { 219 switch (Config->EKind) { 220 case ELF32LEKind: 221 return addRegular<ELF32LE>(Cmd); 222 case ELF32BEKind: 223 return addRegular<ELF32BE>(Cmd); 224 case ELF64LEKind: 225 return addRegular<ELF64LE>(Cmd); 226 case ELF64BEKind: 227 return addRegular<ELF64BE>(Cmd); 228 default: 229 llvm_unreachable("unknown Config->EKind"); 230 } 231 } 232 233 void LinkerScript::addSymbol(SymbolAssignment *Cmd) { 234 if (Cmd->Name == ".") 235 return; 236 237 // If a symbol was in PROVIDE(), we need to define it only when 238 // it is a referenced undefined symbol. 239 SymbolBody *B = findSymbol(Cmd->Name); 240 if (Cmd->Provide && (!B || B->isDefined())) 241 return; 242 243 Cmd->Sym = addRegularSymbol(Cmd); 244 } 245 246 bool SymbolAssignment::classof(const BaseCommand *C) { 247 return C->Kind == AssignmentKind; 248 } 249 250 bool OutputSectionCommand::classof(const BaseCommand *C) { 251 return C->Kind == OutputSectionKind; 252 } 253 254 bool InputSectionDescription::classof(const BaseCommand *C) { 255 return C->Kind == InputSectionKind; 256 } 257 258 bool AssertCommand::classof(const BaseCommand *C) { 259 return C->Kind == AssertKind; 260 } 261 262 bool BytesDataCommand::classof(const BaseCommand *C) { 263 return C->Kind == BytesDataKind; 264 } 265 266 static StringRef basename(InputSectionBase *S) { 267 if (S->File) 268 return sys::path::filename(S->File->getName()); 269 return ""; 270 } 271 272 bool LinkerScript::shouldKeep(InputSectionBase *S) { 273 for (InputSectionDescription *ID : Opt.KeptSections) 274 if (ID->FilePat.match(basename(S))) 275 for (SectionPattern &P : ID->SectionPatterns) 276 if (P.SectionPat.match(S->Name)) 277 return true; 278 return false; 279 } 280 281 static bool comparePriority(InputSectionBase *A, InputSectionBase *B) { 282 return getPriority(A->Name) < getPriority(B->Name); 283 } 284 285 static bool compareName(InputSectionBase *A, InputSectionBase *B) { 286 return A->Name < B->Name; 287 } 288 289 static bool compareAlignment(InputSectionBase *A, InputSectionBase *B) { 290 // ">" is not a mistake. Larger alignments are placed before smaller 291 // alignments in order to reduce the amount of padding necessary. 292 // This is compatible with GNU. 293 return A->Alignment > B->Alignment; 294 } 295 296 static std::function<bool(InputSectionBase *, InputSectionBase *)> 297 getComparator(SortSectionPolicy K) { 298 switch (K) { 299 case SortSectionPolicy::Alignment: 300 return compareAlignment; 301 case SortSectionPolicy::Name: 302 return compareName; 303 case SortSectionPolicy::Priority: 304 return comparePriority; 305 default: 306 llvm_unreachable("unknown sort policy"); 307 } 308 } 309 310 static bool matchConstraints(ArrayRef<InputSectionBase *> Sections, 311 ConstraintKind Kind) { 312 if (Kind == ConstraintKind::NoConstraint) 313 return true; 314 bool IsRW = llvm::any_of(Sections, [=](InputSectionBase *Sec2) { 315 auto *Sec = static_cast<InputSectionBase *>(Sec2); 316 return Sec->Flags & SHF_WRITE; 317 }); 318 return (IsRW && Kind == ConstraintKind::ReadWrite) || 319 (!IsRW && Kind == ConstraintKind::ReadOnly); 320 } 321 322 static void sortSections(InputSectionBase **Begin, InputSectionBase **End, 323 SortSectionPolicy K) { 324 if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None) 325 std::stable_sort(Begin, End, getComparator(K)); 326 } 327 328 // Compute and remember which sections the InputSectionDescription matches. 329 void LinkerScript::computeInputSections(InputSectionDescription *I) { 330 // Collects all sections that satisfy constraints of I 331 // and attach them to I. 332 for (SectionPattern &Pat : I->SectionPatterns) { 333 size_t SizeBefore = I->Sections.size(); 334 335 for (InputSectionBase *S : InputSections) { 336 if (S->Assigned) 337 continue; 338 // For -emit-relocs we have to ignore entries like 339 // .rela.dyn : { *(.rela.data) } 340 // which are common because they are in the default bfd script. 341 if (S->Type == SHT_REL || S->Type == SHT_RELA) 342 continue; 343 344 StringRef Filename = basename(S); 345 if (!I->FilePat.match(Filename) || Pat.ExcludedFilePat.match(Filename)) 346 continue; 347 if (!Pat.SectionPat.match(S->Name)) 348 continue; 349 I->Sections.push_back(S); 350 S->Assigned = true; 351 } 352 353 // Sort sections as instructed by SORT-family commands and --sort-section 354 // option. Because SORT-family commands can be nested at most two depth 355 // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command 356 // line option is respected even if a SORT command is given, the exact 357 // behavior we have here is a bit complicated. Here are the rules. 358 // 359 // 1. If two SORT commands are given, --sort-section is ignored. 360 // 2. If one SORT command is given, and if it is not SORT_NONE, 361 // --sort-section is handled as an inner SORT command. 362 // 3. If one SORT command is given, and if it is SORT_NONE, don't sort. 363 // 4. If no SORT command is given, sort according to --sort-section. 364 InputSectionBase **Begin = I->Sections.data() + SizeBefore; 365 InputSectionBase **End = I->Sections.data() + I->Sections.size(); 366 if (Pat.SortOuter != SortSectionPolicy::None) { 367 if (Pat.SortInner == SortSectionPolicy::Default) 368 sortSections(Begin, End, Config->SortSection); 369 else 370 sortSections(Begin, End, Pat.SortInner); 371 sortSections(Begin, End, Pat.SortOuter); 372 } 373 } 374 } 375 376 void LinkerScript::discard(ArrayRef<InputSectionBase *> V) { 377 for (InputSectionBase *S : V) { 378 S->Live = false; 379 if (S == InX::ShStrTab) 380 error("discarding .shstrtab section is not allowed"); 381 discard(S->DependentSections); 382 } 383 } 384 385 std::vector<InputSectionBase *> 386 LinkerScript::createInputSectionList(OutputSectionCommand &OutCmd) { 387 std::vector<InputSectionBase *> Ret; 388 389 for (const std::unique_ptr<BaseCommand> &Base : OutCmd.Commands) { 390 auto *Cmd = dyn_cast<InputSectionDescription>(Base.get()); 391 if (!Cmd) 392 continue; 393 computeInputSections(Cmd); 394 for (InputSectionBase *S : Cmd->Sections) 395 Ret.push_back(static_cast<InputSectionBase *>(S)); 396 } 397 398 return Ret; 399 } 400 401 void LinkerScript::processCommands(OutputSectionFactory &Factory) { 402 // A symbol can be assigned before any section is mentioned in the linker 403 // script. In an DSO, the symbol values are addresses, so the only important 404 // section values are: 405 // * SHN_UNDEF 406 // * SHN_ABS 407 // * Any value meaning a regular section. 408 // To handle that, create a dummy aether section that fills the void before 409 // the linker scripts switches to another section. It has an index of one 410 // which will map to whatever the first actual section is. 411 Aether = make<OutputSection>("", 0, SHF_ALLOC); 412 Aether->SectionIndex = 1; 413 CurOutSec = Aether; 414 Dot = 0; 415 416 for (unsigned I = 0; I < Opt.Commands.size(); ++I) { 417 auto Iter = Opt.Commands.begin() + I; 418 const std::unique_ptr<BaseCommand> &Base1 = *Iter; 419 420 // Handle symbol assignments outside of any output section. 421 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base1.get())) { 422 addSymbol(Cmd); 423 continue; 424 } 425 426 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base1.get())) { 427 std::vector<InputSectionBase *> V = createInputSectionList(*Cmd); 428 429 // The output section name `/DISCARD/' is special. 430 // Any input section assigned to it is discarded. 431 if (Cmd->Name == "/DISCARD/") { 432 discard(V); 433 continue; 434 } 435 436 // This is for ONLY_IF_RO and ONLY_IF_RW. An output section directive 437 // ".foo : ONLY_IF_R[OW] { ... }" is handled only if all member input 438 // sections satisfy a given constraint. If not, a directive is handled 439 // as if it wasn't present from the beginning. 440 // 441 // Because we'll iterate over Commands many more times, the easiest 442 // way to "make it as if it wasn't present" is to just remove it. 443 if (!matchConstraints(V, Cmd->Constraint)) { 444 for (InputSectionBase *S : V) 445 S->Assigned = false; 446 Opt.Commands.erase(Iter); 447 --I; 448 continue; 449 } 450 451 // A directive may contain symbol definitions like this: 452 // ".foo : { ...; bar = .; }". Handle them. 453 for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) 454 if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base.get())) 455 addSymbol(OutCmd); 456 457 // Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign 458 // is given, input sections are aligned to that value, whether the 459 // given value is larger or smaller than the original section alignment. 460 if (Cmd->SubalignExpr) { 461 uint32_t Subalign = Cmd->SubalignExpr().getValue(); 462 for (InputSectionBase *S : V) 463 S->Alignment = Subalign; 464 } 465 466 // Add input sections to an output section. 467 for (InputSectionBase *S : V) 468 Factory.addInputSec(S, Cmd->Name); 469 } 470 } 471 CurOutSec = nullptr; 472 } 473 474 // Add sections that didn't match any sections command. 475 void LinkerScript::addOrphanSections(OutputSectionFactory &Factory) { 476 for (InputSectionBase *S : InputSections) 477 if (S->Live && !S->OutSec) 478 Factory.addInputSec(S, getOutputSectionName(S->Name)); 479 } 480 481 static bool isTbss(OutputSection *Sec) { 482 return (Sec->Flags & SHF_TLS) && Sec->Type == SHT_NOBITS; 483 } 484 485 void LinkerScript::output(InputSection *S) { 486 if (!AlreadyOutputIS.insert(S).second) 487 return; 488 bool IsTbss = isTbss(CurOutSec); 489 490 uint64_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot; 491 Pos = alignTo(Pos, S->Alignment); 492 S->OutSecOff = Pos - CurOutSec->Addr; 493 Pos += S->getSize(); 494 495 // Update output section size after adding each section. This is so that 496 // SIZEOF works correctly in the case below: 497 // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) } 498 CurOutSec->Size = Pos - CurOutSec->Addr; 499 500 // If there is a memory region associated with this input section, then 501 // place the section in that region and update the region index. 502 if (CurMemRegion) { 503 CurMemRegion->Offset += CurOutSec->Size; 504 uint64_t CurSize = CurMemRegion->Offset - CurMemRegion->Origin; 505 if (CurSize > CurMemRegion->Length) { 506 uint64_t OverflowAmt = CurSize - CurMemRegion->Length; 507 error("section '" + CurOutSec->Name + "' will not fit in region '" + 508 CurMemRegion->Name + "': overflowed by " + Twine(OverflowAmt) + 509 " bytes"); 510 } 511 } 512 513 if (IsTbss) 514 ThreadBssOffset = Pos - Dot; 515 else 516 Dot = Pos; 517 } 518 519 void LinkerScript::flush() { 520 assert(CurOutSec); 521 if (!AlreadyOutputOS.insert(CurOutSec).second) 522 return; 523 for (InputSection *I : CurOutSec->Sections) 524 output(I); 525 } 526 527 void LinkerScript::switchTo(OutputSection *Sec) { 528 if (CurOutSec == Sec) 529 return; 530 if (AlreadyOutputOS.count(Sec)) 531 return; 532 533 CurOutSec = Sec; 534 535 Dot = alignTo(Dot, CurOutSec->Alignment); 536 CurOutSec->Addr = isTbss(CurOutSec) ? Dot + ThreadBssOffset : Dot; 537 538 // If neither AT nor AT> is specified for an allocatable section, the linker 539 // will set the LMA such that the difference between VMA and LMA for the 540 // section is the same as the preceding output section in the same region 541 // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html 542 if (LMAOffset) 543 CurOutSec->LMAOffset = LMAOffset(); 544 } 545 546 void LinkerScript::process(BaseCommand &Base) { 547 // This handles the assignments to symbol or to a location counter (.) 548 if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) { 549 assignSymbol(AssignCmd, true); 550 return; 551 } 552 553 // Handle BYTE(), SHORT(), LONG(), or QUAD(). 554 if (auto *DataCmd = dyn_cast<BytesDataCommand>(&Base)) { 555 DataCmd->Offset = Dot - CurOutSec->Addr; 556 Dot += DataCmd->Size; 557 CurOutSec->Size = Dot - CurOutSec->Addr; 558 return; 559 } 560 561 if (auto *AssertCmd = dyn_cast<AssertCommand>(&Base)) { 562 AssertCmd->Expression(); 563 return; 564 } 565 566 // It handles single input section description command, 567 // calculates and assigns the offsets for each section and also 568 // updates the output section size. 569 auto &ICmd = cast<InputSectionDescription>(Base); 570 for (InputSectionBase *IB : ICmd.Sections) { 571 // We tentatively added all synthetic sections at the beginning and removed 572 // empty ones afterwards (because there is no way to know whether they were 573 // going be empty or not other than actually running linker scripts.) 574 // We need to ignore remains of empty sections. 575 if (auto *Sec = dyn_cast<SyntheticSection>(IB)) 576 if (Sec->empty()) 577 continue; 578 579 if (!IB->Live) 580 continue; 581 assert(CurOutSec == IB->OutSec || AlreadyOutputOS.count(IB->OutSec)); 582 output(cast<InputSection>(IB)); 583 } 584 } 585 586 static OutputSection * 587 findSection(StringRef Name, const std::vector<OutputSection *> &Sections) { 588 auto End = Sections.end(); 589 auto HasName = [=](OutputSection *Sec) { return Sec->Name == Name; }; 590 auto I = std::find_if(Sections.begin(), End, HasName); 591 std::vector<OutputSection *> Ret; 592 if (I == End) 593 return nullptr; 594 assert(std::find_if(I + 1, End, HasName) == End); 595 return *I; 596 } 597 598 // This function searches for a memory region to place the given output 599 // section in. If found, a pointer to the appropriate memory region is 600 // returned. Otherwise, a nullptr is returned. 601 MemoryRegion *LinkerScript::findMemoryRegion(OutputSectionCommand *Cmd, 602 OutputSection *Sec) { 603 // If a memory region name was specified in the output section command, 604 // then try to find that region first. 605 if (!Cmd->MemoryRegionName.empty()) { 606 auto It = Opt.MemoryRegions.find(Cmd->MemoryRegionName); 607 if (It != Opt.MemoryRegions.end()) 608 return &It->second; 609 error("memory region '" + Cmd->MemoryRegionName + "' not declared"); 610 return nullptr; 611 } 612 613 // The memory region name is empty, thus a suitable region must be 614 // searched for in the region map. If the region map is empty, just 615 // return. Note that this check doesn't happen at the very beginning 616 // so that uses of undeclared regions can be caught. 617 if (!Opt.MemoryRegions.size()) 618 return nullptr; 619 620 // See if a region can be found by matching section flags. 621 for (auto &MRI : Opt.MemoryRegions) { 622 MemoryRegion &MR = MRI.second; 623 if ((MR.Flags & Sec->Flags) != 0 && (MR.NegFlags & Sec->Flags) == 0) 624 return &MR; 625 } 626 627 // Otherwise, no suitable region was found. 628 if (Sec->Flags & SHF_ALLOC) 629 error("no memory region specified for section '" + Sec->Name + "'"); 630 return nullptr; 631 } 632 633 // This function assigns offsets to input sections and an output section 634 // for a single sections command (e.g. ".text { *(.text); }"). 635 void LinkerScript::assignOffsets(OutputSectionCommand *Cmd) { 636 OutputSection *Sec = findSection(Cmd->Name, *OutputSections); 637 if (!Sec) 638 return; 639 640 if (Cmd->AddrExpr && Sec->Flags & SHF_ALLOC) 641 setDot(Cmd->AddrExpr, Cmd->Location); 642 643 if (Cmd->LMAExpr) { 644 uint64_t D = Dot; 645 LMAOffset = [=] { return Cmd->LMAExpr().getValue() - D; }; 646 } 647 648 // Handle align (e.g. ".foo : ALIGN(16) { ... }"). 649 if (Cmd->AlignExpr) 650 Sec->updateAlignment(Cmd->AlignExpr().getValue()); 651 652 // Try and find an appropriate memory region to assign offsets in. 653 CurMemRegion = findMemoryRegion(Cmd, Sec); 654 if (CurMemRegion) 655 Dot = CurMemRegion->Offset; 656 switchTo(Sec); 657 658 // Find the last section output location. We will output orphan sections 659 // there so that end symbols point to the correct location. 660 auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(), 661 [](const std::unique_ptr<BaseCommand> &Cmd) { 662 return !isa<SymbolAssignment>(*Cmd); 663 }) 664 .base(); 665 for (auto I = Cmd->Commands.begin(); I != E; ++I) 666 process(**I); 667 flush(); 668 std::for_each(E, Cmd->Commands.end(), 669 [this](std::unique_ptr<BaseCommand> &B) { process(*B.get()); }); 670 } 671 672 void LinkerScript::removeEmptyCommands() { 673 // It is common practice to use very generic linker scripts. So for any 674 // given run some of the output sections in the script will be empty. 675 // We could create corresponding empty output sections, but that would 676 // clutter the output. 677 // We instead remove trivially empty sections. The bfd linker seems even 678 // more aggressive at removing them. 679 auto Pos = std::remove_if( 680 Opt.Commands.begin(), Opt.Commands.end(), 681 [&](const std::unique_ptr<BaseCommand> &Base) { 682 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 683 return !findSection(Cmd->Name, *OutputSections); 684 return false; 685 }); 686 Opt.Commands.erase(Pos, Opt.Commands.end()); 687 } 688 689 static bool isAllSectionDescription(const OutputSectionCommand &Cmd) { 690 for (const std::unique_ptr<BaseCommand> &I : Cmd.Commands) 691 if (!isa<InputSectionDescription>(*I)) 692 return false; 693 return true; 694 } 695 696 void LinkerScript::adjustSectionsBeforeSorting() { 697 // If the output section contains only symbol assignments, create a 698 // corresponding output section. The bfd linker seems to only create them if 699 // '.' is assigned to, but creating these section should not have any bad 700 // consequeces and gives us a section to put the symbol in. 701 uint64_t Flags = SHF_ALLOC; 702 uint32_t Type = SHT_NOBITS; 703 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 704 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 705 if (!Cmd) 706 continue; 707 if (OutputSection *Sec = findSection(Cmd->Name, *OutputSections)) { 708 Flags = Sec->Flags; 709 Type = Sec->Type; 710 continue; 711 } 712 713 if (isAllSectionDescription(*Cmd)) 714 continue; 715 716 auto *OutSec = make<OutputSection>(Cmd->Name, Type, Flags); 717 OutputSections->push_back(OutSec); 718 } 719 } 720 721 void LinkerScript::adjustSectionsAfterSorting() { 722 placeOrphanSections(); 723 724 // If output section command doesn't specify any segments, 725 // and we haven't previously assigned any section to segment, 726 // then we simply assign section to the very first load segment. 727 // Below is an example of such linker script: 728 // PHDRS { seg PT_LOAD; } 729 // SECTIONS { .aaa : { *(.aaa) } } 730 std::vector<StringRef> DefPhdrs; 731 auto FirstPtLoad = 732 std::find_if(Opt.PhdrsCommands.begin(), Opt.PhdrsCommands.end(), 733 [](const PhdrsCommand &Cmd) { return Cmd.Type == PT_LOAD; }); 734 if (FirstPtLoad != Opt.PhdrsCommands.end()) 735 DefPhdrs.push_back(FirstPtLoad->Name); 736 737 // Walk the commands and propagate the program headers to commands that don't 738 // explicitly specify them. 739 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 740 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 741 if (!Cmd) 742 continue; 743 if (Cmd->Phdrs.empty()) 744 Cmd->Phdrs = DefPhdrs; 745 else 746 DefPhdrs = Cmd->Phdrs; 747 } 748 749 removeEmptyCommands(); 750 } 751 752 // When placing orphan sections, we want to place them after symbol assignments 753 // so that an orphan after 754 // begin_foo = .; 755 // foo : { *(foo) } 756 // end_foo = .; 757 // doesn't break the intended meaning of the begin/end symbols. 758 // We don't want to go over sections since Writer<ELFT>::sortSections is the 759 // one in charge of deciding the order of the sections. 760 // We don't want to go over alignments, since doing so in 761 // rx_sec : { *(rx_sec) } 762 // . = ALIGN(0x1000); 763 // /* The RW PT_LOAD starts here*/ 764 // rw_sec : { *(rw_sec) } 765 // would mean that the RW PT_LOAD would become unaligned. 766 static bool shouldSkip(const BaseCommand &Cmd) { 767 if (isa<OutputSectionCommand>(Cmd)) 768 return false; 769 const auto *Assign = dyn_cast<SymbolAssignment>(&Cmd); 770 if (!Assign) 771 return true; 772 return Assign->Name != "."; 773 } 774 775 // Orphan sections are sections present in the input files which are 776 // not explicitly placed into the output file by the linker script. 777 // 778 // When the control reaches this function, Opt.Commands contains 779 // output section commands for non-orphan sections only. This function 780 // adds new elements for orphan sections to Opt.Commands so that all 781 // sections are explicitly handled by Opt.Commands. 782 // 783 // Writer<ELFT>::sortSections has already sorted output sections. 784 // What we need to do is to scan OutputSections vector and 785 // Opt.Commands in parallel to find orphan sections. If there is an 786 // output section that doesn't have a corresponding entry in 787 // Opt.Commands, we will insert a new entry to Opt.Commands. 788 // 789 // There is some ambiguity as to where exactly a new entry should be 790 // inserted, because Opt.Commands contains not only output section 791 // commands but other types of commands such as symbol assignment 792 // expressions. There's no correct answer here due to the lack of the 793 // formal specification of the linker script. We use heuristics to 794 // determine whether a new output command should be added before or 795 // after another commands. For the details, look at shouldSkip 796 // function. 797 void LinkerScript::placeOrphanSections() { 798 // The OutputSections are already in the correct order. 799 // This loops creates or moves commands as needed so that they are in the 800 // correct order. 801 int CmdIndex = 0; 802 803 // As a horrible special case, skip the first . assignment if it is before any 804 // section. We do this because it is common to set a load address by starting 805 // the script with ". = 0xabcd" and the expectation is that every section is 806 // after that. 807 auto FirstSectionOrDotAssignment = 808 std::find_if(Opt.Commands.begin(), Opt.Commands.end(), 809 [](const std::unique_ptr<BaseCommand> &Cmd) { 810 if (isa<OutputSectionCommand>(*Cmd)) 811 return true; 812 const auto *Assign = dyn_cast<SymbolAssignment>(Cmd.get()); 813 if (!Assign) 814 return false; 815 return Assign->Name == "."; 816 }); 817 if (FirstSectionOrDotAssignment != Opt.Commands.end()) { 818 CmdIndex = FirstSectionOrDotAssignment - Opt.Commands.begin(); 819 if (isa<SymbolAssignment>(**FirstSectionOrDotAssignment)) 820 ++CmdIndex; 821 } 822 823 for (OutputSection *Sec : *OutputSections) { 824 StringRef Name = Sec->Name; 825 826 // Find the last spot where we can insert a command and still get the 827 // correct result. 828 auto CmdIter = Opt.Commands.begin() + CmdIndex; 829 auto E = Opt.Commands.end(); 830 while (CmdIter != E && shouldSkip(**CmdIter)) { 831 ++CmdIter; 832 ++CmdIndex; 833 } 834 835 auto Pos = 836 std::find_if(CmdIter, E, [&](const std::unique_ptr<BaseCommand> &Base) { 837 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 838 return Cmd && Cmd->Name == Name; 839 }); 840 if (Pos == E) { 841 Opt.Commands.insert(CmdIter, 842 llvm::make_unique<OutputSectionCommand>(Name)); 843 ++CmdIndex; 844 continue; 845 } 846 847 // Continue from where we found it. 848 CmdIndex = (Pos - Opt.Commands.begin()) + 1; 849 } 850 } 851 852 void LinkerScript::processNonSectionCommands() { 853 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 854 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) 855 assignSymbol(Cmd); 856 else if (auto *Cmd = dyn_cast<AssertCommand>(Base.get())) 857 Cmd->Expression(); 858 } 859 } 860 861 void LinkerScript::assignAddresses(std::vector<PhdrEntry> &Phdrs) { 862 // Assign addresses as instructed by linker script SECTIONS sub-commands. 863 Dot = 0; 864 ErrorOnMissingSection = true; 865 switchTo(Aether); 866 867 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 868 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) { 869 assignSymbol(Cmd); 870 continue; 871 } 872 873 if (auto *Cmd = dyn_cast<AssertCommand>(Base.get())) { 874 Cmd->Expression(); 875 continue; 876 } 877 878 auto *Cmd = cast<OutputSectionCommand>(Base.get()); 879 assignOffsets(Cmd); 880 } 881 882 uint64_t MinVA = std::numeric_limits<uint64_t>::max(); 883 for (OutputSection *Sec : *OutputSections) { 884 if (Sec->Flags & SHF_ALLOC) 885 MinVA = std::min<uint64_t>(MinVA, Sec->Addr); 886 else 887 Sec->Addr = 0; 888 } 889 890 allocateHeaders(Phdrs, *OutputSections, MinVA); 891 } 892 893 // Creates program headers as instructed by PHDRS linker script command. 894 std::vector<PhdrEntry> LinkerScript::createPhdrs() { 895 std::vector<PhdrEntry> Ret; 896 897 // Process PHDRS and FILEHDR keywords because they are not 898 // real output sections and cannot be added in the following loop. 899 for (const PhdrsCommand &Cmd : Opt.PhdrsCommands) { 900 Ret.emplace_back(Cmd.Type, Cmd.Flags == UINT_MAX ? PF_R : Cmd.Flags); 901 PhdrEntry &Phdr = Ret.back(); 902 903 if (Cmd.HasFilehdr) 904 Phdr.add(Out::ElfHeader); 905 if (Cmd.HasPhdrs) 906 Phdr.add(Out::ProgramHeaders); 907 908 if (Cmd.LMAExpr) { 909 Phdr.p_paddr = Cmd.LMAExpr().getValue(); 910 Phdr.HasLMA = true; 911 } 912 } 913 914 // Add output sections to program headers. 915 for (OutputSection *Sec : *OutputSections) { 916 if (!(Sec->Flags & SHF_ALLOC)) 917 break; 918 919 // Assign headers specified by linker script 920 for (size_t Id : getPhdrIndices(Sec->Name)) { 921 Ret[Id].add(Sec); 922 if (Opt.PhdrsCommands[Id].Flags == UINT_MAX) 923 Ret[Id].p_flags |= Sec->getPhdrFlags(); 924 } 925 } 926 return Ret; 927 } 928 929 bool LinkerScript::ignoreInterpSection() { 930 // Ignore .interp section in case we have PHDRS specification 931 // and PT_INTERP isn't listed. 932 return !Opt.PhdrsCommands.empty() && 933 llvm::find_if(Opt.PhdrsCommands, [](const PhdrsCommand &Cmd) { 934 return Cmd.Type == PT_INTERP; 935 }) == Opt.PhdrsCommands.end(); 936 } 937 938 uint32_t LinkerScript::getFiller(StringRef Name) { 939 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 940 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 941 if (Cmd->Name == Name) 942 return Cmd->Filler; 943 return 0; 944 } 945 946 static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) { 947 switch (Size) { 948 case 1: 949 *Buf = (uint8_t)Data; 950 break; 951 case 2: 952 write16(Buf, Data, Config->Endianness); 953 break; 954 case 4: 955 write32(Buf, Data, Config->Endianness); 956 break; 957 case 8: 958 write64(Buf, Data, Config->Endianness); 959 break; 960 default: 961 llvm_unreachable("unsupported Size argument"); 962 } 963 } 964 965 void LinkerScript::writeDataBytes(StringRef Name, uint8_t *Buf) { 966 int I = getSectionIndex(Name); 967 if (I == INT_MAX) 968 return; 969 970 auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get()); 971 for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) 972 if (auto *Data = dyn_cast<BytesDataCommand>(Base.get())) 973 writeInt(Buf + Data->Offset, Data->Expression().getValue(), Data->Size); 974 } 975 976 bool LinkerScript::hasLMA(StringRef Name) { 977 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 978 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 979 if (Cmd->LMAExpr && Cmd->Name == Name) 980 return true; 981 return false; 982 } 983 984 // Returns the index of the given section name in linker script 985 // SECTIONS commands. Sections are laid out as the same order as they 986 // were in the script. If a given name did not appear in the script, 987 // it returns INT_MAX, so that it will be laid out at end of file. 988 int LinkerScript::getSectionIndex(StringRef Name) { 989 for (int I = 0, E = Opt.Commands.size(); I != E; ++I) 990 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get())) 991 if (Cmd->Name == Name) 992 return I; 993 return INT_MAX; 994 } 995 996 ExprValue LinkerScript::getSymbolValue(const Twine &Loc, StringRef S) { 997 if (S == ".") 998 return {CurOutSec, Dot - CurOutSec->Addr}; 999 if (SymbolBody *B = findSymbol(S)) { 1000 if (auto *D = dyn_cast<DefinedRegular>(B)) 1001 return {D->Section, D->Value}; 1002 if (auto *C = dyn_cast<DefinedCommon>(B)) 1003 return {InX::Common, C->Offset}; 1004 } 1005 error(Loc + ": symbol not found: " + S); 1006 return 0; 1007 } 1008 1009 bool LinkerScript::isDefined(StringRef S) { return findSymbol(S) != nullptr; } 1010 1011 // Returns indices of ELF headers containing specific section, identified 1012 // by Name. Each index is a zero based number of ELF header listed within 1013 // PHDRS {} script block. 1014 std::vector<size_t> LinkerScript::getPhdrIndices(StringRef SectionName) { 1015 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 1016 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 1017 if (!Cmd || Cmd->Name != SectionName) 1018 continue; 1019 1020 std::vector<size_t> Ret; 1021 for (StringRef PhdrName : Cmd->Phdrs) 1022 Ret.push_back(getPhdrIndex(Cmd->Location, PhdrName)); 1023 return Ret; 1024 } 1025 return {}; 1026 } 1027 1028 size_t LinkerScript::getPhdrIndex(const Twine &Loc, StringRef PhdrName) { 1029 size_t I = 0; 1030 for (PhdrsCommand &Cmd : Opt.PhdrsCommands) { 1031 if (Cmd.Name == PhdrName) 1032 return I; 1033 ++I; 1034 } 1035 error(Loc + ": section header '" + PhdrName + "' is not listed in PHDRS"); 1036 return 0; 1037 } 1038 1039 class elf::ScriptParser final : public ScriptLexer { 1040 typedef void (ScriptParser::*Handler)(); 1041 1042 public: 1043 ScriptParser(MemoryBufferRef MB) 1044 : ScriptLexer(MB), 1045 IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} 1046 1047 void readLinkerScript(); 1048 void readVersionScript(); 1049 void readDynamicList(); 1050 1051 private: 1052 void addFile(StringRef Path); 1053 1054 void readAsNeeded(); 1055 void readEntry(); 1056 void readExtern(); 1057 void readGroup(); 1058 void readInclude(); 1059 void readMemory(); 1060 void readOutput(); 1061 void readOutputArch(); 1062 void readOutputFormat(); 1063 void readPhdrs(); 1064 void readSearchDir(); 1065 void readSections(); 1066 void readVersion(); 1067 void readVersionScriptCommand(); 1068 1069 SymbolAssignment *readAssignment(StringRef Name); 1070 BytesDataCommand *readBytesDataCommand(StringRef Tok); 1071 uint32_t readFill(); 1072 OutputSectionCommand *readOutputSectionDescription(StringRef OutSec); 1073 uint32_t readOutputSectionFiller(StringRef Tok); 1074 std::vector<StringRef> readOutputSectionPhdrs(); 1075 InputSectionDescription *readInputSectionDescription(StringRef Tok); 1076 StringMatcher readFilePatterns(); 1077 std::vector<SectionPattern> readInputSectionsList(); 1078 InputSectionDescription *readInputSectionRules(StringRef FilePattern); 1079 unsigned readPhdrType(); 1080 SortSectionPolicy readSortKind(); 1081 SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); 1082 SymbolAssignment *readProvideOrAssignment(StringRef Tok); 1083 void readSort(); 1084 Expr readAssert(); 1085 1086 uint64_t readMemoryAssignment(StringRef, StringRef, StringRef); 1087 std::pair<uint32_t, uint32_t> readMemoryAttributes(); 1088 1089 Expr readExpr(); 1090 Expr readExpr1(Expr Lhs, int MinPrec); 1091 StringRef readParenLiteral(); 1092 Expr readPrimary(); 1093 Expr readTernary(Expr Cond); 1094 Expr readParenExpr(); 1095 1096 // For parsing version script. 1097 std::vector<SymbolVersion> readVersionExtern(); 1098 void readAnonymousDeclaration(); 1099 void readVersionDeclaration(StringRef VerStr); 1100 1101 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 1102 readSymbols(); 1103 1104 bool IsUnderSysroot; 1105 }; 1106 1107 void ScriptParser::readDynamicList() { 1108 expect("{"); 1109 readAnonymousDeclaration(); 1110 if (!atEOF()) 1111 setError("EOF expected, but got " + next()); 1112 } 1113 1114 void ScriptParser::readVersionScript() { 1115 readVersionScriptCommand(); 1116 if (!atEOF()) 1117 setError("EOF expected, but got " + next()); 1118 } 1119 1120 void ScriptParser::readVersionScriptCommand() { 1121 if (consume("{")) { 1122 readAnonymousDeclaration(); 1123 return; 1124 } 1125 1126 while (!atEOF() && !Error && peek() != "}") { 1127 StringRef VerStr = next(); 1128 if (VerStr == "{") { 1129 setError("anonymous version definition is used in " 1130 "combination with other version definitions"); 1131 return; 1132 } 1133 expect("{"); 1134 readVersionDeclaration(VerStr); 1135 } 1136 } 1137 1138 void ScriptParser::readVersion() { 1139 expect("{"); 1140 readVersionScriptCommand(); 1141 expect("}"); 1142 } 1143 1144 void ScriptParser::readLinkerScript() { 1145 while (!atEOF()) { 1146 StringRef Tok = next(); 1147 if (Tok == ";") 1148 continue; 1149 1150 if (Tok == "ASSERT") { 1151 Script->Opt.Commands.emplace_back(new AssertCommand(readAssert())); 1152 } else if (Tok == "ENTRY") { 1153 readEntry(); 1154 } else if (Tok == "EXTERN") { 1155 readExtern(); 1156 } else if (Tok == "GROUP" || Tok == "INPUT") { 1157 readGroup(); 1158 } else if (Tok == "INCLUDE") { 1159 readInclude(); 1160 } else if (Tok == "MEMORY") { 1161 readMemory(); 1162 } else if (Tok == "OUTPUT") { 1163 readOutput(); 1164 } else if (Tok == "OUTPUT_ARCH") { 1165 readOutputArch(); 1166 } else if (Tok == "OUTPUT_FORMAT") { 1167 readOutputFormat(); 1168 } else if (Tok == "PHDRS") { 1169 readPhdrs(); 1170 } else if (Tok == "SEARCH_DIR") { 1171 readSearchDir(); 1172 } else if (Tok == "SECTIONS") { 1173 readSections(); 1174 } else if (Tok == "VERSION") { 1175 readVersion(); 1176 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { 1177 Script->Opt.Commands.emplace_back(Cmd); 1178 } else { 1179 setError("unknown directive: " + Tok); 1180 } 1181 } 1182 } 1183 1184 void ScriptParser::addFile(StringRef S) { 1185 if (IsUnderSysroot && S.startswith("/")) { 1186 SmallString<128> PathData; 1187 StringRef Path = (Config->Sysroot + S).toStringRef(PathData); 1188 if (sys::fs::exists(Path)) { 1189 Driver->addFile(Saver.save(Path)); 1190 return; 1191 } 1192 } 1193 1194 if (sys::path::is_absolute(S)) { 1195 Driver->addFile(S); 1196 } else if (S.startswith("=")) { 1197 if (Config->Sysroot.empty()) 1198 Driver->addFile(S.substr(1)); 1199 else 1200 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1))); 1201 } else if (S.startswith("-l")) { 1202 Driver->addLibrary(S.substr(2)); 1203 } else if (sys::fs::exists(S)) { 1204 Driver->addFile(S); 1205 } else { 1206 if (Optional<std::string> Path = findFromSearchPaths(S)) 1207 Driver->addFile(Saver.save(*Path)); 1208 else 1209 setError("unable to find " + S); 1210 } 1211 } 1212 1213 void ScriptParser::readAsNeeded() { 1214 expect("("); 1215 bool Orig = Config->AsNeeded; 1216 Config->AsNeeded = true; 1217 while (!Error && !consume(")")) 1218 addFile(unquote(next())); 1219 Config->AsNeeded = Orig; 1220 } 1221 1222 void ScriptParser::readEntry() { 1223 // -e <symbol> takes predecence over ENTRY(<symbol>). 1224 expect("("); 1225 StringRef Tok = next(); 1226 if (Config->Entry.empty()) 1227 Config->Entry = Tok; 1228 expect(")"); 1229 } 1230 1231 void ScriptParser::readExtern() { 1232 expect("("); 1233 while (!Error && !consume(")")) 1234 Config->Undefined.push_back(next()); 1235 } 1236 1237 void ScriptParser::readGroup() { 1238 expect("("); 1239 while (!Error && !consume(")")) { 1240 StringRef Tok = next(); 1241 if (Tok == "AS_NEEDED") 1242 readAsNeeded(); 1243 else 1244 addFile(unquote(Tok)); 1245 } 1246 } 1247 1248 void ScriptParser::readInclude() { 1249 StringRef Tok = unquote(next()); 1250 1251 // https://sourceware.org/binutils/docs/ld/File-Commands.html: 1252 // The file will be searched for in the current directory, and in any 1253 // directory specified with the -L option. 1254 if (sys::fs::exists(Tok)) { 1255 if (Optional<MemoryBufferRef> MB = readFile(Tok)) 1256 tokenize(*MB); 1257 return; 1258 } 1259 if (Optional<std::string> Path = findFromSearchPaths(Tok)) { 1260 if (Optional<MemoryBufferRef> MB = readFile(*Path)) 1261 tokenize(*MB); 1262 return; 1263 } 1264 setError("cannot open " + Tok); 1265 } 1266 1267 void ScriptParser::readOutput() { 1268 // -o <file> takes predecence over OUTPUT(<file>). 1269 expect("("); 1270 StringRef Tok = next(); 1271 if (Config->OutputFile.empty()) 1272 Config->OutputFile = unquote(Tok); 1273 expect(")"); 1274 } 1275 1276 void ScriptParser::readOutputArch() { 1277 // OUTPUT_ARCH is ignored for now. 1278 expect("("); 1279 while (!Error && !consume(")")) 1280 skip(); 1281 } 1282 1283 void ScriptParser::readOutputFormat() { 1284 // Error checking only for now. 1285 expect("("); 1286 skip(); 1287 StringRef Tok = next(); 1288 if (Tok == ")") 1289 return; 1290 if (Tok != ",") { 1291 setError("unexpected token: " + Tok); 1292 return; 1293 } 1294 skip(); 1295 expect(","); 1296 skip(); 1297 expect(")"); 1298 } 1299 1300 void ScriptParser::readPhdrs() { 1301 expect("{"); 1302 while (!Error && !consume("}")) { 1303 StringRef Tok = next(); 1304 Script->Opt.PhdrsCommands.push_back( 1305 {Tok, PT_NULL, false, false, UINT_MAX, nullptr}); 1306 PhdrsCommand &PhdrCmd = Script->Opt.PhdrsCommands.back(); 1307 1308 PhdrCmd.Type = readPhdrType(); 1309 do { 1310 Tok = next(); 1311 if (Tok == ";") 1312 break; 1313 if (Tok == "FILEHDR") 1314 PhdrCmd.HasFilehdr = true; 1315 else if (Tok == "PHDRS") 1316 PhdrCmd.HasPhdrs = true; 1317 else if (Tok == "AT") 1318 PhdrCmd.LMAExpr = readParenExpr(); 1319 else if (Tok == "FLAGS") { 1320 expect("("); 1321 // Passing 0 for the value of dot is a bit of a hack. It means that 1322 // we accept expressions like ".|1". 1323 PhdrCmd.Flags = readExpr()().getValue(); 1324 expect(")"); 1325 } else 1326 setError("unexpected header attribute: " + Tok); 1327 } while (!Error); 1328 } 1329 } 1330 1331 void ScriptParser::readSearchDir() { 1332 expect("("); 1333 StringRef Tok = next(); 1334 if (!Config->Nostdlib) 1335 Config->SearchPaths.push_back(unquote(Tok)); 1336 expect(")"); 1337 } 1338 1339 void ScriptParser::readSections() { 1340 Script->Opt.HasSections = true; 1341 // -no-rosegment is used to avoid placing read only non-executable sections in 1342 // their own segment. We do the same if SECTIONS command is present in linker 1343 // script. See comment for computeFlags(). 1344 Config->SingleRoRx = true; 1345 1346 expect("{"); 1347 while (!Error && !consume("}")) { 1348 StringRef Tok = next(); 1349 BaseCommand *Cmd = readProvideOrAssignment(Tok); 1350 if (!Cmd) { 1351 if (Tok == "ASSERT") 1352 Cmd = new AssertCommand(readAssert()); 1353 else 1354 Cmd = readOutputSectionDescription(Tok); 1355 } 1356 Script->Opt.Commands.emplace_back(Cmd); 1357 } 1358 } 1359 1360 static int precedence(StringRef Op) { 1361 return StringSwitch<int>(Op) 1362 .Cases("*", "/", 5) 1363 .Cases("+", "-", 4) 1364 .Cases("<<", ">>", 3) 1365 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 1366 .Cases("&", "|", 1) 1367 .Default(-1); 1368 } 1369 1370 StringMatcher ScriptParser::readFilePatterns() { 1371 std::vector<StringRef> V; 1372 while (!Error && !consume(")")) 1373 V.push_back(next()); 1374 return StringMatcher(V); 1375 } 1376 1377 SortSectionPolicy ScriptParser::readSortKind() { 1378 if (consume("SORT") || consume("SORT_BY_NAME")) 1379 return SortSectionPolicy::Name; 1380 if (consume("SORT_BY_ALIGNMENT")) 1381 return SortSectionPolicy::Alignment; 1382 if (consume("SORT_BY_INIT_PRIORITY")) 1383 return SortSectionPolicy::Priority; 1384 if (consume("SORT_NONE")) 1385 return SortSectionPolicy::None; 1386 return SortSectionPolicy::Default; 1387 } 1388 1389 // Method reads a list of sequence of excluded files and section globs given in 1390 // a following form: ((EXCLUDE_FILE(file_pattern+))? section_pattern+)+ 1391 // Example: *(.foo.1 EXCLUDE_FILE (*a.o) .foo.2 EXCLUDE_FILE (*b.o) .foo.3) 1392 // The semantics of that is next: 1393 // * Include .foo.1 from every file. 1394 // * Include .foo.2 from every file but a.o 1395 // * Include .foo.3 from every file but b.o 1396 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 1397 std::vector<SectionPattern> Ret; 1398 while (!Error && peek() != ")") { 1399 StringMatcher ExcludeFilePat; 1400 if (consume("EXCLUDE_FILE")) { 1401 expect("("); 1402 ExcludeFilePat = readFilePatterns(); 1403 } 1404 1405 std::vector<StringRef> V; 1406 while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") 1407 V.push_back(next()); 1408 1409 if (!V.empty()) 1410 Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); 1411 else 1412 setError("section pattern is expected"); 1413 } 1414 return Ret; 1415 } 1416 1417 // Reads contents of "SECTIONS" directive. That directive contains a 1418 // list of glob patterns for input sections. The grammar is as follows. 1419 // 1420 // <patterns> ::= <section-list> 1421 // | <sort> "(" <section-list> ")" 1422 // | <sort> "(" <sort> "(" <section-list> ")" ")" 1423 // 1424 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" 1425 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" 1426 // 1427 // <section-list> is parsed by readInputSectionsList(). 1428 InputSectionDescription * 1429 ScriptParser::readInputSectionRules(StringRef FilePattern) { 1430 auto *Cmd = new InputSectionDescription(FilePattern); 1431 expect("("); 1432 while (!Error && !consume(")")) { 1433 SortSectionPolicy Outer = readSortKind(); 1434 SortSectionPolicy Inner = SortSectionPolicy::Default; 1435 std::vector<SectionPattern> V; 1436 if (Outer != SortSectionPolicy::Default) { 1437 expect("("); 1438 Inner = readSortKind(); 1439 if (Inner != SortSectionPolicy::Default) { 1440 expect("("); 1441 V = readInputSectionsList(); 1442 expect(")"); 1443 } else { 1444 V = readInputSectionsList(); 1445 } 1446 expect(")"); 1447 } else { 1448 V = readInputSectionsList(); 1449 } 1450 1451 for (SectionPattern &Pat : V) { 1452 Pat.SortInner = Inner; 1453 Pat.SortOuter = Outer; 1454 } 1455 1456 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 1457 } 1458 return Cmd; 1459 } 1460 1461 InputSectionDescription * 1462 ScriptParser::readInputSectionDescription(StringRef Tok) { 1463 // Input section wildcard can be surrounded by KEEP. 1464 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 1465 if (Tok == "KEEP") { 1466 expect("("); 1467 StringRef FilePattern = next(); 1468 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 1469 expect(")"); 1470 Script->Opt.KeptSections.push_back(Cmd); 1471 return Cmd; 1472 } 1473 return readInputSectionRules(Tok); 1474 } 1475 1476 void ScriptParser::readSort() { 1477 expect("("); 1478 expect("CONSTRUCTORS"); 1479 expect(")"); 1480 } 1481 1482 Expr ScriptParser::readAssert() { 1483 expect("("); 1484 Expr E = readExpr(); 1485 expect(","); 1486 StringRef Msg = unquote(next()); 1487 expect(")"); 1488 return [=] { 1489 if (!E().getValue()) 1490 error(Msg); 1491 return Script->getDot(); 1492 }; 1493 } 1494 1495 // Reads a FILL(expr) command. We handle the FILL command as an 1496 // alias for =fillexp section attribute, which is different from 1497 // what GNU linkers do. 1498 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 1499 uint32_t ScriptParser::readFill() { 1500 expect("("); 1501 uint32_t V = readOutputSectionFiller(next()); 1502 expect(")"); 1503 expect(";"); 1504 return V; 1505 } 1506 1507 OutputSectionCommand * 1508 ScriptParser::readOutputSectionDescription(StringRef OutSec) { 1509 OutputSectionCommand *Cmd = new OutputSectionCommand(OutSec); 1510 Cmd->Location = getCurrentLocation(); 1511 1512 // Read an address expression. 1513 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html#Output-Section-Address 1514 if (peek() != ":") 1515 Cmd->AddrExpr = readExpr(); 1516 1517 expect(":"); 1518 1519 if (consume("AT")) 1520 Cmd->LMAExpr = readParenExpr(); 1521 if (consume("ALIGN")) 1522 Cmd->AlignExpr = readParenExpr(); 1523 if (consume("SUBALIGN")) 1524 Cmd->SubalignExpr = readParenExpr(); 1525 1526 // Parse constraints. 1527 if (consume("ONLY_IF_RO")) 1528 Cmd->Constraint = ConstraintKind::ReadOnly; 1529 if (consume("ONLY_IF_RW")) 1530 Cmd->Constraint = ConstraintKind::ReadWrite; 1531 expect("{"); 1532 1533 while (!Error && !consume("}")) { 1534 StringRef Tok = next(); 1535 if (Tok == ";") { 1536 // Empty commands are allowed. Do nothing here. 1537 } else if (SymbolAssignment *Assignment = readProvideOrAssignment(Tok)) { 1538 Cmd->Commands.emplace_back(Assignment); 1539 } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) { 1540 Cmd->Commands.emplace_back(Data); 1541 } else if (Tok == "ASSERT") { 1542 Cmd->Commands.emplace_back(new AssertCommand(readAssert())); 1543 expect(";"); 1544 } else if (Tok == "CONSTRUCTORS") { 1545 // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors 1546 // by name. This is for very old file formats such as ECOFF/XCOFF. 1547 // For ELF, we should ignore. 1548 } else if (Tok == "FILL") { 1549 Cmd->Filler = readFill(); 1550 } else if (Tok == "SORT") { 1551 readSort(); 1552 } else if (peek() == "(") { 1553 Cmd->Commands.emplace_back(readInputSectionDescription(Tok)); 1554 } else { 1555 setError("unknown command " + Tok); 1556 } 1557 } 1558 1559 if (consume(">")) 1560 Cmd->MemoryRegionName = next(); 1561 1562 Cmd->Phdrs = readOutputSectionPhdrs(); 1563 1564 if (consume("=")) 1565 Cmd->Filler = readOutputSectionFiller(next()); 1566 else if (peek().startswith("=")) 1567 Cmd->Filler = readOutputSectionFiller(next().drop_front()); 1568 1569 // Consume optional comma following output section command. 1570 consume(","); 1571 1572 return Cmd; 1573 } 1574 1575 // Read "=<number>" where <number> is an octal/decimal/hexadecimal number. 1576 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 1577 // 1578 // ld.gold is not fully compatible with ld.bfd. ld.bfd handles 1579 // hexstrings as blobs of arbitrary sizes, while ld.gold handles them 1580 // as 32-bit big-endian values. We will do the same as ld.gold does 1581 // because it's simpler than what ld.bfd does. 1582 uint32_t ScriptParser::readOutputSectionFiller(StringRef Tok) { 1583 uint32_t V; 1584 if (!Tok.getAsInteger(0, V)) 1585 return V; 1586 setError("invalid filler expression: " + Tok); 1587 return 0; 1588 } 1589 1590 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { 1591 expect("("); 1592 SymbolAssignment *Cmd = readAssignment(next()); 1593 Cmd->Provide = Provide; 1594 Cmd->Hidden = Hidden; 1595 expect(")"); 1596 expect(";"); 1597 return Cmd; 1598 } 1599 1600 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) { 1601 SymbolAssignment *Cmd = nullptr; 1602 if (peek() == "=" || peek() == "+=") { 1603 Cmd = readAssignment(Tok); 1604 expect(";"); 1605 } else if (Tok == "PROVIDE") { 1606 Cmd = readProvideHidden(true, false); 1607 } else if (Tok == "HIDDEN") { 1608 Cmd = readProvideHidden(false, true); 1609 } else if (Tok == "PROVIDE_HIDDEN") { 1610 Cmd = readProvideHidden(true, true); 1611 } 1612 return Cmd; 1613 } 1614 1615 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { 1616 StringRef Op = next(); 1617 assert(Op == "=" || Op == "+="); 1618 Expr E = readExpr(); 1619 if (Op == "+=") { 1620 std::string Loc = getCurrentLocation(); 1621 E = [=] { return add(Script->getSymbolValue(Loc, Name), E()); }; 1622 } 1623 return new SymbolAssignment(Name, E, getCurrentLocation()); 1624 } 1625 1626 // This is an operator-precedence parser to parse a linker 1627 // script expression. 1628 Expr ScriptParser::readExpr() { 1629 // Our lexer is context-aware. Set the in-expression bit so that 1630 // they apply different tokenization rules. 1631 bool Orig = InExpr; 1632 InExpr = true; 1633 Expr E = readExpr1(readPrimary(), 0); 1634 InExpr = Orig; 1635 return E; 1636 } 1637 1638 static Expr combine(StringRef Op, Expr L, Expr R) { 1639 if (Op == "*") 1640 return [=] { return mul(L(), R()); }; 1641 if (Op == "/") { 1642 return [=] { return div(L(), R()); }; 1643 } 1644 if (Op == "+") 1645 return [=] { return add(L(), R()); }; 1646 if (Op == "-") 1647 return [=] { return sub(L(), R()); }; 1648 if (Op == "<<") 1649 return [=] { return leftShift(L(), R()); }; 1650 if (Op == ">>") 1651 return [=] { return rightShift(L(), R()); }; 1652 if (Op == "<") 1653 return [=] { return L().getValue() < R().getValue(); }; 1654 if (Op == ">") 1655 return [=] { return L().getValue() > R().getValue(); }; 1656 if (Op == ">=") 1657 return [=] { return L().getValue() >= R().getValue(); }; 1658 if (Op == "<=") 1659 return [=] { return L().getValue() <= R().getValue(); }; 1660 if (Op == "==") 1661 return [=] { return L().getValue() == R().getValue(); }; 1662 if (Op == "!=") 1663 return [=] { return L().getValue() != R().getValue(); }; 1664 if (Op == "&") 1665 return [=] { return bitAnd(L(), R()); }; 1666 if (Op == "|") 1667 return [=] { return bitOr(L(), R()); }; 1668 llvm_unreachable("invalid operator"); 1669 } 1670 1671 // This is a part of the operator-precedence parser. This function 1672 // assumes that the remaining token stream starts with an operator. 1673 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { 1674 while (!atEOF() && !Error) { 1675 // Read an operator and an expression. 1676 if (consume("?")) 1677 return readTernary(Lhs); 1678 StringRef Op1 = peek(); 1679 if (precedence(Op1) < MinPrec) 1680 break; 1681 skip(); 1682 Expr Rhs = readPrimary(); 1683 1684 // Evaluate the remaining part of the expression first if the 1685 // next operator has greater precedence than the previous one. 1686 // For example, if we have read "+" and "3", and if the next 1687 // operator is "*", then we'll evaluate 3 * ... part first. 1688 while (!atEOF()) { 1689 StringRef Op2 = peek(); 1690 if (precedence(Op2) <= precedence(Op1)) 1691 break; 1692 Rhs = readExpr1(Rhs, precedence(Op2)); 1693 } 1694 1695 Lhs = combine(Op1, Lhs, Rhs); 1696 } 1697 return Lhs; 1698 } 1699 1700 uint64_t static getConstant(StringRef S) { 1701 if (S == "COMMONPAGESIZE") 1702 return Target->PageSize; 1703 if (S == "MAXPAGESIZE") 1704 return Config->MaxPageSize; 1705 error("unknown constant: " + S); 1706 return 0; 1707 } 1708 1709 // Parses Tok as an integer. Returns true if successful. 1710 // It recognizes hexadecimal (prefixed with "0x" or suffixed with "H") 1711 // and decimal numbers. Decimal numbers may have "K" (kilo) or 1712 // "M" (mega) prefixes. 1713 static bool readInteger(StringRef Tok, uint64_t &Result) { 1714 // Negative number 1715 if (Tok.startswith("-")) { 1716 if (!readInteger(Tok.substr(1), Result)) 1717 return false; 1718 Result = -Result; 1719 return true; 1720 } 1721 1722 // Hexadecimal 1723 if (Tok.startswith_lower("0x")) 1724 return !Tok.substr(2).getAsInteger(16, Result); 1725 if (Tok.endswith_lower("H")) 1726 return !Tok.drop_back().getAsInteger(16, Result); 1727 1728 // Decimal 1729 int Suffix = 1; 1730 if (Tok.endswith_lower("K")) { 1731 Suffix = 1024; 1732 Tok = Tok.drop_back(); 1733 } else if (Tok.endswith_lower("M")) { 1734 Suffix = 1024 * 1024; 1735 Tok = Tok.drop_back(); 1736 } 1737 if (Tok.getAsInteger(10, Result)) 1738 return false; 1739 Result *= Suffix; 1740 return true; 1741 } 1742 1743 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { 1744 int Size = StringSwitch<unsigned>(Tok) 1745 .Case("BYTE", 1) 1746 .Case("SHORT", 2) 1747 .Case("LONG", 4) 1748 .Case("QUAD", 8) 1749 .Default(-1); 1750 if (Size == -1) 1751 return nullptr; 1752 1753 return new BytesDataCommand(readParenExpr(), Size); 1754 } 1755 1756 StringRef ScriptParser::readParenLiteral() { 1757 expect("("); 1758 StringRef Tok = next(); 1759 expect(")"); 1760 return Tok; 1761 } 1762 1763 Expr ScriptParser::readPrimary() { 1764 if (peek() == "(") 1765 return readParenExpr(); 1766 1767 StringRef Tok = next(); 1768 std::string Location = getCurrentLocation(); 1769 1770 if (Tok == "~") { 1771 Expr E = readPrimary(); 1772 return [=] { return bitNot(E()); }; 1773 } 1774 if (Tok == "-") { 1775 Expr E = readPrimary(); 1776 return [=] { return minus(E()); }; 1777 } 1778 1779 // Built-in functions are parsed here. 1780 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 1781 if (Tok == "ABSOLUTE") { 1782 Expr Inner = readParenExpr(); 1783 return [=] { 1784 ExprValue I = Inner(); 1785 I.ForceAbsolute = true; 1786 return I; 1787 }; 1788 } 1789 if (Tok == "ADDR") { 1790 StringRef Name = readParenLiteral(); 1791 return [=]() -> ExprValue { 1792 return {Script->getOutputSection(Location, Name), 0}; 1793 }; 1794 } 1795 if (Tok == "ALIGN") { 1796 expect("("); 1797 Expr E = readExpr(); 1798 if (consume(",")) { 1799 Expr E2 = readExpr(); 1800 expect(")"); 1801 return [=] { return alignTo(E().getValue(), E2().getValue()); }; 1802 } 1803 expect(")"); 1804 return [=] { return alignTo(Script->getDot(), E().getValue()); }; 1805 } 1806 if (Tok == "ALIGNOF") { 1807 StringRef Name = readParenLiteral(); 1808 return [=] { return Script->getOutputSection(Location, Name)->Alignment; }; 1809 } 1810 if (Tok == "ASSERT") 1811 return readAssert(); 1812 if (Tok == "CONSTANT") { 1813 StringRef Name = readParenLiteral(); 1814 return [=] { return getConstant(Name); }; 1815 } 1816 if (Tok == "DATA_SEGMENT_ALIGN") { 1817 expect("("); 1818 Expr E = readExpr(); 1819 expect(","); 1820 readExpr(); 1821 expect(")"); 1822 return [=] { return alignTo(Script->getDot(), E().getValue()); }; 1823 } 1824 if (Tok == "DATA_SEGMENT_END") { 1825 expect("("); 1826 expect("."); 1827 expect(")"); 1828 return [] { return Script->getDot(); }; 1829 } 1830 if (Tok == "DATA_SEGMENT_RELRO_END") { 1831 // GNU linkers implements more complicated logic to handle 1832 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and 1833 // just align to the next page boundary for simplicity. 1834 expect("("); 1835 readExpr(); 1836 expect(","); 1837 readExpr(); 1838 expect(")"); 1839 return [] { return alignTo(Script->getDot(), Target->PageSize); }; 1840 } 1841 if (Tok == "DEFINED") { 1842 StringRef Name = readParenLiteral(); 1843 return [=] { return Script->isDefined(Name) ? 1 : 0; }; 1844 } 1845 if (Tok == "LOADADDR") { 1846 StringRef Name = readParenLiteral(); 1847 return [=] { return Script->getOutputSection(Location, Name)->getLMA(); }; 1848 } 1849 if (Tok == "SEGMENT_START") { 1850 expect("("); 1851 skip(); 1852 expect(","); 1853 Expr E = readExpr(); 1854 expect(")"); 1855 return [=] { return E(); }; 1856 } 1857 if (Tok == "SIZEOF") { 1858 StringRef Name = readParenLiteral(); 1859 return [=] { return Script->getOutputSectionSize(Name); }; 1860 } 1861 if (Tok == "SIZEOF_HEADERS") 1862 return [=] { return elf::getHeaderSize(); }; 1863 1864 // Tok is a literal number. 1865 uint64_t V; 1866 if (readInteger(Tok, V)) 1867 return [=] { return V; }; 1868 1869 // Tok is a symbol name. 1870 if (Tok != ".") { 1871 if (!isValidCIdentifier(Tok)) 1872 setError("malformed number: " + Tok); 1873 Script->Opt.UndefinedSymbols.push_back(Tok); 1874 } 1875 return [=] { return Script->getSymbolValue(Location, Tok); }; 1876 } 1877 1878 Expr ScriptParser::readTernary(Expr Cond) { 1879 Expr L = readExpr(); 1880 expect(":"); 1881 Expr R = readExpr(); 1882 return [=] { return Cond().getValue() ? L() : R(); }; 1883 } 1884 1885 Expr ScriptParser::readParenExpr() { 1886 expect("("); 1887 Expr E = readExpr(); 1888 expect(")"); 1889 return E; 1890 } 1891 1892 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 1893 std::vector<StringRef> Phdrs; 1894 while (!Error && peek().startswith(":")) { 1895 StringRef Tok = next(); 1896 Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); 1897 } 1898 return Phdrs; 1899 } 1900 1901 // Read a program header type name. The next token must be a 1902 // name of a program header type or a constant (e.g. "0x3"). 1903 unsigned ScriptParser::readPhdrType() { 1904 StringRef Tok = next(); 1905 uint64_t Val; 1906 if (readInteger(Tok, Val)) 1907 return Val; 1908 1909 unsigned Ret = StringSwitch<unsigned>(Tok) 1910 .Case("PT_NULL", PT_NULL) 1911 .Case("PT_LOAD", PT_LOAD) 1912 .Case("PT_DYNAMIC", PT_DYNAMIC) 1913 .Case("PT_INTERP", PT_INTERP) 1914 .Case("PT_NOTE", PT_NOTE) 1915 .Case("PT_SHLIB", PT_SHLIB) 1916 .Case("PT_PHDR", PT_PHDR) 1917 .Case("PT_TLS", PT_TLS) 1918 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1919 .Case("PT_GNU_STACK", PT_GNU_STACK) 1920 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1921 .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) 1922 .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) 1923 .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) 1924 .Default(-1); 1925 1926 if (Ret == (unsigned)-1) { 1927 setError("invalid program header type: " + Tok); 1928 return PT_NULL; 1929 } 1930 return Ret; 1931 } 1932 1933 // Reads an anonymous version declaration. 1934 void ScriptParser::readAnonymousDeclaration() { 1935 std::vector<SymbolVersion> Locals; 1936 std::vector<SymbolVersion> Globals; 1937 std::tie(Locals, Globals) = readSymbols(); 1938 1939 for (SymbolVersion V : Locals) { 1940 if (V.Name == "*") 1941 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1942 else 1943 Config->VersionScriptLocals.push_back(V); 1944 } 1945 1946 for (SymbolVersion V : Globals) 1947 Config->VersionScriptGlobals.push_back(V); 1948 1949 expect(";"); 1950 } 1951 1952 // Reads a non-anonymous version definition, 1953 // e.g. "VerStr { global: foo; bar; local: *; };". 1954 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1955 // Read a symbol list. 1956 std::vector<SymbolVersion> Locals; 1957 std::vector<SymbolVersion> Globals; 1958 std::tie(Locals, Globals) = readSymbols(); 1959 1960 for (SymbolVersion V : Locals) { 1961 if (V.Name == "*") 1962 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1963 else 1964 Config->VersionScriptLocals.push_back(V); 1965 } 1966 1967 // Create a new version definition and add that to the global symbols. 1968 VersionDefinition Ver; 1969 Ver.Name = VerStr; 1970 Ver.Globals = Globals; 1971 1972 // User-defined version number starts from 2 because 0 and 1 are 1973 // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively. 1974 Ver.Id = Config->VersionDefinitions.size() + 2; 1975 Config->VersionDefinitions.push_back(Ver); 1976 1977 // Each version may have a parent version. For example, "Ver2" 1978 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" 1979 // as a parent. This version hierarchy is, probably against your 1980 // instinct, purely for hint; the runtime doesn't care about it 1981 // at all. In LLD, we simply ignore it. 1982 if (peek() != ";") 1983 skip(); 1984 expect(";"); 1985 } 1986 1987 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 1988 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 1989 ScriptParser::readSymbols() { 1990 std::vector<SymbolVersion> Locals; 1991 std::vector<SymbolVersion> Globals; 1992 std::vector<SymbolVersion> *V = &Globals; 1993 1994 while (!Error) { 1995 if (consume("}")) 1996 break; 1997 if (consumeLabel("local")) { 1998 V = &Locals; 1999 continue; 2000 } 2001 if (consumeLabel("global")) { 2002 V = &Globals; 2003 continue; 2004 } 2005 2006 if (consume("extern")) { 2007 std::vector<SymbolVersion> Ext = readVersionExtern(); 2008 V->insert(V->end(), Ext.begin(), Ext.end()); 2009 } else { 2010 StringRef Tok = next(); 2011 V->push_back({unquote(Tok), false, hasWildcard(Tok)}); 2012 } 2013 expect(";"); 2014 } 2015 return {Locals, Globals}; 2016 } 2017 2018 // Reads an "extern C++" directive, e.g., 2019 // "extern "C++" { ns::*; "f(int, double)"; };" 2020 std::vector<SymbolVersion> ScriptParser::readVersionExtern() { 2021 StringRef Tok = next(); 2022 bool IsCXX = Tok == "\"C++\""; 2023 if (!IsCXX && Tok != "\"C\"") 2024 setError("Unknown language"); 2025 expect("{"); 2026 2027 std::vector<SymbolVersion> Ret; 2028 while (!Error && peek() != "}") { 2029 StringRef Tok = next(); 2030 bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); 2031 Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); 2032 expect(";"); 2033 } 2034 2035 expect("}"); 2036 return Ret; 2037 } 2038 2039 uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2, 2040 StringRef S3) { 2041 if (!(consume(S1) || consume(S2) || consume(S3))) { 2042 setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); 2043 return 0; 2044 } 2045 expect("="); 2046 2047 // TODO: Fully support constant expressions. 2048 uint64_t Val; 2049 if (!readInteger(next(), Val)) 2050 setError("nonconstant expression for " + S1); 2051 return Val; 2052 } 2053 2054 // Parse the MEMORY command as specified in: 2055 // https://sourceware.org/binutils/docs/ld/MEMORY.html 2056 // 2057 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... } 2058 void ScriptParser::readMemory() { 2059 expect("{"); 2060 while (!Error && !consume("}")) { 2061 StringRef Name = next(); 2062 2063 uint32_t Flags = 0; 2064 uint32_t NegFlags = 0; 2065 if (consume("(")) { 2066 std::tie(Flags, NegFlags) = readMemoryAttributes(); 2067 expect(")"); 2068 } 2069 expect(":"); 2070 2071 uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); 2072 expect(","); 2073 uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); 2074 2075 // Add the memory region to the region map (if it doesn't already exist). 2076 auto It = Script->Opt.MemoryRegions.find(Name); 2077 if (It != Script->Opt.MemoryRegions.end()) 2078 setError("region '" + Name + "' already defined"); 2079 else 2080 Script->Opt.MemoryRegions[Name] = {Name, Origin, Length, 2081 Origin, Flags, NegFlags}; 2082 } 2083 } 2084 2085 // This function parses the attributes used to match against section 2086 // flags when placing output sections in a memory region. These flags 2087 // are only used when an explicit memory region name is not used. 2088 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { 2089 uint32_t Flags = 0; 2090 uint32_t NegFlags = 0; 2091 bool Invert = false; 2092 2093 for (char C : next().lower()) { 2094 uint32_t Flag = 0; 2095 if (C == '!') 2096 Invert = !Invert; 2097 else if (C == 'w') 2098 Flag = SHF_WRITE; 2099 else if (C == 'x') 2100 Flag = SHF_EXECINSTR; 2101 else if (C == 'a') 2102 Flag = SHF_ALLOC; 2103 else if (C != 'r') 2104 setError("invalid memory region attribute"); 2105 2106 if (Invert) 2107 NegFlags |= Flag; 2108 else 2109 Flags |= Flag; 2110 } 2111 return {Flags, NegFlags}; 2112 } 2113 2114 void elf::readLinkerScript(MemoryBufferRef MB) { 2115 ScriptParser(MB).readLinkerScript(); 2116 } 2117 2118 void elf::readVersionScript(MemoryBufferRef MB) { 2119 ScriptParser(MB).readVersionScript(); 2120 } 2121 2122 void elf::readDynamicList(MemoryBufferRef MB) { 2123 ScriptParser(MB).readDynamicList(); 2124 } 2125