1 //===- LinkerScript.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the parser/evaluator of the linker script. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "LinkerScript.h" 15 #include "Config.h" 16 #include "Driver.h" 17 #include "InputSection.h" 18 #include "Memory.h" 19 #include "OutputSections.h" 20 #include "ScriptLexer.h" 21 #include "Strings.h" 22 #include "SymbolTable.h" 23 #include "Symbols.h" 24 #include "SyntheticSections.h" 25 #include "Target.h" 26 #include "Writer.h" 27 #include "llvm/ADT/STLExtras.h" 28 #include "llvm/ADT/SmallString.h" 29 #include "llvm/ADT/StringRef.h" 30 #include "llvm/ADT/StringSwitch.h" 31 #include "llvm/Support/Casting.h" 32 #include "llvm/Support/ELF.h" 33 #include "llvm/Support/Endian.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/FileSystem.h" 36 #include "llvm/Support/MathExtras.h" 37 #include "llvm/Support/Path.h" 38 #include <algorithm> 39 #include <cassert> 40 #include <cstddef> 41 #include <cstdint> 42 #include <iterator> 43 #include <limits> 44 #include <memory> 45 #include <string> 46 #include <tuple> 47 #include <vector> 48 49 using namespace llvm; 50 using namespace llvm::ELF; 51 using namespace llvm::object; 52 using namespace llvm::support::endian; 53 using namespace lld; 54 using namespace lld::elf; 55 56 LinkerScriptBase *elf::ScriptBase; 57 ScriptConfiguration *elf::ScriptConfig; 58 59 template <class ELFT> static SymbolBody *addRegular(SymbolAssignment *Cmd) { 60 Symbol *Sym; 61 uint8_t Visibility = Cmd->Hidden ? 
STV_HIDDEN : STV_DEFAULT; 62 std::tie(Sym, std::ignore) = Symtab<ELFT>::X->insert( 63 Cmd->Name, /*Type*/ 0, Visibility, /*CanOmitFromDynSym*/ false, 64 /*File*/ nullptr); 65 Sym->Binding = STB_GLOBAL; 66 SectionBase *Sec = 67 Cmd->Expression.IsAbsolute() ? nullptr : Cmd->Expression.Section(); 68 replaceBody<DefinedRegular>(Sym, Cmd->Name, /*IsLocal=*/false, Visibility, 69 STT_NOTYPE, 0, 0, Sec, nullptr); 70 return Sym->body(); 71 } 72 73 static bool isUnderSysroot(StringRef Path) { 74 if (Config->Sysroot == "") 75 return false; 76 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 77 if (sys::fs::equivalent(Config->Sysroot, Path)) 78 return true; 79 return false; 80 } 81 82 OutputSection *LinkerScriptBase::getOutputSection(const Twine &Loc, 83 StringRef Name) { 84 static OutputSection FakeSec("", 0, 0); 85 86 for (OutputSection *Sec : *OutputSections) 87 if (Sec->Name == Name) 88 return Sec; 89 90 error(Loc + ": undefined section " + Name); 91 return &FakeSec; 92 } 93 94 // This function is essentially the same as getOutputSection(Name)->Size, 95 // but it won't print out an error message if a given section is not found. 96 // 97 // Linker script does not create an output section if its content is empty. 98 // We want to allow SIZEOF(.foo) where .foo is a section which happened to 99 // be empty. That is why this function is different from getOutputSection(). 100 uint64_t LinkerScriptBase::getOutputSectionSize(StringRef Name) { 101 for (OutputSection *Sec : *OutputSections) 102 if (Sec->Name == Name) 103 return Sec->Size; 104 return 0; 105 } 106 107 void LinkerScriptBase::setDot(Expr E, const Twine &Loc, bool InSec) { 108 uint64_t Val = E(); 109 if (Val < Dot) { 110 if (InSec) 111 error(Loc + ": unable to move location counter backward for: " + 112 CurOutSec->Name); 113 else 114 error(Loc + ": unable to move location counter backward"); 115 } 116 Dot = Val; 117 // Update to location counter means update to section size. 
118 if (InSec) 119 CurOutSec->Size = Dot - CurOutSec->Addr; 120 } 121 122 // Sets value of a symbol. Two kinds of symbols are processed: synthetic 123 // symbols, whose value is an offset from beginning of section and regular 124 // symbols whose value is absolute. 125 void LinkerScriptBase::assignSymbol(SymbolAssignment *Cmd, bool InSec) { 126 if (Cmd->Name == ".") { 127 setDot(Cmd->Expression, Cmd->Location, InSec); 128 return; 129 } 130 131 if (!Cmd->Sym) 132 return; 133 134 auto *Sym = cast<DefinedRegular>(Cmd->Sym); 135 Sym->Value = Cmd->Expression(); 136 if (!Cmd->Expression.IsAbsolute()) { 137 Sym->Section = Cmd->Expression.Section(); 138 if (auto *Sec = dyn_cast_or_null<OutputSection>(Sym->Section)) 139 if (Sec->Flags & SHF_ALLOC) 140 Sym->Value -= Sec->Addr; 141 } 142 } 143 144 template <class ELFT> 145 void LinkerScript<ELFT>::addSymbol(SymbolAssignment *Cmd) { 146 if (Cmd->Name == ".") 147 return; 148 149 // If a symbol was in PROVIDE(), we need to define it only when 150 // it is a referenced undefined symbol. 151 SymbolBody *B = Symtab<ELFT>::X->find(Cmd->Name); 152 if (Cmd->Provide && (!B || B->isDefined())) 153 return; 154 155 Cmd->Sym = addRegular<ELFT>(Cmd); 156 157 // If there are sections, then let the value be assigned later in 158 // `assignAddresses`. 
159 if (!ScriptConfig->HasSections) 160 assignSymbol(Cmd); 161 } 162 163 bool SymbolAssignment::classof(const BaseCommand *C) { 164 return C->Kind == AssignmentKind; 165 } 166 167 bool OutputSectionCommand::classof(const BaseCommand *C) { 168 return C->Kind == OutputSectionKind; 169 } 170 171 bool InputSectionDescription::classof(const BaseCommand *C) { 172 return C->Kind == InputSectionKind; 173 } 174 175 bool AssertCommand::classof(const BaseCommand *C) { 176 return C->Kind == AssertKind; 177 } 178 179 bool BytesDataCommand::classof(const BaseCommand *C) { 180 return C->Kind == BytesDataKind; 181 } 182 183 template <class ELFT> LinkerScript<ELFT>::LinkerScript() = default; 184 template <class ELFT> LinkerScript<ELFT>::~LinkerScript() = default; 185 186 static StringRef basename(InputSectionBase *S) { 187 if (S->File) 188 return sys::path::filename(S->File->getName()); 189 return ""; 190 } 191 192 bool LinkerScriptBase::shouldKeep(InputSectionBase *S) { 193 for (InputSectionDescription *ID : Opt.KeptSections) 194 if (ID->FilePat.match(basename(S))) 195 for (SectionPattern &P : ID->SectionPatterns) 196 if (P.SectionPat.match(S->Name)) 197 return true; 198 return false; 199 } 200 201 static bool comparePriority(InputSectionBase *A, InputSectionBase *B) { 202 return getPriority(A->Name) < getPriority(B->Name); 203 } 204 205 static bool compareName(InputSectionBase *A, InputSectionBase *B) { 206 return A->Name < B->Name; 207 } 208 209 static bool compareAlignment(InputSectionBase *A, InputSectionBase *B) { 210 // ">" is not a mistake. Larger alignments are placed before smaller 211 // alignments in order to reduce the amount of padding necessary. 212 // This is compatible with GNU. 
213 return A->Alignment > B->Alignment; 214 } 215 216 static std::function<bool(InputSectionBase *, InputSectionBase *)> 217 getComparator(SortSectionPolicy K) { 218 switch (K) { 219 case SortSectionPolicy::Alignment: 220 return compareAlignment; 221 case SortSectionPolicy::Name: 222 return compareName; 223 case SortSectionPolicy::Priority: 224 return comparePriority; 225 default: 226 llvm_unreachable("unknown sort policy"); 227 } 228 } 229 230 static bool matchConstraints(ArrayRef<InputSectionBase *> Sections, 231 ConstraintKind Kind) { 232 if (Kind == ConstraintKind::NoConstraint) 233 return true; 234 bool IsRW = llvm::any_of(Sections, [=](InputSectionBase *Sec2) { 235 auto *Sec = static_cast<InputSectionBase *>(Sec2); 236 return Sec->Flags & SHF_WRITE; 237 }); 238 return (IsRW && Kind == ConstraintKind::ReadWrite) || 239 (!IsRW && Kind == ConstraintKind::ReadOnly); 240 } 241 242 static void sortSections(InputSectionBase **Begin, InputSectionBase **End, 243 SortSectionPolicy K) { 244 if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None) 245 std::stable_sort(Begin, End, getComparator(K)); 246 } 247 248 // Compute and remember which sections the InputSectionDescription matches. 249 void LinkerScriptBase::computeInputSections(InputSectionDescription *I) { 250 // Collects all sections that satisfy constraints of I 251 // and attach them to I. 252 for (SectionPattern &Pat : I->SectionPatterns) { 253 size_t SizeBefore = I->Sections.size(); 254 255 for (InputSectionBase *S : InputSections) { 256 if (S->Assigned) 257 continue; 258 // For -emit-relocs we have to ignore entries like 259 // .rela.dyn : { *(.rela.data) } 260 // which are common because they are in the default bfd script. 
261 if (S->Type == SHT_REL || S->Type == SHT_RELA) 262 continue; 263 264 StringRef Filename = basename(S); 265 if (!I->FilePat.match(Filename) || Pat.ExcludedFilePat.match(Filename)) 266 continue; 267 if (!Pat.SectionPat.match(S->Name)) 268 continue; 269 I->Sections.push_back(S); 270 S->Assigned = true; 271 } 272 273 // Sort sections as instructed by SORT-family commands and --sort-section 274 // option. Because SORT-family commands can be nested at most two depth 275 // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command 276 // line option is respected even if a SORT command is given, the exact 277 // behavior we have here is a bit complicated. Here are the rules. 278 // 279 // 1. If two SORT commands are given, --sort-section is ignored. 280 // 2. If one SORT command is given, and if it is not SORT_NONE, 281 // --sort-section is handled as an inner SORT command. 282 // 3. If one SORT command is given, and if it is SORT_NONE, don't sort. 283 // 4. If no SORT command is given, sort according to --sort-section. 
284 InputSectionBase **Begin = I->Sections.data() + SizeBefore; 285 InputSectionBase **End = I->Sections.data() + I->Sections.size(); 286 if (Pat.SortOuter != SortSectionPolicy::None) { 287 if (Pat.SortInner == SortSectionPolicy::Default) 288 sortSections(Begin, End, Config->SortSection); 289 else 290 sortSections(Begin, End, Pat.SortInner); 291 sortSections(Begin, End, Pat.SortOuter); 292 } 293 } 294 } 295 296 template <class ELFT> 297 void LinkerScript<ELFT>::discard(ArrayRef<InputSectionBase *> V) { 298 for (InputSectionBase *S : V) { 299 S->Live = false; 300 if (S == In<ELFT>::ShStrTab) 301 error("discarding .shstrtab section is not allowed"); 302 discard(S->DependentSections); 303 } 304 } 305 306 std::vector<InputSectionBase *> 307 LinkerScriptBase::createInputSectionList(OutputSectionCommand &OutCmd) { 308 std::vector<InputSectionBase *> Ret; 309 310 for (const std::unique_ptr<BaseCommand> &Base : OutCmd.Commands) { 311 auto *Cmd = dyn_cast<InputSectionDescription>(Base.get()); 312 if (!Cmd) 313 continue; 314 computeInputSections(Cmd); 315 for (InputSectionBase *S : Cmd->Sections) 316 Ret.push_back(static_cast<InputSectionBase *>(S)); 317 } 318 319 return Ret; 320 } 321 322 template <class ELFT> 323 void LinkerScript<ELFT>::processCommands(OutputSectionFactory &Factory) { 324 // A symbol can be assigned before any section is mentioned in the linker 325 // script. In an DSO, the symbol values are addresses, so the only important 326 // section values are: 327 // * SHN_UNDEF 328 // * SHN_ABS 329 // * Any value meaning a regular section. 330 // To handle that, create a dummy aether section that fills the void before 331 // the linker scripts switches to another section. It has an index of one 332 // which will map to whatever the first actual section is. 
333 Aether = make<OutputSection>("", 0, SHF_ALLOC); 334 Aether->SectionIndex = 1; 335 CurOutSec = Aether; 336 337 for (unsigned I = 0; I < Opt.Commands.size(); ++I) { 338 auto Iter = Opt.Commands.begin() + I; 339 const std::unique_ptr<BaseCommand> &Base1 = *Iter; 340 341 // Handle symbol assignments outside of any output section. 342 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base1.get())) { 343 addSymbol(Cmd); 344 continue; 345 } 346 347 if (auto *Cmd = dyn_cast<AssertCommand>(Base1.get())) { 348 // If we don't have SECTIONS then output sections have already been 349 // created by Writer<ELFT>. The LinkerScript<ELFT>::assignAddresses 350 // will not be called, so ASSERT should be evaluated now. 351 if (!Opt.HasSections) 352 Cmd->Expression(); 353 continue; 354 } 355 356 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base1.get())) { 357 std::vector<InputSectionBase *> V = createInputSectionList(*Cmd); 358 359 // The output section name `/DISCARD/' is special. 360 // Any input section assigned to it is discarded. 361 if (Cmd->Name == "/DISCARD/") { 362 discard(V); 363 continue; 364 } 365 366 // This is for ONLY_IF_RO and ONLY_IF_RW. An output section directive 367 // ".foo : ONLY_IF_R[OW] { ... }" is handled only if all member input 368 // sections satisfy a given constraint. If not, a directive is handled 369 // as if it wasn't present from the beginning. 370 // 371 // Because we'll iterate over Commands many more times, the easiest 372 // way to "make it as if it wasn't present" is to just remove it. 373 if (!matchConstraints(V, Cmd->Constraint)) { 374 for (InputSectionBase *S : V) 375 S->Assigned = false; 376 Opt.Commands.erase(Iter); 377 --I; 378 continue; 379 } 380 381 // A directive may contain symbol definitions like this: 382 // ".foo : { ...; bar = .; }". Handle them. 383 for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) 384 if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base.get())) 385 addSymbol(OutCmd); 386 387 // Handle subalign (e.g. 
".foo : SUBALIGN(32) { ... }"). If subalign 388 // is given, input sections are aligned to that value, whether the 389 // given value is larger or smaller than the original section alignment. 390 if (Cmd->SubalignExpr) { 391 uint32_t Subalign = Cmd->SubalignExpr(); 392 for (InputSectionBase *S : V) 393 S->Alignment = Subalign; 394 } 395 396 // Add input sections to an output section. 397 for (InputSectionBase *S : V) 398 Factory.addInputSec(S, Cmd->Name); 399 } 400 } 401 CurOutSec = nullptr; 402 } 403 404 // Add sections that didn't match any sections command. 405 void LinkerScriptBase::addOrphanSections(OutputSectionFactory &Factory) { 406 for (InputSectionBase *S : InputSections) 407 if (S->Live && !S->OutSec) 408 Factory.addInputSec(S, getOutputSectionName(S->Name)); 409 } 410 411 static bool isTbss(OutputSection *Sec) { 412 return (Sec->Flags & SHF_TLS) && Sec->Type == SHT_NOBITS; 413 } 414 415 void LinkerScriptBase::output(InputSection *S) { 416 if (!AlreadyOutputIS.insert(S).second) 417 return; 418 bool IsTbss = isTbss(CurOutSec); 419 420 uint64_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot; 421 Pos = alignTo(Pos, S->Alignment); 422 S->OutSecOff = Pos - CurOutSec->Addr; 423 Pos += S->getSize(); 424 425 // Update output section size after adding each section. This is so that 426 // SIZEOF works correctly in the case below: 427 // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) } 428 CurOutSec->Size = Pos - CurOutSec->Addr; 429 430 // If there is a memory region associated with this input section, then 431 // place the section in that region and update the region index. 
432 if (CurMemRegion) { 433 CurMemRegion->Offset += CurOutSec->Size; 434 uint64_t CurSize = CurMemRegion->Offset - CurMemRegion->Origin; 435 if (CurSize > CurMemRegion->Length) { 436 uint64_t OverflowAmt = CurSize - CurMemRegion->Length; 437 error("section '" + CurOutSec->Name + "' will not fit in region '" + 438 CurMemRegion->Name + "': overflowed by " + Twine(OverflowAmt) + 439 " bytes"); 440 } 441 } 442 443 if (IsTbss) 444 ThreadBssOffset = Pos - Dot; 445 else 446 Dot = Pos; 447 } 448 449 void LinkerScriptBase::flush() { 450 assert(CurOutSec); 451 if (!AlreadyOutputOS.insert(CurOutSec).second) 452 return; 453 for (InputSection *I : CurOutSec->Sections) 454 output(I); 455 } 456 457 void LinkerScriptBase::switchTo(OutputSection *Sec) { 458 if (CurOutSec == Sec) 459 return; 460 if (AlreadyOutputOS.count(Sec)) 461 return; 462 463 CurOutSec = Sec; 464 465 Dot = alignTo(Dot, CurOutSec->Alignment); 466 CurOutSec->Addr = isTbss(CurOutSec) ? Dot + ThreadBssOffset : Dot; 467 468 // If neither AT nor AT> is specified for an allocatable section, the linker 469 // will set the LMA such that the difference between VMA and LMA for the 470 // section is the same as the preceding output section in the same region 471 // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html 472 if (LMAOffset) 473 CurOutSec->LMAOffset = LMAOffset(); 474 } 475 476 void LinkerScriptBase::process(BaseCommand &Base) { 477 // This handles the assignments to symbol or to a location counter (.) 478 if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) { 479 assignSymbol(AssignCmd, true); 480 return; 481 } 482 483 // Handle BYTE(), SHORT(), LONG(), or QUAD(). 
484 if (auto *DataCmd = dyn_cast<BytesDataCommand>(&Base)) { 485 DataCmd->Offset = Dot - CurOutSec->Addr; 486 Dot += DataCmd->Size; 487 CurOutSec->Size = Dot - CurOutSec->Addr; 488 return; 489 } 490 491 if (auto *AssertCmd = dyn_cast<AssertCommand>(&Base)) { 492 AssertCmd->Expression(); 493 return; 494 } 495 496 // It handles single input section description command, 497 // calculates and assigns the offsets for each section and also 498 // updates the output section size. 499 auto &ICmd = cast<InputSectionDescription>(Base); 500 for (InputSectionBase *IB : ICmd.Sections) { 501 // We tentatively added all synthetic sections at the beginning and removed 502 // empty ones afterwards (because there is no way to know whether they were 503 // going be empty or not other than actually running linker scripts.) 504 // We need to ignore remains of empty sections. 505 if (auto *Sec = dyn_cast<SyntheticSection>(IB)) 506 if (Sec->empty()) 507 continue; 508 509 if (!IB->Live) 510 continue; 511 assert(CurOutSec == IB->OutSec || AlreadyOutputOS.count(IB->OutSec)); 512 output(cast<InputSection>(IB)); 513 } 514 } 515 516 static OutputSection * 517 findSection(StringRef Name, const std::vector<OutputSection *> &Sections) { 518 auto End = Sections.end(); 519 auto HasName = [=](OutputSection *Sec) { return Sec->Name == Name; }; 520 auto I = std::find_if(Sections.begin(), End, HasName); 521 std::vector<OutputSection *> Ret; 522 if (I == End) 523 return nullptr; 524 assert(std::find_if(I + 1, End, HasName) == End); 525 return *I; 526 } 527 528 // This function searches for a memory region to place the given output 529 // section in. If found, a pointer to the appropriate memory region is 530 // returned. Otherwise, a nullptr is returned. 531 MemoryRegion *LinkerScriptBase::findMemoryRegion(OutputSectionCommand *Cmd, 532 OutputSection *Sec) { 533 // If a memory region name was specified in the output section command, 534 // then try to find that region first. 
535 if (!Cmd->MemoryRegionName.empty()) { 536 auto It = Opt.MemoryRegions.find(Cmd->MemoryRegionName); 537 if (It != Opt.MemoryRegions.end()) 538 return &It->second; 539 error("memory region '" + Cmd->MemoryRegionName + "' not declared"); 540 return nullptr; 541 } 542 543 // The memory region name is empty, thus a suitable region must be 544 // searched for in the region map. If the region map is empty, just 545 // return. Note that this check doesn't happen at the very beginning 546 // so that uses of undeclared regions can be caught. 547 if (!Opt.MemoryRegions.size()) 548 return nullptr; 549 550 // See if a region can be found by matching section flags. 551 for (auto &MRI : Opt.MemoryRegions) { 552 MemoryRegion &MR = MRI.second; 553 if ((MR.Flags & Sec->Flags) != 0 && (MR.NegFlags & Sec->Flags) == 0) 554 return &MR; 555 } 556 557 // Otherwise, no suitable region was found. 558 if (Sec->Flags & SHF_ALLOC) 559 error("no memory region specified for section '" + Sec->Name + "'"); 560 return nullptr; 561 } 562 563 // This function assigns offsets to input sections and an output section 564 // for a single sections command (e.g. ".text { *(.text); }"). 565 void LinkerScriptBase::assignOffsets(OutputSectionCommand *Cmd) { 566 OutputSection *Sec = findSection(Cmd->Name, *OutputSections); 567 if (!Sec) 568 return; 569 570 if (Cmd->AddrExpr && Sec->Flags & SHF_ALLOC) 571 setDot(Cmd->AddrExpr, Cmd->Location); 572 573 if (Cmd->LMAExpr) { 574 uint64_t D = Dot; 575 LMAOffset = [=] { return Cmd->LMAExpr() - D; }; 576 } 577 578 // Handle align (e.g. ".foo : ALIGN(16) { ... }"). 579 if (Cmd->AlignExpr) 580 Sec->updateAlignment(Cmd->AlignExpr()); 581 582 // Try and find an appropriate memory region to assign offsets in. 583 CurMemRegion = findMemoryRegion(Cmd, Sec); 584 if (CurMemRegion) 585 Dot = CurMemRegion->Offset; 586 switchTo(Sec); 587 588 // Find the last section output location. We will output orphan sections 589 // there so that end symbols point to the correct location. 
590 auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(), 591 [](const std::unique_ptr<BaseCommand> &Cmd) { 592 return !isa<SymbolAssignment>(*Cmd); 593 }) 594 .base(); 595 for (auto I = Cmd->Commands.begin(); I != E; ++I) 596 process(**I); 597 flush(); 598 std::for_each(E, Cmd->Commands.end(), 599 [this](std::unique_ptr<BaseCommand> &B) { process(*B.get()); }); 600 } 601 602 void LinkerScriptBase::removeEmptyCommands() { 603 // It is common practice to use very generic linker scripts. So for any 604 // given run some of the output sections in the script will be empty. 605 // We could create corresponding empty output sections, but that would 606 // clutter the output. 607 // We instead remove trivially empty sections. The bfd linker seems even 608 // more aggressive at removing them. 609 auto Pos = std::remove_if( 610 Opt.Commands.begin(), Opt.Commands.end(), 611 [&](const std::unique_ptr<BaseCommand> &Base) { 612 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 613 return !findSection(Cmd->Name, *OutputSections); 614 return false; 615 }); 616 Opt.Commands.erase(Pos, Opt.Commands.end()); 617 } 618 619 static bool isAllSectionDescription(const OutputSectionCommand &Cmd) { 620 for (const std::unique_ptr<BaseCommand> &I : Cmd.Commands) 621 if (!isa<InputSectionDescription>(*I)) 622 return false; 623 return true; 624 } 625 626 void LinkerScriptBase::adjustSectionsBeforeSorting() { 627 // If the output section contains only symbol assignments, create a 628 // corresponding output section. The bfd linker seems to only create them if 629 // '.' is assigned to, but creating these section should not have any bad 630 // consequeces and gives us a section to put the symbol in. 
631 uint64_t Flags = SHF_ALLOC; 632 uint32_t Type = SHT_NOBITS; 633 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 634 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 635 if (!Cmd) 636 continue; 637 if (OutputSection *Sec = findSection(Cmd->Name, *OutputSections)) { 638 Flags = Sec->Flags; 639 Type = Sec->Type; 640 continue; 641 } 642 643 if (isAllSectionDescription(*Cmd)) 644 continue; 645 646 auto *OutSec = make<OutputSection>(Cmd->Name, Type, Flags); 647 OutputSections->push_back(OutSec); 648 } 649 } 650 651 void LinkerScriptBase::adjustSectionsAfterSorting() { 652 placeOrphanSections(); 653 654 // If output section command doesn't specify any segments, 655 // and we haven't previously assigned any section to segment, 656 // then we simply assign section to the very first load segment. 657 // Below is an example of such linker script: 658 // PHDRS { seg PT_LOAD; } 659 // SECTIONS { .aaa : { *(.aaa) } } 660 std::vector<StringRef> DefPhdrs; 661 auto FirstPtLoad = 662 std::find_if(Opt.PhdrsCommands.begin(), Opt.PhdrsCommands.end(), 663 [](const PhdrsCommand &Cmd) { return Cmd.Type == PT_LOAD; }); 664 if (FirstPtLoad != Opt.PhdrsCommands.end()) 665 DefPhdrs.push_back(FirstPtLoad->Name); 666 667 // Walk the commands and propagate the program headers to commands that don't 668 // explicitly specify them. 669 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 670 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 671 if (!Cmd) 672 continue; 673 if (Cmd->Phdrs.empty()) 674 Cmd->Phdrs = DefPhdrs; 675 else 676 DefPhdrs = Cmd->Phdrs; 677 } 678 679 removeEmptyCommands(); 680 } 681 682 // When placing orphan sections, we want to place them after symbol assignments 683 // so that an orphan after 684 // begin_foo = .; 685 // foo : { *(foo) } 686 // end_foo = .; 687 // doesn't break the intended meaning of the begin/end symbols. 
688 // We don't want to go over sections since Writer<ELFT>::sortSections is the 689 // one in charge of deciding the order of the sections. 690 // We don't want to go over alignments, since doing so in 691 // rx_sec : { *(rx_sec) } 692 // . = ALIGN(0x1000); 693 // /* The RW PT_LOAD starts here*/ 694 // rw_sec : { *(rw_sec) } 695 // would mean that the RW PT_LOAD would become unaligned. 696 static bool shouldSkip(const BaseCommand &Cmd) { 697 if (isa<OutputSectionCommand>(Cmd)) 698 return false; 699 const auto *Assign = dyn_cast<SymbolAssignment>(&Cmd); 700 if (!Assign) 701 return true; 702 return Assign->Name != "."; 703 } 704 705 // Orphan sections are sections present in the input files which are 706 // not explicitly placed into the output file by the linker script. 707 // 708 // When the control reaches this function, Opt.Commands contains 709 // output section commands for non-orphan sections only. This function 710 // adds new elements for orphan sections to Opt.Commands so that all 711 // sections are explicitly handled by Opt.Commands. 712 // 713 // Writer<ELFT>::sortSections has already sorted output sections. 714 // What we need to do is to scan OutputSections vector and 715 // Opt.Commands in parallel to find orphan sections. If there is an 716 // output section that doesn't have a corresponding entry in 717 // Opt.Commands, we will insert a new entry to Opt.Commands. 718 // 719 // There is some ambiguity as to where exactly a new entry should be 720 // inserted, because Opt.Commands contains not only output section 721 // commands but other types of commands such as symbol assignment 722 // expressions. There's no correct answer here due to the lack of the 723 // formal specification of the linker script. We use heuristics to 724 // determine whether a new output command should be added before or 725 // after another commands. For the details, look at shouldSkip 726 // function. 
727 void LinkerScriptBase::placeOrphanSections() { 728 // The OutputSections are already in the correct order. 729 // This loops creates or moves commands as needed so that they are in the 730 // correct order. 731 int CmdIndex = 0; 732 733 // As a horrible special case, skip the first . assignment if it is before any 734 // section. We do this because it is common to set a load address by starting 735 // the script with ". = 0xabcd" and the expectation is that every section is 736 // after that. 737 auto FirstSectionOrDotAssignment = 738 std::find_if(Opt.Commands.begin(), Opt.Commands.end(), 739 [](const std::unique_ptr<BaseCommand> &Cmd) { 740 if (isa<OutputSectionCommand>(*Cmd)) 741 return true; 742 const auto *Assign = dyn_cast<SymbolAssignment>(Cmd.get()); 743 if (!Assign) 744 return false; 745 return Assign->Name == "."; 746 }); 747 if (FirstSectionOrDotAssignment != Opt.Commands.end()) { 748 CmdIndex = FirstSectionOrDotAssignment - Opt.Commands.begin(); 749 if (isa<SymbolAssignment>(**FirstSectionOrDotAssignment)) 750 ++CmdIndex; 751 } 752 753 for (OutputSection *Sec : *OutputSections) { 754 StringRef Name = Sec->Name; 755 756 // Find the last spot where we can insert a command and still get the 757 // correct result. 758 auto CmdIter = Opt.Commands.begin() + CmdIndex; 759 auto E = Opt.Commands.end(); 760 while (CmdIter != E && shouldSkip(**CmdIter)) { 761 ++CmdIter; 762 ++CmdIndex; 763 } 764 765 auto Pos = 766 std::find_if(CmdIter, E, [&](const std::unique_ptr<BaseCommand> &Base) { 767 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 768 return Cmd && Cmd->Name == Name; 769 }); 770 if (Pos == E) { 771 Opt.Commands.insert(CmdIter, 772 llvm::make_unique<OutputSectionCommand>(Name)); 773 ++CmdIndex; 774 continue; 775 } 776 777 // Continue from where we found it. 
778 CmdIndex = (Pos - Opt.Commands.begin()) + 1; 779 } 780 } 781 782 void LinkerScriptBase::assignAddresses(std::vector<PhdrEntry> &Phdrs) { 783 // Assign addresses as instructed by linker script SECTIONS sub-commands. 784 Dot = 0; 785 switchTo(Aether); 786 787 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 788 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) { 789 assignSymbol(Cmd); 790 continue; 791 } 792 793 if (auto *Cmd = dyn_cast<AssertCommand>(Base.get())) { 794 Cmd->Expression(); 795 continue; 796 } 797 798 auto *Cmd = cast<OutputSectionCommand>(Base.get()); 799 assignOffsets(Cmd); 800 } 801 802 uint64_t MinVA = std::numeric_limits<uint64_t>::max(); 803 for (OutputSection *Sec : *OutputSections) { 804 if (Sec->Flags & SHF_ALLOC) 805 MinVA = std::min<uint64_t>(MinVA, Sec->Addr); 806 else 807 Sec->Addr = 0; 808 } 809 810 allocateHeaders(Phdrs, *OutputSections, MinVA); 811 } 812 813 // Creates program headers as instructed by PHDRS linker script command. 814 std::vector<PhdrEntry> LinkerScriptBase::createPhdrs() { 815 std::vector<PhdrEntry> Ret; 816 817 // Process PHDRS and FILEHDR keywords because they are not 818 // real output sections and cannot be added in the following loop. 819 for (const PhdrsCommand &Cmd : Opt.PhdrsCommands) { 820 Ret.emplace_back(Cmd.Type, Cmd.Flags == UINT_MAX ? PF_R : Cmd.Flags); 821 PhdrEntry &Phdr = Ret.back(); 822 823 if (Cmd.HasFilehdr) 824 Phdr.add(Out::ElfHeader); 825 if (Cmd.HasPhdrs) 826 Phdr.add(Out::ProgramHeaders); 827 828 if (Cmd.LMAExpr) { 829 Phdr.p_paddr = Cmd.LMAExpr(); 830 Phdr.HasLMA = true; 831 } 832 } 833 834 // Add output sections to program headers. 
835 for (OutputSection *Sec : *OutputSections) { 836 if (!(Sec->Flags & SHF_ALLOC)) 837 break; 838 839 // Assign headers specified by linker script 840 for (size_t Id : getPhdrIndices(Sec->Name)) { 841 Ret[Id].add(Sec); 842 if (Opt.PhdrsCommands[Id].Flags == UINT_MAX) 843 Ret[Id].p_flags |= Sec->getPhdrFlags(); 844 } 845 } 846 return Ret; 847 } 848 849 bool LinkerScriptBase::ignoreInterpSection() { 850 // Ignore .interp section in case we have PHDRS specification 851 // and PT_INTERP isn't listed. 852 return !Opt.PhdrsCommands.empty() && 853 llvm::find_if(Opt.PhdrsCommands, [](const PhdrsCommand &Cmd) { 854 return Cmd.Type == PT_INTERP; 855 }) == Opt.PhdrsCommands.end(); 856 } 857 858 uint32_t LinkerScriptBase::getFiller(StringRef Name) { 859 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 860 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 861 if (Cmd->Name == Name) 862 return Cmd->Filler; 863 return 0; 864 } 865 866 template <class ELFT> 867 static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) { 868 const endianness E = ELFT::TargetEndianness; 869 870 switch (Size) { 871 case 1: 872 *Buf = (uint8_t)Data; 873 break; 874 case 2: 875 write16<E>(Buf, Data); 876 break; 877 case 4: 878 write32<E>(Buf, Data); 879 break; 880 case 8: 881 write64<E>(Buf, Data); 882 break; 883 default: 884 llvm_unreachable("unsupported Size argument"); 885 } 886 } 887 888 template <class ELFT> 889 void LinkerScript<ELFT>::writeDataBytes(StringRef Name, uint8_t *Buf) { 890 int I = getSectionIndex(Name); 891 if (I == INT_MAX) 892 return; 893 894 auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get()); 895 for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) 896 if (auto *Data = dyn_cast<BytesDataCommand>(Base.get())) 897 writeInt<ELFT>(Buf + Data->Offset, Data->Expression(), Data->Size); 898 } 899 900 bool LinkerScriptBase::hasLMA(StringRef Name) { 901 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 902 if (auto *Cmd = 
dyn_cast<OutputSectionCommand>(Base.get())) 903 if (Cmd->LMAExpr && Cmd->Name == Name) 904 return true; 905 return false; 906 } 907 908 // Returns the index of the given section name in linker script 909 // SECTIONS commands. Sections are laid out as the same order as they 910 // were in the script. If a given name did not appear in the script, 911 // it returns INT_MAX, so that it will be laid out at end of file. 912 int LinkerScriptBase::getSectionIndex(StringRef Name) { 913 for (int I = 0, E = Opt.Commands.size(); I != E; ++I) 914 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get())) 915 if (Cmd->Name == Name) 916 return I; 917 return INT_MAX; 918 } 919 920 template <class ELFT> 921 uint64_t LinkerScript<ELFT>::getSymbolValue(const Twine &Loc, StringRef S) { 922 if (S == ".") 923 return Dot; 924 if (SymbolBody *B = Symtab<ELFT>::X->find(S)) 925 return B->getVA<ELFT>(); 926 error(Loc + ": symbol not found: " + S); 927 return 0; 928 } 929 930 template <class ELFT> bool LinkerScript<ELFT>::isDefined(StringRef S) { 931 return Symtab<ELFT>::X->find(S) != nullptr; 932 } 933 934 template <class ELFT> bool LinkerScript<ELFT>::isAbsolute(StringRef S) { 935 if (S == ".") 936 return false; 937 SymbolBody *Sym = Symtab<ELFT>::X->find(S); 938 auto *DR = dyn_cast_or_null<DefinedRegular>(Sym); 939 return DR && !DR->Section; 940 } 941 942 // Gets section symbol belongs to. Symbol "." doesn't belong to any 943 // specific section but isn't absolute at the same time, so we try 944 // to find suitable section for it as well. 945 template <class ELFT> 946 OutputSection *LinkerScript<ELFT>::getSymbolSection(StringRef S) { 947 if (SymbolBody *Sym = Symtab<ELFT>::X->find(S)) 948 return Sym->getOutputSection<ELFT>(); 949 return CurOutSec; 950 } 951 952 // Returns indices of ELF headers containing specific section, identified 953 // by Name. Each index is a zero based number of ELF header listed within 954 // PHDRS {} script block. 
std::vector<size_t> LinkerScriptBase::getPhdrIndices(StringRef SectionName) {
  // Only the first output section command with a matching name is consulted.
  for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
    auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
    if (!Cmd || Cmd->Name != SectionName)
      continue;

    std::vector<size_t> Ret;
    for (StringRef PhdrName : Cmd->Phdrs)
      Ret.push_back(getPhdrIndex(Cmd->Location, PhdrName));
    return Ret;
  }
  return {};
}

// Returns the zero-based position of the PHDRS entry called PhdrName.
// If there is no such entry, reports an error at Loc and returns 0.
size_t LinkerScriptBase::getPhdrIndex(const Twine &Loc, StringRef PhdrName) {
  size_t I = 0;
  for (PhdrsCommand &Cmd : Opt.PhdrsCommands) {
    if (Cmd.Name == PhdrName)
      return I;
    ++I;
  }
  error(Loc + ": section header '" + PhdrName + "' is not listed in PHDRS");
  return 0;
}

// Recursive-descent parser built on top of ScriptLexer. It has three entry
// points: linker scripts, version scripts and dynamic lists. Parsed linker
// script commands are stored into *ScriptConfig (via Opt).
class elf::ScriptParser final : public ScriptLexer {
  typedef void (ScriptParser::*Handler)();

public:
  ScriptParser(MemoryBufferRef MB)
      : ScriptLexer(MB),
        IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {}

  void readLinkerScript();
  void readVersionScript();
  void readDynamicList();

private:
  void addFile(StringRef Path);

  // Handlers for top-level linker script commands.
  void readAsNeeded();
  void readEntry();
  void readExtern();
  void readGroup();
  void readInclude();
  void readMemory();
  void readOutput();
  void readOutputArch();
  void readOutputFormat();
  void readPhdrs();
  void readSearchDir();
  void readSections();
  void readVersion();
  void readVersionScriptCommand();

  // Helpers for the contents of SECTIONS and output section descriptions.
  SymbolAssignment *readAssignment(StringRef Name);
  BytesDataCommand *readBytesDataCommand(StringRef Tok);
  uint32_t readFill();
  OutputSectionCommand *readOutputSectionDescription(StringRef OutSec);
  uint32_t readOutputSectionFiller(StringRef Tok);
  std::vector<StringRef> readOutputSectionPhdrs();
  InputSectionDescription *readInputSectionDescription(StringRef Tok);
  StringMatcher readFilePatterns();
  std::vector<SectionPattern> readInputSectionsList();
  InputSectionDescription *readInputSectionRules(StringRef FilePattern);
  unsigned readPhdrType();
  SortSectionPolicy readSortKind();
  SymbolAssignment *readProvideHidden(bool Provide, bool Hidden);
  SymbolAssignment *readProvideOrAssignment(StringRef Tok);
  void readSort();
  Expr readAssert();

  // Helpers for the MEMORY command.
  uint64_t readMemoryAssignment(StringRef, StringRef, StringRef);
  std::pair<uint32_t, uint32_t> readMemoryAttributes();

  // Expression parsing.
  Expr readExpr();
  Expr readExpr1(Expr Lhs, int MinPrec);
  StringRef readParenLiteral();
  Expr readPrimary();
  Expr readTernary(Expr Cond);
  Expr readParenExpr();

  // For parsing version script.
  std::vector<SymbolVersion> readVersionExtern();
  void readAnonymousDeclaration();
  void readVersionDeclaration(StringRef VerStr);

  std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>>
  readSymbols();

  ScriptConfiguration &Opt = *ScriptConfig;
  // True if the script being parsed is located under Config->Sysroot.
  bool IsUnderSysroot;
};

// Parses a dynamic list: one anonymous "{ ... };" declaration followed by
// end of input.
void ScriptParser::readDynamicList() {
  expect("{");
  readAnonymousDeclaration();
  if (!atEOF())
    setError("EOF expected, but got " + next());
}

// Parses a whole version script; anything after it is an error.
void ScriptParser::readVersionScript() {
  readVersionScriptCommand();
  if (!atEOF())
    setError("EOF expected, but got " + next());
}

// Parses either a single anonymous declaration or a sequence of named
// version declarations. The two forms cannot be mixed.
void ScriptParser::readVersionScriptCommand() {
  if (consume("{")) {
    readAnonymousDeclaration();
    return;
  }

  // Stop at "}" so that this also works inside VERSION { ... }.
  while (!atEOF() && !Error && peek() != "}") {
    StringRef VerStr = next();
    if (VerStr == "{") {
      setError("anonymous version definition is used in "
               "combination with other version definitions");
      return;
    }
    expect("{");
    readVersionDeclaration(VerStr);
  }
}

// Handles VERSION { <version script> } inside a linker script.
void ScriptParser::readVersion() {
  expect("{");
  readVersionScriptCommand();
  expect("}");
}

// Top-level loop for linker scripts: dispatches on the command keyword and
// falls back to treating the token as a symbol assignment.
void ScriptParser::readLinkerScript() {
  while (!atEOF()) {
    StringRef Tok = next();
    if (Tok == ";")
      continue;

    if (Tok == "ASSERT") {
      Opt.Commands.emplace_back(new AssertCommand(readAssert()));
    } else if (Tok == "ENTRY") {
      readEntry();
    } else if (Tok == "EXTERN") {
      readExtern();
    } else if (Tok == "GROUP" || Tok == "INPUT") {
      readGroup();
    } else if (Tok == "INCLUDE") {
      readInclude();
    } else if (Tok == "MEMORY") {
      readMemory();
    } else if (Tok == "OUTPUT") {
      readOutput();
    } else if (Tok == "OUTPUT_ARCH") {
      readOutputArch();
    } else if (Tok == "OUTPUT_FORMAT") {
      readOutputFormat();
    } else if (Tok == "PHDRS") {
      readPhdrs();
    } else if (Tok == "SEARCH_DIR") {
      readSearchDir();
    } else if (Tok == "SECTIONS") {
      readSections();
    } else if (Tok == "VERSION") {
      readVersion();
    } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) {
      Opt.Commands.emplace_back(Cmd);
    } else {
      setError("unknown directive: " + Tok);
    }
  }
}

// Adds the input file or library named S (as written in INPUT/GROUP) to the
// link, resolving it against the sysroot and the search paths.
void ScriptParser::addFile(StringRef S) {
  // For a script that itself lives under the sysroot, an absolute path is
  // first tried relative to the sysroot.
  if (IsUnderSysroot && S.startswith("/")) {
    SmallString<128> PathData;
    StringRef Path = (Config->Sysroot + S).toStringRef(PathData);
    if (sys::fs::exists(Path)) {
      Driver->addFile(Saver.save(Path));
      return;
    }
  }

  if (sys::path::is_absolute(S)) {
    Driver->addFile(S);
  } else if (S.startswith("=")) {
    // A leading "=" stands for the sysroot.
    if (Config->Sysroot.empty())
      Driver->addFile(S.substr(1));
    else
      Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)));
  } else if (S.startswith("-l")) {
    Driver->addLibrary(S.substr(2));
  } else if (sys::fs::exists(S)) {
    Driver->addFile(S);
  } else {
    if (Optional<std::string> Path = findFromSearchPaths(S))
      Driver->addFile(Saver.save(*Path));
    else
      setError("unable to find " + S);
  }
}

// Handles AS_NEEDED(file...): adds the files with Config->AsNeeded
// temporarily set, then restores the previous value.
void ScriptParser::readAsNeeded() {
  expect("(");
  bool Orig = Config->AsNeeded;
  Config->AsNeeded = true;
  while (!Error && !consume(")"))
    addFile(unquote(next()));
  Config->AsNeeded = Orig;
}

void ScriptParser::readEntry() {
  // -e <symbol> takes precedence over ENTRY(<symbol>).
  expect("(");
  StringRef Tok = next();
  if (Config->Entry.empty())
    Config->Entry = Tok;
  expect(")");
}

// Handles EXTERN(sym...): every listed symbol is appended to
// Config->Undefined.
void ScriptParser::readExtern() {
  expect("(");
  while (!Error && !consume(")"))
    Config->Undefined.push_back(next());
}

// Handles GROUP(...) and INPUT(...); both are treated the same way.
void ScriptParser::readGroup() {
  expect("(");
  while (!Error && !consume(")")) {
    StringRef Tok = next();
    if (Tok == "AS_NEEDED")
      readAsNeeded();
    else
      addFile(unquote(Tok));
  }
}

// Handles INCLUDE <file>: the file's tokens are fed to the lexer via
// tokenize().
void ScriptParser::readInclude() {
  StringRef Tok = unquote(next());

  // https://sourceware.org/binutils/docs/ld/File-Commands.html:
  // The file will be searched for in the current directory, and in any
  // directory specified with the -L option.
  if (sys::fs::exists(Tok)) {
    if (Optional<MemoryBufferRef> MB = readFile(Tok))
      tokenize(*MB);
    return;
  }
  if (Optional<std::string> Path = findFromSearchPaths(Tok)) {
    if (Optional<MemoryBufferRef> MB = readFile(*Path))
      tokenize(*MB);
    return;
  }
  setError("cannot open " + Tok);
}

void ScriptParser::readOutput() {
  // -o <file> takes precedence over OUTPUT(<file>).
  expect("(");
  StringRef Tok = next();
  if (Config->OutputFile.empty())
    Config->OutputFile = unquote(Tok);
  expect(")");
}

void ScriptParser::readOutputArch() {
  // OUTPUT_ARCH is ignored for now.
  expect("(");
  while (!Error && !consume(")"))
    skip();
}

// Accepts OUTPUT_FORMAT(a) and OUTPUT_FORMAT(a, b, c); the names themselves
// are skipped.
void ScriptParser::readOutputFormat() {
  // Error checking only for now.
  expect("(");
  skip();
  StringRef Tok = next();
  if (Tok == ")")
    return;
  if (Tok != ",") {
    setError("unexpected token: " + Tok);
    return;
  }
  skip();
  expect(",");
  skip();
  expect(")");
}

// Handles PHDRS { name type [FILEHDR] [PHDRS] [AT(expr)] [FLAGS(expr)]; ... }.
void ScriptParser::readPhdrs() {
  expect("{");
  while (!Error && !consume("}")) {
    StringRef Tok = next();
    // Flags starts as UINT_MAX, meaning "no FLAGS attribute given".
    Opt.PhdrsCommands.push_back(
        {Tok, PT_NULL, false, false, UINT_MAX, nullptr});
    PhdrsCommand &PhdrCmd = Opt.PhdrsCommands.back();

    PhdrCmd.Type = readPhdrType();
    do {
      Tok = next();
      if (Tok == ";")
        break;
      if (Tok == "FILEHDR")
        PhdrCmd.HasFilehdr = true;
      else if (Tok == "PHDRS")
        PhdrCmd.HasPhdrs = true;
      else if (Tok == "AT")
        PhdrCmd.LMAExpr = readParenExpr();
      else if (Tok == "FLAGS") {
        expect("(");
        // The flags expression is evaluated immediately, while parsing.
        PhdrCmd.Flags = readExpr()();
        expect(")");
      } else
        setError("unexpected header attribute: " + Tok);
    } while (!Error);
  }
}

// Handles SEARCH_DIR(path); the path is ignored when Config->Nostdlib is set.
void ScriptParser::readSearchDir() {
  expect("(");
  StringRef Tok = next();
  if (!Config->Nostdlib)
    Config->SearchPaths.push_back(unquote(Tok));
  expect(")");
}

// Handles SECTIONS { ... }: each entry is a symbol assignment, an ASSERT or
// an output section description.
void ScriptParser::readSections() {
  Opt.HasSections = true;
  // -no-rosegment is used to avoid placing read only non-executable sections in
  // their own segment. We do the same if SECTIONS command is present in linker
  // script. See comment for computeFlags().
  Config->SingleRoRx = true;

  expect("{");
  while (!Error && !consume("}")) {
    StringRef Tok = next();
    BaseCommand *Cmd = readProvideOrAssignment(Tok);
    if (!Cmd) {
      if (Tok == "ASSERT")
        Cmd = new AssertCommand(readAssert());
      else
        Cmd = readOutputSectionDescription(Tok);
    }
    Opt.Commands.emplace_back(Cmd);
  }
}

// Returns the binding strength of binary operator Op (larger binds tighter),
// or -1 if Op is not a binary operator.
static int precedence(StringRef Op) {
  return StringSwitch<int>(Op)
      .Cases("*", "/", 5)
      .Cases("+", "-", 4)
      .Cases("<<", ">>", 3)
      .Cases("<", "<=", ">", ">=", "==", "!=", 2)
      .Cases("&", "|", 1)
      .Default(-1);
}

// Reads tokens up to and including the closing ")" and returns a matcher for
// them. The opening "(" must already have been consumed.
StringMatcher ScriptParser::readFilePatterns() {
  std::vector<StringRef> V;
  while (!Error && !consume(")"))
    V.push_back(next());
  return StringMatcher(V);
}

// Consumes a sort keyword if one is next and returns the matching policy;
// otherwise consumes nothing and returns SortSectionPolicy::Default.
SortSectionPolicy ScriptParser::readSortKind() {
  if (consume("SORT") || consume("SORT_BY_NAME"))
    return SortSectionPolicy::Name;
  if (consume("SORT_BY_ALIGNMENT"))
    return SortSectionPolicy::Alignment;
  if (consume("SORT_BY_INIT_PRIORITY"))
    return SortSectionPolicy::Priority;
  if (consume("SORT_NONE"))
    return SortSectionPolicy::None;
  return SortSectionPolicy::Default;
}

// Method reads a list of sequence of excluded files and section globs given in
// a following form: ((EXCLUDE_FILE(file_pattern+))? section_pattern+)+
// Example: *(.foo.1 EXCLUDE_FILE (*a.o) .foo.2 EXCLUDE_FILE (*b.o) .foo.3)
// The semantics of that is next:
// * Include .foo.1 from every file.
1336 // * Include .foo.2 from every file but a.o 1337 // * Include .foo.3 from every file but b.o 1338 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 1339 std::vector<SectionPattern> Ret; 1340 while (!Error && peek() != ")") { 1341 StringMatcher ExcludeFilePat; 1342 if (consume("EXCLUDE_FILE")) { 1343 expect("("); 1344 ExcludeFilePat = readFilePatterns(); 1345 } 1346 1347 std::vector<StringRef> V; 1348 while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") 1349 V.push_back(next()); 1350 1351 if (!V.empty()) 1352 Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); 1353 else 1354 setError("section pattern is expected"); 1355 } 1356 return Ret; 1357 } 1358 1359 // Reads contents of "SECTIONS" directive. That directive contains a 1360 // list of glob patterns for input sections. The grammar is as follows. 1361 // 1362 // <patterns> ::= <section-list> 1363 // | <sort> "(" <section-list> ")" 1364 // | <sort> "(" <sort> "(" <section-list> ")" ")" 1365 // 1366 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" 1367 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" 1368 // 1369 // <section-list> is parsed by readInputSectionsList(). 
1370 InputSectionDescription * 1371 ScriptParser::readInputSectionRules(StringRef FilePattern) { 1372 auto *Cmd = new InputSectionDescription(FilePattern); 1373 expect("("); 1374 while (!Error && !consume(")")) { 1375 SortSectionPolicy Outer = readSortKind(); 1376 SortSectionPolicy Inner = SortSectionPolicy::Default; 1377 std::vector<SectionPattern> V; 1378 if (Outer != SortSectionPolicy::Default) { 1379 expect("("); 1380 Inner = readSortKind(); 1381 if (Inner != SortSectionPolicy::Default) { 1382 expect("("); 1383 V = readInputSectionsList(); 1384 expect(")"); 1385 } else { 1386 V = readInputSectionsList(); 1387 } 1388 expect(")"); 1389 } else { 1390 V = readInputSectionsList(); 1391 } 1392 1393 for (SectionPattern &Pat : V) { 1394 Pat.SortInner = Inner; 1395 Pat.SortOuter = Outer; 1396 } 1397 1398 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 1399 } 1400 return Cmd; 1401 } 1402 1403 InputSectionDescription * 1404 ScriptParser::readInputSectionDescription(StringRef Tok) { 1405 // Input section wildcard can be surrounded by KEEP. 1406 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 1407 if (Tok == "KEEP") { 1408 expect("("); 1409 StringRef FilePattern = next(); 1410 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 1411 expect(")"); 1412 Opt.KeptSections.push_back(Cmd); 1413 return Cmd; 1414 } 1415 return readInputSectionRules(Tok); 1416 } 1417 1418 void ScriptParser::readSort() { 1419 expect("("); 1420 expect("CONSTRUCTORS"); 1421 expect(")"); 1422 } 1423 1424 Expr ScriptParser::readAssert() { 1425 expect("("); 1426 Expr E = readExpr(); 1427 expect(","); 1428 StringRef Msg = unquote(next()); 1429 expect(")"); 1430 return [=] { 1431 if (!E()) 1432 error(Msg); 1433 return ScriptBase->getDot(); 1434 }; 1435 } 1436 1437 // Reads a FILL(expr) command. We handle the FILL command as an 1438 // alias for =fillexp section attribute, which is different from 1439 // what GNU linkers do. 
// https://sourceware.org/binutils/docs/ld/Output-Section-Data.html
uint32_t ScriptParser::readFill() {
  // Parses "(<number>);" and returns the number.
  expect("(");
  uint32_t V = readOutputSectionFiller(next());
  expect(")");
  expect(";");
  return V;
}

// Parses one output section description, starting right after its name
// (OutSec), and returns the newly allocated command:
//
//   name [addr] : [AT(lma)] [ALIGN(a)] [SUBALIGN(a)] [ONLY_IF_RO|ONLY_IF_RW]
//     { commands... } [>region] [:phdr...] [=fill] [,]
OutputSectionCommand *
ScriptParser::readOutputSectionDescription(StringRef OutSec) {
  OutputSectionCommand *Cmd = new OutputSectionCommand(OutSec);
  Cmd->Location = getCurrentLocation();

  // Read an address expression.
  // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html#Output-Section-Address
  if (peek() != ":")
    Cmd->AddrExpr = readExpr();

  expect(":");

  // The optional attributes are only recognized in this fixed order.
  if (consume("AT"))
    Cmd->LMAExpr = readParenExpr();
  if (consume("ALIGN"))
    Cmd->AlignExpr = readParenExpr();
  if (consume("SUBALIGN"))
    Cmd->SubalignExpr = readParenExpr();

  // Parse constraints.
  if (consume("ONLY_IF_RO"))
    Cmd->Constraint = ConstraintKind::ReadOnly;
  if (consume("ONLY_IF_RW"))
    Cmd->Constraint = ConstraintKind::ReadWrite;
  expect("{");

  // Body of the description. The order of the tests below matters: an
  // assignment is detected by peeking at the token after Tok, so it must be
  // tried before Tok is interpreted as a keyword or a file pattern.
  while (!Error && !consume("}")) {
    StringRef Tok = next();
    if (Tok == ";") {
      // Empty commands are allowed. Do nothing here.
    } else if (SymbolAssignment *Assignment = readProvideOrAssignment(Tok)) {
      Cmd->Commands.emplace_back(Assignment);
    } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) {
      Cmd->Commands.emplace_back(Data);
    } else if (Tok == "ASSERT") {
      Cmd->Commands.emplace_back(new AssertCommand(readAssert()));
      expect(";");
    } else if (Tok == "CONSTRUCTORS") {
      // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors
      // by name. This is for very old file formats such as ECOFF/XCOFF.
      // For ELF, we should ignore.
    } else if (Tok == "FILL") {
      Cmd->Filler = readFill();
    } else if (Tok == "SORT") {
      readSort();
    } else if (peek() == "(") {
      // Anything else followed by "(" is an input section description.
      Cmd->Commands.emplace_back(readInputSectionDescription(Tok));
    } else {
      setError("unknown command " + Tok);
    }
  }

  // Optional ">region" memory region assignment.
  if (consume(">"))
    Cmd->MemoryRegionName = next();

  Cmd->Phdrs = readOutputSectionPhdrs();

  // The fill value may arrive as a separate "=" token followed by the number,
  // or as a single token that starts with "=".
  if (consume("="))
    Cmd->Filler = readOutputSectionFiller(next());
  else if (peek().startswith("="))
    Cmd->Filler = readOutputSectionFiller(next().drop_front());

  // Consume optional comma following output section command.
  consume(",");

  return Cmd;
}

// Read "=<number>" where <number> is an octal/decimal/hexadecimal number.
// https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html
//
// ld.gold is not fully compatible with ld.bfd. ld.bfd handles
// hexstrings as blobs of arbitrary sizes, while ld.gold handles them
// as 32-bit big-endian values. We will do the same as ld.gold does
// because it's simpler than what ld.bfd does.
uint32_t ScriptParser::readOutputSectionFiller(StringRef Tok) {
  uint32_t V;
  // Radix 0 asks getAsInteger to detect the base from the token's prefix;
  // it returns false on success.
  if (!Tok.getAsInteger(0, V))
    return V;
  setError("invalid filler expression: " + Tok);
  return 0;
}

// Parses "(sym = expr);" following PROVIDE, HIDDEN or PROVIDE_HIDDEN and
// marks the resulting assignment with the given Provide/Hidden attributes.
SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) {
  expect("(");
  SymbolAssignment *Cmd = readAssignment(next());
  Cmd->Provide = Provide;
  Cmd->Hidden = Hidden;
  expect(")");
  expect(";");
  return Cmd;
}

// Tok is an already consumed token. If it begins a symbol assignment or a
// PROVIDE/HIDDEN/PROVIDE_HIDDEN command, parses the rest and returns the
// assignment. Otherwise returns nullptr without consuming anything.
SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) {
  SymbolAssignment *Cmd = nullptr;
  // The assignment check comes first, so a symbol that happens to be named
  // like a keyword is still treated as an assignment target.
  if (peek() == "=" || peek() == "+=") {
    Cmd = readAssignment(Tok);
    expect(";");
  } else if (Tok == "PROVIDE") {
    Cmd = readProvideHidden(true, false);
  } else if (Tok == "HIDDEN") {
    Cmd = readProvideHidden(false, true);
  } else if (Tok == "PROVIDE_HIDDEN") {
    Cmd = readProvideHidden(true, true);
  }
  return Cmd;
}

// Parses "= expr" or "+= expr" for symbol Name and returns a newly allocated
// assignment. The next token must be the operator.
SymbolAssignment *ScriptParser::readAssignment(StringRef Name) {
  StringRef Op = next();
  Expr E;
  assert(Op == "=" || Op == "+=");
  if (consume("ABSOLUTE")) {
    // ABSOLUTE forces the expression to be reported as absolute.
    E = readExpr();
    E.IsAbsolute = [] { return true; };
  } else {
    E = readExpr();
  }
  if (Op == "+=") {
    // "sym += e" is evaluated as "sym + e", looking sym up at evaluation time.
    // NOTE(review): E is replaced by a plain lambda here; whether an ABSOLUTE
    // marking set above survives depends on how Expr is built from a
    // callable - confirm.
    std::string Loc = getCurrentLocation();
    E = [=] { return ScriptBase->getSymbolValue(Loc, Name) + E(); };
  }
  return new SymbolAssignment(Name, E, getCurrentLocation());
}

// This is an operator-precedence parser to parse a linker
// script expression.
Expr ScriptParser::readExpr() {
  // Our lexer is context-aware. Set the in-expression bit so that
  // they apply different tokenization rules.
  bool Orig = InExpr;
  InExpr = true;
  Expr E = readExpr1(readPrimary(), 0);
  InExpr = Orig;
  return E;
}

// Builds the expression "L Op R". Only "+" and "-" carry absoluteness and
// section information over from their operands; every other operator is
// returned as a bare value-computing lambda.
static Expr combine(StringRef Op, Expr L, Expr R) {
  auto IsAbs = [=] { return L.IsAbsolute() && R.IsAbsolute(); };
  // The result's section is the left operand's, or else the right operand's.
  auto GetOutSec = [=] {
    SectionBase *S = L.Section();
    return S ? S : R.Section();
  };

  if (Op == "*")
    return [=] { return L() * R(); };
  if (Op == "/") {
    return [=]() -> uint64_t {
      // Report division by zero as a link error instead of trapping.
      uint64_t RHS = R();
      if (RHS == 0) {
        error("division by zero");
        return 0;
      }
      return L() / RHS;
    };
  }
  if (Op == "+")
    return {[=] { return L() + R(); }, IsAbs, GetOutSec};
  if (Op == "-")
    return {[=] { return L() - R(); }, IsAbs, GetOutSec};
  if (Op == "<<")
    return [=] { return L() << R(); };
  if (Op == ">>")
    return [=] { return L() >> R(); };
  if (Op == "<")
    return [=] { return L() < R(); };
  if (Op == ">")
    return [=] { return L() > R(); };
  if (Op == ">=")
    return [=] { return L() >= R(); };
  if (Op == "<=")
    return [=] { return L() <= R(); };
  if (Op == "==")
    return [=] { return L() == R(); };
  if (Op == "!=")
    return [=] { return L() != R(); };
  if (Op == "&")
    return [=] { return L() & R(); };
  if (Op == "|")
    return [=] { return L() | R(); };
  llvm_unreachable("invalid operator");
}

// This is a part of the operator-precedence parser. This function
// assumes that the remaining token stream starts with an operator.
Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) {
  while (!atEOF() && !Error) {
    // Read an operator and an expression.
    if (consume("?"))
      return readTernary(Lhs);
    StringRef Op1 = peek();
    // Tokens that are not binary operators have precedence -1, so they end
    // the expression here as well.
    if (precedence(Op1) < MinPrec)
      break;
    skip();
    Expr Rhs = readPrimary();

    // Evaluate the remaining part of the expression first if the
    // next operator has greater precedence than the previous one.
    // For example, if we have read "+" and "3", and if the next
    // operator is "*", then we'll evaluate 3 * ... part first.
    while (!atEOF()) {
      StringRef Op2 = peek();
      if (precedence(Op2) <= precedence(Op1))
        break;
      Rhs = readExpr1(Rhs, precedence(Op2));
    }

    Lhs = combine(Op1, Lhs, Rhs);
  }
  return Lhs;
}

// Returns the value of the named CONSTANT(...). Only COMMONPAGESIZE and
// MAXPAGESIZE are known; anything else is an error and evaluates to 0.
uint64_t static getConstant(StringRef S) {
  if (S == "COMMONPAGESIZE")
    return Target->PageSize;
  if (S == "MAXPAGESIZE")
    return Config->MaxPageSize;
  error("unknown constant: " + S);
  return 0;
}

// Parses Tok as an integer. Returns true if successful.
// It recognizes hexadecimal (prefixed with "0x" or suffixed with "H")
// and decimal numbers. Decimal numbers may have "K" (kilo) or
// "M" (mega) suffixes.
1674 static bool readInteger(StringRef Tok, uint64_t &Result) { 1675 // Negative number 1676 if (Tok.startswith("-")) { 1677 if (!readInteger(Tok.substr(1), Result)) 1678 return false; 1679 Result = -Result; 1680 return true; 1681 } 1682 1683 // Hexadecimal 1684 if (Tok.startswith_lower("0x")) 1685 return !Tok.substr(2).getAsInteger(16, Result); 1686 if (Tok.endswith_lower("H")) 1687 return !Tok.drop_back().getAsInteger(16, Result); 1688 1689 // Decimal 1690 int Suffix = 1; 1691 if (Tok.endswith_lower("K")) { 1692 Suffix = 1024; 1693 Tok = Tok.drop_back(); 1694 } else if (Tok.endswith_lower("M")) { 1695 Suffix = 1024 * 1024; 1696 Tok = Tok.drop_back(); 1697 } 1698 if (Tok.getAsInteger(10, Result)) 1699 return false; 1700 Result *= Suffix; 1701 return true; 1702 } 1703 1704 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { 1705 int Size = StringSwitch<unsigned>(Tok) 1706 .Case("BYTE", 1) 1707 .Case("SHORT", 2) 1708 .Case("LONG", 4) 1709 .Case("QUAD", 8) 1710 .Default(-1); 1711 if (Size == -1) 1712 return nullptr; 1713 1714 return new BytesDataCommand(readParenExpr(), Size); 1715 } 1716 1717 StringRef ScriptParser::readParenLiteral() { 1718 expect("("); 1719 StringRef Tok = next(); 1720 expect(")"); 1721 return Tok; 1722 } 1723 1724 Expr ScriptParser::readPrimary() { 1725 if (peek() == "(") 1726 return readParenExpr(); 1727 1728 StringRef Tok = next(); 1729 std::string Location = getCurrentLocation(); 1730 1731 if (Tok == "~") { 1732 Expr E = readPrimary(); 1733 return [=] { return ~E(); }; 1734 } 1735 if (Tok == "-") { 1736 Expr E = readPrimary(); 1737 return [=] { return -E(); }; 1738 } 1739 1740 // Built-in functions are parsed here. 1741 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 
1742 if (Tok == "ADDR") { 1743 StringRef Name = readParenLiteral(); 1744 return {[=] { return ScriptBase->getOutputSection(Location, Name)->Addr; }, 1745 [=] { return false; }, 1746 [=] { return ScriptBase->getOutputSection(Location, Name); }}; 1747 } 1748 if (Tok == "LOADADDR") { 1749 StringRef Name = readParenLiteral(); 1750 return 1751 [=] { return ScriptBase->getOutputSection(Location, Name)->getLMA(); }; 1752 } 1753 if (Tok == "ASSERT") 1754 return readAssert(); 1755 if (Tok == "ALIGN") { 1756 expect("("); 1757 Expr E = readExpr(); 1758 if (consume(",")) { 1759 Expr E2 = readExpr(); 1760 expect(")"); 1761 return [=] { return alignTo(E(), E2()); }; 1762 } 1763 expect(")"); 1764 return [=] { return alignTo(ScriptBase->getDot(), E()); }; 1765 } 1766 if (Tok == "CONSTANT") { 1767 StringRef Name = readParenLiteral(); 1768 return [=] { return getConstant(Name); }; 1769 } 1770 if (Tok == "DEFINED") { 1771 StringRef Name = readParenLiteral(); 1772 return [=] { return ScriptBase->isDefined(Name) ? 1 : 0; }; 1773 } 1774 if (Tok == "SEGMENT_START") { 1775 expect("("); 1776 skip(); 1777 expect(","); 1778 Expr E = readExpr(); 1779 expect(")"); 1780 return [=] { return E(); }; 1781 } 1782 if (Tok == "DATA_SEGMENT_ALIGN") { 1783 expect("("); 1784 Expr E = readExpr(); 1785 expect(","); 1786 readExpr(); 1787 expect(")"); 1788 return [=] { return alignTo(ScriptBase->getDot(), E()); }; 1789 } 1790 if (Tok == "DATA_SEGMENT_END") { 1791 expect("("); 1792 expect("."); 1793 expect(")"); 1794 return []() { return ScriptBase->getDot(); }; 1795 } 1796 // GNU linkers implements more complicated logic to handle 1797 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and just align to 1798 // the next page boundary for simplicity. 
1799 if (Tok == "DATA_SEGMENT_RELRO_END") { 1800 expect("("); 1801 readExpr(); 1802 expect(","); 1803 readExpr(); 1804 expect(")"); 1805 return []() { return alignTo(ScriptBase->getDot(), Target->PageSize); }; 1806 } 1807 if (Tok == "SIZEOF") { 1808 StringRef Name = readParenLiteral(); 1809 return [=] { return ScriptBase->getOutputSectionSize(Name); }; 1810 } 1811 if (Tok == "ALIGNOF") { 1812 StringRef Name = readParenLiteral(); 1813 return 1814 [=] { return ScriptBase->getOutputSection(Location, Name)->Alignment; }; 1815 } 1816 if (Tok == "SIZEOF_HEADERS") 1817 return [=] { return elf::getHeaderSize(); }; 1818 1819 // Tok is a literal number. 1820 uint64_t V; 1821 if (readInteger(Tok, V)) 1822 return [=] { return V; }; 1823 1824 // Tok is a symbol name. 1825 if (Tok != "." && !isValidCIdentifier(Tok)) 1826 setError("malformed number: " + Tok); 1827 return {[=] { return ScriptBase->getSymbolValue(Location, Tok); }, 1828 [=] { return ScriptBase->isAbsolute(Tok); }, 1829 [=] { return ScriptBase->getSymbolSection(Tok); }}; 1830 } 1831 1832 Expr ScriptParser::readTernary(Expr Cond) { 1833 Expr L = readExpr(); 1834 expect(":"); 1835 Expr R = readExpr(); 1836 return [=] { return Cond() ? L() : R(); }; 1837 } 1838 1839 Expr ScriptParser::readParenExpr() { 1840 expect("("); 1841 Expr E = readExpr(); 1842 expect(")"); 1843 return E; 1844 } 1845 1846 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 1847 std::vector<StringRef> Phdrs; 1848 while (!Error && peek().startswith(":")) { 1849 StringRef Tok = next(); 1850 Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); 1851 } 1852 return Phdrs; 1853 } 1854 1855 // Read a program header type name. The next token must be a 1856 // name of a program header type or a constant (e.g. "0x3"). 
1857 unsigned ScriptParser::readPhdrType() { 1858 StringRef Tok = next(); 1859 uint64_t Val; 1860 if (readInteger(Tok, Val)) 1861 return Val; 1862 1863 unsigned Ret = StringSwitch<unsigned>(Tok) 1864 .Case("PT_NULL", PT_NULL) 1865 .Case("PT_LOAD", PT_LOAD) 1866 .Case("PT_DYNAMIC", PT_DYNAMIC) 1867 .Case("PT_INTERP", PT_INTERP) 1868 .Case("PT_NOTE", PT_NOTE) 1869 .Case("PT_SHLIB", PT_SHLIB) 1870 .Case("PT_PHDR", PT_PHDR) 1871 .Case("PT_TLS", PT_TLS) 1872 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1873 .Case("PT_GNU_STACK", PT_GNU_STACK) 1874 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1875 .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) 1876 .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) 1877 .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) 1878 .Default(-1); 1879 1880 if (Ret == (unsigned)-1) { 1881 setError("invalid program header type: " + Tok); 1882 return PT_NULL; 1883 } 1884 return Ret; 1885 } 1886 1887 // Reads an anonymous version declaration. 1888 void ScriptParser::readAnonymousDeclaration() { 1889 std::vector<SymbolVersion> Locals; 1890 std::vector<SymbolVersion> Globals; 1891 std::tie(Locals, Globals) = readSymbols(); 1892 1893 for (SymbolVersion V : Locals) { 1894 if (V.Name == "*") 1895 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1896 else 1897 Config->VersionScriptLocals.push_back(V); 1898 } 1899 1900 for (SymbolVersion V : Globals) 1901 Config->VersionScriptGlobals.push_back(V); 1902 1903 expect(";"); 1904 } 1905 1906 // Reads a non-anonymous version definition, 1907 // e.g. "VerStr { global: foo; bar; local: *; };". 1908 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1909 // Read a symbol list. 
1910 std::vector<SymbolVersion> Locals; 1911 std::vector<SymbolVersion> Globals; 1912 std::tie(Locals, Globals) = readSymbols(); 1913 1914 for (SymbolVersion V : Locals) { 1915 if (V.Name == "*") 1916 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1917 else 1918 Config->VersionScriptLocals.push_back(V); 1919 } 1920 1921 // Create a new version definition and add that to the global symbols. 1922 VersionDefinition Ver; 1923 Ver.Name = VerStr; 1924 Ver.Globals = Globals; 1925 1926 // User-defined version number starts from 2 because 0 and 1 are 1927 // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively. 1928 Ver.Id = Config->VersionDefinitions.size() + 2; 1929 Config->VersionDefinitions.push_back(Ver); 1930 1931 // Each version may have a parent version. For example, "Ver2" 1932 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" 1933 // as a parent. This version hierarchy is, probably against your 1934 // instinct, purely for hint; the runtime doesn't care about it 1935 // at all. In LLD, we simply ignore it. 1936 if (peek() != ";") 1937 skip(); 1938 expect(";"); 1939 } 1940 1941 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 
1942 std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> 1943 ScriptParser::readSymbols() { 1944 std::vector<SymbolVersion> Locals; 1945 std::vector<SymbolVersion> Globals; 1946 std::vector<SymbolVersion> *V = &Globals; 1947 1948 while (!Error) { 1949 if (consume("}")) 1950 break; 1951 if (consumeLabel("local")) { 1952 V = &Locals; 1953 continue; 1954 } 1955 if (consumeLabel("global")) { 1956 V = &Globals; 1957 continue; 1958 } 1959 1960 if (consume("extern")) { 1961 std::vector<SymbolVersion> Ext = readVersionExtern(); 1962 V->insert(V->end(), Ext.begin(), Ext.end()); 1963 } else { 1964 StringRef Tok = next(); 1965 V->push_back({unquote(Tok), false, hasWildcard(Tok)}); 1966 } 1967 expect(";"); 1968 } 1969 return {Locals, Globals}; 1970 } 1971 1972 // Reads an "extern C++" directive, e.g., 1973 // "extern "C++" { ns::*; "f(int, double)"; };" 1974 std::vector<SymbolVersion> ScriptParser::readVersionExtern() { 1975 StringRef Tok = next(); 1976 bool IsCXX = Tok == "\"C++\""; 1977 if (!IsCXX && Tok != "\"C\"") 1978 setError("Unknown language"); 1979 expect("{"); 1980 1981 std::vector<SymbolVersion> Ret; 1982 while (!Error && peek() != "}") { 1983 StringRef Tok = next(); 1984 bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); 1985 Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); 1986 expect(";"); 1987 } 1988 1989 expect("}"); 1990 return Ret; 1991 } 1992 1993 uint64_t ScriptParser::readMemoryAssignment( 1994 StringRef S1, StringRef S2, StringRef S3) { 1995 if (!(consume(S1) || consume(S2) || consume(S3))) { 1996 setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); 1997 return 0; 1998 } 1999 expect("="); 2000 2001 // TODO: Fully support constant expressions. 
2002 uint64_t Val; 2003 if (!readInteger(next(), Val)) 2004 setError("nonconstant expression for "+ S1); 2005 return Val; 2006 } 2007 2008 // Parse the MEMORY command as specified in: 2009 // https://sourceware.org/binutils/docs/ld/MEMORY.html 2010 // 2011 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... } 2012 void ScriptParser::readMemory() { 2013 expect("{"); 2014 while (!Error && !consume("}")) { 2015 StringRef Name = next(); 2016 2017 uint32_t Flags = 0; 2018 uint32_t NegFlags = 0; 2019 if (consume("(")) { 2020 std::tie(Flags, NegFlags) = readMemoryAttributes(); 2021 expect(")"); 2022 } 2023 expect(":"); 2024 2025 uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); 2026 expect(","); 2027 uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); 2028 2029 // Add the memory region to the region map (if it doesn't already exist). 2030 auto It = Opt.MemoryRegions.find(Name); 2031 if (It != Opt.MemoryRegions.end()) 2032 setError("region '" + Name + "' already defined"); 2033 else 2034 Opt.MemoryRegions[Name] = {Name, Origin, Length, Origin, Flags, NegFlags}; 2035 } 2036 } 2037 2038 // This function parses the attributes used to match against section 2039 // flags when placing output sections in a memory region. These flags 2040 // are only used when an explicit memory region name is not used. 
2041 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { 2042 uint32_t Flags = 0; 2043 uint32_t NegFlags = 0; 2044 bool Invert = false; 2045 2046 for (char C : next().lower()) { 2047 uint32_t Flag = 0; 2048 if (C == '!') 2049 Invert = !Invert; 2050 else if (C == 'w') 2051 Flag = SHF_WRITE; 2052 else if (C == 'x') 2053 Flag = SHF_EXECINSTR; 2054 else if (C == 'a') 2055 Flag = SHF_ALLOC; 2056 else if (C != 'r') 2057 setError("invalid memory region attribute"); 2058 2059 if (Invert) 2060 NegFlags |= Flag; 2061 else 2062 Flags |= Flag; 2063 } 2064 return {Flags, NegFlags}; 2065 } 2066 2067 void elf::readLinkerScript(MemoryBufferRef MB) { 2068 ScriptParser(MB).readLinkerScript(); 2069 } 2070 2071 void elf::readVersionScript(MemoryBufferRef MB) { 2072 ScriptParser(MB).readVersionScript(); 2073 } 2074 2075 void elf::readDynamicList(MemoryBufferRef MB) { 2076 ScriptParser(MB).readDynamicList(); 2077 } 2078 2079 template class elf::LinkerScript<ELF32LE>; 2080 template class elf::LinkerScript<ELF32BE>; 2081 template class elf::LinkerScript<ELF64LE>; 2082 template class elf::LinkerScript<ELF64BE>; 2083