1 //===- LinkerScript.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the parser/evaluator of the linker script. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "LinkerScript.h" 15 #include "Config.h" 16 #include "Driver.h" 17 #include "InputSection.h" 18 #include "Memory.h" 19 #include "OutputSections.h" 20 #include "ScriptLexer.h" 21 #include "Strings.h" 22 #include "SymbolTable.h" 23 #include "Symbols.h" 24 #include "SyntheticSections.h" 25 #include "Target.h" 26 #include "Writer.h" 27 #include "llvm/ADT/STLExtras.h" 28 #include "llvm/ADT/SmallString.h" 29 #include "llvm/ADT/StringRef.h" 30 #include "llvm/ADT/StringSwitch.h" 31 #include "llvm/Support/Casting.h" 32 #include "llvm/Support/ELF.h" 33 #include "llvm/Support/Endian.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/FileSystem.h" 36 #include "llvm/Support/MathExtras.h" 37 #include "llvm/Support/Path.h" 38 #include <algorithm> 39 #include <cassert> 40 #include <cstddef> 41 #include <cstdint> 42 #include <iterator> 43 #include <limits> 44 #include <memory> 45 #include <string> 46 #include <tuple> 47 #include <vector> 48 49 using namespace llvm; 50 using namespace llvm::ELF; 51 using namespace llvm::object; 52 using namespace llvm::support::endian; 53 using namespace lld; 54 using namespace lld::elf; 55 56 LinkerScriptBase *elf::ScriptBase; 57 ScriptConfiguration *elf::ScriptConfig; 58 59 template <class ELFT> static SymbolBody *addRegular(SymbolAssignment *Cmd) { 60 Symbol *Sym; 61 uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT; 62 std::tie(Sym, std::ignore) = Symtab<ELFT>::X->insert( 63 Cmd->Name, /*Type*/ 0, Visibility, /*CanOmitFromDynSym*/ false, 64 /*File*/ nullptr); 65 Sym->Binding = STB_GLOBAL; 66 replaceBody<DefinedRegular>(Sym, Cmd->Name, /*IsLocal=*/false, Visibility, 67 STT_NOTYPE, 0, 0, nullptr, nullptr); 68 return Sym->body(); 69 } 70 71 template <class ELFT> static SymbolBody *addSynthetic(SymbolAssignment *Cmd) { 72 Symbol *Sym; 73 uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT; 74 const OutputSection *Sec = 75 ScriptConfig->HasSections ? nullptr : Cmd->Expression.Section(); 76 std::tie(Sym, std::ignore) = Symtab<ELFT>::X->insert( 77 Cmd->Name, /*Type*/ 0, Visibility, /*CanOmitFromDynSym*/ false, 78 /*File*/ nullptr); 79 Sym->Binding = STB_GLOBAL; 80 replaceBody<DefinedSynthetic>(Sym, Cmd->Name, 0, Sec); 81 return Sym->body(); 82 } 83 84 static bool isUnderSysroot(StringRef Path) { 85 if (Config->Sysroot == "") 86 return false; 87 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 88 if (sys::fs::equivalent(Config->Sysroot, Path)) 89 return true; 90 return false; 91 } 92 93 template <class ELFT> 94 void LinkerScript<ELFT>::setDot(Expr E, const Twine &Loc, bool InSec) { 95 uintX_t Val = E(Dot); 96 if (Val < Dot) { 97 if (InSec) 98 error(Loc + ": unable to move location counter backward for: " + 99 CurOutSec->Name); 100 else 101 error(Loc + ": unable to move location counter backward"); 102 } 103 Dot = Val; 104 // Update to location counter means update to section size. 105 if (InSec) 106 CurOutSec->Size = Dot - CurOutSec->Addr; 107 } 108 109 // Sets value of a symbol. Two kinds of symbols are processed: synthetic 110 // symbols, whose value is an offset from beginning of section and regular 111 // symbols whose value is absolute. 112 template <class ELFT> 113 void LinkerScript<ELFT>::assignSymbol(SymbolAssignment *Cmd, bool InSec) { 114 if (Cmd->Name == ".") { 115 setDot(Cmd->Expression, Cmd->Location, InSec); 116 return; 117 } 118 119 if (!Cmd->Sym) 120 return; 121 122 if (auto *Body = dyn_cast<DefinedSynthetic>(Cmd->Sym)) { 123 Body->Section = Cmd->Expression.Section(); 124 if (Body->Section) { 125 uint64_t VA = 0; 126 if (Body->Section->Flags & SHF_ALLOC) 127 VA = Body->Section->Addr; 128 Body->Value = Cmd->Expression(Dot) - VA; 129 } 130 return; 131 } 132 133 cast<DefinedRegular>(Cmd->Sym)->Value = Cmd->Expression(Dot); 134 } 135 136 template <class ELFT> 137 void LinkerScript<ELFT>::addSymbol(SymbolAssignment *Cmd) { 138 if (Cmd->Name == ".") 139 return; 140 141 // If a symbol was in PROVIDE(), we need to define it only when 142 // it is a referenced undefined symbol. 143 SymbolBody *B = Symtab<ELFT>::X->find(Cmd->Name); 144 if (Cmd->Provide && (!B || B->isDefined())) 145 return; 146 147 // Otherwise, create a new symbol if one does not exist or an 148 // undefined one does exist. 149 if (Cmd->Expression.IsAbsolute()) 150 Cmd->Sym = addRegular<ELFT>(Cmd); 151 else 152 Cmd->Sym = addSynthetic<ELFT>(Cmd); 153 154 // If there are sections, then let the value be assigned later in 155 // `assignAddresses`. 156 if (!ScriptConfig->HasSections) 157 assignSymbol(Cmd); 158 } 159 160 bool SymbolAssignment::classof(const BaseCommand *C) { 161 return C->Kind == AssignmentKind; 162 } 163 164 bool OutputSectionCommand::classof(const BaseCommand *C) { 165 return C->Kind == OutputSectionKind; 166 } 167 168 bool InputSectionDescription::classof(const BaseCommand *C) { 169 return C->Kind == InputSectionKind; 170 } 171 172 bool AssertCommand::classof(const BaseCommand *C) { 173 return C->Kind == AssertKind; 174 } 175 176 bool BytesDataCommand::classof(const BaseCommand *C) { 177 return C->Kind == BytesDataKind; 178 } 179 180 template <class ELFT> LinkerScript<ELFT>::LinkerScript() = default; 181 template <class ELFT> LinkerScript<ELFT>::~LinkerScript() = default; 182 183 static StringRef basename(InputSectionBase *S) { 184 if (S->File) 185 return sys::path::filename(S->File->getName()); 186 return ""; 187 } 188 189 template <class ELFT> bool LinkerScript<ELFT>::shouldKeep(InputSectionBase *S) { 190 for (InputSectionDescription *ID : Opt.KeptSections) 191 if (ID->FilePat.match(basename(S))) 192 for (SectionPattern &P : ID->SectionPatterns) 193 if (P.SectionPat.match(S->Name)) 194 return true; 195 return false; 196 } 197 198 static bool comparePriority(InputSectionBase *A, InputSectionBase *B) { 199 return getPriority(A->Name) < getPriority(B->Name); 200 } 201 202 static bool compareName(InputSectionBase *A, InputSectionBase *B) { 203 return A->Name < B->Name; 204 } 205 206 static bool compareAlignment(InputSectionBase *A, InputSectionBase *B) { 207 // ">" is not a mistake. Larger alignments are placed before smaller 208 // alignments in order to reduce the amount of padding necessary. 209 // This is compatible with GNU. 210 return A->Alignment > B->Alignment; 211 } 212 213 static std::function<bool(InputSectionBase *, InputSectionBase *)> 214 getComparator(SortSectionPolicy K) { 215 switch (K) { 216 case SortSectionPolicy::Alignment: 217 return compareAlignment; 218 case SortSectionPolicy::Name: 219 return compareName; 220 case SortSectionPolicy::Priority: 221 return comparePriority; 222 default: 223 llvm_unreachable("unknown sort policy"); 224 } 225 } 226 227 template <class ELFT> 228 static bool matchConstraints(ArrayRef<InputSectionBase *> Sections, 229 ConstraintKind Kind) { 230 if (Kind == ConstraintKind::NoConstraint) 231 return true; 232 bool IsRW = llvm::any_of(Sections, [=](InputSectionBase *Sec2) { 233 auto *Sec = static_cast<InputSectionBase *>(Sec2); 234 return Sec->Flags & SHF_WRITE; 235 }); 236 return (IsRW && Kind == ConstraintKind::ReadWrite) || 237 (!IsRW && Kind == ConstraintKind::ReadOnly); 238 } 239 240 static void sortSections(InputSectionBase **Begin, InputSectionBase **End, 241 SortSectionPolicy K) { 242 if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None) 243 std::stable_sort(Begin, End, getComparator(K)); 244 } 245 246 // Compute and remember which sections the InputSectionDescription matches. 247 template <class ELFT> 248 void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) { 249 // Collects all sections that satisfy constraints of I 250 // and attach them to I. 251 for (SectionPattern &Pat : I->SectionPatterns) { 252 size_t SizeBefore = I->Sections.size(); 253 254 for (InputSectionBase *S : InputSections) { 255 if (S->Assigned) 256 continue; 257 // For -emit-relocs we have to ignore entries like 258 // .rela.dyn : { *(.rela.data) } 259 // which are common because they are in the default bfd script. 260 if (S->Type == SHT_REL || S->Type == SHT_RELA) 261 continue; 262 263 StringRef Filename = basename(S); 264 if (!I->FilePat.match(Filename) || Pat.ExcludedFilePat.match(Filename)) 265 continue; 266 if (!Pat.SectionPat.match(S->Name)) 267 continue; 268 I->Sections.push_back(S); 269 S->Assigned = true; 270 } 271 272 // Sort sections as instructed by SORT-family commands and --sort-section 273 // option. Because SORT-family commands can be nested at most two depth 274 // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command 275 // line option is respected even if a SORT command is given, the exact 276 // behavior we have here is a bit complicated. Here are the rules. 277 // 278 // 1. If two SORT commands are given, --sort-section is ignored. 279 // 2. If one SORT command is given, and if it is not SORT_NONE, 280 // --sort-section is handled as an inner SORT command. 281 // 3. If one SORT command is given, and if it is SORT_NONE, don't sort. 282 // 4. If no SORT command is given, sort according to --sort-section. 283 InputSectionBase **Begin = I->Sections.data() + SizeBefore; 284 InputSectionBase **End = I->Sections.data() + I->Sections.size(); 285 if (Pat.SortOuter != SortSectionPolicy::None) { 286 if (Pat.SortInner == SortSectionPolicy::Default) 287 sortSections(Begin, End, Config->SortSection); 288 else 289 sortSections(Begin, End, Pat.SortInner); 290 sortSections(Begin, End, Pat.SortOuter); 291 } 292 } 293 } 294 295 template <class ELFT> 296 void LinkerScript<ELFT>::discard(ArrayRef<InputSectionBase *> V) { 297 for (InputSectionBase *S : V) { 298 S->Live = false; 299 if (S == In<ELFT>::ShStrTab) 300 error("discarding .shstrtab section is not allowed"); 301 discard(S->DependentSections); 302 } 303 } 304 305 template <class ELFT> 306 std::vector<InputSectionBase *> 307 LinkerScript<ELFT>::createInputSectionList(OutputSectionCommand &OutCmd) { 308 std::vector<InputSectionBase *> Ret; 309 310 for (const std::unique_ptr<BaseCommand> &Base : OutCmd.Commands) { 311 auto *Cmd = dyn_cast<InputSectionDescription>(Base.get()); 312 if (!Cmd) 313 continue; 314 computeInputSections(Cmd); 315 for (InputSectionBase *S : Cmd->Sections) 316 Ret.push_back(static_cast<InputSectionBase *>(S)); 317 } 318 319 return Ret; 320 } 321 322 template <class ELFT> 323 void LinkerScript<ELFT>::processCommands(OutputSectionFactory &Factory) { 324 for (unsigned I = 0; I < Opt.Commands.size(); ++I) { 325 auto Iter = Opt.Commands.begin() + I; 326 const std::unique_ptr<BaseCommand> &Base1 = *Iter; 327 328 // Handle symbol assignments outside of any output section. 329 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base1.get())) { 330 addSymbol(Cmd); 331 continue; 332 } 333 334 if (auto *Cmd = dyn_cast<AssertCommand>(Base1.get())) { 335 // If we don't have SECTIONS then output sections have already been 336 // created by Writer<ELFT>. The LinkerScript<ELFT>::assignAddresses 337 // will not be called, so ASSERT should be evaluated now. 338 if (!Opt.HasSections) 339 Cmd->Expression(0); 340 continue; 341 } 342 343 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base1.get())) { 344 std::vector<InputSectionBase *> V = createInputSectionList(*Cmd); 345 346 // The output section name `/DISCARD/' is special. 347 // Any input section assigned to it is discarded. 348 if (Cmd->Name == "/DISCARD/") { 349 discard(V); 350 continue; 351 } 352 353 // This is for ONLY_IF_RO and ONLY_IF_RW. An output section directive 354 // ".foo : ONLY_IF_R[OW] { ... }" is handled only if all member input 355 // sections satisfy a given constraint. If not, a directive is handled 356 // as if it wasn't present from the beginning. 357 // 358 // Because we'll iterate over Commands many more times, the easiest 359 // way to "make it as if it wasn't present" is to just remove it. 360 if (!matchConstraints<ELFT>(V, Cmd->Constraint)) { 361 for (InputSectionBase *S : V) 362 S->Assigned = false; 363 Opt.Commands.erase(Iter); 364 --I; 365 continue; 366 } 367 368 // A directive may contain symbol definitions like this: 369 // ".foo : { ...; bar = .; }". Handle them. 370 for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) 371 if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base.get())) 372 addSymbol(OutCmd); 373 374 // Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign 375 // is given, input sections are aligned to that value, whether the 376 // given value is larger or smaller than the original section alignment. 377 if (Cmd->SubalignExpr) { 378 uint32_t Subalign = Cmd->SubalignExpr(0); 379 for (InputSectionBase *S : V) 380 S->Alignment = Subalign; 381 } 382 383 // Add input sections to an output section. 384 for (InputSectionBase *S : V) 385 Factory.addInputSec<ELFT>(S, Cmd->Name); 386 } 387 } 388 } 389 390 // Add sections that didn't match any sections command. 391 template <class ELFT> 392 void LinkerScript<ELFT>::addOrphanSections(OutputSectionFactory &Factory) { 393 for (InputSectionBase *S : InputSections) 394 if (S->Live && !S->OutSec) 395 Factory.addInputSec<ELFT>(S, getOutputSectionName(S->Name)); 396 } 397 398 template <class ELFT> static bool isTbss(OutputSection *Sec) { 399 return (Sec->Flags & SHF_TLS) && Sec->Type == SHT_NOBITS; 400 } 401 402 template <class ELFT> void LinkerScript<ELFT>::output(InputSection *S) { 403 if (!AlreadyOutputIS.insert(S).second) 404 return; 405 bool IsTbss = isTbss<ELFT>(CurOutSec); 406 407 uintX_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot; 408 Pos = alignTo(Pos, S->Alignment); 409 S->OutSecOff = Pos - CurOutSec->Addr; 410 Pos += S->template getSize<ELFT>(); 411 412 // Update output section size after adding each section. This is so that 413 // SIZEOF works correctly in the case below: 414 // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) } 415 CurOutSec->Size = Pos - CurOutSec->Addr; 416 417 // If there is a memory region associated with this input section, then 418 // place the section in that region and update the region index. 419 if (CurMemRegion) { 420 CurMemRegion->Offset += CurOutSec->Size; 421 uint64_t CurSize = CurMemRegion->Offset - CurMemRegion->Origin; 422 if (CurSize > CurMemRegion->Length) { 423 uint64_t OverflowAmt = CurSize - CurMemRegion->Length; 424 error("section '" + CurOutSec->Name + "' will not fit in region '" + 425 CurMemRegion->Name + "': overflowed by " + Twine(OverflowAmt) + 426 " bytes"); 427 } 428 } 429 430 if (IsTbss) 431 ThreadBssOffset = Pos - Dot; 432 else 433 Dot = Pos; 434 } 435 436 template <class ELFT> void LinkerScript<ELFT>::flush() { 437 assert(CurOutSec); 438 if (!AlreadyOutputOS.insert(CurOutSec).second) 439 return; 440 for (InputSection *I : CurOutSec->Sections) 441 output(I); 442 } 443 444 template <class ELFT> void LinkerScript<ELFT>::switchTo(OutputSection *Sec) { 445 if (CurOutSec == Sec) 446 return; 447 if (AlreadyOutputOS.count(Sec)) 448 return; 449 450 CurOutSec = Sec; 451 452 Dot = alignTo(Dot, CurOutSec->Addralign); 453 CurOutSec->Addr = isTbss<ELFT>(CurOutSec) ? Dot + ThreadBssOffset : Dot; 454 455 // If neither AT nor AT> is specified for an allocatable section, the linker 456 // will set the LMA such that the difference between VMA and LMA for the 457 // section is the same as the preceding output section in the same region 458 // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html 459 if (LMAOffset) 460 CurOutSec->LMAOffset = LMAOffset(); 461 } 462 463 template <class ELFT> void LinkerScript<ELFT>::process(BaseCommand &Base) { 464 // This handles the assignments to symbol or to a location counter (.) 465 if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) { 466 assignSymbol(AssignCmd, true); 467 return; 468 } 469 470 // Handle BYTE(), SHORT(), LONG(), or QUAD(). 471 if (auto *DataCmd = dyn_cast<BytesDataCommand>(&Base)) { 472 DataCmd->Offset = Dot - CurOutSec->Addr; 473 Dot += DataCmd->Size; 474 CurOutSec->Size = Dot - CurOutSec->Addr; 475 return; 476 } 477 478 if (auto *AssertCmd = dyn_cast<AssertCommand>(&Base)) { 479 AssertCmd->Expression(Dot); 480 return; 481 } 482 483 // It handles single input section description command, 484 // calculates and assigns the offsets for each section and also 485 // updates the output section size. 486 auto &ICmd = cast<InputSectionDescription>(Base); 487 for (InputSectionBase *IB : ICmd.Sections) { 488 // We tentatively added all synthetic sections at the beginning and removed 489 // empty ones afterwards (because there is no way to know whether they were 490 // going be empty or not other than actually running linker scripts.) 491 // We need to ignore remains of empty sections. 492 if (auto *Sec = dyn_cast<SyntheticSection>(IB)) 493 if (Sec->empty()) 494 continue; 495 496 if (!IB->Live) 497 continue; 498 assert(CurOutSec == IB->OutSec || AlreadyOutputOS.count(IB->OutSec)); 499 output(cast<InputSection>(IB)); 500 } 501 } 502 503 template <class ELFT> 504 static OutputSection * 505 findSection(StringRef Name, const std::vector<OutputSection *> &Sections) { 506 auto End = Sections.end(); 507 auto HasName = [=](OutputSection *Sec) { return Sec->Name == Name; }; 508 auto I = std::find_if(Sections.begin(), End, HasName); 509 std::vector<OutputSection *> Ret; 510 if (I == End) 511 return nullptr; 512 assert(std::find_if(I + 1, End, HasName) == End); 513 return *I; 514 } 515 516 // This function searches for a memory region to place the given output 517 // section in. If found, a pointer to the appropriate memory region is 518 // returned. Otherwise, a nullptr is returned. 519 template <class ELFT> 520 MemoryRegion *LinkerScript<ELFT>::findMemoryRegion(OutputSectionCommand *Cmd, 521 OutputSection *Sec) { 522 // If a memory region name was specified in the output section command, 523 // then try to find that region first. 524 if (!Cmd->MemoryRegionName.empty()) { 525 auto It = Opt.MemoryRegions.find(Cmd->MemoryRegionName); 526 if (It != Opt.MemoryRegions.end()) 527 return &It->second; 528 error("memory region '" + Cmd->MemoryRegionName + "' not declared"); 529 return nullptr; 530 } 531 532 // The memory region name is empty, thus a suitable region must be 533 // searched for in the region map. If the region map is empty, just 534 // return. Note that this check doesn't happen at the very beginning 535 // so that uses of undeclared regions can be caught. 536 if (!Opt.MemoryRegions.size()) 537 return nullptr; 538 539 // See if a region can be found by matching section flags. 540 for (auto &MRI : Opt.MemoryRegions) { 541 MemoryRegion &MR = MRI.second; 542 if ((MR.Flags & Sec->Flags) != 0 && (MR.NegFlags & Sec->Flags) == 0) 543 return &MR; 544 } 545 546 // Otherwise, no suitable region was found. 547 if (Sec->Flags & SHF_ALLOC) 548 error("no memory region specified for section '" + Sec->Name + "'"); 549 return nullptr; 550 } 551 552 // This function assigns offsets to input sections and an output section 553 // for a single sections command (e.g. ".text { *(.text); }"). 554 template <class ELFT> 555 void LinkerScript<ELFT>::assignOffsets(OutputSectionCommand *Cmd) { 556 if (Cmd->LMAExpr) { 557 uintX_t D = Dot; 558 LMAOffset = [=] { return Cmd->LMAExpr(D) - D; }; 559 } 560 OutputSection *Sec = findSection<ELFT>(Cmd->Name, *OutputSections); 561 if (!Sec) 562 return; 563 564 if (Cmd->AddrExpr && Sec->Flags & SHF_ALLOC) 565 setDot(Cmd->AddrExpr, Cmd->Location); 566 567 // Handle align (e.g. ".foo : ALIGN(16) { ... }"). 568 if (Cmd->AlignExpr) 569 Sec->updateAlignment(Cmd->AlignExpr(0)); 570 571 // Try and find an appropriate memory region to assign offsets in. 572 CurMemRegion = findMemoryRegion(Cmd, Sec); 573 if (CurMemRegion) 574 Dot = CurMemRegion->Offset; 575 switchTo(Sec); 576 577 // Find the last section output location. We will output orphan sections 578 // there so that end symbols point to the correct location. 579 auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(), 580 [](const std::unique_ptr<BaseCommand> &Cmd) { 581 return !isa<SymbolAssignment>(*Cmd); 582 }) 583 .base(); 584 for (auto I = Cmd->Commands.begin(); I != E; ++I) 585 process(**I); 586 flush(); 587 std::for_each(E, Cmd->Commands.end(), 588 [this](std::unique_ptr<BaseCommand> &B) { process(*B.get()); }); 589 } 590 591 template <class ELFT> void LinkerScript<ELFT>::removeEmptyCommands() { 592 // It is common practice to use very generic linker scripts. So for any 593 // given run some of the output sections in the script will be empty. 594 // We could create corresponding empty output sections, but that would 595 // clutter the output. 596 // We instead remove trivially empty sections. The bfd linker seems even 597 // more aggressive at removing them. 598 auto Pos = std::remove_if( 599 Opt.Commands.begin(), Opt.Commands.end(), 600 [&](const std::unique_ptr<BaseCommand> &Base) { 601 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 602 return !findSection<ELFT>(Cmd->Name, *OutputSections); 603 return false; 604 }); 605 Opt.Commands.erase(Pos, Opt.Commands.end()); 606 } 607 608 static bool isAllSectionDescription(const OutputSectionCommand &Cmd) { 609 for (const std::unique_ptr<BaseCommand> &I : Cmd.Commands) 610 if (!isa<InputSectionDescription>(*I)) 611 return false; 612 return true; 613 } 614 615 template <class ELFT> void LinkerScript<ELFT>::adjustSectionsBeforeSorting() { 616 // If the output section contains only symbol assignments, create a 617 // corresponding output section. The bfd linker seems to only create them if 618 // '.' is assigned to, but creating these section should not have any bad 619 // consequeces and gives us a section to put the symbol in. 620 uintX_t Flags = SHF_ALLOC; 621 uint32_t Type = SHT_NOBITS; 622 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 623 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 624 if (!Cmd) 625 continue; 626 if (OutputSection *Sec = findSection<ELFT>(Cmd->Name, *OutputSections)) { 627 Flags = Sec->Flags; 628 Type = Sec->Type; 629 continue; 630 } 631 632 if (isAllSectionDescription(*Cmd)) 633 continue; 634 635 auto *OutSec = make<OutputSection>(Cmd->Name, Type, Flags); 636 OutputSections->push_back(OutSec); 637 } 638 } 639 640 template <class ELFT> void LinkerScript<ELFT>::adjustSectionsAfterSorting() { 641 placeOrphanSections(); 642 643 // If output section command doesn't specify any segments, 644 // and we haven't previously assigned any section to segment, 645 // then we simply assign section to the very first load segment. 646 // Below is an example of such linker script: 647 // PHDRS { seg PT_LOAD; } 648 // SECTIONS { .aaa : { *(.aaa) } } 649 std::vector<StringRef> DefPhdrs; 650 auto FirstPtLoad = 651 std::find_if(Opt.PhdrsCommands.begin(), Opt.PhdrsCommands.end(), 652 [](const PhdrsCommand &Cmd) { return Cmd.Type == PT_LOAD; }); 653 if (FirstPtLoad != Opt.PhdrsCommands.end()) 654 DefPhdrs.push_back(FirstPtLoad->Name); 655 656 // Walk the commands and propagate the program headers to commands that don't 657 // explicitly specify them. 658 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 659 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 660 if (!Cmd) 661 continue; 662 if (Cmd->Phdrs.empty()) 663 Cmd->Phdrs = DefPhdrs; 664 else 665 DefPhdrs = Cmd->Phdrs; 666 } 667 668 removeEmptyCommands(); 669 } 670 671 // When placing orphan sections, we want to place them after symbol assignments 672 // so that an orphan after 673 // begin_foo = .; 674 // foo : { *(foo) } 675 // end_foo = .; 676 // doesn't break the intended meaning of the begin/end symbols. 677 // We don't want to go over sections since Writer<ELFT>::sortSections is the 678 // one in charge of deciding the order of the sections. 679 // We don't want to go over alignments, since doing so in 680 // rx_sec : { *(rx_sec) } 681 // . = ALIGN(0x1000); 682 // /* The RW PT_LOAD starts here*/ 683 // rw_sec : { *(rw_sec) } 684 // would mean that the RW PT_LOAD would become unaligned. 685 static bool shouldSkip(const BaseCommand &Cmd) { 686 if (isa<OutputSectionCommand>(Cmd)) 687 return false; 688 const auto *Assign = dyn_cast<SymbolAssignment>(&Cmd); 689 if (!Assign) 690 return true; 691 return Assign->Name != "."; 692 } 693 694 // Orphan sections are sections present in the input files which are 695 // not explicitly placed into the output file by the linker script. 696 // 697 // When the control reaches this function, Opt.Commands contains 698 // output section commands for non-orphan sections only. This function 699 // adds new elements for orphan sections to Opt.Commands so that all 700 // sections are explicitly handled by Opt.Commands. 701 // 702 // Writer<ELFT>::sortSections has already sorted output sections. 703 // What we need to do is to scan OutputSections vector and 704 // Opt.Commands in parallel to find orphan sections. If there is an 705 // output section that doesn't have a corresponding entry in 706 // Opt.Commands, we will insert a new entry to Opt.Commands. 707 // 708 // There is some ambiguity as to where exactly a new entry should be 709 // inserted, because Opt.Commands contains not only output section 710 // commands but other types of commands such as symbol assignment 711 // expressions. There's no correct answer here due to the lack of the 712 // formal specification of the linker script. We use heuristics to 713 // determine whether a new output command should be added before or 714 // after another commands. For the details, look at shouldSkip 715 // function. 716 template <class ELFT> void LinkerScript<ELFT>::placeOrphanSections() { 717 // The OutputSections are already in the correct order. 718 // This loops creates or moves commands as needed so that they are in the 719 // correct order. 720 int CmdIndex = 0; 721 722 // As a horrible special case, skip the first . assignment if it is before any 723 // section. We do this because it is common to set a load address by starting 724 // the script with ". = 0xabcd" and the expectation is that every section is 725 // after that. 726 auto FirstSectionOrDotAssignment = 727 std::find_if(Opt.Commands.begin(), Opt.Commands.end(), 728 [](const std::unique_ptr<BaseCommand> &Cmd) { 729 if (isa<OutputSectionCommand>(*Cmd)) 730 return true; 731 const auto *Assign = dyn_cast<SymbolAssignment>(Cmd.get()); 732 if (!Assign) 733 return false; 734 return Assign->Name == "."; 735 }); 736 if (FirstSectionOrDotAssignment != Opt.Commands.end()) { 737 CmdIndex = FirstSectionOrDotAssignment - Opt.Commands.begin(); 738 if (isa<SymbolAssignment>(**FirstSectionOrDotAssignment)) 739 ++CmdIndex; 740 } 741 742 for (OutputSection *Sec : *OutputSections) { 743 StringRef Name = Sec->Name; 744 745 // Find the last spot where we can insert a command and still get the 746 // correct result. 747 auto CmdIter = Opt.Commands.begin() + CmdIndex; 748 auto E = Opt.Commands.end(); 749 while (CmdIter != E && shouldSkip(**CmdIter)) { 750 ++CmdIter; 751 ++CmdIndex; 752 } 753 754 auto Pos = 755 std::find_if(CmdIter, E, [&](const std::unique_ptr<BaseCommand> &Base) { 756 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 757 return Cmd && Cmd->Name == Name; 758 }); 759 if (Pos == E) { 760 Opt.Commands.insert(CmdIter, 761 llvm::make_unique<OutputSectionCommand>(Name)); 762 ++CmdIndex; 763 continue; 764 } 765 766 // Continue from where we found it. 767 CmdIndex = (Pos - Opt.Commands.begin()) + 1; 768 } 769 } 770 771 template <class ELFT> 772 void LinkerScript<ELFT>::assignAddresses(std::vector<PhdrEntry> &Phdrs) { 773 // Assign addresses as instructed by linker script SECTIONS sub-commands. 774 Dot = 0; 775 776 // A symbol can be assigned before any section is mentioned in the linker 777 // script. In an DSO, the symbol values are addresses, so the only important 778 // section values are: 779 // * SHN_UNDEF 780 // * SHN_ABS 781 // * Any value meaning a regular section. 782 // To handle that, create a dummy aether section that fills the void before 783 // the linker scripts switches to another section. It has an index of one 784 // which will map to whatever the first actual section is. 785 auto *Aether = make<OutputSection>("", 0, SHF_ALLOC); 786 Aether->SectionIndex = 1; 787 switchTo(Aether); 788 789 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 790 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) { 791 assignSymbol(Cmd); 792 continue; 793 } 794 795 if (auto *Cmd = dyn_cast<AssertCommand>(Base.get())) { 796 Cmd->Expression(Dot); 797 continue; 798 } 799 800 auto *Cmd = cast<OutputSectionCommand>(Base.get()); 801 assignOffsets(Cmd); 802 } 803 804 uintX_t MinVA = std::numeric_limits<uintX_t>::max(); 805 for (OutputSection *Sec : *OutputSections) { 806 if (Sec->Flags & SHF_ALLOC) 807 MinVA = std::min<uint64_t>(MinVA, Sec->Addr); 808 else 809 Sec->Addr = 0; 810 } 811 812 allocateHeaders<ELFT>(Phdrs, *OutputSections, MinVA); 813 } 814 815 // Creates program headers as instructed by PHDRS linker script command. 816 template <class ELFT> std::vector<PhdrEntry> LinkerScript<ELFT>::createPhdrs() { 817 std::vector<PhdrEntry> Ret; 818 819 // Process PHDRS and FILEHDR keywords because they are not 820 // real output sections and cannot be added in the following loop. 821 for (const PhdrsCommand &Cmd : Opt.PhdrsCommands) { 822 Ret.emplace_back(Cmd.Type, Cmd.Flags == UINT_MAX ? PF_R : Cmd.Flags); 823 PhdrEntry &Phdr = Ret.back(); 824 825 if (Cmd.HasFilehdr) 826 Phdr.add(Out::ElfHeader); 827 if (Cmd.HasPhdrs) 828 Phdr.add(Out::ProgramHeaders); 829 830 if (Cmd.LMAExpr) { 831 Phdr.p_paddr = Cmd.LMAExpr(0); 832 Phdr.HasLMA = true; 833 } 834 } 835 836 // Add output sections to program headers. 837 for (OutputSection *Sec : *OutputSections) { 838 if (!(Sec->Flags & SHF_ALLOC)) 839 break; 840 841 // Assign headers specified by linker script 842 for (size_t Id : getPhdrIndices(Sec->Name)) { 843 Ret[Id].add(Sec); 844 if (Opt.PhdrsCommands[Id].Flags == UINT_MAX) 845 Ret[Id].p_flags |= Sec->getPhdrFlags(); 846 } 847 } 848 return Ret; 849 } 850 851 template <class ELFT> bool LinkerScript<ELFT>::ignoreInterpSection() { 852 // Ignore .interp section in case we have PHDRS specification 853 // and PT_INTERP isn't listed. 854 return !Opt.PhdrsCommands.empty() && 855 llvm::find_if(Opt.PhdrsCommands, [](const PhdrsCommand &Cmd) { 856 return Cmd.Type == PT_INTERP; 857 }) == Opt.PhdrsCommands.end(); 858 } 859 860 template <class ELFT> uint32_t LinkerScript<ELFT>::getFiller(StringRef Name) { 861 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 862 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 863 if (Cmd->Name == Name) 864 return Cmd->Filler; 865 return 0; 866 } 867 868 template <class ELFT> 869 static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) { 870 const endianness E = ELFT::TargetEndianness; 871 872 switch (Size) { 873 case 1: 874 *Buf = (uint8_t)Data; 875 break; 876 case 2: 877 write16<E>(Buf, Data); 878 break; 879 case 4: 880 write32<E>(Buf, Data); 881 break; 882 case 8: 883 write64<E>(Buf, Data); 884 break; 885 default: 886 llvm_unreachable("unsupported Size argument"); 887 } 888 } 889 890 template <class ELFT> 891 void LinkerScript<ELFT>::writeDataBytes(StringRef Name, uint8_t *Buf) { 892 int I = getSectionIndex(Name); 893 if (I == INT_MAX) 894 return; 895 896 auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get()); 897 for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) 898 if (auto *Data = dyn_cast<BytesDataCommand>(Base.get())) 899 writeInt<ELFT>(Buf + Data->Offset, Data->Expression(0), Data->Size); 900 } 901 902 template <class ELFT> bool LinkerScript<ELFT>::hasLMA(StringRef Name) { 903 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 904 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 905 if (Cmd->LMAExpr && Cmd->Name == Name) 906 return true; 907 return false; 908 } 909 910 // Returns the index of the given section name in linker script 911 // SECTIONS commands. Sections are laid out as the same order as they 912 // were in the script. If a given name did not appear in the script, 913 // it returns INT_MAX, so that it will be laid out at end of file. 914 template <class ELFT> int LinkerScript<ELFT>::getSectionIndex(StringRef Name) { 915 for (int I = 0, E = Opt.Commands.size(); I != E; ++I) 916 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get())) 917 if (Cmd->Name == Name) 918 return I; 919 return INT_MAX; 920 } 921 922 template <class ELFT> bool LinkerScript<ELFT>::hasPhdrsCommands() { 923 return !Opt.PhdrsCommands.empty(); 924 } 925 926 template <class ELFT> 927 const OutputSection *LinkerScript<ELFT>::getOutputSection(const Twine &Loc, 928 StringRef Name) { 929 static OutputSection FakeSec("", 0, 0); 930 931 for (OutputSection *Sec : *OutputSections) 932 if (Sec->Name == Name) 933 return Sec; 934 935 error(Loc + ": undefined section " + Name); 936 return &FakeSec; 937 } 938 939 // This function is essentially the same as getOutputSection(Name)->Size, 940 // but it won't print out an error message if a given section is not found. 941 // 942 // Linker script does not create an output section if its content is empty. 943 // We want to allow SIZEOF(.foo) where .foo is a section which happened to 944 // be empty. That is why this function is different from getOutputSection(). 945 template <class ELFT> 946 uint64_t LinkerScript<ELFT>::getOutputSectionSize(StringRef Name) { 947 for (OutputSection *Sec : *OutputSections) 948 if (Sec->Name == Name) 949 return Sec->Size; 950 return 0; 951 } 952 953 template <class ELFT> uint64_t LinkerScript<ELFT>::getHeaderSize() { 954 return elf::getHeaderSize<ELFT>(); 955 } 956 957 template <class ELFT> 958 uint64_t LinkerScript<ELFT>::getSymbolValue(const Twine &Loc, StringRef S) { 959 if (SymbolBody *B = Symtab<ELFT>::X->find(S)) 960 return B->getVA<ELFT>(); 961 error(Loc + ": symbol not found: " + S); 962 return 0; 963 } 964 965 template <class ELFT> bool LinkerScript<ELFT>::isDefined(StringRef S) { 966 return Symtab<ELFT>::X->find(S) != nullptr; 967 } 968 969 template <class ELFT> bool LinkerScript<ELFT>::isAbsolute(StringRef S) { 970 SymbolBody *Sym = Symtab<ELFT>::X->find(S); 971 auto *DR = dyn_cast_or_null<DefinedRegular>(Sym); 972 return DR && !DR->Section; 973 } 974 975 // Gets section symbol belongs to. Symbol "." doesn't belong to any 976 // specific section but isn't absolute at the same time, so we try 977 // to find suitable section for it as well. 978 template <class ELFT> 979 const OutputSection *LinkerScript<ELFT>::getSymbolSection(StringRef S) { 980 if (SymbolBody *Sym = Symtab<ELFT>::X->find(S)) 981 return Sym->getOutputSection<ELFT>(); 982 return CurOutSec; 983 } 984 985 // Returns indices of ELF headers containing specific section, identified 986 // by Name. Each index is a zero based number of ELF header listed within 987 // PHDRS {} script block. 988 template <class ELFT> 989 std::vector<size_t> LinkerScript<ELFT>::getPhdrIndices(StringRef SectionName) { 990 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 991 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 992 if (!Cmd || Cmd->Name != SectionName) 993 continue; 994 995 std::vector<size_t> Ret; 996 for (StringRef PhdrName : Cmd->Phdrs) 997 Ret.push_back(getPhdrIndex(Cmd->Location, PhdrName)); 998 return Ret; 999 } 1000 return {}; 1001 } 1002 1003 template <class ELFT> 1004 size_t LinkerScript<ELFT>::getPhdrIndex(const Twine &Loc, StringRef PhdrName) { 1005 size_t I = 0; 1006 for (PhdrsCommand &Cmd : Opt.PhdrsCommands) { 1007 if (Cmd.Name == PhdrName) 1008 return I; 1009 ++I; 1010 } 1011 error(Loc + ": section header '" + PhdrName + "' is not listed in PHDRS"); 1012 return 0; 1013 } 1014 1015 class elf::ScriptParser final : public ScriptLexer { 1016 typedef void (ScriptParser::*Handler)(); 1017 1018 public: 1019 ScriptParser(MemoryBufferRef MB) 1020 : ScriptLexer(MB), 1021 IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} 1022 1023 void readLinkerScript(); 1024 void readVersionScript(); 1025 void readDynamicList(); 1026 1027 private: 1028 void addFile(StringRef Path); 1029 1030 void readAsNeeded(); 1031 void readEntry(); 1032 void readExtern(); 1033 void readGroup(); 1034 void readInclude(); 1035 void readMemory(); 1036 void readOutput(); 1037 void readOutputArch(); 1038 void readOutputFormat(); 1039 void readPhdrs(); 1040 void readSearchDir(); 1041 void readSections(); 1042 void readVersion(); 1043 void readVersionScriptCommand(); 1044 1045 SymbolAssignment *readAssignment(StringRef Name); 1046 BytesDataCommand *readBytesDataCommand(StringRef Tok); 1047 uint32_t readFill(); 1048 OutputSectionCommand *readOutputSectionDescription(StringRef OutSec); 1049 uint32_t readOutputSectionFiller(StringRef Tok); 1050 std::vector<StringRef> readOutputSectionPhdrs(); 1051 InputSectionDescription *readInputSectionDescription(StringRef Tok); 1052 StringMatcher readFilePatterns(); 1053 std::vector<SectionPattern> readInputSectionsList(); 1054 InputSectionDescription *readInputSectionRules(StringRef FilePattern); 1055 unsigned readPhdrType(); 1056 SortSectionPolicy readSortKind(); 1057 SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); 1058 SymbolAssignment *readProvideOrAssignment(StringRef Tok); 1059 void readSort(); 1060 Expr readAssert(); 1061 1062 uint64_t readMemoryAssignment(StringRef, StringRef, StringRef); 1063 std::pair<uint32_t, uint32_t> readMemoryAttributes(); 1064 1065 Expr readExpr(); 1066 Expr readExpr1(Expr Lhs, int MinPrec); 1067 StringRef readParenLiteral(); 1068 Expr readPrimary(); 1069 Expr readTernary(Expr Cond); 1070 Expr readParenExpr(); 1071 1072 // For parsing version script. 1073 std::vector<SymbolVersion> readVersionExtern(); 1074 void readAnonymousDeclaration(); 1075 void readVersionDeclaration(StringRef VerStr); 1076 std::vector<SymbolVersion> readSymbols(); 1077 void readLocals(); 1078 1079 ScriptConfiguration &Opt = *ScriptConfig; 1080 bool IsUnderSysroot; 1081 }; 1082 1083 void ScriptParser::readDynamicList() { 1084 expect("{"); 1085 readAnonymousDeclaration(); 1086 if (!atEOF()) 1087 setError("EOF expected, but got " + next()); 1088 } 1089 1090 void ScriptParser::readVersionScript() { 1091 readVersionScriptCommand(); 1092 if (!atEOF()) 1093 setError("EOF expected, but got " + next()); 1094 } 1095 1096 void ScriptParser::readVersionScriptCommand() { 1097 if (consume("{")) { 1098 readAnonymousDeclaration(); 1099 return; 1100 } 1101 1102 while (!atEOF() && !Error && peek() != "}") { 1103 StringRef VerStr = next(); 1104 if (VerStr == "{") { 1105 setError("anonymous version definition is used in " 1106 "combination with other version definitions"); 1107 return; 1108 } 1109 expect("{"); 1110 readVersionDeclaration(VerStr); 1111 } 1112 } 1113 1114 void ScriptParser::readVersion() { 1115 expect("{"); 1116 readVersionScriptCommand(); 1117 expect("}"); 1118 } 1119 1120 void ScriptParser::readLinkerScript() { 1121 while (!atEOF()) { 1122 StringRef Tok = next(); 1123 if (Tok == ";") 1124 continue; 1125 1126 if (Tok == "ASSERT") { 1127 Opt.Commands.emplace_back(new AssertCommand(readAssert())); 1128 } else if (Tok == "ENTRY") { 1129 readEntry(); 1130 } else if (Tok == "EXTERN") { 1131 readExtern(); 1132 } else if (Tok == "GROUP" || Tok == "INPUT") { 1133 readGroup(); 1134 } else if (Tok == "INCLUDE") { 1135 readInclude(); 1136 } else if (Tok == "MEMORY") { 1137 readMemory(); 1138 } else if (Tok == "OUTPUT") { 1139 readOutput(); 1140 } else if (Tok == "OUTPUT_ARCH") { 1141 readOutputArch(); 1142 } else if (Tok == "OUTPUT_FORMAT") { 1143 readOutputFormat(); 1144 } else if (Tok == "PHDRS") { 1145 readPhdrs(); 1146 } else if (Tok == "SEARCH_DIR") { 1147 readSearchDir(); 1148 } else if (Tok == "SECTIONS") { 1149 readSections(); 1150 } else if (Tok == "VERSION") { 1151 readVersion(); 1152 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { 1153 Opt.Commands.emplace_back(Cmd); 1154 } else { 1155 setError("unknown directive: " + Tok); 1156 } 1157 } 1158 } 1159 1160 void ScriptParser::addFile(StringRef S) { 1161 if (IsUnderSysroot && S.startswith("/")) { 1162 SmallString<128> PathData; 1163 StringRef Path = (Config->Sysroot + S).toStringRef(PathData); 1164 if (sys::fs::exists(Path)) { 1165 Driver->addFile(Saver.save(Path)); 1166 return; 1167 } 1168 } 1169 1170 if (sys::path::is_absolute(S)) { 1171 Driver->addFile(S); 1172 } else if (S.startswith("=")) { 1173 if (Config->Sysroot.empty()) 1174 Driver->addFile(S.substr(1)); 1175 else 1176 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1))); 1177 } else if (S.startswith("-l")) { 1178 Driver->addLibrary(S.substr(2)); 1179 } else if (sys::fs::exists(S)) { 1180 Driver->addFile(S); 1181 } else { 1182 if (Optional<std::string> Path = findFromSearchPaths(S)) 1183 Driver->addFile(Saver.save(*Path)); 1184 else 1185 setError("unable to find " + S); 1186 } 1187 } 1188 1189 void ScriptParser::readAsNeeded() { 1190 expect("("); 1191 bool Orig = Config->AsNeeded; 1192 Config->AsNeeded = true; 1193 while (!Error && !consume(")")) 1194 addFile(unquote(next())); 1195 Config->AsNeeded = Orig; 1196 } 1197 1198 void ScriptParser::readEntry() { 1199 // -e <symbol> takes predecence over ENTRY(<symbol>). 1200 expect("("); 1201 StringRef Tok = next(); 1202 if (Config->Entry.empty()) 1203 Config->Entry = Tok; 1204 expect(")"); 1205 } 1206 1207 void ScriptParser::readExtern() { 1208 expect("("); 1209 while (!Error && !consume(")")) 1210 Config->Undefined.push_back(next()); 1211 } 1212 1213 void ScriptParser::readGroup() { 1214 expect("("); 1215 while (!Error && !consume(")")) { 1216 StringRef Tok = next(); 1217 if (Tok == "AS_NEEDED") 1218 readAsNeeded(); 1219 else 1220 addFile(unquote(Tok)); 1221 } 1222 } 1223 1224 void ScriptParser::readInclude() { 1225 StringRef Tok = unquote(next()); 1226 1227 // https://sourceware.org/binutils/docs/ld/File-Commands.html: 1228 // The file will be searched for in the current directory, and in any 1229 // directory specified with the -L option. 1230 if (sys::fs::exists(Tok)) { 1231 if (Optional<MemoryBufferRef> MB = readFile(Tok)) 1232 tokenize(*MB); 1233 return; 1234 } 1235 if (Optional<std::string> Path = findFromSearchPaths(Tok)) { 1236 if (Optional<MemoryBufferRef> MB = readFile(*Path)) 1237 tokenize(*MB); 1238 return; 1239 } 1240 setError("cannot open " + Tok); 1241 } 1242 1243 void ScriptParser::readOutput() { 1244 // -o <file> takes predecence over OUTPUT(<file>). 1245 expect("("); 1246 StringRef Tok = next(); 1247 if (Config->OutputFile.empty()) 1248 Config->OutputFile = unquote(Tok); 1249 expect(")"); 1250 } 1251 1252 void ScriptParser::readOutputArch() { 1253 // OUTPUT_ARCH is ignored for now. 1254 expect("("); 1255 while (!Error && !consume(")")) 1256 skip(); 1257 } 1258 1259 void ScriptParser::readOutputFormat() { 1260 // Error checking only for now. 1261 expect("("); 1262 skip(); 1263 StringRef Tok = next(); 1264 if (Tok == ")") 1265 return; 1266 if (Tok != ",") { 1267 setError("unexpected token: " + Tok); 1268 return; 1269 } 1270 skip(); 1271 expect(","); 1272 skip(); 1273 expect(")"); 1274 } 1275 1276 void ScriptParser::readPhdrs() { 1277 expect("{"); 1278 while (!Error && !consume("}")) { 1279 StringRef Tok = next(); 1280 Opt.PhdrsCommands.push_back( 1281 {Tok, PT_NULL, false, false, UINT_MAX, nullptr}); 1282 PhdrsCommand &PhdrCmd = Opt.PhdrsCommands.back(); 1283 1284 PhdrCmd.Type = readPhdrType(); 1285 do { 1286 Tok = next(); 1287 if (Tok == ";") 1288 break; 1289 if (Tok == "FILEHDR") 1290 PhdrCmd.HasFilehdr = true; 1291 else if (Tok == "PHDRS") 1292 PhdrCmd.HasPhdrs = true; 1293 else if (Tok == "AT") 1294 PhdrCmd.LMAExpr = readParenExpr(); 1295 else if (Tok == "FLAGS") { 1296 expect("("); 1297 // Passing 0 for the value of dot is a bit of a hack. It means that 1298 // we accept expressions like ".|1". 1299 PhdrCmd.Flags = readExpr()(0); 1300 expect(")"); 1301 } else 1302 setError("unexpected header attribute: " + Tok); 1303 } while (!Error); 1304 } 1305 } 1306 1307 void ScriptParser::readSearchDir() { 1308 expect("("); 1309 StringRef Tok = next(); 1310 if (!Config->Nostdlib) 1311 Config->SearchPaths.push_back(unquote(Tok)); 1312 expect(")"); 1313 } 1314 1315 void ScriptParser::readSections() { 1316 Opt.HasSections = true; 1317 // -no-rosegment is used to avoid placing read only non-executable sections in 1318 // their own segment. We do the same if SECTIONS command is present in linker 1319 // script. See comment for computeFlags(). 1320 Config->SingleRoRx = true; 1321 1322 expect("{"); 1323 while (!Error && !consume("}")) { 1324 StringRef Tok = next(); 1325 BaseCommand *Cmd = readProvideOrAssignment(Tok); 1326 if (!Cmd) { 1327 if (Tok == "ASSERT") 1328 Cmd = new AssertCommand(readAssert()); 1329 else 1330 Cmd = readOutputSectionDescription(Tok); 1331 } 1332 Opt.Commands.emplace_back(Cmd); 1333 } 1334 } 1335 1336 static int precedence(StringRef Op) { 1337 return StringSwitch<int>(Op) 1338 .Cases("*", "/", 5) 1339 .Cases("+", "-", 4) 1340 .Cases("<<", ">>", 3) 1341 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 1342 .Cases("&", "|", 1) 1343 .Default(-1); 1344 } 1345 1346 StringMatcher ScriptParser::readFilePatterns() { 1347 std::vector<StringRef> V; 1348 while (!Error && !consume(")")) 1349 V.push_back(next()); 1350 return StringMatcher(V); 1351 } 1352 1353 SortSectionPolicy ScriptParser::readSortKind() { 1354 if (consume("SORT") || consume("SORT_BY_NAME")) 1355 return SortSectionPolicy::Name; 1356 if (consume("SORT_BY_ALIGNMENT")) 1357 return SortSectionPolicy::Alignment; 1358 if (consume("SORT_BY_INIT_PRIORITY")) 1359 return SortSectionPolicy::Priority; 1360 if (consume("SORT_NONE")) 1361 return SortSectionPolicy::None; 1362 return SortSectionPolicy::Default; 1363 } 1364 1365 // Method reads a list of sequence of excluded files and section globs given in 1366 // a following form: ((EXCLUDE_FILE(file_pattern+))? section_pattern+)+ 1367 // Example: *(.foo.1 EXCLUDE_FILE (*a.o) .foo.2 EXCLUDE_FILE (*b.o) .foo.3) 1368 // The semantics of that is next: 1369 // * Include .foo.1 from every file. 1370 // * Include .foo.2 from every file but a.o 1371 // * Include .foo.3 from every file but b.o 1372 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 1373 std::vector<SectionPattern> Ret; 1374 while (!Error && peek() != ")") { 1375 StringMatcher ExcludeFilePat; 1376 if (consume("EXCLUDE_FILE")) { 1377 expect("("); 1378 ExcludeFilePat = readFilePatterns(); 1379 } 1380 1381 std::vector<StringRef> V; 1382 while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") 1383 V.push_back(next()); 1384 1385 if (!V.empty()) 1386 Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); 1387 else 1388 setError("section pattern is expected"); 1389 } 1390 return Ret; 1391 } 1392 1393 // Reads contents of "SECTIONS" directive. That directive contains a 1394 // list of glob patterns for input sections. The grammar is as follows. 1395 // 1396 // <patterns> ::= <section-list> 1397 // | <sort> "(" <section-list> ")" 1398 // | <sort> "(" <sort> "(" <section-list> ")" ")" 1399 // 1400 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" 1401 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" 1402 // 1403 // <section-list> is parsed by readInputSectionsList(). 1404 InputSectionDescription * 1405 ScriptParser::readInputSectionRules(StringRef FilePattern) { 1406 auto *Cmd = new InputSectionDescription(FilePattern); 1407 expect("("); 1408 while (!Error && !consume(")")) { 1409 SortSectionPolicy Outer = readSortKind(); 1410 SortSectionPolicy Inner = SortSectionPolicy::Default; 1411 std::vector<SectionPattern> V; 1412 if (Outer != SortSectionPolicy::Default) { 1413 expect("("); 1414 Inner = readSortKind(); 1415 if (Inner != SortSectionPolicy::Default) { 1416 expect("("); 1417 V = readInputSectionsList(); 1418 expect(")"); 1419 } else { 1420 V = readInputSectionsList(); 1421 } 1422 expect(")"); 1423 } else { 1424 V = readInputSectionsList(); 1425 } 1426 1427 for (SectionPattern &Pat : V) { 1428 Pat.SortInner = Inner; 1429 Pat.SortOuter = Outer; 1430 } 1431 1432 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 1433 } 1434 return Cmd; 1435 } 1436 1437 InputSectionDescription * 1438 ScriptParser::readInputSectionDescription(StringRef Tok) { 1439 // Input section wildcard can be surrounded by KEEP. 1440 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 1441 if (Tok == "KEEP") { 1442 expect("("); 1443 StringRef FilePattern = next(); 1444 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 1445 expect(")"); 1446 Opt.KeptSections.push_back(Cmd); 1447 return Cmd; 1448 } 1449 return readInputSectionRules(Tok); 1450 } 1451 1452 void ScriptParser::readSort() { 1453 expect("("); 1454 expect("CONSTRUCTORS"); 1455 expect(")"); 1456 } 1457 1458 Expr ScriptParser::readAssert() { 1459 expect("("); 1460 Expr E = readExpr(); 1461 expect(","); 1462 StringRef Msg = unquote(next()); 1463 expect(")"); 1464 return [=](uint64_t Dot) { 1465 if (!E(Dot)) 1466 error(Msg); 1467 return Dot; 1468 }; 1469 } 1470 1471 // Reads a FILL(expr) command. We handle the FILL command as an 1472 // alias for =fillexp section attribute, which is different from 1473 // what GNU linkers do. 1474 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 1475 uint32_t ScriptParser::readFill() { 1476 expect("("); 1477 uint32_t V = readOutputSectionFiller(next()); 1478 expect(")"); 1479 expect(";"); 1480 return V; 1481 } 1482 1483 OutputSectionCommand * 1484 ScriptParser::readOutputSectionDescription(StringRef OutSec) { 1485 OutputSectionCommand *Cmd = new OutputSectionCommand(OutSec); 1486 Cmd->Location = getCurrentLocation(); 1487 1488 // Read an address expression. 1489 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html#Output-Section-Address 1490 if (peek() != ":") 1491 Cmd->AddrExpr = readExpr(); 1492 1493 expect(":"); 1494 1495 if (consume("AT")) 1496 Cmd->LMAExpr = readParenExpr(); 1497 if (consume("ALIGN")) 1498 Cmd->AlignExpr = readParenExpr(); 1499 if (consume("SUBALIGN")) 1500 Cmd->SubalignExpr = readParenExpr(); 1501 1502 // Parse constraints. 1503 if (consume("ONLY_IF_RO")) 1504 Cmd->Constraint = ConstraintKind::ReadOnly; 1505 if (consume("ONLY_IF_RW")) 1506 Cmd->Constraint = ConstraintKind::ReadWrite; 1507 expect("{"); 1508 1509 while (!Error && !consume("}")) { 1510 StringRef Tok = next(); 1511 if (Tok == ";") { 1512 // Empty commands are allowed. Do nothing here. 1513 } else if (SymbolAssignment *Assignment = readProvideOrAssignment(Tok)) { 1514 Cmd->Commands.emplace_back(Assignment); 1515 } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) { 1516 Cmd->Commands.emplace_back(Data); 1517 } else if (Tok == "ASSERT") { 1518 Cmd->Commands.emplace_back(new AssertCommand(readAssert())); 1519 expect(";"); 1520 } else if (Tok == "CONSTRUCTORS") { 1521 // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors 1522 // by name. This is for very old file formats such as ECOFF/XCOFF. 1523 // For ELF, we should ignore. 1524 } else if (Tok == "FILL") { 1525 Cmd->Filler = readFill(); 1526 } else if (Tok == "SORT") { 1527 readSort(); 1528 } else if (peek() == "(") { 1529 Cmd->Commands.emplace_back(readInputSectionDescription(Tok)); 1530 } else { 1531 setError("unknown command " + Tok); 1532 } 1533 } 1534 1535 if (consume(">")) 1536 Cmd->MemoryRegionName = next(); 1537 1538 Cmd->Phdrs = readOutputSectionPhdrs(); 1539 1540 if (consume("=")) 1541 Cmd->Filler = readOutputSectionFiller(next()); 1542 else if (peek().startswith("=")) 1543 Cmd->Filler = readOutputSectionFiller(next().drop_front()); 1544 1545 // Consume optional comma following output section command. 1546 consume(","); 1547 1548 return Cmd; 1549 } 1550 1551 // Read "=<number>" where <number> is an octal/decimal/hexadecimal number. 1552 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 1553 // 1554 // ld.gold is not fully compatible with ld.bfd. ld.bfd handles 1555 // hexstrings as blobs of arbitrary sizes, while ld.gold handles them 1556 // as 32-bit big-endian values. We will do the same as ld.gold does 1557 // because it's simpler than what ld.bfd does. 1558 uint32_t ScriptParser::readOutputSectionFiller(StringRef Tok) { 1559 uint32_t V; 1560 if (!Tok.getAsInteger(0, V)) 1561 return V; 1562 setError("invalid filler expression: " + Tok); 1563 return 0; 1564 } 1565 1566 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { 1567 expect("("); 1568 SymbolAssignment *Cmd = readAssignment(next()); 1569 Cmd->Provide = Provide; 1570 Cmd->Hidden = Hidden; 1571 expect(")"); 1572 expect(";"); 1573 return Cmd; 1574 } 1575 1576 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) { 1577 SymbolAssignment *Cmd = nullptr; 1578 if (peek() == "=" || peek() == "+=") { 1579 Cmd = readAssignment(Tok); 1580 expect(";"); 1581 } else if (Tok == "PROVIDE") { 1582 Cmd = readProvideHidden(true, false); 1583 } else if (Tok == "HIDDEN") { 1584 Cmd = readProvideHidden(false, true); 1585 } else if (Tok == "PROVIDE_HIDDEN") { 1586 Cmd = readProvideHidden(true, true); 1587 } 1588 return Cmd; 1589 } 1590 1591 static uint64_t getSymbolValue(const Twine &Loc, StringRef S, uint64_t Dot) { 1592 if (S == ".") 1593 return Dot; 1594 return ScriptBase->getSymbolValue(Loc, S); 1595 } 1596 1597 static bool isAbsolute(StringRef S) { 1598 if (S == ".") 1599 return false; 1600 return ScriptBase->isAbsolute(S); 1601 } 1602 1603 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { 1604 StringRef Op = next(); 1605 Expr E; 1606 assert(Op == "=" || Op == "+="); 1607 if (consume("ABSOLUTE")) { 1608 E = readExpr(); 1609 E.IsAbsolute = [] { return true; }; 1610 } else { 1611 E = readExpr(); 1612 } 1613 if (Op == "+=") { 1614 std::string Loc = getCurrentLocation(); 1615 E = [=](uint64_t Dot) { 1616 return getSymbolValue(Loc, Name, Dot) + E(Dot); 1617 }; 1618 } 1619 return new SymbolAssignment(Name, E, getCurrentLocation()); 1620 } 1621 1622 // This is an operator-precedence parser to parse a linker 1623 // script expression. 1624 Expr ScriptParser::readExpr() { 1625 // Our lexer is context-aware. Set the in-expression bit so that 1626 // they apply different tokenization rules. 1627 bool Orig = InExpr; 1628 InExpr = true; 1629 Expr E = readExpr1(readPrimary(), 0); 1630 InExpr = Orig; 1631 return E; 1632 } 1633 1634 static Expr combine(StringRef Op, Expr L, Expr R) { 1635 auto IsAbs = [=] { return L.IsAbsolute() && R.IsAbsolute(); }; 1636 auto GetOutSec = [=] { 1637 const OutputSection *S = L.Section(); 1638 return S ? S : R.Section(); 1639 }; 1640 1641 if (Op == "*") 1642 return [=](uint64_t Dot) { return L(Dot) * R(Dot); }; 1643 if (Op == "/") { 1644 return [=](uint64_t Dot) -> uint64_t { 1645 uint64_t RHS = R(Dot); 1646 if (RHS == 0) { 1647 error("division by zero"); 1648 return 0; 1649 } 1650 return L(Dot) / RHS; 1651 }; 1652 } 1653 if (Op == "+") 1654 return {[=](uint64_t Dot) { return L(Dot) + R(Dot); }, IsAbs, GetOutSec}; 1655 if (Op == "-") 1656 return {[=](uint64_t Dot) { return L(Dot) - R(Dot); }, IsAbs, GetOutSec}; 1657 if (Op == "<<") 1658 return [=](uint64_t Dot) { return L(Dot) << R(Dot); }; 1659 if (Op == ">>") 1660 return [=](uint64_t Dot) { return L(Dot) >> R(Dot); }; 1661 if (Op == "<") 1662 return [=](uint64_t Dot) { return L(Dot) < R(Dot); }; 1663 if (Op == ">") 1664 return [=](uint64_t Dot) { return L(Dot) > R(Dot); }; 1665 if (Op == ">=") 1666 return [=](uint64_t Dot) { return L(Dot) >= R(Dot); }; 1667 if (Op == "<=") 1668 return [=](uint64_t Dot) { return L(Dot) <= R(Dot); }; 1669 if (Op == "==") 1670 return [=](uint64_t Dot) { return L(Dot) == R(Dot); }; 1671 if (Op == "!=") 1672 return [=](uint64_t Dot) { return L(Dot) != R(Dot); }; 1673 if (Op == "&") 1674 return [=](uint64_t Dot) { return L(Dot) & R(Dot); }; 1675 if (Op == "|") 1676 return [=](uint64_t Dot) { return L(Dot) | R(Dot); }; 1677 llvm_unreachable("invalid operator"); 1678 } 1679 1680 // This is a part of the operator-precedence parser. This function 1681 // assumes that the remaining token stream starts with an operator. 1682 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { 1683 while (!atEOF() && !Error) { 1684 // Read an operator and an expression. 1685 if (consume("?")) 1686 return readTernary(Lhs); 1687 StringRef Op1 = peek(); 1688 if (precedence(Op1) < MinPrec) 1689 break; 1690 skip(); 1691 Expr Rhs = readPrimary(); 1692 1693 // Evaluate the remaining part of the expression first if the 1694 // next operator has greater precedence than the previous one. 1695 // For example, if we have read "+" and "3", and if the next 1696 // operator is "*", then we'll evaluate 3 * ... part first. 1697 while (!atEOF()) { 1698 StringRef Op2 = peek(); 1699 if (precedence(Op2) <= precedence(Op1)) 1700 break; 1701 Rhs = readExpr1(Rhs, precedence(Op2)); 1702 } 1703 1704 Lhs = combine(Op1, Lhs, Rhs); 1705 } 1706 return Lhs; 1707 } 1708 1709 uint64_t static getConstant(StringRef S) { 1710 if (S == "COMMONPAGESIZE") 1711 return Target->PageSize; 1712 if (S == "MAXPAGESIZE") 1713 return Config->MaxPageSize; 1714 error("unknown constant: " + S); 1715 return 0; 1716 } 1717 1718 // Parses Tok as an integer. Returns true if successful. 1719 // It recognizes hexadecimal (prefixed with "0x" or suffixed with "H") 1720 // and decimal numbers. Decimal numbers may have "K" (kilo) or 1721 // "M" (mega) prefixes. 1722 static bool readInteger(StringRef Tok, uint64_t &Result) { 1723 // Negative number 1724 if (Tok.startswith("-")) { 1725 if (!readInteger(Tok.substr(1), Result)) 1726 return false; 1727 Result = -Result; 1728 return true; 1729 } 1730 1731 // Hexadecimal 1732 if (Tok.startswith_lower("0x")) 1733 return !Tok.substr(2).getAsInteger(16, Result); 1734 if (Tok.endswith_lower("H")) 1735 return !Tok.drop_back().getAsInteger(16, Result); 1736 1737 // Decimal 1738 int Suffix = 1; 1739 if (Tok.endswith_lower("K")) { 1740 Suffix = 1024; 1741 Tok = Tok.drop_back(); 1742 } else if (Tok.endswith_lower("M")) { 1743 Suffix = 1024 * 1024; 1744 Tok = Tok.drop_back(); 1745 } 1746 if (Tok.getAsInteger(10, Result)) 1747 return false; 1748 Result *= Suffix; 1749 return true; 1750 } 1751 1752 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { 1753 int Size = StringSwitch<unsigned>(Tok) 1754 .Case("BYTE", 1) 1755 .Case("SHORT", 2) 1756 .Case("LONG", 4) 1757 .Case("QUAD", 8) 1758 .Default(-1); 1759 if (Size == -1) 1760 return nullptr; 1761 1762 return new BytesDataCommand(readParenExpr(), Size); 1763 } 1764 1765 StringRef ScriptParser::readParenLiteral() { 1766 expect("("); 1767 StringRef Tok = next(); 1768 expect(")"); 1769 return Tok; 1770 } 1771 1772 Expr ScriptParser::readPrimary() { 1773 if (peek() == "(") 1774 return readParenExpr(); 1775 1776 StringRef Tok = next(); 1777 std::string Location = getCurrentLocation(); 1778 1779 if (Tok == "~") { 1780 Expr E = readPrimary(); 1781 return [=](uint64_t Dot) { return ~E(Dot); }; 1782 } 1783 if (Tok == "-") { 1784 Expr E = readPrimary(); 1785 return [=](uint64_t Dot) { return -E(Dot); }; 1786 } 1787 1788 // Built-in functions are parsed here. 1789 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 1790 if (Tok == "ADDR") { 1791 StringRef Name = readParenLiteral(); 1792 return {[=](uint64_t Dot) { 1793 return ScriptBase->getOutputSection(Location, Name)->Addr; 1794 }, 1795 [=] { return false; }, 1796 [=] { return ScriptBase->getOutputSection(Location, Name); }}; 1797 } 1798 if (Tok == "LOADADDR") { 1799 StringRef Name = readParenLiteral(); 1800 return [=](uint64_t Dot) { 1801 return ScriptBase->getOutputSection(Location, Name)->getLMA(); 1802 }; 1803 } 1804 if (Tok == "ASSERT") 1805 return readAssert(); 1806 if (Tok == "ALIGN") { 1807 expect("("); 1808 Expr E = readExpr(); 1809 if (consume(",")) { 1810 Expr E2 = readExpr(); 1811 expect(")"); 1812 return [=](uint64_t Dot) { return alignTo(E(Dot), E2(Dot)); }; 1813 } 1814 expect(")"); 1815 return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; 1816 } 1817 if (Tok == "CONSTANT") { 1818 StringRef Name = readParenLiteral(); 1819 return [=](uint64_t Dot) { return getConstant(Name); }; 1820 } 1821 if (Tok == "DEFINED") { 1822 StringRef Name = readParenLiteral(); 1823 return [=](uint64_t Dot) { return ScriptBase->isDefined(Name) ? 1 : 0; }; 1824 } 1825 if (Tok == "SEGMENT_START") { 1826 expect("("); 1827 skip(); 1828 expect(","); 1829 Expr E = readExpr(); 1830 expect(")"); 1831 return [=](uint64_t Dot) { return E(Dot); }; 1832 } 1833 if (Tok == "DATA_SEGMENT_ALIGN") { 1834 expect("("); 1835 Expr E = readExpr(); 1836 expect(","); 1837 readExpr(); 1838 expect(")"); 1839 return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; 1840 } 1841 if (Tok == "DATA_SEGMENT_END") { 1842 expect("("); 1843 expect("."); 1844 expect(")"); 1845 return [](uint64_t Dot) { return Dot; }; 1846 } 1847 // GNU linkers implements more complicated logic to handle 1848 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and just align to 1849 // the next page boundary for simplicity. 1850 if (Tok == "DATA_SEGMENT_RELRO_END") { 1851 expect("("); 1852 readExpr(); 1853 expect(","); 1854 readExpr(); 1855 expect(")"); 1856 return [](uint64_t Dot) { return alignTo(Dot, Target->PageSize); }; 1857 } 1858 if (Tok == "SIZEOF") { 1859 StringRef Name = readParenLiteral(); 1860 return [=](uint64_t Dot) { return ScriptBase->getOutputSectionSize(Name); }; 1861 } 1862 if (Tok == "ALIGNOF") { 1863 StringRef Name = readParenLiteral(); 1864 return [=](uint64_t Dot) { 1865 return ScriptBase->getOutputSection(Location, Name)->Addralign; 1866 }; 1867 } 1868 if (Tok == "SIZEOF_HEADERS") 1869 return [=](uint64_t Dot) { return ScriptBase->getHeaderSize(); }; 1870 1871 // Tok is a literal number. 1872 uint64_t V; 1873 if (readInteger(Tok, V)) 1874 return [=](uint64_t Dot) { return V; }; 1875 1876 // Tok is a symbol name. 1877 if (Tok != "." && !isValidCIdentifier(Tok)) 1878 setError("malformed number: " + Tok); 1879 return {[=](uint64_t Dot) { return getSymbolValue(Location, Tok, Dot); }, 1880 [=] { return isAbsolute(Tok); }, 1881 [=] { return ScriptBase->getSymbolSection(Tok); }}; 1882 } 1883 1884 Expr ScriptParser::readTernary(Expr Cond) { 1885 Expr L = readExpr(); 1886 expect(":"); 1887 Expr R = readExpr(); 1888 return [=](uint64_t Dot) { return Cond(Dot) ? L(Dot) : R(Dot); }; 1889 } 1890 1891 Expr ScriptParser::readParenExpr() { 1892 expect("("); 1893 Expr E = readExpr(); 1894 expect(")"); 1895 return E; 1896 } 1897 1898 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 1899 std::vector<StringRef> Phdrs; 1900 while (!Error && peek().startswith(":")) { 1901 StringRef Tok = next(); 1902 Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); 1903 } 1904 return Phdrs; 1905 } 1906 1907 // Read a program header type name. The next token must be a 1908 // name of a program header type or a constant (e.g. "0x3"). 1909 unsigned ScriptParser::readPhdrType() { 1910 StringRef Tok = next(); 1911 uint64_t Val; 1912 if (readInteger(Tok, Val)) 1913 return Val; 1914 1915 unsigned Ret = StringSwitch<unsigned>(Tok) 1916 .Case("PT_NULL", PT_NULL) 1917 .Case("PT_LOAD", PT_LOAD) 1918 .Case("PT_DYNAMIC", PT_DYNAMIC) 1919 .Case("PT_INTERP", PT_INTERP) 1920 .Case("PT_NOTE", PT_NOTE) 1921 .Case("PT_SHLIB", PT_SHLIB) 1922 .Case("PT_PHDR", PT_PHDR) 1923 .Case("PT_TLS", PT_TLS) 1924 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1925 .Case("PT_GNU_STACK", PT_GNU_STACK) 1926 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1927 .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) 1928 .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) 1929 .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) 1930 .Default(-1); 1931 1932 if (Ret == (unsigned)-1) { 1933 setError("invalid program header type: " + Tok); 1934 return PT_NULL; 1935 } 1936 return Ret; 1937 } 1938 1939 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 1940 void ScriptParser::readAnonymousDeclaration() { 1941 // Read global symbols first. "global:" is default, so if there's 1942 // no label, we assume global symbols. 1943 if (peek() != "local") { 1944 if (consume("global")) 1945 expect(":"); 1946 for (SymbolVersion V : readSymbols()) 1947 Config->VersionScriptGlobals.push_back(V); 1948 } 1949 readLocals(); 1950 expect("}"); 1951 expect(";"); 1952 } 1953 1954 void ScriptParser::readLocals() { 1955 if (!consume("local")) 1956 return; 1957 expect(":"); 1958 std::vector<SymbolVersion> Locals = readSymbols(); 1959 for (SymbolVersion V : Locals) { 1960 if (V.Name == "*") { 1961 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1962 continue; 1963 } 1964 Config->VersionScriptLocals.push_back(V); 1965 } 1966 } 1967 1968 // Reads a list of symbols, e.g. "VerStr { global: foo; bar; local: *; };". 1969 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1970 // Identifiers start at 2 because 0 and 1 are reserved 1971 // for VER_NDX_LOCAL and VER_NDX_GLOBAL constants. 1972 uint16_t VersionId = Config->VersionDefinitions.size() + 2; 1973 Config->VersionDefinitions.push_back({VerStr, VersionId}); 1974 1975 // Read global symbols. 1976 if (peek() != "local") { 1977 if (consume("global")) 1978 expect(":"); 1979 Config->VersionDefinitions.back().Globals = readSymbols(); 1980 } 1981 readLocals(); 1982 expect("}"); 1983 1984 // Each version may have a parent version. For example, "Ver2" 1985 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" 1986 // as a parent. This version hierarchy is, probably against your 1987 // instinct, purely for hint; the runtime doesn't care about it 1988 // at all. In LLD, we simply ignore it. 1989 if (peek() != ";") 1990 skip(); 1991 expect(";"); 1992 } 1993 1994 // Reads a list of symbols for a versions cript. 1995 std::vector<SymbolVersion> ScriptParser::readSymbols() { 1996 std::vector<SymbolVersion> Ret; 1997 for (;;) { 1998 if (consume("extern")) { 1999 for (SymbolVersion V : readVersionExtern()) 2000 Ret.push_back(V); 2001 continue; 2002 } 2003 2004 if (peek() == "}" || (peek() == "local" && peek(1) == ":") || Error) 2005 break; 2006 StringRef Tok = next(); 2007 Ret.push_back({unquote(Tok), false, hasWildcard(Tok)}); 2008 expect(";"); 2009 } 2010 return Ret; 2011 } 2012 2013 // Reads an "extern C++" directive, e.g., 2014 // "extern "C++" { ns::*; "f(int, double)"; };" 2015 std::vector<SymbolVersion> ScriptParser::readVersionExtern() { 2016 StringRef Tok = next(); 2017 bool IsCXX = Tok == "\"C++\""; 2018 if (!IsCXX && Tok != "\"C\"") 2019 setError("Unknown language"); 2020 expect("{"); 2021 2022 std::vector<SymbolVersion> Ret; 2023 while (!Error && peek() != "}") { 2024 StringRef Tok = next(); 2025 bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); 2026 Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); 2027 expect(";"); 2028 } 2029 2030 expect("}"); 2031 expect(";"); 2032 return Ret; 2033 } 2034 2035 uint64_t ScriptParser::readMemoryAssignment( 2036 StringRef S1, StringRef S2, StringRef S3) { 2037 if (!(consume(S1) || consume(S2) || consume(S3))) { 2038 setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); 2039 return 0; 2040 } 2041 expect("="); 2042 2043 // TODO: Fully support constant expressions. 2044 uint64_t Val; 2045 if (!readInteger(next(), Val)) 2046 setError("nonconstant expression for "+ S1); 2047 return Val; 2048 } 2049 2050 // Parse the MEMORY command as specified in: 2051 // https://sourceware.org/binutils/docs/ld/MEMORY.html 2052 // 2053 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... } 2054 void ScriptParser::readMemory() { 2055 expect("{"); 2056 while (!Error && !consume("}")) { 2057 StringRef Name = next(); 2058 2059 uint32_t Flags = 0; 2060 uint32_t NegFlags = 0; 2061 if (consume("(")) { 2062 std::tie(Flags, NegFlags) = readMemoryAttributes(); 2063 expect(")"); 2064 } 2065 expect(":"); 2066 2067 uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); 2068 expect(","); 2069 uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); 2070 2071 // Add the memory region to the region map (if it doesn't already exist). 2072 auto It = Opt.MemoryRegions.find(Name); 2073 if (It != Opt.MemoryRegions.end()) 2074 setError("region '" + Name + "' already defined"); 2075 else 2076 Opt.MemoryRegions[Name] = {Name, Origin, Length, Origin, Flags, NegFlags}; 2077 } 2078 } 2079 2080 // This function parses the attributes used to match against section 2081 // flags when placing output sections in a memory region. These flags 2082 // are only used when an explicit memory region name is not used. 2083 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { 2084 uint32_t Flags = 0; 2085 uint32_t NegFlags = 0; 2086 bool Invert = false; 2087 2088 for (char C : next().lower()) { 2089 uint32_t Flag = 0; 2090 if (C == '!') 2091 Invert = !Invert; 2092 else if (C == 'w') 2093 Flag = SHF_WRITE; 2094 else if (C == 'x') 2095 Flag = SHF_EXECINSTR; 2096 else if (C == 'a') 2097 Flag = SHF_ALLOC; 2098 else if (C != 'r') 2099 setError("invalid memory region attribute"); 2100 2101 if (Invert) 2102 NegFlags |= Flag; 2103 else 2104 Flags |= Flag; 2105 } 2106 return {Flags, NegFlags}; 2107 } 2108 2109 void elf::readLinkerScript(MemoryBufferRef MB) { 2110 ScriptParser(MB).readLinkerScript(); 2111 } 2112 2113 void elf::readVersionScript(MemoryBufferRef MB) { 2114 ScriptParser(MB).readVersionScript(); 2115 } 2116 2117 void elf::readDynamicList(MemoryBufferRef MB) { 2118 ScriptParser(MB).readDynamicList(); 2119 } 2120 2121 template class elf::LinkerScript<ELF32LE>; 2122 template class elf::LinkerScript<ELF32BE>; 2123 template class elf::LinkerScript<ELF64LE>; 2124 template class elf::LinkerScript<ELF64BE>; 2125