1 //===- LinkerScript.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the parser/evaluator of the linker script. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "LinkerScript.h" 15 #include "Config.h" 16 #include "Driver.h" 17 #include "InputSection.h" 18 #include "Memory.h" 19 #include "OutputSections.h" 20 #include "ScriptLexer.h" 21 #include "Strings.h" 22 #include "SymbolTable.h" 23 #include "Symbols.h" 24 #include "SyntheticSections.h" 25 #include "Target.h" 26 #include "Writer.h" 27 #include "llvm/ADT/STLExtras.h" 28 #include "llvm/ADT/SmallString.h" 29 #include "llvm/ADT/StringRef.h" 30 #include "llvm/ADT/StringSwitch.h" 31 #include "llvm/Support/Casting.h" 32 #include "llvm/Support/ELF.h" 33 #include "llvm/Support/Endian.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/FileSystem.h" 36 #include "llvm/Support/MathExtras.h" 37 #include "llvm/Support/Path.h" 38 #include <algorithm> 39 #include <cassert> 40 #include <cstddef> 41 #include <cstdint> 42 #include <iterator> 43 #include <limits> 44 #include <memory> 45 #include <string> 46 #include <tuple> 47 #include <vector> 48 49 using namespace llvm; 50 using namespace llvm::ELF; 51 using namespace llvm::object; 52 using namespace llvm::support::endian; 53 using namespace lld; 54 using namespace lld::elf; 55 56 LinkerScriptBase *elf::ScriptBase; 57 ScriptConfiguration *elf::ScriptConfig; 58 59 template <class ELFT> static SymbolBody *addRegular(SymbolAssignment *Cmd) { 60 uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT; 61 Symbol *Sym = Symtab<ELFT>::X->addUndefined( 62 Cmd->Name, /*IsLocal=*/false, STB_GLOBAL, Visibility, 63 /*Type*/ 0, 64 /*CanOmitFromDynSym*/ false, /*File*/ nullptr); 65 66 replaceBody<DefinedRegular<ELFT>>(Sym, Cmd->Name, /*IsLocal=*/false, 67 Visibility, STT_NOTYPE, 0, 0, nullptr, 68 nullptr); 69 return Sym->body(); 70 } 71 72 template <class ELFT> static SymbolBody *addSynthetic(SymbolAssignment *Cmd) { 73 uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT; 74 const OutputSectionBase *Sec = 75 ScriptConfig->HasSections ? nullptr : Cmd->Expression.Section(); 76 Symbol *Sym = Symtab<ELFT>::X->addUndefined( 77 Cmd->Name, /*IsLocal=*/false, STB_GLOBAL, Visibility, 78 /*Type*/ 0, 79 /*CanOmitFromDynSym*/ false, /*File*/ nullptr); 80 81 replaceBody<DefinedSynthetic>(Sym, Cmd->Name, 0, Sec); 82 return Sym->body(); 83 } 84 85 static bool isUnderSysroot(StringRef Path) { 86 if (Config->Sysroot == "") 87 return false; 88 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 89 if (sys::fs::equivalent(Config->Sysroot, Path)) 90 return true; 91 return false; 92 } 93 94 template <class ELFT> void LinkerScript<ELFT>::setDot(Expr E, bool InSec) { 95 uintX_t Val = E(Dot); 96 if (Val < Dot) { 97 if (InSec) 98 error("unable to move location counter backward for: " + CurOutSec->Name); 99 else 100 error("unable to move location counter backward"); 101 } 102 Dot = Val; 103 // Update to location counter means update to section size. 104 if (InSec) 105 CurOutSec->Size = Dot - CurOutSec->Addr; 106 } 107 108 // Sets value of a symbol. Two kinds of symbols are processed: synthetic 109 // symbols, whose value is an offset from beginning of section and regular 110 // symbols whose value is absolute. 111 template <class ELFT> 112 void LinkerScript<ELFT>::assignSymbol(SymbolAssignment *Cmd, bool InSec) { 113 if (Cmd->Name == ".") { 114 setDot(Cmd->Expression, InSec); 115 return; 116 } 117 118 if (!Cmd->Sym) 119 return; 120 121 if (auto *Body = dyn_cast<DefinedSynthetic>(Cmd->Sym)) { 122 Body->Section = Cmd->Expression.Section(); 123 if (Body->Section) { 124 uint64_t VA = 0; 125 if (Body->Section->Flags & SHF_ALLOC) 126 VA = Body->Section->Addr; 127 Body->Value = Cmd->Expression(Dot) - VA; 128 } 129 return; 130 } 131 132 cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Cmd->Expression(Dot); 133 } 134 135 template <class ELFT> 136 void LinkerScript<ELFT>::addSymbol(SymbolAssignment *Cmd) { 137 if (Cmd->Name == ".") 138 return; 139 140 // If a symbol was in PROVIDE(), we need to define it only when 141 // it is a referenced undefined symbol. 142 SymbolBody *B = Symtab<ELFT>::X->find(Cmd->Name); 143 if (Cmd->Provide && (!B || B->isDefined())) 144 return; 145 146 // Otherwise, create a new symbol if one does not exist or an 147 // undefined one does exist. 148 if (Cmd->Expression.IsAbsolute()) 149 Cmd->Sym = addRegular<ELFT>(Cmd); 150 else 151 Cmd->Sym = addSynthetic<ELFT>(Cmd); 152 153 // If there are sections, then let the value be assigned later in 154 // `assignAddresses`. 155 if (!ScriptConfig->HasSections) 156 assignSymbol(Cmd); 157 } 158 159 bool SymbolAssignment::classof(const BaseCommand *C) { 160 return C->Kind == AssignmentKind; 161 } 162 163 bool OutputSectionCommand::classof(const BaseCommand *C) { 164 return C->Kind == OutputSectionKind; 165 } 166 167 bool InputSectionDescription::classof(const BaseCommand *C) { 168 return C->Kind == InputSectionKind; 169 } 170 171 bool AssertCommand::classof(const BaseCommand *C) { 172 return C->Kind == AssertKind; 173 } 174 175 bool BytesDataCommand::classof(const BaseCommand *C) { 176 return C->Kind == BytesDataKind; 177 } 178 179 template <class ELFT> LinkerScript<ELFT>::LinkerScript() = default; 180 template <class ELFT> LinkerScript<ELFT>::~LinkerScript() = default; 181 182 template <class ELFT> static StringRef basename(InputSectionBase<ELFT> *S) { 183 if (S->getFile()) 184 return sys::path::filename(S->getFile()->getName()); 185 return ""; 186 } 187 188 template <class ELFT> 189 bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) { 190 for (InputSectionDescription *ID : Opt.KeptSections) 191 if (ID->FilePat.match(basename(S))) 192 for (SectionPattern &P : ID->SectionPatterns) 193 if (P.SectionPat.match(S->Name)) 194 return true; 195 return false; 196 } 197 198 static bool comparePriority(InputSectionData *A, InputSectionData *B) { 199 return getPriority(A->Name) < getPriority(B->Name); 200 } 201 202 static bool compareName(InputSectionData *A, InputSectionData *B) { 203 return A->Name < B->Name; 204 } 205 206 static bool compareAlignment(InputSectionData *A, InputSectionData *B) { 207 // ">" is not a mistake. Larger alignments are placed before smaller 208 // alignments in order to reduce the amount of padding necessary. 209 // This is compatible with GNU. 210 return A->Alignment > B->Alignment; 211 } 212 213 static std::function<bool(InputSectionData *, InputSectionData *)> 214 getComparator(SortSectionPolicy K) { 215 switch (K) { 216 case SortSectionPolicy::Alignment: 217 return compareAlignment; 218 case SortSectionPolicy::Name: 219 return compareName; 220 case SortSectionPolicy::Priority: 221 return comparePriority; 222 default: 223 llvm_unreachable("unknown sort policy"); 224 } 225 } 226 227 template <class ELFT> 228 static bool matchConstraints(ArrayRef<InputSectionBase<ELFT> *> Sections, 229 ConstraintKind Kind) { 230 if (Kind == ConstraintKind::NoConstraint) 231 return true; 232 bool IsRW = llvm::any_of(Sections, [=](InputSectionData *Sec2) { 233 auto *Sec = static_cast<InputSectionBase<ELFT> *>(Sec2); 234 return Sec->Flags & SHF_WRITE; 235 }); 236 return (IsRW && Kind == ConstraintKind::ReadWrite) || 237 (!IsRW && Kind == ConstraintKind::ReadOnly); 238 } 239 240 static void sortSections(InputSectionData **Begin, InputSectionData **End, 241 SortSectionPolicy K) { 242 if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None) 243 std::stable_sort(Begin, End, getComparator(K)); 244 } 245 246 // Compute and remember which sections the InputSectionDescription matches. 247 template <class ELFT> 248 void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) { 249 // Collects all sections that satisfy constraints of I 250 // and attach them to I. 251 for (SectionPattern &Pat : I->SectionPatterns) { 252 size_t SizeBefore = I->Sections.size(); 253 254 for (InputSectionBase<ELFT> *S : Symtab<ELFT>::X->Sections) { 255 if (S->Assigned) 256 continue; 257 // For -emit-relocs we have to ignore entries like 258 // .rela.dyn : { *(.rela.data) } 259 // which are common because they are in the default bfd script. 260 if (S->Type == SHT_REL || S->Type == SHT_RELA) 261 continue; 262 263 StringRef Filename = basename(S); 264 if (!I->FilePat.match(Filename) || Pat.ExcludedFilePat.match(Filename)) 265 continue; 266 if (!Pat.SectionPat.match(S->Name)) 267 continue; 268 I->Sections.push_back(S); 269 S->Assigned = true; 270 } 271 272 // Sort sections as instructed by SORT-family commands and --sort-section 273 // option. Because SORT-family commands can be nested at most two depth 274 // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command 275 // line option is respected even if a SORT command is given, the exact 276 // behavior we have here is a bit complicated. Here are the rules. 277 // 278 // 1. If two SORT commands are given, --sort-section is ignored. 279 // 2. If one SORT command is given, and if it is not SORT_NONE, 280 // --sort-section is handled as an inner SORT command. 281 // 3. If one SORT command is given, and if it is SORT_NONE, don't sort. 282 // 4. If no SORT command is given, sort according to --sort-section. 283 InputSectionData **Begin = I->Sections.data() + SizeBefore; 284 InputSectionData **End = I->Sections.data() + I->Sections.size(); 285 if (Pat.SortOuter != SortSectionPolicy::None) { 286 if (Pat.SortInner == SortSectionPolicy::Default) 287 sortSections(Begin, End, Config->SortSection); 288 else 289 sortSections(Begin, End, Pat.SortInner); 290 sortSections(Begin, End, Pat.SortOuter); 291 } 292 } 293 } 294 295 template <class ELFT> 296 void LinkerScript<ELFT>::discard(ArrayRef<InputSectionBase<ELFT> *> V) { 297 for (InputSectionBase<ELFT> *S : V) { 298 S->Live = false; 299 if (S == In<ELFT>::ShStrTab) 300 error("discarding .shstrtab section is not allowed"); 301 discard(S->DependentSections); 302 } 303 } 304 305 template <class ELFT> 306 std::vector<InputSectionBase<ELFT> *> 307 LinkerScript<ELFT>::createInputSectionList(OutputSectionCommand &OutCmd) { 308 std::vector<InputSectionBase<ELFT> *> Ret; 309 310 for (const std::unique_ptr<BaseCommand> &Base : OutCmd.Commands) { 311 auto *Cmd = dyn_cast<InputSectionDescription>(Base.get()); 312 if (!Cmd) 313 continue; 314 computeInputSections(Cmd); 315 for (InputSectionData *S : Cmd->Sections) 316 Ret.push_back(static_cast<InputSectionBase<ELFT> *>(S)); 317 } 318 319 return Ret; 320 } 321 322 template <class ELFT> 323 void LinkerScript<ELFT>::processCommands(OutputSectionFactory<ELFT> &Factory) { 324 for (unsigned I = 0; I < Opt.Commands.size(); ++I) { 325 auto Iter = Opt.Commands.begin() + I; 326 const std::unique_ptr<BaseCommand> &Base1 = *Iter; 327 328 // Handle symbol assignments outside of any output section. 329 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base1.get())) { 330 addSymbol(Cmd); 331 continue; 332 } 333 334 if (auto *Cmd = dyn_cast<AssertCommand>(Base1.get())) { 335 // If we don't have SECTIONS then output sections have already been 336 // created by Writer<ELFT>. The LinkerScript<ELFT>::assignAddresses 337 // will not be called, so ASSERT should be evaluated now. 338 if (!Opt.HasSections) 339 Cmd->Expression(0); 340 continue; 341 } 342 343 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base1.get())) { 344 std::vector<InputSectionBase<ELFT> *> V = createInputSectionList(*Cmd); 345 346 // The output section name `/DISCARD/' is special. 347 // Any input section assigned to it is discarded. 348 if (Cmd->Name == "/DISCARD/") { 349 discard(V); 350 continue; 351 } 352 353 // This is for ONLY_IF_RO and ONLY_IF_RW. An output section directive 354 // ".foo : ONLY_IF_R[OW] { ... }" is handled only if all member input 355 // sections satisfy a given constraint. If not, a directive is handled 356 // as if it wasn't present from the beginning. 357 // 358 // Because we'll iterate over Commands many more times, the easiest 359 // way to "make it as if it wasn't present" is to just remove it. 360 if (!matchConstraints<ELFT>(V, Cmd->Constraint)) { 361 for (InputSectionBase<ELFT> *S : V) 362 S->Assigned = false; 363 Opt.Commands.erase(Iter); 364 --I; 365 continue; 366 } 367 368 // A directive may contain symbol definitions like this: 369 // ".foo : { ...; bar = .; }". Handle them. 370 for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) 371 if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base.get())) 372 addSymbol(OutCmd); 373 374 // Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign 375 // is given, input sections are aligned to that value, whether the 376 // given value is larger or smaller than the original section alignment. 377 if (Cmd->SubalignExpr) { 378 uint32_t Subalign = Cmd->SubalignExpr(0); 379 for (InputSectionBase<ELFT> *S : V) 380 S->Alignment = Subalign; 381 } 382 383 // Add input sections to an output section. 384 for (InputSectionBase<ELFT> *S : V) 385 Factory.addInputSec(S, Cmd->Name); 386 } 387 } 388 } 389 390 // Add sections that didn't match any sections command. 391 template <class ELFT> 392 void LinkerScript<ELFT>::addOrphanSections( 393 OutputSectionFactory<ELFT> &Factory) { 394 for (InputSectionBase<ELFT> *S : Symtab<ELFT>::X->Sections) 395 if (S->Live && !S->OutSec) 396 Factory.addInputSec(S, getOutputSectionName(S->Name)); 397 } 398 399 template <class ELFT> static bool isTbss(OutputSectionBase *Sec) { 400 return (Sec->Flags & SHF_TLS) && Sec->Type == SHT_NOBITS; 401 } 402 403 template <class ELFT> void LinkerScript<ELFT>::output(InputSection<ELFT> *S) { 404 if (!AlreadyOutputIS.insert(S).second) 405 return; 406 bool IsTbss = isTbss<ELFT>(CurOutSec); 407 408 uintX_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot; 409 Pos = alignTo(Pos, S->Alignment); 410 S->OutSecOff = Pos - CurOutSec->Addr; 411 Pos += S->getSize(); 412 413 // Update output section size after adding each section. This is so that 414 // SIZEOF works correctly in the case below: 415 // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) } 416 CurOutSec->Size = Pos - CurOutSec->Addr; 417 418 // If there is a memory region associated with this input section, then 419 // place the section in that region and update the region index. 420 if (CurMemRegion) { 421 CurMemRegion->Offset += CurOutSec->Size; 422 uint64_t CurSize = CurMemRegion->Offset - CurMemRegion->Origin; 423 if (CurSize > CurMemRegion->Length) { 424 uint64_t OverflowAmt = CurSize - CurMemRegion->Length; 425 error("section '" + CurOutSec->Name + "' will not fit in region '" + 426 CurMemRegion->Name + "': overflowed by " + Twine(OverflowAmt) + 427 " bytes"); 428 } 429 } 430 431 if (IsTbss) 432 ThreadBssOffset = Pos - Dot; 433 else 434 Dot = Pos; 435 } 436 437 template <class ELFT> void LinkerScript<ELFT>::flush() { 438 if (!CurOutSec || !AlreadyOutputOS.insert(CurOutSec).second) 439 return; 440 if (auto *OutSec = dyn_cast<OutputSection<ELFT>>(CurOutSec)) { 441 for (InputSection<ELFT> *I : OutSec->Sections) 442 output(I); 443 } else { 444 Dot += CurOutSec->Size; 445 } 446 } 447 448 template <class ELFT> 449 void LinkerScript<ELFT>::switchTo(OutputSectionBase *Sec) { 450 if (CurOutSec == Sec) 451 return; 452 if (AlreadyOutputOS.count(Sec)) 453 return; 454 455 flush(); 456 CurOutSec = Sec; 457 458 Dot = alignTo(Dot, CurOutSec->Addralign); 459 CurOutSec->Addr = isTbss<ELFT>(CurOutSec) ? Dot + ThreadBssOffset : Dot; 460 461 // If neither AT nor AT> is specified for an allocatable section, the linker 462 // will set the LMA such that the difference between VMA and LMA for the 463 // section is the same as the preceding output section in the same region 464 // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html 465 CurOutSec->setLMAOffset(LMAOffset); 466 } 467 468 template <class ELFT> void LinkerScript<ELFT>::process(BaseCommand &Base) { 469 // This handles the assignments to symbol or to a location counter (.) 470 if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) { 471 assignSymbol(AssignCmd, true); 472 return; 473 } 474 475 // Handle BYTE(), SHORT(), LONG(), or QUAD(). 476 if (auto *DataCmd = dyn_cast<BytesDataCommand>(&Base)) { 477 DataCmd->Offset = Dot - CurOutSec->Addr; 478 Dot += DataCmd->Size; 479 CurOutSec->Size = Dot - CurOutSec->Addr; 480 return; 481 } 482 483 if (auto *AssertCmd = dyn_cast<AssertCommand>(&Base)) { 484 AssertCmd->Expression(Dot); 485 return; 486 } 487 488 // It handles single input section description command, 489 // calculates and assigns the offsets for each section and also 490 // updates the output section size. 491 auto &ICmd = cast<InputSectionDescription>(Base); 492 for (InputSectionData *ID : ICmd.Sections) { 493 // We tentatively added all synthetic sections at the beginning and removed 494 // empty ones afterwards (because there is no way to know whether they were 495 // going be empty or not other than actually running linker scripts.) 496 // We need to ignore remains of empty sections. 497 if (auto *Sec = dyn_cast<SyntheticSection<ELFT>>(ID)) 498 if (Sec->empty()) 499 continue; 500 501 auto *IB = static_cast<InputSectionBase<ELFT> *>(ID); 502 switchTo(IB->OutSec); 503 if (auto *I = dyn_cast<InputSection<ELFT>>(IB)) 504 output(I); 505 else 506 flush(); 507 } 508 } 509 510 template <class ELFT> 511 static OutputSectionBase * 512 findSection(StringRef Name, const std::vector<OutputSectionBase *> &Sections) { 513 auto End = Sections.end(); 514 auto HasName = [=](OutputSectionBase *Sec) { return Sec->getName() == Name; }; 515 auto I = std::find_if(Sections.begin(), End, HasName); 516 std::vector<OutputSectionBase *> Ret; 517 if (I == End) 518 return nullptr; 519 assert(std::find_if(I + 1, End, HasName) == End); 520 return *I; 521 } 522 523 // This function searches for a memory region to place the given output 524 // section in. If found, a pointer to the appropriate memory region is 525 // returned. Otherwise, a nullptr is returned. 526 template <class ELFT> 527 MemoryRegion *LinkerScript<ELFT>::findMemoryRegion(OutputSectionCommand *Cmd, 528 OutputSectionBase *Sec) { 529 // If a memory region name was specified in the output section command, 530 // then try to find that region first. 531 if (!Cmd->MemoryRegionName.empty()) { 532 auto It = Opt.MemoryRegions.find(Cmd->MemoryRegionName); 533 if (It != Opt.MemoryRegions.end()) 534 return &It->second; 535 error("memory region '" + Cmd->MemoryRegionName + "' not declared"); 536 return nullptr; 537 } 538 539 // The memory region name is empty, thus a suitable region must be 540 // searched for in the region map. If the region map is empty, just 541 // return. Note that this check doesn't happen at the very beginning 542 // so that uses of undeclared regions can be caught. 543 if (!Opt.MemoryRegions.size()) 544 return nullptr; 545 546 // See if a region can be found by matching section flags. 547 for (auto &MRI : Opt.MemoryRegions) { 548 MemoryRegion &MR = MRI.second; 549 if ((MR.Flags & Sec->Flags) != 0 && (MR.NegFlags & Sec->Flags) == 0) 550 return &MR; 551 } 552 553 // Otherwise, no suitable region was found. 554 if (Sec->Flags & SHF_ALLOC) 555 error("no memory region specified for section '" + Sec->Name + "'"); 556 return nullptr; 557 } 558 559 // This function assigns offsets to input sections and an output section 560 // for a single sections command (e.g. ".text { *(.text); }"). 561 template <class ELFT> 562 void LinkerScript<ELFT>::assignOffsets(OutputSectionCommand *Cmd) { 563 if (Cmd->LMAExpr) 564 LMAOffset = Cmd->LMAExpr(Dot) - Dot; 565 OutputSectionBase *Sec = findSection<ELFT>(Cmd->Name, *OutputSections); 566 if (!Sec) 567 return; 568 569 if (Cmd->AddrExpr && Sec->Flags & SHF_ALLOC) 570 setDot(Cmd->AddrExpr); 571 572 // Handle align (e.g. ".foo : ALIGN(16) { ... }"). 573 if (Cmd->AlignExpr) 574 Sec->updateAlignment(Cmd->AlignExpr(0)); 575 576 // Try and find an appropriate memory region to assign offsets in. 577 CurMemRegion = findMemoryRegion(Cmd, Sec); 578 if (CurMemRegion) 579 Dot = CurMemRegion->Offset; 580 switchTo(Sec); 581 582 // Find the last section output location. We will output orphan sections 583 // there so that end symbols point to the correct location. 584 auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(), 585 [](const std::unique_ptr<BaseCommand> &Cmd) { 586 return !isa<SymbolAssignment>(*Cmd); 587 }) 588 .base(); 589 for (auto I = Cmd->Commands.begin(); I != E; ++I) 590 process(**I); 591 flush(); 592 std::for_each(E, Cmd->Commands.end(), 593 [this](std::unique_ptr<BaseCommand> &B) { process(*B.get()); }); 594 } 595 596 template <class ELFT> void LinkerScript<ELFT>::removeEmptyCommands() { 597 // It is common practice to use very generic linker scripts. So for any 598 // given run some of the output sections in the script will be empty. 599 // We could create corresponding empty output sections, but that would 600 // clutter the output. 601 // We instead remove trivially empty sections. The bfd linker seems even 602 // more aggressive at removing them. 603 auto Pos = std::remove_if( 604 Opt.Commands.begin(), Opt.Commands.end(), 605 [&](const std::unique_ptr<BaseCommand> &Base) { 606 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 607 return !findSection<ELFT>(Cmd->Name, *OutputSections); 608 return false; 609 }); 610 Opt.Commands.erase(Pos, Opt.Commands.end()); 611 } 612 613 static bool isAllSectionDescription(const OutputSectionCommand &Cmd) { 614 for (const std::unique_ptr<BaseCommand> &I : Cmd.Commands) 615 if (!isa<InputSectionDescription>(*I)) 616 return false; 617 return true; 618 } 619 620 template <class ELFT> void LinkerScript<ELFT>::adjustSectionsBeforeSorting() { 621 // If the output section contains only symbol assignments, create a 622 // corresponding output section. The bfd linker seems to only create them if 623 // '.' is assigned to, but creating these section should not have any bad 624 // consequeces and gives us a section to put the symbol in. 625 uintX_t Flags = SHF_ALLOC; 626 uint32_t Type = SHT_NOBITS; 627 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 628 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 629 if (!Cmd) 630 continue; 631 if (OutputSectionBase *Sec = 632 findSection<ELFT>(Cmd->Name, *OutputSections)) { 633 Flags = Sec->Flags; 634 Type = Sec->Type; 635 continue; 636 } 637 638 if (isAllSectionDescription(*Cmd)) 639 continue; 640 641 auto *OutSec = make<OutputSection<ELFT>>(Cmd->Name, Type, Flags); 642 OutputSections->push_back(OutSec); 643 } 644 } 645 646 template <class ELFT> void LinkerScript<ELFT>::adjustSectionsAfterSorting() { 647 placeOrphanSections(); 648 649 // If output section command doesn't specify any segments, 650 // and we haven't previously assigned any section to segment, 651 // then we simply assign section to the very first load segment. 652 // Below is an example of such linker script: 653 // PHDRS { seg PT_LOAD; } 654 // SECTIONS { .aaa : { *(.aaa) } } 655 std::vector<StringRef> DefPhdrs; 656 auto FirstPtLoad = 657 std::find_if(Opt.PhdrsCommands.begin(), Opt.PhdrsCommands.end(), 658 [](const PhdrsCommand &Cmd) { return Cmd.Type == PT_LOAD; }); 659 if (FirstPtLoad != Opt.PhdrsCommands.end()) 660 DefPhdrs.push_back(FirstPtLoad->Name); 661 662 // Walk the commands and propagate the program headers to commands that don't 663 // explicitly specify them. 664 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 665 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 666 if (!Cmd) 667 continue; 668 if (Cmd->Phdrs.empty()) 669 Cmd->Phdrs = DefPhdrs; 670 else 671 DefPhdrs = Cmd->Phdrs; 672 } 673 674 removeEmptyCommands(); 675 } 676 677 // When placing orphan sections, we want to place them after symbol assignments 678 // so that an orphan after 679 // begin_foo = .; 680 // foo : { *(foo) } 681 // end_foo = .; 682 // doesn't break the intended meaning of the begin/end symbols. 683 // We don't want to go over sections since Writer<ELFT>::sortSections is the 684 // one in charge of deciding the order of the sections. 685 // We don't want to go over alignments, since doing so in 686 // rx_sec : { *(rx_sec) } 687 // . = ALIGN(0x1000); 688 // /* The RW PT_LOAD starts here*/ 689 // rw_sec : { *(rw_sec) } 690 // would mean that the RW PT_LOAD would become unaligned. 691 static bool shouldSkip(const BaseCommand &Cmd) { 692 if (isa<OutputSectionCommand>(Cmd)) 693 return false; 694 const auto *Assign = dyn_cast<SymbolAssignment>(&Cmd); 695 if (!Assign) 696 return true; 697 return Assign->Name != "."; 698 } 699 700 // Orphan sections are sections present in the input files which are 701 // not explicitly placed into the output file by the linker script. 702 // 703 // When the control reaches this function, Opt.Commands contains 704 // output section commands for non-orphan sections only. This function 705 // adds new elements for orphan sections to Opt.Commands so that all 706 // sections are explicitly handled by Opt.Commands. 707 // 708 // Writer<ELFT>::sortSections has already sorted output sections. 709 // What we need to do is to scan OutputSections vector and 710 // Opt.Commands in parallel to find orphan sections. If there is an 711 // output section that doesn't have a corresponding entry in 712 // Opt.Commands, we will insert a new entry to Opt.Commands. 713 // 714 // There is some ambiguity as to where exactly a new entry should be 715 // inserted, because Opt.Commands contains not only output section 716 // commands but other types of commands such as symbol assignment 717 // expressions. There's no correct answer here due to the lack of the 718 // formal specification of the linker script. We use heuristics to 719 // determine whether a new output command should be added before or 720 // after another commands. For the details, look at shouldSkip 721 // function. 722 template <class ELFT> void LinkerScript<ELFT>::placeOrphanSections() { 723 // The OutputSections are already in the correct order. 724 // This loops creates or moves commands as needed so that they are in the 725 // correct order. 726 int CmdIndex = 0; 727 728 // As a horrible special case, skip the first . assignment if it is before any 729 // section. We do this because it is common to set a load address by starting 730 // the script with ". = 0xabcd" and the expectation is that every section is 731 // after that. 732 auto FirstSectionOrDotAssignment = 733 std::find_if(Opt.Commands.begin(), Opt.Commands.end(), 734 [](const std::unique_ptr<BaseCommand> &Cmd) { 735 if (isa<OutputSectionCommand>(*Cmd)) 736 return true; 737 const auto *Assign = dyn_cast<SymbolAssignment>(Cmd.get()); 738 if (!Assign) 739 return false; 740 return Assign->Name == "."; 741 }); 742 if (FirstSectionOrDotAssignment != Opt.Commands.end()) { 743 CmdIndex = FirstSectionOrDotAssignment - Opt.Commands.begin(); 744 if (isa<SymbolAssignment>(**FirstSectionOrDotAssignment)) 745 ++CmdIndex; 746 } 747 748 for (OutputSectionBase *Sec : *OutputSections) { 749 StringRef Name = Sec->getName(); 750 751 // Find the last spot where we can insert a command and still get the 752 // correct result. 753 auto CmdIter = Opt.Commands.begin() + CmdIndex; 754 auto E = Opt.Commands.end(); 755 while (CmdIter != E && shouldSkip(**CmdIter)) { 756 ++CmdIter; 757 ++CmdIndex; 758 } 759 760 auto Pos = 761 std::find_if(CmdIter, E, [&](const std::unique_ptr<BaseCommand> &Base) { 762 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 763 return Cmd && Cmd->Name == Name; 764 }); 765 if (Pos == E) { 766 Opt.Commands.insert(CmdIter, 767 llvm::make_unique<OutputSectionCommand>(Name)); 768 ++CmdIndex; 769 continue; 770 } 771 772 // Continue from where we found it. 773 CmdIndex = (Pos - Opt.Commands.begin()) + 1; 774 } 775 } 776 777 template <class ELFT> 778 void LinkerScript<ELFT>::assignAddresses(std::vector<PhdrEntry> &Phdrs) { 779 // Assign addresses as instructed by linker script SECTIONS sub-commands. 780 Dot = 0; 781 782 // A symbol can be assigned before any section is mentioned in the linker 783 // script. In an DSO, the symbol values are addresses, so the only important 784 // section values are: 785 // * SHN_UNDEF 786 // * SHN_ABS 787 // * Any value meaning a regular section. 788 // To handle that, create a dummy aether section that fills the void before 789 // the linker scripts switches to another section. It has an index of one 790 // which will map to whatever the first actual section is. 791 auto *Aether = make<OutputSectionBase>("", 0, SHF_ALLOC); 792 Aether->SectionIndex = 1; 793 switchTo(Aether); 794 795 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 796 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) { 797 assignSymbol(Cmd); 798 continue; 799 } 800 801 if (auto *Cmd = dyn_cast<AssertCommand>(Base.get())) { 802 Cmd->Expression(Dot); 803 continue; 804 } 805 806 auto *Cmd = cast<OutputSectionCommand>(Base.get()); 807 assignOffsets(Cmd); 808 } 809 810 uintX_t MinVA = std::numeric_limits<uintX_t>::max(); 811 for (OutputSectionBase *Sec : *OutputSections) { 812 if (Sec->Flags & SHF_ALLOC) 813 MinVA = std::min<uint64_t>(MinVA, Sec->Addr); 814 else 815 Sec->Addr = 0; 816 } 817 818 allocateHeaders<ELFT>(Phdrs, *OutputSections, MinVA); 819 } 820 821 // Creates program headers as instructed by PHDRS linker script command. 822 template <class ELFT> std::vector<PhdrEntry> LinkerScript<ELFT>::createPhdrs() { 823 std::vector<PhdrEntry> Ret; 824 825 // Process PHDRS and FILEHDR keywords because they are not 826 // real output sections and cannot be added in the following loop. 827 for (const PhdrsCommand &Cmd : Opt.PhdrsCommands) { 828 Ret.emplace_back(Cmd.Type, Cmd.Flags == UINT_MAX ? PF_R : Cmd.Flags); 829 PhdrEntry &Phdr = Ret.back(); 830 831 if (Cmd.HasFilehdr) 832 Phdr.add(Out<ELFT>::ElfHeader); 833 if (Cmd.HasPhdrs) 834 Phdr.add(Out<ELFT>::ProgramHeaders); 835 836 if (Cmd.LMAExpr) { 837 Phdr.p_paddr = Cmd.LMAExpr(0); 838 Phdr.HasLMA = true; 839 } 840 } 841 842 // Add output sections to program headers. 843 for (OutputSectionBase *Sec : *OutputSections) { 844 if (!(Sec->Flags & SHF_ALLOC)) 845 break; 846 847 // Assign headers specified by linker script 848 for (size_t Id : getPhdrIndices(Sec->getName())) { 849 Ret[Id].add(Sec); 850 if (Opt.PhdrsCommands[Id].Flags == UINT_MAX) 851 Ret[Id].p_flags |= Sec->getPhdrFlags(); 852 } 853 } 854 return Ret; 855 } 856 857 template <class ELFT> bool LinkerScript<ELFT>::ignoreInterpSection() { 858 // Ignore .interp section in case we have PHDRS specification 859 // and PT_INTERP isn't listed. 860 return !Opt.PhdrsCommands.empty() && 861 llvm::find_if(Opt.PhdrsCommands, [](const PhdrsCommand &Cmd) { 862 return Cmd.Type == PT_INTERP; 863 }) == Opt.PhdrsCommands.end(); 864 } 865 866 template <class ELFT> uint32_t LinkerScript<ELFT>::getFiller(StringRef Name) { 867 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 868 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 869 if (Cmd->Name == Name) 870 return Cmd->Filler; 871 return 0; 872 } 873 874 template <class ELFT> 875 static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) { 876 const endianness E = ELFT::TargetEndianness; 877 878 switch (Size) { 879 case 1: 880 *Buf = (uint8_t)Data; 881 break; 882 case 2: 883 write16<E>(Buf, Data); 884 break; 885 case 4: 886 write32<E>(Buf, Data); 887 break; 888 case 8: 889 write64<E>(Buf, Data); 890 break; 891 default: 892 llvm_unreachable("unsupported Size argument"); 893 } 894 } 895 896 template <class ELFT> 897 void LinkerScript<ELFT>::writeDataBytes(StringRef Name, uint8_t *Buf) { 898 int I = getSectionIndex(Name); 899 if (I == INT_MAX) 900 return; 901 902 auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get()); 903 for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) 904 if (auto *Data = dyn_cast<BytesDataCommand>(Base.get())) 905 writeInt<ELFT>(Buf + Data->Offset, Data->Expression(0), Data->Size); 906 } 907 908 template <class ELFT> bool LinkerScript<ELFT>::hasLMA(StringRef Name) { 909 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 910 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 911 if (Cmd->LMAExpr && Cmd->Name == Name) 912 return true; 913 return false; 914 } 915 916 // Returns the index of the given section name in linker script 917 // SECTIONS commands. Sections are laid out as the same order as they 918 // were in the script. If a given name did not appear in the script, 919 // it returns INT_MAX, so that it will be laid out at end of file. 920 template <class ELFT> int LinkerScript<ELFT>::getSectionIndex(StringRef Name) { 921 for (int I = 0, E = Opt.Commands.size(); I != E; ++I) 922 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get())) 923 if (Cmd->Name == Name) 924 return I; 925 return INT_MAX; 926 } 927 928 template <class ELFT> bool LinkerScript<ELFT>::hasPhdrsCommands() { 929 return !Opt.PhdrsCommands.empty(); 930 } 931 932 template <class ELFT> 933 const OutputSectionBase *LinkerScript<ELFT>::getOutputSection(const Twine &Loc, 934 StringRef Name) { 935 static OutputSectionBase FakeSec("", 0, 0); 936 937 for (OutputSectionBase *Sec : *OutputSections) 938 if (Sec->getName() == Name) 939 return Sec; 940 941 error(Loc + ": undefined section " + Name); 942 return &FakeSec; 943 } 944 945 // This function is essentially the same as getOutputSection(Name)->Size, 946 // but it won't print out an error message if a given section is not found. 947 // 948 // Linker script does not create an output section if its content is empty. 949 // We want to allow SIZEOF(.foo) where .foo is a section which happened to 950 // be empty. That is why this function is different from getOutputSection(). 951 template <class ELFT> 952 uint64_t LinkerScript<ELFT>::getOutputSectionSize(StringRef Name) { 953 for (OutputSectionBase *Sec : *OutputSections) 954 if (Sec->getName() == Name) 955 return Sec->Size; 956 return 0; 957 } 958 959 template <class ELFT> uint64_t LinkerScript<ELFT>::getHeaderSize() { 960 return elf::getHeaderSize<ELFT>(); 961 } 962 963 template <class ELFT> 964 uint64_t LinkerScript<ELFT>::getSymbolValue(const Twine &Loc, StringRef S) { 965 if (SymbolBody *B = Symtab<ELFT>::X->find(S)) 966 return B->getVA<ELFT>(); 967 error(Loc + ": symbol not found: " + S); 968 return 0; 969 } 970 971 template <class ELFT> bool LinkerScript<ELFT>::isDefined(StringRef S) { 972 return Symtab<ELFT>::X->find(S) != nullptr; 973 } 974 975 template <class ELFT> bool LinkerScript<ELFT>::isAbsolute(StringRef S) { 976 SymbolBody *Sym = Symtab<ELFT>::X->find(S); 977 auto *DR = dyn_cast_or_null<DefinedRegular<ELFT>>(Sym); 978 return DR && !DR->Section; 979 } 980 981 // Gets section symbol belongs to. Symbol "." doesn't belong to any 982 // specific section but isn't absolute at the same time, so we try 983 // to find suitable section for it as well. 984 template <class ELFT> 985 const OutputSectionBase *LinkerScript<ELFT>::getSymbolSection(StringRef S) { 986 if (SymbolBody *Sym = Symtab<ELFT>::X->find(S)) 987 return SymbolTableSection<ELFT>::getOutputSection(Sym); 988 return CurOutSec; 989 } 990 991 // Returns indices of ELF headers containing specific section, identified 992 // by Name. Each index is a zero based number of ELF header listed within 993 // PHDRS {} script block. 994 template <class ELFT> 995 std::vector<size_t> LinkerScript<ELFT>::getPhdrIndices(StringRef SectionName) { 996 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 997 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 998 if (!Cmd || Cmd->Name != SectionName) 999 continue; 1000 1001 std::vector<size_t> Ret; 1002 for (StringRef PhdrName : Cmd->Phdrs) 1003 Ret.push_back(getPhdrIndex(Cmd->Location, PhdrName)); 1004 return Ret; 1005 } 1006 return {}; 1007 } 1008 1009 template <class ELFT> 1010 size_t LinkerScript<ELFT>::getPhdrIndex(const Twine &Loc, StringRef PhdrName) { 1011 size_t I = 0; 1012 for (PhdrsCommand &Cmd : Opt.PhdrsCommands) { 1013 if (Cmd.Name == PhdrName) 1014 return I; 1015 ++I; 1016 } 1017 error(Loc + ": section header '" + PhdrName + "' is not listed in PHDRS"); 1018 return 0; 1019 } 1020 1021 class elf::ScriptParser final : public ScriptLexer { 1022 typedef void (ScriptParser::*Handler)(); 1023 1024 public: 1025 ScriptParser(MemoryBufferRef MB) 1026 : ScriptLexer(MB), 1027 IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} 1028 1029 void readLinkerScript(); 1030 void readVersionScript(); 1031 void readDynamicList(); 1032 1033 private: 1034 void addFile(StringRef Path); 1035 1036 void readAsNeeded(); 1037 void readEntry(); 1038 void readExtern(); 1039 void readGroup(); 1040 void readInclude(); 1041 void readMemory(); 1042 void readOutput(); 1043 void readOutputArch(); 1044 void readOutputFormat(); 1045 void readPhdrs(); 1046 void readSearchDir(); 1047 void readSections(); 1048 void readVersion(); 1049 void readVersionScriptCommand(); 1050 1051 SymbolAssignment *readAssignment(StringRef Name); 1052 BytesDataCommand *readBytesDataCommand(StringRef Tok); 1053 uint32_t readFill(); 1054 OutputSectionCommand *readOutputSectionDescription(StringRef OutSec); 1055 uint32_t readOutputSectionFiller(StringRef Tok); 1056 std::vector<StringRef> readOutputSectionPhdrs(); 1057 InputSectionDescription *readInputSectionDescription(StringRef Tok); 1058 StringMatcher readFilePatterns(); 1059 std::vector<SectionPattern> readInputSectionsList(); 1060 InputSectionDescription *readInputSectionRules(StringRef FilePattern); 1061 unsigned readPhdrType(); 1062 SortSectionPolicy readSortKind(); 1063 SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); 1064 SymbolAssignment *readProvideOrAssignment(StringRef Tok); 1065 void readSort(); 1066 Expr readAssert(); 1067 1068 uint64_t readMemoryAssignment(StringRef, StringRef, StringRef); 1069 std::pair<uint32_t, uint32_t> readMemoryAttributes(); 1070 1071 Expr readExpr(); 1072 Expr readExpr1(Expr Lhs, int MinPrec); 1073 StringRef readParenLiteral(); 1074 Expr readPrimary(); 1075 Expr readTernary(Expr Cond); 1076 Expr readParenExpr(); 1077 1078 // For parsing version script. 1079 std::vector<SymbolVersion> readVersionExtern(); 1080 void readAnonymousDeclaration(); 1081 void readVersionDeclaration(StringRef VerStr); 1082 std::vector<SymbolVersion> readSymbols(); 1083 void readLocals(); 1084 1085 ScriptConfiguration &Opt = *ScriptConfig; 1086 bool IsUnderSysroot; 1087 }; 1088 1089 void ScriptParser::readDynamicList() { 1090 expect("{"); 1091 readAnonymousDeclaration(); 1092 if (!atEOF()) 1093 setError("EOF expected, but got " + next()); 1094 } 1095 1096 void ScriptParser::readVersionScript() { 1097 readVersionScriptCommand(); 1098 if (!atEOF()) 1099 setError("EOF expected, but got " + next()); 1100 } 1101 1102 void ScriptParser::readVersionScriptCommand() { 1103 if (consume("{")) { 1104 readAnonymousDeclaration(); 1105 return; 1106 } 1107 1108 while (!atEOF() && !Error && peek() != "}") { 1109 StringRef VerStr = next(); 1110 if (VerStr == "{") { 1111 setError("anonymous version definition is used in " 1112 "combination with other version definitions"); 1113 return; 1114 } 1115 expect("{"); 1116 readVersionDeclaration(VerStr); 1117 } 1118 } 1119 1120 void ScriptParser::readVersion() { 1121 expect("{"); 1122 readVersionScriptCommand(); 1123 expect("}"); 1124 } 1125 1126 void ScriptParser::readLinkerScript() { 1127 while (!atEOF()) { 1128 StringRef Tok = next(); 1129 if (Tok == ";") 1130 continue; 1131 1132 if (Tok == "ASSERT") { 1133 Opt.Commands.emplace_back(new AssertCommand(readAssert())); 1134 } else if (Tok == "ENTRY") { 1135 readEntry(); 1136 } else if (Tok == "EXTERN") { 1137 readExtern(); 1138 } else if (Tok == "GROUP" || Tok == "INPUT") { 1139 readGroup(); 1140 } else if (Tok == "INCLUDE") { 1141 readInclude(); 1142 } else if (Tok == "MEMORY") { 1143 readMemory(); 1144 } else if (Tok == "OUTPUT") { 1145 readOutput(); 1146 } else if (Tok == "OUTPUT_ARCH") { 1147 readOutputArch(); 1148 } else if (Tok == "OUTPUT_FORMAT") { 1149 readOutputFormat(); 1150 } else if (Tok == "PHDRS") { 1151 readPhdrs(); 1152 } else if (Tok == "SEARCH_DIR") { 1153 readSearchDir(); 1154 } else if (Tok == "SECTIONS") { 1155 readSections(); 1156 } else if (Tok == "VERSION") { 1157 readVersion(); 1158 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { 1159 Opt.Commands.emplace_back(Cmd); 1160 } else { 1161 setError("unknown directive: " + Tok); 1162 } 1163 } 1164 } 1165 1166 void ScriptParser::addFile(StringRef S) { 1167 if (IsUnderSysroot && S.startswith("/")) { 1168 SmallString<128> PathData; 1169 StringRef Path = (Config->Sysroot + S).toStringRef(PathData); 1170 if (sys::fs::exists(Path)) { 1171 Driver->addFile(Saver.save(Path)); 1172 return; 1173 } 1174 } 1175 1176 if (sys::path::is_absolute(S)) { 1177 Driver->addFile(S); 1178 } else if (S.startswith("=")) { 1179 if (Config->Sysroot.empty()) 1180 Driver->addFile(S.substr(1)); 1181 else 1182 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1))); 1183 } else if (S.startswith("-l")) { 1184 Driver->addLibrary(S.substr(2)); 1185 } else if (sys::fs::exists(S)) { 1186 Driver->addFile(S); 1187 } else { 1188 if (Optional<std::string> Path = findFromSearchPaths(S)) 1189 Driver->addFile(Saver.save(*Path)); 1190 else 1191 setError("unable to find " + S); 1192 } 1193 } 1194 1195 void ScriptParser::readAsNeeded() { 1196 expect("("); 1197 bool Orig = Config->AsNeeded; 1198 Config->AsNeeded = true; 1199 while (!Error && !consume(")")) 1200 addFile(unquote(next())); 1201 Config->AsNeeded = Orig; 1202 } 1203 1204 void ScriptParser::readEntry() { 1205 // -e <symbol> takes predecence over ENTRY(<symbol>). 1206 expect("("); 1207 StringRef Tok = next(); 1208 if (Config->Entry.empty()) 1209 Config->Entry = Tok; 1210 expect(")"); 1211 } 1212 1213 void ScriptParser::readExtern() { 1214 expect("("); 1215 while (!Error && !consume(")")) 1216 Config->Undefined.push_back(next()); 1217 } 1218 1219 void ScriptParser::readGroup() { 1220 expect("("); 1221 while (!Error && !consume(")")) { 1222 StringRef Tok = next(); 1223 if (Tok == "AS_NEEDED") 1224 readAsNeeded(); 1225 else 1226 addFile(unquote(Tok)); 1227 } 1228 } 1229 1230 void ScriptParser::readInclude() { 1231 StringRef Tok = unquote(next()); 1232 1233 // https://sourceware.org/binutils/docs/ld/File-Commands.html: 1234 // The file will be searched for in the current directory, and in any 1235 // directory specified with the -L option. 1236 if (sys::fs::exists(Tok)) { 1237 if (Optional<MemoryBufferRef> MB = readFile(Tok)) 1238 tokenize(*MB); 1239 return; 1240 } 1241 if (Optional<std::string> Path = findFromSearchPaths(Tok)) { 1242 if (Optional<MemoryBufferRef> MB = readFile(*Path)) 1243 tokenize(*MB); 1244 return; 1245 } 1246 setError("cannot open " + Tok); 1247 } 1248 1249 void ScriptParser::readOutput() { 1250 // -o <file> takes predecence over OUTPUT(<file>). 1251 expect("("); 1252 StringRef Tok = next(); 1253 if (Config->OutputFile.empty()) 1254 Config->OutputFile = unquote(Tok); 1255 expect(")"); 1256 } 1257 1258 void ScriptParser::readOutputArch() { 1259 // OUTPUT_ARCH is ignored for now. 1260 expect("("); 1261 while (!Error && !consume(")")) 1262 skip(); 1263 } 1264 1265 void ScriptParser::readOutputFormat() { 1266 // Error checking only for now. 1267 expect("("); 1268 skip(); 1269 StringRef Tok = next(); 1270 if (Tok == ")") 1271 return; 1272 if (Tok != ",") { 1273 setError("unexpected token: " + Tok); 1274 return; 1275 } 1276 skip(); 1277 expect(","); 1278 skip(); 1279 expect(")"); 1280 } 1281 1282 void ScriptParser::readPhdrs() { 1283 expect("{"); 1284 while (!Error && !consume("}")) { 1285 StringRef Tok = next(); 1286 Opt.PhdrsCommands.push_back( 1287 {Tok, PT_NULL, false, false, UINT_MAX, nullptr}); 1288 PhdrsCommand &PhdrCmd = Opt.PhdrsCommands.back(); 1289 1290 PhdrCmd.Type = readPhdrType(); 1291 do { 1292 Tok = next(); 1293 if (Tok == ";") 1294 break; 1295 if (Tok == "FILEHDR") 1296 PhdrCmd.HasFilehdr = true; 1297 else if (Tok == "PHDRS") 1298 PhdrCmd.HasPhdrs = true; 1299 else if (Tok == "AT") 1300 PhdrCmd.LMAExpr = readParenExpr(); 1301 else if (Tok == "FLAGS") { 1302 expect("("); 1303 // Passing 0 for the value of dot is a bit of a hack. It means that 1304 // we accept expressions like ".|1". 1305 PhdrCmd.Flags = readExpr()(0); 1306 expect(")"); 1307 } else 1308 setError("unexpected header attribute: " + Tok); 1309 } while (!Error); 1310 } 1311 } 1312 1313 void ScriptParser::readSearchDir() { 1314 expect("("); 1315 StringRef Tok = next(); 1316 if (!Config->Nostdlib) 1317 Config->SearchPaths.push_back(unquote(Tok)); 1318 expect(")"); 1319 } 1320 1321 void ScriptParser::readSections() { 1322 Opt.HasSections = true; 1323 // -no-rosegment is used to avoid placing read only non-executable sections in 1324 // their own segment. We do the same if SECTIONS command is present in linker 1325 // script. See comment for computeFlags(). 1326 Config->SingleRoRx = true; 1327 1328 expect("{"); 1329 while (!Error && !consume("}")) { 1330 StringRef Tok = next(); 1331 BaseCommand *Cmd = readProvideOrAssignment(Tok); 1332 if (!Cmd) { 1333 if (Tok == "ASSERT") 1334 Cmd = new AssertCommand(readAssert()); 1335 else 1336 Cmd = readOutputSectionDescription(Tok); 1337 } 1338 Opt.Commands.emplace_back(Cmd); 1339 } 1340 } 1341 1342 static int precedence(StringRef Op) { 1343 return StringSwitch<int>(Op) 1344 .Cases("*", "/", 5) 1345 .Cases("+", "-", 4) 1346 .Cases("<<", ">>", 3) 1347 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 1348 .Cases("&", "|", 1) 1349 .Default(-1); 1350 } 1351 1352 StringMatcher ScriptParser::readFilePatterns() { 1353 std::vector<StringRef> V; 1354 while (!Error && !consume(")")) 1355 V.push_back(next()); 1356 return StringMatcher(V); 1357 } 1358 1359 SortSectionPolicy ScriptParser::readSortKind() { 1360 if (consume("SORT") || consume("SORT_BY_NAME")) 1361 return SortSectionPolicy::Name; 1362 if (consume("SORT_BY_ALIGNMENT")) 1363 return SortSectionPolicy::Alignment; 1364 if (consume("SORT_BY_INIT_PRIORITY")) 1365 return SortSectionPolicy::Priority; 1366 if (consume("SORT_NONE")) 1367 return SortSectionPolicy::None; 1368 return SortSectionPolicy::Default; 1369 } 1370 1371 // Method reads a list of sequence of excluded files and section globs given in 1372 // a following form: ((EXCLUDE_FILE(file_pattern+))? section_pattern+)+ 1373 // Example: *(.foo.1 EXCLUDE_FILE (*a.o) .foo.2 EXCLUDE_FILE (*b.o) .foo.3) 1374 // The semantics of that is next: 1375 // * Include .foo.1 from every file. 1376 // * Include .foo.2 from every file but a.o 1377 // * Include .foo.3 from every file but b.o 1378 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 1379 std::vector<SectionPattern> Ret; 1380 while (!Error && peek() != ")") { 1381 StringMatcher ExcludeFilePat; 1382 if (consume("EXCLUDE_FILE")) { 1383 expect("("); 1384 ExcludeFilePat = readFilePatterns(); 1385 } 1386 1387 std::vector<StringRef> V; 1388 while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") 1389 V.push_back(next()); 1390 1391 if (!V.empty()) 1392 Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); 1393 else 1394 setError("section pattern is expected"); 1395 } 1396 return Ret; 1397 } 1398 1399 // Reads contents of "SECTIONS" directive. That directive contains a 1400 // list of glob patterns for input sections. The grammar is as follows. 1401 // 1402 // <patterns> ::= <section-list> 1403 // | <sort> "(" <section-list> ")" 1404 // | <sort> "(" <sort> "(" <section-list> ")" ")" 1405 // 1406 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" 1407 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" 1408 // 1409 // <section-list> is parsed by readInputSectionsList(). 1410 InputSectionDescription * 1411 ScriptParser::readInputSectionRules(StringRef FilePattern) { 1412 auto *Cmd = new InputSectionDescription(FilePattern); 1413 expect("("); 1414 while (!Error && !consume(")")) { 1415 SortSectionPolicy Outer = readSortKind(); 1416 SortSectionPolicy Inner = SortSectionPolicy::Default; 1417 std::vector<SectionPattern> V; 1418 if (Outer != SortSectionPolicy::Default) { 1419 expect("("); 1420 Inner = readSortKind(); 1421 if (Inner != SortSectionPolicy::Default) { 1422 expect("("); 1423 V = readInputSectionsList(); 1424 expect(")"); 1425 } else { 1426 V = readInputSectionsList(); 1427 } 1428 expect(")"); 1429 } else { 1430 V = readInputSectionsList(); 1431 } 1432 1433 for (SectionPattern &Pat : V) { 1434 Pat.SortInner = Inner; 1435 Pat.SortOuter = Outer; 1436 } 1437 1438 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 1439 } 1440 return Cmd; 1441 } 1442 1443 InputSectionDescription * 1444 ScriptParser::readInputSectionDescription(StringRef Tok) { 1445 // Input section wildcard can be surrounded by KEEP. 1446 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 1447 if (Tok == "KEEP") { 1448 expect("("); 1449 StringRef FilePattern = next(); 1450 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 1451 expect(")"); 1452 Opt.KeptSections.push_back(Cmd); 1453 return Cmd; 1454 } 1455 return readInputSectionRules(Tok); 1456 } 1457 1458 void ScriptParser::readSort() { 1459 expect("("); 1460 expect("CONSTRUCTORS"); 1461 expect(")"); 1462 } 1463 1464 Expr ScriptParser::readAssert() { 1465 expect("("); 1466 Expr E = readExpr(); 1467 expect(","); 1468 StringRef Msg = unquote(next()); 1469 expect(")"); 1470 return [=](uint64_t Dot) { 1471 uint64_t V = E(Dot); 1472 if (!V) 1473 error(Msg); 1474 return V; 1475 }; 1476 } 1477 1478 // Reads a FILL(expr) command. We handle the FILL command as an 1479 // alias for =fillexp section attribute, which is different from 1480 // what GNU linkers do. 1481 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 1482 uint32_t ScriptParser::readFill() { 1483 expect("("); 1484 uint32_t V = readOutputSectionFiller(next()); 1485 expect(")"); 1486 expect(";"); 1487 return V; 1488 } 1489 1490 OutputSectionCommand * 1491 ScriptParser::readOutputSectionDescription(StringRef OutSec) { 1492 OutputSectionCommand *Cmd = new OutputSectionCommand(OutSec); 1493 Cmd->Location = getCurrentLocation(); 1494 1495 // Read an address expression. 1496 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html#Output-Section-Address 1497 if (peek() != ":") 1498 Cmd->AddrExpr = readExpr(); 1499 1500 expect(":"); 1501 1502 if (consume("AT")) 1503 Cmd->LMAExpr = readParenExpr(); 1504 if (consume("ALIGN")) 1505 Cmd->AlignExpr = readParenExpr(); 1506 if (consume("SUBALIGN")) 1507 Cmd->SubalignExpr = readParenExpr(); 1508 1509 // Parse constraints. 1510 if (consume("ONLY_IF_RO")) 1511 Cmd->Constraint = ConstraintKind::ReadOnly; 1512 if (consume("ONLY_IF_RW")) 1513 Cmd->Constraint = ConstraintKind::ReadWrite; 1514 expect("{"); 1515 1516 while (!Error && !consume("}")) { 1517 StringRef Tok = next(); 1518 if (Tok == ";") { 1519 // Empty commands are allowed. Do nothing here. 1520 } else if (SymbolAssignment *Assignment = readProvideOrAssignment(Tok)) { 1521 Cmd->Commands.emplace_back(Assignment); 1522 } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) { 1523 Cmd->Commands.emplace_back(Data); 1524 } else if (Tok == "ASSERT") { 1525 Cmd->Commands.emplace_back(new AssertCommand(readAssert())); 1526 expect(";"); 1527 } else if (Tok == "CONSTRUCTORS") { 1528 // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors 1529 // by name. This is for very old file formats such as ECOFF/XCOFF. 1530 // For ELF, we should ignore. 1531 } else if (Tok == "FILL") { 1532 Cmd->Filler = readFill(); 1533 } else if (Tok == "SORT") { 1534 readSort(); 1535 } else if (peek() == "(") { 1536 Cmd->Commands.emplace_back(readInputSectionDescription(Tok)); 1537 } else { 1538 setError("unknown command " + Tok); 1539 } 1540 } 1541 1542 if (consume(">")) 1543 Cmd->MemoryRegionName = next(); 1544 1545 Cmd->Phdrs = readOutputSectionPhdrs(); 1546 1547 if (consume("=")) 1548 Cmd->Filler = readOutputSectionFiller(next()); 1549 else if (peek().startswith("=")) 1550 Cmd->Filler = readOutputSectionFiller(next().drop_front()); 1551 1552 // Consume optional comma following output section command. 1553 consume(","); 1554 1555 return Cmd; 1556 } 1557 1558 // Read "=<number>" where <number> is an octal/decimal/hexadecimal number. 1559 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 1560 // 1561 // ld.gold is not fully compatible with ld.bfd. ld.bfd handles 1562 // hexstrings as blobs of arbitrary sizes, while ld.gold handles them 1563 // as 32-bit big-endian values. We will do the same as ld.gold does 1564 // because it's simpler than what ld.bfd does. 1565 uint32_t ScriptParser::readOutputSectionFiller(StringRef Tok) { 1566 uint32_t V; 1567 if (!Tok.getAsInteger(0, V)) 1568 return V; 1569 setError("invalid filler expression: " + Tok); 1570 return 0; 1571 } 1572 1573 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { 1574 expect("("); 1575 SymbolAssignment *Cmd = readAssignment(next()); 1576 Cmd->Provide = Provide; 1577 Cmd->Hidden = Hidden; 1578 expect(")"); 1579 expect(";"); 1580 return Cmd; 1581 } 1582 1583 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) { 1584 SymbolAssignment *Cmd = nullptr; 1585 if (peek() == "=" || peek() == "+=") { 1586 Cmd = readAssignment(Tok); 1587 expect(";"); 1588 } else if (Tok == "PROVIDE") { 1589 Cmd = readProvideHidden(true, false); 1590 } else if (Tok == "HIDDEN") { 1591 Cmd = readProvideHidden(false, true); 1592 } else if (Tok == "PROVIDE_HIDDEN") { 1593 Cmd = readProvideHidden(true, true); 1594 } 1595 return Cmd; 1596 } 1597 1598 static uint64_t getSymbolValue(const Twine &Loc, StringRef S, uint64_t Dot) { 1599 if (S == ".") 1600 return Dot; 1601 return ScriptBase->getSymbolValue(Loc, S); 1602 } 1603 1604 static bool isAbsolute(StringRef S) { 1605 if (S == ".") 1606 return false; 1607 return ScriptBase->isAbsolute(S); 1608 } 1609 1610 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { 1611 StringRef Op = next(); 1612 Expr E; 1613 assert(Op == "=" || Op == "+="); 1614 if (consume("ABSOLUTE")) { 1615 E = readExpr(); 1616 E.IsAbsolute = [] { return true; }; 1617 } else { 1618 E = readExpr(); 1619 } 1620 if (Op == "+=") { 1621 std::string Loc = getCurrentLocation(); 1622 E = [=](uint64_t Dot) { 1623 return getSymbolValue(Loc, Name, Dot) + E(Dot); 1624 }; 1625 } 1626 return new SymbolAssignment(Name, E); 1627 } 1628 1629 // This is an operator-precedence parser to parse a linker 1630 // script expression. 1631 Expr ScriptParser::readExpr() { 1632 // Our lexer is context-aware. Set the in-expression bit so that 1633 // they apply different tokenization rules. 1634 bool Orig = InExpr; 1635 InExpr = true; 1636 Expr E = readExpr1(readPrimary(), 0); 1637 InExpr = Orig; 1638 return E; 1639 } 1640 1641 static Expr combine(StringRef Op, Expr L, Expr R) { 1642 auto IsAbs = [=] { return L.IsAbsolute() && R.IsAbsolute(); }; 1643 auto GetOutSec = [=] { 1644 const OutputSectionBase *S = L.Section(); 1645 return S ? S : R.Section(); 1646 }; 1647 1648 if (Op == "*") 1649 return [=](uint64_t Dot) { return L(Dot) * R(Dot); }; 1650 if (Op == "/") { 1651 return [=](uint64_t Dot) -> uint64_t { 1652 uint64_t RHS = R(Dot); 1653 if (RHS == 0) { 1654 error("division by zero"); 1655 return 0; 1656 } 1657 return L(Dot) / RHS; 1658 }; 1659 } 1660 if (Op == "+") 1661 return {[=](uint64_t Dot) { return L(Dot) + R(Dot); }, IsAbs, GetOutSec}; 1662 if (Op == "-") 1663 return {[=](uint64_t Dot) { return L(Dot) - R(Dot); }, IsAbs, GetOutSec}; 1664 if (Op == "<<") 1665 return [=](uint64_t Dot) { return L(Dot) << R(Dot); }; 1666 if (Op == ">>") 1667 return [=](uint64_t Dot) { return L(Dot) >> R(Dot); }; 1668 if (Op == "<") 1669 return [=](uint64_t Dot) { return L(Dot) < R(Dot); }; 1670 if (Op == ">") 1671 return [=](uint64_t Dot) { return L(Dot) > R(Dot); }; 1672 if (Op == ">=") 1673 return [=](uint64_t Dot) { return L(Dot) >= R(Dot); }; 1674 if (Op == "<=") 1675 return [=](uint64_t Dot) { return L(Dot) <= R(Dot); }; 1676 if (Op == "==") 1677 return [=](uint64_t Dot) { return L(Dot) == R(Dot); }; 1678 if (Op == "!=") 1679 return [=](uint64_t Dot) { return L(Dot) != R(Dot); }; 1680 if (Op == "&") 1681 return [=](uint64_t Dot) { return L(Dot) & R(Dot); }; 1682 if (Op == "|") 1683 return [=](uint64_t Dot) { return L(Dot) | R(Dot); }; 1684 llvm_unreachable("invalid operator"); 1685 } 1686 1687 // This is a part of the operator-precedence parser. This function 1688 // assumes that the remaining token stream starts with an operator. 1689 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { 1690 while (!atEOF() && !Error) { 1691 // Read an operator and an expression. 1692 if (consume("?")) 1693 return readTernary(Lhs); 1694 StringRef Op1 = peek(); 1695 if (precedence(Op1) < MinPrec) 1696 break; 1697 skip(); 1698 Expr Rhs = readPrimary(); 1699 1700 // Evaluate the remaining part of the expression first if the 1701 // next operator has greater precedence than the previous one. 1702 // For example, if we have read "+" and "3", and if the next 1703 // operator is "*", then we'll evaluate 3 * ... part first. 1704 while (!atEOF()) { 1705 StringRef Op2 = peek(); 1706 if (precedence(Op2) <= precedence(Op1)) 1707 break; 1708 Rhs = readExpr1(Rhs, precedence(Op2)); 1709 } 1710 1711 Lhs = combine(Op1, Lhs, Rhs); 1712 } 1713 return Lhs; 1714 } 1715 1716 uint64_t static getConstant(StringRef S) { 1717 if (S == "COMMONPAGESIZE") 1718 return Target->PageSize; 1719 if (S == "MAXPAGESIZE") 1720 return Config->MaxPageSize; 1721 error("unknown constant: " + S); 1722 return 0; 1723 } 1724 1725 // Parses Tok as an integer. Returns true if successful. 1726 // It recognizes hexadecimal (prefixed with "0x" or suffixed with "H") 1727 // and decimal numbers. Decimal numbers may have "K" (kilo) or 1728 // "M" (mega) prefixes. 1729 static bool readInteger(StringRef Tok, uint64_t &Result) { 1730 // Negative number 1731 if (Tok.startswith("-")) { 1732 if (!readInteger(Tok.substr(1), Result)) 1733 return false; 1734 Result = -Result; 1735 return true; 1736 } 1737 1738 // Hexadecimal 1739 if (Tok.startswith_lower("0x")) 1740 return !Tok.substr(2).getAsInteger(16, Result); 1741 if (Tok.endswith_lower("H")) 1742 return !Tok.drop_back().getAsInteger(16, Result); 1743 1744 // Decimal 1745 int Suffix = 1; 1746 if (Tok.endswith_lower("K")) { 1747 Suffix = 1024; 1748 Tok = Tok.drop_back(); 1749 } else if (Tok.endswith_lower("M")) { 1750 Suffix = 1024 * 1024; 1751 Tok = Tok.drop_back(); 1752 } 1753 if (Tok.getAsInteger(10, Result)) 1754 return false; 1755 Result *= Suffix; 1756 return true; 1757 } 1758 1759 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { 1760 int Size = StringSwitch<unsigned>(Tok) 1761 .Case("BYTE", 1) 1762 .Case("SHORT", 2) 1763 .Case("LONG", 4) 1764 .Case("QUAD", 8) 1765 .Default(-1); 1766 if (Size == -1) 1767 return nullptr; 1768 1769 return new BytesDataCommand(readParenExpr(), Size); 1770 } 1771 1772 StringRef ScriptParser::readParenLiteral() { 1773 expect("("); 1774 StringRef Tok = next(); 1775 expect(")"); 1776 return Tok; 1777 } 1778 1779 Expr ScriptParser::readPrimary() { 1780 if (peek() == "(") 1781 return readParenExpr(); 1782 1783 StringRef Tok = next(); 1784 std::string Location = getCurrentLocation(); 1785 1786 if (Tok == "~") { 1787 Expr E = readPrimary(); 1788 return [=](uint64_t Dot) { return ~E(Dot); }; 1789 } 1790 if (Tok == "-") { 1791 Expr E = readPrimary(); 1792 return [=](uint64_t Dot) { return -E(Dot); }; 1793 } 1794 1795 // Built-in functions are parsed here. 1796 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 1797 if (Tok == "ADDR") { 1798 StringRef Name = readParenLiteral(); 1799 return {[=](uint64_t Dot) { 1800 return ScriptBase->getOutputSection(Location, Name)->Addr; 1801 }, 1802 [=] { return false; }, 1803 [=] { return ScriptBase->getOutputSection(Location, Name); }}; 1804 } 1805 if (Tok == "LOADADDR") { 1806 StringRef Name = readParenLiteral(); 1807 return [=](uint64_t Dot) { 1808 return ScriptBase->getOutputSection(Location, Name)->getLMA(); 1809 }; 1810 } 1811 if (Tok == "ASSERT") 1812 return readAssert(); 1813 if (Tok == "ALIGN") { 1814 expect("("); 1815 Expr E = readExpr(); 1816 if (consume(",")) { 1817 Expr E2 = readExpr(); 1818 expect(")"); 1819 return [=](uint64_t Dot) { return alignTo(E(Dot), E2(Dot)); }; 1820 } 1821 expect(")"); 1822 return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; 1823 } 1824 if (Tok == "CONSTANT") { 1825 StringRef Name = readParenLiteral(); 1826 return [=](uint64_t Dot) { return getConstant(Name); }; 1827 } 1828 if (Tok == "DEFINED") { 1829 StringRef Name = readParenLiteral(); 1830 return [=](uint64_t Dot) { return ScriptBase->isDefined(Name) ? 1 : 0; }; 1831 } 1832 if (Tok == "SEGMENT_START") { 1833 expect("("); 1834 skip(); 1835 expect(","); 1836 Expr E = readExpr(); 1837 expect(")"); 1838 return [=](uint64_t Dot) { return E(Dot); }; 1839 } 1840 if (Tok == "DATA_SEGMENT_ALIGN") { 1841 expect("("); 1842 Expr E = readExpr(); 1843 expect(","); 1844 readExpr(); 1845 expect(")"); 1846 return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; 1847 } 1848 if (Tok == "DATA_SEGMENT_END") { 1849 expect("("); 1850 expect("."); 1851 expect(")"); 1852 return [](uint64_t Dot) { return Dot; }; 1853 } 1854 // GNU linkers implements more complicated logic to handle 1855 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and just align to 1856 // the next page boundary for simplicity. 1857 if (Tok == "DATA_SEGMENT_RELRO_END") { 1858 expect("("); 1859 readExpr(); 1860 expect(","); 1861 readExpr(); 1862 expect(")"); 1863 return [](uint64_t Dot) { return alignTo(Dot, Target->PageSize); }; 1864 } 1865 if (Tok == "SIZEOF") { 1866 StringRef Name = readParenLiteral(); 1867 return [=](uint64_t Dot) { return ScriptBase->getOutputSectionSize(Name); }; 1868 } 1869 if (Tok == "ALIGNOF") { 1870 StringRef Name = readParenLiteral(); 1871 return [=](uint64_t Dot) { 1872 return ScriptBase->getOutputSection(Location, Name)->Addralign; 1873 }; 1874 } 1875 if (Tok == "SIZEOF_HEADERS") 1876 return [=](uint64_t Dot) { return ScriptBase->getHeaderSize(); }; 1877 1878 // Tok is a literal number. 1879 uint64_t V; 1880 if (readInteger(Tok, V)) 1881 return [=](uint64_t Dot) { return V; }; 1882 1883 // Tok is a symbol name. 1884 if (Tok != "." && !isValidCIdentifier(Tok)) 1885 setError("malformed number: " + Tok); 1886 return {[=](uint64_t Dot) { return getSymbolValue(Location, Tok, Dot); }, 1887 [=] { return isAbsolute(Tok); }, 1888 [=] { return ScriptBase->getSymbolSection(Tok); }}; 1889 } 1890 1891 Expr ScriptParser::readTernary(Expr Cond) { 1892 Expr L = readExpr(); 1893 expect(":"); 1894 Expr R = readExpr(); 1895 return [=](uint64_t Dot) { return Cond(Dot) ? L(Dot) : R(Dot); }; 1896 } 1897 1898 Expr ScriptParser::readParenExpr() { 1899 expect("("); 1900 Expr E = readExpr(); 1901 expect(")"); 1902 return E; 1903 } 1904 1905 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 1906 std::vector<StringRef> Phdrs; 1907 while (!Error && peek().startswith(":")) { 1908 StringRef Tok = next(); 1909 Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); 1910 } 1911 return Phdrs; 1912 } 1913 1914 // Read a program header type name. The next token must be a 1915 // name of a program header type or a constant (e.g. "0x3"). 1916 unsigned ScriptParser::readPhdrType() { 1917 StringRef Tok = next(); 1918 uint64_t Val; 1919 if (readInteger(Tok, Val)) 1920 return Val; 1921 1922 unsigned Ret = StringSwitch<unsigned>(Tok) 1923 .Case("PT_NULL", PT_NULL) 1924 .Case("PT_LOAD", PT_LOAD) 1925 .Case("PT_DYNAMIC", PT_DYNAMIC) 1926 .Case("PT_INTERP", PT_INTERP) 1927 .Case("PT_NOTE", PT_NOTE) 1928 .Case("PT_SHLIB", PT_SHLIB) 1929 .Case("PT_PHDR", PT_PHDR) 1930 .Case("PT_TLS", PT_TLS) 1931 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1932 .Case("PT_GNU_STACK", PT_GNU_STACK) 1933 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1934 .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) 1935 .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) 1936 .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) 1937 .Default(-1); 1938 1939 if (Ret == (unsigned)-1) { 1940 setError("invalid program header type: " + Tok); 1941 return PT_NULL; 1942 } 1943 return Ret; 1944 } 1945 1946 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 1947 void ScriptParser::readAnonymousDeclaration() { 1948 // Read global symbols first. "global:" is default, so if there's 1949 // no label, we assume global symbols. 1950 if (peek() != "local") { 1951 if (consume("global")) 1952 expect(":"); 1953 for (SymbolVersion V : readSymbols()) 1954 Config->VersionScriptGlobals.push_back(V); 1955 } 1956 readLocals(); 1957 expect("}"); 1958 expect(";"); 1959 } 1960 1961 void ScriptParser::readLocals() { 1962 if (!consume("local")) 1963 return; 1964 expect(":"); 1965 std::vector<SymbolVersion> Locals = readSymbols(); 1966 for (SymbolVersion V : Locals) { 1967 if (V.Name == "*") { 1968 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1969 continue; 1970 } 1971 Config->VersionScriptLocals.push_back(V); 1972 } 1973 } 1974 1975 // Reads a list of symbols, e.g. "VerStr { global: foo; bar; local: *; };". 1976 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1977 // Identifiers start at 2 because 0 and 1 are reserved 1978 // for VER_NDX_LOCAL and VER_NDX_GLOBAL constants. 1979 uint16_t VersionId = Config->VersionDefinitions.size() + 2; 1980 Config->VersionDefinitions.push_back({VerStr, VersionId}); 1981 1982 // Read global symbols. 1983 if (peek() != "local") { 1984 if (consume("global")) 1985 expect(":"); 1986 Config->VersionDefinitions.back().Globals = readSymbols(); 1987 } 1988 readLocals(); 1989 expect("}"); 1990 1991 // Each version may have a parent version. For example, "Ver2" 1992 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" 1993 // as a parent. This version hierarchy is, probably against your 1994 // instinct, purely for hint; the runtime doesn't care about it 1995 // at all. In LLD, we simply ignore it. 1996 if (peek() != ";") 1997 skip(); 1998 expect(";"); 1999 } 2000 2001 // Reads a list of symbols for a versions cript. 2002 std::vector<SymbolVersion> ScriptParser::readSymbols() { 2003 std::vector<SymbolVersion> Ret; 2004 for (;;) { 2005 if (consume("extern")) { 2006 for (SymbolVersion V : readVersionExtern()) 2007 Ret.push_back(V); 2008 continue; 2009 } 2010 2011 if (peek() == "}" || (peek() == "local" && peek(1) == ":") || Error) 2012 break; 2013 StringRef Tok = next(); 2014 Ret.push_back({unquote(Tok), false, hasWildcard(Tok)}); 2015 expect(";"); 2016 } 2017 return Ret; 2018 } 2019 2020 // Reads an "extern C++" directive, e.g., 2021 // "extern "C++" { ns::*; "f(int, double)"; };" 2022 std::vector<SymbolVersion> ScriptParser::readVersionExtern() { 2023 StringRef Tok = next(); 2024 bool IsCXX = Tok == "\"C++\""; 2025 if (!IsCXX && Tok != "\"C\"") 2026 setError("Unknown language"); 2027 expect("{"); 2028 2029 std::vector<SymbolVersion> Ret; 2030 while (!Error && peek() != "}") { 2031 StringRef Tok = next(); 2032 bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); 2033 Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); 2034 expect(";"); 2035 } 2036 2037 expect("}"); 2038 expect(";"); 2039 return Ret; 2040 } 2041 2042 uint64_t ScriptParser::readMemoryAssignment( 2043 StringRef S1, StringRef S2, StringRef S3) { 2044 if (!(consume(S1) || consume(S2) || consume(S3))) { 2045 setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); 2046 return 0; 2047 } 2048 expect("="); 2049 2050 // TODO: Fully support constant expressions. 2051 uint64_t Val; 2052 if (!readInteger(next(), Val)) 2053 setError("nonconstant expression for "+ S1); 2054 return Val; 2055 } 2056 2057 // Parse the MEMORY command as specified in: 2058 // https://sourceware.org/binutils/docs/ld/MEMORY.html 2059 // 2060 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... } 2061 void ScriptParser::readMemory() { 2062 expect("{"); 2063 while (!Error && !consume("}")) { 2064 StringRef Name = next(); 2065 2066 uint32_t Flags = 0; 2067 uint32_t NegFlags = 0; 2068 if (consume("(")) { 2069 std::tie(Flags, NegFlags) = readMemoryAttributes(); 2070 expect(")"); 2071 } 2072 expect(":"); 2073 2074 uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); 2075 expect(","); 2076 uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); 2077 2078 // Add the memory region to the region map (if it doesn't already exist). 2079 auto It = Opt.MemoryRegions.find(Name); 2080 if (It != Opt.MemoryRegions.end()) 2081 setError("region '" + Name + "' already defined"); 2082 else 2083 Opt.MemoryRegions[Name] = {Name, Origin, Length, Origin, Flags, NegFlags}; 2084 } 2085 } 2086 2087 // This function parses the attributes used to match against section 2088 // flags when placing output sections in a memory region. These flags 2089 // are only used when an explicit memory region name is not used. 2090 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { 2091 uint32_t Flags = 0; 2092 uint32_t NegFlags = 0; 2093 bool Invert = false; 2094 2095 for (char C : next().lower()) { 2096 uint32_t Flag = 0; 2097 if (C == '!') 2098 Invert = !Invert; 2099 else if (C == 'w') 2100 Flag = SHF_WRITE; 2101 else if (C == 'x') 2102 Flag = SHF_EXECINSTR; 2103 else if (C == 'a') 2104 Flag = SHF_ALLOC; 2105 else if (C != 'r') 2106 setError("invalid memory region attribute"); 2107 2108 if (Invert) 2109 NegFlags |= Flag; 2110 else 2111 Flags |= Flag; 2112 } 2113 return {Flags, NegFlags}; 2114 } 2115 2116 void elf::readLinkerScript(MemoryBufferRef MB) { 2117 ScriptParser(MB).readLinkerScript(); 2118 } 2119 2120 void elf::readVersionScript(MemoryBufferRef MB) { 2121 ScriptParser(MB).readVersionScript(); 2122 } 2123 2124 void elf::readDynamicList(MemoryBufferRef MB) { 2125 ScriptParser(MB).readDynamicList(); 2126 } 2127 2128 template class elf::LinkerScript<ELF32LE>; 2129 template class elf::LinkerScript<ELF32BE>; 2130 template class elf::LinkerScript<ELF64LE>; 2131 template class elf::LinkerScript<ELF64BE>; 2132