1 //===- LinkerScript.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the parser/evaluator of the linker script. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "LinkerScript.h" 15 #include "Config.h" 16 #include "Driver.h" 17 #include "InputSection.h" 18 #include "Memory.h" 19 #include "OutputSections.h" 20 #include "ScriptParser.h" 21 #include "Strings.h" 22 #include "SymbolTable.h" 23 #include "Symbols.h" 24 #include "SyntheticSections.h" 25 #include "Target.h" 26 #include "Writer.h" 27 #include "llvm/ADT/STLExtras.h" 28 #include "llvm/ADT/SmallString.h" 29 #include "llvm/ADT/StringRef.h" 30 #include "llvm/ADT/StringSwitch.h" 31 #include "llvm/Support/Casting.h" 32 #include "llvm/Support/ELF.h" 33 #include "llvm/Support/Endian.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/FileSystem.h" 36 #include "llvm/Support/MathExtras.h" 37 #include "llvm/Support/Path.h" 38 #include <algorithm> 39 #include <cassert> 40 #include <cstddef> 41 #include <cstdint> 42 #include <iterator> 43 #include <limits> 44 #include <memory> 45 #include <string> 46 #include <tuple> 47 #include <vector> 48 49 using namespace llvm; 50 using namespace llvm::ELF; 51 using namespace llvm::object; 52 using namespace llvm::support::endian; 53 using namespace lld; 54 using namespace lld::elf; 55 56 LinkerScriptBase *elf::ScriptBase; 57 ScriptConfiguration *elf::ScriptConfig; 58 59 template <class ELFT> static SymbolBody *addRegular(SymbolAssignment *Cmd) { 60 uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT; 61 Symbol *Sym = Symtab<ELFT>::X->addUndefined( 62 Cmd->Name, /*IsLocal=*/false, STB_GLOBAL, Visibility, 63 /*Type*/ 0, 64 /*CanOmitFromDynSym*/ false, /*File*/ nullptr); 65 66 replaceBody<DefinedRegular<ELFT>>(Sym, Cmd->Name, /*IsLocal=*/false, 67 Visibility, STT_NOTYPE, 0, 0, nullptr, 68 nullptr); 69 return Sym->body(); 70 } 71 72 template <class ELFT> static SymbolBody *addSynthetic(SymbolAssignment *Cmd) { 73 uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT; 74 const OutputSectionBase *Sec = 75 ScriptConfig->HasSections ? nullptr : Cmd->Expression.Section(); 76 Symbol *Sym = Symtab<ELFT>::X->addUndefined( 77 Cmd->Name, /*IsLocal=*/false, STB_GLOBAL, Visibility, 78 /*Type*/ 0, 79 /*CanOmitFromDynSym*/ false, /*File*/ nullptr); 80 81 replaceBody<DefinedSynthetic>(Sym, Cmd->Name, 0, Sec); 82 return Sym->body(); 83 } 84 85 static bool isUnderSysroot(StringRef Path) { 86 if (Config->Sysroot == "") 87 return false; 88 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 89 if (sys::fs::equivalent(Config->Sysroot, Path)) 90 return true; 91 return false; 92 } 93 94 // Sets value of a symbol. Two kinds of symbols are processed: synthetic 95 // symbols, whose value is an offset from beginning of section and regular 96 // symbols whose value is absolute. 97 template <class ELFT> 98 static void assignSymbol(SymbolAssignment *Cmd, typename ELFT::uint Dot = 0) { 99 if (!Cmd->Sym) 100 return; 101 102 if (auto *Body = dyn_cast<DefinedSynthetic>(Cmd->Sym)) { 103 Body->Section = Cmd->Expression.Section(); 104 if (Body->Section) { 105 uint64_t VA = 0; 106 if (Body->Section->Flags & SHF_ALLOC) 107 VA = Body->Section->Addr; 108 Body->Value = Cmd->Expression(Dot) - VA; 109 } 110 return; 111 } 112 113 cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Cmd->Expression(Dot); 114 } 115 116 template <class ELFT> static void addSymbol(SymbolAssignment *Cmd) { 117 if (Cmd->Name == ".") 118 return; 119 120 // If a symbol was in PROVIDE(), we need to define it only when 121 // it is a referenced undefined symbol. 122 SymbolBody *B = Symtab<ELFT>::X->find(Cmd->Name); 123 if (Cmd->Provide && (!B || B->isDefined())) 124 return; 125 126 // Otherwise, create a new symbol if one does not exist or an 127 // undefined one does exist. 128 if (Cmd->Expression.IsAbsolute()) 129 Cmd->Sym = addRegular<ELFT>(Cmd); 130 else 131 Cmd->Sym = addSynthetic<ELFT>(Cmd); 132 133 // If there are sections, then let the value be assigned later in 134 // `assignAddresses`. 135 if (!ScriptConfig->HasSections) 136 assignSymbol<ELFT>(Cmd); 137 } 138 139 bool SymbolAssignment::classof(const BaseCommand *C) { 140 return C->Kind == AssignmentKind; 141 } 142 143 bool OutputSectionCommand::classof(const BaseCommand *C) { 144 return C->Kind == OutputSectionKind; 145 } 146 147 bool InputSectionDescription::classof(const BaseCommand *C) { 148 return C->Kind == InputSectionKind; 149 } 150 151 bool AssertCommand::classof(const BaseCommand *C) { 152 return C->Kind == AssertKind; 153 } 154 155 bool BytesDataCommand::classof(const BaseCommand *C) { 156 return C->Kind == BytesDataKind; 157 } 158 159 template <class ELFT> LinkerScript<ELFT>::LinkerScript() = default; 160 template <class ELFT> LinkerScript<ELFT>::~LinkerScript() = default; 161 162 template <class ELFT> static StringRef basename(InputSectionBase<ELFT> *S) { 163 if (S->getFile()) 164 return sys::path::filename(S->getFile()->getName()); 165 return ""; 166 } 167 168 template <class ELFT> 169 bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) { 170 for (InputSectionDescription *ID : Opt.KeptSections) 171 if (ID->FilePat.match(basename(S))) 172 for (SectionPattern &P : ID->SectionPatterns) 173 if (P.SectionPat.match(S->Name)) 174 return true; 175 return false; 176 } 177 178 static bool comparePriority(InputSectionData *A, InputSectionData *B) { 179 return getPriority(A->Name) < getPriority(B->Name); 180 } 181 182 static bool compareName(InputSectionData *A, InputSectionData *B) { 183 return A->Name < B->Name; 184 } 185 186 static bool compareAlignment(InputSectionData *A, InputSectionData *B) { 187 // ">" is not a mistake. Larger alignments are placed before smaller 188 // alignments in order to reduce the amount of padding necessary. 189 // This is compatible with GNU. 190 return A->Alignment > B->Alignment; 191 } 192 193 static std::function<bool(InputSectionData *, InputSectionData *)> 194 getComparator(SortSectionPolicy K) { 195 switch (K) { 196 case SortSectionPolicy::Alignment: 197 return compareAlignment; 198 case SortSectionPolicy::Name: 199 return compareName; 200 case SortSectionPolicy::Priority: 201 return comparePriority; 202 default: 203 llvm_unreachable("unknown sort policy"); 204 } 205 } 206 207 template <class ELFT> 208 static bool matchConstraints(ArrayRef<InputSectionBase<ELFT> *> Sections, 209 ConstraintKind Kind) { 210 if (Kind == ConstraintKind::NoConstraint) 211 return true; 212 bool IsRW = llvm::any_of(Sections, [=](InputSectionData *Sec2) { 213 auto *Sec = static_cast<InputSectionBase<ELFT> *>(Sec2); 214 return Sec->Flags & SHF_WRITE; 215 }); 216 return (IsRW && Kind == ConstraintKind::ReadWrite) || 217 (!IsRW && Kind == ConstraintKind::ReadOnly); 218 } 219 220 static void sortSections(InputSectionData **Begin, InputSectionData **End, 221 SortSectionPolicy K) { 222 if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None) 223 std::stable_sort(Begin, End, getComparator(K)); 224 } 225 226 // Compute and remember which sections the InputSectionDescription matches. 227 template <class ELFT> 228 void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) { 229 // Collects all sections that satisfy constraints of I 230 // and attach them to I. 231 for (SectionPattern &Pat : I->SectionPatterns) { 232 size_t SizeBefore = I->Sections.size(); 233 234 for (InputSectionBase<ELFT> *S : Symtab<ELFT>::X->Sections) { 235 if (!S->Live || S->Assigned) 236 continue; 237 238 StringRef Filename = basename(S); 239 if (!I->FilePat.match(Filename) || Pat.ExcludedFilePat.match(Filename)) 240 continue; 241 if (!Pat.SectionPat.match(S->Name)) 242 continue; 243 I->Sections.push_back(S); 244 S->Assigned = true; 245 } 246 247 // Sort sections as instructed by SORT-family commands and --sort-section 248 // option. Because SORT-family commands can be nested at most two depth 249 // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command 250 // line option is respected even if a SORT command is given, the exact 251 // behavior we have here is a bit complicated. Here are the rules. 252 // 253 // 1. If two SORT commands are given, --sort-section is ignored. 254 // 2. If one SORT command is given, and if it is not SORT_NONE, 255 // --sort-section is handled as an inner SORT command. 256 // 3. If one SORT command is given, and if it is SORT_NONE, don't sort. 257 // 4. If no SORT command is given, sort according to --sort-section. 258 InputSectionData **Begin = I->Sections.data() + SizeBefore; 259 InputSectionData **End = I->Sections.data() + I->Sections.size(); 260 if (Pat.SortOuter != SortSectionPolicy::None) { 261 if (Pat.SortInner == SortSectionPolicy::Default) 262 sortSections(Begin, End, Config->SortSection); 263 else 264 sortSections(Begin, End, Pat.SortInner); 265 sortSections(Begin, End, Pat.SortOuter); 266 } 267 } 268 } 269 270 template <class ELFT> 271 void LinkerScript<ELFT>::discard(ArrayRef<InputSectionBase<ELFT> *> V) { 272 for (InputSectionBase<ELFT> *S : V) { 273 S->Live = false; 274 reportDiscarded(S); 275 } 276 } 277 278 template <class ELFT> 279 std::vector<InputSectionBase<ELFT> *> 280 LinkerScript<ELFT>::createInputSectionList(OutputSectionCommand &OutCmd) { 281 std::vector<InputSectionBase<ELFT> *> Ret; 282 283 for (const std::unique_ptr<BaseCommand> &Base : OutCmd.Commands) { 284 auto *Cmd = dyn_cast<InputSectionDescription>(Base.get()); 285 if (!Cmd) 286 continue; 287 computeInputSections(Cmd); 288 for (InputSectionData *S : Cmd->Sections) 289 Ret.push_back(static_cast<InputSectionBase<ELFT> *>(S)); 290 } 291 292 return Ret; 293 } 294 295 template <class ELFT> 296 void LinkerScript<ELFT>::addSection(OutputSectionFactory<ELFT> &Factory, 297 InputSectionBase<ELFT> *Sec, 298 StringRef Name) { 299 OutputSectionBase *OutSec; 300 bool IsNew; 301 std::tie(OutSec, IsNew) = Factory.create(Sec, Name); 302 if (IsNew) 303 OutputSections->push_back(OutSec); 304 OutSec->addSection(Sec); 305 } 306 307 template <class ELFT> 308 void LinkerScript<ELFT>::processCommands(OutputSectionFactory<ELFT> &Factory) { 309 for (unsigned I = 0; I < Opt.Commands.size(); ++I) { 310 auto Iter = Opt.Commands.begin() + I; 311 const std::unique_ptr<BaseCommand> &Base1 = *Iter; 312 313 // Handle symbol assignments outside of any output section. 314 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base1.get())) { 315 addSymbol<ELFT>(Cmd); 316 continue; 317 } 318 319 if (auto *Cmd = dyn_cast<AssertCommand>(Base1.get())) { 320 // If we don't have SECTIONS then output sections have already been 321 // created by Writer<ELFT>. The LinkerScript<ELFT>::assignAddresses 322 // will not be called, so ASSERT should be evaluated now. 323 if (!Opt.HasSections) 324 Cmd->Expression(0); 325 continue; 326 } 327 328 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base1.get())) { 329 std::vector<InputSectionBase<ELFT> *> V = createInputSectionList(*Cmd); 330 331 // The output section name `/DISCARD/' is special. 332 // Any input section assigned to it is discarded. 333 if (Cmd->Name == "/DISCARD/") { 334 discard(V); 335 continue; 336 } 337 338 // This is for ONLY_IF_RO and ONLY_IF_RW. An output section directive 339 // ".foo : ONLY_IF_R[OW] { ... }" is handled only if all member input 340 // sections satisfy a given constraint. If not, a directive is handled 341 // as if it wasn't present from the beginning. 342 // 343 // Because we'll iterate over Commands many more times, the easiest 344 // way to "make it as if it wasn't present" is to just remove it. 345 if (!matchConstraints<ELFT>(V, Cmd->Constraint)) { 346 for (InputSectionBase<ELFT> *S : V) 347 S->Assigned = false; 348 Opt.Commands.erase(Iter); 349 --I; 350 continue; 351 } 352 353 // A directive may contain symbol definitions like this: 354 // ".foo : { ...; bar = .; }". Handle them. 355 for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) 356 if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base.get())) 357 addSymbol<ELFT>(OutCmd); 358 359 // Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign 360 // is given, input sections are aligned to that value, whether the 361 // given value is larger or smaller than the original section alignment. 362 if (Cmd->SubalignExpr) { 363 uint32_t Subalign = Cmd->SubalignExpr(0); 364 for (InputSectionBase<ELFT> *S : V) 365 S->Alignment = Subalign; 366 } 367 368 // Add input sections to an output section. 369 for (InputSectionBase<ELFT> *S : V) 370 addSection(Factory, S, Cmd->Name); 371 } 372 } 373 } 374 375 // Add sections that didn't match any sections command. 376 template <class ELFT> 377 void LinkerScript<ELFT>::addOrphanSections( 378 OutputSectionFactory<ELFT> &Factory) { 379 for (InputSectionBase<ELFT> *S : Symtab<ELFT>::X->Sections) 380 if (S->Live && !S->OutSec) 381 addSection(Factory, S, getOutputSectionName(S->Name)); 382 } 383 384 template <class ELFT> static bool isTbss(OutputSectionBase *Sec) { 385 return (Sec->Flags & SHF_TLS) && Sec->Type == SHT_NOBITS; 386 } 387 388 template <class ELFT> void LinkerScript<ELFT>::output(InputSection<ELFT> *S) { 389 if (!AlreadyOutputIS.insert(S).second) 390 return; 391 bool IsTbss = isTbss<ELFT>(CurOutSec); 392 393 uintX_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot; 394 Pos = alignTo(Pos, S->Alignment); 395 S->OutSecOff = Pos - CurOutSec->Addr; 396 Pos += S->getSize(); 397 398 // Update output section size after adding each section. This is so that 399 // SIZEOF works correctly in the case below: 400 // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) } 401 CurOutSec->Size = Pos - CurOutSec->Addr; 402 403 // If there is a memory region associated with this input section, then 404 // place the section in that region and update the region index. 405 if (CurMemRegion) { 406 CurMemRegion->Offset += CurOutSec->Size; 407 uint64_t CurSize = CurMemRegion->Offset - CurMemRegion->Origin; 408 if (CurSize > CurMemRegion->Length) { 409 uint64_t OverflowAmt = CurSize - CurMemRegion->Length; 410 error("section '" + CurOutSec->Name + "' will not fit in region '" + 411 CurMemRegion->Name + "': overflowed by " + Twine(OverflowAmt) + 412 " bytes"); 413 } 414 } 415 416 if (IsTbss) 417 ThreadBssOffset = Pos - Dot; 418 else 419 Dot = Pos; 420 } 421 422 template <class ELFT> void LinkerScript<ELFT>::flush() { 423 if (!CurOutSec || !AlreadyOutputOS.insert(CurOutSec).second) 424 return; 425 if (auto *OutSec = dyn_cast<OutputSection<ELFT>>(CurOutSec)) { 426 for (InputSection<ELFT> *I : OutSec->Sections) 427 output(I); 428 } else { 429 Dot += CurOutSec->Size; 430 } 431 } 432 433 template <class ELFT> 434 void LinkerScript<ELFT>::switchTo(OutputSectionBase *Sec) { 435 if (CurOutSec == Sec) 436 return; 437 if (AlreadyOutputOS.count(Sec)) 438 return; 439 440 flush(); 441 CurOutSec = Sec; 442 443 Dot = alignTo(Dot, CurOutSec->Addralign); 444 CurOutSec->Addr = isTbss<ELFT>(CurOutSec) ? Dot + ThreadBssOffset : Dot; 445 446 // If neither AT nor AT> is specified for an allocatable section, the linker 447 // will set the LMA such that the difference between VMA and LMA for the 448 // section is the same as the preceding output section in the same region 449 // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html 450 CurOutSec->setLMAOffset(LMAOffset); 451 } 452 453 template <class ELFT> void LinkerScript<ELFT>::process(BaseCommand &Base) { 454 // This handles the assignments to symbol or to a location counter (.) 455 if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) { 456 if (AssignCmd->Name == ".") { 457 // Update to location counter means update to section size. 458 uintX_t Val = AssignCmd->Expression(Dot); 459 if (Val < Dot) 460 error("unable to move location counter backward for: " + 461 CurOutSec->Name); 462 Dot = Val; 463 CurOutSec->Size = Dot - CurOutSec->Addr; 464 return; 465 } 466 assignSymbol<ELFT>(AssignCmd, Dot); 467 return; 468 } 469 470 // Handle BYTE(), SHORT(), LONG(), or QUAD(). 471 if (auto *DataCmd = dyn_cast<BytesDataCommand>(&Base)) { 472 DataCmd->Offset = Dot - CurOutSec->Addr; 473 Dot += DataCmd->Size; 474 CurOutSec->Size = Dot - CurOutSec->Addr; 475 return; 476 } 477 478 if (auto *AssertCmd = dyn_cast<AssertCommand>(&Base)) { 479 AssertCmd->Expression(Dot); 480 return; 481 } 482 483 // It handles single input section description command, 484 // calculates and assigns the offsets for each section and also 485 // updates the output section size. 486 auto &ICmd = cast<InputSectionDescription>(Base); 487 for (InputSectionData *ID : ICmd.Sections) { 488 // We tentatively added all synthetic sections at the beginning and removed 489 // empty ones afterwards (because there is no way to know whether they were 490 // going be empty or not other than actually running linker scripts.) 491 // We need to ignore remains of empty sections. 492 if (auto *Sec = dyn_cast<SyntheticSection<ELFT>>(ID)) 493 if (Sec->empty()) 494 continue; 495 496 auto *IB = static_cast<InputSectionBase<ELFT> *>(ID); 497 switchTo(IB->OutSec); 498 if (auto *I = dyn_cast<InputSection<ELFT>>(IB)) 499 output(I); 500 else 501 flush(); 502 } 503 } 504 505 template <class ELFT> 506 static OutputSectionBase * 507 findSection(StringRef Name, const std::vector<OutputSectionBase *> &Sections) { 508 auto End = Sections.end(); 509 auto HasName = [=](OutputSectionBase *Sec) { return Sec->getName() == Name; }; 510 auto I = std::find_if(Sections.begin(), End, HasName); 511 std::vector<OutputSectionBase *> Ret; 512 if (I == End) 513 return nullptr; 514 assert(std::find_if(I + 1, End, HasName) == End); 515 return *I; 516 } 517 518 // This function searches for a memory region to place the given output 519 // section in. If found, a pointer to the appropriate memory region is 520 // returned. Otherwise, a nullptr is returned. 521 template <class ELFT> 522 MemoryRegion *LinkerScript<ELFT>::findMemoryRegion(OutputSectionCommand *Cmd, 523 OutputSectionBase *Sec) { 524 // If a memory region name was specified in the output section command, 525 // then try to find that region first. 526 if (!Cmd->MemoryRegionName.empty()) { 527 auto It = Opt.MemoryRegions.find(Cmd->MemoryRegionName); 528 if (It != Opt.MemoryRegions.end()) 529 return &It->second; 530 error("memory region '" + Cmd->MemoryRegionName + "' not declared"); 531 return nullptr; 532 } 533 534 // The memory region name is empty, thus a suitable region must be 535 // searched for in the region map. If the region map is empty, just 536 // return. Note that this check doesn't happen at the very beginning 537 // so that uses of undeclared regions can be caught. 538 if (!Opt.MemoryRegions.size()) 539 return nullptr; 540 541 // See if a region can be found by matching section flags. 542 for (auto &MRI : Opt.MemoryRegions) { 543 MemoryRegion &MR = MRI.second; 544 if ((MR.Flags & Sec->Flags) != 0 && (MR.NegFlags & Sec->Flags) == 0) 545 return &MR; 546 } 547 548 // Otherwise, no suitable region was found. 549 if (Sec->Flags & SHF_ALLOC) 550 error("no memory region specified for section '" + Sec->Name + "'"); 551 return nullptr; 552 } 553 554 // This function assigns offsets to input sections and an output section 555 // for a single sections command (e.g. ".text { *(.text); }"). 556 template <class ELFT> 557 void LinkerScript<ELFT>::assignOffsets(OutputSectionCommand *Cmd) { 558 if (Cmd->LMAExpr) 559 LMAOffset = Cmd->LMAExpr(Dot) - Dot; 560 OutputSectionBase *Sec = findSection<ELFT>(Cmd->Name, *OutputSections); 561 if (!Sec) 562 return; 563 564 // Handle align (e.g. ".foo : ALIGN(16) { ... }"). 565 if (Cmd->AlignExpr) 566 Sec->updateAlignment(Cmd->AlignExpr(0)); 567 568 // Try and find an appropriate memory region to assign offsets in. 569 CurMemRegion = findMemoryRegion(Cmd, Sec); 570 if (CurMemRegion) 571 Dot = CurMemRegion->Offset; 572 switchTo(Sec); 573 574 // Find the last section output location. We will output orphan sections 575 // there so that end symbols point to the correct location. 576 auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(), 577 [](const std::unique_ptr<BaseCommand> &Cmd) { 578 return !isa<SymbolAssignment>(*Cmd); 579 }) 580 .base(); 581 for (auto I = Cmd->Commands.begin(); I != E; ++I) 582 process(**I); 583 flush(); 584 std::for_each(E, Cmd->Commands.end(), 585 [this](std::unique_ptr<BaseCommand> &B) { process(*B.get()); }); 586 } 587 588 template <class ELFT> void LinkerScript<ELFT>::removeEmptyCommands() { 589 // It is common practice to use very generic linker scripts. So for any 590 // given run some of the output sections in the script will be empty. 591 // We could create corresponding empty output sections, but that would 592 // clutter the output. 593 // We instead remove trivially empty sections. The bfd linker seems even 594 // more aggressive at removing them. 595 auto Pos = std::remove_if( 596 Opt.Commands.begin(), Opt.Commands.end(), 597 [&](const std::unique_ptr<BaseCommand> &Base) { 598 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 599 return !findSection<ELFT>(Cmd->Name, *OutputSections); 600 return false; 601 }); 602 Opt.Commands.erase(Pos, Opt.Commands.end()); 603 } 604 605 static bool isAllSectionDescription(const OutputSectionCommand &Cmd) { 606 for (const std::unique_ptr<BaseCommand> &I : Cmd.Commands) 607 if (!isa<InputSectionDescription>(*I)) 608 return false; 609 return true; 610 } 611 612 template <class ELFT> void LinkerScript<ELFT>::adjustSectionsBeforeSorting() { 613 // If the output section contains only symbol assignments, create a 614 // corresponding output section. The bfd linker seems to only create them if 615 // '.' is assigned to, but creating these section should not have any bad 616 // consequeces and gives us a section to put the symbol in. 617 uintX_t Flags = SHF_ALLOC; 618 uint32_t Type = SHT_NOBITS; 619 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 620 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 621 if (!Cmd) 622 continue; 623 if (OutputSectionBase *Sec = 624 findSection<ELFT>(Cmd->Name, *OutputSections)) { 625 Flags = Sec->Flags; 626 Type = Sec->Type; 627 continue; 628 } 629 630 if (isAllSectionDescription(*Cmd)) 631 continue; 632 633 auto *OutSec = make<OutputSection<ELFT>>(Cmd->Name, Type, Flags); 634 OutputSections->push_back(OutSec); 635 } 636 } 637 638 template <class ELFT> void LinkerScript<ELFT>::adjustSectionsAfterSorting() { 639 placeOrphanSections(); 640 641 // If output section command doesn't specify any segments, 642 // and we haven't previously assigned any section to segment, 643 // then we simply assign section to the very first load segment. 644 // Below is an example of such linker script: 645 // PHDRS { seg PT_LOAD; } 646 // SECTIONS { .aaa : { *(.aaa) } } 647 std::vector<StringRef> DefPhdrs; 648 auto FirstPtLoad = 649 std::find_if(Opt.PhdrsCommands.begin(), Opt.PhdrsCommands.end(), 650 [](const PhdrsCommand &Cmd) { return Cmd.Type == PT_LOAD; }); 651 if (FirstPtLoad != Opt.PhdrsCommands.end()) 652 DefPhdrs.push_back(FirstPtLoad->Name); 653 654 // Walk the commands and propagate the program headers to commands that don't 655 // explicitly specify them. 656 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 657 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 658 if (!Cmd) 659 continue; 660 if (Cmd->Phdrs.empty()) 661 Cmd->Phdrs = DefPhdrs; 662 else 663 DefPhdrs = Cmd->Phdrs; 664 } 665 666 removeEmptyCommands(); 667 } 668 669 // When placing orphan sections, we want to place them after symbol assignments 670 // so that an orphan after 671 // begin_foo = .; 672 // foo : { *(foo) } 673 // end_foo = .; 674 // doesn't break the intended meaning of the begin/end symbols. 675 // We don't want to go over sections since Writer<ELFT>::sortSections is the 676 // one in charge of deciding the order of the sections. 677 // We don't want to go over alignments, since doing so in 678 // rx_sec : { *(rx_sec) } 679 // . = ALIGN(0x1000); 680 // /* The RW PT_LOAD starts here*/ 681 // rw_sec : { *(rw_sec) } 682 // would mean that the RW PT_LOAD would become unaligned. 683 static bool shouldSkip(const BaseCommand &Cmd) { 684 if (isa<OutputSectionCommand>(Cmd)) 685 return false; 686 const auto *Assign = dyn_cast<SymbolAssignment>(&Cmd); 687 if (!Assign) 688 return true; 689 return Assign->Name != "."; 690 } 691 692 // Orphan sections are sections present in the input files which are 693 // not explicitly placed into the output file by the linker script. 694 // 695 // When the control reaches this function, Opt.Commands contains 696 // output section commands for non-orphan sections only. This function 697 // adds new elements for orphan sections to Opt.Commands so that all 698 // sections are explicitly handled by Opt.Commands. 699 // 700 // Writer<ELFT>::sortSections has already sorted output sections. 701 // What we need to do is to scan OutputSections vector and 702 // Opt.Commands in parallel to find orphan sections. If there is an 703 // output section that doesn't have a corresponding entry in 704 // Opt.Commands, we will insert a new entry to Opt.Commands. 705 // 706 // There is some ambiguity as to where exactly a new entry should be 707 // inserted, because Opt.Commands contains not only output section 708 // commands but other types of commands such as symbol assignment 709 // expressions. There's no correct answer here due to the lack of the 710 // formal specification of the linker script. We use heuristics to 711 // determine whether a new output command should be added before or 712 // after another commands. For the details, look at shouldSkip 713 // function. 714 template <class ELFT> void LinkerScript<ELFT>::placeOrphanSections() { 715 // The OutputSections are already in the correct order. 716 // This loops creates or moves commands as needed so that they are in the 717 // correct order. 718 int CmdIndex = 0; 719 720 // As a horrible special case, skip the first . assignment if it is before any 721 // section. We do this because it is common to set a load address by starting 722 // the script with ". = 0xabcd" and the expectation is that every section is 723 // after that. 724 auto FirstSectionOrDotAssignment = 725 std::find_if(Opt.Commands.begin(), Opt.Commands.end(), 726 [](const std::unique_ptr<BaseCommand> &Cmd) { 727 if (isa<OutputSectionCommand>(*Cmd)) 728 return true; 729 const auto *Assign = dyn_cast<SymbolAssignment>(Cmd.get()); 730 if (!Assign) 731 return false; 732 return Assign->Name == "."; 733 }); 734 if (FirstSectionOrDotAssignment != Opt.Commands.end()) { 735 CmdIndex = FirstSectionOrDotAssignment - Opt.Commands.begin(); 736 if (isa<SymbolAssignment>(**FirstSectionOrDotAssignment)) 737 ++CmdIndex; 738 } 739 740 for (OutputSectionBase *Sec : *OutputSections) { 741 StringRef Name = Sec->getName(); 742 743 // Find the last spot where we can insert a command and still get the 744 // correct result. 745 auto CmdIter = Opt.Commands.begin() + CmdIndex; 746 auto E = Opt.Commands.end(); 747 while (CmdIter != E && shouldSkip(**CmdIter)) { 748 ++CmdIter; 749 ++CmdIndex; 750 } 751 752 auto Pos = 753 std::find_if(CmdIter, E, [&](const std::unique_ptr<BaseCommand> &Base) { 754 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 755 return Cmd && Cmd->Name == Name; 756 }); 757 if (Pos == E) { 758 Opt.Commands.insert(CmdIter, 759 llvm::make_unique<OutputSectionCommand>(Name)); 760 ++CmdIndex; 761 continue; 762 } 763 764 // Continue from where we found it. 765 CmdIndex = (Pos - Opt.Commands.begin()) + 1; 766 } 767 } 768 769 template <class ELFT> 770 void LinkerScript<ELFT>::assignAddresses(std::vector<PhdrEntry> &Phdrs) { 771 // Assign addresses as instructed by linker script SECTIONS sub-commands. 772 Dot = 0; 773 774 // A symbol can be assigned before any section is mentioned in the linker 775 // script. In an DSO, the symbol values are addresses, so the only important 776 // section values are: 777 // * SHN_UNDEF 778 // * SHN_ABS 779 // * Any value meaning a regular section. 780 // To handle that, create a dummy aether section that fills the void before 781 // the linker scripts switches to another section. It has an index of one 782 // which will map to whatever the first actual section is. 783 auto *Aether = make<OutputSectionBase>("", 0, SHF_ALLOC); 784 Aether->SectionIndex = 1; 785 switchTo(Aether); 786 787 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 788 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) { 789 if (Cmd->Name == ".") { 790 Dot = Cmd->Expression(Dot); 791 } else if (Cmd->Sym) { 792 assignSymbol<ELFT>(Cmd, Dot); 793 } 794 continue; 795 } 796 797 if (auto *Cmd = dyn_cast<AssertCommand>(Base.get())) { 798 Cmd->Expression(Dot); 799 continue; 800 } 801 802 auto *Cmd = cast<OutputSectionCommand>(Base.get()); 803 if (Cmd->AddrExpr) 804 Dot = Cmd->AddrExpr(Dot); 805 assignOffsets(Cmd); 806 } 807 808 uintX_t MinVA = std::numeric_limits<uintX_t>::max(); 809 for (OutputSectionBase *Sec : *OutputSections) { 810 if (Sec->Flags & SHF_ALLOC) 811 MinVA = std::min<uint64_t>(MinVA, Sec->Addr); 812 else 813 Sec->Addr = 0; 814 } 815 816 allocateHeaders<ELFT>(Phdrs, *OutputSections, MinVA); 817 } 818 819 // Creates program headers as instructed by PHDRS linker script command. 820 template <class ELFT> std::vector<PhdrEntry> LinkerScript<ELFT>::createPhdrs() { 821 std::vector<PhdrEntry> Ret; 822 823 // Process PHDRS and FILEHDR keywords because they are not 824 // real output sections and cannot be added in the following loop. 825 for (const PhdrsCommand &Cmd : Opt.PhdrsCommands) { 826 Ret.emplace_back(Cmd.Type, Cmd.Flags == UINT_MAX ? PF_R : Cmd.Flags); 827 PhdrEntry &Phdr = Ret.back(); 828 829 if (Cmd.HasFilehdr) 830 Phdr.add(Out<ELFT>::ElfHeader); 831 if (Cmd.HasPhdrs) 832 Phdr.add(Out<ELFT>::ProgramHeaders); 833 834 if (Cmd.LMAExpr) { 835 Phdr.p_paddr = Cmd.LMAExpr(0); 836 Phdr.HasLMA = true; 837 } 838 } 839 840 // Add output sections to program headers. 841 for (OutputSectionBase *Sec : *OutputSections) { 842 if (!(Sec->Flags & SHF_ALLOC)) 843 break; 844 845 // Assign headers specified by linker script 846 for (size_t Id : getPhdrIndices(Sec->getName())) { 847 Ret[Id].add(Sec); 848 if (Opt.PhdrsCommands[Id].Flags == UINT_MAX) 849 Ret[Id].p_flags |= Sec->getPhdrFlags(); 850 } 851 } 852 return Ret; 853 } 854 855 template <class ELFT> bool LinkerScript<ELFT>::ignoreInterpSection() { 856 // Ignore .interp section in case we have PHDRS specification 857 // and PT_INTERP isn't listed. 858 return !Opt.PhdrsCommands.empty() && 859 llvm::find_if(Opt.PhdrsCommands, [](const PhdrsCommand &Cmd) { 860 return Cmd.Type == PT_INTERP; 861 }) == Opt.PhdrsCommands.end(); 862 } 863 864 template <class ELFT> uint32_t LinkerScript<ELFT>::getFiller(StringRef Name) { 865 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 866 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 867 if (Cmd->Name == Name) 868 return Cmd->Filler; 869 return 0; 870 } 871 872 template <class ELFT> 873 static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) { 874 const endianness E = ELFT::TargetEndianness; 875 876 switch (Size) { 877 case 1: 878 *Buf = (uint8_t)Data; 879 break; 880 case 2: 881 write16<E>(Buf, Data); 882 break; 883 case 4: 884 write32<E>(Buf, Data); 885 break; 886 case 8: 887 write64<E>(Buf, Data); 888 break; 889 default: 890 llvm_unreachable("unsupported Size argument"); 891 } 892 } 893 894 template <class ELFT> 895 void LinkerScript<ELFT>::writeDataBytes(StringRef Name, uint8_t *Buf) { 896 int I = getSectionIndex(Name); 897 if (I == INT_MAX) 898 return; 899 900 auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get()); 901 for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) 902 if (auto *Data = dyn_cast<BytesDataCommand>(Base.get())) 903 writeInt<ELFT>(Buf + Data->Offset, Data->Expression(0), Data->Size); 904 } 905 906 template <class ELFT> bool LinkerScript<ELFT>::hasLMA(StringRef Name) { 907 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 908 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 909 if (Cmd->LMAExpr && Cmd->Name == Name) 910 return true; 911 return false; 912 } 913 914 // Returns the index of the given section name in linker script 915 // SECTIONS commands. Sections are laid out as the same order as they 916 // were in the script. If a given name did not appear in the script, 917 // it returns INT_MAX, so that it will be laid out at end of file. 918 template <class ELFT> int LinkerScript<ELFT>::getSectionIndex(StringRef Name) { 919 for (int I = 0, E = Opt.Commands.size(); I != E; ++I) 920 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get())) 921 if (Cmd->Name == Name) 922 return I; 923 return INT_MAX; 924 } 925 926 template <class ELFT> bool LinkerScript<ELFT>::hasPhdrsCommands() { 927 return !Opt.PhdrsCommands.empty(); 928 } 929 930 template <class ELFT> 931 const OutputSectionBase *LinkerScript<ELFT>::getOutputSection(const Twine &Loc, 932 StringRef Name) { 933 static OutputSectionBase FakeSec("", 0, 0); 934 935 for (OutputSectionBase *Sec : *OutputSections) 936 if (Sec->getName() == Name) 937 return Sec; 938 939 error(Loc + ": undefined section " + Name); 940 return &FakeSec; 941 } 942 943 // This function is essentially the same as getOutputSection(Name)->Size, 944 // but it won't print out an error message if a given section is not found. 945 // 946 // Linker script does not create an output section if its content is empty. 947 // We want to allow SIZEOF(.foo) where .foo is a section which happened to 948 // be empty. That is why this function is different from getOutputSection(). 949 template <class ELFT> 950 uint64_t LinkerScript<ELFT>::getOutputSectionSize(StringRef Name) { 951 for (OutputSectionBase *Sec : *OutputSections) 952 if (Sec->getName() == Name) 953 return Sec->Size; 954 return 0; 955 } 956 957 template <class ELFT> uint64_t LinkerScript<ELFT>::getHeaderSize() { 958 return elf::getHeaderSize<ELFT>(); 959 } 960 961 template <class ELFT> 962 uint64_t LinkerScript<ELFT>::getSymbolValue(const Twine &Loc, StringRef S) { 963 if (SymbolBody *B = Symtab<ELFT>::X->find(S)) 964 return B->getVA<ELFT>(); 965 error(Loc + ": symbol not found: " + S); 966 return 0; 967 } 968 969 template <class ELFT> bool LinkerScript<ELFT>::isDefined(StringRef S) { 970 return Symtab<ELFT>::X->find(S) != nullptr; 971 } 972 973 template <class ELFT> bool LinkerScript<ELFT>::isAbsolute(StringRef S) { 974 SymbolBody *Sym = Symtab<ELFT>::X->find(S); 975 auto *DR = dyn_cast_or_null<DefinedRegular<ELFT>>(Sym); 976 return DR && !DR->Section; 977 } 978 979 // Gets section symbol belongs to. Symbol "." doesn't belong to any 980 // specific section but isn't absolute at the same time, so we try 981 // to find suitable section for it as well. 982 template <class ELFT> 983 const OutputSectionBase *LinkerScript<ELFT>::getSymbolSection(StringRef S) { 984 if (SymbolBody *Sym = Symtab<ELFT>::X->find(S)) 985 return SymbolTableSection<ELFT>::getOutputSection(Sym); 986 return CurOutSec; 987 } 988 989 // Returns indices of ELF headers containing specific section, identified 990 // by Name. Each index is a zero based number of ELF header listed within 991 // PHDRS {} script block. 992 template <class ELFT> 993 std::vector<size_t> LinkerScript<ELFT>::getPhdrIndices(StringRef SectionName) { 994 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 995 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 996 if (!Cmd || Cmd->Name != SectionName) 997 continue; 998 999 std::vector<size_t> Ret; 1000 for (StringRef PhdrName : Cmd->Phdrs) 1001 Ret.push_back(getPhdrIndex(Cmd->Location, PhdrName)); 1002 return Ret; 1003 } 1004 return {}; 1005 } 1006 1007 template <class ELFT> 1008 size_t LinkerScript<ELFT>::getPhdrIndex(const Twine &Loc, StringRef PhdrName) { 1009 size_t I = 0; 1010 for (PhdrsCommand &Cmd : Opt.PhdrsCommands) { 1011 if (Cmd.Name == PhdrName) 1012 return I; 1013 ++I; 1014 } 1015 error(Loc + ": section header '" + PhdrName + "' is not listed in PHDRS"); 1016 return 0; 1017 } 1018 1019 class elf::ScriptParser final : public ScriptParserBase { 1020 typedef void (ScriptParser::*Handler)(); 1021 1022 public: 1023 ScriptParser(MemoryBufferRef MB) 1024 : ScriptParserBase(MB), 1025 IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} 1026 1027 void readLinkerScript(); 1028 void readVersionScript(); 1029 void readDynamicList(); 1030 1031 private: 1032 void addFile(StringRef Path); 1033 1034 void readAsNeeded(); 1035 void readEntry(); 1036 void readExtern(); 1037 void readGroup(); 1038 void readInclude(); 1039 void readMemory(); 1040 void readOutput(); 1041 void readOutputArch(); 1042 void readOutputFormat(); 1043 void readPhdrs(); 1044 void readSearchDir(); 1045 void readSections(); 1046 void readVersion(); 1047 void readVersionScriptCommand(); 1048 1049 SymbolAssignment *readAssignment(StringRef Name); 1050 BytesDataCommand *readBytesDataCommand(StringRef Tok); 1051 uint32_t readFill(); 1052 OutputSectionCommand *readOutputSectionDescription(StringRef OutSec); 1053 uint32_t readOutputSectionFiller(StringRef Tok); 1054 std::vector<StringRef> readOutputSectionPhdrs(); 1055 InputSectionDescription *readInputSectionDescription(StringRef Tok); 1056 StringMatcher readFilePatterns(); 1057 std::vector<SectionPattern> readInputSectionsList(); 1058 InputSectionDescription *readInputSectionRules(StringRef FilePattern); 1059 unsigned readPhdrType(); 1060 SortSectionPolicy readSortKind(); 1061 SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); 1062 SymbolAssignment *readProvideOrAssignment(StringRef Tok); 1063 void readSort(); 1064 Expr readAssert(); 1065 1066 uint64_t readMemoryAssignment(StringRef, StringRef, StringRef); 1067 std::pair<uint32_t, uint32_t> readMemoryAttributes(); 1068 1069 Expr readExpr(); 1070 Expr readExpr1(Expr Lhs, int MinPrec); 1071 StringRef readParenLiteral(); 1072 Expr readPrimary(); 1073 Expr readTernary(Expr Cond); 1074 Expr readParenExpr(); 1075 1076 // For parsing version script. 1077 std::vector<SymbolVersion> readVersionExtern(); 1078 void readAnonymousDeclaration(); 1079 void readVersionDeclaration(StringRef VerStr); 1080 std::vector<SymbolVersion> readSymbols(); 1081 void readLocals(); 1082 1083 ScriptConfiguration &Opt = *ScriptConfig; 1084 bool IsUnderSysroot; 1085 }; 1086 1087 void ScriptParser::readDynamicList() { 1088 expect("{"); 1089 readAnonymousDeclaration(); 1090 if (!atEOF()) 1091 setError("EOF expected, but got " + next()); 1092 } 1093 1094 void ScriptParser::readVersionScript() { 1095 readVersionScriptCommand(); 1096 if (!atEOF()) 1097 setError("EOF expected, but got " + next()); 1098 } 1099 1100 void ScriptParser::readVersionScriptCommand() { 1101 if (consume("{")) { 1102 readAnonymousDeclaration(); 1103 return; 1104 } 1105 1106 while (!atEOF() && !Error && peek() != "}") { 1107 StringRef VerStr = next(); 1108 if (VerStr == "{") { 1109 setError("anonymous version definition is used in " 1110 "combination with other version definitions"); 1111 return; 1112 } 1113 expect("{"); 1114 readVersionDeclaration(VerStr); 1115 } 1116 } 1117 1118 void ScriptParser::readVersion() { 1119 expect("{"); 1120 readVersionScriptCommand(); 1121 expect("}"); 1122 } 1123 1124 void ScriptParser::readLinkerScript() { 1125 while (!atEOF()) { 1126 StringRef Tok = next(); 1127 if (Tok == ";") 1128 continue; 1129 1130 if (Tok == "ASSERT") { 1131 Opt.Commands.emplace_back(new AssertCommand(readAssert())); 1132 } else if (Tok == "ENTRY") { 1133 readEntry(); 1134 } else if (Tok == "EXTERN") { 1135 readExtern(); 1136 } else if (Tok == "GROUP" || Tok == "INPUT") { 1137 readGroup(); 1138 } else if (Tok == "INCLUDE") { 1139 readInclude(); 1140 } else if (Tok == "MEMORY") { 1141 readMemory(); 1142 } else if (Tok == "OUTPUT") { 1143 readOutput(); 1144 } else if (Tok == "OUTPUT_ARCH") { 1145 readOutputArch(); 1146 } else if (Tok == "OUTPUT_FORMAT") { 1147 readOutputFormat(); 1148 } else if (Tok == "PHDRS") { 1149 readPhdrs(); 1150 } else if (Tok == "SEARCH_DIR") { 1151 readSearchDir(); 1152 } else if (Tok == "SECTIONS") { 1153 readSections(); 1154 } else if (Tok == "VERSION") { 1155 readVersion(); 1156 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { 1157 Opt.Commands.emplace_back(Cmd); 1158 } else { 1159 setError("unknown directive: " + Tok); 1160 } 1161 } 1162 } 1163 1164 void ScriptParser::addFile(StringRef S) { 1165 if (IsUnderSysroot && S.startswith("/")) { 1166 SmallString<128> PathData; 1167 StringRef Path = (Config->Sysroot + S).toStringRef(PathData); 1168 if (sys::fs::exists(Path)) { 1169 Driver->addFile(Saver.save(Path)); 1170 return; 1171 } 1172 } 1173 1174 if (sys::path::is_absolute(S)) { 1175 Driver->addFile(S); 1176 } else if (S.startswith("=")) { 1177 if (Config->Sysroot.empty()) 1178 Driver->addFile(S.substr(1)); 1179 else 1180 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1))); 1181 } else if (S.startswith("-l")) { 1182 Driver->addLibrary(S.substr(2)); 1183 } else if (sys::fs::exists(S)) { 1184 Driver->addFile(S); 1185 } else { 1186 if (Optional<std::string> Path = findFromSearchPaths(S)) 1187 Driver->addFile(Saver.save(*Path)); 1188 else 1189 setError("unable to find " + S); 1190 } 1191 } 1192 1193 void ScriptParser::readAsNeeded() { 1194 expect("("); 1195 bool Orig = Config->AsNeeded; 1196 Config->AsNeeded = true; 1197 while (!Error && !consume(")")) 1198 addFile(unquote(next())); 1199 Config->AsNeeded = Orig; 1200 } 1201 1202 void ScriptParser::readEntry() { 1203 // -e <symbol> takes predecence over ENTRY(<symbol>). 1204 expect("("); 1205 StringRef Tok = next(); 1206 if (Config->Entry.empty()) 1207 Config->Entry = Tok; 1208 expect(")"); 1209 } 1210 1211 void ScriptParser::readExtern() { 1212 expect("("); 1213 while (!Error && !consume(")")) 1214 Config->Undefined.push_back(next()); 1215 } 1216 1217 void ScriptParser::readGroup() { 1218 expect("("); 1219 while (!Error && !consume(")")) { 1220 StringRef Tok = next(); 1221 if (Tok == "AS_NEEDED") 1222 readAsNeeded(); 1223 else 1224 addFile(unquote(Tok)); 1225 } 1226 } 1227 1228 void ScriptParser::readInclude() { 1229 StringRef Tok = unquote(next()); 1230 1231 // https://sourceware.org/binutils/docs/ld/File-Commands.html: 1232 // The file will be searched for in the current directory, and in any 1233 // directory specified with the -L option. 1234 if (sys::fs::exists(Tok)) { 1235 if (Optional<MemoryBufferRef> MB = readFile(Tok)) 1236 tokenize(*MB); 1237 return; 1238 } 1239 if (Optional<std::string> Path = findFromSearchPaths(Tok)) { 1240 if (Optional<MemoryBufferRef> MB = readFile(*Path)) 1241 tokenize(*MB); 1242 return; 1243 } 1244 setError("cannot open " + Tok); 1245 } 1246 1247 void ScriptParser::readOutput() { 1248 // -o <file> takes predecence over OUTPUT(<file>). 1249 expect("("); 1250 StringRef Tok = next(); 1251 if (Config->OutputFile.empty()) 1252 Config->OutputFile = unquote(Tok); 1253 expect(")"); 1254 } 1255 1256 void ScriptParser::readOutputArch() { 1257 // OUTPUT_ARCH is ignored for now. 1258 expect("("); 1259 while (!Error && !consume(")")) 1260 skip(); 1261 } 1262 1263 void ScriptParser::readOutputFormat() { 1264 // Error checking only for now. 1265 expect("("); 1266 skip(); 1267 StringRef Tok = next(); 1268 if (Tok == ")") 1269 return; 1270 if (Tok != ",") { 1271 setError("unexpected token: " + Tok); 1272 return; 1273 } 1274 skip(); 1275 expect(","); 1276 skip(); 1277 expect(")"); 1278 } 1279 1280 void ScriptParser::readPhdrs() { 1281 expect("{"); 1282 while (!Error && !consume("}")) { 1283 StringRef Tok = next(); 1284 Opt.PhdrsCommands.push_back( 1285 {Tok, PT_NULL, false, false, UINT_MAX, nullptr}); 1286 PhdrsCommand &PhdrCmd = Opt.PhdrsCommands.back(); 1287 1288 PhdrCmd.Type = readPhdrType(); 1289 do { 1290 Tok = next(); 1291 if (Tok == ";") 1292 break; 1293 if (Tok == "FILEHDR") 1294 PhdrCmd.HasFilehdr = true; 1295 else if (Tok == "PHDRS") 1296 PhdrCmd.HasPhdrs = true; 1297 else if (Tok == "AT") 1298 PhdrCmd.LMAExpr = readParenExpr(); 1299 else if (Tok == "FLAGS") { 1300 expect("("); 1301 // Passing 0 for the value of dot is a bit of a hack. It means that 1302 // we accept expressions like ".|1". 1303 PhdrCmd.Flags = readExpr()(0); 1304 expect(")"); 1305 } else 1306 setError("unexpected header attribute: " + Tok); 1307 } while (!Error); 1308 } 1309 } 1310 1311 void ScriptParser::readSearchDir() { 1312 expect("("); 1313 StringRef Tok = next(); 1314 if (!Config->Nostdlib) 1315 Config->SearchPaths.push_back(unquote(Tok)); 1316 expect(")"); 1317 } 1318 1319 void ScriptParser::readSections() { 1320 Opt.HasSections = true; 1321 // -no-rosegment is used to avoid placing read only non-executable sections in 1322 // their own segment. We do the same if SECTIONS command is present in linker 1323 // script. See comment for computeFlags(). 1324 Config->SingleRoRx = true; 1325 1326 expect("{"); 1327 while (!Error && !consume("}")) { 1328 StringRef Tok = next(); 1329 BaseCommand *Cmd = readProvideOrAssignment(Tok); 1330 if (!Cmd) { 1331 if (Tok == "ASSERT") 1332 Cmd = new AssertCommand(readAssert()); 1333 else 1334 Cmd = readOutputSectionDescription(Tok); 1335 } 1336 Opt.Commands.emplace_back(Cmd); 1337 } 1338 } 1339 1340 static int precedence(StringRef Op) { 1341 return StringSwitch<int>(Op) 1342 .Cases("*", "/", 5) 1343 .Cases("+", "-", 4) 1344 .Cases("<<", ">>", 3) 1345 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 1346 .Cases("&", "|", 1) 1347 .Default(-1); 1348 } 1349 1350 StringMatcher ScriptParser::readFilePatterns() { 1351 std::vector<StringRef> V; 1352 while (!Error && !consume(")")) 1353 V.push_back(next()); 1354 return StringMatcher(V); 1355 } 1356 1357 SortSectionPolicy ScriptParser::readSortKind() { 1358 if (consume("SORT") || consume("SORT_BY_NAME")) 1359 return SortSectionPolicy::Name; 1360 if (consume("SORT_BY_ALIGNMENT")) 1361 return SortSectionPolicy::Alignment; 1362 if (consume("SORT_BY_INIT_PRIORITY")) 1363 return SortSectionPolicy::Priority; 1364 if (consume("SORT_NONE")) 1365 return SortSectionPolicy::None; 1366 return SortSectionPolicy::Default; 1367 } 1368 1369 // Method reads a list of sequence of excluded files and section globs given in 1370 // a following form: ((EXCLUDE_FILE(file_pattern+))? section_pattern+)+ 1371 // Example: *(.foo.1 EXCLUDE_FILE (*a.o) .foo.2 EXCLUDE_FILE (*b.o) .foo.3) 1372 // The semantics of that is next: 1373 // * Include .foo.1 from every file. 1374 // * Include .foo.2 from every file but a.o 1375 // * Include .foo.3 from every file but b.o 1376 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 1377 std::vector<SectionPattern> Ret; 1378 while (!Error && peek() != ")") { 1379 StringMatcher ExcludeFilePat; 1380 if (consume("EXCLUDE_FILE")) { 1381 expect("("); 1382 ExcludeFilePat = readFilePatterns(); 1383 } 1384 1385 std::vector<StringRef> V; 1386 while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") 1387 V.push_back(next()); 1388 1389 if (!V.empty()) 1390 Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); 1391 else 1392 setError("section pattern is expected"); 1393 } 1394 return Ret; 1395 } 1396 1397 // Reads contents of "SECTIONS" directive. That directive contains a 1398 // list of glob patterns for input sections. The grammar is as follows. 1399 // 1400 // <patterns> ::= <section-list> 1401 // | <sort> "(" <section-list> ")" 1402 // | <sort> "(" <sort> "(" <section-list> ")" ")" 1403 // 1404 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" 1405 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" 1406 // 1407 // <section-list> is parsed by readInputSectionsList(). 1408 InputSectionDescription * 1409 ScriptParser::readInputSectionRules(StringRef FilePattern) { 1410 auto *Cmd = new InputSectionDescription(FilePattern); 1411 expect("("); 1412 while (!Error && !consume(")")) { 1413 SortSectionPolicy Outer = readSortKind(); 1414 SortSectionPolicy Inner = SortSectionPolicy::Default; 1415 std::vector<SectionPattern> V; 1416 if (Outer != SortSectionPolicy::Default) { 1417 expect("("); 1418 Inner = readSortKind(); 1419 if (Inner != SortSectionPolicy::Default) { 1420 expect("("); 1421 V = readInputSectionsList(); 1422 expect(")"); 1423 } else { 1424 V = readInputSectionsList(); 1425 } 1426 expect(")"); 1427 } else { 1428 V = readInputSectionsList(); 1429 } 1430 1431 for (SectionPattern &Pat : V) { 1432 Pat.SortInner = Inner; 1433 Pat.SortOuter = Outer; 1434 } 1435 1436 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 1437 } 1438 return Cmd; 1439 } 1440 1441 InputSectionDescription * 1442 ScriptParser::readInputSectionDescription(StringRef Tok) { 1443 // Input section wildcard can be surrounded by KEEP. 1444 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 1445 if (Tok == "KEEP") { 1446 expect("("); 1447 StringRef FilePattern = next(); 1448 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 1449 expect(")"); 1450 Opt.KeptSections.push_back(Cmd); 1451 return Cmd; 1452 } 1453 return readInputSectionRules(Tok); 1454 } 1455 1456 void ScriptParser::readSort() { 1457 expect("("); 1458 expect("CONSTRUCTORS"); 1459 expect(")"); 1460 } 1461 1462 Expr ScriptParser::readAssert() { 1463 expect("("); 1464 Expr E = readExpr(); 1465 expect(","); 1466 StringRef Msg = unquote(next()); 1467 expect(")"); 1468 return [=](uint64_t Dot) { 1469 uint64_t V = E(Dot); 1470 if (!V) 1471 error(Msg); 1472 return V; 1473 }; 1474 } 1475 1476 // Reads a FILL(expr) command. We handle the FILL command as an 1477 // alias for =fillexp section attribute, which is different from 1478 // what GNU linkers do. 1479 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 1480 uint32_t ScriptParser::readFill() { 1481 expect("("); 1482 uint32_t V = readOutputSectionFiller(next()); 1483 expect(")"); 1484 expect(";"); 1485 return V; 1486 } 1487 1488 OutputSectionCommand * 1489 ScriptParser::readOutputSectionDescription(StringRef OutSec) { 1490 OutputSectionCommand *Cmd = new OutputSectionCommand(OutSec); 1491 Cmd->Location = getCurrentLocation(); 1492 1493 // Read an address expression. 1494 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html#Output-Section-Address 1495 if (peek() != ":") 1496 Cmd->AddrExpr = readExpr(); 1497 1498 expect(":"); 1499 1500 if (consume("AT")) 1501 Cmd->LMAExpr = readParenExpr(); 1502 if (consume("ALIGN")) 1503 Cmd->AlignExpr = readParenExpr(); 1504 if (consume("SUBALIGN")) 1505 Cmd->SubalignExpr = readParenExpr(); 1506 1507 // Parse constraints. 1508 if (consume("ONLY_IF_RO")) 1509 Cmd->Constraint = ConstraintKind::ReadOnly; 1510 if (consume("ONLY_IF_RW")) 1511 Cmd->Constraint = ConstraintKind::ReadWrite; 1512 expect("{"); 1513 1514 while (!Error && !consume("}")) { 1515 StringRef Tok = next(); 1516 if (Tok == ";") { 1517 // Empty commands are allowed. Do nothing here. 1518 } else if (SymbolAssignment *Assignment = readProvideOrAssignment(Tok)) { 1519 Cmd->Commands.emplace_back(Assignment); 1520 } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) { 1521 Cmd->Commands.emplace_back(Data); 1522 } else if (Tok == "ASSERT") { 1523 Cmd->Commands.emplace_back(new AssertCommand(readAssert())); 1524 expect(";"); 1525 } else if (Tok == "CONSTRUCTORS") { 1526 // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors 1527 // by name. This is for very old file formats such as ECOFF/XCOFF. 1528 // For ELF, we should ignore. 1529 } else if (Tok == "FILL") { 1530 Cmd->Filler = readFill(); 1531 } else if (Tok == "SORT") { 1532 readSort(); 1533 } else if (peek() == "(") { 1534 Cmd->Commands.emplace_back(readInputSectionDescription(Tok)); 1535 } else { 1536 setError("unknown command " + Tok); 1537 } 1538 } 1539 1540 if (consume(">")) 1541 Cmd->MemoryRegionName = next(); 1542 1543 Cmd->Phdrs = readOutputSectionPhdrs(); 1544 1545 if (consume("=")) 1546 Cmd->Filler = readOutputSectionFiller(next()); 1547 else if (peek().startswith("=")) 1548 Cmd->Filler = readOutputSectionFiller(next().drop_front()); 1549 1550 // Consume optional comma following output section command. 1551 consume(","); 1552 1553 return Cmd; 1554 } 1555 1556 // Read "=<number>" where <number> is an octal/decimal/hexadecimal number. 1557 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 1558 // 1559 // ld.gold is not fully compatible with ld.bfd. ld.bfd handles 1560 // hexstrings as blobs of arbitrary sizes, while ld.gold handles them 1561 // as 32-bit big-endian values. We will do the same as ld.gold does 1562 // because it's simpler than what ld.bfd does. 1563 uint32_t ScriptParser::readOutputSectionFiller(StringRef Tok) { 1564 uint32_t V; 1565 if (!Tok.getAsInteger(0, V)) 1566 return V; 1567 setError("invalid filler expression: " + Tok); 1568 return 0; 1569 } 1570 1571 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { 1572 expect("("); 1573 SymbolAssignment *Cmd = readAssignment(next()); 1574 Cmd->Provide = Provide; 1575 Cmd->Hidden = Hidden; 1576 expect(")"); 1577 expect(";"); 1578 return Cmd; 1579 } 1580 1581 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) { 1582 SymbolAssignment *Cmd = nullptr; 1583 if (peek() == "=" || peek() == "+=") { 1584 Cmd = readAssignment(Tok); 1585 expect(";"); 1586 } else if (Tok == "PROVIDE") { 1587 Cmd = readProvideHidden(true, false); 1588 } else if (Tok == "HIDDEN") { 1589 Cmd = readProvideHidden(false, true); 1590 } else if (Tok == "PROVIDE_HIDDEN") { 1591 Cmd = readProvideHidden(true, true); 1592 } 1593 return Cmd; 1594 } 1595 1596 static uint64_t getSymbolValue(const Twine &Loc, StringRef S, uint64_t Dot) { 1597 if (S == ".") 1598 return Dot; 1599 return ScriptBase->getSymbolValue(Loc, S); 1600 } 1601 1602 static bool isAbsolute(StringRef S) { 1603 if (S == ".") 1604 return false; 1605 return ScriptBase->isAbsolute(S); 1606 } 1607 1608 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { 1609 StringRef Op = next(); 1610 Expr E; 1611 assert(Op == "=" || Op == "+="); 1612 if (consume("ABSOLUTE")) { 1613 // The RHS may be something like "ABSOLUTE(.) & 0xff". 1614 // Call readExpr1 to read the whole expression. 1615 E = readExpr1(readParenExpr(), 0); 1616 E.IsAbsolute = [] { return true; }; 1617 } else { 1618 E = readExpr(); 1619 } 1620 if (Op == "+=") { 1621 std::string Loc = getCurrentLocation(); 1622 E = [=](uint64_t Dot) { 1623 return getSymbolValue(Loc, Name, Dot) + E(Dot); 1624 }; 1625 } 1626 return new SymbolAssignment(Name, E); 1627 } 1628 1629 // This is an operator-precedence parser to parse a linker 1630 // script expression. 1631 Expr ScriptParser::readExpr() { return readExpr1(readPrimary(), 0); } 1632 1633 static Expr combine(StringRef Op, Expr L, Expr R) { 1634 auto IsAbs = [=] { return L.IsAbsolute() && R.IsAbsolute(); }; 1635 auto GetOutSec = [=] { 1636 const OutputSectionBase *S = L.Section(); 1637 return S ? S : R.Section(); 1638 }; 1639 1640 if (Op == "*") 1641 return [=](uint64_t Dot) { return L(Dot) * R(Dot); }; 1642 if (Op == "/") { 1643 return [=](uint64_t Dot) -> uint64_t { 1644 uint64_t RHS = R(Dot); 1645 if (RHS == 0) { 1646 error("division by zero"); 1647 return 0; 1648 } 1649 return L(Dot) / RHS; 1650 }; 1651 } 1652 if (Op == "+") 1653 return {[=](uint64_t Dot) { return L(Dot) + R(Dot); }, IsAbs, GetOutSec}; 1654 if (Op == "-") 1655 return {[=](uint64_t Dot) { return L(Dot) - R(Dot); }, IsAbs, GetOutSec}; 1656 if (Op == "<<") 1657 return [=](uint64_t Dot) { return L(Dot) << R(Dot); }; 1658 if (Op == ">>") 1659 return [=](uint64_t Dot) { return L(Dot) >> R(Dot); }; 1660 if (Op == "<") 1661 return [=](uint64_t Dot) { return L(Dot) < R(Dot); }; 1662 if (Op == ">") 1663 return [=](uint64_t Dot) { return L(Dot) > R(Dot); }; 1664 if (Op == ">=") 1665 return [=](uint64_t Dot) { return L(Dot) >= R(Dot); }; 1666 if (Op == "<=") 1667 return [=](uint64_t Dot) { return L(Dot) <= R(Dot); }; 1668 if (Op == "==") 1669 return [=](uint64_t Dot) { return L(Dot) == R(Dot); }; 1670 if (Op == "!=") 1671 return [=](uint64_t Dot) { return L(Dot) != R(Dot); }; 1672 if (Op == "&") 1673 return [=](uint64_t Dot) { return L(Dot) & R(Dot); }; 1674 if (Op == "|") 1675 return [=](uint64_t Dot) { return L(Dot) | R(Dot); }; 1676 llvm_unreachable("invalid operator"); 1677 } 1678 1679 // This is a part of the operator-precedence parser. This function 1680 // assumes that the remaining token stream starts with an operator. 1681 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { 1682 while (!atEOF() && !Error) { 1683 // Read an operator and an expression. 1684 if (consume("?")) 1685 return readTernary(Lhs); 1686 StringRef Op1 = peek(); 1687 if (precedence(Op1) < MinPrec) 1688 break; 1689 skip(); 1690 Expr Rhs = readPrimary(); 1691 1692 // Evaluate the remaining part of the expression first if the 1693 // next operator has greater precedence than the previous one. 1694 // For example, if we have read "+" and "3", and if the next 1695 // operator is "*", then we'll evaluate 3 * ... part first. 1696 while (!atEOF()) { 1697 StringRef Op2 = peek(); 1698 if (precedence(Op2) <= precedence(Op1)) 1699 break; 1700 Rhs = readExpr1(Rhs, precedence(Op2)); 1701 } 1702 1703 Lhs = combine(Op1, Lhs, Rhs); 1704 } 1705 return Lhs; 1706 } 1707 1708 uint64_t static getConstant(StringRef S) { 1709 if (S == "COMMONPAGESIZE") 1710 return Target->PageSize; 1711 if (S == "MAXPAGESIZE") 1712 return Config->MaxPageSize; 1713 error("unknown constant: " + S); 1714 return 0; 1715 } 1716 1717 // Parses Tok as an integer. Returns true if successful. 1718 // It recognizes hexadecimal (prefixed with "0x" or suffixed with "H") 1719 // and decimal numbers. Decimal numbers may have "K" (kilo) or 1720 // "M" (mega) prefixes. 1721 static bool readInteger(StringRef Tok, uint64_t &Result) { 1722 // Negative number 1723 if (Tok.startswith("-")) { 1724 if (!readInteger(Tok.substr(1), Result)) 1725 return false; 1726 Result = -Result; 1727 return true; 1728 } 1729 1730 // Hexadecimal 1731 if (Tok.startswith_lower("0x")) 1732 return !Tok.substr(2).getAsInteger(16, Result); 1733 if (Tok.endswith_lower("H")) 1734 return !Tok.drop_back().getAsInteger(16, Result); 1735 1736 // Decimal 1737 int Suffix = 1; 1738 if (Tok.endswith_lower("K")) { 1739 Suffix = 1024; 1740 Tok = Tok.drop_back(); 1741 } else if (Tok.endswith_lower("M")) { 1742 Suffix = 1024 * 1024; 1743 Tok = Tok.drop_back(); 1744 } 1745 if (Tok.getAsInteger(10, Result)) 1746 return false; 1747 Result *= Suffix; 1748 return true; 1749 } 1750 1751 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { 1752 int Size = StringSwitch<unsigned>(Tok) 1753 .Case("BYTE", 1) 1754 .Case("SHORT", 2) 1755 .Case("LONG", 4) 1756 .Case("QUAD", 8) 1757 .Default(-1); 1758 if (Size == -1) 1759 return nullptr; 1760 1761 return new BytesDataCommand(readParenExpr(), Size); 1762 } 1763 1764 StringRef ScriptParser::readParenLiteral() { 1765 expect("("); 1766 StringRef Tok = next(); 1767 expect(")"); 1768 return Tok; 1769 } 1770 1771 Expr ScriptParser::readPrimary() { 1772 if (peek() == "(") 1773 return readParenExpr(); 1774 1775 StringRef Tok = next(); 1776 std::string Location = getCurrentLocation(); 1777 1778 if (Tok == "~") { 1779 Expr E = readPrimary(); 1780 return [=](uint64_t Dot) { return ~E(Dot); }; 1781 } 1782 if (Tok == "-") { 1783 Expr E = readPrimary(); 1784 return [=](uint64_t Dot) { return -E(Dot); }; 1785 } 1786 1787 // Built-in functions are parsed here. 1788 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 1789 if (Tok == "ADDR") { 1790 StringRef Name = readParenLiteral(); 1791 return {[=](uint64_t Dot) { 1792 return ScriptBase->getOutputSection(Location, Name)->Addr; 1793 }, 1794 [=] { return false; }, 1795 [=] { return ScriptBase->getOutputSection(Location, Name); }}; 1796 } 1797 if (Tok == "LOADADDR") { 1798 StringRef Name = readParenLiteral(); 1799 return [=](uint64_t Dot) { 1800 return ScriptBase->getOutputSection(Location, Name)->getLMA(); 1801 }; 1802 } 1803 if (Tok == "ASSERT") 1804 return readAssert(); 1805 if (Tok == "ALIGN") { 1806 expect("("); 1807 Expr E = readExpr(); 1808 if (consume(",")) { 1809 Expr E2 = readExpr(); 1810 expect(")"); 1811 return [=](uint64_t Dot) { return alignTo(E(Dot), E2(Dot)); }; 1812 } 1813 expect(")"); 1814 return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; 1815 } 1816 if (Tok == "CONSTANT") { 1817 StringRef Name = readParenLiteral(); 1818 return [=](uint64_t Dot) { return getConstant(Name); }; 1819 } 1820 if (Tok == "DEFINED") { 1821 StringRef Name = readParenLiteral(); 1822 return [=](uint64_t Dot) { return ScriptBase->isDefined(Name) ? 1 : 0; }; 1823 } 1824 if (Tok == "SEGMENT_START") { 1825 expect("("); 1826 skip(); 1827 expect(","); 1828 Expr E = readExpr(); 1829 expect(")"); 1830 return [=](uint64_t Dot) { return E(Dot); }; 1831 } 1832 if (Tok == "DATA_SEGMENT_ALIGN") { 1833 expect("("); 1834 Expr E = readExpr(); 1835 expect(","); 1836 readExpr(); 1837 expect(")"); 1838 return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; 1839 } 1840 if (Tok == "DATA_SEGMENT_END") { 1841 expect("("); 1842 expect("."); 1843 expect(")"); 1844 return [](uint64_t Dot) { return Dot; }; 1845 } 1846 // GNU linkers implements more complicated logic to handle 1847 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and just align to 1848 // the next page boundary for simplicity. 1849 if (Tok == "DATA_SEGMENT_RELRO_END") { 1850 expect("("); 1851 readExpr(); 1852 expect(","); 1853 readExpr(); 1854 expect(")"); 1855 return [](uint64_t Dot) { return alignTo(Dot, Target->PageSize); }; 1856 } 1857 if (Tok == "SIZEOF") { 1858 StringRef Name = readParenLiteral(); 1859 return [=](uint64_t Dot) { return ScriptBase->getOutputSectionSize(Name); }; 1860 } 1861 if (Tok == "ALIGNOF") { 1862 StringRef Name = readParenLiteral(); 1863 return [=](uint64_t Dot) { 1864 return ScriptBase->getOutputSection(Location, Name)->Addralign; 1865 }; 1866 } 1867 if (Tok == "SIZEOF_HEADERS") 1868 return [=](uint64_t Dot) { return ScriptBase->getHeaderSize(); }; 1869 1870 // Tok is a literal number. 1871 uint64_t V; 1872 if (readInteger(Tok, V)) 1873 return [=](uint64_t Dot) { return V; }; 1874 1875 // Tok is a symbol name. 1876 if (Tok != "." && !isValidCIdentifier(Tok)) 1877 setError("malformed number: " + Tok); 1878 return {[=](uint64_t Dot) { return getSymbolValue(Location, Tok, Dot); }, 1879 [=] { return isAbsolute(Tok); }, 1880 [=] { return ScriptBase->getSymbolSection(Tok); }}; 1881 } 1882 1883 Expr ScriptParser::readTernary(Expr Cond) { 1884 Expr L = readExpr(); 1885 expect(":"); 1886 Expr R = readExpr(); 1887 return [=](uint64_t Dot) { return Cond(Dot) ? L(Dot) : R(Dot); }; 1888 } 1889 1890 Expr ScriptParser::readParenExpr() { 1891 expect("("); 1892 Expr E = readExpr(); 1893 expect(")"); 1894 return E; 1895 } 1896 1897 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 1898 std::vector<StringRef> Phdrs; 1899 while (!Error && peek().startswith(":")) { 1900 StringRef Tok = next(); 1901 Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); 1902 } 1903 return Phdrs; 1904 } 1905 1906 // Read a program header type name. The next token must be a 1907 // name of a program header type or a constant (e.g. "0x3"). 1908 unsigned ScriptParser::readPhdrType() { 1909 StringRef Tok = next(); 1910 uint64_t Val; 1911 if (readInteger(Tok, Val)) 1912 return Val; 1913 1914 unsigned Ret = StringSwitch<unsigned>(Tok) 1915 .Case("PT_NULL", PT_NULL) 1916 .Case("PT_LOAD", PT_LOAD) 1917 .Case("PT_DYNAMIC", PT_DYNAMIC) 1918 .Case("PT_INTERP", PT_INTERP) 1919 .Case("PT_NOTE", PT_NOTE) 1920 .Case("PT_SHLIB", PT_SHLIB) 1921 .Case("PT_PHDR", PT_PHDR) 1922 .Case("PT_TLS", PT_TLS) 1923 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1924 .Case("PT_GNU_STACK", PT_GNU_STACK) 1925 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1926 .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) 1927 .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) 1928 .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) 1929 .Default(-1); 1930 1931 if (Ret == (unsigned)-1) { 1932 setError("invalid program header type: " + Tok); 1933 return PT_NULL; 1934 } 1935 return Ret; 1936 } 1937 1938 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 1939 void ScriptParser::readAnonymousDeclaration() { 1940 // Read global symbols first. "global:" is default, so if there's 1941 // no label, we assume global symbols. 1942 if (peek() != "local") { 1943 if (consume("global")) 1944 expect(":"); 1945 Config->VersionScriptGlobals = readSymbols(); 1946 } 1947 readLocals(); 1948 expect("}"); 1949 expect(";"); 1950 } 1951 1952 void ScriptParser::readLocals() { 1953 if (!consume("local")) 1954 return; 1955 expect(":"); 1956 std::vector<SymbolVersion> Locals = readSymbols(); 1957 for (SymbolVersion V : Locals) { 1958 if (V.Name == "*") { 1959 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1960 continue; 1961 } 1962 Config->VersionScriptLocals.push_back(V); 1963 } 1964 } 1965 1966 // Reads a list of symbols, e.g. "VerStr { global: foo; bar; local: *; };". 1967 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1968 // Identifiers start at 2 because 0 and 1 are reserved 1969 // for VER_NDX_LOCAL and VER_NDX_GLOBAL constants. 1970 uint16_t VersionId = Config->VersionDefinitions.size() + 2; 1971 Config->VersionDefinitions.push_back({VerStr, VersionId}); 1972 1973 // Read global symbols. 1974 if (peek() != "local") { 1975 if (consume("global")) 1976 expect(":"); 1977 Config->VersionDefinitions.back().Globals = readSymbols(); 1978 } 1979 readLocals(); 1980 expect("}"); 1981 1982 // Each version may have a parent version. For example, "Ver2" 1983 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" 1984 // as a parent. This version hierarchy is, probably against your 1985 // instinct, purely for hint; the runtime doesn't care about it 1986 // at all. In LLD, we simply ignore it. 1987 if (peek() != ";") 1988 skip(); 1989 expect(";"); 1990 } 1991 1992 // Reads a list of symbols for a versions cript. 1993 std::vector<SymbolVersion> ScriptParser::readSymbols() { 1994 std::vector<SymbolVersion> Ret; 1995 for (;;) { 1996 if (consume("extern")) { 1997 for (SymbolVersion V : readVersionExtern()) 1998 Ret.push_back(V); 1999 continue; 2000 } 2001 2002 if (peek() == "}" || (peek() == "local" && peek(1) == ":") || Error) 2003 break; 2004 StringRef Tok = next(); 2005 Ret.push_back({unquote(Tok), false, hasWildcard(Tok)}); 2006 expect(";"); 2007 } 2008 return Ret; 2009 } 2010 2011 // Reads an "extern C++" directive, e.g., 2012 // "extern "C++" { ns::*; "f(int, double)"; };" 2013 std::vector<SymbolVersion> ScriptParser::readVersionExtern() { 2014 StringRef Tok = next(); 2015 bool IsCXX = Tok == "\"C++\""; 2016 if (!IsCXX && Tok != "\"C\"") 2017 setError("Unknown language"); 2018 expect("{"); 2019 2020 std::vector<SymbolVersion> Ret; 2021 while (!Error && peek() != "}") { 2022 StringRef Tok = next(); 2023 bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); 2024 Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); 2025 expect(";"); 2026 } 2027 2028 expect("}"); 2029 expect(";"); 2030 return Ret; 2031 } 2032 2033 uint64_t ScriptParser::readMemoryAssignment( 2034 StringRef S1, StringRef S2, StringRef S3) { 2035 if (!(consume(S1) || consume(S2) || consume(S3))) { 2036 setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); 2037 return 0; 2038 } 2039 expect("="); 2040 2041 // TODO: Fully support constant expressions. 2042 uint64_t Val; 2043 if (!readInteger(next(), Val)) 2044 setError("nonconstant expression for "+ S1); 2045 return Val; 2046 } 2047 2048 // Parse the MEMORY command as specified in: 2049 // https://sourceware.org/binutils/docs/ld/MEMORY.html 2050 // 2051 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... } 2052 void ScriptParser::readMemory() { 2053 expect("{"); 2054 while (!Error && !consume("}")) { 2055 StringRef Name = next(); 2056 2057 uint32_t Flags = 0; 2058 uint32_t NegFlags = 0; 2059 if (consume("(")) { 2060 std::tie(Flags, NegFlags) = readMemoryAttributes(); 2061 expect(")"); 2062 } 2063 expect(":"); 2064 2065 uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); 2066 expect(","); 2067 uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); 2068 2069 // Add the memory region to the region map (if it doesn't already exist). 2070 auto It = Opt.MemoryRegions.find(Name); 2071 if (It != Opt.MemoryRegions.end()) 2072 setError("region '" + Name + "' already defined"); 2073 else 2074 Opt.MemoryRegions[Name] = {Name, Origin, Length, Origin, Flags, NegFlags}; 2075 } 2076 } 2077 2078 // This function parses the attributes used to match against section 2079 // flags when placing output sections in a memory region. These flags 2080 // are only used when an explicit memory region name is not used. 2081 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { 2082 uint32_t Flags = 0; 2083 uint32_t NegFlags = 0; 2084 bool Invert = false; 2085 2086 for (char C : next().lower()) { 2087 uint32_t Flag = 0; 2088 if (C == '!') 2089 Invert = !Invert; 2090 else if (C == 'w') 2091 Flag = SHF_WRITE; 2092 else if (C == 'x') 2093 Flag = SHF_EXECINSTR; 2094 else if (C == 'a') 2095 Flag = SHF_ALLOC; 2096 else if (C != 'r') 2097 setError("invalid memory region attribute"); 2098 2099 if (Invert) 2100 NegFlags |= Flag; 2101 else 2102 Flags |= Flag; 2103 } 2104 return {Flags, NegFlags}; 2105 } 2106 2107 void elf::readLinkerScript(MemoryBufferRef MB) { 2108 ScriptParser(MB).readLinkerScript(); 2109 } 2110 2111 void elf::readVersionScript(MemoryBufferRef MB) { 2112 ScriptParser(MB).readVersionScript(); 2113 } 2114 2115 void elf::readDynamicList(MemoryBufferRef MB) { 2116 ScriptParser(MB).readDynamicList(); 2117 } 2118 2119 template class elf::LinkerScript<ELF32LE>; 2120 template class elf::LinkerScript<ELF32BE>; 2121 template class elf::LinkerScript<ELF64LE>; 2122 template class elf::LinkerScript<ELF64BE>; 2123