1 //===- LinkerScript.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the parser/evaluator of the linker script. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "LinkerScript.h" 15 #include "Config.h" 16 #include "Driver.h" 17 #include "InputSection.h" 18 #include "Memory.h" 19 #include "OutputSections.h" 20 #include "ScriptParser.h" 21 #include "Strings.h" 22 #include "SymbolTable.h" 23 #include "Symbols.h" 24 #include "SyntheticSections.h" 25 #include "Target.h" 26 #include "Writer.h" 27 #include "llvm/ADT/STLExtras.h" 28 #include "llvm/ADT/SmallString.h" 29 #include "llvm/ADT/StringRef.h" 30 #include "llvm/ADT/StringSwitch.h" 31 #include "llvm/Support/Casting.h" 32 #include "llvm/Support/ELF.h" 33 #include "llvm/Support/Endian.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/FileSystem.h" 36 #include "llvm/Support/MathExtras.h" 37 #include "llvm/Support/Path.h" 38 #include <algorithm> 39 #include <cassert> 40 #include <cstddef> 41 #include <cstdint> 42 #include <iterator> 43 #include <limits> 44 #include <memory> 45 #include <string> 46 #include <tuple> 47 #include <vector> 48 49 using namespace llvm; 50 using namespace llvm::ELF; 51 using namespace llvm::object; 52 using namespace llvm::support::endian; 53 using namespace lld; 54 using namespace lld::elf; 55 56 LinkerScriptBase *elf::ScriptBase; 57 ScriptConfiguration *elf::ScriptConfig; 58 59 template <class ELFT> static SymbolBody *addRegular(SymbolAssignment *Cmd) { 60 uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT; 61 Symbol *Sym = Symtab<ELFT>::X->addUndefined( 62 Cmd->Name, /*IsLocal=*/false, STB_GLOBAL, Visibility, 63 /*Type*/ 0, 64 /*CanOmitFromDynSym*/ false, /*File*/ nullptr); 65 66 replaceBody<DefinedRegular<ELFT>>(Sym, Cmd->Name, /*IsLocal=*/false, 67 Visibility, STT_NOTYPE, 0, 0, nullptr, 68 nullptr); 69 return Sym->body(); 70 } 71 72 template <class ELFT> static SymbolBody *addSynthetic(SymbolAssignment *Cmd) { 73 uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT; 74 const OutputSectionBase *Sec = 75 ScriptConfig->HasSections ? nullptr : Cmd->Expression.Section(); 76 Symbol *Sym = Symtab<ELFT>::X->addUndefined( 77 Cmd->Name, /*IsLocal=*/false, STB_GLOBAL, Visibility, 78 /*Type*/ 0, 79 /*CanOmitFromDynSym*/ false, /*File*/ nullptr); 80 81 replaceBody<DefinedSynthetic>(Sym, Cmd->Name, 0, Sec); 82 return Sym->body(); 83 } 84 85 static bool isUnderSysroot(StringRef Path) { 86 if (Config->Sysroot == "") 87 return false; 88 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 89 if (sys::fs::equivalent(Config->Sysroot, Path)) 90 return true; 91 return false; 92 } 93 94 // Sets value of a symbol. Two kinds of symbols are processed: synthetic 95 // symbols, whose value is an offset from beginning of section and regular 96 // symbols whose value is absolute. 97 template <class ELFT> 98 static void assignSymbol(SymbolAssignment *Cmd, typename ELFT::uint Dot = 0) { 99 if (!Cmd->Sym) 100 return; 101 102 if (auto *Body = dyn_cast<DefinedSynthetic>(Cmd->Sym)) { 103 Body->Section = Cmd->Expression.Section(); 104 if (Body->Section) { 105 uint64_t VA = 0; 106 if (Body->Section->Flags & SHF_ALLOC) 107 VA = Body->Section->Addr; 108 Body->Value = Cmd->Expression(Dot) - VA; 109 } 110 return; 111 } 112 113 cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Cmd->Expression(Dot); 114 } 115 116 template <class ELFT> static void addSymbol(SymbolAssignment *Cmd) { 117 if (Cmd->Name == ".") 118 return; 119 120 // If a symbol was in PROVIDE(), we need to define it only when 121 // it is a referenced undefined symbol. 122 SymbolBody *B = Symtab<ELFT>::X->find(Cmd->Name); 123 if (Cmd->Provide && (!B || B->isDefined())) 124 return; 125 126 // Otherwise, create a new symbol if one does not exist or an 127 // undefined one does exist. 128 if (Cmd->Expression.IsAbsolute()) 129 Cmd->Sym = addRegular<ELFT>(Cmd); 130 else 131 Cmd->Sym = addSynthetic<ELFT>(Cmd); 132 133 // If there are sections, then let the value be assigned later in 134 // `assignAddresses`. 135 if (!ScriptConfig->HasSections) 136 assignSymbol<ELFT>(Cmd); 137 } 138 139 bool SymbolAssignment::classof(const BaseCommand *C) { 140 return C->Kind == AssignmentKind; 141 } 142 143 bool OutputSectionCommand::classof(const BaseCommand *C) { 144 return C->Kind == OutputSectionKind; 145 } 146 147 bool InputSectionDescription::classof(const BaseCommand *C) { 148 return C->Kind == InputSectionKind; 149 } 150 151 bool AssertCommand::classof(const BaseCommand *C) { 152 return C->Kind == AssertKind; 153 } 154 155 bool BytesDataCommand::classof(const BaseCommand *C) { 156 return C->Kind == BytesDataKind; 157 } 158 159 template <class ELFT> LinkerScript<ELFT>::LinkerScript() = default; 160 template <class ELFT> LinkerScript<ELFT>::~LinkerScript() = default; 161 162 template <class ELFT> static StringRef basename(InputSectionBase<ELFT> *S) { 163 if (S->getFile()) 164 return sys::path::filename(S->getFile()->getName()); 165 return ""; 166 } 167 168 template <class ELFT> 169 bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) { 170 for (InputSectionDescription *ID : Opt.KeptSections) 171 if (ID->FilePat.match(basename(S))) 172 for (SectionPattern &P : ID->SectionPatterns) 173 if (P.SectionPat.match(S->Name)) 174 return true; 175 return false; 176 } 177 178 static bool comparePriority(InputSectionData *A, InputSectionData *B) { 179 return getPriority(A->Name) < getPriority(B->Name); 180 } 181 182 static bool compareName(InputSectionData *A, InputSectionData *B) { 183 return A->Name < B->Name; 184 } 185 186 static bool compareAlignment(InputSectionData *A, InputSectionData *B) { 187 // ">" is not a mistake. Larger alignments are placed before smaller 188 // alignments in order to reduce the amount of padding necessary. 189 // This is compatible with GNU. 190 return A->Alignment > B->Alignment; 191 } 192 193 static std::function<bool(InputSectionData *, InputSectionData *)> 194 getComparator(SortSectionPolicy K) { 195 switch (K) { 196 case SortSectionPolicy::Alignment: 197 return compareAlignment; 198 case SortSectionPolicy::Name: 199 return compareName; 200 case SortSectionPolicy::Priority: 201 return comparePriority; 202 default: 203 llvm_unreachable("unknown sort policy"); 204 } 205 } 206 207 template <class ELFT> 208 static bool matchConstraints(ArrayRef<InputSectionBase<ELFT> *> Sections, 209 ConstraintKind Kind) { 210 if (Kind == ConstraintKind::NoConstraint) 211 return true; 212 bool IsRW = llvm::any_of(Sections, [=](InputSectionData *Sec2) { 213 auto *Sec = static_cast<InputSectionBase<ELFT> *>(Sec2); 214 return Sec->Flags & SHF_WRITE; 215 }); 216 return (IsRW && Kind == ConstraintKind::ReadWrite) || 217 (!IsRW && Kind == ConstraintKind::ReadOnly); 218 } 219 220 static void sortSections(InputSectionData **Begin, InputSectionData **End, 221 SortSectionPolicy K) { 222 if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None) 223 std::stable_sort(Begin, End, getComparator(K)); 224 } 225 226 // Compute and remember which sections the InputSectionDescription matches. 227 template <class ELFT> 228 void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) { 229 // Collects all sections that satisfy constraints of I 230 // and attach them to I. 231 for (SectionPattern &Pat : I->SectionPatterns) { 232 size_t SizeBefore = I->Sections.size(); 233 234 for (InputSectionBase<ELFT> *S : Symtab<ELFT>::X->Sections) { 235 if (!S->Live || S->Assigned) 236 continue; 237 238 StringRef Filename = basename(S); 239 if (!I->FilePat.match(Filename) || Pat.ExcludedFilePat.match(Filename)) 240 continue; 241 if (!Pat.SectionPat.match(S->Name)) 242 continue; 243 I->Sections.push_back(S); 244 S->Assigned = true; 245 } 246 247 // Sort sections as instructed by SORT-family commands and --sort-section 248 // option. Because SORT-family commands can be nested at most two depth 249 // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command 250 // line option is respected even if a SORT command is given, the exact 251 // behavior we have here is a bit complicated. Here are the rules. 252 // 253 // 1. If two SORT commands are given, --sort-section is ignored. 254 // 2. If one SORT command is given, and if it is not SORT_NONE, 255 // --sort-section is handled as an inner SORT command. 256 // 3. If one SORT command is given, and if it is SORT_NONE, don't sort. 257 // 4. If no SORT command is given, sort according to --sort-section. 258 InputSectionData **Begin = I->Sections.data() + SizeBefore; 259 InputSectionData **End = I->Sections.data() + I->Sections.size(); 260 if (Pat.SortOuter != SortSectionPolicy::None) { 261 if (Pat.SortInner == SortSectionPolicy::Default) 262 sortSections(Begin, End, Config->SortSection); 263 else 264 sortSections(Begin, End, Pat.SortInner); 265 sortSections(Begin, End, Pat.SortOuter); 266 } 267 } 268 } 269 270 template <class ELFT> 271 void LinkerScript<ELFT>::discard(ArrayRef<InputSectionBase<ELFT> *> V) { 272 for (InputSectionBase<ELFT> *S : V) { 273 S->Live = false; 274 reportDiscarded(S); 275 } 276 } 277 278 template <class ELFT> 279 std::vector<InputSectionBase<ELFT> *> 280 LinkerScript<ELFT>::createInputSectionList(OutputSectionCommand &OutCmd) { 281 std::vector<InputSectionBase<ELFT> *> Ret; 282 283 for (const std::unique_ptr<BaseCommand> &Base : OutCmd.Commands) { 284 auto *Cmd = dyn_cast<InputSectionDescription>(Base.get()); 285 if (!Cmd) 286 continue; 287 computeInputSections(Cmd); 288 for (InputSectionData *S : Cmd->Sections) 289 Ret.push_back(static_cast<InputSectionBase<ELFT> *>(S)); 290 } 291 292 return Ret; 293 } 294 295 template <class ELFT> 296 void LinkerScript<ELFT>::addSection(OutputSectionFactory<ELFT> &Factory, 297 InputSectionBase<ELFT> *Sec, 298 StringRef Name) { 299 OutputSectionBase *OutSec; 300 bool IsNew; 301 std::tie(OutSec, IsNew) = Factory.create(Sec, Name); 302 if (IsNew) 303 OutputSections->push_back(OutSec); 304 OutSec->addSection(Sec); 305 } 306 307 template <class ELFT> 308 void LinkerScript<ELFT>::processCommands(OutputSectionFactory<ELFT> &Factory) { 309 for (unsigned I = 0; I < Opt.Commands.size(); ++I) { 310 auto Iter = Opt.Commands.begin() + I; 311 const std::unique_ptr<BaseCommand> &Base1 = *Iter; 312 313 // Handle symbol assignments outside of any output section. 314 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base1.get())) { 315 addSymbol<ELFT>(Cmd); 316 continue; 317 } 318 319 if (auto *Cmd = dyn_cast<AssertCommand>(Base1.get())) { 320 // If we don't have SECTIONS then output sections have already been 321 // created by Writer<ELFT>. The LinkerScript<ELFT>::assignAddresses 322 // will not be called, so ASSERT should be evaluated now. 323 if (!Opt.HasSections) 324 Cmd->Expression(0); 325 continue; 326 } 327 328 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base1.get())) { 329 std::vector<InputSectionBase<ELFT> *> V = createInputSectionList(*Cmd); 330 331 // The output section name `/DISCARD/' is special. 332 // Any input section assigned to it is discarded. 333 if (Cmd->Name == "/DISCARD/") { 334 discard(V); 335 continue; 336 } 337 338 // This is for ONLY_IF_RO and ONLY_IF_RW. An output section directive 339 // ".foo : ONLY_IF_R[OW] { ... }" is handled only if all member input 340 // sections satisfy a given constraint. If not, a directive is handled 341 // as if it wasn't present from the beginning. 342 // 343 // Because we'll iterate over Commands many more times, the easiest 344 // way to "make it as if it wasn't present" is to just remove it. 345 if (!matchConstraints<ELFT>(V, Cmd->Constraint)) { 346 for (InputSectionBase<ELFT> *S : V) 347 S->Assigned = false; 348 Opt.Commands.erase(Iter); 349 --I; 350 continue; 351 } 352 353 // A directive may contain symbol definitions like this: 354 // ".foo : { ...; bar = .; }". Handle them. 355 for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) 356 if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base.get())) 357 addSymbol<ELFT>(OutCmd); 358 359 // Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign 360 // is given, input sections are aligned to that value, whether the 361 // given value is larger or smaller than the original section alignment. 362 if (Cmd->SubalignExpr) { 363 uint32_t Subalign = Cmd->SubalignExpr(0); 364 for (InputSectionBase<ELFT> *S : V) 365 S->Alignment = Subalign; 366 } 367 368 // Add input sections to an output section. 369 for (InputSectionBase<ELFT> *S : V) 370 addSection(Factory, S, Cmd->Name); 371 } 372 } 373 } 374 375 // Add sections that didn't match any sections command. 376 template <class ELFT> 377 void LinkerScript<ELFT>::addOrphanSections( 378 OutputSectionFactory<ELFT> &Factory) { 379 for (InputSectionBase<ELFT> *S : Symtab<ELFT>::X->Sections) 380 if (S->Live && !S->OutSec) 381 addSection(Factory, S, getOutputSectionName(S->Name)); 382 } 383 384 template <class ELFT> static bool isTbss(OutputSectionBase *Sec) { 385 return (Sec->Flags & SHF_TLS) && Sec->Type == SHT_NOBITS; 386 } 387 388 template <class ELFT> void LinkerScript<ELFT>::output(InputSection<ELFT> *S) { 389 if (!AlreadyOutputIS.insert(S).second) 390 return; 391 bool IsTbss = isTbss<ELFT>(CurOutSec); 392 393 uintX_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot; 394 Pos = alignTo(Pos, S->Alignment); 395 S->OutSecOff = Pos - CurOutSec->Addr; 396 Pos += S->getSize(); 397 398 // Update output section size after adding each section. This is so that 399 // SIZEOF works correctly in the case below: 400 // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) } 401 CurOutSec->Size = Pos - CurOutSec->Addr; 402 403 // If there is a memory region associated with this input section, then 404 // place the section in that region and update the region index. 405 if (CurMemRegion) { 406 CurMemRegion->Offset += CurOutSec->Size; 407 uint64_t CurSize = CurMemRegion->Offset - CurMemRegion->Origin; 408 if (CurSize > CurMemRegion->Length) { 409 uint64_t OverflowAmt = CurSize - CurMemRegion->Length; 410 error("section '" + CurOutSec->Name + "' will not fit in region '" + 411 CurMemRegion->Name + "': overflowed by " + Twine(OverflowAmt) + 412 " bytes"); 413 } 414 } 415 416 if (IsTbss) 417 ThreadBssOffset = Pos - Dot; 418 else 419 Dot = Pos; 420 } 421 422 template <class ELFT> void LinkerScript<ELFT>::flush() { 423 if (!CurOutSec || !AlreadyOutputOS.insert(CurOutSec).second) 424 return; 425 if (auto *OutSec = dyn_cast<OutputSection<ELFT>>(CurOutSec)) { 426 for (InputSection<ELFT> *I : OutSec->Sections) 427 output(I); 428 } else { 429 Dot += CurOutSec->Size; 430 } 431 } 432 433 template <class ELFT> 434 void LinkerScript<ELFT>::switchTo(OutputSectionBase *Sec) { 435 if (CurOutSec == Sec) 436 return; 437 if (AlreadyOutputOS.count(Sec)) 438 return; 439 440 flush(); 441 CurOutSec = Sec; 442 443 Dot = alignTo(Dot, CurOutSec->Addralign); 444 CurOutSec->Addr = isTbss<ELFT>(CurOutSec) ? Dot + ThreadBssOffset : Dot; 445 446 // If neither AT nor AT> is specified for an allocatable section, the linker 447 // will set the LMA such that the difference between VMA and LMA for the 448 // section is the same as the preceding output section in the same region 449 // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html 450 CurOutSec->setLMAOffset(LMAOffset); 451 } 452 453 template <class ELFT> void LinkerScript<ELFT>::process(BaseCommand &Base) { 454 // This handles the assignments to symbol or to a location counter (.) 455 if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) { 456 if (AssignCmd->Name == ".") { 457 // Update to location counter means update to section size. 458 uintX_t Val = AssignCmd->Expression(Dot); 459 if (Val < Dot) 460 error("unable to move location counter backward for: " + 461 CurOutSec->Name); 462 Dot = Val; 463 CurOutSec->Size = Dot - CurOutSec->Addr; 464 return; 465 } 466 assignSymbol<ELFT>(AssignCmd, Dot); 467 return; 468 } 469 470 // Handle BYTE(), SHORT(), LONG(), or QUAD(). 471 if (auto *DataCmd = dyn_cast<BytesDataCommand>(&Base)) { 472 DataCmd->Offset = Dot - CurOutSec->Addr; 473 Dot += DataCmd->Size; 474 CurOutSec->Size = Dot - CurOutSec->Addr; 475 return; 476 } 477 478 if (auto *AssertCmd = dyn_cast<AssertCommand>(&Base)) { 479 AssertCmd->Expression(Dot); 480 return; 481 } 482 483 // It handles single input section description command, 484 // calculates and assigns the offsets for each section and also 485 // updates the output section size. 486 auto &ICmd = cast<InputSectionDescription>(Base); 487 for (InputSectionData *ID : ICmd.Sections) { 488 // We tentatively added all synthetic sections at the beginning and removed 489 // empty ones afterwards (because there is no way to know whether they were 490 // going be empty or not other than actually running linker scripts.) 491 // We need to ignore remains of empty sections. 492 if (auto *Sec = dyn_cast<SyntheticSection<ELFT>>(ID)) 493 if (Sec->empty()) 494 continue; 495 496 auto *IB = static_cast<InputSectionBase<ELFT> *>(ID); 497 switchTo(IB->OutSec); 498 if (auto *I = dyn_cast<InputSection<ELFT>>(IB)) 499 output(I); 500 else 501 flush(); 502 } 503 } 504 505 template <class ELFT> 506 static OutputSectionBase * 507 findSection(StringRef Name, const std::vector<OutputSectionBase *> &Sections) { 508 auto End = Sections.end(); 509 auto HasName = [=](OutputSectionBase *Sec) { return Sec->getName() == Name; }; 510 auto I = std::find_if(Sections.begin(), End, HasName); 511 std::vector<OutputSectionBase *> Ret; 512 if (I == End) 513 return nullptr; 514 assert(std::find_if(I + 1, End, HasName) == End); 515 return *I; 516 } 517 518 // This function searches for a memory region to place the given output 519 // section in. If found, a pointer to the appropriate memory region is 520 // returned. Otherwise, a nullptr is returned. 521 template <class ELFT> 522 MemoryRegion *LinkerScript<ELFT>::findMemoryRegion(OutputSectionCommand *Cmd, 523 OutputSectionBase *Sec) { 524 // If a memory region name was specified in the output section command, 525 // then try to find that region first. 526 if (!Cmd->MemoryRegionName.empty()) { 527 auto It = Opt.MemoryRegions.find(Cmd->MemoryRegionName); 528 if (It != Opt.MemoryRegions.end()) 529 return &It->second; 530 error("memory region '" + Cmd->MemoryRegionName + "' not declared"); 531 return nullptr; 532 } 533 534 // The memory region name is empty, thus a suitable region must be 535 // searched for in the region map. If the region map is empty, just 536 // return. Note that this check doesn't happen at the very beginning 537 // so that uses of undeclared regions can be caught. 538 if (!Opt.MemoryRegions.size()) 539 return nullptr; 540 541 // See if a region can be found by matching section flags. 542 for (auto &MRI : Opt.MemoryRegions) { 543 MemoryRegion &MR = MRI.second; 544 if ((MR.Flags & Sec->Flags) != 0 && (MR.NegFlags & Sec->Flags) == 0) 545 return &MR; 546 } 547 548 // Otherwise, no suitable region was found. 549 if (Sec->Flags & SHF_ALLOC) 550 error("no memory region specified for section '" + Sec->Name + "'"); 551 return nullptr; 552 } 553 554 // This function assigns offsets to input sections and an output section 555 // for a single sections command (e.g. ".text { *(.text); }"). 556 template <class ELFT> 557 void LinkerScript<ELFT>::assignOffsets(OutputSectionCommand *Cmd) { 558 if (Cmd->LMAExpr) 559 LMAOffset = Cmd->LMAExpr(Dot) - Dot; 560 OutputSectionBase *Sec = findSection<ELFT>(Cmd->Name, *OutputSections); 561 if (!Sec) 562 return; 563 564 // Try and find an appropriate memory region to assign offsets in. 565 CurMemRegion = findMemoryRegion(Cmd, Sec); 566 if (CurMemRegion) 567 Dot = CurMemRegion->Offset; 568 switchTo(Sec); 569 570 // Find the last section output location. We will output orphan sections 571 // there so that end symbols point to the correct location. 572 auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(), 573 [](const std::unique_ptr<BaseCommand> &Cmd) { 574 return !isa<SymbolAssignment>(*Cmd); 575 }) 576 .base(); 577 for (auto I = Cmd->Commands.begin(); I != E; ++I) 578 process(**I); 579 flush(); 580 std::for_each(E, Cmd->Commands.end(), 581 [this](std::unique_ptr<BaseCommand> &B) { process(*B.get()); }); 582 } 583 584 template <class ELFT> void LinkerScript<ELFT>::removeEmptyCommands() { 585 // It is common practice to use very generic linker scripts. So for any 586 // given run some of the output sections in the script will be empty. 587 // We could create corresponding empty output sections, but that would 588 // clutter the output. 589 // We instead remove trivially empty sections. The bfd linker seems even 590 // more aggressive at removing them. 591 auto Pos = std::remove_if( 592 Opt.Commands.begin(), Opt.Commands.end(), 593 [&](const std::unique_ptr<BaseCommand> &Base) { 594 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 595 return !findSection<ELFT>(Cmd->Name, *OutputSections); 596 return false; 597 }); 598 Opt.Commands.erase(Pos, Opt.Commands.end()); 599 } 600 601 static bool isAllSectionDescription(const OutputSectionCommand &Cmd) { 602 for (const std::unique_ptr<BaseCommand> &I : Cmd.Commands) 603 if (!isa<InputSectionDescription>(*I)) 604 return false; 605 return true; 606 } 607 608 template <class ELFT> void LinkerScript<ELFT>::adjustSectionsBeforeSorting() { 609 // If the output section contains only symbol assignments, create a 610 // corresponding output section. The bfd linker seems to only create them if 611 // '.' is assigned to, but creating these section should not have any bad 612 // consequeces and gives us a section to put the symbol in. 613 uintX_t Flags = SHF_ALLOC; 614 uint32_t Type = SHT_NOBITS; 615 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 616 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 617 if (!Cmd) 618 continue; 619 if (OutputSectionBase *Sec = 620 findSection<ELFT>(Cmd->Name, *OutputSections)) { 621 Flags = Sec->Flags; 622 Type = Sec->Type; 623 continue; 624 } 625 626 if (isAllSectionDescription(*Cmd)) 627 continue; 628 629 auto *OutSec = make<OutputSection<ELFT>>(Cmd->Name, Type, Flags); 630 OutputSections->push_back(OutSec); 631 } 632 } 633 634 template <class ELFT> void LinkerScript<ELFT>::adjustSectionsAfterSorting() { 635 placeOrphanSections(); 636 637 // If output section command doesn't specify any segments, 638 // and we haven't previously assigned any section to segment, 639 // then we simply assign section to the very first load segment. 640 // Below is an example of such linker script: 641 // PHDRS { seg PT_LOAD; } 642 // SECTIONS { .aaa : { *(.aaa) } } 643 std::vector<StringRef> DefPhdrs; 644 auto FirstPtLoad = 645 std::find_if(Opt.PhdrsCommands.begin(), Opt.PhdrsCommands.end(), 646 [](const PhdrsCommand &Cmd) { return Cmd.Type == PT_LOAD; }); 647 if (FirstPtLoad != Opt.PhdrsCommands.end()) 648 DefPhdrs.push_back(FirstPtLoad->Name); 649 650 // Walk the commands and propagate the program headers to commands that don't 651 // explicitly specify them. 652 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 653 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 654 if (!Cmd) 655 continue; 656 if (Cmd->Phdrs.empty()) 657 Cmd->Phdrs = DefPhdrs; 658 else 659 DefPhdrs = Cmd->Phdrs; 660 } 661 662 removeEmptyCommands(); 663 } 664 665 // When placing orphan sections, we want to place them after symbol assignments 666 // so that an orphan after 667 // begin_foo = .; 668 // foo : { *(foo) } 669 // end_foo = .; 670 // doesn't break the intended meaning of the begin/end symbols. 671 // We don't want to go over sections since Writer<ELFT>::sortSections is the 672 // one in charge of deciding the order of the sections. 673 // We don't want to go over alignments, since doing so in 674 // rx_sec : { *(rx_sec) } 675 // . = ALIGN(0x1000); 676 // /* The RW PT_LOAD starts here*/ 677 // rw_sec : { *(rw_sec) } 678 // would mean that the RW PT_LOAD would become unaligned. 679 static bool shouldSkip(const BaseCommand &Cmd) { 680 if (isa<OutputSectionCommand>(Cmd)) 681 return false; 682 const auto *Assign = dyn_cast<SymbolAssignment>(&Cmd); 683 if (!Assign) 684 return true; 685 return Assign->Name != "."; 686 } 687 688 // Orphan sections are sections present in the input files which are 689 // not explicitly placed into the output file by the linker script. 690 // 691 // When the control reaches this function, Opt.Commands contains 692 // output section commands for non-orphan sections only. This function 693 // adds new elements for orphan sections to Opt.Commands so that all 694 // sections are explicitly handled by Opt.Commands. 695 // 696 // Writer<ELFT>::sortSections has already sorted output sections. 697 // What we need to do is to scan OutputSections vector and 698 // Opt.Commands in parallel to find orphan sections. If there is an 699 // output section that doesn't have a corresponding entry in 700 // Opt.Commands, we will insert a new entry to Opt.Commands. 701 // 702 // There is some ambiguity as to where exactly a new entry should be 703 // inserted, because Opt.Commands contains not only output section 704 // commands but other types of commands such as symbol assignment 705 // expressions. There's no correct answer here due to the lack of the 706 // formal specification of the linker script. We use heuristics to 707 // determine whether a new output command should be added before or 708 // after another commands. For the details, look at shouldSkip 709 // function. 710 template <class ELFT> void LinkerScript<ELFT>::placeOrphanSections() { 711 // The OutputSections are already in the correct order. 712 // This loops creates or moves commands as needed so that they are in the 713 // correct order. 714 int CmdIndex = 0; 715 716 // As a horrible special case, skip the first . assignment if it is before any 717 // section. We do this because it is common to set a load address by starting 718 // the script with ". = 0xabcd" and the expectation is that every section is 719 // after that. 720 auto FirstSectionOrDotAssignment = 721 std::find_if(Opt.Commands.begin(), Opt.Commands.end(), 722 [](const std::unique_ptr<BaseCommand> &Cmd) { 723 if (isa<OutputSectionCommand>(*Cmd)) 724 return true; 725 const auto *Assign = dyn_cast<SymbolAssignment>(Cmd.get()); 726 if (!Assign) 727 return false; 728 return Assign->Name == "."; 729 }); 730 if (FirstSectionOrDotAssignment != Opt.Commands.end()) { 731 CmdIndex = FirstSectionOrDotAssignment - Opt.Commands.begin(); 732 if (isa<SymbolAssignment>(**FirstSectionOrDotAssignment)) 733 ++CmdIndex; 734 } 735 736 for (OutputSectionBase *Sec : *OutputSections) { 737 StringRef Name = Sec->getName(); 738 739 // Find the last spot where we can insert a command and still get the 740 // correct result. 741 auto CmdIter = Opt.Commands.begin() + CmdIndex; 742 auto E = Opt.Commands.end(); 743 while (CmdIter != E && shouldSkip(**CmdIter)) { 744 ++CmdIter; 745 ++CmdIndex; 746 } 747 748 auto Pos = 749 std::find_if(CmdIter, E, [&](const std::unique_ptr<BaseCommand> &Base) { 750 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 751 return Cmd && Cmd->Name == Name; 752 }); 753 if (Pos == E) { 754 Opt.Commands.insert(CmdIter, 755 llvm::make_unique<OutputSectionCommand>(Name)); 756 ++CmdIndex; 757 continue; 758 } 759 760 // Continue from where we found it. 761 CmdIndex = (Pos - Opt.Commands.begin()) + 1; 762 } 763 } 764 765 template <class ELFT> 766 void LinkerScript<ELFT>::assignAddresses(std::vector<PhdrEntry> &Phdrs) { 767 // Assign addresses as instructed by linker script SECTIONS sub-commands. 768 Dot = 0; 769 770 // A symbol can be assigned before any section is mentioned in the linker 771 // script. In an DSO, the symbol values are addresses, so the only important 772 // section values are: 773 // * SHN_UNDEF 774 // * SHN_ABS 775 // * Any value meaning a regular section. 776 // To handle that, create a dummy aether section that fills the void before 777 // the linker scripts switches to another section. It has an index of one 778 // which will map to whatever the first actual section is. 779 auto *Aether = make<OutputSectionBase>("", 0, SHF_ALLOC); 780 Aether->SectionIndex = 1; 781 switchTo(Aether); 782 783 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 784 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) { 785 if (Cmd->Name == ".") { 786 Dot = Cmd->Expression(Dot); 787 } else if (Cmd->Sym) { 788 assignSymbol<ELFT>(Cmd, Dot); 789 } 790 continue; 791 } 792 793 if (auto *Cmd = dyn_cast<AssertCommand>(Base.get())) { 794 Cmd->Expression(Dot); 795 continue; 796 } 797 798 auto *Cmd = cast<OutputSectionCommand>(Base.get()); 799 if (Cmd->AddrExpr) 800 Dot = Cmd->AddrExpr(Dot); 801 assignOffsets(Cmd); 802 } 803 804 uintX_t MinVA = std::numeric_limits<uintX_t>::max(); 805 for (OutputSectionBase *Sec : *OutputSections) { 806 if (Sec->Flags & SHF_ALLOC) 807 MinVA = std::min<uint64_t>(MinVA, Sec->Addr); 808 else 809 Sec->Addr = 0; 810 } 811 812 allocateHeaders<ELFT>(Phdrs, *OutputSections, MinVA); 813 } 814 815 // Creates program headers as instructed by PHDRS linker script command. 816 template <class ELFT> std::vector<PhdrEntry> LinkerScript<ELFT>::createPhdrs() { 817 std::vector<PhdrEntry> Ret; 818 819 // Process PHDRS and FILEHDR keywords because they are not 820 // real output sections and cannot be added in the following loop. 821 for (const PhdrsCommand &Cmd : Opt.PhdrsCommands) { 822 Ret.emplace_back(Cmd.Type, Cmd.Flags == UINT_MAX ? PF_R : Cmd.Flags); 823 PhdrEntry &Phdr = Ret.back(); 824 825 if (Cmd.HasFilehdr) 826 Phdr.add(Out<ELFT>::ElfHeader); 827 if (Cmd.HasPhdrs) 828 Phdr.add(Out<ELFT>::ProgramHeaders); 829 830 if (Cmd.LMAExpr) { 831 Phdr.p_paddr = Cmd.LMAExpr(0); 832 Phdr.HasLMA = true; 833 } 834 } 835 836 // Add output sections to program headers. 837 for (OutputSectionBase *Sec : *OutputSections) { 838 if (!(Sec->Flags & SHF_ALLOC)) 839 break; 840 841 // Assign headers specified by linker script 842 for (size_t Id : getPhdrIndices(Sec->getName())) { 843 Ret[Id].add(Sec); 844 if (Opt.PhdrsCommands[Id].Flags == UINT_MAX) 845 Ret[Id].p_flags |= Sec->getPhdrFlags(); 846 } 847 } 848 return Ret; 849 } 850 851 template <class ELFT> bool LinkerScript<ELFT>::ignoreInterpSection() { 852 // Ignore .interp section in case we have PHDRS specification 853 // and PT_INTERP isn't listed. 854 return !Opt.PhdrsCommands.empty() && 855 llvm::find_if(Opt.PhdrsCommands, [](const PhdrsCommand &Cmd) { 856 return Cmd.Type == PT_INTERP; 857 }) == Opt.PhdrsCommands.end(); 858 } 859 860 template <class ELFT> uint32_t LinkerScript<ELFT>::getFiller(StringRef Name) { 861 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 862 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 863 if (Cmd->Name == Name) 864 return Cmd->Filler; 865 return 0; 866 } 867 868 template <class ELFT> 869 static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) { 870 const endianness E = ELFT::TargetEndianness; 871 872 switch (Size) { 873 case 1: 874 *Buf = (uint8_t)Data; 875 break; 876 case 2: 877 write16<E>(Buf, Data); 878 break; 879 case 4: 880 write32<E>(Buf, Data); 881 break; 882 case 8: 883 write64<E>(Buf, Data); 884 break; 885 default: 886 llvm_unreachable("unsupported Size argument"); 887 } 888 } 889 890 template <class ELFT> 891 void LinkerScript<ELFT>::writeDataBytes(StringRef Name, uint8_t *Buf) { 892 int I = getSectionIndex(Name); 893 if (I == INT_MAX) 894 return; 895 896 auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get()); 897 for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) 898 if (auto *Data = dyn_cast<BytesDataCommand>(Base.get())) 899 writeInt<ELFT>(Buf + Data->Offset, Data->Expression(0), Data->Size); 900 } 901 902 template <class ELFT> bool LinkerScript<ELFT>::hasLMA(StringRef Name) { 903 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 904 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 905 if (Cmd->LMAExpr && Cmd->Name == Name) 906 return true; 907 return false; 908 } 909 910 // Returns the index of the given section name in linker script 911 // SECTIONS commands. Sections are laid out as the same order as they 912 // were in the script. If a given name did not appear in the script, 913 // it returns INT_MAX, so that it will be laid out at end of file. 914 template <class ELFT> int LinkerScript<ELFT>::getSectionIndex(StringRef Name) { 915 for (int I = 0, E = Opt.Commands.size(); I != E; ++I) 916 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get())) 917 if (Cmd->Name == Name) 918 return I; 919 return INT_MAX; 920 } 921 922 template <class ELFT> bool LinkerScript<ELFT>::hasPhdrsCommands() { 923 return !Opt.PhdrsCommands.empty(); 924 } 925 926 template <class ELFT> 927 const OutputSectionBase *LinkerScript<ELFT>::getOutputSection(const Twine &Loc, 928 StringRef Name) { 929 static OutputSectionBase FakeSec("", 0, 0); 930 931 for (OutputSectionBase *Sec : *OutputSections) 932 if (Sec->getName() == Name) 933 return Sec; 934 935 error(Loc + ": undefined section " + Name); 936 return &FakeSec; 937 } 938 939 // This function is essentially the same as getOutputSection(Name)->Size, 940 // but it won't print out an error message if a given section is not found. 941 // 942 // Linker script does not create an output section if its content is empty. 943 // We want to allow SIZEOF(.foo) where .foo is a section which happened to 944 // be empty. That is why this function is different from getOutputSection(). 945 template <class ELFT> 946 uint64_t LinkerScript<ELFT>::getOutputSectionSize(StringRef Name) { 947 for (OutputSectionBase *Sec : *OutputSections) 948 if (Sec->getName() == Name) 949 return Sec->Size; 950 return 0; 951 } 952 953 template <class ELFT> uint64_t LinkerScript<ELFT>::getHeaderSize() { 954 return elf::getHeaderSize<ELFT>(); 955 } 956 957 template <class ELFT> 958 uint64_t LinkerScript<ELFT>::getSymbolValue(const Twine &Loc, StringRef S) { 959 if (SymbolBody *B = Symtab<ELFT>::X->find(S)) 960 return B->getVA<ELFT>(); 961 error(Loc + ": symbol not found: " + S); 962 return 0; 963 } 964 965 template <class ELFT> bool LinkerScript<ELFT>::isDefined(StringRef S) { 966 return Symtab<ELFT>::X->find(S) != nullptr; 967 } 968 969 template <class ELFT> bool LinkerScript<ELFT>::isAbsolute(StringRef S) { 970 SymbolBody *Sym = Symtab<ELFT>::X->find(S); 971 auto *DR = dyn_cast_or_null<DefinedRegular<ELFT>>(Sym); 972 return DR && !DR->Section; 973 } 974 975 // Gets section symbol belongs to. Symbol "." doesn't belong to any 976 // specific section but isn't absolute at the same time, so we try 977 // to find suitable section for it as well. 978 template <class ELFT> 979 const OutputSectionBase *LinkerScript<ELFT>::getSymbolSection(StringRef S) { 980 if (SymbolBody *Sym = Symtab<ELFT>::X->find(S)) 981 return SymbolTableSection<ELFT>::getOutputSection(Sym); 982 return CurOutSec; 983 } 984 985 // Returns indices of ELF headers containing specific section, identified 986 // by Name. Each index is a zero based number of ELF header listed within 987 // PHDRS {} script block. 988 template <class ELFT> 989 std::vector<size_t> LinkerScript<ELFT>::getPhdrIndices(StringRef SectionName) { 990 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 991 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 992 if (!Cmd || Cmd->Name != SectionName) 993 continue; 994 995 std::vector<size_t> Ret; 996 for (StringRef PhdrName : Cmd->Phdrs) 997 Ret.push_back(getPhdrIndex(Cmd->Location, PhdrName)); 998 return Ret; 999 } 1000 return {}; 1001 } 1002 1003 template <class ELFT> 1004 size_t LinkerScript<ELFT>::getPhdrIndex(const Twine &Loc, StringRef PhdrName) { 1005 size_t I = 0; 1006 for (PhdrsCommand &Cmd : Opt.PhdrsCommands) { 1007 if (Cmd.Name == PhdrName) 1008 return I; 1009 ++I; 1010 } 1011 error(Loc + ": section header '" + PhdrName + "' is not listed in PHDRS"); 1012 return 0; 1013 } 1014 1015 class elf::ScriptParser final : public ScriptParserBase { 1016 typedef void (ScriptParser::*Handler)(); 1017 1018 public: 1019 ScriptParser(MemoryBufferRef MB) 1020 : ScriptParserBase(MB), 1021 IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} 1022 1023 void readLinkerScript(); 1024 void readVersionScript(); 1025 void readDynamicList(); 1026 1027 private: 1028 void addFile(StringRef Path); 1029 1030 void readAsNeeded(); 1031 void readEntry(); 1032 void readExtern(); 1033 void readGroup(); 1034 void readInclude(); 1035 void readMemory(); 1036 void readOutput(); 1037 void readOutputArch(); 1038 void readOutputFormat(); 1039 void readPhdrs(); 1040 void readSearchDir(); 1041 void readSections(); 1042 void readVersion(); 1043 void readVersionScriptCommand(); 1044 1045 SymbolAssignment *readAssignment(StringRef Name); 1046 BytesDataCommand *readBytesDataCommand(StringRef Tok); 1047 uint32_t readFill(); 1048 OutputSectionCommand *readOutputSectionDescription(StringRef OutSec); 1049 uint32_t readOutputSectionFiller(StringRef Tok); 1050 std::vector<StringRef> readOutputSectionPhdrs(); 1051 InputSectionDescription *readInputSectionDescription(StringRef Tok); 1052 StringMatcher readFilePatterns(); 1053 std::vector<SectionPattern> readInputSectionsList(); 1054 InputSectionDescription *readInputSectionRules(StringRef FilePattern); 1055 unsigned readPhdrType(); 1056 SortSectionPolicy readSortKind(); 1057 SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); 1058 SymbolAssignment *readProvideOrAssignment(StringRef Tok); 1059 void readSort(); 1060 Expr readAssert(); 1061 1062 uint64_t readMemoryAssignment(StringRef, StringRef, StringRef); 1063 std::pair<uint32_t, uint32_t> readMemoryAttributes(); 1064 1065 Expr readExpr(); 1066 Expr readExpr1(Expr Lhs, int MinPrec); 1067 StringRef readParenLiteral(); 1068 Expr readPrimary(); 1069 Expr readTernary(Expr Cond); 1070 Expr readParenExpr(); 1071 1072 // For parsing version script. 1073 std::vector<SymbolVersion> readVersionExtern(); 1074 void readAnonymousDeclaration(); 1075 void readVersionDeclaration(StringRef VerStr); 1076 std::vector<SymbolVersion> readSymbols(); 1077 void readLocals(); 1078 1079 ScriptConfiguration &Opt = *ScriptConfig; 1080 bool IsUnderSysroot; 1081 }; 1082 1083 void ScriptParser::readDynamicList() { 1084 expect("{"); 1085 readAnonymousDeclaration(); 1086 if (!atEOF()) 1087 setError("EOF expected, but got " + next()); 1088 } 1089 1090 void ScriptParser::readVersionScript() { 1091 readVersionScriptCommand(); 1092 if (!atEOF()) 1093 setError("EOF expected, but got " + next()); 1094 } 1095 1096 void ScriptParser::readVersionScriptCommand() { 1097 if (consume("{")) { 1098 readAnonymousDeclaration(); 1099 return; 1100 } 1101 1102 while (!atEOF() && !Error && peek() != "}") { 1103 StringRef VerStr = next(); 1104 if (VerStr == "{") { 1105 setError("anonymous version definition is used in " 1106 "combination with other version definitions"); 1107 return; 1108 } 1109 expect("{"); 1110 readVersionDeclaration(VerStr); 1111 } 1112 } 1113 1114 void ScriptParser::readVersion() { 1115 expect("{"); 1116 readVersionScriptCommand(); 1117 expect("}"); 1118 } 1119 1120 void ScriptParser::readLinkerScript() { 1121 while (!atEOF()) { 1122 StringRef Tok = next(); 1123 if (Tok == ";") 1124 continue; 1125 1126 if (Tok == "ASSERT") { 1127 Opt.Commands.emplace_back(new AssertCommand(readAssert())); 1128 } else if (Tok == "ENTRY") { 1129 readEntry(); 1130 } else if (Tok == "EXTERN") { 1131 readExtern(); 1132 } else if (Tok == "GROUP" || Tok == "INPUT") { 1133 readGroup(); 1134 } else if (Tok == "INCLUDE") { 1135 readInclude(); 1136 } else if (Tok == "MEMORY") { 1137 readMemory(); 1138 } else if (Tok == "OUTPUT") { 1139 readOutput(); 1140 } else if (Tok == "OUTPUT_ARCH") { 1141 readOutputArch(); 1142 } else if (Tok == "OUTPUT_FORMAT") { 1143 readOutputFormat(); 1144 } else if (Tok == "PHDRS") { 1145 readPhdrs(); 1146 } else if (Tok == "SEARCH_DIR") { 1147 readSearchDir(); 1148 } else if (Tok == "SECTIONS") { 1149 readSections(); 1150 } else if (Tok == "VERSION") { 1151 readVersion(); 1152 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { 1153 Opt.Commands.emplace_back(Cmd); 1154 } else { 1155 setError("unknown directive: " + Tok); 1156 } 1157 } 1158 } 1159 1160 void ScriptParser::addFile(StringRef S) { 1161 if (IsUnderSysroot && S.startswith("/")) { 1162 SmallString<128> PathData; 1163 StringRef Path = (Config->Sysroot + S).toStringRef(PathData); 1164 if (sys::fs::exists(Path)) { 1165 Driver->addFile(Saver.save(Path)); 1166 return; 1167 } 1168 } 1169 1170 if (sys::path::is_absolute(S)) { 1171 Driver->addFile(S); 1172 } else if (S.startswith("=")) { 1173 if (Config->Sysroot.empty()) 1174 Driver->addFile(S.substr(1)); 1175 else 1176 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1))); 1177 } else if (S.startswith("-l")) { 1178 Driver->addLibrary(S.substr(2)); 1179 } else if (sys::fs::exists(S)) { 1180 Driver->addFile(S); 1181 } else { 1182 if (Optional<std::string> Path = findFromSearchPaths(S)) 1183 Driver->addFile(Saver.save(*Path)); 1184 else 1185 setError("unable to find " + S); 1186 } 1187 } 1188 1189 void ScriptParser::readAsNeeded() { 1190 expect("("); 1191 bool Orig = Config->AsNeeded; 1192 Config->AsNeeded = true; 1193 while (!Error && !consume(")")) 1194 addFile(unquote(next())); 1195 Config->AsNeeded = Orig; 1196 } 1197 1198 void ScriptParser::readEntry() { 1199 // -e <symbol> takes predecence over ENTRY(<symbol>). 1200 expect("("); 1201 StringRef Tok = next(); 1202 if (Config->Entry.empty()) 1203 Config->Entry = Tok; 1204 expect(")"); 1205 } 1206 1207 void ScriptParser::readExtern() { 1208 expect("("); 1209 while (!Error && !consume(")")) 1210 Config->Undefined.push_back(next()); 1211 } 1212 1213 void ScriptParser::readGroup() { 1214 expect("("); 1215 while (!Error && !consume(")")) { 1216 StringRef Tok = next(); 1217 if (Tok == "AS_NEEDED") 1218 readAsNeeded(); 1219 else 1220 addFile(unquote(Tok)); 1221 } 1222 } 1223 1224 void ScriptParser::readInclude() { 1225 StringRef Tok = unquote(next()); 1226 1227 // https://sourceware.org/binutils/docs/ld/File-Commands.html: 1228 // The file will be searched for in the current directory, and in any 1229 // directory specified with the -L option. 1230 if (sys::fs::exists(Tok)) { 1231 if (Optional<MemoryBufferRef> MB = readFile(Tok)) 1232 tokenize(*MB); 1233 return; 1234 } 1235 if (Optional<std::string> Path = findFromSearchPaths(Tok)) { 1236 if (Optional<MemoryBufferRef> MB = readFile(*Path)) 1237 tokenize(*MB); 1238 return; 1239 } 1240 setError("cannot open " + Tok); 1241 } 1242 1243 void ScriptParser::readOutput() { 1244 // -o <file> takes predecence over OUTPUT(<file>). 1245 expect("("); 1246 StringRef Tok = next(); 1247 if (Config->OutputFile.empty()) 1248 Config->OutputFile = unquote(Tok); 1249 expect(")"); 1250 } 1251 1252 void ScriptParser::readOutputArch() { 1253 // Error checking only for now. 1254 expect("("); 1255 skip(); 1256 expect(")"); 1257 } 1258 1259 void ScriptParser::readOutputFormat() { 1260 // Error checking only for now. 1261 expect("("); 1262 skip(); 1263 StringRef Tok = next(); 1264 if (Tok == ")") 1265 return; 1266 if (Tok != ",") { 1267 setError("unexpected token: " + Tok); 1268 return; 1269 } 1270 skip(); 1271 expect(","); 1272 skip(); 1273 expect(")"); 1274 } 1275 1276 void ScriptParser::readPhdrs() { 1277 expect("{"); 1278 while (!Error && !consume("}")) { 1279 StringRef Tok = next(); 1280 Opt.PhdrsCommands.push_back( 1281 {Tok, PT_NULL, false, false, UINT_MAX, nullptr}); 1282 PhdrsCommand &PhdrCmd = Opt.PhdrsCommands.back(); 1283 1284 PhdrCmd.Type = readPhdrType(); 1285 do { 1286 Tok = next(); 1287 if (Tok == ";") 1288 break; 1289 if (Tok == "FILEHDR") 1290 PhdrCmd.HasFilehdr = true; 1291 else if (Tok == "PHDRS") 1292 PhdrCmd.HasPhdrs = true; 1293 else if (Tok == "AT") 1294 PhdrCmd.LMAExpr = readParenExpr(); 1295 else if (Tok == "FLAGS") { 1296 expect("("); 1297 // Passing 0 for the value of dot is a bit of a hack. It means that 1298 // we accept expressions like ".|1". 1299 PhdrCmd.Flags = readExpr()(0); 1300 expect(")"); 1301 } else 1302 setError("unexpected header attribute: " + Tok); 1303 } while (!Error); 1304 } 1305 } 1306 1307 void ScriptParser::readSearchDir() { 1308 expect("("); 1309 StringRef Tok = next(); 1310 if (!Config->Nostdlib) 1311 Config->SearchPaths.push_back(unquote(Tok)); 1312 expect(")"); 1313 } 1314 1315 void ScriptParser::readSections() { 1316 Opt.HasSections = true; 1317 // -no-rosegment is used to avoid placing read only non-executable sections in 1318 // their own segment. We do the same if SECTIONS command is present in linker 1319 // script. See comment for computeFlags(). 1320 Config->SingleRoRx = true; 1321 1322 expect("{"); 1323 while (!Error && !consume("}")) { 1324 StringRef Tok = next(); 1325 BaseCommand *Cmd = readProvideOrAssignment(Tok); 1326 if (!Cmd) { 1327 if (Tok == "ASSERT") 1328 Cmd = new AssertCommand(readAssert()); 1329 else 1330 Cmd = readOutputSectionDescription(Tok); 1331 } 1332 Opt.Commands.emplace_back(Cmd); 1333 } 1334 } 1335 1336 static int precedence(StringRef Op) { 1337 return StringSwitch<int>(Op) 1338 .Cases("*", "/", 5) 1339 .Cases("+", "-", 4) 1340 .Cases("<<", ">>", 3) 1341 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 1342 .Cases("&", "|", 1) 1343 .Default(-1); 1344 } 1345 1346 StringMatcher ScriptParser::readFilePatterns() { 1347 std::vector<StringRef> V; 1348 while (!Error && !consume(")")) 1349 V.push_back(next()); 1350 return StringMatcher(V); 1351 } 1352 1353 SortSectionPolicy ScriptParser::readSortKind() { 1354 if (consume("SORT") || consume("SORT_BY_NAME")) 1355 return SortSectionPolicy::Name; 1356 if (consume("SORT_BY_ALIGNMENT")) 1357 return SortSectionPolicy::Alignment; 1358 if (consume("SORT_BY_INIT_PRIORITY")) 1359 return SortSectionPolicy::Priority; 1360 if (consume("SORT_NONE")) 1361 return SortSectionPolicy::None; 1362 return SortSectionPolicy::Default; 1363 } 1364 1365 // Method reads a list of sequence of excluded files and section globs given in 1366 // a following form: ((EXCLUDE_FILE(file_pattern+))? section_pattern+)+ 1367 // Example: *(.foo.1 EXCLUDE_FILE (*a.o) .foo.2 EXCLUDE_FILE (*b.o) .foo.3) 1368 // The semantics of that is next: 1369 // * Include .foo.1 from every file. 1370 // * Include .foo.2 from every file but a.o 1371 // * Include .foo.3 from every file but b.o 1372 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 1373 std::vector<SectionPattern> Ret; 1374 while (!Error && peek() != ")") { 1375 StringMatcher ExcludeFilePat; 1376 if (consume("EXCLUDE_FILE")) { 1377 expect("("); 1378 ExcludeFilePat = readFilePatterns(); 1379 } 1380 1381 std::vector<StringRef> V; 1382 while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") 1383 V.push_back(next()); 1384 1385 if (!V.empty()) 1386 Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); 1387 else 1388 setError("section pattern is expected"); 1389 } 1390 return Ret; 1391 } 1392 1393 // Reads contents of "SECTIONS" directive. That directive contains a 1394 // list of glob patterns for input sections. The grammar is as follows. 1395 // 1396 // <patterns> ::= <section-list> 1397 // | <sort> "(" <section-list> ")" 1398 // | <sort> "(" <sort> "(" <section-list> ")" ")" 1399 // 1400 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" 1401 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" 1402 // 1403 // <section-list> is parsed by readInputSectionsList(). 1404 InputSectionDescription * 1405 ScriptParser::readInputSectionRules(StringRef FilePattern) { 1406 auto *Cmd = new InputSectionDescription(FilePattern); 1407 expect("("); 1408 while (!Error && !consume(")")) { 1409 SortSectionPolicy Outer = readSortKind(); 1410 SortSectionPolicy Inner = SortSectionPolicy::Default; 1411 std::vector<SectionPattern> V; 1412 if (Outer != SortSectionPolicy::Default) { 1413 expect("("); 1414 Inner = readSortKind(); 1415 if (Inner != SortSectionPolicy::Default) { 1416 expect("("); 1417 V = readInputSectionsList(); 1418 expect(")"); 1419 } else { 1420 V = readInputSectionsList(); 1421 } 1422 expect(")"); 1423 } else { 1424 V = readInputSectionsList(); 1425 } 1426 1427 for (SectionPattern &Pat : V) { 1428 Pat.SortInner = Inner; 1429 Pat.SortOuter = Outer; 1430 } 1431 1432 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 1433 } 1434 return Cmd; 1435 } 1436 1437 InputSectionDescription * 1438 ScriptParser::readInputSectionDescription(StringRef Tok) { 1439 // Input section wildcard can be surrounded by KEEP. 1440 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 1441 if (Tok == "KEEP") { 1442 expect("("); 1443 StringRef FilePattern = next(); 1444 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 1445 expect(")"); 1446 Opt.KeptSections.push_back(Cmd); 1447 return Cmd; 1448 } 1449 return readInputSectionRules(Tok); 1450 } 1451 1452 void ScriptParser::readSort() { 1453 expect("("); 1454 expect("CONSTRUCTORS"); 1455 expect(")"); 1456 } 1457 1458 Expr ScriptParser::readAssert() { 1459 expect("("); 1460 Expr E = readExpr(); 1461 expect(","); 1462 StringRef Msg = unquote(next()); 1463 expect(")"); 1464 return [=](uint64_t Dot) { 1465 uint64_t V = E(Dot); 1466 if (!V) 1467 error(Msg); 1468 return V; 1469 }; 1470 } 1471 1472 // Reads a FILL(expr) command. We handle the FILL command as an 1473 // alias for =fillexp section attribute, which is different from 1474 // what GNU linkers do. 1475 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 1476 uint32_t ScriptParser::readFill() { 1477 expect("("); 1478 uint32_t V = readOutputSectionFiller(next()); 1479 expect(")"); 1480 expect(";"); 1481 return V; 1482 } 1483 1484 OutputSectionCommand * 1485 ScriptParser::readOutputSectionDescription(StringRef OutSec) { 1486 OutputSectionCommand *Cmd = new OutputSectionCommand(OutSec); 1487 Cmd->Location = getCurrentLocation(); 1488 1489 // Read an address expression. 1490 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html#Output-Section-Address 1491 if (peek() != ":") 1492 Cmd->AddrExpr = readExpr(); 1493 1494 expect(":"); 1495 1496 if (consume("AT")) 1497 Cmd->LMAExpr = readParenExpr(); 1498 if (consume("ALIGN")) 1499 Cmd->AlignExpr = readParenExpr(); 1500 if (consume("SUBALIGN")) 1501 Cmd->SubalignExpr = readParenExpr(); 1502 1503 // Parse constraints. 1504 if (consume("ONLY_IF_RO")) 1505 Cmd->Constraint = ConstraintKind::ReadOnly; 1506 if (consume("ONLY_IF_RW")) 1507 Cmd->Constraint = ConstraintKind::ReadWrite; 1508 expect("{"); 1509 1510 while (!Error && !consume("}")) { 1511 StringRef Tok = next(); 1512 if (Tok == ";") { 1513 // Empty commands are allowed. Do nothing here. 1514 } else if (SymbolAssignment *Assignment = readProvideOrAssignment(Tok)) { 1515 Cmd->Commands.emplace_back(Assignment); 1516 } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) { 1517 Cmd->Commands.emplace_back(Data); 1518 } else if (Tok == "ASSERT") { 1519 Cmd->Commands.emplace_back(new AssertCommand(readAssert())); 1520 expect(";"); 1521 } else if (Tok == "CONSTRUCTORS") { 1522 // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors 1523 // by name. This is for very old file formats such as ECOFF/XCOFF. 1524 // For ELF, we should ignore. 1525 } else if (Tok == "FILL") { 1526 Cmd->Filler = readFill(); 1527 } else if (Tok == "SORT") { 1528 readSort(); 1529 } else if (peek() == "(") { 1530 Cmd->Commands.emplace_back(readInputSectionDescription(Tok)); 1531 } else { 1532 setError("unknown command " + Tok); 1533 } 1534 } 1535 1536 if (consume(">")) 1537 Cmd->MemoryRegionName = next(); 1538 1539 Cmd->Phdrs = readOutputSectionPhdrs(); 1540 1541 if (consume("=")) 1542 Cmd->Filler = readOutputSectionFiller(next()); 1543 else if (peek().startswith("=")) 1544 Cmd->Filler = readOutputSectionFiller(next().drop_front()); 1545 1546 // Consume optional comma following output section command. 1547 consume(","); 1548 1549 return Cmd; 1550 } 1551 1552 // Read "=<number>" where <number> is an octal/decimal/hexadecimal number. 1553 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 1554 // 1555 // ld.gold is not fully compatible with ld.bfd. ld.bfd handles 1556 // hexstrings as blobs of arbitrary sizes, while ld.gold handles them 1557 // as 32-bit big-endian values. We will do the same as ld.gold does 1558 // because it's simpler than what ld.bfd does. 1559 uint32_t ScriptParser::readOutputSectionFiller(StringRef Tok) { 1560 uint32_t V; 1561 if (!Tok.getAsInteger(0, V)) 1562 return V; 1563 setError("invalid filler expression: " + Tok); 1564 return 0; 1565 } 1566 1567 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { 1568 expect("("); 1569 SymbolAssignment *Cmd = readAssignment(next()); 1570 Cmd->Provide = Provide; 1571 Cmd->Hidden = Hidden; 1572 expect(")"); 1573 expect(";"); 1574 return Cmd; 1575 } 1576 1577 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) { 1578 SymbolAssignment *Cmd = nullptr; 1579 if (peek() == "=" || peek() == "+=") { 1580 Cmd = readAssignment(Tok); 1581 expect(";"); 1582 } else if (Tok == "PROVIDE") { 1583 Cmd = readProvideHidden(true, false); 1584 } else if (Tok == "HIDDEN") { 1585 Cmd = readProvideHidden(false, true); 1586 } else if (Tok == "PROVIDE_HIDDEN") { 1587 Cmd = readProvideHidden(true, true); 1588 } 1589 return Cmd; 1590 } 1591 1592 static uint64_t getSymbolValue(const Twine &Loc, StringRef S, uint64_t Dot) { 1593 if (S == ".") 1594 return Dot; 1595 return ScriptBase->getSymbolValue(Loc, S); 1596 } 1597 1598 static bool isAbsolute(StringRef S) { 1599 if (S == ".") 1600 return false; 1601 return ScriptBase->isAbsolute(S); 1602 } 1603 1604 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { 1605 StringRef Op = next(); 1606 Expr E; 1607 assert(Op == "=" || Op == "+="); 1608 if (consume("ABSOLUTE")) { 1609 // The RHS may be something like "ABSOLUTE(.) & 0xff". 1610 // Call readExpr1 to read the whole expression. 1611 E = readExpr1(readParenExpr(), 0); 1612 E.IsAbsolute = [] { return true; }; 1613 } else { 1614 E = readExpr(); 1615 } 1616 if (Op == "+=") { 1617 std::string Loc = getCurrentLocation(); 1618 E = [=](uint64_t Dot) { 1619 return getSymbolValue(Loc, Name, Dot) + E(Dot); 1620 }; 1621 } 1622 return new SymbolAssignment(Name, E); 1623 } 1624 1625 // This is an operator-precedence parser to parse a linker 1626 // script expression. 1627 Expr ScriptParser::readExpr() { return readExpr1(readPrimary(), 0); } 1628 1629 static Expr combine(StringRef Op, Expr L, Expr R) { 1630 auto IsAbs = [=] { return L.IsAbsolute() && R.IsAbsolute(); }; 1631 auto GetOutSec = [=] { 1632 const OutputSectionBase *S = L.Section(); 1633 return S ? S : R.Section(); 1634 }; 1635 1636 if (Op == "*") 1637 return [=](uint64_t Dot) { return L(Dot) * R(Dot); }; 1638 if (Op == "/") { 1639 return [=](uint64_t Dot) -> uint64_t { 1640 uint64_t RHS = R(Dot); 1641 if (RHS == 0) { 1642 error("division by zero"); 1643 return 0; 1644 } 1645 return L(Dot) / RHS; 1646 }; 1647 } 1648 if (Op == "+") 1649 return {[=](uint64_t Dot) { return L(Dot) + R(Dot); }, IsAbs, GetOutSec}; 1650 if (Op == "-") 1651 return {[=](uint64_t Dot) { return L(Dot) - R(Dot); }, IsAbs, GetOutSec}; 1652 if (Op == "<<") 1653 return [=](uint64_t Dot) { return L(Dot) << R(Dot); }; 1654 if (Op == ">>") 1655 return [=](uint64_t Dot) { return L(Dot) >> R(Dot); }; 1656 if (Op == "<") 1657 return [=](uint64_t Dot) { return L(Dot) < R(Dot); }; 1658 if (Op == ">") 1659 return [=](uint64_t Dot) { return L(Dot) > R(Dot); }; 1660 if (Op == ">=") 1661 return [=](uint64_t Dot) { return L(Dot) >= R(Dot); }; 1662 if (Op == "<=") 1663 return [=](uint64_t Dot) { return L(Dot) <= R(Dot); }; 1664 if (Op == "==") 1665 return [=](uint64_t Dot) { return L(Dot) == R(Dot); }; 1666 if (Op == "!=") 1667 return [=](uint64_t Dot) { return L(Dot) != R(Dot); }; 1668 if (Op == "&") 1669 return [=](uint64_t Dot) { return L(Dot) & R(Dot); }; 1670 if (Op == "|") 1671 return [=](uint64_t Dot) { return L(Dot) | R(Dot); }; 1672 llvm_unreachable("invalid operator"); 1673 } 1674 1675 // This is a part of the operator-precedence parser. This function 1676 // assumes that the remaining token stream starts with an operator. 1677 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { 1678 while (!atEOF() && !Error) { 1679 // Read an operator and an expression. 1680 if (consume("?")) 1681 return readTernary(Lhs); 1682 StringRef Op1 = peek(); 1683 if (precedence(Op1) < MinPrec) 1684 break; 1685 skip(); 1686 Expr Rhs = readPrimary(); 1687 1688 // Evaluate the remaining part of the expression first if the 1689 // next operator has greater precedence than the previous one. 1690 // For example, if we have read "+" and "3", and if the next 1691 // operator is "*", then we'll evaluate 3 * ... part first. 1692 while (!atEOF()) { 1693 StringRef Op2 = peek(); 1694 if (precedence(Op2) <= precedence(Op1)) 1695 break; 1696 Rhs = readExpr1(Rhs, precedence(Op2)); 1697 } 1698 1699 Lhs = combine(Op1, Lhs, Rhs); 1700 } 1701 return Lhs; 1702 } 1703 1704 uint64_t static getConstant(StringRef S) { 1705 if (S == "COMMONPAGESIZE") 1706 return Target->PageSize; 1707 if (S == "MAXPAGESIZE") 1708 return Config->MaxPageSize; 1709 error("unknown constant: " + S); 1710 return 0; 1711 } 1712 1713 // Parses Tok as an integer. Returns true if successful. 1714 // It recognizes hexadecimal (prefixed with "0x" or suffixed with "H") 1715 // and decimal numbers. Decimal numbers may have "K" (kilo) or 1716 // "M" (mega) prefixes. 1717 static bool readInteger(StringRef Tok, uint64_t &Result) { 1718 // Negative number 1719 if (Tok.startswith("-")) { 1720 if (!readInteger(Tok.substr(1), Result)) 1721 return false; 1722 Result = -Result; 1723 return true; 1724 } 1725 1726 // Hexadecimal 1727 if (Tok.startswith_lower("0x")) 1728 return !Tok.substr(2).getAsInteger(16, Result); 1729 if (Tok.endswith_lower("H")) 1730 return !Tok.drop_back().getAsInteger(16, Result); 1731 1732 // Decimal 1733 int Suffix = 1; 1734 if (Tok.endswith_lower("K")) { 1735 Suffix = 1024; 1736 Tok = Tok.drop_back(); 1737 } else if (Tok.endswith_lower("M")) { 1738 Suffix = 1024 * 1024; 1739 Tok = Tok.drop_back(); 1740 } 1741 if (Tok.getAsInteger(10, Result)) 1742 return false; 1743 Result *= Suffix; 1744 return true; 1745 } 1746 1747 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { 1748 int Size = StringSwitch<unsigned>(Tok) 1749 .Case("BYTE", 1) 1750 .Case("SHORT", 2) 1751 .Case("LONG", 4) 1752 .Case("QUAD", 8) 1753 .Default(-1); 1754 if (Size == -1) 1755 return nullptr; 1756 1757 return new BytesDataCommand(readParenExpr(), Size); 1758 } 1759 1760 StringRef ScriptParser::readParenLiteral() { 1761 expect("("); 1762 StringRef Tok = next(); 1763 expect(")"); 1764 return Tok; 1765 } 1766 1767 Expr ScriptParser::readPrimary() { 1768 if (peek() == "(") 1769 return readParenExpr(); 1770 1771 StringRef Tok = next(); 1772 std::string Location = getCurrentLocation(); 1773 1774 if (Tok == "~") { 1775 Expr E = readPrimary(); 1776 return [=](uint64_t Dot) { return ~E(Dot); }; 1777 } 1778 if (Tok == "-") { 1779 Expr E = readPrimary(); 1780 return [=](uint64_t Dot) { return -E(Dot); }; 1781 } 1782 1783 // Built-in functions are parsed here. 1784 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 1785 if (Tok == "ADDR") { 1786 StringRef Name = readParenLiteral(); 1787 return {[=](uint64_t Dot) { 1788 return ScriptBase->getOutputSection(Location, Name)->Addr; 1789 }, 1790 [=] { return false; }, 1791 [=] { return ScriptBase->getOutputSection(Location, Name); }}; 1792 } 1793 if (Tok == "LOADADDR") { 1794 StringRef Name = readParenLiteral(); 1795 return [=](uint64_t Dot) { 1796 return ScriptBase->getOutputSection(Location, Name)->getLMA(); 1797 }; 1798 } 1799 if (Tok == "ASSERT") 1800 return readAssert(); 1801 if (Tok == "ALIGN") { 1802 expect("("); 1803 Expr E = readExpr(); 1804 if (consume(",")) { 1805 Expr E2 = readExpr(); 1806 expect(")"); 1807 return [=](uint64_t Dot) { return alignTo(E(Dot), E2(Dot)); }; 1808 } 1809 expect(")"); 1810 return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; 1811 } 1812 if (Tok == "CONSTANT") { 1813 StringRef Name = readParenLiteral(); 1814 return [=](uint64_t Dot) { return getConstant(Name); }; 1815 } 1816 if (Tok == "DEFINED") { 1817 StringRef Name = readParenLiteral(); 1818 return [=](uint64_t Dot) { return ScriptBase->isDefined(Name) ? 1 : 0; }; 1819 } 1820 if (Tok == "SEGMENT_START") { 1821 expect("("); 1822 skip(); 1823 expect(","); 1824 Expr E = readExpr(); 1825 expect(")"); 1826 return [=](uint64_t Dot) { return E(Dot); }; 1827 } 1828 if (Tok == "DATA_SEGMENT_ALIGN") { 1829 expect("("); 1830 Expr E = readExpr(); 1831 expect(","); 1832 readExpr(); 1833 expect(")"); 1834 return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; 1835 } 1836 if (Tok == "DATA_SEGMENT_END") { 1837 expect("("); 1838 expect("."); 1839 expect(")"); 1840 return [](uint64_t Dot) { return Dot; }; 1841 } 1842 // GNU linkers implements more complicated logic to handle 1843 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and just align to 1844 // the next page boundary for simplicity. 1845 if (Tok == "DATA_SEGMENT_RELRO_END") { 1846 expect("("); 1847 readExpr(); 1848 expect(","); 1849 readExpr(); 1850 expect(")"); 1851 return [](uint64_t Dot) { return alignTo(Dot, Target->PageSize); }; 1852 } 1853 if (Tok == "SIZEOF") { 1854 StringRef Name = readParenLiteral(); 1855 return [=](uint64_t Dot) { return ScriptBase->getOutputSectionSize(Name); }; 1856 } 1857 if (Tok == "ALIGNOF") { 1858 StringRef Name = readParenLiteral(); 1859 return [=](uint64_t Dot) { 1860 return ScriptBase->getOutputSection(Location, Name)->Addralign; 1861 }; 1862 } 1863 if (Tok == "SIZEOF_HEADERS") 1864 return [=](uint64_t Dot) { return ScriptBase->getHeaderSize(); }; 1865 1866 // Tok is a literal number. 1867 uint64_t V; 1868 if (readInteger(Tok, V)) 1869 return [=](uint64_t Dot) { return V; }; 1870 1871 // Tok is a symbol name. 1872 if (Tok != "." && !isValidCIdentifier(Tok)) 1873 setError("malformed number: " + Tok); 1874 return {[=](uint64_t Dot) { return getSymbolValue(Location, Tok, Dot); }, 1875 [=] { return isAbsolute(Tok); }, 1876 [=] { return ScriptBase->getSymbolSection(Tok); }}; 1877 } 1878 1879 Expr ScriptParser::readTernary(Expr Cond) { 1880 Expr L = readExpr(); 1881 expect(":"); 1882 Expr R = readExpr(); 1883 return [=](uint64_t Dot) { return Cond(Dot) ? L(Dot) : R(Dot); }; 1884 } 1885 1886 Expr ScriptParser::readParenExpr() { 1887 expect("("); 1888 Expr E = readExpr(); 1889 expect(")"); 1890 return E; 1891 } 1892 1893 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 1894 std::vector<StringRef> Phdrs; 1895 while (!Error && peek().startswith(":")) { 1896 StringRef Tok = next(); 1897 Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); 1898 } 1899 return Phdrs; 1900 } 1901 1902 // Read a program header type name. The next token must be a 1903 // name of a program header type or a constant (e.g. "0x3"). 1904 unsigned ScriptParser::readPhdrType() { 1905 StringRef Tok = next(); 1906 uint64_t Val; 1907 if (readInteger(Tok, Val)) 1908 return Val; 1909 1910 unsigned Ret = StringSwitch<unsigned>(Tok) 1911 .Case("PT_NULL", PT_NULL) 1912 .Case("PT_LOAD", PT_LOAD) 1913 .Case("PT_DYNAMIC", PT_DYNAMIC) 1914 .Case("PT_INTERP", PT_INTERP) 1915 .Case("PT_NOTE", PT_NOTE) 1916 .Case("PT_SHLIB", PT_SHLIB) 1917 .Case("PT_PHDR", PT_PHDR) 1918 .Case("PT_TLS", PT_TLS) 1919 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1920 .Case("PT_GNU_STACK", PT_GNU_STACK) 1921 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1922 .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) 1923 .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) 1924 .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) 1925 .Default(-1); 1926 1927 if (Ret == (unsigned)-1) { 1928 setError("invalid program header type: " + Tok); 1929 return PT_NULL; 1930 } 1931 return Ret; 1932 } 1933 1934 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 1935 void ScriptParser::readAnonymousDeclaration() { 1936 // Read global symbols first. "global:" is default, so if there's 1937 // no label, we assume global symbols. 1938 if (peek() != "local") { 1939 if (consume("global")) 1940 expect(":"); 1941 Config->VersionScriptGlobals = readSymbols(); 1942 } 1943 readLocals(); 1944 expect("}"); 1945 expect(";"); 1946 } 1947 1948 void ScriptParser::readLocals() { 1949 if (!consume("local")) 1950 return; 1951 expect(":"); 1952 std::vector<SymbolVersion> Locals = readSymbols(); 1953 for (SymbolVersion V : Locals) { 1954 if (V.Name == "*") { 1955 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1956 continue; 1957 } 1958 Config->VersionScriptLocals.push_back(V); 1959 } 1960 } 1961 1962 // Reads a list of symbols, e.g. "VerStr { global: foo; bar; local: *; };". 1963 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1964 // Identifiers start at 2 because 0 and 1 are reserved 1965 // for VER_NDX_LOCAL and VER_NDX_GLOBAL constants. 1966 uint16_t VersionId = Config->VersionDefinitions.size() + 2; 1967 Config->VersionDefinitions.push_back({VerStr, VersionId}); 1968 1969 // Read global symbols. 1970 if (peek() != "local") { 1971 if (consume("global")) 1972 expect(":"); 1973 Config->VersionDefinitions.back().Globals = readSymbols(); 1974 } 1975 readLocals(); 1976 expect("}"); 1977 1978 // Each version may have a parent version. For example, "Ver2" 1979 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" 1980 // as a parent. This version hierarchy is, probably against your 1981 // instinct, purely for hint; the runtime doesn't care about it 1982 // at all. In LLD, we simply ignore it. 1983 if (peek() != ";") 1984 skip(); 1985 expect(";"); 1986 } 1987 1988 // Reads a list of symbols for a versions cript. 1989 std::vector<SymbolVersion> ScriptParser::readSymbols() { 1990 std::vector<SymbolVersion> Ret; 1991 for (;;) { 1992 if (consume("extern")) { 1993 for (SymbolVersion V : readVersionExtern()) 1994 Ret.push_back(V); 1995 continue; 1996 } 1997 1998 if (peek() == "}" || peek() == "local" || Error) 1999 break; 2000 StringRef Tok = next(); 2001 Ret.push_back({unquote(Tok), false, hasWildcard(Tok)}); 2002 expect(";"); 2003 } 2004 return Ret; 2005 } 2006 2007 // Reads an "extern C++" directive, e.g., 2008 // "extern "C++" { ns::*; "f(int, double)"; };" 2009 std::vector<SymbolVersion> ScriptParser::readVersionExtern() { 2010 StringRef Tok = next(); 2011 bool IsCXX = Tok == "\"C++\""; 2012 if (!IsCXX && Tok != "\"C\"") 2013 setError("Unknown language"); 2014 expect("{"); 2015 2016 std::vector<SymbolVersion> Ret; 2017 while (!Error && peek() != "}") { 2018 StringRef Tok = next(); 2019 bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); 2020 Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); 2021 expect(";"); 2022 } 2023 2024 expect("}"); 2025 expect(";"); 2026 return Ret; 2027 } 2028 2029 uint64_t ScriptParser::readMemoryAssignment( 2030 StringRef S1, StringRef S2, StringRef S3) { 2031 if (!(consume(S1) || consume(S2) || consume(S3))) { 2032 setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); 2033 return 0; 2034 } 2035 expect("="); 2036 2037 // TODO: Fully support constant expressions. 2038 uint64_t Val; 2039 if (!readInteger(next(), Val)) 2040 setError("nonconstant expression for "+ S1); 2041 return Val; 2042 } 2043 2044 // Parse the MEMORY command as specified in: 2045 // https://sourceware.org/binutils/docs/ld/MEMORY.html 2046 // 2047 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... } 2048 void ScriptParser::readMemory() { 2049 expect("{"); 2050 while (!Error && !consume("}")) { 2051 StringRef Name = next(); 2052 2053 uint32_t Flags = 0; 2054 uint32_t NegFlags = 0; 2055 if (consume("(")) { 2056 std::tie(Flags, NegFlags) = readMemoryAttributes(); 2057 expect(")"); 2058 } 2059 expect(":"); 2060 2061 uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); 2062 expect(","); 2063 uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); 2064 2065 // Add the memory region to the region map (if it doesn't already exist). 2066 auto It = Opt.MemoryRegions.find(Name); 2067 if (It != Opt.MemoryRegions.end()) 2068 setError("region '" + Name + "' already defined"); 2069 else 2070 Opt.MemoryRegions[Name] = {Name, Origin, Length, Origin, Flags, NegFlags}; 2071 } 2072 } 2073 2074 // This function parses the attributes used to match against section 2075 // flags when placing output sections in a memory region. These flags 2076 // are only used when an explicit memory region name is not used. 2077 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { 2078 uint32_t Flags = 0; 2079 uint32_t NegFlags = 0; 2080 bool Invert = false; 2081 2082 for (char C : next().lower()) { 2083 uint32_t Flag = 0; 2084 if (C == '!') 2085 Invert = !Invert; 2086 else if (C == 'w') 2087 Flag = SHF_WRITE; 2088 else if (C == 'x') 2089 Flag = SHF_EXECINSTR; 2090 else if (C == 'a') 2091 Flag = SHF_ALLOC; 2092 else if (C != 'r') 2093 setError("invalid memory region attribute"); 2094 2095 if (Invert) 2096 NegFlags |= Flag; 2097 else 2098 Flags |= Flag; 2099 } 2100 return {Flags, NegFlags}; 2101 } 2102 2103 void elf::readLinkerScript(MemoryBufferRef MB) { 2104 ScriptParser(MB).readLinkerScript(); 2105 } 2106 2107 void elf::readVersionScript(MemoryBufferRef MB) { 2108 ScriptParser(MB).readVersionScript(); 2109 } 2110 2111 void elf::readDynamicList(MemoryBufferRef MB) { 2112 ScriptParser(MB).readDynamicList(); 2113 } 2114 2115 template class elf::LinkerScript<ELF32LE>; 2116 template class elf::LinkerScript<ELF32BE>; 2117 template class elf::LinkerScript<ELF64LE>; 2118 template class elf::LinkerScript<ELF64BE>; 2119