1 //===- LinkerScript.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the parser/evaluator of the linker script. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "LinkerScript.h" 15 #include "Config.h" 16 #include "Driver.h" 17 #include "InputSection.h" 18 #include "Memory.h" 19 #include "OutputSections.h" 20 #include "ScriptParser.h" 21 #include "Strings.h" 22 #include "SymbolTable.h" 23 #include "Symbols.h" 24 #include "SyntheticSections.h" 25 #include "Target.h" 26 #include "Writer.h" 27 #include "llvm/ADT/STLExtras.h" 28 #include "llvm/ADT/SmallString.h" 29 #include "llvm/ADT/StringRef.h" 30 #include "llvm/ADT/StringSwitch.h" 31 #include "llvm/Support/Casting.h" 32 #include "llvm/Support/ELF.h" 33 #include "llvm/Support/Endian.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/FileSystem.h" 36 #include "llvm/Support/MathExtras.h" 37 #include "llvm/Support/Path.h" 38 #include <algorithm> 39 #include <cassert> 40 #include <cstddef> 41 #include <cstdint> 42 #include <iterator> 43 #include <limits> 44 #include <memory> 45 #include <string> 46 #include <tuple> 47 #include <vector> 48 49 using namespace llvm; 50 using namespace llvm::ELF; 51 using namespace llvm::object; 52 using namespace llvm::support::endian; 53 using namespace lld; 54 using namespace lld::elf; 55 56 LinkerScriptBase *elf::ScriptBase; 57 ScriptConfiguration *elf::ScriptConfig; 58 59 template <class ELFT> static SymbolBody *addRegular(SymbolAssignment *Cmd) { 60 uint8_t Visibility = Cmd->Hidden ? 
STV_HIDDEN : STV_DEFAULT; 61 Symbol *Sym = Symtab<ELFT>::X->addUndefined( 62 Cmd->Name, /*IsLocal=*/false, STB_GLOBAL, Visibility, 63 /*Type*/ 0, 64 /*CanOmitFromDynSym*/ false, /*File*/ nullptr); 65 66 replaceBody<DefinedRegular<ELFT>>(Sym, Cmd->Name, /*IsLocal=*/false, 67 Visibility, STT_NOTYPE, 0, 0, nullptr, 68 nullptr); 69 return Sym->body(); 70 } 71 72 template <class ELFT> static SymbolBody *addSynthetic(SymbolAssignment *Cmd) { 73 uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT; 74 const OutputSectionBase *Sec = 75 ScriptConfig->HasSections ? nullptr : Cmd->Expression.Section(); 76 Symbol *Sym = Symtab<ELFT>::X->addUndefined( 77 Cmd->Name, /*IsLocal=*/false, STB_GLOBAL, Visibility, 78 /*Type*/ 0, 79 /*CanOmitFromDynSym*/ false, /*File*/ nullptr); 80 81 replaceBody<DefinedSynthetic>(Sym, Cmd->Name, 0, Sec); 82 return Sym->body(); 83 } 84 85 static bool isUnderSysroot(StringRef Path) { 86 if (Config->Sysroot == "") 87 return false; 88 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 89 if (sys::fs::equivalent(Config->Sysroot, Path)) 90 return true; 91 return false; 92 } 93 94 template <class ELFT> static void assignSymbol(SymbolAssignment *Cmd) { 95 // If there are sections, then let the value be assigned later in 96 // `assignAddresses`. 97 if (ScriptConfig->HasSections) 98 return; 99 100 uint64_t Value = Cmd->Expression(0); 101 if (Cmd->Expression.IsAbsolute()) { 102 cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Value; 103 } else { 104 const OutputSectionBase *Sec = Cmd->Expression.Section(); 105 if (Sec) 106 cast<DefinedSynthetic>(Cmd->Sym)->Value = Value - Sec->Addr; 107 } 108 } 109 110 template <class ELFT> static void addSymbol(SymbolAssignment *Cmd) { 111 if (Cmd->Name == ".") 112 return; 113 114 // If a symbol was in PROVIDE(), we need to define it only when 115 // it is a referenced undefined symbol. 
116 SymbolBody *B = Symtab<ELFT>::X->find(Cmd->Name); 117 if (Cmd->Provide && (!B || B->isDefined())) 118 return; 119 120 // Otherwise, create a new symbol if one does not exist or an 121 // undefined one does exist. 122 if (Cmd->Expression.IsAbsolute()) 123 Cmd->Sym = addRegular<ELFT>(Cmd); 124 else 125 Cmd->Sym = addSynthetic<ELFT>(Cmd); 126 assignSymbol<ELFT>(Cmd); 127 } 128 129 bool SymbolAssignment::classof(const BaseCommand *C) { 130 return C->Kind == AssignmentKind; 131 } 132 133 bool OutputSectionCommand::classof(const BaseCommand *C) { 134 return C->Kind == OutputSectionKind; 135 } 136 137 bool InputSectionDescription::classof(const BaseCommand *C) { 138 return C->Kind == InputSectionKind; 139 } 140 141 bool AssertCommand::classof(const BaseCommand *C) { 142 return C->Kind == AssertKind; 143 } 144 145 bool BytesDataCommand::classof(const BaseCommand *C) { 146 return C->Kind == BytesDataKind; 147 } 148 149 template <class ELFT> LinkerScript<ELFT>::LinkerScript() = default; 150 template <class ELFT> LinkerScript<ELFT>::~LinkerScript() = default; 151 152 template <class ELFT> static StringRef basename(InputSectionBase<ELFT> *S) { 153 if (S->getFile()) 154 return sys::path::filename(S->getFile()->getName()); 155 return ""; 156 } 157 158 template <class ELFT> 159 bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) { 160 for (InputSectionDescription *ID : Opt.KeptSections) 161 if (ID->FilePat.match(basename(S))) 162 for (SectionPattern &P : ID->SectionPatterns) 163 if (P.SectionPat.match(S->Name)) 164 return true; 165 return false; 166 } 167 168 static bool comparePriority(InputSectionData *A, InputSectionData *B) { 169 return getPriority(A->Name) < getPriority(B->Name); 170 } 171 172 static bool compareName(InputSectionData *A, InputSectionData *B) { 173 return A->Name < B->Name; 174 } 175 176 static bool compareAlignment(InputSectionData *A, InputSectionData *B) { 177 // ">" is not a mistake. 
Larger alignments are placed before smaller 178 // alignments in order to reduce the amount of padding necessary. 179 // This is compatible with GNU. 180 return A->Alignment > B->Alignment; 181 } 182 183 static std::function<bool(InputSectionData *, InputSectionData *)> 184 getComparator(SortSectionPolicy K) { 185 switch (K) { 186 case SortSectionPolicy::Alignment: 187 return compareAlignment; 188 case SortSectionPolicy::Name: 189 return compareName; 190 case SortSectionPolicy::Priority: 191 return comparePriority; 192 default: 193 llvm_unreachable("unknown sort policy"); 194 } 195 } 196 197 template <class ELFT> 198 static bool matchConstraints(ArrayRef<InputSectionBase<ELFT> *> Sections, 199 ConstraintKind Kind) { 200 if (Kind == ConstraintKind::NoConstraint) 201 return true; 202 bool IsRW = llvm::any_of(Sections, [=](InputSectionData *Sec2) { 203 auto *Sec = static_cast<InputSectionBase<ELFT> *>(Sec2); 204 return Sec->Flags & SHF_WRITE; 205 }); 206 return (IsRW && Kind == ConstraintKind::ReadWrite) || 207 (!IsRW && Kind == ConstraintKind::ReadOnly); 208 } 209 210 static void sortSections(InputSectionData **Begin, InputSectionData **End, 211 SortSectionPolicy K) { 212 if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None) 213 std::stable_sort(Begin, End, getComparator(K)); 214 } 215 216 // Compute and remember which sections the InputSectionDescription matches. 217 template <class ELFT> 218 void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) { 219 // Collects all sections that satisfy constraints of I 220 // and attach them to I. 
221 for (SectionPattern &Pat : I->SectionPatterns) { 222 size_t SizeBefore = I->Sections.size(); 223 224 for (InputSectionBase<ELFT> *S : Symtab<ELFT>::X->Sections) { 225 if (!S->Live || S->Assigned) 226 continue; 227 228 StringRef Filename = basename(S); 229 if (!I->FilePat.match(Filename) || Pat.ExcludedFilePat.match(Filename)) 230 continue; 231 if (!Pat.SectionPat.match(S->Name)) 232 continue; 233 I->Sections.push_back(S); 234 S->Assigned = true; 235 } 236 237 // Sort sections as instructed by SORT-family commands and --sort-section 238 // option. Because SORT-family commands can be nested at most two depth 239 // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command 240 // line option is respected even if a SORT command is given, the exact 241 // behavior we have here is a bit complicated. Here are the rules. 242 // 243 // 1. If two SORT commands are given, --sort-section is ignored. 244 // 2. If one SORT command is given, and if it is not SORT_NONE, 245 // --sort-section is handled as an inner SORT command. 246 // 3. If one SORT command is given, and if it is SORT_NONE, don't sort. 247 // 4. If no SORT command is given, sort according to --sort-section. 
248 InputSectionData **Begin = I->Sections.data() + SizeBefore; 249 InputSectionData **End = I->Sections.data() + I->Sections.size(); 250 if (Pat.SortOuter != SortSectionPolicy::None) { 251 if (Pat.SortInner == SortSectionPolicy::Default) 252 sortSections(Begin, End, Config->SortSection); 253 else 254 sortSections(Begin, End, Pat.SortInner); 255 sortSections(Begin, End, Pat.SortOuter); 256 } 257 } 258 } 259 260 template <class ELFT> 261 void LinkerScript<ELFT>::discard(ArrayRef<InputSectionBase<ELFT> *> V) { 262 for (InputSectionBase<ELFT> *S : V) { 263 S->Live = false; 264 reportDiscarded(S); 265 } 266 } 267 268 template <class ELFT> 269 std::vector<InputSectionBase<ELFT> *> 270 LinkerScript<ELFT>::createInputSectionList(OutputSectionCommand &OutCmd) { 271 std::vector<InputSectionBase<ELFT> *> Ret; 272 273 for (const std::unique_ptr<BaseCommand> &Base : OutCmd.Commands) { 274 auto *Cmd = dyn_cast<InputSectionDescription>(Base.get()); 275 if (!Cmd) 276 continue; 277 computeInputSections(Cmd); 278 for (InputSectionData *S : Cmd->Sections) 279 Ret.push_back(static_cast<InputSectionBase<ELFT> *>(S)); 280 } 281 282 return Ret; 283 } 284 285 template <class ELFT> 286 void LinkerScript<ELFT>::addSection(OutputSectionFactory<ELFT> &Factory, 287 InputSectionBase<ELFT> *Sec, 288 StringRef Name) { 289 OutputSectionBase *OutSec; 290 bool IsNew; 291 std::tie(OutSec, IsNew) = Factory.create(Sec, Name); 292 if (IsNew) 293 OutputSections->push_back(OutSec); 294 OutSec->addSection(Sec); 295 } 296 297 template <class ELFT> 298 void LinkerScript<ELFT>::processCommands(OutputSectionFactory<ELFT> &Factory) { 299 for (unsigned I = 0; I < Opt.Commands.size(); ++I) { 300 auto Iter = Opt.Commands.begin() + I; 301 const std::unique_ptr<BaseCommand> &Base1 = *Iter; 302 303 // Handle symbol assignments outside of any output section. 
304 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base1.get())) { 305 addSymbol<ELFT>(Cmd); 306 continue; 307 } 308 309 if (auto *Cmd = dyn_cast<AssertCommand>(Base1.get())) { 310 // If we don't have SECTIONS then output sections have already been 311 // created by Writer<ELFT>. The LinkerScript<ELFT>::assignAddresses 312 // will not be called, so ASSERT should be evaluated now. 313 if (!Opt.HasSections) 314 Cmd->Expression(0); 315 continue; 316 } 317 318 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base1.get())) { 319 std::vector<InputSectionBase<ELFT> *> V = createInputSectionList(*Cmd); 320 321 // The output section name `/DISCARD/' is special. 322 // Any input section assigned to it is discarded. 323 if (Cmd->Name == "/DISCARD/") { 324 discard(V); 325 continue; 326 } 327 328 // This is for ONLY_IF_RO and ONLY_IF_RW. An output section directive 329 // ".foo : ONLY_IF_R[OW] { ... }" is handled only if all member input 330 // sections satisfy a given constraint. If not, a directive is handled 331 // as if it wasn't present from the beginning. 332 // 333 // Because we'll iterate over Commands many more times, the easiest 334 // way to "make it as if it wasn't present" is to just remove it. 335 if (!matchConstraints<ELFT>(V, Cmd->Constraint)) { 336 for (InputSectionBase<ELFT> *S : V) 337 S->Assigned = false; 338 Opt.Commands.erase(Iter); 339 --I; 340 continue; 341 } 342 343 // A directive may contain symbol definitions like this: 344 // ".foo : { ...; bar = .; }". Handle them. 345 for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) 346 if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base.get())) 347 addSymbol<ELFT>(OutCmd); 348 349 // Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign 350 // is given, input sections are aligned to that value, whether the 351 // given value is larger or smaller than the original section alignment. 
352 if (Cmd->SubalignExpr) { 353 uint32_t Subalign = Cmd->SubalignExpr(0); 354 for (InputSectionBase<ELFT> *S : V) 355 S->Alignment = Subalign; 356 } 357 358 // Add input sections to an output section. 359 for (InputSectionBase<ELFT> *S : V) 360 addSection(Factory, S, Cmd->Name); 361 } 362 } 363 } 364 365 // Add sections that didn't match any sections command. 366 template <class ELFT> 367 void LinkerScript<ELFT>::addOrphanSections( 368 OutputSectionFactory<ELFT> &Factory) { 369 for (InputSectionBase<ELFT> *S : Symtab<ELFT>::X->Sections) 370 if (S->Live && !S->OutSec) 371 addSection(Factory, S, getOutputSectionName(S->Name)); 372 } 373 374 // Sets value of a section-defined symbol. Two kinds of 375 // symbols are processed: synthetic symbols, whose value 376 // is an offset from beginning of section and regular 377 // symbols whose value is absolute. 378 template <class ELFT> 379 static void assignSectionSymbol(SymbolAssignment *Cmd, 380 typename ELFT::uint Value) { 381 if (!Cmd->Sym) 382 return; 383 384 if (auto *Body = dyn_cast<DefinedSynthetic>(Cmd->Sym)) { 385 Body->Section = Cmd->Expression.Section(); 386 Body->Value = Cmd->Expression(Value) - Body->Section->Addr; 387 return; 388 } 389 auto *Body = cast<DefinedRegular<ELFT>>(Cmd->Sym); 390 Body->Value = Cmd->Expression(Value); 391 } 392 393 template <class ELFT> static bool isTbss(OutputSectionBase *Sec) { 394 return (Sec->Flags & SHF_TLS) && Sec->Type == SHT_NOBITS; 395 } 396 397 template <class ELFT> void LinkerScript<ELFT>::output(InputSection<ELFT> *S) { 398 if (!AlreadyOutputIS.insert(S).second) 399 return; 400 bool IsTbss = isTbss<ELFT>(CurOutSec); 401 402 uintX_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot; 403 Pos = alignTo(Pos, S->Alignment); 404 S->OutSecOff = Pos - CurOutSec->Addr; 405 Pos += S->getSize(); 406 407 // Update output section size after adding each section. 
This is so that 408 // SIZEOF works correctly in the case below: 409 // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) } 410 CurOutSec->Size = Pos - CurOutSec->Addr; 411 412 // If there is a memory region associated with this input section, then 413 // place the section in that region and update the region index. 414 if (CurMemRegion) { 415 CurMemRegion->Offset += CurOutSec->Size; 416 uint64_t CurSize = CurMemRegion->Offset - CurMemRegion->Origin; 417 if (CurSize > CurMemRegion->Length) { 418 uint64_t OverflowAmt = CurSize - CurMemRegion->Length; 419 error("section '" + CurOutSec->Name + "' will not fit in region '" + 420 CurMemRegion->Name + "': overflowed by " + Twine(OverflowAmt) + 421 " bytes"); 422 } 423 } 424 425 if (IsTbss) 426 ThreadBssOffset = Pos - Dot; 427 else 428 Dot = Pos; 429 } 430 431 template <class ELFT> void LinkerScript<ELFT>::flush() { 432 if (!CurOutSec || !AlreadyOutputOS.insert(CurOutSec).second) 433 return; 434 if (auto *OutSec = dyn_cast<OutputSection<ELFT>>(CurOutSec)) { 435 for (InputSection<ELFT> *I : OutSec->Sections) 436 output(I); 437 } else { 438 Dot += CurOutSec->Size; 439 } 440 } 441 442 template <class ELFT> 443 void LinkerScript<ELFT>::switchTo(OutputSectionBase *Sec) { 444 if (CurOutSec == Sec) 445 return; 446 if (AlreadyOutputOS.count(Sec)) 447 return; 448 449 flush(); 450 CurOutSec = Sec; 451 452 Dot = alignTo(Dot, CurOutSec->Addralign); 453 CurOutSec->Addr = isTbss<ELFT>(CurOutSec) ? Dot + ThreadBssOffset : Dot; 454 455 // If neither AT nor AT> is specified for an allocatable section, the linker 456 // will set the LMA such that the difference between VMA and LMA for the 457 // section is the same as the preceding output section in the same region 458 // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html 459 CurOutSec->setLMAOffset(LMAOffset); 460 } 461 462 template <class ELFT> void LinkerScript<ELFT>::process(BaseCommand &Base) { 463 // This handles the assignments to symbol or to a location counter (.) 
464 if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) { 465 if (AssignCmd->Name == ".") { 466 // Update to location counter means update to section size. 467 uintX_t Val = AssignCmd->Expression(Dot); 468 if (Val < Dot) 469 error("unable to move location counter backward for: " + 470 CurOutSec->Name); 471 Dot = Val; 472 CurOutSec->Size = Dot - CurOutSec->Addr; 473 return; 474 } 475 assignSectionSymbol<ELFT>(AssignCmd, Dot); 476 return; 477 } 478 479 // Handle BYTE(), SHORT(), LONG(), or QUAD(). 480 if (auto *DataCmd = dyn_cast<BytesDataCommand>(&Base)) { 481 DataCmd->Offset = Dot - CurOutSec->Addr; 482 Dot += DataCmd->Size; 483 CurOutSec->Size = Dot - CurOutSec->Addr; 484 return; 485 } 486 487 if (auto *AssertCmd = dyn_cast<AssertCommand>(&Base)) { 488 AssertCmd->Expression(Dot); 489 return; 490 } 491 492 // It handles single input section description command, 493 // calculates and assigns the offsets for each section and also 494 // updates the output section size. 495 auto &ICmd = cast<InputSectionDescription>(Base); 496 for (InputSectionData *ID : ICmd.Sections) { 497 // We tentatively added all synthetic sections at the beginning and removed 498 // empty ones afterwards (because there is no way to know whether they were 499 // going be empty or not other than actually running linker scripts.) 500 // We need to ignore remains of empty sections. 
501 if (auto *Sec = dyn_cast<SyntheticSection<ELFT>>(ID)) 502 if (Sec->empty()) 503 continue; 504 505 auto *IB = static_cast<InputSectionBase<ELFT> *>(ID); 506 switchTo(IB->OutSec); 507 if (auto *I = dyn_cast<InputSection<ELFT>>(IB)) 508 output(I); 509 else 510 flush(); 511 } 512 } 513 514 template <class ELFT> 515 static OutputSectionBase * 516 findSection(StringRef Name, const std::vector<OutputSectionBase *> &Sections) { 517 auto End = Sections.end(); 518 auto HasName = [=](OutputSectionBase *Sec) { return Sec->getName() == Name; }; 519 auto I = std::find_if(Sections.begin(), End, HasName); 520 std::vector<OutputSectionBase *> Ret; 521 if (I == End) 522 return nullptr; 523 assert(std::find_if(I + 1, End, HasName) == End); 524 return *I; 525 } 526 527 // This function searches for a memory region to place the given output 528 // section in. If found, a pointer to the appropriate memory region is 529 // returned. Otherwise, a nullptr is returned. 530 template <class ELFT> 531 MemoryRegion *LinkerScript<ELFT>::findMemoryRegion(OutputSectionCommand *Cmd, 532 OutputSectionBase *Sec) { 533 // If a memory region name was specified in the output section command, 534 // then try to find that region first. 535 if (!Cmd->MemoryRegionName.empty()) { 536 auto It = Opt.MemoryRegions.find(Cmd->MemoryRegionName); 537 if (It != Opt.MemoryRegions.end()) 538 return &It->second; 539 error("memory region '" + Cmd->MemoryRegionName + "' not declared"); 540 return nullptr; 541 } 542 543 // The memory region name is empty, thus a suitable region must be 544 // searched for in the region map. If the region map is empty, just 545 // return. Note that this check doesn't happen at the very beginning 546 // so that uses of undeclared regions can be caught. 547 if (!Opt.MemoryRegions.size()) 548 return nullptr; 549 550 // See if a region can be found by matching section flags. 
551 for (auto &MRI : Opt.MemoryRegions) { 552 MemoryRegion &MR = MRI.second; 553 if ((MR.Flags & Sec->Flags) != 0 && (MR.NegFlags & Sec->Flags) == 0) 554 return &MR; 555 } 556 557 // Otherwise, no suitable region was found. 558 if (Sec->Flags & SHF_ALLOC) 559 error("no memory region specified for section '" + Sec->Name + "'"); 560 return nullptr; 561 } 562 563 // This function assigns offsets to input sections and an output section 564 // for a single sections command (e.g. ".text { *(.text); }"). 565 template <class ELFT> 566 void LinkerScript<ELFT>::assignOffsets(OutputSectionCommand *Cmd) { 567 if (Cmd->LMAExpr) 568 LMAOffset = Cmd->LMAExpr(Dot) - Dot; 569 OutputSectionBase *Sec = findSection<ELFT>(Cmd->Name, *OutputSections); 570 if (!Sec) 571 return; 572 573 // Try and find an appropriate memory region to assign offsets in. 574 CurMemRegion = findMemoryRegion(Cmd, Sec); 575 if (CurMemRegion) 576 Dot = CurMemRegion->Offset; 577 switchTo(Sec); 578 579 // Find the last section output location. We will output orphan sections 580 // there so that end symbols point to the correct location. 581 auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(), 582 [](const std::unique_ptr<BaseCommand> &Cmd) { 583 return !isa<SymbolAssignment>(*Cmd); 584 }) 585 .base(); 586 for (auto I = Cmd->Commands.begin(); I != E; ++I) 587 process(**I); 588 flush(); 589 std::for_each(E, Cmd->Commands.end(), 590 [this](std::unique_ptr<BaseCommand> &B) { process(*B.get()); }); 591 } 592 593 template <class ELFT> void LinkerScript<ELFT>::removeEmptyCommands() { 594 // It is common practice to use very generic linker scripts. So for any 595 // given run some of the output sections in the script will be empty. 596 // We could create corresponding empty output sections, but that would 597 // clutter the output. 598 // We instead remove trivially empty sections. The bfd linker seems even 599 // more aggressive at removing them. 
600 auto Pos = std::remove_if( 601 Opt.Commands.begin(), Opt.Commands.end(), 602 [&](const std::unique_ptr<BaseCommand> &Base) { 603 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 604 return !findSection<ELFT>(Cmd->Name, *OutputSections); 605 return false; 606 }); 607 Opt.Commands.erase(Pos, Opt.Commands.end()); 608 } 609 610 static bool isAllSectionDescription(const OutputSectionCommand &Cmd) { 611 for (const std::unique_ptr<BaseCommand> &I : Cmd.Commands) 612 if (!isa<InputSectionDescription>(*I)) 613 return false; 614 return true; 615 } 616 617 template <class ELFT> void LinkerScript<ELFT>::adjustSectionsBeforeSorting() { 618 // If the output section contains only symbol assignments, create a 619 // corresponding output section. The bfd linker seems to only create them if 620 // '.' is assigned to, but creating these section should not have any bad 621 // consequeces and gives us a section to put the symbol in. 622 uintX_t Flags = SHF_ALLOC; 623 uint32_t Type = SHT_NOBITS; 624 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 625 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 626 if (!Cmd) 627 continue; 628 if (OutputSectionBase *Sec = 629 findSection<ELFT>(Cmd->Name, *OutputSections)) { 630 Flags = Sec->Flags; 631 Type = Sec->Type; 632 continue; 633 } 634 635 if (isAllSectionDescription(*Cmd)) 636 continue; 637 638 auto *OutSec = make<OutputSection<ELFT>>(Cmd->Name, Type, Flags); 639 OutputSections->push_back(OutSec); 640 } 641 } 642 643 template <class ELFT> void LinkerScript<ELFT>::adjustSectionsAfterSorting() { 644 placeOrphanSections(); 645 646 // If output section command doesn't specify any segments, 647 // and we haven't previously assigned any section to segment, 648 // then we simply assign section to the very first load segment. 
649 // Below is an example of such linker script: 650 // PHDRS { seg PT_LOAD; } 651 // SECTIONS { .aaa : { *(.aaa) } } 652 std::vector<StringRef> DefPhdrs; 653 auto FirstPtLoad = 654 std::find_if(Opt.PhdrsCommands.begin(), Opt.PhdrsCommands.end(), 655 [](const PhdrsCommand &Cmd) { return Cmd.Type == PT_LOAD; }); 656 if (FirstPtLoad != Opt.PhdrsCommands.end()) 657 DefPhdrs.push_back(FirstPtLoad->Name); 658 659 // Walk the commands and propagate the program headers to commands that don't 660 // explicitly specify them. 661 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 662 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 663 if (!Cmd) 664 continue; 665 if (Cmd->Phdrs.empty()) 666 Cmd->Phdrs = DefPhdrs; 667 else 668 DefPhdrs = Cmd->Phdrs; 669 } 670 671 removeEmptyCommands(); 672 } 673 674 // When placing orphan sections, we want to place them after symbol assignments 675 // so that an orphan after 676 // begin_foo = .; 677 // foo : { *(foo) } 678 // end_foo = .; 679 // doesn't break the intended meaning of the begin/end symbols. 680 // We don't want to go over sections since Writer<ELFT>::sortSections is the 681 // one in charge of deciding the order of the sections. 682 // We don't want to go over alignments, since doing so in 683 // rx_sec : { *(rx_sec) } 684 // . = ALIGN(0x1000); 685 // /* The RW PT_LOAD starts here*/ 686 // rw_sec : { *(rw_sec) } 687 // would mean that the RW PT_LOAD would become unaligned. 688 static bool shouldSkip(const BaseCommand &Cmd) { 689 if (isa<OutputSectionCommand>(Cmd)) 690 return false; 691 const auto *Assign = dyn_cast<SymbolAssignment>(&Cmd); 692 if (!Assign) 693 return true; 694 return Assign->Name != "."; 695 } 696 697 // Orphan sections are sections present in the input files which are 698 // not explicitly placed into the output file by the linker script. 699 // 700 // When the control reaches this function, Opt.Commands contains 701 // output section commands for non-orphan sections only. 
This function 702 // adds new elements for orphan sections to Opt.Commands so that all 703 // sections are explicitly handled by Opt.Commands. 704 // 705 // Writer<ELFT>::sortSections has already sorted output sections. 706 // What we need to do is to scan OutputSections vector and 707 // Opt.Commands in parallel to find orphan sections. If there is an 708 // output section that doesn't have a corresponding entry in 709 // Opt.Commands, we will insert a new entry to Opt.Commands. 710 // 711 // There is some ambiguity as to where exactly a new entry should be 712 // inserted, because Opt.Commands contains not only output section 713 // commands but other types of commands such as symbol assignment 714 // expressions. There's no correct answer here due to the lack of the 715 // formal specification of the linker script. We use heuristics to 716 // determine whether a new output command should be added before or 717 // after another commands. For the details, look at shouldSkip 718 // function. 719 template <class ELFT> void LinkerScript<ELFT>::placeOrphanSections() { 720 // The OutputSections are already in the correct order. 721 // This loops creates or moves commands as needed so that they are in the 722 // correct order. 723 int CmdIndex = 0; 724 725 // As a horrible special case, skip the first . assignment if it is before any 726 // section. We do this because it is common to set a load address by starting 727 // the script with ". = 0xabcd" and the expectation is that every section is 728 // after that. 
729 auto FirstSectionOrDotAssignment = 730 std::find_if(Opt.Commands.begin(), Opt.Commands.end(), 731 [](const std::unique_ptr<BaseCommand> &Cmd) { 732 if (isa<OutputSectionCommand>(*Cmd)) 733 return true; 734 const auto *Assign = dyn_cast<SymbolAssignment>(Cmd.get()); 735 if (!Assign) 736 return false; 737 return Assign->Name == "."; 738 }); 739 if (FirstSectionOrDotAssignment != Opt.Commands.end()) { 740 CmdIndex = FirstSectionOrDotAssignment - Opt.Commands.begin(); 741 if (isa<SymbolAssignment>(**FirstSectionOrDotAssignment)) 742 ++CmdIndex; 743 } 744 745 for (OutputSectionBase *Sec : *OutputSections) { 746 StringRef Name = Sec->getName(); 747 748 // Find the last spot where we can insert a command and still get the 749 // correct result. 750 auto CmdIter = Opt.Commands.begin() + CmdIndex; 751 auto E = Opt.Commands.end(); 752 while (CmdIter != E && shouldSkip(**CmdIter)) { 753 ++CmdIter; 754 ++CmdIndex; 755 } 756 757 auto Pos = 758 std::find_if(CmdIter, E, [&](const std::unique_ptr<BaseCommand> &Base) { 759 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 760 return Cmd && Cmd->Name == Name; 761 }); 762 if (Pos == E) { 763 Opt.Commands.insert(CmdIter, 764 llvm::make_unique<OutputSectionCommand>(Name)); 765 ++CmdIndex; 766 continue; 767 } 768 769 // Continue from where we found it. 770 CmdIndex = (Pos - Opt.Commands.begin()) + 1; 771 } 772 } 773 774 template <class ELFT> 775 void LinkerScript<ELFT>::assignAddresses(std::vector<PhdrEntry> &Phdrs) { 776 // Assign addresses as instructed by linker script SECTIONS sub-commands. 
777 Dot = 0; 778 779 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 780 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) { 781 if (Cmd->Name == ".") { 782 Dot = Cmd->Expression(Dot); 783 } else if (Cmd->Sym) { 784 assignSectionSymbol<ELFT>(Cmd, Dot); 785 } 786 continue; 787 } 788 789 if (auto *Cmd = dyn_cast<AssertCommand>(Base.get())) { 790 Cmd->Expression(Dot); 791 continue; 792 } 793 794 auto *Cmd = cast<OutputSectionCommand>(Base.get()); 795 if (Cmd->AddrExpr) 796 Dot = Cmd->AddrExpr(Dot); 797 assignOffsets(Cmd); 798 } 799 800 uintX_t MinVA = std::numeric_limits<uintX_t>::max(); 801 for (OutputSectionBase *Sec : *OutputSections) { 802 if (Sec->Flags & SHF_ALLOC) 803 MinVA = std::min<uint64_t>(MinVA, Sec->Addr); 804 else 805 Sec->Addr = 0; 806 } 807 808 allocateHeaders<ELFT>(Phdrs, *OutputSections, MinVA); 809 } 810 811 // Creates program headers as instructed by PHDRS linker script command. 812 template <class ELFT> std::vector<PhdrEntry> LinkerScript<ELFT>::createPhdrs() { 813 std::vector<PhdrEntry> Ret; 814 815 // Process PHDRS and FILEHDR keywords because they are not 816 // real output sections and cannot be added in the following loop. 817 for (const PhdrsCommand &Cmd : Opt.PhdrsCommands) { 818 Ret.emplace_back(Cmd.Type, Cmd.Flags == UINT_MAX ? PF_R : Cmd.Flags); 819 PhdrEntry &Phdr = Ret.back(); 820 821 if (Cmd.HasFilehdr) 822 Phdr.add(Out<ELFT>::ElfHeader); 823 if (Cmd.HasPhdrs) 824 Phdr.add(Out<ELFT>::ProgramHeaders); 825 826 if (Cmd.LMAExpr) { 827 Phdr.p_paddr = Cmd.LMAExpr(0); 828 Phdr.HasLMA = true; 829 } 830 } 831 832 // Add output sections to program headers. 
833 for (OutputSectionBase *Sec : *OutputSections) { 834 if (!(Sec->Flags & SHF_ALLOC)) 835 break; 836 837 // Assign headers specified by linker script 838 for (size_t Id : getPhdrIndices(Sec->getName())) { 839 Ret[Id].add(Sec); 840 if (Opt.PhdrsCommands[Id].Flags == UINT_MAX) 841 Ret[Id].p_flags |= Sec->getPhdrFlags(); 842 } 843 } 844 return Ret; 845 } 846 847 template <class ELFT> bool LinkerScript<ELFT>::ignoreInterpSection() { 848 // Ignore .interp section in case we have PHDRS specification 849 // and PT_INTERP isn't listed. 850 return !Opt.PhdrsCommands.empty() && 851 llvm::find_if(Opt.PhdrsCommands, [](const PhdrsCommand &Cmd) { 852 return Cmd.Type == PT_INTERP; 853 }) == Opt.PhdrsCommands.end(); 854 } 855 856 template <class ELFT> uint32_t LinkerScript<ELFT>::getFiller(StringRef Name) { 857 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 858 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 859 if (Cmd->Name == Name) 860 return Cmd->Filler; 861 return 0; 862 } 863 864 template <class ELFT> 865 static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) { 866 const endianness E = ELFT::TargetEndianness; 867 868 switch (Size) { 869 case 1: 870 *Buf = (uint8_t)Data; 871 break; 872 case 2: 873 write16<E>(Buf, Data); 874 break; 875 case 4: 876 write32<E>(Buf, Data); 877 break; 878 case 8: 879 write64<E>(Buf, Data); 880 break; 881 default: 882 llvm_unreachable("unsupported Size argument"); 883 } 884 } 885 886 template <class ELFT> 887 void LinkerScript<ELFT>::writeDataBytes(StringRef Name, uint8_t *Buf) { 888 int I = getSectionIndex(Name); 889 if (I == INT_MAX) 890 return; 891 892 auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get()); 893 for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) 894 if (auto *Data = dyn_cast<BytesDataCommand>(Base.get())) 895 writeInt<ELFT>(Buf + Data->Offset, Data->Expression(0), Data->Size); 896 } 897 898 template <class ELFT> bool LinkerScript<ELFT>::hasLMA(StringRef Name) { 899 
for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 900 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 901 if (Cmd->LMAExpr && Cmd->Name == Name) 902 return true; 903 return false; 904 } 905 906 // Returns the index of the given section name in linker script 907 // SECTIONS commands. Sections are laid out as the same order as they 908 // were in the script. If a given name did not appear in the script, 909 // it returns INT_MAX, so that it will be laid out at end of file. 910 template <class ELFT> int LinkerScript<ELFT>::getSectionIndex(StringRef Name) { 911 for (int I = 0, E = Opt.Commands.size(); I != E; ++I) 912 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get())) 913 if (Cmd->Name == Name) 914 return I; 915 return INT_MAX; 916 } 917 918 template <class ELFT> bool LinkerScript<ELFT>::hasPhdrsCommands() { 919 return !Opt.PhdrsCommands.empty(); 920 } 921 922 template <class ELFT> 923 const OutputSectionBase *LinkerScript<ELFT>::getOutputSection(const Twine &Loc, 924 StringRef Name) { 925 static OutputSectionBase FakeSec("", 0, 0); 926 927 for (OutputSectionBase *Sec : *OutputSections) 928 if (Sec->getName() == Name) 929 return Sec; 930 931 error(Loc + ": undefined section " + Name); 932 return &FakeSec; 933 } 934 935 // This function is essentially the same as getOutputSection(Name)->Size, 936 // but it won't print out an error message if a given section is not found. 937 // 938 // Linker script does not create an output section if its content is empty. 939 // We want to allow SIZEOF(.foo) where .foo is a section which happened to 940 // be empty. That is why this function is different from getOutputSection(). 
941 template <class ELFT> 942 uint64_t LinkerScript<ELFT>::getOutputSectionSize(StringRef Name) { 943 for (OutputSectionBase *Sec : *OutputSections) 944 if (Sec->getName() == Name) 945 return Sec->Size; 946 return 0; 947 } 948 949 template <class ELFT> uint64_t LinkerScript<ELFT>::getHeaderSize() { 950 return elf::getHeaderSize<ELFT>(); 951 } 952 953 template <class ELFT> 954 uint64_t LinkerScript<ELFT>::getSymbolValue(const Twine &Loc, StringRef S) { 955 if (SymbolBody *B = Symtab<ELFT>::X->find(S)) 956 return B->getVA<ELFT>(); 957 error(Loc + ": symbol not found: " + S); 958 return 0; 959 } 960 961 template <class ELFT> bool LinkerScript<ELFT>::isDefined(StringRef S) { 962 return Symtab<ELFT>::X->find(S) != nullptr; 963 } 964 965 template <class ELFT> bool LinkerScript<ELFT>::isAbsolute(StringRef S) { 966 SymbolBody *Sym = Symtab<ELFT>::X->find(S); 967 auto *DR = dyn_cast_or_null<DefinedRegular<ELFT>>(Sym); 968 return DR && !DR->Section; 969 } 970 971 // Gets section symbol belongs to. Symbol "." doesn't belong to any 972 // specific section but isn't absolute at the same time, so we try 973 // to find suitable section for it as well. 974 template <class ELFT> 975 const OutputSectionBase *LinkerScript<ELFT>::getSymbolSection(StringRef S) { 976 SymbolBody *Sym = Symtab<ELFT>::X->find(S); 977 if (!Sym) { 978 if (OutputSections->empty()) 979 return nullptr; 980 return CurOutSec ? CurOutSec : (*OutputSections)[0]; 981 } 982 983 return SymbolTableSection<ELFT>::getOutputSection(Sym); 984 } 985 986 // Returns indices of ELF headers containing specific section, identified 987 // by Name. Each index is a zero based number of ELF header listed within 988 // PHDRS {} script block. 
template <class ELFT>
std::vector<size_t> LinkerScript<ELFT>::getPhdrIndices(StringRef SectionName) {
  // Only the first output section command with a matching name is consulted.
  for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
    auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
    if (!Cmd || Cmd->Name != SectionName)
      continue;

    // Translate each header name listed on the command into its index.
    std::vector<size_t> Ret;
    for (StringRef PhdrName : Cmd->Phdrs)
      Ret.push_back(getPhdrIndex(Cmd->Location, PhdrName));
    return Ret;
  }
  return {};
}

// Returns the zero-based position of the PHDRS entry called PhdrName.
// If there is no such entry, reports an error tagged with Loc and returns 0.
template <class ELFT>
size_t LinkerScript<ELFT>::getPhdrIndex(const Twine &Loc, StringRef PhdrName) {
  size_t I = 0;
  for (PhdrsCommand &Cmd : Opt.PhdrsCommands) {
    if (Cmd.Name == PhdrName)
      return I;
    ++I;
  }
  error(Loc + ": section header '" + PhdrName + "' is not listed in PHDRS");
  return 0;
}

// Parser built on ScriptParserBase. Its three public entry points read a
// linker script, a version script and a dynamic list respectively; the
// private read* members each handle one directive or grammar element.
class elf::ScriptParser final : public ScriptParserBase {
  typedef void (ScriptParser::*Handler)();

public:
  // Records whether the script file itself lives under the sysroot; addFile()
  // uses that to decide whether "/"-prefixed paths get the sysroot prepended.
  ScriptParser(MemoryBufferRef MB)
      : ScriptParserBase(MB),
        IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {}

  void readLinkerScript();
  void readVersionScript();
  void readDynamicList();

private:
  void addFile(StringRef Path);

  // Top-level directives.
  void readAsNeeded();
  void readEntry();
  void readExtern();
  void readGroup();
  void readInclude();
  void readMemory();
  void readOutput();
  void readOutputArch();
  void readOutputFormat();
  void readPhdrs();
  void readSearchDir();
  void readSections();
  void readVersion();
  void readVersionScriptCommand();

  // Pieces of the SECTIONS grammar.
  SymbolAssignment *readAssignment(StringRef Name);
  BytesDataCommand *readBytesDataCommand(StringRef Tok);
  uint32_t readFill();
  OutputSectionCommand *readOutputSectionDescription(StringRef OutSec);
  uint32_t readOutputSectionFiller(StringRef Tok);
  std::vector<StringRef> readOutputSectionPhdrs();
  InputSectionDescription *readInputSectionDescription(StringRef Tok);
  StringMatcher readFilePatterns();
  std::vector<SectionPattern> readInputSectionsList();
  InputSectionDescription *readInputSectionRules(StringRef FilePattern);
  unsigned readPhdrType();
  SortSectionPolicy readSortKind();
  SymbolAssignment *readProvideHidden(bool Provide, bool Hidden);
  SymbolAssignment *readProvideOrAssignment(StringRef Tok);
  void readSort();
  Expr readAssert();

  // Pieces of the MEMORY grammar.
  uint64_t readMemoryAssignment(StringRef, StringRef, StringRef);
  std::pair<uint32_t, uint32_t> readMemoryAttributes();

  // Expression parsing.
  Expr readExpr();
  Expr readExpr1(Expr Lhs, int MinPrec);
  StringRef readParenLiteral();
  Expr readPrimary();
  Expr readTernary(Expr Cond);
  Expr readParenExpr();

  // For parsing version script.
  std::vector<SymbolVersion> readVersionExtern();
  void readAnonymousDeclaration();
  void readVersionDeclaration(StringRef VerStr);
  std::vector<SymbolVersion> readSymbols();
  void readLocals();

  // Shorthand for the global script configuration that parsing fills in.
  ScriptConfiguration &Opt = *ScriptConfig;
  bool IsUnderSysroot;
};

// Reads a dynamic list: a single anonymous "{ ... };" declaration that must
// be followed by end of input.
void ScriptParser::readDynamicList() {
  expect("{");
  readAnonymousDeclaration();
  if (!atEOF())
    setError("EOF expected, but got " + next());
}

// Reads a whole version script; anything left over after the version
// commands is an error.
void ScriptParser::readVersionScript() {
  readVersionScriptCommand();
  if (!atEOF())
    setError("EOF expected, but got " + next());
}

// Reads either one anonymous declaration ("{ ... };") or a sequence of named
// version declarations ("VerStr { ... };"). Mixing the two forms is an error.
void ScriptParser::readVersionScriptCommand() {
  if (consume("{")) {
    readAnonymousDeclaration();
    return;
  }

  // Stop at "}" so that readVersion() can consume the closing brace of an
  // enclosing VERSION { ... } block.
  while (!atEOF() && !Error && peek() != "}") {
    StringRef VerStr = next();
    if (VerStr == "{") {
      setError("anonymous version definition is used in "
               "combination with other version definitions");
      return;
    }
    expect("{");
    readVersionDeclaration(VerStr);
  }
}

// Reads the body of a VERSION { ... } linker script command.
void ScriptParser::readVersion() {
  expect("{");
  readVersionScriptCommand();
  expect("}");
}

// Reads top-level linker script commands until end of input, dispatching on
// the leading keyword. A token that is neither a known directive nor the
// start of a symbol assignment is reported as an unknown directive.
void ScriptParser::readLinkerScript() {
  while (!atEOF()) {
    StringRef Tok = next();
    // Stray semicolons between commands are skipped.
    if (Tok == ";")
      continue;

    if (Tok == "ASSERT") {
      Opt.Commands.emplace_back(new AssertCommand(readAssert()));
    } else if (Tok == "ENTRY") {
      readEntry();
    } else if (Tok == "EXTERN") {
      readExtern();
    } else if (Tok == "GROUP" || Tok == "INPUT") {
      readGroup();
    } else if (Tok == "INCLUDE") {
      readInclude();
    } else if (Tok == "MEMORY") {
      readMemory();
    } else if (Tok == "OUTPUT") {
      readOutput();
    } else if (Tok == "OUTPUT_ARCH") {
      readOutputArch();
    } else if (Tok == "OUTPUT_FORMAT") {
      readOutputFormat();
    } else if (Tok == "PHDRS") {
      readPhdrs();
    } else if (Tok == "SEARCH_DIR") {
      readSearchDir();
    } else if (Tok == "SECTIONS") {
      readSections();
    } else if (Tok == "VERSION") {
      readVersion();
    } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) {
      Opt.Commands.emplace_back(Cmd);
    } else {
      setError("unknown directive: " + Tok);
    }
  }
}

// Hands a file or library named in the script to the driver. S may be an
// absolute path, a "="-prefixed path (relative to the sysroot when one is
// configured), a "-l<name>" library reference, a path that exists as given,
// or a name to look up in the search paths.
void ScriptParser::addFile(StringRef S) {
  // For a script that itself lives under the sysroot, try a "/"-prefixed
  // path relative to the sysroot first.
  if (IsUnderSysroot && S.startswith("/")) {
    SmallString<128> PathData;
    StringRef Path = (Config->Sysroot + S).toStringRef(PathData);
    if (sys::fs::exists(Path)) {
      // PathData is a local buffer, so the string is saved before it is
      // passed on.
      Driver->addFile(Saver.save(Path));
      return;
    }
  }

  if (sys::path::is_absolute(S)) {
    Driver->addFile(S);
  } else if (S.startswith("=")) {
    if (Config->Sysroot.empty())
      Driver->addFile(S.substr(1));
    else
      Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)));
  } else if (S.startswith("-l")) {
    Driver->addLibrary(S.substr(2));
  } else if (sys::fs::exists(S)) {
    Driver->addFile(S);
  } else {
    if (Optional<std::string> Path = findFromSearchPaths(S))
      Driver->addFile(Saver.save(*Path));
    else
      setError("unable to find " + S);
  }
}

// Reads "( file... )" after AS_NEEDED. Config->AsNeeded is forced on while
// the listed files are added and restored to its previous value afterwards.
void ScriptParser::readAsNeeded() {
  expect("(");
  bool Orig = Config->AsNeeded;
  Config->AsNeeded = true;
  while (!Error && !consume(")"))
    addFile(unquote(next()));
  Config->AsNeeded = Orig;
}

// Reads "( symbol )" after ENTRY.
void ScriptParser::readEntry() {
  // -e <symbol> takes precedence over ENTRY(<symbol>).
  expect("(");
  StringRef Tok = next();
  if (Config->Entry.empty())
    Config->Entry = Tok;
  expect(")");
}

// Reads "( symbol... )" after EXTERN and records every symbol in
// Config->Undefined.
void ScriptParser::readExtern() {
  expect("(");
  while (!Error && !consume(")"))
    Config->Undefined.push_back(next());
}

// Reads "( file... )" after GROUP or INPUT. A nested AS_NEEDED ( ... ) list
// is handled by readAsNeeded(); every other token is added as a file.
void ScriptParser::readGroup() {
  expect("(");
  while (!Error && !consume(")")) {
    StringRef Tok = next();
    if (Tok == "AS_NEEDED")
      readAsNeeded();
    else
      addFile(unquote(Tok));
  }
}

// Reads the file name after INCLUDE and tokenizes that file's contents.
void ScriptParser::readInclude() {
  StringRef Tok = unquote(next());

  // https://sourceware.org/binutils/docs/ld/File-Commands.html:
  // The file will be searched for in the current directory, and in any
  // directory specified with the -L option.
  if (sys::fs::exists(Tok)) {
    if (Optional<MemoryBufferRef> MB = readFile(Tok))
      tokenize(*MB);
    return;
  }
  if (Optional<std::string> Path = findFromSearchPaths(Tok)) {
    if (Optional<MemoryBufferRef> MB = readFile(*Path))
      tokenize(*MB);
    return;
  }
  setError("cannot open " + Tok);
}

// Reads "( file )" after OUTPUT.
void ScriptParser::readOutput() {
  // -o <file> takes precedence over OUTPUT(<file>).
  expect("(");
  StringRef Tok = next();
  if (Config->OutputFile.empty())
    Config->OutputFile = unquote(Tok);
  expect(")");
}

// Reads "( arch )" after OUTPUT_ARCH; the argument is discarded.
void ScriptParser::readOutputArch() {
  // Error checking only for now.
  expect("(");
  skip();
  expect(")");
}

// Reads the one-argument or three-argument form of OUTPUT_FORMAT; the
// arguments are discarded.
void ScriptParser::readOutputFormat() {
  // Error checking only for now.
  expect("(");
  skip();
  StringRef Tok = next();
  // One-argument form.
  if (Tok == ")")
    return;
  if (Tok != ",") {
    setError("unexpected token: " + Tok);
    return;
  }
  // Three-argument form: two more names separated by a comma.
  skip();
  expect(",");
  skip();
  expect(")");
}

// Reads the body of PHDRS { ... }. Each entry is a name, a type, and any
// number of FILEHDR / PHDRS / AT(expr) / FLAGS(expr) attributes, terminated
// by ";". One PhdrsCommand is appended to Opt.PhdrsCommands per entry.
void ScriptParser::readPhdrs() {
  expect("{");
  while (!Error && !consume("}")) {
    StringRef Tok = next();
    // NOTE(review): the UINT_MAX initializer presumably lands in Flags as a
    // "no FLAGS given" marker; confirm against the PhdrsCommand field order.
    Opt.PhdrsCommands.push_back(
        {Tok, PT_NULL, false, false, UINT_MAX, nullptr});
    PhdrsCommand &PhdrCmd = Opt.PhdrsCommands.back();

    PhdrCmd.Type = readPhdrType();
    do {
      Tok = next();
      if (Tok == ";")
        break;
      if (Tok == "FILEHDR")
        PhdrCmd.HasFilehdr = true;
      else if (Tok == "PHDRS")
        PhdrCmd.HasPhdrs = true;
      else if (Tok == "AT")
        PhdrCmd.LMAExpr = readParenExpr();
      else if (Tok == "FLAGS") {
        expect("(");
        // Passing 0 for the value of dot is a bit of a hack. It means that
        // we accept expressions like ".|1".
        PhdrCmd.Flags = readExpr()(0);
        expect(")");
      } else
        setError("unexpected header attribute: " + Tok);
    } while (!Error);
  }
}

// Reads "( dir )" after SEARCH_DIR and appends the directory to the search
// paths unless Config->Nostdlib is set.
void ScriptParser::readSearchDir() {
  expect("(");
  StringRef Tok = next();
  if (!Config->Nostdlib)
    Config->SearchPaths.push_back(unquote(Tok));
  expect(")");
}

// Reads the body of SECTIONS { ... }. Each element is a symbol assignment
// (optionally wrapped in PROVIDE / HIDDEN / PROVIDE_HIDDEN), an ASSERT, or an
// output section description; all are appended to Opt.Commands in order.
void ScriptParser::readSections() {
  Opt.HasSections = true;
  // -no-rosegment is used to avoid placing read only non-executable sections in
  // their own segment. We do the same if SECTIONS command is present in linker
  // script. See comment for computeFlags().
  Config->SingleRoRx = true;

  expect("{");
  while (!Error && !consume("}")) {
    StringRef Tok = next();
    // Try the assignment forms first; a null result means Tok starts
    // something else.
    BaseCommand *Cmd = readProvideOrAssignment(Tok);
    if (!Cmd) {
      if (Tok == "ASSERT")
        Cmd = new AssertCommand(readAssert());
      else
        Cmd = readOutputSectionDescription(Tok);
    }
    Opt.Commands.emplace_back(Cmd);
  }
}

// Returns the precedence of binary operator Op (a larger number binds more
// tightly), or -1 if Op is not one of the listed binary operators.
static int precedence(StringRef Op) {
  return StringSwitch<int>(Op)
      .Cases("*", "/", 5)
      .Cases("+", "-", 4)
      .Cases("<<", ">>", 3)
      .Cases("<", "<=", ">", ">=", "==", "!=", 2)
      .Cases("&", "|", 1)
      .Default(-1);
}

// Collects tokens up to and including the closing ")" and builds a matcher
// from them. The opening "(" must already have been consumed by the caller.
StringMatcher ScriptParser::readFilePatterns() {
  std::vector<StringRef> V;
  while (!Error && !consume(")"))
    V.push_back(next());
  return StringMatcher(V);
}

// Consumes a SORT* keyword if one is next and returns the matching policy.
// If the next token is not a sort keyword, nothing is consumed and
// SortSectionPolicy::Default is returned.
SortSectionPolicy ScriptParser::readSortKind() {
  if (consume("SORT") || consume("SORT_BY_NAME"))
    return SortSectionPolicy::Name;
  if (consume("SORT_BY_ALIGNMENT"))
    return SortSectionPolicy::Alignment;
  if (consume("SORT_BY_INIT_PRIORITY"))
    return SortSectionPolicy::Priority;
  if (consume("SORT_NONE"))
    return SortSectionPolicy::None;
  return SortSectionPolicy::Default;
}

// Method reads a list of sequence of excluded files and section globs given in
// a following form: ((EXCLUDE_FILE(file_pattern+))? section_pattern+)+
// Example: *(.foo.1 EXCLUDE_FILE (*a.o) .foo.2 EXCLUDE_FILE (*b.o) .foo.3)
// The semantics of that is next:
// * Include .foo.1 from every file.
1371 // * Include .foo.2 from every file but a.o 1372 // * Include .foo.3 from every file but b.o 1373 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 1374 std::vector<SectionPattern> Ret; 1375 while (!Error && peek() != ")") { 1376 StringMatcher ExcludeFilePat; 1377 if (consume("EXCLUDE_FILE")) { 1378 expect("("); 1379 ExcludeFilePat = readFilePatterns(); 1380 } 1381 1382 std::vector<StringRef> V; 1383 while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") 1384 V.push_back(next()); 1385 1386 if (!V.empty()) 1387 Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); 1388 else 1389 setError("section pattern is expected"); 1390 } 1391 return Ret; 1392 } 1393 1394 // Reads contents of "SECTIONS" directive. That directive contains a 1395 // list of glob patterns for input sections. The grammar is as follows. 1396 // 1397 // <patterns> ::= <section-list> 1398 // | <sort> "(" <section-list> ")" 1399 // | <sort> "(" <sort> "(" <section-list> ")" ")" 1400 // 1401 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" 1402 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" 1403 // 1404 // <section-list> is parsed by readInputSectionsList(). 
1405 InputSectionDescription * 1406 ScriptParser::readInputSectionRules(StringRef FilePattern) { 1407 auto *Cmd = new InputSectionDescription(FilePattern); 1408 expect("("); 1409 while (!Error && !consume(")")) { 1410 SortSectionPolicy Outer = readSortKind(); 1411 SortSectionPolicy Inner = SortSectionPolicy::Default; 1412 std::vector<SectionPattern> V; 1413 if (Outer != SortSectionPolicy::Default) { 1414 expect("("); 1415 Inner = readSortKind(); 1416 if (Inner != SortSectionPolicy::Default) { 1417 expect("("); 1418 V = readInputSectionsList(); 1419 expect(")"); 1420 } else { 1421 V = readInputSectionsList(); 1422 } 1423 expect(")"); 1424 } else { 1425 V = readInputSectionsList(); 1426 } 1427 1428 for (SectionPattern &Pat : V) { 1429 Pat.SortInner = Inner; 1430 Pat.SortOuter = Outer; 1431 } 1432 1433 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 1434 } 1435 return Cmd; 1436 } 1437 1438 InputSectionDescription * 1439 ScriptParser::readInputSectionDescription(StringRef Tok) { 1440 // Input section wildcard can be surrounded by KEEP. 1441 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 1442 if (Tok == "KEEP") { 1443 expect("("); 1444 StringRef FilePattern = next(); 1445 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 1446 expect(")"); 1447 Opt.KeptSections.push_back(Cmd); 1448 return Cmd; 1449 } 1450 return readInputSectionRules(Tok); 1451 } 1452 1453 void ScriptParser::readSort() { 1454 expect("("); 1455 expect("CONSTRUCTORS"); 1456 expect(")"); 1457 } 1458 1459 Expr ScriptParser::readAssert() { 1460 expect("("); 1461 Expr E = readExpr(); 1462 expect(","); 1463 StringRef Msg = unquote(next()); 1464 expect(")"); 1465 return [=](uint64_t Dot) { 1466 uint64_t V = E(Dot); 1467 if (!V) 1468 error(Msg); 1469 return V; 1470 }; 1471 } 1472 1473 // Reads a FILL(expr) command. 
We handle the FILL command as an 1474 // alias for =fillexp section attribute, which is different from 1475 // what GNU linkers do. 1476 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 1477 uint32_t ScriptParser::readFill() { 1478 expect("("); 1479 uint32_t V = readOutputSectionFiller(next()); 1480 expect(")"); 1481 expect(";"); 1482 return V; 1483 } 1484 1485 OutputSectionCommand * 1486 ScriptParser::readOutputSectionDescription(StringRef OutSec) { 1487 OutputSectionCommand *Cmd = new OutputSectionCommand(OutSec); 1488 Cmd->Location = getCurrentLocation(); 1489 1490 // Read an address expression. 1491 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html#Output-Section-Address 1492 if (peek() != ":") 1493 Cmd->AddrExpr = readExpr(); 1494 1495 expect(":"); 1496 1497 if (consume("AT")) 1498 Cmd->LMAExpr = readParenExpr(); 1499 if (consume("ALIGN")) 1500 Cmd->AlignExpr = readParenExpr(); 1501 if (consume("SUBALIGN")) 1502 Cmd->SubalignExpr = readParenExpr(); 1503 1504 // Parse constraints. 1505 if (consume("ONLY_IF_RO")) 1506 Cmd->Constraint = ConstraintKind::ReadOnly; 1507 if (consume("ONLY_IF_RW")) 1508 Cmd->Constraint = ConstraintKind::ReadWrite; 1509 expect("{"); 1510 1511 while (!Error && !consume("}")) { 1512 StringRef Tok = next(); 1513 if (Tok == ";") { 1514 // Empty commands are allowed. Do nothing here. 1515 } else if (SymbolAssignment *Assignment = readProvideOrAssignment(Tok)) { 1516 Cmd->Commands.emplace_back(Assignment); 1517 } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) { 1518 Cmd->Commands.emplace_back(Data); 1519 } else if (Tok == "ASSERT") { 1520 Cmd->Commands.emplace_back(new AssertCommand(readAssert())); 1521 expect(";"); 1522 } else if (Tok == "CONSTRUCTORS") { 1523 // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors 1524 // by name. This is for very old file formats such as ECOFF/XCOFF. 1525 // For ELF, we should ignore. 
1526 } else if (Tok == "FILL") { 1527 Cmd->Filler = readFill(); 1528 } else if (Tok == "SORT") { 1529 readSort(); 1530 } else if (peek() == "(") { 1531 Cmd->Commands.emplace_back(readInputSectionDescription(Tok)); 1532 } else { 1533 setError("unknown command " + Tok); 1534 } 1535 } 1536 1537 if (consume(">")) 1538 Cmd->MemoryRegionName = next(); 1539 1540 Cmd->Phdrs = readOutputSectionPhdrs(); 1541 1542 if (consume("=")) 1543 Cmd->Filler = readOutputSectionFiller(next()); 1544 else if (peek().startswith("=")) 1545 Cmd->Filler = readOutputSectionFiller(next().drop_front()); 1546 1547 // Consume optional comma following output section command. 1548 consume(","); 1549 1550 return Cmd; 1551 } 1552 1553 // Read "=<number>" where <number> is an octal/decimal/hexadecimal number. 1554 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 1555 // 1556 // ld.gold is not fully compatible with ld.bfd. ld.bfd handles 1557 // hexstrings as blobs of arbitrary sizes, while ld.gold handles them 1558 // as 32-bit big-endian values. We will do the same as ld.gold does 1559 // because it's simpler than what ld.bfd does. 
1560 uint32_t ScriptParser::readOutputSectionFiller(StringRef Tok) { 1561 uint32_t V; 1562 if (!Tok.getAsInteger(0, V)) 1563 return V; 1564 setError("invalid filler expression: " + Tok); 1565 return 0; 1566 } 1567 1568 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { 1569 expect("("); 1570 SymbolAssignment *Cmd = readAssignment(next()); 1571 Cmd->Provide = Provide; 1572 Cmd->Hidden = Hidden; 1573 expect(")"); 1574 expect(";"); 1575 return Cmd; 1576 } 1577 1578 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) { 1579 SymbolAssignment *Cmd = nullptr; 1580 if (peek() == "=" || peek() == "+=") { 1581 Cmd = readAssignment(Tok); 1582 expect(";"); 1583 } else if (Tok == "PROVIDE") { 1584 Cmd = readProvideHidden(true, false); 1585 } else if (Tok == "HIDDEN") { 1586 Cmd = readProvideHidden(false, true); 1587 } else if (Tok == "PROVIDE_HIDDEN") { 1588 Cmd = readProvideHidden(true, true); 1589 } 1590 return Cmd; 1591 } 1592 1593 static uint64_t getSymbolValue(const Twine &Loc, StringRef S, uint64_t Dot) { 1594 if (S == ".") 1595 return Dot; 1596 return ScriptBase->getSymbolValue(Loc, S); 1597 } 1598 1599 static bool isAbsolute(StringRef S) { 1600 if (S == ".") 1601 return false; 1602 return ScriptBase->isAbsolute(S); 1603 } 1604 1605 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { 1606 StringRef Op = next(); 1607 Expr E; 1608 assert(Op == "=" || Op == "+="); 1609 if (consume("ABSOLUTE")) { 1610 // The RHS may be something like "ABSOLUTE(.) & 0xff". 1611 // Call readExpr1 to read the whole expression. 
1612 E = readExpr1(readParenExpr(), 0); 1613 E.IsAbsolute = [] { return true; }; 1614 } else { 1615 E = readExpr(); 1616 } 1617 if (Op == "+=") { 1618 std::string Loc = getCurrentLocation(); 1619 E = [=](uint64_t Dot) { 1620 return getSymbolValue(Loc, Name, Dot) + E(Dot); 1621 }; 1622 } 1623 return new SymbolAssignment(Name, E); 1624 } 1625 1626 // This is an operator-precedence parser to parse a linker 1627 // script expression. 1628 Expr ScriptParser::readExpr() { return readExpr1(readPrimary(), 0); } 1629 1630 static Expr combine(StringRef Op, Expr L, Expr R) { 1631 auto IsAbs = [=] { return L.IsAbsolute() && R.IsAbsolute(); }; 1632 auto GetOutSec = [=] { 1633 const OutputSectionBase *S = L.Section(); 1634 return S ? S : R.Section(); 1635 }; 1636 1637 if (Op == "*") 1638 return [=](uint64_t Dot) { return L(Dot) * R(Dot); }; 1639 if (Op == "/") { 1640 return [=](uint64_t Dot) -> uint64_t { 1641 uint64_t RHS = R(Dot); 1642 if (RHS == 0) { 1643 error("division by zero"); 1644 return 0; 1645 } 1646 return L(Dot) / RHS; 1647 }; 1648 } 1649 if (Op == "+") 1650 return {[=](uint64_t Dot) { return L(Dot) + R(Dot); }, IsAbs, GetOutSec}; 1651 if (Op == "-") 1652 return {[=](uint64_t Dot) { return L(Dot) - R(Dot); }, IsAbs, GetOutSec}; 1653 if (Op == "<<") 1654 return [=](uint64_t Dot) { return L(Dot) << R(Dot); }; 1655 if (Op == ">>") 1656 return [=](uint64_t Dot) { return L(Dot) >> R(Dot); }; 1657 if (Op == "<") 1658 return [=](uint64_t Dot) { return L(Dot) < R(Dot); }; 1659 if (Op == ">") 1660 return [=](uint64_t Dot) { return L(Dot) > R(Dot); }; 1661 if (Op == ">=") 1662 return [=](uint64_t Dot) { return L(Dot) >= R(Dot); }; 1663 if (Op == "<=") 1664 return [=](uint64_t Dot) { return L(Dot) <= R(Dot); }; 1665 if (Op == "==") 1666 return [=](uint64_t Dot) { return L(Dot) == R(Dot); }; 1667 if (Op == "!=") 1668 return [=](uint64_t Dot) { return L(Dot) != R(Dot); }; 1669 if (Op == "&") 1670 return [=](uint64_t Dot) { return L(Dot) & R(Dot); }; 1671 if (Op == "|") 1672 
return [=](uint64_t Dot) { return L(Dot) | R(Dot); }; 1673 llvm_unreachable("invalid operator"); 1674 } 1675 1676 // This is a part of the operator-precedence parser. This function 1677 // assumes that the remaining token stream starts with an operator. 1678 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { 1679 while (!atEOF() && !Error) { 1680 // Read an operator and an expression. 1681 if (consume("?")) 1682 return readTernary(Lhs); 1683 StringRef Op1 = peek(); 1684 if (precedence(Op1) < MinPrec) 1685 break; 1686 skip(); 1687 Expr Rhs = readPrimary(); 1688 1689 // Evaluate the remaining part of the expression first if the 1690 // next operator has greater precedence than the previous one. 1691 // For example, if we have read "+" and "3", and if the next 1692 // operator is "*", then we'll evaluate 3 * ... part first. 1693 while (!atEOF()) { 1694 StringRef Op2 = peek(); 1695 if (precedence(Op2) <= precedence(Op1)) 1696 break; 1697 Rhs = readExpr1(Rhs, precedence(Op2)); 1698 } 1699 1700 Lhs = combine(Op1, Lhs, Rhs); 1701 } 1702 return Lhs; 1703 } 1704 1705 uint64_t static getConstant(StringRef S) { 1706 if (S == "COMMONPAGESIZE") 1707 return Target->PageSize; 1708 if (S == "MAXPAGESIZE") 1709 return Config->MaxPageSize; 1710 error("unknown constant: " + S); 1711 return 0; 1712 } 1713 1714 // Parses Tok as an integer. Returns true if successful. 1715 // It recognizes hexadecimal (prefixed with "0x" or suffixed with "H") 1716 // and decimal numbers. Decimal numbers may have "K" (kilo) or 1717 // "M" (mega) prefixes. 
1718 static bool readInteger(StringRef Tok, uint64_t &Result) { 1719 // Negative number 1720 if (Tok.startswith("-")) { 1721 if (!readInteger(Tok.substr(1), Result)) 1722 return false; 1723 Result = -Result; 1724 return true; 1725 } 1726 1727 // Hexadecimal 1728 if (Tok.startswith_lower("0x")) 1729 return !Tok.substr(2).getAsInteger(16, Result); 1730 if (Tok.endswith_lower("H")) 1731 return !Tok.drop_back().getAsInteger(16, Result); 1732 1733 // Decimal 1734 int Suffix = 1; 1735 if (Tok.endswith_lower("K")) { 1736 Suffix = 1024; 1737 Tok = Tok.drop_back(); 1738 } else if (Tok.endswith_lower("M")) { 1739 Suffix = 1024 * 1024; 1740 Tok = Tok.drop_back(); 1741 } 1742 if (Tok.getAsInteger(10, Result)) 1743 return false; 1744 Result *= Suffix; 1745 return true; 1746 } 1747 1748 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { 1749 int Size = StringSwitch<unsigned>(Tok) 1750 .Case("BYTE", 1) 1751 .Case("SHORT", 2) 1752 .Case("LONG", 4) 1753 .Case("QUAD", 8) 1754 .Default(-1); 1755 if (Size == -1) 1756 return nullptr; 1757 1758 return new BytesDataCommand(readParenExpr(), Size); 1759 } 1760 1761 StringRef ScriptParser::readParenLiteral() { 1762 expect("("); 1763 StringRef Tok = next(); 1764 expect(")"); 1765 return Tok; 1766 } 1767 1768 Expr ScriptParser::readPrimary() { 1769 if (peek() == "(") 1770 return readParenExpr(); 1771 1772 StringRef Tok = next(); 1773 std::string Location = getCurrentLocation(); 1774 1775 if (Tok == "~") { 1776 Expr E = readPrimary(); 1777 return [=](uint64_t Dot) { return ~E(Dot); }; 1778 } 1779 if (Tok == "-") { 1780 Expr E = readPrimary(); 1781 return [=](uint64_t Dot) { return -E(Dot); }; 1782 } 1783 1784 // Built-in functions are parsed here. 1785 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 
1786 if (Tok == "ADDR") { 1787 StringRef Name = readParenLiteral(); 1788 return {[=](uint64_t Dot) { 1789 return ScriptBase->getOutputSection(Location, Name)->Addr; 1790 }, 1791 [=] { return false; }, 1792 [=] { return ScriptBase->getOutputSection(Location, Name); }}; 1793 } 1794 if (Tok == "LOADADDR") { 1795 StringRef Name = readParenLiteral(); 1796 return [=](uint64_t Dot) { 1797 return ScriptBase->getOutputSection(Location, Name)->getLMA(); 1798 }; 1799 } 1800 if (Tok == "ASSERT") 1801 return readAssert(); 1802 if (Tok == "ALIGN") { 1803 expect("("); 1804 Expr E = readExpr(); 1805 if (consume(",")) { 1806 Expr E2 = readExpr(); 1807 expect(")"); 1808 return [=](uint64_t Dot) { return alignTo(E(Dot), E2(Dot)); }; 1809 } 1810 expect(")"); 1811 return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; 1812 } 1813 if (Tok == "CONSTANT") { 1814 StringRef Name = readParenLiteral(); 1815 return [=](uint64_t Dot) { return getConstant(Name); }; 1816 } 1817 if (Tok == "DEFINED") { 1818 StringRef Name = readParenLiteral(); 1819 return [=](uint64_t Dot) { return ScriptBase->isDefined(Name) ? 1 : 0; }; 1820 } 1821 if (Tok == "SEGMENT_START") { 1822 expect("("); 1823 skip(); 1824 expect(","); 1825 Expr E = readExpr(); 1826 expect(")"); 1827 return [=](uint64_t Dot) { return E(Dot); }; 1828 } 1829 if (Tok == "DATA_SEGMENT_ALIGN") { 1830 expect("("); 1831 Expr E = readExpr(); 1832 expect(","); 1833 readExpr(); 1834 expect(")"); 1835 return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; 1836 } 1837 if (Tok == "DATA_SEGMENT_END") { 1838 expect("("); 1839 expect("."); 1840 expect(")"); 1841 return [](uint64_t Dot) { return Dot; }; 1842 } 1843 // GNU linkers implements more complicated logic to handle 1844 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and just align to 1845 // the next page boundary for simplicity. 
1846 if (Tok == "DATA_SEGMENT_RELRO_END") { 1847 expect("("); 1848 readExpr(); 1849 expect(","); 1850 readExpr(); 1851 expect(")"); 1852 return [](uint64_t Dot) { return alignTo(Dot, Target->PageSize); }; 1853 } 1854 if (Tok == "SIZEOF") { 1855 StringRef Name = readParenLiteral(); 1856 return [=](uint64_t Dot) { return ScriptBase->getOutputSectionSize(Name); }; 1857 } 1858 if (Tok == "ALIGNOF") { 1859 StringRef Name = readParenLiteral(); 1860 return [=](uint64_t Dot) { 1861 return ScriptBase->getOutputSection(Location, Name)->Addralign; 1862 }; 1863 } 1864 if (Tok == "SIZEOF_HEADERS") 1865 return [=](uint64_t Dot) { return ScriptBase->getHeaderSize(); }; 1866 1867 // Tok is a literal number. 1868 uint64_t V; 1869 if (readInteger(Tok, V)) 1870 return [=](uint64_t Dot) { return V; }; 1871 1872 // Tok is a symbol name. 1873 if (Tok != "." && !isValidCIdentifier(Tok)) 1874 setError("malformed number: " + Tok); 1875 return {[=](uint64_t Dot) { return getSymbolValue(Location, Tok, Dot); }, 1876 [=] { return isAbsolute(Tok); }, 1877 [=] { return ScriptBase->getSymbolSection(Tok); }}; 1878 } 1879 1880 Expr ScriptParser::readTernary(Expr Cond) { 1881 Expr L = readExpr(); 1882 expect(":"); 1883 Expr R = readExpr(); 1884 return [=](uint64_t Dot) { return Cond(Dot) ? L(Dot) : R(Dot); }; 1885 } 1886 1887 Expr ScriptParser::readParenExpr() { 1888 expect("("); 1889 Expr E = readExpr(); 1890 expect(")"); 1891 return E; 1892 } 1893 1894 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 1895 std::vector<StringRef> Phdrs; 1896 while (!Error && peek().startswith(":")) { 1897 StringRef Tok = next(); 1898 Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); 1899 } 1900 return Phdrs; 1901 } 1902 1903 // Read a program header type name. The next token must be a 1904 // name of a program header type or a constant (e.g. "0x3"). 
1905 unsigned ScriptParser::readPhdrType() { 1906 StringRef Tok = next(); 1907 uint64_t Val; 1908 if (readInteger(Tok, Val)) 1909 return Val; 1910 1911 unsigned Ret = StringSwitch<unsigned>(Tok) 1912 .Case("PT_NULL", PT_NULL) 1913 .Case("PT_LOAD", PT_LOAD) 1914 .Case("PT_DYNAMIC", PT_DYNAMIC) 1915 .Case("PT_INTERP", PT_INTERP) 1916 .Case("PT_NOTE", PT_NOTE) 1917 .Case("PT_SHLIB", PT_SHLIB) 1918 .Case("PT_PHDR", PT_PHDR) 1919 .Case("PT_TLS", PT_TLS) 1920 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1921 .Case("PT_GNU_STACK", PT_GNU_STACK) 1922 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1923 .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) 1924 .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) 1925 .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) 1926 .Default(-1); 1927 1928 if (Ret == (unsigned)-1) { 1929 setError("invalid program header type: " + Tok); 1930 return PT_NULL; 1931 } 1932 return Ret; 1933 } 1934 1935 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 1936 void ScriptParser::readAnonymousDeclaration() { 1937 // Read global symbols first. "global:" is default, so if there's 1938 // no label, we assume global symbols. 1939 if (peek() != "local") { 1940 if (consume("global")) 1941 expect(":"); 1942 Config->VersionScriptGlobals = readSymbols(); 1943 } 1944 readLocals(); 1945 expect("}"); 1946 expect(";"); 1947 } 1948 1949 void ScriptParser::readLocals() { 1950 if (!consume("local")) 1951 return; 1952 expect(":"); 1953 std::vector<SymbolVersion> Locals = readSymbols(); 1954 for (SymbolVersion V : Locals) { 1955 if (V.Name == "*") { 1956 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1957 continue; 1958 } 1959 Config->VersionScriptLocals.push_back(V); 1960 } 1961 } 1962 1963 // Reads a list of symbols, e.g. "VerStr { global: foo; bar; local: *; };". 1964 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1965 // Identifiers start at 2 because 0 and 1 are reserved 1966 // for VER_NDX_LOCAL and VER_NDX_GLOBAL constants. 
1967 uint16_t VersionId = Config->VersionDefinitions.size() + 2; 1968 Config->VersionDefinitions.push_back({VerStr, VersionId}); 1969 1970 // Read global symbols. 1971 if (peek() != "local") { 1972 if (consume("global")) 1973 expect(":"); 1974 Config->VersionDefinitions.back().Globals = readSymbols(); 1975 } 1976 readLocals(); 1977 expect("}"); 1978 1979 // Each version may have a parent version. For example, "Ver2" 1980 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" 1981 // as a parent. This version hierarchy is, probably against your 1982 // instinct, purely for hint; the runtime doesn't care about it 1983 // at all. In LLD, we simply ignore it. 1984 if (peek() != ";") 1985 skip(); 1986 expect(";"); 1987 } 1988 1989 // Reads a list of symbols for a versions cript. 1990 std::vector<SymbolVersion> ScriptParser::readSymbols() { 1991 std::vector<SymbolVersion> Ret; 1992 for (;;) { 1993 if (consume("extern")) { 1994 for (SymbolVersion V : readVersionExtern()) 1995 Ret.push_back(V); 1996 continue; 1997 } 1998 1999 if (peek() == "}" || peek() == "local" || Error) 2000 break; 2001 StringRef Tok = next(); 2002 Ret.push_back({unquote(Tok), false, hasWildcard(Tok)}); 2003 expect(";"); 2004 } 2005 return Ret; 2006 } 2007 2008 // Reads an "extern C++" directive, e.g., 2009 // "extern "C++" { ns::*; "f(int, double)"; };" 2010 std::vector<SymbolVersion> ScriptParser::readVersionExtern() { 2011 StringRef Tok = next(); 2012 bool IsCXX = Tok == "\"C++\""; 2013 if (!IsCXX && Tok != "\"C\"") 2014 setError("Unknown language"); 2015 expect("{"); 2016 2017 std::vector<SymbolVersion> Ret; 2018 while (!Error && peek() != "}") { 2019 StringRef Tok = next(); 2020 bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); 2021 Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); 2022 expect(";"); 2023 } 2024 2025 expect("}"); 2026 expect(";"); 2027 return Ret; 2028 } 2029 2030 uint64_t ScriptParser::readMemoryAssignment( 2031 StringRef S1, StringRef S2, StringRef S3) { 
2032 if (!(consume(S1) || consume(S2) || consume(S3))) { 2033 setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); 2034 return 0; 2035 } 2036 expect("="); 2037 2038 // TODO: Fully support constant expressions. 2039 uint64_t Val; 2040 if (!readInteger(next(), Val)) 2041 setError("nonconstant expression for "+ S1); 2042 return Val; 2043 } 2044 2045 // Parse the MEMORY command as specified in: 2046 // https://sourceware.org/binutils/docs/ld/MEMORY.html 2047 // 2048 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... } 2049 void ScriptParser::readMemory() { 2050 expect("{"); 2051 while (!Error && !consume("}")) { 2052 StringRef Name = next(); 2053 2054 uint32_t Flags = 0; 2055 uint32_t NegFlags = 0; 2056 if (consume("(")) { 2057 std::tie(Flags, NegFlags) = readMemoryAttributes(); 2058 expect(")"); 2059 } 2060 expect(":"); 2061 2062 uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); 2063 expect(","); 2064 uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); 2065 2066 // Add the memory region to the region map (if it doesn't already exist). 2067 auto It = Opt.MemoryRegions.find(Name); 2068 if (It != Opt.MemoryRegions.end()) 2069 setError("region '" + Name + "' already defined"); 2070 else 2071 Opt.MemoryRegions[Name] = {Name, Origin, Length, Origin, Flags, NegFlags}; 2072 } 2073 } 2074 2075 // This function parses the attributes used to match against section 2076 // flags when placing output sections in a memory region. These flags 2077 // are only used when an explicit memory region name is not used. 
2078 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { 2079 uint32_t Flags = 0; 2080 uint32_t NegFlags = 0; 2081 bool Invert = false; 2082 2083 for (char C : next().lower()) { 2084 uint32_t Flag = 0; 2085 if (C == '!') 2086 Invert = !Invert; 2087 else if (C == 'w') 2088 Flag = SHF_WRITE; 2089 else if (C == 'x') 2090 Flag = SHF_EXECINSTR; 2091 else if (C == 'a') 2092 Flag = SHF_ALLOC; 2093 else if (C != 'r') 2094 setError("invalid memory region attribute"); 2095 2096 if (Invert) 2097 NegFlags |= Flag; 2098 else 2099 Flags |= Flag; 2100 } 2101 return {Flags, NegFlags}; 2102 } 2103 2104 void elf::readLinkerScript(MemoryBufferRef MB) { 2105 ScriptParser(MB).readLinkerScript(); 2106 } 2107 2108 void elf::readVersionScript(MemoryBufferRef MB) { 2109 ScriptParser(MB).readVersionScript(); 2110 } 2111 2112 void elf::readDynamicList(MemoryBufferRef MB) { 2113 ScriptParser(MB).readDynamicList(); 2114 } 2115 2116 template class elf::LinkerScript<ELF32LE>; 2117 template class elf::LinkerScript<ELF32BE>; 2118 template class elf::LinkerScript<ELF64LE>; 2119 template class elf::LinkerScript<ELF64BE>; 2120