1 //===- LinkerScript.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the parser/evaluator of the linker script. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "LinkerScript.h" 15 #include "Config.h" 16 #include "Driver.h" 17 #include "InputSection.h" 18 #include "Memory.h" 19 #include "OutputSections.h" 20 #include "ScriptParser.h" 21 #include "Strings.h" 22 #include "SymbolTable.h" 23 #include "Symbols.h" 24 #include "SyntheticSections.h" 25 #include "Target.h" 26 #include "Writer.h" 27 #include "llvm/ADT/STLExtras.h" 28 #include "llvm/ADT/SmallString.h" 29 #include "llvm/ADT/StringRef.h" 30 #include "llvm/ADT/StringSwitch.h" 31 #include "llvm/Support/Casting.h" 32 #include "llvm/Support/ELF.h" 33 #include "llvm/Support/Endian.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/FileSystem.h" 36 #include "llvm/Support/MathExtras.h" 37 #include "llvm/Support/Path.h" 38 #include <algorithm> 39 #include <cassert> 40 #include <cstddef> 41 #include <cstdint> 42 #include <iterator> 43 #include <limits> 44 #include <memory> 45 #include <string> 46 #include <tuple> 47 #include <vector> 48 49 using namespace llvm; 50 using namespace llvm::ELF; 51 using namespace llvm::object; 52 using namespace llvm::support::endian; 53 using namespace lld; 54 using namespace lld::elf; 55 56 LinkerScriptBase *elf::ScriptBase; 57 ScriptConfiguration *elf::ScriptConfig; 58 59 template <class ELFT> static SymbolBody *addRegular(SymbolAssignment *Cmd) { 60 uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT; 61 Symbol *Sym = Symtab<ELFT>::X->addUndefined( 62 Cmd->Name, /*IsLocal=*/false, STB_GLOBAL, Visibility, 63 /*Type*/ 0, 64 /*CanOmitFromDynSym*/ false, /*File*/ nullptr); 65 66 replaceBody<DefinedRegular<ELFT>>(Sym, Cmd->Name, /*IsLocal=*/false, 67 Visibility, STT_NOTYPE, 0, 0, nullptr, 68 nullptr); 69 return Sym->body(); 70 } 71 72 template <class ELFT> static SymbolBody *addSynthetic(SymbolAssignment *Cmd) { 73 uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT; 74 const OutputSectionBase *Sec = 75 ScriptConfig->HasSections ? nullptr : Cmd->Expression.Section(); 76 Symbol *Sym = Symtab<ELFT>::X->addUndefined( 77 Cmd->Name, /*IsLocal=*/false, STB_GLOBAL, Visibility, 78 /*Type*/ 0, 79 /*CanOmitFromDynSym*/ false, /*File*/ nullptr); 80 81 replaceBody<DefinedSynthetic>(Sym, Cmd->Name, 0, Sec); 82 return Sym->body(); 83 } 84 85 static bool isUnderSysroot(StringRef Path) { 86 if (Config->Sysroot == "") 87 return false; 88 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 89 if (sys::fs::equivalent(Config->Sysroot, Path)) 90 return true; 91 return false; 92 } 93 94 template <class ELFT> static void assignSymbol(SymbolAssignment *Cmd) { 95 // If there are sections, then let the value be assigned later in 96 // `assignAddresses`. 97 if (ScriptConfig->HasSections) 98 return; 99 100 uint64_t Value = Cmd->Expression(0); 101 if (Cmd->Expression.IsAbsolute()) { 102 cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Value; 103 } else { 104 const OutputSectionBase *Sec = Cmd->Expression.Section(); 105 if (Sec) 106 cast<DefinedSynthetic>(Cmd->Sym)->Value = Value - Sec->Addr; 107 } 108 } 109 110 template <class ELFT> static void addSymbol(SymbolAssignment *Cmd) { 111 if (Cmd->Name == ".") 112 return; 113 114 // If a symbol was in PROVIDE(), we need to define it only when 115 // it is a referenced undefined symbol. 116 SymbolBody *B = Symtab<ELFT>::X->find(Cmd->Name); 117 if (Cmd->Provide && (!B || B->isDefined())) 118 return; 119 120 // Otherwise, create a new symbol if one does not exist or an 121 // undefined one does exist. 122 if (Cmd->Expression.IsAbsolute()) 123 Cmd->Sym = addRegular<ELFT>(Cmd); 124 else 125 Cmd->Sym = addSynthetic<ELFT>(Cmd); 126 assignSymbol<ELFT>(Cmd); 127 } 128 129 bool SymbolAssignment::classof(const BaseCommand *C) { 130 return C->Kind == AssignmentKind; 131 } 132 133 bool OutputSectionCommand::classof(const BaseCommand *C) { 134 return C->Kind == OutputSectionKind; 135 } 136 137 bool InputSectionDescription::classof(const BaseCommand *C) { 138 return C->Kind == InputSectionKind; 139 } 140 141 bool AssertCommand::classof(const BaseCommand *C) { 142 return C->Kind == AssertKind; 143 } 144 145 bool BytesDataCommand::classof(const BaseCommand *C) { 146 return C->Kind == BytesDataKind; 147 } 148 149 template <class ELFT> LinkerScript<ELFT>::LinkerScript() = default; 150 template <class ELFT> LinkerScript<ELFT>::~LinkerScript() = default; 151 152 template <class ELFT> static StringRef basename(InputSectionBase<ELFT> *S) { 153 if (S->getFile()) 154 return sys::path::filename(S->getFile()->getName()); 155 return ""; 156 } 157 158 template <class ELFT> 159 bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) { 160 for (InputSectionDescription *ID : Opt.KeptSections) 161 if (ID->FilePat.match(basename(S))) 162 for (SectionPattern &P : ID->SectionPatterns) 163 if (P.SectionPat.match(S->Name)) 164 return true; 165 return false; 166 } 167 168 static bool comparePriority(InputSectionData *A, InputSectionData *B) { 169 return getPriority(A->Name) < getPriority(B->Name); 170 } 171 172 static bool compareName(InputSectionData *A, InputSectionData *B) { 173 return A->Name < B->Name; 174 } 175 176 static bool compareAlignment(InputSectionData *A, InputSectionData *B) { 177 // ">" is not a mistake. Larger alignments are placed before smaller 178 // alignments in order to reduce the amount of padding necessary. 179 // This is compatible with GNU. 180 return A->Alignment > B->Alignment; 181 } 182 183 static std::function<bool(InputSectionData *, InputSectionData *)> 184 getComparator(SortSectionPolicy K) { 185 switch (K) { 186 case SortSectionPolicy::Alignment: 187 return compareAlignment; 188 case SortSectionPolicy::Name: 189 return compareName; 190 case SortSectionPolicy::Priority: 191 return comparePriority; 192 default: 193 llvm_unreachable("unknown sort policy"); 194 } 195 } 196 197 template <class ELFT> 198 static bool matchConstraints(ArrayRef<InputSectionBase<ELFT> *> Sections, 199 ConstraintKind Kind) { 200 if (Kind == ConstraintKind::NoConstraint) 201 return true; 202 bool IsRW = llvm::any_of(Sections, [=](InputSectionData *Sec2) { 203 auto *Sec = static_cast<InputSectionBase<ELFT> *>(Sec2); 204 return Sec->Flags & SHF_WRITE; 205 }); 206 return (IsRW && Kind == ConstraintKind::ReadWrite) || 207 (!IsRW && Kind == ConstraintKind::ReadOnly); 208 } 209 210 static void sortSections(InputSectionData **Begin, InputSectionData **End, 211 SortSectionPolicy K) { 212 if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None) 213 std::stable_sort(Begin, End, getComparator(K)); 214 } 215 216 // Compute and remember which sections the InputSectionDescription matches. 217 template <class ELFT> 218 void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) { 219 // Collects all sections that satisfy constraints of I 220 // and attach them to I. 221 for (SectionPattern &Pat : I->SectionPatterns) { 222 size_t SizeBefore = I->Sections.size(); 223 224 for (InputSectionBase<ELFT> *S : Symtab<ELFT>::X->Sections) { 225 if (!S->Live || S->Assigned) 226 continue; 227 228 StringRef Filename = basename(S); 229 if (!I->FilePat.match(Filename) || Pat.ExcludedFilePat.match(Filename)) 230 continue; 231 if (!Pat.SectionPat.match(S->Name)) 232 continue; 233 I->Sections.push_back(S); 234 S->Assigned = true; 235 } 236 237 // Sort sections as instructed by SORT-family commands and --sort-section 238 // option. Because SORT-family commands can be nested at most two depth 239 // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command 240 // line option is respected even if a SORT command is given, the exact 241 // behavior we have here is a bit complicated. Here are the rules. 242 // 243 // 1. If two SORT commands are given, --sort-section is ignored. 244 // 2. If one SORT command is given, and if it is not SORT_NONE, 245 // --sort-section is handled as an inner SORT command. 246 // 3. If one SORT command is given, and if it is SORT_NONE, don't sort. 247 // 4. If no SORT command is given, sort according to --sort-section. 248 InputSectionData **Begin = I->Sections.data() + SizeBefore; 249 InputSectionData **End = I->Sections.data() + I->Sections.size(); 250 if (Pat.SortOuter != SortSectionPolicy::None) { 251 if (Pat.SortInner == SortSectionPolicy::Default) 252 sortSections(Begin, End, Config->SortSection); 253 else 254 sortSections(Begin, End, Pat.SortInner); 255 sortSections(Begin, End, Pat.SortOuter); 256 } 257 } 258 } 259 260 template <class ELFT> 261 void LinkerScript<ELFT>::discard(ArrayRef<InputSectionBase<ELFT> *> V) { 262 for (InputSectionBase<ELFT> *S : V) { 263 S->Live = false; 264 reportDiscarded(S); 265 } 266 } 267 268 template <class ELFT> 269 std::vector<InputSectionBase<ELFT> *> 270 LinkerScript<ELFT>::createInputSectionList(OutputSectionCommand &OutCmd) { 271 std::vector<InputSectionBase<ELFT> *> Ret; 272 273 for (const std::unique_ptr<BaseCommand> &Base : OutCmd.Commands) { 274 auto *Cmd = dyn_cast<InputSectionDescription>(Base.get()); 275 if (!Cmd) 276 continue; 277 computeInputSections(Cmd); 278 for (InputSectionData *S : Cmd->Sections) 279 Ret.push_back(static_cast<InputSectionBase<ELFT> *>(S)); 280 } 281 282 return Ret; 283 } 284 285 template <class ELFT> 286 void LinkerScript<ELFT>::addSection(OutputSectionFactory<ELFT> &Factory, 287 InputSectionBase<ELFT> *Sec, 288 StringRef Name) { 289 OutputSectionBase *OutSec; 290 bool IsNew; 291 std::tie(OutSec, IsNew) = Factory.create(Sec, Name); 292 if (IsNew) 293 OutputSections->push_back(OutSec); 294 OutSec->addSection(Sec); 295 } 296 297 template <class ELFT> 298 void LinkerScript<ELFT>::processCommands(OutputSectionFactory<ELFT> &Factory) { 299 for (unsigned I = 0; I < Opt.Commands.size(); ++I) { 300 auto Iter = Opt.Commands.begin() + I; 301 const std::unique_ptr<BaseCommand> &Base1 = *Iter; 302 303 // Handle symbol assignments outside of any output section. 304 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base1.get())) { 305 addSymbol<ELFT>(Cmd); 306 continue; 307 } 308 309 if (auto *Cmd = dyn_cast<AssertCommand>(Base1.get())) { 310 // If we don't have SECTIONS then output sections have already been 311 // created by Writer<ELFT>. The LinkerScript<ELFT>::assignAddresses 312 // will not be called, so ASSERT should be evaluated now. 313 if (!Opt.HasSections) 314 Cmd->Expression(0); 315 continue; 316 } 317 318 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base1.get())) { 319 std::vector<InputSectionBase<ELFT> *> V = createInputSectionList(*Cmd); 320 321 // The output section name `/DISCARD/' is special. 322 // Any input section assigned to it is discarded. 323 if (Cmd->Name == "/DISCARD/") { 324 discard(V); 325 continue; 326 } 327 328 // This is for ONLY_IF_RO and ONLY_IF_RW. An output section directive 329 // ".foo : ONLY_IF_R[OW] { ... }" is handled only if all member input 330 // sections satisfy a given constraint. If not, a directive is handled 331 // as if it wasn't present from the beginning. 332 // 333 // Because we'll iterate over Commands many more times, the easiest 334 // way to "make it as if it wasn't present" is to just remove it. 335 if (!matchConstraints<ELFT>(V, Cmd->Constraint)) { 336 for (InputSectionBase<ELFT> *S : V) 337 S->Assigned = false; 338 Opt.Commands.erase(Iter); 339 --I; 340 continue; 341 } 342 343 // A directive may contain symbol definitions like this: 344 // ".foo : { ...; bar = .; }". Handle them. 345 for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) 346 if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base.get())) 347 addSymbol<ELFT>(OutCmd); 348 349 // Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign 350 // is given, input sections are aligned to that value, whether the 351 // given value is larger or smaller than the original section alignment. 352 if (Cmd->SubalignExpr) { 353 uint32_t Subalign = Cmd->SubalignExpr(0); 354 for (InputSectionBase<ELFT> *S : V) 355 S->Alignment = Subalign; 356 } 357 358 // Add input sections to an output section. 359 for (InputSectionBase<ELFT> *S : V) 360 addSection(Factory, S, Cmd->Name); 361 } 362 } 363 } 364 365 // Add sections that didn't match any sections command. 366 template <class ELFT> 367 void LinkerScript<ELFT>::addOrphanSections( 368 OutputSectionFactory<ELFT> &Factory) { 369 for (InputSectionBase<ELFT> *S : Symtab<ELFT>::X->Sections) 370 if (S->Live && !S->OutSec) 371 addSection(Factory, S, getOutputSectionName(S->Name)); 372 } 373 374 // Sets value of a section-defined symbol. Two kinds of 375 // symbols are processed: synthetic symbols, whose value 376 // is an offset from beginning of section and regular 377 // symbols whose value is absolute. 378 template <class ELFT> 379 static void assignSectionSymbol(SymbolAssignment *Cmd, 380 typename ELFT::uint Value) { 381 if (!Cmd->Sym) 382 return; 383 384 if (auto *Body = dyn_cast<DefinedSynthetic>(Cmd->Sym)) { 385 Body->Section = Cmd->Expression.Section(); 386 Body->Value = Cmd->Expression(Value) - Body->Section->Addr; 387 return; 388 } 389 auto *Body = cast<DefinedRegular<ELFT>>(Cmd->Sym); 390 Body->Value = Cmd->Expression(Value); 391 } 392 393 template <class ELFT> static bool isTbss(OutputSectionBase *Sec) { 394 return (Sec->Flags & SHF_TLS) && Sec->Type == SHT_NOBITS; 395 } 396 397 template <class ELFT> void LinkerScript<ELFT>::output(InputSection<ELFT> *S) { 398 if (!AlreadyOutputIS.insert(S).second) 399 return; 400 bool IsTbss = isTbss<ELFT>(CurOutSec); 401 402 uintX_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot; 403 Pos = alignTo(Pos, S->Alignment); 404 S->OutSecOff = Pos - CurOutSec->Addr; 405 Pos += S->getSize(); 406 407 // Update output section size after adding each section. This is so that 408 // SIZEOF works correctly in the case below: 409 // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) } 410 CurOutSec->Size = Pos - CurOutSec->Addr; 411 412 // If there is a memory region associated with this input section, then 413 // place the section in that region and update the region index. 414 if (CurMemRegion) { 415 CurMemRegion->Offset += CurOutSec->Size; 416 uint64_t CurSize = CurMemRegion->Offset - CurMemRegion->Origin; 417 if (CurSize > CurMemRegion->Length) { 418 uint64_t OverflowAmt = CurSize - CurMemRegion->Length; 419 error("section '" + CurOutSec->Name + "' will not fit in region '" + 420 CurMemRegion->Name + "': overflowed by " + Twine(OverflowAmt) + 421 " bytes"); 422 } 423 } 424 425 if (IsTbss) 426 ThreadBssOffset = Pos - Dot; 427 else 428 Dot = Pos; 429 } 430 431 template <class ELFT> void LinkerScript<ELFT>::flush() { 432 if (!CurOutSec || !AlreadyOutputOS.insert(CurOutSec).second) 433 return; 434 if (auto *OutSec = dyn_cast<OutputSection<ELFT>>(CurOutSec)) { 435 for (InputSection<ELFT> *I : OutSec->Sections) 436 output(I); 437 } else { 438 Dot += CurOutSec->Size; 439 } 440 } 441 442 template <class ELFT> 443 void LinkerScript<ELFT>::switchTo(OutputSectionBase *Sec) { 444 if (CurOutSec == Sec) 445 return; 446 if (AlreadyOutputOS.count(Sec)) 447 return; 448 449 flush(); 450 CurOutSec = Sec; 451 452 Dot = alignTo(Dot, CurOutSec->Addralign); 453 CurOutSec->Addr = isTbss<ELFT>(CurOutSec) ? Dot + ThreadBssOffset : Dot; 454 455 // If neither AT nor AT> is specified for an allocatable section, the linker 456 // will set the LMA such that the difference between VMA and LMA for the 457 // section is the same as the preceding output section in the same region 458 // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html 459 CurOutSec->setLMAOffset(LMAOffset); 460 } 461 462 template <class ELFT> void LinkerScript<ELFT>::process(BaseCommand &Base) { 463 // This handles the assignments to symbol or to a location counter (.) 464 if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) { 465 if (AssignCmd->Name == ".") { 466 // Update to location counter means update to section size. 467 uintX_t Val = AssignCmd->Expression(Dot); 468 if (Val < Dot) 469 error("unable to move location counter backward for: " + 470 CurOutSec->Name); 471 Dot = Val; 472 CurOutSec->Size = Dot - CurOutSec->Addr; 473 return; 474 } 475 assignSectionSymbol<ELFT>(AssignCmd, Dot); 476 return; 477 } 478 479 // Handle BYTE(), SHORT(), LONG(), or QUAD(). 480 if (auto *DataCmd = dyn_cast<BytesDataCommand>(&Base)) { 481 DataCmd->Offset = Dot - CurOutSec->Addr; 482 Dot += DataCmd->Size; 483 CurOutSec->Size = Dot - CurOutSec->Addr; 484 return; 485 } 486 487 if (auto *AssertCmd = dyn_cast<AssertCommand>(&Base)) { 488 AssertCmd->Expression(Dot); 489 return; 490 } 491 492 // It handles single input section description command, 493 // calculates and assigns the offsets for each section and also 494 // updates the output section size. 495 auto &ICmd = cast<InputSectionDescription>(Base); 496 for (InputSectionData *ID : ICmd.Sections) { 497 // We tentatively added all synthetic sections at the beginning and removed 498 // empty ones afterwards (because there is no way to know whether they were 499 // going be empty or not other than actually running linker scripts.) 500 // We need to ignore remains of empty sections. 501 if (auto *Sec = dyn_cast<SyntheticSection<ELFT>>(ID)) 502 if (Sec->empty()) 503 continue; 504 505 auto *IB = static_cast<InputSectionBase<ELFT> *>(ID); 506 switchTo(IB->OutSec); 507 if (auto *I = dyn_cast<InputSection<ELFT>>(IB)) 508 output(I); 509 else 510 flush(); 511 } 512 } 513 514 template <class ELFT> 515 static std::vector<OutputSectionBase *> 516 findSections(StringRef Name, const std::vector<OutputSectionBase *> &Sections) { 517 std::vector<OutputSectionBase *> Ret; 518 for (OutputSectionBase *Sec : Sections) 519 if (Sec->getName() == Name) 520 Ret.push_back(Sec); 521 return Ret; 522 } 523 524 // This function searches for a memory region to place the given output 525 // section in. If found, a pointer to the appropriate memory region is 526 // returned. Otherwise, a nullptr is returned. 527 template <class ELFT> 528 MemoryRegion *LinkerScript<ELFT>::findMemoryRegion(OutputSectionCommand *Cmd, 529 OutputSectionBase *Sec) { 530 // If a memory region name was specified in the output section command, 531 // then try to find that region first. 532 if (!Cmd->MemoryRegionName.empty()) { 533 auto It = Opt.MemoryRegions.find(Cmd->MemoryRegionName); 534 if (It != Opt.MemoryRegions.end()) 535 return &It->second; 536 error("memory region '" + Cmd->MemoryRegionName + "' not declared"); 537 return nullptr; 538 } 539 540 // The memory region name is empty, thus a suitable region must be 541 // searched for in the region map. If the region map is empty, just 542 // return. Note that this check doesn't happen at the very beginning 543 // so that uses of undeclared regions can be caught. 544 if (!Opt.MemoryRegions.size()) 545 return nullptr; 546 547 // See if a region can be found by matching section flags. 548 for (auto &MRI : Opt.MemoryRegions) { 549 MemoryRegion &MR = MRI.second; 550 if ((MR.Flags & Sec->Flags) != 0 && (MR.NegFlags & Sec->Flags) == 0) 551 return &MR; 552 } 553 554 // Otherwise, no suitable region was found. 555 if (Sec->Flags & SHF_ALLOC) 556 error("no memory region specified for section '" + Sec->Name + "'"); 557 return nullptr; 558 } 559 560 // This function assigns offsets to input sections and an output section 561 // for a single sections command (e.g. ".text { *(.text); }"). 562 template <class ELFT> 563 void LinkerScript<ELFT>::assignOffsets(OutputSectionCommand *Cmd) { 564 if (Cmd->LMAExpr) 565 LMAOffset = Cmd->LMAExpr(Dot) - Dot; 566 std::vector<OutputSectionBase *> Sections = 567 findSections<ELFT>(Cmd->Name, *OutputSections); 568 if (Sections.empty()) 569 return; 570 571 OutputSectionBase *Sec = Sections[0]; 572 // Try and find an appropriate memory region to assign offsets in. 573 CurMemRegion = findMemoryRegion(Cmd, Sec); 574 if (CurMemRegion) 575 Dot = CurMemRegion->Offset; 576 switchTo(Sec); 577 578 // Find the last section output location. We will output orphan sections 579 // there so that end symbols point to the correct location. 580 auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(), 581 [](const std::unique_ptr<BaseCommand> &Cmd) { 582 return !isa<SymbolAssignment>(*Cmd); 583 }) 584 .base(); 585 for (auto I = Cmd->Commands.begin(); I != E; ++I) 586 process(**I); 587 for (OutputSectionBase *Base : Sections) 588 switchTo(Base); 589 flush(); 590 std::for_each(E, Cmd->Commands.end(), 591 [this](std::unique_ptr<BaseCommand> &B) { process(*B.get()); }); 592 } 593 594 template <class ELFT> void LinkerScript<ELFT>::removeEmptyCommands() { 595 // It is common practice to use very generic linker scripts. So for any 596 // given run some of the output sections in the script will be empty. 597 // We could create corresponding empty output sections, but that would 598 // clutter the output. 599 // We instead remove trivially empty sections. The bfd linker seems even 600 // more aggressive at removing them. 601 auto Pos = std::remove_if( 602 Opt.Commands.begin(), Opt.Commands.end(), 603 [&](const std::unique_ptr<BaseCommand> &Base) { 604 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 605 return findSections<ELFT>(Cmd->Name, *OutputSections).empty(); 606 return false; 607 }); 608 Opt.Commands.erase(Pos, Opt.Commands.end()); 609 } 610 611 static bool isAllSectionDescription(const OutputSectionCommand &Cmd) { 612 for (const std::unique_ptr<BaseCommand> &I : Cmd.Commands) 613 if (!isa<InputSectionDescription>(*I)) 614 return false; 615 return true; 616 } 617 618 template <class ELFT> void LinkerScript<ELFT>::adjustSectionsBeforeSorting() { 619 // If the output section contains only symbol assignments, create a 620 // corresponding output section. The bfd linker seems to only create them if 621 // '.' is assigned to, but creating these section should not have any bad 622 // consequeces and gives us a section to put the symbol in. 623 uintX_t Flags = SHF_ALLOC; 624 uint32_t Type = SHT_NOBITS; 625 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 626 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 627 if (!Cmd) 628 continue; 629 std::vector<OutputSectionBase *> Secs = 630 findSections<ELFT>(Cmd->Name, *OutputSections); 631 if (!Secs.empty()) { 632 Flags = Secs[0]->Flags; 633 Type = Secs[0]->Type; 634 continue; 635 } 636 637 if (isAllSectionDescription(*Cmd)) 638 continue; 639 640 auto *OutSec = make<OutputSection<ELFT>>(Cmd->Name, Type, Flags); 641 OutputSections->push_back(OutSec); 642 } 643 } 644 645 template <class ELFT> void LinkerScript<ELFT>::adjustSectionsAfterSorting() { 646 placeOrphanSections(); 647 648 // If output section command doesn't specify any segments, 649 // and we haven't previously assigned any section to segment, 650 // then we simply assign section to the very first load segment. 651 // Below is an example of such linker script: 652 // PHDRS { seg PT_LOAD; } 653 // SECTIONS { .aaa : { *(.aaa) } } 654 std::vector<StringRef> DefPhdrs; 655 auto FirstPtLoad = 656 std::find_if(Opt.PhdrsCommands.begin(), Opt.PhdrsCommands.end(), 657 [](const PhdrsCommand &Cmd) { return Cmd.Type == PT_LOAD; }); 658 if (FirstPtLoad != Opt.PhdrsCommands.end()) 659 DefPhdrs.push_back(FirstPtLoad->Name); 660 661 // Walk the commands and propagate the program headers to commands that don't 662 // explicitly specify them. 663 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 664 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 665 if (!Cmd) 666 continue; 667 if (Cmd->Phdrs.empty()) 668 Cmd->Phdrs = DefPhdrs; 669 else 670 DefPhdrs = Cmd->Phdrs; 671 } 672 673 removeEmptyCommands(); 674 } 675 676 // When placing orphan sections, we want to place them after symbol assignments 677 // so that an orphan after 678 // begin_foo = .; 679 // foo : { *(foo) } 680 // end_foo = .; 681 // doesn't break the intended meaning of the begin/end symbols. 682 // We don't want to go over sections since Writer<ELFT>::sortSections is the 683 // one in charge of deciding the order of the sections. 684 // We don't want to go over alignments, since doing so in 685 // rx_sec : { *(rx_sec) } 686 // . = ALIGN(0x1000); 687 // /* The RW PT_LOAD starts here*/ 688 // rw_sec : { *(rw_sec) } 689 // would mean that the RW PT_LOAD would become unaligned. 690 static bool shouldSkip(const BaseCommand &Cmd) { 691 if (isa<OutputSectionCommand>(Cmd)) 692 return false; 693 const auto *Assign = dyn_cast<SymbolAssignment>(&Cmd); 694 if (!Assign) 695 return true; 696 return Assign->Name != "."; 697 } 698 699 // Orphan sections are sections present in the input files which are not 700 // explicitly placed into the output file by the linker script. This just 701 // places them in the order already decided in OutputSections. 702 template <class ELFT> void LinkerScript<ELFT>::placeOrphanSections() { 703 // The OutputSections are already in the correct order. 704 // This loops creates or moves commands as needed so that they are in the 705 // correct order. 706 int CmdIndex = 0; 707 708 // As a horrible special case, skip the first . assignment if it is before any 709 // section. We do this because it is common to set a load address by starting 710 // the script with ". = 0xabcd" and the expectation is that every section is 711 // after that. 712 auto FirstSectionOrDotAssignment = 713 std::find_if(Opt.Commands.begin(), Opt.Commands.end(), 714 [](const std::unique_ptr<BaseCommand> &Cmd) { 715 if (isa<OutputSectionCommand>(*Cmd)) 716 return true; 717 const auto *Assign = dyn_cast<SymbolAssignment>(Cmd.get()); 718 if (!Assign) 719 return false; 720 return Assign->Name == "."; 721 }); 722 if (FirstSectionOrDotAssignment != Opt.Commands.end()) { 723 CmdIndex = FirstSectionOrDotAssignment - Opt.Commands.begin(); 724 if (isa<SymbolAssignment>(**FirstSectionOrDotAssignment)) 725 ++CmdIndex; 726 } 727 728 for (OutputSectionBase *Sec : *OutputSections) { 729 StringRef Name = Sec->getName(); 730 731 // Find the last spot where we can insert a command and still get the 732 // correct result. 733 auto CmdIter = Opt.Commands.begin() + CmdIndex; 734 auto E = Opt.Commands.end(); 735 while (CmdIter != E && shouldSkip(**CmdIter)) { 736 ++CmdIter; 737 ++CmdIndex; 738 } 739 740 auto Pos = 741 std::find_if(CmdIter, E, [&](const std::unique_ptr<BaseCommand> &Base) { 742 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 743 return Cmd && Cmd->Name == Name; 744 }); 745 if (Pos == E) { 746 Opt.Commands.insert(CmdIter, 747 llvm::make_unique<OutputSectionCommand>(Name)); 748 ++CmdIndex; 749 continue; 750 } 751 752 // Continue from where we found it. 753 CmdIndex = (Pos - Opt.Commands.begin()) + 1; 754 } 755 } 756 757 template <class ELFT> 758 void LinkerScript<ELFT>::assignAddresses(std::vector<PhdrEntry> &Phdrs) { 759 // Assign addresses as instructed by linker script SECTIONS sub-commands. 760 Dot = 0; 761 762 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 763 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) { 764 if (Cmd->Name == ".") { 765 Dot = Cmd->Expression(Dot); 766 } else if (Cmd->Sym) { 767 assignSectionSymbol<ELFT>(Cmd, Dot); 768 } 769 continue; 770 } 771 772 if (auto *Cmd = dyn_cast<AssertCommand>(Base.get())) { 773 Cmd->Expression(Dot); 774 continue; 775 } 776 777 auto *Cmd = cast<OutputSectionCommand>(Base.get()); 778 if (Cmd->AddrExpr) 779 Dot = Cmd->AddrExpr(Dot); 780 assignOffsets(Cmd); 781 } 782 783 uintX_t MinVA = std::numeric_limits<uintX_t>::max(); 784 for (OutputSectionBase *Sec : *OutputSections) { 785 if (Sec->Flags & SHF_ALLOC) 786 MinVA = std::min<uint64_t>(MinVA, Sec->Addr); 787 else 788 Sec->Addr = 0; 789 } 790 791 allocateHeaders<ELFT>(Phdrs, *OutputSections, MinVA); 792 } 793 794 // Creates program headers as instructed by PHDRS linker script command. 795 template <class ELFT> std::vector<PhdrEntry> LinkerScript<ELFT>::createPhdrs() { 796 std::vector<PhdrEntry> Ret; 797 798 // Process PHDRS and FILEHDR keywords because they are not 799 // real output sections and cannot be added in the following loop. 800 for (const PhdrsCommand &Cmd : Opt.PhdrsCommands) { 801 Ret.emplace_back(Cmd.Type, Cmd.Flags == UINT_MAX ? PF_R : Cmd.Flags); 802 PhdrEntry &Phdr = Ret.back(); 803 804 if (Cmd.HasFilehdr) 805 Phdr.add(Out<ELFT>::ElfHeader); 806 if (Cmd.HasPhdrs) 807 Phdr.add(Out<ELFT>::ProgramHeaders); 808 809 if (Cmd.LMAExpr) { 810 Phdr.p_paddr = Cmd.LMAExpr(0); 811 Phdr.HasLMA = true; 812 } 813 } 814 815 // Add output sections to program headers. 816 for (OutputSectionBase *Sec : *OutputSections) { 817 if (!(Sec->Flags & SHF_ALLOC)) 818 break; 819 820 // Assign headers specified by linker script 821 for (size_t Id : getPhdrIndices(Sec->getName())) { 822 Ret[Id].add(Sec); 823 if (Opt.PhdrsCommands[Id].Flags == UINT_MAX) 824 Ret[Id].p_flags |= Sec->getPhdrFlags(); 825 } 826 } 827 return Ret; 828 } 829 830 template <class ELFT> bool LinkerScript<ELFT>::ignoreInterpSection() { 831 // Ignore .interp section in case we have PHDRS specification 832 // and PT_INTERP isn't listed. 833 return !Opt.PhdrsCommands.empty() && 834 llvm::find_if(Opt.PhdrsCommands, [](const PhdrsCommand &Cmd) { 835 return Cmd.Type == PT_INTERP; 836 }) == Opt.PhdrsCommands.end(); 837 } 838 839 template <class ELFT> uint32_t LinkerScript<ELFT>::getFiller(StringRef Name) { 840 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 841 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 842 if (Cmd->Name == Name) 843 return Cmd->Filler; 844 return 0; 845 } 846 847 template <class ELFT> 848 static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) { 849 const endianness E = ELFT::TargetEndianness; 850 851 switch (Size) { 852 case 1: 853 *Buf = (uint8_t)Data; 854 break; 855 case 2: 856 write16<E>(Buf, Data); 857 break; 858 case 4: 859 write32<E>(Buf, Data); 860 break; 861 case 8: 862 write64<E>(Buf, Data); 863 break; 864 default: 865 llvm_unreachable("unsupported Size argument"); 866 } 867 } 868 869 template <class ELFT> 870 void LinkerScript<ELFT>::writeDataBytes(StringRef Name, uint8_t *Buf) { 871 int I = getSectionIndex(Name); 872 if (I == INT_MAX) 873 return; 874 875 auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get()); 876 for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) 877 if (auto *Data = dyn_cast<BytesDataCommand>(Base.get())) 878 writeInt<ELFT>(Buf + Data->Offset, Data->Expression(0), Data->Size); 879 } 880 881 template <class ELFT> bool LinkerScript<ELFT>::hasLMA(StringRef Name) { 882 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 883 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 884 if (Cmd->LMAExpr && Cmd->Name == Name) 885 return true; 886 return false; 887 } 888 889 // Returns the index of the given section name in linker script 890 // SECTIONS commands. Sections are laid out as the same order as they 891 // were in the script. If a given name did not appear in the script, 892 // it returns INT_MAX, so that it will be laid out at end of file. 893 template <class ELFT> int LinkerScript<ELFT>::getSectionIndex(StringRef Name) { 894 for (int I = 0, E = Opt.Commands.size(); I != E; ++I) 895 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get())) 896 if (Cmd->Name == Name) 897 return I; 898 return INT_MAX; 899 } 900 901 template <class ELFT> bool LinkerScript<ELFT>::hasPhdrsCommands() { 902 return !Opt.PhdrsCommands.empty(); 903 } 904 905 template <class ELFT> 906 const OutputSectionBase *LinkerScript<ELFT>::getOutputSection(const Twine &Loc, 907 StringRef Name) { 908 static OutputSectionBase FakeSec("", 0, 0); 909 910 for (OutputSectionBase *Sec : *OutputSections) 911 if (Sec->getName() == Name) 912 return Sec; 913 914 error(Loc + ": undefined section " + Name); 915 return &FakeSec; 916 } 917 918 // This function is essentially the same as getOutputSection(Name)->Size, 919 // but it won't print out an error message if a given section is not found. 920 // 921 // Linker script does not create an output section if its content is empty. 922 // We want to allow SIZEOF(.foo) where .foo is a section which happened to 923 // be empty. That is why this function is different from getOutputSection(). 924 template <class ELFT> 925 uint64_t LinkerScript<ELFT>::getOutputSectionSize(StringRef Name) { 926 for (OutputSectionBase *Sec : *OutputSections) 927 if (Sec->getName() == Name) 928 return Sec->Size; 929 return 0; 930 } 931 932 template <class ELFT> uint64_t LinkerScript<ELFT>::getHeaderSize() { 933 return elf::getHeaderSize<ELFT>(); 934 } 935 936 template <class ELFT> 937 uint64_t LinkerScript<ELFT>::getSymbolValue(const Twine &Loc, StringRef S) { 938 if (SymbolBody *B = Symtab<ELFT>::X->find(S)) 939 return B->getVA<ELFT>(); 940 error(Loc + ": symbol not found: " + S); 941 return 0; 942 } 943 944 template <class ELFT> bool LinkerScript<ELFT>::isDefined(StringRef S) { 945 return Symtab<ELFT>::X->find(S) != nullptr; 946 } 947 948 template <class ELFT> bool LinkerScript<ELFT>::isAbsolute(StringRef S) { 949 SymbolBody *Sym = Symtab<ELFT>::X->find(S); 950 auto *DR = dyn_cast_or_null<DefinedRegular<ELFT>>(Sym); 951 return DR && !DR->Section; 952 } 953 954 // Gets section symbol belongs to. Symbol "." doesn't belong to any 955 // specific section but isn't absolute at the same time, so we try 956 // to find suitable section for it as well. 957 template <class ELFT> 958 const OutputSectionBase *LinkerScript<ELFT>::getSymbolSection(StringRef S) { 959 SymbolBody *Sym = Symtab<ELFT>::X->find(S); 960 if (!Sym) { 961 if (OutputSections->empty()) 962 return nullptr; 963 return CurOutSec ? CurOutSec : (*OutputSections)[0]; 964 } 965 966 return SymbolTableSection<ELFT>::getOutputSection(Sym); 967 } 968 969 // Returns indices of ELF headers containing specific section, identified 970 // by Name. Each index is a zero based number of ELF header listed within 971 // PHDRS {} script block. 972 template <class ELFT> 973 std::vector<size_t> LinkerScript<ELFT>::getPhdrIndices(StringRef SectionName) { 974 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 975 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 976 if (!Cmd || Cmd->Name != SectionName) 977 continue; 978 979 std::vector<size_t> Ret; 980 for (StringRef PhdrName : Cmd->Phdrs) 981 Ret.push_back(getPhdrIndex(Cmd->Location, PhdrName)); 982 return Ret; 983 } 984 return {}; 985 } 986 987 template <class ELFT> 988 size_t LinkerScript<ELFT>::getPhdrIndex(const Twine &Loc, StringRef PhdrName) { 989 size_t I = 0; 990 for (PhdrsCommand &Cmd : Opt.PhdrsCommands) { 991 if (Cmd.Name == PhdrName) 992 return I; 993 ++I; 994 } 995 error(Loc + ": section header '" + PhdrName + "' is not listed in PHDRS"); 996 return 0; 997 } 998 999 class elf::ScriptParser final : public ScriptParserBase { 1000 typedef void (ScriptParser::*Handler)(); 1001 1002 public: 1003 ScriptParser(MemoryBufferRef MB) 1004 : ScriptParserBase(MB), 1005 IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} 1006 1007 void readLinkerScript(); 1008 void readVersionScript(); 1009 void readDynamicList(); 1010 1011 private: 1012 void addFile(StringRef Path); 1013 1014 void readAsNeeded(); 1015 void readEntry(); 1016 void readExtern(); 1017 void readGroup(); 1018 void readInclude(); 1019 void readMemory(); 1020 void readOutput(); 1021 void readOutputArch(); 1022 void readOutputFormat(); 1023 void readPhdrs(); 1024 void readSearchDir(); 1025 void readSections(); 1026 void readVersion(); 1027 void readVersionScriptCommand(); 1028 1029 SymbolAssignment *readAssignment(StringRef Name); 1030 BytesDataCommand *readBytesDataCommand(StringRef Tok); 1031 uint32_t readFill(); 1032 OutputSectionCommand *readOutputSectionDescription(StringRef OutSec); 1033 uint32_t readOutputSectionFiller(StringRef Tok); 1034 std::vector<StringRef> readOutputSectionPhdrs(); 1035 InputSectionDescription *readInputSectionDescription(StringRef Tok); 1036 StringMatcher readFilePatterns(); 1037 std::vector<SectionPattern> readInputSectionsList(); 1038 InputSectionDescription *readInputSectionRules(StringRef FilePattern); 1039 unsigned readPhdrType(); 1040 SortSectionPolicy readSortKind(); 1041 SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); 1042 SymbolAssignment *readProvideOrAssignment(StringRef Tok); 1043 void readSort(); 1044 Expr readAssert(); 1045 1046 uint64_t readMemoryAssignment(StringRef, StringRef, StringRef); 1047 std::pair<uint32_t, uint32_t> readMemoryAttributes(); 1048 1049 Expr readExpr(); 1050 Expr readExpr1(Expr Lhs, int MinPrec); 1051 StringRef readParenLiteral(); 1052 Expr readPrimary(); 1053 Expr readTernary(Expr Cond); 1054 Expr readParenExpr(); 1055 1056 // For parsing version script. 1057 std::vector<SymbolVersion> readVersionExtern(); 1058 void readAnonymousDeclaration(); 1059 void readVersionDeclaration(StringRef VerStr); 1060 std::vector<SymbolVersion> readSymbols(); 1061 void readLocals(); 1062 1063 ScriptConfiguration &Opt = *ScriptConfig; 1064 bool IsUnderSysroot; 1065 }; 1066 1067 void ScriptParser::readDynamicList() { 1068 expect("{"); 1069 readAnonymousDeclaration(); 1070 if (!atEOF()) 1071 setError("EOF expected, but got " + next()); 1072 } 1073 1074 void ScriptParser::readVersionScript() { 1075 readVersionScriptCommand(); 1076 if (!atEOF()) 1077 setError("EOF expected, but got " + next()); 1078 } 1079 1080 void ScriptParser::readVersionScriptCommand() { 1081 if (consume("{")) { 1082 readAnonymousDeclaration(); 1083 return; 1084 } 1085 1086 while (!atEOF() && !Error && peek() != "}") { 1087 StringRef VerStr = next(); 1088 if (VerStr == "{") { 1089 setError("anonymous version definition is used in " 1090 "combination with other version definitions"); 1091 return; 1092 } 1093 expect("{"); 1094 readVersionDeclaration(VerStr); 1095 } 1096 } 1097 1098 void ScriptParser::readVersion() { 1099 expect("{"); 1100 readVersionScriptCommand(); 1101 expect("}"); 1102 } 1103 1104 void ScriptParser::readLinkerScript() { 1105 while (!atEOF()) { 1106 StringRef Tok = next(); 1107 if (Tok == ";") 1108 continue; 1109 1110 if (Tok == "ASSERT") { 1111 Opt.Commands.emplace_back(new AssertCommand(readAssert())); 1112 } else if (Tok == "ENTRY") { 1113 readEntry(); 1114 } else if (Tok == "EXTERN") { 1115 readExtern(); 1116 } else if (Tok == "GROUP" || Tok == "INPUT") { 1117 readGroup(); 1118 } else if (Tok == "INCLUDE") { 1119 readInclude(); 1120 } else if (Tok == "MEMORY") { 1121 readMemory(); 1122 } else if (Tok == "OUTPUT") { 1123 readOutput(); 1124 } else if (Tok == "OUTPUT_ARCH") { 1125 readOutputArch(); 1126 } else if (Tok == "OUTPUT_FORMAT") { 1127 readOutputFormat(); 1128 } else if (Tok == "PHDRS") { 1129 readPhdrs(); 1130 } else if (Tok == "SEARCH_DIR") { 1131 readSearchDir(); 1132 } else if (Tok == "SECTIONS") { 1133 readSections(); 1134 } else if (Tok == "VERSION") { 1135 readVersion(); 1136 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { 1137 Opt.Commands.emplace_back(Cmd); 1138 } else { 1139 setError("unknown directive: " + Tok); 1140 } 1141 } 1142 } 1143 1144 void ScriptParser::addFile(StringRef S) { 1145 if (IsUnderSysroot && S.startswith("/")) { 1146 SmallString<128> PathData; 1147 StringRef Path = (Config->Sysroot + S).toStringRef(PathData); 1148 if (sys::fs::exists(Path)) { 1149 Driver->addFile(Saver.save(Path)); 1150 return; 1151 } 1152 } 1153 1154 if (sys::path::is_absolute(S)) { 1155 Driver->addFile(S); 1156 } else if (S.startswith("=")) { 1157 if (Config->Sysroot.empty()) 1158 Driver->addFile(S.substr(1)); 1159 else 1160 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1))); 1161 } else if (S.startswith("-l")) { 1162 Driver->addLibrary(S.substr(2)); 1163 } else if (sys::fs::exists(S)) { 1164 Driver->addFile(S); 1165 } else { 1166 if (Optional<std::string> Path = findFromSearchPaths(S)) 1167 Driver->addFile(Saver.save(*Path)); 1168 else 1169 setError("unable to find " + S); 1170 } 1171 } 1172 1173 void ScriptParser::readAsNeeded() { 1174 expect("("); 1175 bool Orig = Config->AsNeeded; 1176 Config->AsNeeded = true; 1177 while (!Error && !consume(")")) 1178 addFile(unquote(next())); 1179 Config->AsNeeded = Orig; 1180 } 1181 1182 void ScriptParser::readEntry() { 1183 // -e <symbol> takes predecence over ENTRY(<symbol>). 1184 expect("("); 1185 StringRef Tok = next(); 1186 if (Config->Entry.empty()) 1187 Config->Entry = Tok; 1188 expect(")"); 1189 } 1190 1191 void ScriptParser::readExtern() { 1192 expect("("); 1193 while (!Error && !consume(")")) 1194 Config->Undefined.push_back(next()); 1195 } 1196 1197 void ScriptParser::readGroup() { 1198 expect("("); 1199 while (!Error && !consume(")")) { 1200 StringRef Tok = next(); 1201 if (Tok == "AS_NEEDED") 1202 readAsNeeded(); 1203 else 1204 addFile(unquote(Tok)); 1205 } 1206 } 1207 1208 void ScriptParser::readInclude() { 1209 StringRef Tok = unquote(next()); 1210 1211 // https://sourceware.org/binutils/docs/ld/File-Commands.html: 1212 // The file will be searched for in the current directory, and in any 1213 // directory specified with the -L option. 1214 if (sys::fs::exists(Tok)) { 1215 if (Optional<MemoryBufferRef> MB = readFile(Tok)) 1216 tokenize(*MB); 1217 return; 1218 } 1219 if (Optional<std::string> Path = findFromSearchPaths(Tok)) { 1220 if (Optional<MemoryBufferRef> MB = readFile(*Path)) 1221 tokenize(*MB); 1222 return; 1223 } 1224 setError("cannot open " + Tok); 1225 } 1226 1227 void ScriptParser::readOutput() { 1228 // -o <file> takes predecence over OUTPUT(<file>). 1229 expect("("); 1230 StringRef Tok = next(); 1231 if (Config->OutputFile.empty()) 1232 Config->OutputFile = unquote(Tok); 1233 expect(")"); 1234 } 1235 1236 void ScriptParser::readOutputArch() { 1237 // Error checking only for now. 1238 expect("("); 1239 skip(); 1240 expect(")"); 1241 } 1242 1243 void ScriptParser::readOutputFormat() { 1244 // Error checking only for now. 1245 expect("("); 1246 skip(); 1247 StringRef Tok = next(); 1248 if (Tok == ")") 1249 return; 1250 if (Tok != ",") { 1251 setError("unexpected token: " + Tok); 1252 return; 1253 } 1254 skip(); 1255 expect(","); 1256 skip(); 1257 expect(")"); 1258 } 1259 1260 void ScriptParser::readPhdrs() { 1261 expect("{"); 1262 while (!Error && !consume("}")) { 1263 StringRef Tok = next(); 1264 Opt.PhdrsCommands.push_back( 1265 {Tok, PT_NULL, false, false, UINT_MAX, nullptr}); 1266 PhdrsCommand &PhdrCmd = Opt.PhdrsCommands.back(); 1267 1268 PhdrCmd.Type = readPhdrType(); 1269 do { 1270 Tok = next(); 1271 if (Tok == ";") 1272 break; 1273 if (Tok == "FILEHDR") 1274 PhdrCmd.HasFilehdr = true; 1275 else if (Tok == "PHDRS") 1276 PhdrCmd.HasPhdrs = true; 1277 else if (Tok == "AT") 1278 PhdrCmd.LMAExpr = readParenExpr(); 1279 else if (Tok == "FLAGS") { 1280 expect("("); 1281 // Passing 0 for the value of dot is a bit of a hack. It means that 1282 // we accept expressions like ".|1". 1283 PhdrCmd.Flags = readExpr()(0); 1284 expect(")"); 1285 } else 1286 setError("unexpected header attribute: " + Tok); 1287 } while (!Error); 1288 } 1289 } 1290 1291 void ScriptParser::readSearchDir() { 1292 expect("("); 1293 StringRef Tok = next(); 1294 if (!Config->Nostdlib) 1295 Config->SearchPaths.push_back(unquote(Tok)); 1296 expect(")"); 1297 } 1298 1299 void ScriptParser::readSections() { 1300 Opt.HasSections = true; 1301 // -no-rosegment is used to avoid placing read only non-executable sections in 1302 // their own segment. We do the same if SECTIONS command is present in linker 1303 // script. See comment for computeFlags(). 1304 Config->SingleRoRx = true; 1305 1306 expect("{"); 1307 while (!Error && !consume("}")) { 1308 StringRef Tok = next(); 1309 BaseCommand *Cmd = readProvideOrAssignment(Tok); 1310 if (!Cmd) { 1311 if (Tok == "ASSERT") 1312 Cmd = new AssertCommand(readAssert()); 1313 else 1314 Cmd = readOutputSectionDescription(Tok); 1315 } 1316 Opt.Commands.emplace_back(Cmd); 1317 } 1318 } 1319 1320 static int precedence(StringRef Op) { 1321 return StringSwitch<int>(Op) 1322 .Cases("*", "/", 5) 1323 .Cases("+", "-", 4) 1324 .Cases("<<", ">>", 3) 1325 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 1326 .Cases("&", "|", 1) 1327 .Default(-1); 1328 } 1329 1330 StringMatcher ScriptParser::readFilePatterns() { 1331 std::vector<StringRef> V; 1332 while (!Error && !consume(")")) 1333 V.push_back(next()); 1334 return StringMatcher(V); 1335 } 1336 1337 SortSectionPolicy ScriptParser::readSortKind() { 1338 if (consume("SORT") || consume("SORT_BY_NAME")) 1339 return SortSectionPolicy::Name; 1340 if (consume("SORT_BY_ALIGNMENT")) 1341 return SortSectionPolicy::Alignment; 1342 if (consume("SORT_BY_INIT_PRIORITY")) 1343 return SortSectionPolicy::Priority; 1344 if (consume("SORT_NONE")) 1345 return SortSectionPolicy::None; 1346 return SortSectionPolicy::Default; 1347 } 1348 1349 // Method reads a list of sequence of excluded files and section globs given in 1350 // a following form: ((EXCLUDE_FILE(file_pattern+))? section_pattern+)+ 1351 // Example: *(.foo.1 EXCLUDE_FILE (*a.o) .foo.2 EXCLUDE_FILE (*b.o) .foo.3) 1352 // The semantics of that is next: 1353 // * Include .foo.1 from every file. 1354 // * Include .foo.2 from every file but a.o 1355 // * Include .foo.3 from every file but b.o 1356 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 1357 std::vector<SectionPattern> Ret; 1358 while (!Error && peek() != ")") { 1359 StringMatcher ExcludeFilePat; 1360 if (consume("EXCLUDE_FILE")) { 1361 expect("("); 1362 ExcludeFilePat = readFilePatterns(); 1363 } 1364 1365 std::vector<StringRef> V; 1366 while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") 1367 V.push_back(next()); 1368 1369 if (!V.empty()) 1370 Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); 1371 else 1372 setError("section pattern is expected"); 1373 } 1374 return Ret; 1375 } 1376 1377 // Reads contents of "SECTIONS" directive. That directive contains a 1378 // list of glob patterns for input sections. The grammar is as follows. 1379 // 1380 // <patterns> ::= <section-list> 1381 // | <sort> "(" <section-list> ")" 1382 // | <sort> "(" <sort> "(" <section-list> ")" ")" 1383 // 1384 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" 1385 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" 1386 // 1387 // <section-list> is parsed by readInputSectionsList(). 1388 InputSectionDescription * 1389 ScriptParser::readInputSectionRules(StringRef FilePattern) { 1390 auto *Cmd = new InputSectionDescription(FilePattern); 1391 expect("("); 1392 while (!Error && !consume(")")) { 1393 SortSectionPolicy Outer = readSortKind(); 1394 SortSectionPolicy Inner = SortSectionPolicy::Default; 1395 std::vector<SectionPattern> V; 1396 if (Outer != SortSectionPolicy::Default) { 1397 expect("("); 1398 Inner = readSortKind(); 1399 if (Inner != SortSectionPolicy::Default) { 1400 expect("("); 1401 V = readInputSectionsList(); 1402 expect(")"); 1403 } else { 1404 V = readInputSectionsList(); 1405 } 1406 expect(")"); 1407 } else { 1408 V = readInputSectionsList(); 1409 } 1410 1411 for (SectionPattern &Pat : V) { 1412 Pat.SortInner = Inner; 1413 Pat.SortOuter = Outer; 1414 } 1415 1416 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 1417 } 1418 return Cmd; 1419 } 1420 1421 InputSectionDescription * 1422 ScriptParser::readInputSectionDescription(StringRef Tok) { 1423 // Input section wildcard can be surrounded by KEEP. 1424 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 1425 if (Tok == "KEEP") { 1426 expect("("); 1427 StringRef FilePattern = next(); 1428 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 1429 expect(")"); 1430 Opt.KeptSections.push_back(Cmd); 1431 return Cmd; 1432 } 1433 return readInputSectionRules(Tok); 1434 } 1435 1436 void ScriptParser::readSort() { 1437 expect("("); 1438 expect("CONSTRUCTORS"); 1439 expect(")"); 1440 } 1441 1442 Expr ScriptParser::readAssert() { 1443 expect("("); 1444 Expr E = readExpr(); 1445 expect(","); 1446 StringRef Msg = unquote(next()); 1447 expect(")"); 1448 return [=](uint64_t Dot) { 1449 uint64_t V = E(Dot); 1450 if (!V) 1451 error(Msg); 1452 return V; 1453 }; 1454 } 1455 1456 // Reads a FILL(expr) command. We handle the FILL command as an 1457 // alias for =fillexp section attribute, which is different from 1458 // what GNU linkers do. 1459 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 1460 uint32_t ScriptParser::readFill() { 1461 expect("("); 1462 uint32_t V = readOutputSectionFiller(next()); 1463 expect(")"); 1464 expect(";"); 1465 return V; 1466 } 1467 1468 OutputSectionCommand * 1469 ScriptParser::readOutputSectionDescription(StringRef OutSec) { 1470 OutputSectionCommand *Cmd = new OutputSectionCommand(OutSec); 1471 Cmd->Location = getCurrentLocation(); 1472 1473 // Read an address expression. 1474 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html#Output-Section-Address 1475 if (peek() != ":") 1476 Cmd->AddrExpr = readExpr(); 1477 1478 expect(":"); 1479 1480 if (consume("AT")) 1481 Cmd->LMAExpr = readParenExpr(); 1482 if (consume("ALIGN")) 1483 Cmd->AlignExpr = readParenExpr(); 1484 if (consume("SUBALIGN")) 1485 Cmd->SubalignExpr = readParenExpr(); 1486 1487 // Parse constraints. 1488 if (consume("ONLY_IF_RO")) 1489 Cmd->Constraint = ConstraintKind::ReadOnly; 1490 if (consume("ONLY_IF_RW")) 1491 Cmd->Constraint = ConstraintKind::ReadWrite; 1492 expect("{"); 1493 1494 while (!Error && !consume("}")) { 1495 StringRef Tok = next(); 1496 if (SymbolAssignment *Assignment = readProvideOrAssignment(Tok)) { 1497 Cmd->Commands.emplace_back(Assignment); 1498 } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) { 1499 Cmd->Commands.emplace_back(Data); 1500 } else if (Tok == "ASSERT") { 1501 Cmd->Commands.emplace_back(new AssertCommand(readAssert())); 1502 expect(";"); 1503 } else if (Tok == "CONSTRUCTORS") { 1504 // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors 1505 // by name. This is for very old file formats such as ECOFF/XCOFF. 1506 // For ELF, we should ignore. 1507 } else if (Tok == "FILL") { 1508 Cmd->Filler = readFill(); 1509 } else if (Tok == "SORT") { 1510 readSort(); 1511 } else if (peek() == "(") { 1512 Cmd->Commands.emplace_back(readInputSectionDescription(Tok)); 1513 } else { 1514 setError("unknown command " + Tok); 1515 } 1516 } 1517 1518 if (consume(">")) 1519 Cmd->MemoryRegionName = next(); 1520 1521 Cmd->Phdrs = readOutputSectionPhdrs(); 1522 1523 if (consume("=")) 1524 Cmd->Filler = readOutputSectionFiller(next()); 1525 else if (peek().startswith("=")) 1526 Cmd->Filler = readOutputSectionFiller(next().drop_front()); 1527 1528 // Consume optional comma following output section command. 1529 consume(","); 1530 1531 return Cmd; 1532 } 1533 1534 // Read "=<number>" where <number> is an octal/decimal/hexadecimal number. 1535 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 1536 // 1537 // ld.gold is not fully compatible with ld.bfd. ld.bfd handles 1538 // hexstrings as blobs of arbitrary sizes, while ld.gold handles them 1539 // as 32-bit big-endian values. We will do the same as ld.gold does 1540 // because it's simpler than what ld.bfd does. 1541 uint32_t ScriptParser::readOutputSectionFiller(StringRef Tok) { 1542 uint32_t V; 1543 if (!Tok.getAsInteger(0, V)) 1544 return V; 1545 setError("invalid filler expression: " + Tok); 1546 return 0; 1547 } 1548 1549 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { 1550 expect("("); 1551 SymbolAssignment *Cmd = readAssignment(next()); 1552 Cmd->Provide = Provide; 1553 Cmd->Hidden = Hidden; 1554 expect(")"); 1555 expect(";"); 1556 return Cmd; 1557 } 1558 1559 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) { 1560 SymbolAssignment *Cmd = nullptr; 1561 if (peek() == "=" || peek() == "+=") { 1562 Cmd = readAssignment(Tok); 1563 expect(";"); 1564 } else if (Tok == "PROVIDE") { 1565 Cmd = readProvideHidden(true, false); 1566 } else if (Tok == "HIDDEN") { 1567 Cmd = readProvideHidden(false, true); 1568 } else if (Tok == "PROVIDE_HIDDEN") { 1569 Cmd = readProvideHidden(true, true); 1570 } 1571 return Cmd; 1572 } 1573 1574 static uint64_t getSymbolValue(const Twine &Loc, StringRef S, uint64_t Dot) { 1575 if (S == ".") 1576 return Dot; 1577 return ScriptBase->getSymbolValue(Loc, S); 1578 } 1579 1580 static bool isAbsolute(StringRef S) { 1581 if (S == ".") 1582 return false; 1583 return ScriptBase->isAbsolute(S); 1584 } 1585 1586 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { 1587 StringRef Op = next(); 1588 Expr E; 1589 assert(Op == "=" || Op == "+="); 1590 if (consume("ABSOLUTE")) { 1591 // The RHS may be something like "ABSOLUTE(.) & 0xff". 1592 // Call readExpr1 to read the whole expression. 1593 E = readExpr1(readParenExpr(), 0); 1594 E.IsAbsolute = [] { return true; }; 1595 } else { 1596 E = readExpr(); 1597 } 1598 if (Op == "+=") { 1599 std::string Loc = getCurrentLocation(); 1600 E = [=](uint64_t Dot) { 1601 return getSymbolValue(Loc, Name, Dot) + E(Dot); 1602 }; 1603 } 1604 return new SymbolAssignment(Name, E); 1605 } 1606 1607 // This is an operator-precedence parser to parse a linker 1608 // script expression. 1609 Expr ScriptParser::readExpr() { return readExpr1(readPrimary(), 0); } 1610 1611 static Expr combine(StringRef Op, Expr L, Expr R) { 1612 if (Op == "*") 1613 return [=](uint64_t Dot) { return L(Dot) * R(Dot); }; 1614 if (Op == "/") { 1615 return [=](uint64_t Dot) -> uint64_t { 1616 uint64_t RHS = R(Dot); 1617 if (RHS == 0) { 1618 error("division by zero"); 1619 return 0; 1620 } 1621 return L(Dot) / RHS; 1622 }; 1623 } 1624 if (Op == "+") 1625 return {[=](uint64_t Dot) { return L(Dot) + R(Dot); }, 1626 [=] { return L.IsAbsolute() && R.IsAbsolute(); }, 1627 [=] { 1628 const OutputSectionBase *S = L.Section(); 1629 return S ? S : R.Section(); 1630 }}; 1631 if (Op == "-") 1632 return [=](uint64_t Dot) { return L(Dot) - R(Dot); }; 1633 if (Op == "<<") 1634 return [=](uint64_t Dot) { return L(Dot) << R(Dot); }; 1635 if (Op == ">>") 1636 return [=](uint64_t Dot) { return L(Dot) >> R(Dot); }; 1637 if (Op == "<") 1638 return [=](uint64_t Dot) { return L(Dot) < R(Dot); }; 1639 if (Op == ">") 1640 return [=](uint64_t Dot) { return L(Dot) > R(Dot); }; 1641 if (Op == ">=") 1642 return [=](uint64_t Dot) { return L(Dot) >= R(Dot); }; 1643 if (Op == "<=") 1644 return [=](uint64_t Dot) { return L(Dot) <= R(Dot); }; 1645 if (Op == "==") 1646 return [=](uint64_t Dot) { return L(Dot) == R(Dot); }; 1647 if (Op == "!=") 1648 return [=](uint64_t Dot) { return L(Dot) != R(Dot); }; 1649 if (Op == "&") 1650 return [=](uint64_t Dot) { return L(Dot) & R(Dot); }; 1651 if (Op == "|") 1652 return [=](uint64_t Dot) { return L(Dot) | R(Dot); }; 1653 llvm_unreachable("invalid operator"); 1654 } 1655 1656 // This is a part of the operator-precedence parser. This function 1657 // assumes that the remaining token stream starts with an operator. 1658 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { 1659 while (!atEOF() && !Error) { 1660 // Read an operator and an expression. 1661 if (consume("?")) 1662 return readTernary(Lhs); 1663 StringRef Op1 = peek(); 1664 if (precedence(Op1) < MinPrec) 1665 break; 1666 skip(); 1667 Expr Rhs = readPrimary(); 1668 1669 // Evaluate the remaining part of the expression first if the 1670 // next operator has greater precedence than the previous one. 1671 // For example, if we have read "+" and "3", and if the next 1672 // operator is "*", then we'll evaluate 3 * ... part first. 1673 while (!atEOF()) { 1674 StringRef Op2 = peek(); 1675 if (precedence(Op2) <= precedence(Op1)) 1676 break; 1677 Rhs = readExpr1(Rhs, precedence(Op2)); 1678 } 1679 1680 Lhs = combine(Op1, Lhs, Rhs); 1681 } 1682 return Lhs; 1683 } 1684 1685 uint64_t static getConstant(StringRef S) { 1686 if (S == "COMMONPAGESIZE") 1687 return Target->PageSize; 1688 if (S == "MAXPAGESIZE") 1689 return Config->MaxPageSize; 1690 error("unknown constant: " + S); 1691 return 0; 1692 } 1693 1694 // Parses Tok as an integer. Returns true if successful. 1695 // It recognizes hexadecimal (prefixed with "0x" or suffixed with "H") 1696 // and decimal numbers. Decimal numbers may have "K" (kilo) or 1697 // "M" (mega) prefixes. 1698 static bool readInteger(StringRef Tok, uint64_t &Result) { 1699 // Negative number 1700 if (Tok.startswith("-")) { 1701 if (!readInteger(Tok.substr(1), Result)) 1702 return false; 1703 Result = -Result; 1704 return true; 1705 } 1706 1707 // Hexadecimal 1708 if (Tok.startswith_lower("0x")) 1709 return !Tok.substr(2).getAsInteger(16, Result); 1710 if (Tok.endswith_lower("H")) 1711 return !Tok.drop_back().getAsInteger(16, Result); 1712 1713 // Decimal 1714 int Suffix = 1; 1715 if (Tok.endswith_lower("K")) { 1716 Suffix = 1024; 1717 Tok = Tok.drop_back(); 1718 } else if (Tok.endswith_lower("M")) { 1719 Suffix = 1024 * 1024; 1720 Tok = Tok.drop_back(); 1721 } 1722 if (Tok.getAsInteger(10, Result)) 1723 return false; 1724 Result *= Suffix; 1725 return true; 1726 } 1727 1728 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { 1729 int Size = StringSwitch<unsigned>(Tok) 1730 .Case("BYTE", 1) 1731 .Case("SHORT", 2) 1732 .Case("LONG", 4) 1733 .Case("QUAD", 8) 1734 .Default(-1); 1735 if (Size == -1) 1736 return nullptr; 1737 1738 return new BytesDataCommand(readParenExpr(), Size); 1739 } 1740 1741 StringRef ScriptParser::readParenLiteral() { 1742 expect("("); 1743 StringRef Tok = next(); 1744 expect(")"); 1745 return Tok; 1746 } 1747 1748 Expr ScriptParser::readPrimary() { 1749 if (peek() == "(") 1750 return readParenExpr(); 1751 1752 StringRef Tok = next(); 1753 std::string Location = getCurrentLocation(); 1754 1755 if (Tok == "~") { 1756 Expr E = readPrimary(); 1757 return [=](uint64_t Dot) { return ~E(Dot); }; 1758 } 1759 if (Tok == "-") { 1760 Expr E = readPrimary(); 1761 return [=](uint64_t Dot) { return -E(Dot); }; 1762 } 1763 1764 // Built-in functions are parsed here. 1765 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 1766 if (Tok == "ADDR") { 1767 StringRef Name = readParenLiteral(); 1768 return {[=](uint64_t Dot) { 1769 return ScriptBase->getOutputSection(Location, Name)->Addr; 1770 }, 1771 [=] { return false; }, 1772 [=] { return ScriptBase->getOutputSection(Location, Name); }}; 1773 } 1774 if (Tok == "LOADADDR") { 1775 StringRef Name = readParenLiteral(); 1776 return [=](uint64_t Dot) { 1777 return ScriptBase->getOutputSection(Location, Name)->getLMA(); 1778 }; 1779 } 1780 if (Tok == "ASSERT") 1781 return readAssert(); 1782 if (Tok == "ALIGN") { 1783 expect("("); 1784 Expr E = readExpr(); 1785 if (consume(",")) { 1786 Expr E2 = readExpr(); 1787 expect(")"); 1788 return [=](uint64_t Dot) { return alignTo(E(Dot), E2(Dot)); }; 1789 } 1790 expect(")"); 1791 return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; 1792 } 1793 if (Tok == "CONSTANT") { 1794 StringRef Name = readParenLiteral(); 1795 return [=](uint64_t Dot) { return getConstant(Name); }; 1796 } 1797 if (Tok == "DEFINED") { 1798 StringRef Name = readParenLiteral(); 1799 return [=](uint64_t Dot) { return ScriptBase->isDefined(Name) ? 1 : 0; }; 1800 } 1801 if (Tok == "SEGMENT_START") { 1802 expect("("); 1803 skip(); 1804 expect(","); 1805 Expr E = readExpr(); 1806 expect(")"); 1807 return [=](uint64_t Dot) { return E(Dot); }; 1808 } 1809 if (Tok == "DATA_SEGMENT_ALIGN") { 1810 expect("("); 1811 Expr E = readExpr(); 1812 expect(","); 1813 readExpr(); 1814 expect(")"); 1815 return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; 1816 } 1817 if (Tok == "DATA_SEGMENT_END") { 1818 expect("("); 1819 expect("."); 1820 expect(")"); 1821 return [](uint64_t Dot) { return Dot; }; 1822 } 1823 // GNU linkers implements more complicated logic to handle 1824 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and just align to 1825 // the next page boundary for simplicity. 1826 if (Tok == "DATA_SEGMENT_RELRO_END") { 1827 expect("("); 1828 readExpr(); 1829 expect(","); 1830 readExpr(); 1831 expect(")"); 1832 return [](uint64_t Dot) { return alignTo(Dot, Target->PageSize); }; 1833 } 1834 if (Tok == "SIZEOF") { 1835 StringRef Name = readParenLiteral(); 1836 return [=](uint64_t Dot) { return ScriptBase->getOutputSectionSize(Name); }; 1837 } 1838 if (Tok == "ALIGNOF") { 1839 StringRef Name = readParenLiteral(); 1840 return [=](uint64_t Dot) { 1841 return ScriptBase->getOutputSection(Location, Name)->Addralign; 1842 }; 1843 } 1844 if (Tok == "SIZEOF_HEADERS") 1845 return [=](uint64_t Dot) { return ScriptBase->getHeaderSize(); }; 1846 1847 // Tok is a literal number. 1848 uint64_t V; 1849 if (readInteger(Tok, V)) 1850 return [=](uint64_t Dot) { return V; }; 1851 1852 // Tok is a symbol name. 1853 if (Tok != "." && !isValidCIdentifier(Tok)) 1854 setError("malformed number: " + Tok); 1855 return {[=](uint64_t Dot) { return getSymbolValue(Location, Tok, Dot); }, 1856 [=] { return isAbsolute(Tok); }, 1857 [=] { return ScriptBase->getSymbolSection(Tok); }}; 1858 } 1859 1860 Expr ScriptParser::readTernary(Expr Cond) { 1861 Expr L = readExpr(); 1862 expect(":"); 1863 Expr R = readExpr(); 1864 return [=](uint64_t Dot) { return Cond(Dot) ? L(Dot) : R(Dot); }; 1865 } 1866 1867 Expr ScriptParser::readParenExpr() { 1868 expect("("); 1869 Expr E = readExpr(); 1870 expect(")"); 1871 return E; 1872 } 1873 1874 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 1875 std::vector<StringRef> Phdrs; 1876 while (!Error && peek().startswith(":")) { 1877 StringRef Tok = next(); 1878 Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); 1879 } 1880 return Phdrs; 1881 } 1882 1883 // Read a program header type name. The next token must be a 1884 // name of a program header type or a constant (e.g. "0x3"). 1885 unsigned ScriptParser::readPhdrType() { 1886 StringRef Tok = next(); 1887 uint64_t Val; 1888 if (readInteger(Tok, Val)) 1889 return Val; 1890 1891 unsigned Ret = StringSwitch<unsigned>(Tok) 1892 .Case("PT_NULL", PT_NULL) 1893 .Case("PT_LOAD", PT_LOAD) 1894 .Case("PT_DYNAMIC", PT_DYNAMIC) 1895 .Case("PT_INTERP", PT_INTERP) 1896 .Case("PT_NOTE", PT_NOTE) 1897 .Case("PT_SHLIB", PT_SHLIB) 1898 .Case("PT_PHDR", PT_PHDR) 1899 .Case("PT_TLS", PT_TLS) 1900 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1901 .Case("PT_GNU_STACK", PT_GNU_STACK) 1902 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1903 .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) 1904 .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) 1905 .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) 1906 .Default(-1); 1907 1908 if (Ret == (unsigned)-1) { 1909 setError("invalid program header type: " + Tok); 1910 return PT_NULL; 1911 } 1912 return Ret; 1913 } 1914 1915 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 1916 void ScriptParser::readAnonymousDeclaration() { 1917 // Read global symbols first. "global:" is default, so if there's 1918 // no label, we assume global symbols. 1919 if (consume("global:") || peek() != "local:") 1920 Config->VersionScriptGlobals = readSymbols(); 1921 1922 readLocals(); 1923 expect("}"); 1924 expect(";"); 1925 } 1926 1927 void ScriptParser::readLocals() { 1928 if (!consume("local:")) 1929 return; 1930 std::vector<SymbolVersion> Locals = readSymbols(); 1931 for (SymbolVersion V : Locals) { 1932 if (V.Name == "*") { 1933 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1934 continue; 1935 } 1936 Config->VersionScriptLocals.push_back(V); 1937 } 1938 } 1939 1940 // Reads a list of symbols, e.g. "VerStr { global: foo; bar; local: *; };". 1941 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1942 // Identifiers start at 2 because 0 and 1 are reserved 1943 // for VER_NDX_LOCAL and VER_NDX_GLOBAL constants. 1944 uint16_t VersionId = Config->VersionDefinitions.size() + 2; 1945 Config->VersionDefinitions.push_back({VerStr, VersionId}); 1946 1947 // Read global symbols. 1948 if (consume("global:") || peek() != "local:") 1949 Config->VersionDefinitions.back().Globals = readSymbols(); 1950 1951 readLocals(); 1952 expect("}"); 1953 1954 // Each version may have a parent version. For example, "Ver2" 1955 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" 1956 // as a parent. This version hierarchy is, probably against your 1957 // instinct, purely for hint; the runtime doesn't care about it 1958 // at all. In LLD, we simply ignore it. 1959 if (peek() != ";") 1960 skip(); 1961 expect(";"); 1962 } 1963 1964 // Reads a list of symbols for a versions cript. 1965 std::vector<SymbolVersion> ScriptParser::readSymbols() { 1966 std::vector<SymbolVersion> Ret; 1967 for (;;) { 1968 if (consume("extern")) { 1969 for (SymbolVersion V : readVersionExtern()) 1970 Ret.push_back(V); 1971 continue; 1972 } 1973 1974 if (peek() == "}" || peek() == "local:" || Error) 1975 break; 1976 StringRef Tok = next(); 1977 Ret.push_back({unquote(Tok), false, hasWildcard(Tok)}); 1978 expect(";"); 1979 } 1980 return Ret; 1981 } 1982 1983 // Reads an "extern C++" directive, e.g., 1984 // "extern "C++" { ns::*; "f(int, double)"; };" 1985 std::vector<SymbolVersion> ScriptParser::readVersionExtern() { 1986 StringRef Tok = next(); 1987 bool IsCXX = Tok == "\"C++\""; 1988 if (!IsCXX && Tok != "\"C\"") 1989 setError("Unknown language"); 1990 expect("{"); 1991 1992 std::vector<SymbolVersion> Ret; 1993 while (!Error && peek() != "}") { 1994 StringRef Tok = next(); 1995 bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); 1996 Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); 1997 expect(";"); 1998 } 1999 2000 expect("}"); 2001 expect(";"); 2002 return Ret; 2003 } 2004 2005 uint64_t ScriptParser::readMemoryAssignment( 2006 StringRef S1, StringRef S2, StringRef S3) { 2007 if (!(consume(S1) || consume(S2) || consume(S3))) { 2008 setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); 2009 return 0; 2010 } 2011 expect("="); 2012 2013 // TODO: Fully support constant expressions. 2014 uint64_t Val; 2015 if (!readInteger(next(), Val)) 2016 setError("nonconstant expression for "+ S1); 2017 return Val; 2018 } 2019 2020 // Parse the MEMORY command as specified in: 2021 // https://sourceware.org/binutils/docs/ld/MEMORY.html 2022 // 2023 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... } 2024 void ScriptParser::readMemory() { 2025 expect("{"); 2026 while (!Error && !consume("}")) { 2027 StringRef Name = next(); 2028 2029 uint32_t Flags = 0; 2030 uint32_t NegFlags = 0; 2031 if (consume("(")) { 2032 std::tie(Flags, NegFlags) = readMemoryAttributes(); 2033 expect(")"); 2034 } 2035 expect(":"); 2036 2037 uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); 2038 expect(","); 2039 uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); 2040 2041 // Add the memory region to the region map (if it doesn't already exist). 2042 auto It = Opt.MemoryRegions.find(Name); 2043 if (It != Opt.MemoryRegions.end()) 2044 setError("region '" + Name + "' already defined"); 2045 else 2046 Opt.MemoryRegions[Name] = {Name, Origin, Length, Origin, Flags, NegFlags}; 2047 } 2048 } 2049 2050 // This function parses the attributes used to match against section 2051 // flags when placing output sections in a memory region. These flags 2052 // are only used when an explicit memory region name is not used. 2053 std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { 2054 uint32_t Flags = 0; 2055 uint32_t NegFlags = 0; 2056 bool Invert = false; 2057 2058 for (char C : next().lower()) { 2059 uint32_t Flag = 0; 2060 if (C == '!') 2061 Invert = !Invert; 2062 else if (C == 'w') 2063 Flag = SHF_WRITE; 2064 else if (C == 'x') 2065 Flag = SHF_EXECINSTR; 2066 else if (C == 'a') 2067 Flag = SHF_ALLOC; 2068 else if (C != 'r') 2069 setError("invalid memory region attribute"); 2070 2071 if (Invert) 2072 NegFlags |= Flag; 2073 else 2074 Flags |= Flag; 2075 } 2076 return {Flags, NegFlags}; 2077 } 2078 2079 void elf::readLinkerScript(MemoryBufferRef MB) { 2080 ScriptParser(MB).readLinkerScript(); 2081 } 2082 2083 void elf::readVersionScript(MemoryBufferRef MB) { 2084 ScriptParser(MB).readVersionScript(); 2085 } 2086 2087 void elf::readDynamicList(MemoryBufferRef MB) { 2088 ScriptParser(MB).readDynamicList(); 2089 } 2090 2091 template class elf::LinkerScript<ELF32LE>; 2092 template class elf::LinkerScript<ELF32BE>; 2093 template class elf::LinkerScript<ELF64LE>; 2094 template class elf::LinkerScript<ELF64BE>; 2095