1 //===- LinkerScript.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the parser/evaluator of the linker script. 11 // It parses a linker script and write the result to Config or ScriptConfig 12 // objects. 13 // 14 // If SECTIONS command is used, a ScriptConfig contains an AST 15 // of the command which will later be consumed by createSections() and 16 // assignAddresses(). 17 // 18 //===----------------------------------------------------------------------===// 19 20 #include "LinkerScript.h" 21 #include "Config.h" 22 #include "Driver.h" 23 #include "InputSection.h" 24 #include "OutputSections.h" 25 #include "ScriptParser.h" 26 #include "Strings.h" 27 #include "Symbols.h" 28 #include "SymbolTable.h" 29 #include "Target.h" 30 #include "Writer.h" 31 #include "llvm/ADT/StringSwitch.h" 32 #include "llvm/Support/ELF.h" 33 #include "llvm/Support/FileSystem.h" 34 #include "llvm/Support/MemoryBuffer.h" 35 #include "llvm/Support/Path.h" 36 #include "llvm/Support/StringSaver.h" 37 38 using namespace llvm; 39 using namespace llvm::ELF; 40 using namespace llvm::object; 41 using namespace llvm::support::endian; 42 using namespace lld; 43 using namespace lld::elf; 44 45 LinkerScriptBase *elf::ScriptBase; 46 ScriptConfiguration *elf::ScriptConfig; 47 48 template <class ELFT> static void addRegular(SymbolAssignment *Cmd) { 49 Symbol *Sym = Symtab<ELFT>::X->addRegular(Cmd->Name, STB_GLOBAL, STV_DEFAULT); 50 Sym->Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT; 51 Cmd->Sym = Sym->body(); 52 53 // If we have no SECTIONS then we don't have '.' and don't call 54 // assignAddresses(). We calculate symbol value immediately in this case. 
55 if (!ScriptConfig->HasSections) 56 cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Cmd->Expression(0); 57 } 58 59 template <class ELFT> static void addSynthetic(SymbolAssignment *Cmd) { 60 Symbol *Sym = Symtab<ELFT>::X->addSynthetic( 61 Cmd->Name, nullptr, 0, Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT); 62 Cmd->Sym = Sym->body(); 63 } 64 65 template <class ELFT> static void addSymbol(SymbolAssignment *Cmd) { 66 if (Cmd->IsAbsolute) 67 addRegular<ELFT>(Cmd); 68 else 69 addSynthetic<ELFT>(Cmd); 70 } 71 // If a symbol was in PROVIDE(), we need to define it only when 72 // it is an undefined symbol. 73 template <class ELFT> static bool shouldDefine(SymbolAssignment *Cmd) { 74 if (Cmd->Name == ".") 75 return false; 76 if (!Cmd->Provide) 77 return true; 78 SymbolBody *B = Symtab<ELFT>::X->find(Cmd->Name); 79 return B && B->isUndefined(); 80 } 81 82 bool SymbolAssignment::classof(const BaseCommand *C) { 83 return C->Kind == AssignmentKind; 84 } 85 86 bool OutputSectionCommand::classof(const BaseCommand *C) { 87 return C->Kind == OutputSectionKind; 88 } 89 90 bool InputSectionDescription::classof(const BaseCommand *C) { 91 return C->Kind == InputSectionKind; 92 } 93 94 bool AssertCommand::classof(const BaseCommand *C) { 95 return C->Kind == AssertKind; 96 } 97 98 bool BytesDataCommand::classof(const BaseCommand *C) { 99 return C->Kind == BytesDataKind; 100 } 101 102 template <class ELFT> static bool isDiscarded(InputSectionBase<ELFT> *S) { 103 return !S || !S->Live; 104 } 105 106 template <class ELFT> LinkerScript<ELFT>::LinkerScript() {} 107 template <class ELFT> LinkerScript<ELFT>::~LinkerScript() {} 108 109 template <class ELFT> 110 bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) { 111 for (InputSectionDescription *ID : Opt.KeptSections) { 112 StringRef Filename = S->getFile()->getName(); 113 if (!ID->FileRe.match(sys::path::filename(Filename))) 114 continue; 115 116 for (SectionPattern &P : ID->SectionPatterns) 117 if (P.SectionRe.match(S->Name)) 118 return 
true; 119 } 120 return false; 121 } 122 123 static bool comparePriority(InputSectionData *A, InputSectionData *B) { 124 return getPriority(A->Name) < getPriority(B->Name); 125 } 126 127 static bool compareName(InputSectionData *A, InputSectionData *B) { 128 return A->Name < B->Name; 129 } 130 131 static bool compareAlignment(InputSectionData *A, InputSectionData *B) { 132 // ">" is not a mistake. Larger alignments are placed before smaller 133 // alignments in order to reduce the amount of padding necessary. 134 // This is compatible with GNU. 135 return A->Alignment > B->Alignment; 136 } 137 138 static std::function<bool(InputSectionData *, InputSectionData *)> 139 getComparator(SortSectionPolicy K) { 140 switch (K) { 141 case SortSectionPolicy::Alignment: 142 return compareAlignment; 143 case SortSectionPolicy::Name: 144 return compareName; 145 case SortSectionPolicy::Priority: 146 return comparePriority; 147 default: 148 llvm_unreachable("unknown sort policy"); 149 } 150 } 151 152 template <class ELFT> 153 static bool matchConstraints(ArrayRef<InputSectionBase<ELFT> *> Sections, 154 ConstraintKind Kind) { 155 if (Kind == ConstraintKind::NoConstraint) 156 return true; 157 bool IsRW = llvm::any_of(Sections, [=](InputSectionData *Sec2) { 158 auto *Sec = static_cast<InputSectionBase<ELFT> *>(Sec2); 159 return Sec->getSectionHdr()->sh_flags & SHF_WRITE; 160 }); 161 return (IsRW && Kind == ConstraintKind::ReadWrite) || 162 (!IsRW && Kind == ConstraintKind::ReadOnly); 163 } 164 165 static void sortSections(InputSectionData **Begin, InputSectionData **End, 166 SortSectionPolicy K) { 167 if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None) 168 std::stable_sort(Begin, End, getComparator(K)); 169 } 170 171 // Compute and remember which sections the InputSectionDescription matches. 
template <class ELFT>
void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) {
  // Collects all sections that satisfy constraints of I
  // and attach them to I.
  for (SectionPattern &Pat : I->SectionPatterns) {
    // Remember where this pattern's matches start so that only they are
    // sorted below.
    size_t SizeBefore = I->Sections.size();
    for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles()) {
      // Only the file name component is matched against the file patterns.
      StringRef Filename = sys::path::filename(F->getName());
      if (!I->FileRe.match(Filename) || Pat.ExcludedFileRe.match(Filename))
        continue;

      // A section whose OutSec is already set (including the dummy marker
      // assigned at the end of this function) is not picked up again.
      for (InputSectionBase<ELFT> *S : F->getSections())
        if (!isDiscarded(S) && !S->OutSec && Pat.SectionRe.match(S->Name))
          I->Sections.push_back(S);
      // NOTE(review): this check sits inside the per-file loop, so the common
      // section is appended once for every file that passes the filters
      // above -- confirm that this is intended.
      if (Pat.SectionRe.match("COMMON"))
        I->Sections.push_back(CommonInputSection<ELFT>::X);
    }

    // Sort sections as instructed by SORT-family commands and --sort-section
    // option. Because SORT-family commands can be nested at most two depth
    // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command
    // line option is respected even if a SORT command is given, the exact
    // behavior we have here is a bit complicated. Here are the rules.
    //
    // 1. If two SORT commands are given, --sort-section is ignored.
    // 2. If one SORT command is given, and if it is not SORT_NONE,
    //    --sort-section is handled as an inner SORT command.
    // 3. If one SORT command is given, and if it is SORT_NONE, don't sort.
    // 4. If no SORT command is given, sort according to --sort-section.
    //
    // The inner key is applied first and the outer key second; because
    // sortSections() uses a stable sort, the inner order survives among
    // sections that compare equal under the outer key.
    InputSectionData **Begin = I->Sections.data() + SizeBefore;
    InputSectionData **End = I->Sections.data() + I->Sections.size();
    if (Pat.SortOuter != SortSectionPolicy::None) {
      if (Pat.SortInner == SortSectionPolicy::Default)
        sortSections(Begin, End, Config->SortSection);
      else
        sortSections(Begin, End, Pat.SortInner);
      sortSections(Begin, End, Pat.SortOuter);
    }
  }

  // We do not add duplicate input sections, so mark them with a dummy output
  // section for now.
  for (InputSectionData *S : I->Sections) {
    auto *S2 = static_cast<InputSectionBase<ELFT> *>(S);
    S2->OutSec = (OutputSectionBase<ELFT> *)-1;
  }
}

// Marks every section in V as not live and reports each one through
// reportDiscarded().
template <class ELFT>
void LinkerScript<ELFT>::discard(ArrayRef<InputSectionBase<ELFT> *> V) {
  for (InputSectionBase<ELFT> *S : V) {
    S->Live = false;
    reportDiscarded(S);
  }
}

// Computes the matches of every input section description contained in
// OutCmd and returns all of them as one list, in command order.
template <class ELFT>
std::vector<InputSectionBase<ELFT> *>
LinkerScript<ELFT>::createInputSectionList(OutputSectionCommand &OutCmd) {
  std::vector<InputSectionBase<ELFT> *> Ret;

  for (const std::unique_ptr<BaseCommand> &Base : OutCmd.Commands) {
    // Only input section descriptions contribute sections; other commands
    // are skipped here.
    auto *Cmd = dyn_cast<InputSectionDescription>(Base.get());
    if (!Cmd)
      continue;
    computeInputSections(Cmd);
    for (InputSectionData *S : Cmd->Sections)
      Ret.push_back(static_cast<InputSectionBase<ELFT> *>(S));
  }

  // After we created final list we should now set OutSec pointer to null,
  // instead of -1. Otherwise we may get a crash when writing relocs, in
  // case section is discarded by linker script
  for (InputSectionBase<ELFT> *S : Ret)
    S->OutSec = nullptr;

  return Ret;
}

// Builds the key that is handed to the output section factory for input
// section C when it is placed into the output section named OutsecName.
template <class ELFT>
static SectionKey<ELFT::Is64Bits> createKey(InputSectionBase<ELFT> *C,
                                            StringRef OutsecName) {
  // When using linker script the merge rules are different.
  // Unfortunately, linker scripts are name based. This means that expressions
  // like *(.foo*) can refer to multiple input sections that would normally be
  // placed in different output sections. We cannot put them in different
  // output sections or we would produce wrong results for
  // start = .; *(.foo.*) end = .; *(.bar)
  // and a mapping of .foo1 and .bar1 to one section and .foo2 and .bar2 to
  // another. The problem is that there is no way to layout those output
  // sections such that the .foo sections are the only thing between the
  // start and end symbols.

  // An extra annoyance is that we cannot simply disable merging of the contents
  // of SHF_MERGE sections, but our implementation requires one output section
  // per "kind" (string or not, which size/alignment).
  // Fortunately, creating symbols in the middle of a merge section is not
  // supported by bfd or gold, so we can just create multiple section in that
  // case.
  const typename ELFT::Shdr *H = C->getSectionHdr();
  typedef typename ELFT::uint uintX_t;
  // Only the merge-related flags take part in the key.
  uintX_t Flags = H->sh_flags & (SHF_MERGE | SHF_STRINGS);

  // The alignment component stays zero unless C is a merge input section.
  uintX_t Alignment = 0;
  if (isa<MergeInputSection<ELFT>>(C))
    Alignment = std::max(H->sh_addralign, H->sh_entsize);

  return SectionKey<ELFT::Is64Bits>{OutsecName, /*Type*/ 0, Flags, Alignment};
}

// Adds Sec to the output section that Factory returns for the key built from
// Sec and Name. An output section reported as new by the factory is also
// appended to the OutputSections list.
template <class ELFT>
void LinkerScript<ELFT>::addSection(OutputSectionFactory<ELFT> &Factory,
                                    InputSectionBase<ELFT> *Sec,
                                    StringRef Name) {
  OutputSectionBase<ELFT> *OutSec;
  bool IsNew;
  std::tie(OutSec, IsNew) = Factory.create(createKey(Sec, Name), Sec);
  if (IsNew)
    OutputSections->push_back(OutSec);
  OutSec->addSection(Sec);
}

// Walks the script commands once: defines the symbols they assign, evaluates
// ASSERTs when there is no SECTIONS command, discards the sections matched by
// /DISCARD/, drops output section commands whose constraint is not met, and
// creates output sections for the remaining matched input sections.
template <class ELFT>
void LinkerScript<ELFT>::processCommands(OutputSectionFactory<ELFT> &Factory) {

  // The loop is index based because commands may be erased while iterating.
  for (unsigned I = 0; I < Opt.Commands.size(); ++I) {
    auto Iter = Opt.Commands.begin() + I;
    const std::unique_ptr<BaseCommand> &Base1 = *Iter;
    // Assignments found directly in the command list always go through
    // addRegular().
    if (auto *Cmd = dyn_cast<SymbolAssignment>(Base1.get())) {
      if (shouldDefine<ELFT>(Cmd))
        addRegular<ELFT>(Cmd);
      continue;
    }
    if (auto *Cmd = dyn_cast<AssertCommand>(Base1.get())) {
      // If we don't have SECTIONS then output sections have already been
      // created by Writer<ELFT>. The LinkerScript<ELFT>::assignAddresses
      // will not be called, so ASSERT should be evaluated now.
      if (!Opt.HasSections)
        Cmd->Expression(0);
      continue;
    }

    if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base1.get())) {
      std::vector<InputSectionBase<ELFT> *> V = createInputSectionList(*Cmd);

      if (Cmd->Name == "/DISCARD/") {
        discard(V);
        continue;
      }

      if (!matchConstraints<ELFT>(V, Cmd->Constraint)) {
        // Release the sections so that later commands (or the orphan
        // handling in createSections) can still claim them.
        for (InputSectionBase<ELFT> *S : V)
          S->OutSec = nullptr;
        // Erasing invalidates Iter, Base1 and Cmd; none of them is used
        // afterwards. Decrementing I compensates for the ++I of the loop so
        // that the element which moved into this slot is visited next (for
        // I == 0 the unsigned value wraps around and wraps back).
        Opt.Commands.erase(Iter);
        --I;
        continue;
      }

      // Assignments nested in an output section command go through
      // addSymbol(), which picks a regular or a synthetic symbol.
      for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands)
        if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base.get()))
          if (shouldDefine<ELFT>(OutCmd))
            addSymbol<ELFT>(OutCmd);

      if (V.empty())
        continue;

      for (InputSectionBase<ELFT> *Sec : V) {
        addSection(Factory, Sec, Cmd->Name);
        // A non-zero result of the sub-alignment expression replaces the
        // alignment of the input section.
        if (uint32_t Subalign = Cmd->SubalignExpr ? Cmd->SubalignExpr(0) : 0)
          Sec->Alignment = Subalign;
      }
    }
  }
}

// Creates output sections: first the ones the script asks for, then one for
// every live input section that is still unassigned afterwards.
template <class ELFT>
void LinkerScript<ELFT>::createSections(OutputSectionFactory<ELFT> &Factory) {
  processCommands(Factory);
  // Add orphan sections.
  for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles())
    for (InputSectionBase<ELFT> *S : F->getSections())
      if (!isDiscarded(S) && !S->OutSec)
        addSection(Factory, S, getOutputSectionName(S->Name, Opt.Alloc));
}

// Sets value of a section-defined symbol. Two kinds of
// symbols are processed: synthetic symbols, whose value
// is an offset from beginning of section and regular
// symbols whose value is absolute.
template <class ELFT>
static void assignSectionSymbol(SymbolAssignment *Cmd,
                                OutputSectionBase<ELFT> *Sec,
                                typename ELFT::uint Off) {
  // No symbol was created for this assignment, so there is nothing to set.
  if (!Cmd->Sym)
    return;

  if (auto *Body = dyn_cast<DefinedSynthetic<ELFT>>(Cmd->Sym)) {
    // The expression is evaluated with the absolute position as '.', and the
    // result is stored relative to the start of Sec.
    Body->Section = Sec;
    Body->Value = Cmd->Expression(Sec->getVA() + Off) - Sec->getVA();
    return;
  }
  // Anything that is not synthetic must be a regular symbol; it receives the
  // result of the expression unchanged.
  auto *Body = cast<DefinedRegular<ELFT>>(Cmd->Sym);
  Body->Value = Cmd->Expression(Sec->getVA() + Off);
}

// Returns true for an output section that has SHF_TLS set and is of type
// SHT_NOBITS.
template <class ELFT> static bool isTbss(OutputSectionBase<ELFT> *Sec) {
  return (Sec->getFlags() & SHF_TLS) && Sec->getType() == SHT_NOBITS;
}

// Places input section S at the current position of the current output
// section. A section that was placed before is ignored.
template <class ELFT> void LinkerScript<ELFT>::output(InputSection<ELFT> *S) {
  if (!AlreadyOutputIS.insert(S).second)
    return;
  bool IsTbss = isTbss(CurOutSec);

  // For a TLS NOBITS output section the position is tracked in
  // ThreadBssOffset, relative to Dot, and Dot itself is left unchanged.
  uintX_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot;
  Pos = alignTo(Pos, S->Alignment);
  S->OutSecOff = Pos - CurOutSec->getVA();
  Pos += S->getSize();

  // Update output section size after adding each section. This is so that
  // SIZEOF works correctly in the case below:
  // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) }
  CurOutSec->setSize(Pos - CurOutSec->getVA());

  if (IsTbss)
    ThreadBssOffset = Pos - Dot;
  else
    Dot = Pos;
}

// Finishes the current output section. Does nothing if there is no current
// output section or if it has been flushed before.
template <class ELFT> void LinkerScript<ELFT>::flush() {
  if (!CurOutSec || !AlreadyOutputOS.insert(CurOutSec).second)
    return;
  if (auto *OutSec = dyn_cast<OutputSection<ELFT>>(CurOutSec)) {
    // output() skips the input sections that were placed already, so only
    // the remaining ones are laid out here.
    for (InputSection<ELFT> *I : OutSec->Sections)
      output(I);
  } else {
    // Other kinds of output sections are accounted for as a whole.
    Dot += CurOutSec->getSize();
  }
}

// Makes Sec the current output section and assigns its address from Dot.
// The previous current section is flushed first. Nothing happens if Sec is
// already current or has been flushed before.
template <class ELFT>
void LinkerScript<ELFT>::switchTo(OutputSectionBase<ELFT> *Sec) {
  if (CurOutSec == Sec)
    return;
  if (AlreadyOutputOS.count(Sec))
    return;

  flush();
  CurOutSec = Sec;

  Dot = alignTo(Dot, CurOutSec->getAlignment());
  CurOutSec->setVA(isTbss(CurOutSec) ? Dot + ThreadBssOffset : Dot);

  // If neither AT nor AT> is specified for an allocatable section, the linker
  // will set the LMA such that the difference between VMA and LMA for the
  // section is the same as the preceding output section in the same region
  // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html
  CurOutSec->setLMAOffset(LMAOffset);
}

// Executes one sub-command of an output section command. Base must be a
// symbol assignment, a data command or an input section description.
template <class ELFT> void LinkerScript<ELFT>::process(BaseCommand &Base) {
  // This handles the assignments to symbol or to a location counter (.)
  if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) {
    if (AssignCmd->Name == ".") {
      // Update to location counter means update to section size.
      Dot = AssignCmd->Expression(Dot);
      CurOutSec->setSize(Dot - CurOutSec->getVA());
      return;
    }
    assignSectionSymbol<ELFT>(AssignCmd, CurOutSec, Dot - CurOutSec->getVA());
    return;
  }

  // Handle BYTE(), SHORT(), LONG(), or QUAD().
  if (auto *DataCmd = dyn_cast<BytesDataCommand>(&Base)) {
    // Remember where the data goes inside the section; the bytes themselves
    // are written later by writeDataBytes().
    DataCmd->Offset = Dot - CurOutSec->getVA();
    Dot += DataCmd->Size;
    CurOutSec->setSize(Dot - CurOutSec->getVA());
    return;
  }

  // It handles single input section description command,
  // calculates and assigns the offsets for each section and also
  // updates the output section size.
  auto &ICmd = cast<InputSectionDescription>(Base);
  for (InputSectionData *ID : ICmd.Sections) {
    auto *IB = static_cast<InputSectionBase<ELFT> *>(ID);
    switchTo(IB->OutSec);
    // A plain input section is placed individually; for any other kind of
    // input section the current output section is flushed as a whole.
    if (auto *I = dyn_cast<InputSection<ELFT>>(IB))
      output(I);
    else
      flush();
  }
}

// Returns all output sections in Sections whose name equals Name, in their
// original order.
template <class ELFT>
static std::vector<OutputSectionBase<ELFT> *>
findSections(StringRef Name,
             const std::vector<OutputSectionBase<ELFT> *> &Sections) {
  std::vector<OutputSectionBase<ELFT> *> Ret;
  for (OutputSectionBase<ELFT> *Sec : Sections)
    if (Sec->getName() == Name)
      Ret.push_back(Sec);
  return Ret;
}

// Lays out the output sections named by Cmd and runs the sub-commands of Cmd.
template <class ELFT>
void LinkerScript<ELFT>::assignOffsets(OutputSectionCommand *Cmd) {
  // An explicit load address updates the VMA-to-LMA distance that switchTo()
  // passes on to the output sections.
  if (Cmd->LMAExpr)
    LMAOffset = Cmd->LMAExpr(Dot) - Dot;
  std::vector<OutputSectionBase<ELFT> *> Sections =
      findSections(Cmd->Name, *OutputSections);
  if (Sections.empty())
    return;
  switchTo(Sections[0]);
  // Find the last section output location. We will output orphan sections
  // there so that end symbols point to the correct location.
  // E points just past the last sub-command that is not a symbol assignment,
  // i.e. [E, end) holds the trailing assignments only.
  auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(),
                        [](const std::unique_ptr<BaseCommand> &Cmd) {
                          return !isa<SymbolAssignment>(*Cmd);
                        })
               .base();
  for (auto I = Cmd->Commands.begin(); I != E; ++I)
    process(**I);
  // Visit every output section of this name and flush the last one, so that
  // all of their content is placed before the trailing assignments run.
  for (OutputSectionBase<ELFT> *Base : Sections)
    switchTo(Base);
  flush();
  std::for_each(E, Cmd->Commands.end(),
                [this](std::unique_ptr<BaseCommand> &B) { process(*B.get()); });
}

// Reconciles the script commands with the output sections that exist: output
// section commands that produced nothing and contain only input section
// descriptions are removed, and any other command without an output section
// gets an empty one.
template <class ELFT> void LinkerScript<ELFT>::adjustSectionsBeforeSorting() {
  // It is common practice to use very generic linker scripts. So for any
  // given run some of the output sections in the script will be empty.
  // We could create corresponding empty output sections, but that would
  // clutter the output.
  // We instead remove trivially empty sections. The bfd linker seems even
  // more aggressive at removing them.
  auto Pos = std::remove_if(
      Opt.Commands.begin(), Opt.Commands.end(),
      [&](const std::unique_ptr<BaseCommand> &Base) {
        auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
        if (!Cmd)
          return false;
        std::vector<OutputSectionBase<ELFT> *> Secs =
            findSections(Cmd->Name, *OutputSections);
        if (!Secs.empty())
          return false;
        // Keep the command if it contains anything besides input section
        // descriptions (e.g. a symbol assignment).
        for (const std::unique_ptr<BaseCommand> &I : Cmd->Commands)
          if (!isa<InputSectionDescription>(I.get()))
            return false;
        return true;
      });
  Opt.Commands.erase(Pos, Opt.Commands.end());

  // If the output section contains only symbol assignments, create a
  // corresponding output section. The bfd linker seems to only create them if
  // '.' is assigned to, but creating these section should not have any bad
  // consequences and gives us a section to put the symbol in.
  //
  // A section created here takes the flags and type of the closest preceding
  // command that has an output section; SHF_ALLOC and type 0 are used if
  // there is none.
  uintX_t Flags = SHF_ALLOC;
  uint32_t Type = 0;
  for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
    auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
    if (!Cmd)
      continue;
    std::vector<OutputSectionBase<ELFT> *> Secs =
        findSections(Cmd->Name, *OutputSections);
    if (!Secs.empty()) {
      Flags = Secs[0]->getFlags();
      Type = Secs[0]->getType();
      continue;
    }

    // Out<ELFT>::Pool takes the new section; the OutputSections list only
    // refers to it.
    auto *OutSec = new OutputSection<ELFT>(Cmd->Name, Type, Flags);
    Out<ELFT>::Pool.emplace_back(OutSec);
    OutputSections->push_back(OutSec);
  }
}

// When placing orphan sections, we want to place them after symbol assignments
// so that an orphan after
//   begin_foo = .;
//   foo : { *(foo) }
//   end_foo = .;
// doesn't break the intended meaning of the begin/end symbols.
// We don't want to go over sections since Writer<ELFT>::sortSections is the
// one in charge of deciding the order of the sections.
560 // We don't want to go over alignments, since doing so in 561 // rx_sec : { *(rx_sec) } 562 // . = ALIGN(0x1000); 563 // /* The RW PT_LOAD starts here*/ 564 // rw_sec : { *(rw_sec) } 565 // would mean that the RW PT_LOAD would become unaligned. 566 static bool shouldSkip(const BaseCommand &Cmd) { 567 if (isa<OutputSectionCommand>(Cmd)) 568 return false; 569 const auto *Assign = dyn_cast<SymbolAssignment>(&Cmd); 570 if (!Assign) 571 return true; 572 return Assign->Name != "."; 573 } 574 575 template <class ELFT> 576 void LinkerScript<ELFT>::assignAddresses(std::vector<PhdrEntry<ELFT>> &Phdrs) { 577 // Orphan sections are sections present in the input files which 578 // are not explicitly placed into the output file by the linker script. 579 // We place orphan sections at end of file. 580 // Other linkers places them using some heuristics as described in 581 // https://sourceware.org/binutils/docs/ld/Orphan-Sections.html#Orphan-Sections. 582 583 // The OutputSections are already in the correct order. 584 // This loops creates or moves commands as needed so that they are in the 585 // correct order. 586 int CmdIndex = 0; 587 for (OutputSectionBase<ELFT> *Sec : *OutputSections) { 588 StringRef Name = Sec->getName(); 589 590 // Find the last spot where we can insert a command and still get the 591 // correct result. 592 auto CmdIter = Opt.Commands.begin() + CmdIndex; 593 auto E = Opt.Commands.end(); 594 while (CmdIter != E && shouldSkip(**CmdIter)) { 595 ++CmdIter; 596 ++CmdIndex; 597 } 598 599 auto Pos = 600 std::find_if(CmdIter, E, [&](const std::unique_ptr<BaseCommand> &Base) { 601 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 602 return Cmd && Cmd->Name == Name; 603 }); 604 if (Pos == E) { 605 Opt.Commands.insert(CmdIter, 606 llvm::make_unique<OutputSectionCommand>(Name)); 607 ++CmdIndex; 608 continue; 609 } 610 611 // Continue from where we found it. 
612 CmdIndex = (Pos - Opt.Commands.begin()) + 1; 613 continue; 614 } 615 616 // Assign addresses as instructed by linker script SECTIONS sub-commands. 617 Dot = 0; 618 619 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 620 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) { 621 if (Cmd->Name == ".") { 622 Dot = Cmd->Expression(Dot); 623 } else if (Cmd->Sym) { 624 cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Cmd->Expression(Dot); 625 } 626 continue; 627 } 628 629 if (auto *Cmd = dyn_cast<AssertCommand>(Base.get())) { 630 Cmd->Expression(Dot); 631 continue; 632 } 633 634 auto *Cmd = cast<OutputSectionCommand>(Base.get()); 635 636 if (Cmd->AddrExpr) 637 Dot = Cmd->AddrExpr(Dot); 638 639 assignOffsets(Cmd); 640 } 641 642 uintX_t MinVA = std::numeric_limits<uintX_t>::max(); 643 for (OutputSectionBase<ELFT> *Sec : *OutputSections) { 644 if (Sec->getFlags() & SHF_ALLOC) 645 MinVA = std::min(MinVA, Sec->getVA()); 646 else 647 Sec->setVA(0); 648 } 649 650 uintX_t HeaderSize = getHeaderSize(); 651 auto FirstPTLoad = 652 std::find_if(Phdrs.begin(), Phdrs.end(), [](const PhdrEntry<ELFT> &E) { 653 return E.H.p_type == PT_LOAD; 654 }); 655 if (HeaderSize <= MinVA && FirstPTLoad != Phdrs.end()) { 656 // ELF and Program headers need to be right before the first section in 657 // memory. Set their addresses accordingly. 658 MinVA = alignDown(MinVA - HeaderSize, Target->PageSize); 659 Out<ELFT>::ElfHeader->setVA(MinVA); 660 Out<ELFT>::ProgramHeaders->setVA(Out<ELFT>::ElfHeader->getSize() + MinVA); 661 FirstPTLoad->First = Out<ELFT>::ElfHeader; 662 if (!FirstPTLoad->Last) 663 FirstPTLoad->Last = Out<ELFT>::ProgramHeaders; 664 } else if (!FirstPTLoad->First) { 665 // Sometimes the very first PT_LOAD segment can be empty. 666 // This happens if (all conditions met): 667 // - Linker script is used 668 // - First section in ELF image is not RO 669 // - Not enough space for program headers. 
670 // The code below removes empty PT_LOAD segment and updates 671 // program headers size. 672 Phdrs.erase(FirstPTLoad); 673 Out<ELFT>::ProgramHeaders->setSize(sizeof(typename ELFT::Phdr) * 674 Phdrs.size()); 675 } 676 } 677 678 // Creates program headers as instructed by PHDRS linker script command. 679 template <class ELFT> 680 std::vector<PhdrEntry<ELFT>> LinkerScript<ELFT>::createPhdrs() { 681 std::vector<PhdrEntry<ELFT>> Ret; 682 683 // Process PHDRS and FILEHDR keywords because they are not 684 // real output sections and cannot be added in the following loop. 685 for (const PhdrsCommand &Cmd : Opt.PhdrsCommands) { 686 Ret.emplace_back(Cmd.Type, Cmd.Flags == UINT_MAX ? PF_R : Cmd.Flags); 687 PhdrEntry<ELFT> &Phdr = Ret.back(); 688 689 if (Cmd.HasFilehdr) 690 Phdr.add(Out<ELFT>::ElfHeader); 691 if (Cmd.HasPhdrs) 692 Phdr.add(Out<ELFT>::ProgramHeaders); 693 694 if (Cmd.LMAExpr) { 695 Phdr.H.p_paddr = Cmd.LMAExpr(0); 696 Phdr.HasLMA = true; 697 } 698 } 699 700 // Add output sections to program headers. 701 PhdrEntry<ELFT> *Load = nullptr; 702 uintX_t Flags = PF_R; 703 for (OutputSectionBase<ELFT> *Sec : *OutputSections) { 704 if (!(Sec->getFlags() & SHF_ALLOC)) 705 break; 706 707 std::vector<size_t> PhdrIds = getPhdrIndices(Sec->getName()); 708 if (!PhdrIds.empty()) { 709 // Assign headers specified by linker script 710 for (size_t Id : PhdrIds) { 711 Ret[Id].add(Sec); 712 if (Opt.PhdrsCommands[Id].Flags == UINT_MAX) 713 Ret[Id].H.p_flags |= Sec->getPhdrFlags(); 714 } 715 } else { 716 // If we have no load segment or flags've changed then we want new load 717 // segment. 
718 uintX_t NewFlags = Sec->getPhdrFlags(); 719 if (Load == nullptr || Flags != NewFlags) { 720 Load = &*Ret.emplace(Ret.end(), PT_LOAD, NewFlags); 721 Flags = NewFlags; 722 } 723 Load->add(Sec); 724 } 725 } 726 return Ret; 727 } 728 729 template <class ELFT> bool LinkerScript<ELFT>::ignoreInterpSection() { 730 // Ignore .interp section in case we have PHDRS specification 731 // and PT_INTERP isn't listed. 732 return !Opt.PhdrsCommands.empty() && 733 llvm::find_if(Opt.PhdrsCommands, [](const PhdrsCommand &Cmd) { 734 return Cmd.Type == PT_INTERP; 735 }) == Opt.PhdrsCommands.end(); 736 } 737 738 template <class ELFT> 739 ArrayRef<uint8_t> LinkerScript<ELFT>::getFiller(StringRef Name) { 740 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 741 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 742 if (Cmd->Name == Name) 743 return Cmd->Filler; 744 return {}; 745 } 746 747 template <class ELFT> 748 static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) { 749 const endianness E = ELFT::TargetEndianness; 750 751 switch (Size) { 752 case 1: 753 *Buf = (uint8_t)Data; 754 break; 755 case 2: 756 write16<E>(Buf, Data); 757 break; 758 case 4: 759 write32<E>(Buf, Data); 760 break; 761 case 8: 762 write64<E>(Buf, Data); 763 break; 764 default: 765 llvm_unreachable("unsupported Size argument"); 766 } 767 } 768 769 template <class ELFT> 770 void LinkerScript<ELFT>::writeDataBytes(StringRef Name, uint8_t *Buf) { 771 int I = getSectionIndex(Name); 772 if (I == INT_MAX) 773 return; 774 775 OutputSectionCommand *Cmd = 776 dyn_cast<OutputSectionCommand>(Opt.Commands[I].get()); 777 for (const std::unique_ptr<BaseCommand> &Base2 : Cmd->Commands) 778 if (auto *DataCmd = dyn_cast<BytesDataCommand>(Base2.get())) 779 writeInt<ELFT>(&Buf[DataCmd->Offset], DataCmd->Data, DataCmd->Size); 780 } 781 782 template <class ELFT> bool LinkerScript<ELFT>::hasLMA(StringRef Name) { 783 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 784 if (auto *Cmd = 
dyn_cast<OutputSectionCommand>(Base.get())) 785 if (Cmd->LMAExpr && Cmd->Name == Name) 786 return true; 787 return false; 788 } 789 790 // Returns the index of the given section name in linker script 791 // SECTIONS commands. Sections are laid out as the same order as they 792 // were in the script. If a given name did not appear in the script, 793 // it returns INT_MAX, so that it will be laid out at end of file. 794 template <class ELFT> int LinkerScript<ELFT>::getSectionIndex(StringRef Name) { 795 int I = 0; 796 for (std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 797 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 798 if (Cmd->Name == Name) 799 return I; 800 ++I; 801 } 802 return INT_MAX; 803 } 804 805 template <class ELFT> bool LinkerScript<ELFT>::hasPhdrsCommands() { 806 return !Opt.PhdrsCommands.empty(); 807 } 808 809 template <class ELFT> 810 uint64_t LinkerScript<ELFT>::getOutputSectionAddress(StringRef Name) { 811 for (OutputSectionBase<ELFT> *Sec : *OutputSections) 812 if (Sec->getName() == Name) 813 return Sec->getVA(); 814 error("undefined section " + Name); 815 return 0; 816 } 817 818 template <class ELFT> 819 uint64_t LinkerScript<ELFT>::getOutputSectionLMA(StringRef Name) { 820 for (OutputSectionBase<ELFT> *Sec : *OutputSections) 821 if (Sec->getName() == Name) 822 return Sec->getLMA(); 823 error("undefined section " + Name); 824 return 0; 825 } 826 827 template <class ELFT> 828 uint64_t LinkerScript<ELFT>::getOutputSectionSize(StringRef Name) { 829 for (OutputSectionBase<ELFT> *Sec : *OutputSections) 830 if (Sec->getName() == Name) 831 return Sec->getSize(); 832 error("undefined section " + Name); 833 return 0; 834 } 835 836 template <class ELFT> 837 uint64_t LinkerScript<ELFT>::getOutputSectionAlign(StringRef Name) { 838 for (OutputSectionBase<ELFT> *Sec : *OutputSections) 839 if (Sec->getName() == Name) 840 return Sec->getAlignment(); 841 error("undefined section " + Name); 842 return 0; 843 } 844 845 template <class ELFT> 
uint64_t LinkerScript<ELFT>::getHeaderSize() { 846 return elf::getHeaderSize<ELFT>(); 847 } 848 849 template <class ELFT> uint64_t LinkerScript<ELFT>::getSymbolValue(StringRef S) { 850 if (SymbolBody *B = Symtab<ELFT>::X->find(S)) 851 return B->getVA<ELFT>(); 852 error("symbol not found: " + S); 853 return 0; 854 } 855 856 template <class ELFT> bool LinkerScript<ELFT>::isDefined(StringRef S) { 857 return Symtab<ELFT>::X->find(S) != nullptr; 858 } 859 860 // Returns indices of ELF headers containing specific section, identified 861 // by Name. Each index is a zero based number of ELF header listed within 862 // PHDRS {} script block. 863 template <class ELFT> 864 std::vector<size_t> LinkerScript<ELFT>::getPhdrIndices(StringRef SectionName) { 865 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 866 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 867 if (!Cmd || Cmd->Name != SectionName) 868 continue; 869 870 std::vector<size_t> Ret; 871 for (StringRef PhdrName : Cmd->Phdrs) 872 Ret.push_back(getPhdrIndex(PhdrName)); 873 return Ret; 874 } 875 return {}; 876 } 877 878 template <class ELFT> 879 size_t LinkerScript<ELFT>::getPhdrIndex(StringRef PhdrName) { 880 size_t I = 0; 881 for (PhdrsCommand &Cmd : Opt.PhdrsCommands) { 882 if (Cmd.Name == PhdrName) 883 return I; 884 ++I; 885 } 886 error("section header '" + PhdrName + "' is not listed in PHDRS"); 887 return 0; 888 } 889 890 class elf::ScriptParser : public ScriptParserBase { 891 typedef void (ScriptParser::*Handler)(); 892 893 public: 894 ScriptParser(StringRef S, bool B) : ScriptParserBase(S), IsUnderSysroot(B) {} 895 896 void readLinkerScript(); 897 void readVersionScript(); 898 899 private: 900 void addFile(StringRef Path); 901 902 void readAsNeeded(); 903 void readEntry(); 904 void readExtern(); 905 void readGroup(); 906 void readInclude(); 907 void readOutput(); 908 void readOutputArch(); 909 void readOutputFormat(); 910 void readPhdrs(); 911 void readSearchDir(); 912 void readSections(); 
913 void readVersion(); 914 void readVersionScriptCommand(); 915 916 SymbolAssignment *readAssignment(StringRef Name); 917 BytesDataCommand *readBytesDataCommand(StringRef Tok); 918 std::vector<uint8_t> readFill(); 919 OutputSectionCommand *readOutputSectionDescription(StringRef OutSec); 920 std::vector<uint8_t> readOutputSectionFiller(StringRef Tok); 921 std::vector<StringRef> readOutputSectionPhdrs(); 922 InputSectionDescription *readInputSectionDescription(StringRef Tok); 923 Regex readFilePatterns(); 924 std::vector<SectionPattern> readInputSectionsList(); 925 InputSectionDescription *readInputSectionRules(StringRef FilePattern); 926 unsigned readPhdrType(); 927 SortSectionPolicy readSortKind(); 928 SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); 929 SymbolAssignment *readProvideOrAssignment(StringRef Tok, bool MakeAbsolute); 930 void readSort(); 931 Expr readAssert(); 932 933 Expr readExpr(); 934 Expr readExpr1(Expr Lhs, int MinPrec); 935 StringRef readParenLiteral(); 936 Expr readPrimary(); 937 Expr readTernary(Expr Cond); 938 Expr readParenExpr(); 939 940 // For parsing version script. 
941 void readExtern(std::vector<SymbolVersion> *Globals); 942 void readVersionDeclaration(StringRef VerStr); 943 void readGlobal(StringRef VerStr); 944 void readLocal(); 945 946 ScriptConfiguration &Opt = *ScriptConfig; 947 StringSaver Saver = {ScriptConfig->Alloc}; 948 bool IsUnderSysroot; 949 }; 950 951 void ScriptParser::readVersionScript() { 952 readVersionScriptCommand(); 953 if (!atEOF()) 954 setError("EOF expected, but got " + next()); 955 } 956 957 void ScriptParser::readVersionScriptCommand() { 958 if (skip("{")) { 959 readVersionDeclaration(""); 960 return; 961 } 962 963 while (!atEOF() && !Error && peek() != "}") { 964 StringRef VerStr = next(); 965 if (VerStr == "{") { 966 setError("anonymous version definition is used in " 967 "combination with other version definitions"); 968 return; 969 } 970 expect("{"); 971 readVersionDeclaration(VerStr); 972 } 973 } 974 975 void ScriptParser::readVersion() { 976 expect("{"); 977 readVersionScriptCommand(); 978 expect("}"); 979 } 980 981 void ScriptParser::readLinkerScript() { 982 while (!atEOF()) { 983 StringRef Tok = next(); 984 if (Tok == ";") 985 continue; 986 987 if (Tok == "ASSERT") { 988 Opt.Commands.emplace_back(new AssertCommand(readAssert())); 989 } else if (Tok == "ENTRY") { 990 readEntry(); 991 } else if (Tok == "EXTERN") { 992 readExtern(); 993 } else if (Tok == "GROUP" || Tok == "INPUT") { 994 readGroup(); 995 } else if (Tok == "INCLUDE") { 996 readInclude(); 997 } else if (Tok == "OUTPUT") { 998 readOutput(); 999 } else if (Tok == "OUTPUT_ARCH") { 1000 readOutputArch(); 1001 } else if (Tok == "OUTPUT_FORMAT") { 1002 readOutputFormat(); 1003 } else if (Tok == "PHDRS") { 1004 readPhdrs(); 1005 } else if (Tok == "SEARCH_DIR") { 1006 readSearchDir(); 1007 } else if (Tok == "SECTIONS") { 1008 readSections(); 1009 } else if (Tok == "VERSION") { 1010 readVersion(); 1011 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok, true)) { 1012 Opt.Commands.emplace_back(Cmd); 1013 } else { 1014 
setError("unknown directive: " + Tok); 1015 } 1016 } 1017 } 1018 1019 void ScriptParser::addFile(StringRef S) { 1020 if (IsUnderSysroot && S.startswith("/")) { 1021 SmallString<128> Path; 1022 (Config->Sysroot + S).toStringRef(Path); 1023 if (sys::fs::exists(Path)) { 1024 Driver->addFile(Saver.save(Path.str())); 1025 return; 1026 } 1027 } 1028 1029 if (sys::path::is_absolute(S)) { 1030 Driver->addFile(S); 1031 } else if (S.startswith("=")) { 1032 if (Config->Sysroot.empty()) 1033 Driver->addFile(S.substr(1)); 1034 else 1035 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1))); 1036 } else if (S.startswith("-l")) { 1037 Driver->addLibrary(S.substr(2)); 1038 } else if (sys::fs::exists(S)) { 1039 Driver->addFile(S); 1040 } else { 1041 std::string Path = findFromSearchPaths(S); 1042 if (Path.empty()) 1043 setError("unable to find " + S); 1044 else 1045 Driver->addFile(Saver.save(Path)); 1046 } 1047 } 1048 1049 void ScriptParser::readAsNeeded() { 1050 expect("("); 1051 bool Orig = Config->AsNeeded; 1052 Config->AsNeeded = true; 1053 while (!Error && !skip(")")) 1054 addFile(unquote(next())); 1055 Config->AsNeeded = Orig; 1056 } 1057 1058 void ScriptParser::readEntry() { 1059 // -e <symbol> takes predecence over ENTRY(<symbol>). 
1060 expect("("); 1061 StringRef Tok = next(); 1062 if (Config->Entry.empty()) 1063 Config->Entry = Tok; 1064 expect(")"); 1065 } 1066 1067 void ScriptParser::readExtern() { 1068 expect("("); 1069 while (!Error && !skip(")")) 1070 Config->Undefined.push_back(next()); 1071 } 1072 1073 void ScriptParser::readGroup() { 1074 expect("("); 1075 while (!Error && !skip(")")) { 1076 StringRef Tok = next(); 1077 if (Tok == "AS_NEEDED") 1078 readAsNeeded(); 1079 else 1080 addFile(unquote(Tok)); 1081 } 1082 } 1083 1084 void ScriptParser::readInclude() { 1085 StringRef Tok = next(); 1086 auto MBOrErr = MemoryBuffer::getFile(unquote(Tok)); 1087 if (!MBOrErr) { 1088 setError("cannot open " + Tok); 1089 return; 1090 } 1091 std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; 1092 StringRef S = Saver.save(MB->getMemBufferRef().getBuffer()); 1093 std::vector<StringRef> V = tokenize(S); 1094 Tokens.insert(Tokens.begin() + Pos, V.begin(), V.end()); 1095 } 1096 1097 void ScriptParser::readOutput() { 1098 // -o <file> takes predecence over OUTPUT(<file>). 1099 expect("("); 1100 StringRef Tok = next(); 1101 if (Config->OutputFile.empty()) 1102 Config->OutputFile = unquote(Tok); 1103 expect(")"); 1104 } 1105 1106 void ScriptParser::readOutputArch() { 1107 // Error checking only for now. 1108 expect("("); 1109 next(); 1110 expect(")"); 1111 } 1112 1113 void ScriptParser::readOutputFormat() { 1114 // Error checking only for now. 
1115 expect("("); 1116 next(); 1117 StringRef Tok = next(); 1118 if (Tok == ")") 1119 return; 1120 if (Tok != ",") { 1121 setError("unexpected token: " + Tok); 1122 return; 1123 } 1124 next(); 1125 expect(","); 1126 next(); 1127 expect(")"); 1128 } 1129 1130 void ScriptParser::readPhdrs() { 1131 expect("{"); 1132 while (!Error && !skip("}")) { 1133 StringRef Tok = next(); 1134 Opt.PhdrsCommands.push_back( 1135 {Tok, PT_NULL, false, false, UINT_MAX, nullptr}); 1136 PhdrsCommand &PhdrCmd = Opt.PhdrsCommands.back(); 1137 1138 PhdrCmd.Type = readPhdrType(); 1139 do { 1140 Tok = next(); 1141 if (Tok == ";") 1142 break; 1143 if (Tok == "FILEHDR") 1144 PhdrCmd.HasFilehdr = true; 1145 else if (Tok == "PHDRS") 1146 PhdrCmd.HasPhdrs = true; 1147 else if (Tok == "AT") 1148 PhdrCmd.LMAExpr = readParenExpr(); 1149 else if (Tok == "FLAGS") { 1150 expect("("); 1151 // Passing 0 for the value of dot is a bit of a hack. It means that 1152 // we accept expressions like ".|1". 1153 PhdrCmd.Flags = readExpr()(0); 1154 expect(")"); 1155 } else 1156 setError("unexpected header attribute: " + Tok); 1157 } while (!Error); 1158 } 1159 } 1160 1161 void ScriptParser::readSearchDir() { 1162 expect("("); 1163 StringRef Tok = next(); 1164 if (!Config->Nostdlib) 1165 Config->SearchPaths.push_back(unquote(Tok)); 1166 expect(")"); 1167 } 1168 1169 void ScriptParser::readSections() { 1170 Opt.HasSections = true; 1171 expect("{"); 1172 while (!Error && !skip("}")) { 1173 StringRef Tok = next(); 1174 BaseCommand *Cmd = readProvideOrAssignment(Tok, true); 1175 if (!Cmd) { 1176 if (Tok == "ASSERT") 1177 Cmd = new AssertCommand(readAssert()); 1178 else 1179 Cmd = readOutputSectionDescription(Tok); 1180 } 1181 Opt.Commands.emplace_back(Cmd); 1182 } 1183 } 1184 1185 static int precedence(StringRef Op) { 1186 return StringSwitch<int>(Op) 1187 .Cases("*", "/", 5) 1188 .Cases("+", "-", 4) 1189 .Cases("<<", ">>", 3) 1190 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 1191 .Cases("&", "|", 1) 1192 .Default(-1); 
1193 } 1194 1195 Regex ScriptParser::readFilePatterns() { 1196 std::vector<StringRef> V; 1197 while (!Error && !skip(")")) 1198 V.push_back(next()); 1199 return compileGlobPatterns(V); 1200 } 1201 1202 SortSectionPolicy ScriptParser::readSortKind() { 1203 if (skip("SORT") || skip("SORT_BY_NAME")) 1204 return SortSectionPolicy::Name; 1205 if (skip("SORT_BY_ALIGNMENT")) 1206 return SortSectionPolicy::Alignment; 1207 if (skip("SORT_BY_INIT_PRIORITY")) 1208 return SortSectionPolicy::Priority; 1209 if (skip("SORT_NONE")) 1210 return SortSectionPolicy::None; 1211 return SortSectionPolicy::Default; 1212 } 1213 1214 // Method reads a list of sequence of excluded files and section globs given in 1215 // a following form: ((EXCLUDE_FILE(file_pattern+))? section_pattern+)+ 1216 // Example: *(.foo.1 EXCLUDE_FILE (*a.o) .foo.2 EXCLUDE_FILE (*b.o) .foo.3) 1217 // The semantics of that is next: 1218 // * Include .foo.1 from every file. 1219 // * Include .foo.2 from every file but a.o 1220 // * Include .foo.3 from every file but b.o 1221 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 1222 std::vector<SectionPattern> Ret; 1223 while (!Error && peek() != ")") { 1224 Regex ExcludeFileRe; 1225 if (skip("EXCLUDE_FILE")) { 1226 expect("("); 1227 ExcludeFileRe = readFilePatterns(); 1228 } 1229 1230 std::vector<StringRef> V; 1231 while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") 1232 V.push_back(next()); 1233 1234 if (!V.empty()) 1235 Ret.push_back({std::move(ExcludeFileRe), compileGlobPatterns(V)}); 1236 else 1237 setError("section pattern is expected"); 1238 } 1239 return Ret; 1240 } 1241 1242 // Section pattern grammar can have complex expressions, for example: 1243 // *(SORT(.foo.* EXCLUDE_FILE (*file1.o) .bar.*) .bar.* SORT(.zed.*)) 1244 // Generally is a sequence of globs and excludes that may be wrapped in a SORT() 1245 // commands, like: SORT(glob0) glob1 glob2 SORT(glob4) 1246 // This methods handles wrapping sequences of excluded files and section 
globs 1247 // into SORT() if that needed and reads them all. 1248 InputSectionDescription * 1249 ScriptParser::readInputSectionRules(StringRef FilePattern) { 1250 auto *Cmd = new InputSectionDescription(FilePattern); 1251 expect("("); 1252 while (!HasError && !skip(")")) { 1253 SortSectionPolicy Outer = readSortKind(); 1254 SortSectionPolicy Inner = SortSectionPolicy::Default; 1255 std::vector<SectionPattern> V; 1256 if (Outer != SortSectionPolicy::Default) { 1257 expect("("); 1258 Inner = readSortKind(); 1259 if (Inner != SortSectionPolicy::Default) { 1260 expect("("); 1261 V = readInputSectionsList(); 1262 expect(")"); 1263 } else { 1264 V = readInputSectionsList(); 1265 } 1266 expect(")"); 1267 } else { 1268 V = readInputSectionsList(); 1269 } 1270 1271 for (SectionPattern &Pat : V) { 1272 Pat.SortInner = Inner; 1273 Pat.SortOuter = Outer; 1274 } 1275 1276 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 1277 } 1278 return Cmd; 1279 } 1280 1281 InputSectionDescription * 1282 ScriptParser::readInputSectionDescription(StringRef Tok) { 1283 // Input section wildcard can be surrounded by KEEP. 1284 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 1285 if (Tok == "KEEP") { 1286 expect("("); 1287 StringRef FilePattern = next(); 1288 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 1289 expect(")"); 1290 Opt.KeptSections.push_back(Cmd); 1291 return Cmd; 1292 } 1293 return readInputSectionRules(Tok); 1294 } 1295 1296 void ScriptParser::readSort() { 1297 expect("("); 1298 expect("CONSTRUCTORS"); 1299 expect(")"); 1300 } 1301 1302 Expr ScriptParser::readAssert() { 1303 expect("("); 1304 Expr E = readExpr(); 1305 expect(","); 1306 StringRef Msg = unquote(next()); 1307 expect(")"); 1308 return [=](uint64_t Dot) { 1309 uint64_t V = E(Dot); 1310 if (!V) 1311 error(Msg); 1312 return V; 1313 }; 1314 } 1315 1316 // Reads a FILL(expr) command. 
We handle the FILL command as an 1317 // alias for =fillexp section attribute, which is different from 1318 // what GNU linkers do. 1319 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 1320 std::vector<uint8_t> ScriptParser::readFill() { 1321 expect("("); 1322 std::vector<uint8_t> V = readOutputSectionFiller(next()); 1323 expect(")"); 1324 expect(";"); 1325 return V; 1326 } 1327 1328 OutputSectionCommand * 1329 ScriptParser::readOutputSectionDescription(StringRef OutSec) { 1330 OutputSectionCommand *Cmd = new OutputSectionCommand(OutSec); 1331 1332 // Read an address expression. 1333 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html#Output-Section-Address 1334 if (peek() != ":") 1335 Cmd->AddrExpr = readExpr(); 1336 1337 expect(":"); 1338 1339 if (skip("AT")) 1340 Cmd->LMAExpr = readParenExpr(); 1341 if (skip("ALIGN")) 1342 Cmd->AlignExpr = readParenExpr(); 1343 if (skip("SUBALIGN")) 1344 Cmd->SubalignExpr = readParenExpr(); 1345 1346 // Parse constraints. 
1347 if (skip("ONLY_IF_RO")) 1348 Cmd->Constraint = ConstraintKind::ReadOnly; 1349 if (skip("ONLY_IF_RW")) 1350 Cmd->Constraint = ConstraintKind::ReadWrite; 1351 expect("{"); 1352 1353 while (!Error && !skip("}")) { 1354 StringRef Tok = next(); 1355 if (SymbolAssignment *Assignment = readProvideOrAssignment(Tok, false)) 1356 Cmd->Commands.emplace_back(Assignment); 1357 else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) 1358 Cmd->Commands.emplace_back(Data); 1359 else if (Tok == "FILL") 1360 Cmd->Filler = readFill(); 1361 else if (Tok == "SORT") 1362 readSort(); 1363 else if (peek() == "(") 1364 Cmd->Commands.emplace_back(readInputSectionDescription(Tok)); 1365 else 1366 setError("unknown command " + Tok); 1367 } 1368 Cmd->Phdrs = readOutputSectionPhdrs(); 1369 1370 if (skip("=")) 1371 Cmd->Filler = readOutputSectionFiller(next()); 1372 else if (peek().startswith("=")) 1373 Cmd->Filler = readOutputSectionFiller(next().drop_front()); 1374 1375 return Cmd; 1376 } 1377 1378 // Read "=<number>" where <number> is an octal/decimal/hexadecimal number. 1379 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 1380 // 1381 // ld.gold is not fully compatible with ld.bfd. ld.bfd handles 1382 // hexstrings as blobs of arbitrary sizes, while ld.gold handles them 1383 // as 32-bit big-endian values. We will do the same as ld.gold does 1384 // because it's simpler than what ld.bfd does. 
1385 std::vector<uint8_t> ScriptParser::readOutputSectionFiller(StringRef Tok) { 1386 uint32_t V; 1387 if (Tok.getAsInteger(0, V)) { 1388 setError("invalid filler expression: " + Tok); 1389 return {}; 1390 } 1391 return {uint8_t(V >> 24), uint8_t(V >> 16), uint8_t(V >> 8), uint8_t(V)}; 1392 } 1393 1394 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { 1395 expect("("); 1396 SymbolAssignment *Cmd = readAssignment(next()); 1397 Cmd->Provide = Provide; 1398 Cmd->Hidden = Hidden; 1399 expect(")"); 1400 expect(";"); 1401 return Cmd; 1402 } 1403 1404 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok, 1405 bool MakeAbsolute) { 1406 SymbolAssignment *Cmd = nullptr; 1407 if (peek() == "=" || peek() == "+=") { 1408 Cmd = readAssignment(Tok); 1409 expect(";"); 1410 } else if (Tok == "PROVIDE") { 1411 Cmd = readProvideHidden(true, false); 1412 } else if (Tok == "HIDDEN") { 1413 Cmd = readProvideHidden(false, true); 1414 } else if (Tok == "PROVIDE_HIDDEN") { 1415 Cmd = readProvideHidden(true, true); 1416 } 1417 if (Cmd && MakeAbsolute) 1418 Cmd->IsAbsolute = true; 1419 return Cmd; 1420 } 1421 1422 static uint64_t getSymbolValue(StringRef S, uint64_t Dot) { 1423 if (S == ".") 1424 return Dot; 1425 return ScriptBase->getSymbolValue(S); 1426 } 1427 1428 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { 1429 StringRef Op = next(); 1430 bool IsAbsolute = false; 1431 Expr E; 1432 assert(Op == "=" || Op == "+="); 1433 if (skip("ABSOLUTE")) { 1434 E = readParenExpr(); 1435 IsAbsolute = true; 1436 } else { 1437 E = readExpr(); 1438 } 1439 if (Op == "+=") 1440 E = [=](uint64_t Dot) { return getSymbolValue(Name, Dot) + E(Dot); }; 1441 return new SymbolAssignment(Name, E, IsAbsolute); 1442 } 1443 1444 // This is an operator-precedence parser to parse a linker 1445 // script expression. 
1446 Expr ScriptParser::readExpr() { return readExpr1(readPrimary(), 0); } 1447 1448 static Expr combine(StringRef Op, Expr L, Expr R) { 1449 if (Op == "*") 1450 return [=](uint64_t Dot) { return L(Dot) * R(Dot); }; 1451 if (Op == "/") { 1452 return [=](uint64_t Dot) -> uint64_t { 1453 uint64_t RHS = R(Dot); 1454 if (RHS == 0) { 1455 error("division by zero"); 1456 return 0; 1457 } 1458 return L(Dot) / RHS; 1459 }; 1460 } 1461 if (Op == "+") 1462 return [=](uint64_t Dot) { return L(Dot) + R(Dot); }; 1463 if (Op == "-") 1464 return [=](uint64_t Dot) { return L(Dot) - R(Dot); }; 1465 if (Op == "<<") 1466 return [=](uint64_t Dot) { return L(Dot) << R(Dot); }; 1467 if (Op == ">>") 1468 return [=](uint64_t Dot) { return L(Dot) >> R(Dot); }; 1469 if (Op == "<") 1470 return [=](uint64_t Dot) { return L(Dot) < R(Dot); }; 1471 if (Op == ">") 1472 return [=](uint64_t Dot) { return L(Dot) > R(Dot); }; 1473 if (Op == ">=") 1474 return [=](uint64_t Dot) { return L(Dot) >= R(Dot); }; 1475 if (Op == "<=") 1476 return [=](uint64_t Dot) { return L(Dot) <= R(Dot); }; 1477 if (Op == "==") 1478 return [=](uint64_t Dot) { return L(Dot) == R(Dot); }; 1479 if (Op == "!=") 1480 return [=](uint64_t Dot) { return L(Dot) != R(Dot); }; 1481 if (Op == "&") 1482 return [=](uint64_t Dot) { return L(Dot) & R(Dot); }; 1483 if (Op == "|") 1484 return [=](uint64_t Dot) { return L(Dot) | R(Dot); }; 1485 llvm_unreachable("invalid operator"); 1486 } 1487 1488 // This is a part of the operator-precedence parser. This function 1489 // assumes that the remaining token stream starts with an operator. 1490 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { 1491 while (!atEOF() && !Error) { 1492 // Read an operator and an expression. 
1493 StringRef Op1 = peek(); 1494 if (Op1 == "?") 1495 return readTernary(Lhs); 1496 if (precedence(Op1) < MinPrec) 1497 break; 1498 next(); 1499 Expr Rhs = readPrimary(); 1500 1501 // Evaluate the remaining part of the expression first if the 1502 // next operator has greater precedence than the previous one. 1503 // For example, if we have read "+" and "3", and if the next 1504 // operator is "*", then we'll evaluate 3 * ... part first. 1505 while (!atEOF()) { 1506 StringRef Op2 = peek(); 1507 if (precedence(Op2) <= precedence(Op1)) 1508 break; 1509 Rhs = readExpr1(Rhs, precedence(Op2)); 1510 } 1511 1512 Lhs = combine(Op1, Lhs, Rhs); 1513 } 1514 return Lhs; 1515 } 1516 1517 uint64_t static getConstant(StringRef S) { 1518 if (S == "COMMONPAGESIZE") 1519 return Target->PageSize; 1520 if (S == "MAXPAGESIZE") 1521 return Config->MaxPageSize; 1522 error("unknown constant: " + S); 1523 return 0; 1524 } 1525 1526 // Parses Tok as an integer. Returns true if successful. 1527 // It recognizes hexadecimal (prefixed with "0x" or suffixed with "H") 1528 // and decimal numbers. Decimal numbers may have "K" (kilo) or 1529 // "M" (mega) prefixes. 
1530 static bool readInteger(StringRef Tok, uint64_t &Result) { 1531 if (Tok.startswith("-")) { 1532 if (!readInteger(Tok.substr(1), Result)) 1533 return false; 1534 Result = -Result; 1535 return true; 1536 } 1537 if (Tok.startswith_lower("0x")) 1538 return !Tok.substr(2).getAsInteger(16, Result); 1539 if (Tok.endswith_lower("H")) 1540 return !Tok.drop_back().getAsInteger(16, Result); 1541 1542 int Suffix = 1; 1543 if (Tok.endswith_lower("K")) { 1544 Suffix = 1024; 1545 Tok = Tok.drop_back(); 1546 } else if (Tok.endswith_lower("M")) { 1547 Suffix = 1024 * 1024; 1548 Tok = Tok.drop_back(); 1549 } 1550 if (Tok.getAsInteger(10, Result)) 1551 return false; 1552 Result *= Suffix; 1553 return true; 1554 } 1555 1556 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { 1557 int Size = StringSwitch<unsigned>(Tok) 1558 .Case("BYTE", 1) 1559 .Case("SHORT", 2) 1560 .Case("LONG", 4) 1561 .Case("QUAD", 8) 1562 .Default(-1); 1563 if (Size == -1) 1564 return nullptr; 1565 1566 expect("("); 1567 uint64_t Val = 0; 1568 StringRef S = next(); 1569 if (!readInteger(S, Val)) 1570 setError("unexpected value: " + S); 1571 expect(")"); 1572 return new BytesDataCommand(Val, Size); 1573 } 1574 1575 StringRef ScriptParser::readParenLiteral() { 1576 expect("("); 1577 StringRef Tok = next(); 1578 expect(")"); 1579 return Tok; 1580 } 1581 1582 Expr ScriptParser::readPrimary() { 1583 if (peek() == "(") 1584 return readParenExpr(); 1585 1586 StringRef Tok = next(); 1587 1588 if (Tok == "~") { 1589 Expr E = readPrimary(); 1590 return [=](uint64_t Dot) { return ~E(Dot); }; 1591 } 1592 if (Tok == "-") { 1593 Expr E = readPrimary(); 1594 return [=](uint64_t Dot) { return -E(Dot); }; 1595 } 1596 1597 // Built-in functions are parsed here. 1598 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 
1599 if (Tok == "ADDR") { 1600 StringRef Name = readParenLiteral(); 1601 return 1602 [=](uint64_t Dot) { return ScriptBase->getOutputSectionAddress(Name); }; 1603 } 1604 if (Tok == "LOADADDR") { 1605 StringRef Name = readParenLiteral(); 1606 return [=](uint64_t Dot) { return ScriptBase->getOutputSectionLMA(Name); }; 1607 } 1608 if (Tok == "ASSERT") 1609 return readAssert(); 1610 if (Tok == "ALIGN") { 1611 Expr E = readParenExpr(); 1612 return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; 1613 } 1614 if (Tok == "CONSTANT") { 1615 StringRef Name = readParenLiteral(); 1616 return [=](uint64_t Dot) { return getConstant(Name); }; 1617 } 1618 if (Tok == "DEFINED") { 1619 expect("("); 1620 StringRef Tok = next(); 1621 expect(")"); 1622 return [=](uint64_t Dot) { return ScriptBase->isDefined(Tok) ? 1 : 0; }; 1623 } 1624 if (Tok == "SEGMENT_START") { 1625 expect("("); 1626 next(); 1627 expect(","); 1628 Expr E = readExpr(); 1629 expect(")"); 1630 return [=](uint64_t Dot) { return E(Dot); }; 1631 } 1632 if (Tok == "DATA_SEGMENT_ALIGN") { 1633 expect("("); 1634 Expr E = readExpr(); 1635 expect(","); 1636 readExpr(); 1637 expect(")"); 1638 return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; 1639 } 1640 if (Tok == "DATA_SEGMENT_END") { 1641 expect("("); 1642 expect("."); 1643 expect(")"); 1644 return [](uint64_t Dot) { return Dot; }; 1645 } 1646 // GNU linkers implements more complicated logic to handle 1647 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and just align to 1648 // the next page boundary for simplicity. 
1649 if (Tok == "DATA_SEGMENT_RELRO_END") { 1650 expect("("); 1651 readExpr(); 1652 expect(","); 1653 readExpr(); 1654 expect(")"); 1655 return [](uint64_t Dot) { return alignTo(Dot, Target->PageSize); }; 1656 } 1657 if (Tok == "SIZEOF") { 1658 StringRef Name = readParenLiteral(); 1659 return [=](uint64_t Dot) { return ScriptBase->getOutputSectionSize(Name); }; 1660 } 1661 if (Tok == "ALIGNOF") { 1662 StringRef Name = readParenLiteral(); 1663 return 1664 [=](uint64_t Dot) { return ScriptBase->getOutputSectionAlign(Name); }; 1665 } 1666 if (Tok == "SIZEOF_HEADERS") 1667 return [=](uint64_t Dot) { return ScriptBase->getHeaderSize(); }; 1668 1669 // Tok is a literal number. 1670 uint64_t V; 1671 if (readInteger(Tok, V)) 1672 return [=](uint64_t Dot) { return V; }; 1673 1674 // Tok is a symbol name. 1675 if (Tok != "." && !isValidCIdentifier(Tok)) 1676 setError("malformed number: " + Tok); 1677 return [=](uint64_t Dot) { return getSymbolValue(Tok, Dot); }; 1678 } 1679 1680 Expr ScriptParser::readTernary(Expr Cond) { 1681 next(); 1682 Expr L = readExpr(); 1683 expect(":"); 1684 Expr R = readExpr(); 1685 return [=](uint64_t Dot) { return Cond(Dot) ? L(Dot) : R(Dot); }; 1686 } 1687 1688 Expr ScriptParser::readParenExpr() { 1689 expect("("); 1690 Expr E = readExpr(); 1691 expect(")"); 1692 return E; 1693 } 1694 1695 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 1696 std::vector<StringRef> Phdrs; 1697 while (!Error && peek().startswith(":")) { 1698 StringRef Tok = next(); 1699 Tok = (Tok.size() == 1) ? 
next() : Tok.substr(1); 1700 if (Tok.empty()) { 1701 setError("section header name is empty"); 1702 break; 1703 } 1704 Phdrs.push_back(Tok); 1705 } 1706 return Phdrs; 1707 } 1708 1709 unsigned ScriptParser::readPhdrType() { 1710 StringRef Tok = next(); 1711 unsigned Ret = StringSwitch<unsigned>(Tok) 1712 .Case("PT_NULL", PT_NULL) 1713 .Case("PT_LOAD", PT_LOAD) 1714 .Case("PT_DYNAMIC", PT_DYNAMIC) 1715 .Case("PT_INTERP", PT_INTERP) 1716 .Case("PT_NOTE", PT_NOTE) 1717 .Case("PT_SHLIB", PT_SHLIB) 1718 .Case("PT_PHDR", PT_PHDR) 1719 .Case("PT_TLS", PT_TLS) 1720 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1721 .Case("PT_GNU_STACK", PT_GNU_STACK) 1722 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1723 .Default(-1); 1724 1725 if (Ret == (unsigned)-1) { 1726 setError("invalid program header type: " + Tok); 1727 return PT_NULL; 1728 } 1729 return Ret; 1730 } 1731 1732 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1733 // Identifiers start at 2 because 0 and 1 are reserved 1734 // for VER_NDX_LOCAL and VER_NDX_GLOBAL constants. 1735 size_t VersionId = Config->VersionDefinitions.size() + 2; 1736 Config->VersionDefinitions.push_back({VerStr, VersionId}); 1737 1738 if (skip("global:") || peek() != "local:") 1739 readGlobal(VerStr); 1740 if (skip("local:")) 1741 readLocal(); 1742 expect("}"); 1743 1744 // Each version may have a parent version. For example, "Ver2" defined as 1745 // "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" as a parent. This 1746 // version hierarchy is, probably against your instinct, purely for human; the 1747 // runtime doesn't care about them at all. In LLD, we simply skip the token. 
1748 if (!VerStr.empty() && peek() != ";") 1749 next(); 1750 expect(";"); 1751 } 1752 1753 void ScriptParser::readLocal() { 1754 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1755 expect("*"); 1756 expect(";"); 1757 } 1758 1759 void ScriptParser::readExtern(std::vector<SymbolVersion> *Globals) { 1760 expect("\"C++\""); 1761 expect("{"); 1762 1763 for (;;) { 1764 if (peek() == "}" || Error) 1765 break; 1766 bool HasWildcard = !peek().startswith("\"") && hasWildcard(peek()); 1767 Globals->push_back({unquote(next()), true, HasWildcard}); 1768 expect(";"); 1769 } 1770 1771 expect("}"); 1772 expect(";"); 1773 } 1774 1775 void ScriptParser::readGlobal(StringRef VerStr) { 1776 std::vector<SymbolVersion> *Globals; 1777 if (VerStr.empty()) 1778 Globals = &Config->VersionScriptGlobals; 1779 else 1780 Globals = &Config->VersionDefinitions.back().Globals; 1781 1782 for (;;) { 1783 if (skip("extern")) 1784 readExtern(Globals); 1785 1786 StringRef Cur = peek(); 1787 if (Cur == "}" || Cur == "local:" || Error) 1788 return; 1789 next(); 1790 Globals->push_back({unquote(Cur), false, hasWildcard(Cur)}); 1791 expect(";"); 1792 } 1793 } 1794 1795 static bool isUnderSysroot(StringRef Path) { 1796 if (Config->Sysroot == "") 1797 return false; 1798 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 1799 if (sys::fs::equivalent(Config->Sysroot, Path)) 1800 return true; 1801 return false; 1802 } 1803 1804 void elf::readLinkerScript(MemoryBufferRef MB) { 1805 StringRef Path = MB.getBufferIdentifier(); 1806 ScriptParser(MB.getBuffer(), isUnderSysroot(Path)).readLinkerScript(); 1807 } 1808 1809 void elf::readVersionScript(MemoryBufferRef MB) { 1810 ScriptParser(MB.getBuffer(), false).readVersionScript(); 1811 } 1812 1813 template class elf::LinkerScript<ELF32LE>; 1814 template class elf::LinkerScript<ELF32BE>; 1815 template class elf::LinkerScript<ELF64LE>; 1816 template class elf::LinkerScript<ELF64BE>; 1817