1 //===- LinkerScript.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the parser/evaluator of the linker script. 11 // It parses a linker script and write the result to Config or ScriptConfig 12 // objects. 13 // 14 // If SECTIONS command is used, a ScriptConfig contains an AST 15 // of the command which will later be consumed by createSections() and 16 // assignAddresses(). 17 // 18 //===----------------------------------------------------------------------===// 19 20 #include "LinkerScript.h" 21 #include "Config.h" 22 #include "Driver.h" 23 #include "InputSection.h" 24 #include "OutputSections.h" 25 #include "ScriptParser.h" 26 #include "Strings.h" 27 #include "Symbols.h" 28 #include "SymbolTable.h" 29 #include "Target.h" 30 #include "Writer.h" 31 #include "llvm/ADT/StringSwitch.h" 32 #include "llvm/Support/ELF.h" 33 #include "llvm/Support/FileSystem.h" 34 #include "llvm/Support/MemoryBuffer.h" 35 #include "llvm/Support/Path.h" 36 #include "llvm/Support/StringSaver.h" 37 38 using namespace llvm; 39 using namespace llvm::ELF; 40 using namespace llvm::object; 41 using namespace llvm::support::endian; 42 using namespace lld; 43 using namespace lld::elf; 44 45 LinkerScriptBase *elf::ScriptBase; 46 ScriptConfiguration *elf::ScriptConfig; 47 48 template <class ELFT> static void addRegular(SymbolAssignment *Cmd) { 49 Symbol *Sym = Symtab<ELFT>::X->addRegular(Cmd->Name, STB_GLOBAL, STV_DEFAULT); 50 Sym->Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT; 51 Cmd->Sym = Sym->body(); 52 53 // If we have no SECTIONS then we don't have '.' and don't call 54 // assignAddresses(). We calculate symbol value immediately in this case. 55 if (!ScriptConfig->HasSections) 56 cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Cmd->Expression(0); 57 } 58 59 template <class ELFT> static void addSynthetic(SymbolAssignment *Cmd) { 60 Symbol *Sym = Symtab<ELFT>::X->addSynthetic( 61 Cmd->Name, nullptr, 0, Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT); 62 Cmd->Sym = Sym->body(); 63 } 64 65 template <class ELFT> static void addSymbol(SymbolAssignment *Cmd) { 66 if (Cmd->IsAbsolute) 67 addRegular<ELFT>(Cmd); 68 else 69 addSynthetic<ELFT>(Cmd); 70 } 71 // If a symbol was in PROVIDE(), we need to define it only when 72 // it is an undefined symbol. 73 template <class ELFT> static bool shouldDefine(SymbolAssignment *Cmd) { 74 if (Cmd->Name == ".") 75 return false; 76 if (!Cmd->Provide) 77 return true; 78 SymbolBody *B = Symtab<ELFT>::X->find(Cmd->Name); 79 return B && B->isUndefined(); 80 } 81 82 bool SymbolAssignment::classof(const BaseCommand *C) { 83 return C->Kind == AssignmentKind; 84 } 85 86 bool OutputSectionCommand::classof(const BaseCommand *C) { 87 return C->Kind == OutputSectionKind; 88 } 89 90 bool InputSectionDescription::classof(const BaseCommand *C) { 91 return C->Kind == InputSectionKind; 92 } 93 94 bool AssertCommand::classof(const BaseCommand *C) { 95 return C->Kind == AssertKind; 96 } 97 98 bool BytesDataCommand::classof(const BaseCommand *C) { 99 return C->Kind == BytesDataKind; 100 } 101 102 template <class ELFT> static bool isDiscarded(InputSectionBase<ELFT> *S) { 103 return !S || !S->Live; 104 } 105 106 template <class ELFT> LinkerScript<ELFT>::LinkerScript() {} 107 template <class ELFT> LinkerScript<ELFT>::~LinkerScript() {} 108 109 template <class ELFT> 110 bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) { 111 for (InputSectionDescription *ID : Opt.KeptSections) { 112 StringRef Filename = S->getFile()->getName(); 113 if (!ID->FileRe.match(sys::path::filename(Filename))) 114 continue; 115 116 for (SectionPattern &P : ID->SectionPatterns) 117 if (P.SectionRe.match(S->Name)) 118 return true; 119 } 120 return false; 121 } 122 123 static bool comparePriority(InputSectionData *A, InputSectionData *B) { 124 return getPriority(A->Name) < getPriority(B->Name); 125 } 126 127 static bool compareName(InputSectionData *A, InputSectionData *B) { 128 return A->Name < B->Name; 129 } 130 131 static bool compareAlignment(InputSectionData *A, InputSectionData *B) { 132 // ">" is not a mistake. Larger alignments are placed before smaller 133 // alignments in order to reduce the amount of padding necessary. 134 // This is compatible with GNU. 135 return A->Alignment > B->Alignment; 136 } 137 138 static std::function<bool(InputSectionData *, InputSectionData *)> 139 getComparator(SortSectionPolicy K) { 140 switch (K) { 141 case SortSectionPolicy::Alignment: 142 return compareAlignment; 143 case SortSectionPolicy::Name: 144 return compareName; 145 case SortSectionPolicy::Priority: 146 return comparePriority; 147 default: 148 llvm_unreachable("unknown sort policy"); 149 } 150 } 151 152 template <class ELFT> 153 static bool matchConstraints(ArrayRef<InputSectionBase<ELFT> *> Sections, 154 ConstraintKind Kind) { 155 if (Kind == ConstraintKind::NoConstraint) 156 return true; 157 bool IsRW = llvm::any_of(Sections, [=](InputSectionData *Sec2) { 158 auto *Sec = static_cast<InputSectionBase<ELFT> *>(Sec2); 159 return Sec->getSectionHdr()->sh_flags & SHF_WRITE; 160 }); 161 return (IsRW && Kind == ConstraintKind::ReadWrite) || 162 (!IsRW && Kind == ConstraintKind::ReadOnly); 163 } 164 165 static void sortSections(InputSectionData **Begin, InputSectionData **End, 166 SortSectionPolicy K) { 167 if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None) 168 std::stable_sort(Begin, End, getComparator(K)); 169 } 170 171 // Compute and remember which sections the InputSectionDescription matches. 172 template <class ELFT> 173 void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) { 174 // Collects all sections that satisfy constraints of I 175 // and attach them to I. 176 for (SectionPattern &Pat : I->SectionPatterns) { 177 size_t SizeBefore = I->Sections.size(); 178 for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles()) { 179 StringRef Filename = sys::path::filename(F->getName()); 180 if (!I->FileRe.match(Filename) || Pat.ExcludedFileRe.match(Filename)) 181 continue; 182 183 for (InputSectionBase<ELFT> *S : F->getSections()) 184 if (!isDiscarded(S) && !S->OutSec && Pat.SectionRe.match(S->Name)) 185 I->Sections.push_back(S); 186 if (Pat.SectionRe.match("COMMON")) 187 I->Sections.push_back(CommonInputSection<ELFT>::X); 188 } 189 190 // Sort sections as instructed by SORT-family commands and --sort-section 191 // option. Because SORT-family commands can be nested at most two depth 192 // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command 193 // line option is respected even if a SORT command is given, the exact 194 // behavior we have here is a bit complicated. Here are the rules. 195 // 196 // 1. If two SORT commands are given, --sort-section is ignored. 197 // 2. If one SORT command is given, and if it is not SORT_NONE, 198 // --sort-section is handled as an inner SORT command. 199 // 3. If one SORT command is given, and if it is SORT_NONE, don't sort. 200 // 4. If no SORT command is given, sort according to --sort-section. 201 InputSectionData **Begin = I->Sections.data() + SizeBefore; 202 InputSectionData **End = I->Sections.data() + I->Sections.size(); 203 if (Pat.SortOuter != SortSectionPolicy::None) { 204 if (Pat.SortInner == SortSectionPolicy::Default) 205 sortSections(Begin, End, Config->SortSection); 206 else 207 sortSections(Begin, End, Pat.SortInner); 208 sortSections(Begin, End, Pat.SortOuter); 209 } 210 } 211 212 // We do not add duplicate input sections, so mark them with a dummy output 213 // section for now. 214 for (InputSectionData *S : I->Sections) { 215 auto *S2 = static_cast<InputSectionBase<ELFT> *>(S); 216 S2->OutSec = (OutputSectionBase<ELFT> *)-1; 217 } 218 } 219 220 template <class ELFT> 221 void LinkerScript<ELFT>::discard(ArrayRef<InputSectionBase<ELFT> *> V) { 222 for (InputSectionBase<ELFT> *S : V) { 223 S->Live = false; 224 reportDiscarded(S); 225 } 226 } 227 228 template <class ELFT> 229 std::vector<InputSectionBase<ELFT> *> 230 LinkerScript<ELFT>::createInputSectionList(OutputSectionCommand &OutCmd) { 231 std::vector<InputSectionBase<ELFT> *> Ret; 232 233 for (const std::unique_ptr<BaseCommand> &Base : OutCmd.Commands) { 234 auto *Cmd = dyn_cast<InputSectionDescription>(Base.get()); 235 if (!Cmd) 236 continue; 237 computeInputSections(Cmd); 238 for (InputSectionData *S : Cmd->Sections) 239 Ret.push_back(static_cast<InputSectionBase<ELFT> *>(S)); 240 } 241 242 // After we created final list we should now set OutSec pointer to null, 243 // instead of -1. Otherwise we may get a crash when writing relocs, in 244 // case section is discarded by linker script 245 for (InputSectionBase<ELFT> *S : Ret) 246 S->OutSec = nullptr; 247 248 return Ret; 249 } 250 251 template <class ELFT> 252 static SectionKey<ELFT::Is64Bits> createKey(InputSectionBase<ELFT> *C, 253 StringRef OutsecName) { 254 // When using linker script the merge rules are different. 255 // Unfortunately, linker scripts are name based. This means that expressions 256 // like *(.foo*) can refer to multiple input sections that would normally be 257 // placed in different output sections. We cannot put them in different 258 // output sections or we would produce wrong results for 259 // start = .; *(.foo.*) end = .; *(.bar) 260 // and a mapping of .foo1 and .bar1 to one section and .foo2 and .bar2 to 261 // another. The problem is that there is no way to layout those output 262 // sections such that the .foo sections are the only thing between the 263 // start and end symbols. 264 265 // An extra annoyance is that we cannot simply disable merging of the contents 266 // of SHF_MERGE sections, but our implementation requires one output section 267 // per "kind" (string or not, which size/aligment). 268 // Fortunately, creating symbols in the middle of a merge section is not 269 // supported by bfd or gold, so we can just create multiple section in that 270 // case. 271 const typename ELFT::Shdr *H = C->getSectionHdr(); 272 typedef typename ELFT::uint uintX_t; 273 uintX_t Flags = H->sh_flags & (SHF_MERGE | SHF_STRINGS); 274 275 uintX_t Alignment = 0; 276 if (isa<MergeInputSection<ELFT>>(C)) 277 Alignment = std::max(H->sh_addralign, H->sh_entsize); 278 279 return SectionKey<ELFT::Is64Bits>{OutsecName, /*Type*/ 0, Flags, Alignment}; 280 } 281 282 template <class ELFT> 283 void LinkerScript<ELFT>::addSection(OutputSectionFactory<ELFT> &Factory, 284 InputSectionBase<ELFT> *Sec, 285 StringRef Name) { 286 OutputSectionBase<ELFT> *OutSec; 287 bool IsNew; 288 std::tie(OutSec, IsNew) = Factory.create(createKey(Sec, Name), Sec); 289 if (IsNew) 290 OutputSections->push_back(OutSec); 291 OutSec->addSection(Sec); 292 } 293 294 template <class ELFT> 295 void LinkerScript<ELFT>::processCommands(OutputSectionFactory<ELFT> &Factory) { 296 297 for (unsigned I = 0; I < Opt.Commands.size(); ++I) { 298 auto Iter = Opt.Commands.begin() + I; 299 const std::unique_ptr<BaseCommand> &Base1 = *Iter; 300 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base1.get())) { 301 if (shouldDefine<ELFT>(Cmd)) 302 addRegular<ELFT>(Cmd); 303 continue; 304 } 305 if (auto *Cmd = dyn_cast<AssertCommand>(Base1.get())) { 306 // If we don't have SECTIONS then output sections have already been 307 // created by Writer<ELFT>. The LinkerScript<ELFT>::assignAddresses 308 // will not be called, so ASSERT should be evaluated now. 309 if (!Opt.HasSections) 310 Cmd->Expression(0); 311 continue; 312 } 313 314 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base1.get())) { 315 std::vector<InputSectionBase<ELFT> *> V = createInputSectionList(*Cmd); 316 317 if (Cmd->Name == "/DISCARD/") { 318 discard(V); 319 continue; 320 } 321 322 if (!matchConstraints<ELFT>(V, Cmd->Constraint)) { 323 for (InputSectionBase<ELFT> *S : V) 324 S->OutSec = nullptr; 325 Opt.Commands.erase(Iter); 326 --I; 327 continue; 328 } 329 330 for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) 331 if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base.get())) 332 if (shouldDefine<ELFT>(OutCmd)) 333 addSymbol<ELFT>(OutCmd); 334 335 if (V.empty()) 336 continue; 337 338 for (InputSectionBase<ELFT> *Sec : V) { 339 addSection(Factory, Sec, Cmd->Name); 340 if (uint32_t Subalign = Cmd->SubalignExpr ? Cmd->SubalignExpr(0) : 0) 341 Sec->Alignment = Subalign; 342 } 343 } 344 } 345 } 346 347 template <class ELFT> 348 void LinkerScript<ELFT>::createSections(OutputSectionFactory<ELFT> &Factory) { 349 processCommands(Factory); 350 // Add orphan sections. 351 for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles()) 352 for (InputSectionBase<ELFT> *S : F->getSections()) 353 if (!isDiscarded(S) && !S->OutSec) 354 addSection(Factory, S, getOutputSectionName(S->Name, Opt.Alloc)); 355 } 356 357 // Sets value of a section-defined symbol. Two kinds of 358 // symbols are processed: synthetic symbols, whose value 359 // is an offset from beginning of section and regular 360 // symbols whose value is absolute. 361 template <class ELFT> 362 static void assignSectionSymbol(SymbolAssignment *Cmd, 363 OutputSectionBase<ELFT> *Sec, 364 typename ELFT::uint Off) { 365 if (!Cmd->Sym) 366 return; 367 368 if (auto *Body = dyn_cast<DefinedSynthetic<ELFT>>(Cmd->Sym)) { 369 Body->Section = Sec; 370 Body->Value = Cmd->Expression(Sec->getVA() + Off) - Sec->getVA(); 371 return; 372 } 373 auto *Body = cast<DefinedRegular<ELFT>>(Cmd->Sym); 374 Body->Value = Cmd->Expression(Sec->getVA() + Off); 375 } 376 377 template <class ELFT> static bool isTbss(OutputSectionBase<ELFT> *Sec) { 378 return (Sec->getFlags() & SHF_TLS) && Sec->getType() == SHT_NOBITS; 379 } 380 381 template <class ELFT> void LinkerScript<ELFT>::output(InputSection<ELFT> *S) { 382 if (!AlreadyOutputIS.insert(S).second) 383 return; 384 bool IsTbss = isTbss(CurOutSec); 385 386 uintX_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot; 387 Pos = alignTo(Pos, S->Alignment); 388 S->OutSecOff = Pos - CurOutSec->getVA(); 389 Pos += S->getSize(); 390 391 // Update output section size after adding each section. This is so that 392 // SIZEOF works correctly in the case below: 393 // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) } 394 CurOutSec->setSize(Pos - CurOutSec->getVA()); 395 396 if (IsTbss) 397 ThreadBssOffset = Pos - Dot; 398 else 399 Dot = Pos; 400 } 401 402 template <class ELFT> void LinkerScript<ELFT>::flush() { 403 if (!CurOutSec || !AlreadyOutputOS.insert(CurOutSec).second) 404 return; 405 if (auto *OutSec = dyn_cast<OutputSection<ELFT>>(CurOutSec)) { 406 for (InputSection<ELFT> *I : OutSec->Sections) 407 output(I); 408 } else { 409 Dot += CurOutSec->getSize(); 410 } 411 } 412 413 template <class ELFT> 414 void LinkerScript<ELFT>::switchTo(OutputSectionBase<ELFT> *Sec) { 415 if (CurOutSec == Sec) 416 return; 417 if (AlreadyOutputOS.count(Sec)) 418 return; 419 420 flush(); 421 CurOutSec = Sec; 422 423 Dot = alignTo(Dot, CurOutSec->getAlignment()); 424 CurOutSec->setVA(isTbss(CurOutSec) ? Dot + ThreadBssOffset : Dot); 425 426 // If neither AT nor AT> is specified for an allocatable section, the linker 427 // will set the LMA such that the difference between VMA and LMA for the 428 // section is the same as the preceding output section in the same region 429 // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html 430 CurOutSec->setLMAOffset(LMAOffset); 431 } 432 433 template <class ELFT> void LinkerScript<ELFT>::process(BaseCommand &Base) { 434 // This handles the assignments to symbol or to a location counter (.) 435 if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) { 436 if (AssignCmd->Name == ".") { 437 // Update to location counter means update to section size. 438 Dot = AssignCmd->Expression(Dot); 439 CurOutSec->setSize(Dot - CurOutSec->getVA()); 440 return; 441 } 442 assignSectionSymbol<ELFT>(AssignCmd, CurOutSec, Dot - CurOutSec->getVA()); 443 return; 444 } 445 446 // Handle BYTE(), SHORT(), LONG(), or QUAD(). 447 if (auto *DataCmd = dyn_cast<BytesDataCommand>(&Base)) { 448 DataCmd->Offset = Dot - CurOutSec->getVA(); 449 Dot += DataCmd->Size; 450 CurOutSec->setSize(Dot - CurOutSec->getVA()); 451 return; 452 } 453 454 // It handles single input section description command, 455 // calculates and assigns the offsets for each section and also 456 // updates the output section size. 457 auto &ICmd = cast<InputSectionDescription>(Base); 458 for (InputSectionData *ID : ICmd.Sections) { 459 auto *IB = static_cast<InputSectionBase<ELFT> *>(ID); 460 switchTo(IB->OutSec); 461 if (auto *I = dyn_cast<InputSection<ELFT>>(IB)) 462 output(I); 463 else 464 flush(); 465 } 466 } 467 468 template <class ELFT> 469 static std::vector<OutputSectionBase<ELFT> *> 470 findSections(StringRef Name, 471 const std::vector<OutputSectionBase<ELFT> *> &Sections) { 472 std::vector<OutputSectionBase<ELFT> *> Ret; 473 for (OutputSectionBase<ELFT> *Sec : Sections) 474 if (Sec->getName() == Name) 475 Ret.push_back(Sec); 476 return Ret; 477 } 478 479 template <class ELFT> 480 void LinkerScript<ELFT>::assignOffsets(OutputSectionCommand *Cmd) { 481 if (Cmd->LMAExpr) 482 LMAOffset = Cmd->LMAExpr(Dot) - Dot; 483 std::vector<OutputSectionBase<ELFT> *> Sections = 484 findSections(Cmd->Name, *OutputSections); 485 if (Sections.empty()) 486 return; 487 switchTo(Sections[0]); 488 // Find the last section output location. We will output orphan sections 489 // there so that end symbols point to the correct location. 490 auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(), 491 [](const std::unique_ptr<BaseCommand> &Cmd) { 492 return !isa<SymbolAssignment>(*Cmd); 493 }) 494 .base(); 495 for (auto I = Cmd->Commands.begin(); I != E; ++I) 496 process(**I); 497 for (OutputSectionBase<ELFT> *Base : Sections) 498 switchTo(Base); 499 flush(); 500 std::for_each(E, Cmd->Commands.end(), 501 [this](std::unique_ptr<BaseCommand> &B) { process(*B.get()); }); 502 } 503 504 template <class ELFT> void LinkerScript<ELFT>::adjustSectionsBeforeSorting() { 505 // It is common practice to use very generic linker scripts. So for any 506 // given run some of the output sections in the script will be empty. 507 // We could create corresponding empty output sections, but that would 508 // clutter the output. 509 // We instead remove trivially empty sections. The bfd linker seems even 510 // more aggressive at removing them. 511 auto Pos = std::remove_if( 512 Opt.Commands.begin(), Opt.Commands.end(), 513 [&](const std::unique_ptr<BaseCommand> &Base) { 514 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 515 if (!Cmd) 516 return false; 517 std::vector<OutputSectionBase<ELFT> *> Secs = 518 findSections(Cmd->Name, *OutputSections); 519 if (!Secs.empty()) 520 return false; 521 for (const std::unique_ptr<BaseCommand> &I : Cmd->Commands) 522 if (!isa<InputSectionDescription>(I.get())) 523 return false; 524 return true; 525 }); 526 Opt.Commands.erase(Pos, Opt.Commands.end()); 527 528 // If the output section contains only symbol assignments, create a 529 // corresponding output section. The bfd linker seems to only create them if 530 // '.' is assigned to, but creating these section should not have any bad 531 // consequeces and gives us a section to put the symbol in. 532 uintX_t Flags = SHF_ALLOC; 533 uint32_t Type = 0; 534 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 535 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 536 if (!Cmd) 537 continue; 538 std::vector<OutputSectionBase<ELFT> *> Secs = 539 findSections(Cmd->Name, *OutputSections); 540 if (!Secs.empty()) { 541 Flags = Secs[0]->getFlags(); 542 Type = Secs[0]->getType(); 543 continue; 544 } 545 546 auto *OutSec = new OutputSection<ELFT>(Cmd->Name, Type, Flags); 547 Out<ELFT>::Pool.emplace_back(OutSec); 548 OutputSections->push_back(OutSec); 549 } 550 } 551 552 // When placing orphan sections, we want to place them after symbol assignments 553 // so that an orphan after 554 // begin_foo = .; 555 // foo : { *(foo) } 556 // end_foo = .; 557 // doesn't break the intended meaning of the begin/end symbols. 558 // We don't want to go over sections since Writer<ELFT>::sortSections is the 559 // one in charge of deciding the order of the sections. 560 // We don't want to go over alignments, since doing so in 561 // rx_sec : { *(rx_sec) } 562 // . = ALIGN(0x1000); 563 // /* The RW PT_LOAD starts here*/ 564 // rw_sec : { *(rw_sec) } 565 // would mean that the RW PT_LOAD would become unaligned. 566 static bool shouldSkip(const BaseCommand &Cmd) { 567 if (isa<OutputSectionCommand>(Cmd)) 568 return false; 569 const auto *Assign = dyn_cast<SymbolAssignment>(&Cmd); 570 if (!Assign) 571 return true; 572 return Assign->Name != "."; 573 } 574 575 template <class ELFT> 576 void LinkerScript<ELFT>::assignAddresses(std::vector<PhdrEntry<ELFT>> &Phdrs) { 577 // Orphan sections are sections present in the input files which 578 // are not explicitly placed into the output file by the linker script. 579 // We place orphan sections at end of file. 580 // Other linkers places them using some heuristics as described in 581 // https://sourceware.org/binutils/docs/ld/Orphan-Sections.html#Orphan-Sections. 582 583 // The OutputSections are already in the correct order. 584 // This loops creates or moves commands as needed so that they are in the 585 // correct order. 586 int CmdIndex = 0; 587 for (OutputSectionBase<ELFT> *Sec : *OutputSections) { 588 StringRef Name = Sec->getName(); 589 590 // Find the last spot where we can insert a command and still get the 591 // correct result. 592 auto CmdIter = Opt.Commands.begin() + CmdIndex; 593 auto E = Opt.Commands.end(); 594 while (CmdIter != E && shouldSkip(**CmdIter)) { 595 ++CmdIter; 596 ++CmdIndex; 597 } 598 599 auto Pos = 600 std::find_if(CmdIter, E, [&](const std::unique_ptr<BaseCommand> &Base) { 601 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 602 return Cmd && Cmd->Name == Name; 603 }); 604 if (Pos == E) { 605 Opt.Commands.insert(CmdIter, 606 llvm::make_unique<OutputSectionCommand>(Name)); 607 ++CmdIndex; 608 continue; 609 } 610 611 // Continue from where we found it. 612 CmdIndex = (Pos - Opt.Commands.begin()) + 1; 613 continue; 614 } 615 616 // Assign addresses as instructed by linker script SECTIONS sub-commands. 617 Dot = 0; 618 619 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 620 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) { 621 if (Cmd->Name == ".") { 622 Dot = Cmd->Expression(Dot); 623 } else if (Cmd->Sym) { 624 cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Cmd->Expression(Dot); 625 } 626 continue; 627 } 628 629 if (auto *Cmd = dyn_cast<AssertCommand>(Base.get())) { 630 Cmd->Expression(Dot); 631 continue; 632 } 633 634 auto *Cmd = cast<OutputSectionCommand>(Base.get()); 635 636 if (Cmd->AddrExpr) 637 Dot = Cmd->AddrExpr(Dot); 638 639 assignOffsets(Cmd); 640 } 641 642 uintX_t MinVA = std::numeric_limits<uintX_t>::max(); 643 for (OutputSectionBase<ELFT> *Sec : *OutputSections) { 644 if (Sec->getFlags() & SHF_ALLOC) 645 MinVA = std::min(MinVA, Sec->getVA()); 646 else 647 Sec->setVA(0); 648 } 649 650 uintX_t HeaderSize = getHeaderSize(); 651 auto FirstPTLoad = 652 std::find_if(Phdrs.begin(), Phdrs.end(), [](const PhdrEntry<ELFT> &E) { 653 return E.H.p_type == PT_LOAD; 654 }); 655 656 if (HeaderSize <= MinVA && FirstPTLoad != Phdrs.end()) { 657 // If linker script specifies program headers and first PT_LOAD doesn't 658 // have both PHDRS and FILEHDR attributes then do nothing 659 if (!Opt.PhdrsCommands.empty()) { 660 size_t SegNum = std::distance(Phdrs.begin(), FirstPTLoad); 661 if (!Opt.PhdrsCommands[SegNum].HasPhdrs || 662 !Opt.PhdrsCommands[SegNum].HasFilehdr) 663 return; 664 } 665 // ELF and Program headers need to be right before the first section in 666 // memory. Set their addresses accordingly. 667 MinVA = alignDown(MinVA - HeaderSize, Target->PageSize); 668 Out<ELFT>::ElfHeader->setVA(MinVA); 669 Out<ELFT>::ProgramHeaders->setVA(Out<ELFT>::ElfHeader->getSize() + MinVA); 670 FirstPTLoad->First = Out<ELFT>::ElfHeader; 671 if (!FirstPTLoad->Last) 672 FirstPTLoad->Last = Out<ELFT>::ProgramHeaders; 673 } else if (!FirstPTLoad->First) { 674 // Sometimes the very first PT_LOAD segment can be empty. 675 // This happens if (all conditions met): 676 // - Linker script is used 677 // - First section in ELF image is not RO 678 // - Not enough space for program headers. 679 // The code below removes empty PT_LOAD segment and updates 680 // program headers size. 681 Phdrs.erase(FirstPTLoad); 682 Out<ELFT>::ProgramHeaders->setSize(sizeof(typename ELFT::Phdr) * 683 Phdrs.size()); 684 } 685 } 686 687 // Creates program headers as instructed by PHDRS linker script command. 688 template <class ELFT> 689 std::vector<PhdrEntry<ELFT>> LinkerScript<ELFT>::createPhdrs() { 690 std::vector<PhdrEntry<ELFT>> Ret; 691 692 // Process PHDRS and FILEHDR keywords because they are not 693 // real output sections and cannot be added in the following loop. 694 std::vector<size_t> DefPhdrIds; 695 for (const PhdrsCommand &Cmd : Opt.PhdrsCommands) { 696 Ret.emplace_back(Cmd.Type, Cmd.Flags == UINT_MAX ? PF_R : Cmd.Flags); 697 PhdrEntry<ELFT> &Phdr = Ret.back(); 698 699 if (Cmd.HasFilehdr) 700 Phdr.add(Out<ELFT>::ElfHeader); 701 if (Cmd.HasPhdrs) 702 Phdr.add(Out<ELFT>::ProgramHeaders); 703 704 if (Cmd.LMAExpr) { 705 Phdr.H.p_paddr = Cmd.LMAExpr(0); 706 Phdr.HasLMA = true; 707 } 708 709 // If output section command doesn't specify any segments, 710 // and we haven't previously assigned any section to segment, 711 // then we simply assign section to the very first load segment. 712 // Below is an example of such linker script: 713 // PHDRS { seg PT_LOAD; } 714 // SECTIONS { .aaa : { *(.aaa) } } 715 if (DefPhdrIds.empty() && Phdr.H.p_type == PT_LOAD) 716 DefPhdrIds.push_back(Ret.size() - 1); 717 } 718 719 // Add output sections to program headers. 720 for (OutputSectionBase<ELFT> *Sec : *OutputSections) { 721 if (!(Sec->getFlags() & SHF_ALLOC)) 722 break; 723 724 std::vector<size_t> PhdrIds = getPhdrIndices(Sec->getName()); 725 if (PhdrIds.empty()) 726 PhdrIds = std::move(DefPhdrIds); 727 728 // Assign headers specified by linker script 729 for (size_t Id : PhdrIds) { 730 Ret[Id].add(Sec); 731 if (Opt.PhdrsCommands[Id].Flags == UINT_MAX) 732 Ret[Id].H.p_flags |= Sec->getPhdrFlags(); 733 } 734 DefPhdrIds = std::move(PhdrIds); 735 } 736 return Ret; 737 } 738 739 template <class ELFT> bool LinkerScript<ELFT>::ignoreInterpSection() { 740 // Ignore .interp section in case we have PHDRS specification 741 // and PT_INTERP isn't listed. 742 return !Opt.PhdrsCommands.empty() && 743 llvm::find_if(Opt.PhdrsCommands, [](const PhdrsCommand &Cmd) { 744 return Cmd.Type == PT_INTERP; 745 }) == Opt.PhdrsCommands.end(); 746 } 747 748 template <class ELFT> 749 ArrayRef<uint8_t> LinkerScript<ELFT>::getFiller(StringRef Name) { 750 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 751 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 752 if (Cmd->Name == Name) 753 return Cmd->Filler; 754 return {}; 755 } 756 757 template <class ELFT> 758 static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) { 759 const endianness E = ELFT::TargetEndianness; 760 761 switch (Size) { 762 case 1: 763 *Buf = (uint8_t)Data; 764 break; 765 case 2: 766 write16<E>(Buf, Data); 767 break; 768 case 4: 769 write32<E>(Buf, Data); 770 break; 771 case 8: 772 write64<E>(Buf, Data); 773 break; 774 default: 775 llvm_unreachable("unsupported Size argument"); 776 } 777 } 778 779 template <class ELFT> 780 void LinkerScript<ELFT>::writeDataBytes(StringRef Name, uint8_t *Buf) { 781 int I = getSectionIndex(Name); 782 if (I == INT_MAX) 783 return; 784 785 OutputSectionCommand *Cmd = 786 dyn_cast<OutputSectionCommand>(Opt.Commands[I].get()); 787 for (const std::unique_ptr<BaseCommand> &Base2 : Cmd->Commands) 788 if (auto *DataCmd = dyn_cast<BytesDataCommand>(Base2.get())) 789 writeInt<ELFT>(&Buf[DataCmd->Offset], DataCmd->Data, DataCmd->Size); 790 } 791 792 template <class ELFT> bool LinkerScript<ELFT>::hasLMA(StringRef Name) { 793 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 794 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 795 if (Cmd->LMAExpr && Cmd->Name == Name) 796 return true; 797 return false; 798 } 799 800 // Returns the index of the given section name in linker script 801 // SECTIONS commands. Sections are laid out as the same order as they 802 // were in the script. If a given name did not appear in the script, 803 // it returns INT_MAX, so that it will be laid out at end of file. 804 template <class ELFT> int LinkerScript<ELFT>::getSectionIndex(StringRef Name) { 805 int I = 0; 806 for (std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 807 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 808 if (Cmd->Name == Name) 809 return I; 810 ++I; 811 } 812 return INT_MAX; 813 } 814 815 template <class ELFT> bool LinkerScript<ELFT>::hasPhdrsCommands() { 816 return !Opt.PhdrsCommands.empty(); 817 } 818 819 template <class ELFT> 820 uint64_t LinkerScript<ELFT>::getOutputSectionAddress(StringRef Name) { 821 for (OutputSectionBase<ELFT> *Sec : *OutputSections) 822 if (Sec->getName() == Name) 823 return Sec->getVA(); 824 error("undefined section " + Name); 825 return 0; 826 } 827 828 template <class ELFT> 829 uint64_t LinkerScript<ELFT>::getOutputSectionLMA(StringRef Name) { 830 for (OutputSectionBase<ELFT> *Sec : *OutputSections) 831 if (Sec->getName() == Name) 832 return Sec->getLMA(); 833 error("undefined section " + Name); 834 return 0; 835 } 836 837 template <class ELFT> 838 uint64_t LinkerScript<ELFT>::getOutputSectionSize(StringRef Name) { 839 for (OutputSectionBase<ELFT> *Sec : *OutputSections) 840 if (Sec->getName() == Name) 841 return Sec->getSize(); 842 error("undefined section " + Name); 843 return 0; 844 } 845 846 template <class ELFT> 847 uint64_t LinkerScript<ELFT>::getOutputSectionAlign(StringRef Name) { 848 for (OutputSectionBase<ELFT> *Sec : *OutputSections) 849 if (Sec->getName() == Name) 850 return Sec->getAlignment(); 851 error("undefined section " + Name); 852 return 0; 853 } 854 855 template <class ELFT> uint64_t LinkerScript<ELFT>::getHeaderSize() { 856 return elf::getHeaderSize<ELFT>(); 857 } 858 859 template <class ELFT> uint64_t LinkerScript<ELFT>::getSymbolValue(StringRef S) { 860 if (SymbolBody *B = Symtab<ELFT>::X->find(S)) 861 return B->getVA<ELFT>(); 862 error("symbol not found: " + S); 863 return 0; 864 } 865 866 template <class ELFT> bool LinkerScript<ELFT>::isDefined(StringRef S) { 867 return Symtab<ELFT>::X->find(S) != nullptr; 868 } 869 870 // Returns indices of ELF headers containing specific section, identified 871 // by Name. Each index is a zero based number of ELF header listed within 872 // PHDRS {} script block. 873 template <class ELFT> 874 std::vector<size_t> LinkerScript<ELFT>::getPhdrIndices(StringRef SectionName) { 875 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 876 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 877 if (!Cmd || Cmd->Name != SectionName) 878 continue; 879 880 std::vector<size_t> Ret; 881 for (StringRef PhdrName : Cmd->Phdrs) 882 Ret.push_back(getPhdrIndex(PhdrName)); 883 return Ret; 884 } 885 return {}; 886 } 887 888 template <class ELFT> 889 size_t LinkerScript<ELFT>::getPhdrIndex(StringRef PhdrName) { 890 size_t I = 0; 891 for (PhdrsCommand &Cmd : Opt.PhdrsCommands) { 892 if (Cmd.Name == PhdrName) 893 return I; 894 ++I; 895 } 896 error("section header '" + PhdrName + "' is not listed in PHDRS"); 897 return 0; 898 } 899 900 class elf::ScriptParser : public ScriptParserBase { 901 typedef void (ScriptParser::*Handler)(); 902 903 public: 904 ScriptParser(StringRef S, bool B) : ScriptParserBase(S), IsUnderSysroot(B) {} 905 906 void readLinkerScript(); 907 void readVersionScript(); 908 909 private: 910 void addFile(StringRef Path); 911 912 void readAsNeeded(); 913 void readEntry(); 914 void readExtern(); 915 void readGroup(); 916 void readInclude(); 917 void readOutput(); 918 void readOutputArch(); 919 void readOutputFormat(); 920 void readPhdrs(); 921 void readSearchDir(); 922 void readSections(); 923 void readVersion(); 924 void readVersionScriptCommand(); 925 926 SymbolAssignment *readAssignment(StringRef Name); 927 BytesDataCommand *readBytesDataCommand(StringRef Tok); 928 std::vector<uint8_t> readFill(); 929 OutputSectionCommand *readOutputSectionDescription(StringRef OutSec); 930 std::vector<uint8_t> readOutputSectionFiller(StringRef Tok); 931 std::vector<StringRef> readOutputSectionPhdrs(); 932 InputSectionDescription *readInputSectionDescription(StringRef Tok); 933 Regex readFilePatterns(); 934 std::vector<SectionPattern> readInputSectionsList(); 935 InputSectionDescription *readInputSectionRules(StringRef FilePattern); 936 unsigned readPhdrType(); 937 SortSectionPolicy readSortKind(); 938 SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); 939 SymbolAssignment *readProvideOrAssignment(StringRef Tok, bool MakeAbsolute); 940 void readSort(); 941 Expr readAssert(); 942 943 Expr readExpr(); 944 Expr readExpr1(Expr Lhs, int MinPrec); 945 StringRef readParenLiteral(); 946 Expr readPrimary(); 947 Expr readTernary(Expr Cond); 948 Expr readParenExpr(); 949 950 // For parsing version script. 951 void readExtern(std::vector<SymbolVersion> *Globals); 952 void readVersionDeclaration(StringRef VerStr); 953 void readGlobal(StringRef VerStr); 954 void readLocal(); 955 956 ScriptConfiguration &Opt = *ScriptConfig; 957 StringSaver Saver = {ScriptConfig->Alloc}; 958 bool IsUnderSysroot; 959 }; 960 961 void ScriptParser::readVersionScript() { 962 readVersionScriptCommand(); 963 if (!atEOF()) 964 setError("EOF expected, but got " + next()); 965 } 966 967 void ScriptParser::readVersionScriptCommand() { 968 if (consume("{")) { 969 readVersionDeclaration(""); 970 return; 971 } 972 973 while (!atEOF() && !Error && peek() != "}") { 974 StringRef VerStr = next(); 975 if (VerStr == "{") { 976 setError("anonymous version definition is used in " 977 "combination with other version definitions"); 978 return; 979 } 980 expect("{"); 981 readVersionDeclaration(VerStr); 982 } 983 } 984 985 void ScriptParser::readVersion() { 986 expect("{"); 987 readVersionScriptCommand(); 988 expect("}"); 989 } 990 991 void ScriptParser::readLinkerScript() { 992 while (!atEOF()) { 993 StringRef Tok = next(); 994 if (Tok == ";") 995 continue; 996 997 if (Tok == "ASSERT") { 998 Opt.Commands.emplace_back(new AssertCommand(readAssert())); 999 } else if (Tok == "ENTRY") { 1000 readEntry(); 1001 } else if (Tok == "EXTERN") { 1002 readExtern(); 1003 } else if (Tok == "GROUP" || Tok == "INPUT") { 1004 readGroup(); 1005 } else if (Tok == "INCLUDE") { 1006 readInclude(); 1007 } else if (Tok == "OUTPUT") { 1008 readOutput(); 1009 } else if (Tok == "OUTPUT_ARCH") { 1010 readOutputArch(); 1011 } else if (Tok == "OUTPUT_FORMAT") { 1012 readOutputFormat(); 1013 } else if (Tok == "PHDRS") { 1014 readPhdrs(); 1015 } else if (Tok == "SEARCH_DIR") { 1016 readSearchDir(); 1017 } else if (Tok == "SECTIONS") { 1018 readSections(); 1019 } else if (Tok == "VERSION") { 1020 readVersion(); 1021 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok, true)) { 1022 Opt.Commands.emplace_back(Cmd); 1023 } else { 1024 setError("unknown directive: " + Tok); 1025 } 1026 } 1027 } 1028 1029 void ScriptParser::addFile(StringRef S) { 1030 if (IsUnderSysroot && S.startswith("/")) { 1031 SmallString<128> PathData; 1032 StringRef Path = (Config->Sysroot + S).toStringRef(PathData); 1033 if (sys::fs::exists(Path)) { 1034 Driver->addFile(Saver.save(Path)); 1035 return; 1036 } 1037 } 1038 1039 if (sys::path::is_absolute(S)) { 1040 Driver->addFile(S); 1041 } else if (S.startswith("=")) { 1042 if (Config->Sysroot.empty()) 1043 Driver->addFile(S.substr(1)); 1044 else 1045 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1))); 1046 } else if (S.startswith("-l")) { 1047 Driver->addLibrary(S.substr(2)); 1048 } else if (sys::fs::exists(S)) { 1049 Driver->addFile(S); 1050 } else { 1051 std::string Path = findFromSearchPaths(S); 1052 if (Path.empty()) 1053 setError("unable to find " + S); 1054 else 1055 Driver->addFile(Saver.save(Path)); 1056 } 1057 } 1058 1059 void ScriptParser::readAsNeeded() { 1060 expect("("); 1061 bool Orig = Config->AsNeeded; 1062 Config->AsNeeded = true; 1063 while (!Error && !consume(")")) 1064 addFile(unquote(next())); 1065 Config->AsNeeded = Orig; 1066 } 1067 1068 void ScriptParser::readEntry() { 1069 // -e <symbol> takes predecence over ENTRY(<symbol>). 1070 expect("("); 1071 StringRef Tok = next(); 1072 if (Config->Entry.empty()) 1073 Config->Entry = Tok; 1074 expect(")"); 1075 } 1076 1077 void ScriptParser::readExtern() { 1078 expect("("); 1079 while (!Error && !consume(")")) 1080 Config->Undefined.push_back(next()); 1081 } 1082 1083 void ScriptParser::readGroup() { 1084 expect("("); 1085 while (!Error && !consume(")")) { 1086 StringRef Tok = next(); 1087 if (Tok == "AS_NEEDED") 1088 readAsNeeded(); 1089 else 1090 addFile(unquote(Tok)); 1091 } 1092 } 1093 1094 void ScriptParser::readInclude() { 1095 StringRef Tok = next(); 1096 auto MBOrErr = MemoryBuffer::getFile(unquote(Tok)); 1097 if (!MBOrErr) { 1098 setError("cannot open " + Tok); 1099 return; 1100 } 1101 std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; 1102 StringRef S = Saver.save(MB->getMemBufferRef().getBuffer()); 1103 std::vector<StringRef> V = tokenize(S); 1104 Tokens.insert(Tokens.begin() + Pos, V.begin(), V.end()); 1105 } 1106 1107 void ScriptParser::readOutput() { 1108 // -o <file> takes predecence over OUTPUT(<file>). 1109 expect("("); 1110 StringRef Tok = next(); 1111 if (Config->OutputFile.empty()) 1112 Config->OutputFile = unquote(Tok); 1113 expect(")"); 1114 } 1115 1116 void ScriptParser::readOutputArch() { 1117 // Error checking only for now. 1118 expect("("); 1119 skip(); 1120 expect(")"); 1121 } 1122 1123 void ScriptParser::readOutputFormat() { 1124 // Error checking only for now. 1125 expect("("); 1126 skip(); 1127 StringRef Tok = next(); 1128 if (Tok == ")") 1129 return; 1130 if (Tok != ",") { 1131 setError("unexpected token: " + Tok); 1132 return; 1133 } 1134 skip(); 1135 expect(","); 1136 skip(); 1137 expect(")"); 1138 } 1139 1140 void ScriptParser::readPhdrs() { 1141 expect("{"); 1142 while (!Error && !consume("}")) { 1143 StringRef Tok = next(); 1144 Opt.PhdrsCommands.push_back( 1145 {Tok, PT_NULL, false, false, UINT_MAX, nullptr}); 1146 PhdrsCommand &PhdrCmd = Opt.PhdrsCommands.back(); 1147 1148 PhdrCmd.Type = readPhdrType(); 1149 do { 1150 Tok = next(); 1151 if (Tok == ";") 1152 break; 1153 if (Tok == "FILEHDR") 1154 PhdrCmd.HasFilehdr = true; 1155 else if (Tok == "PHDRS") 1156 PhdrCmd.HasPhdrs = true; 1157 else if (Tok == "AT") 1158 PhdrCmd.LMAExpr = readParenExpr(); 1159 else if (Tok == "FLAGS") { 1160 expect("("); 1161 // Passing 0 for the value of dot is a bit of a hack. It means that 1162 // we accept expressions like ".|1". 1163 PhdrCmd.Flags = readExpr()(0); 1164 expect(")"); 1165 } else 1166 setError("unexpected header attribute: " + Tok); 1167 } while (!Error); 1168 } 1169 } 1170 1171 void ScriptParser::readSearchDir() { 1172 expect("("); 1173 StringRef Tok = next(); 1174 if (!Config->Nostdlib) 1175 Config->SearchPaths.push_back(unquote(Tok)); 1176 expect(")"); 1177 } 1178 1179 void ScriptParser::readSections() { 1180 Opt.HasSections = true; 1181 expect("{"); 1182 while (!Error && !consume("}")) { 1183 StringRef Tok = next(); 1184 BaseCommand *Cmd = readProvideOrAssignment(Tok, true); 1185 if (!Cmd) { 1186 if (Tok == "ASSERT") 1187 Cmd = new AssertCommand(readAssert()); 1188 else 1189 Cmd = readOutputSectionDescription(Tok); 1190 } 1191 Opt.Commands.emplace_back(Cmd); 1192 } 1193 } 1194 1195 static int precedence(StringRef Op) { 1196 return StringSwitch<int>(Op) 1197 .Cases("*", "/", 5) 1198 .Cases("+", "-", 4) 1199 .Cases("<<", ">>", 3) 1200 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 1201 .Cases("&", "|", 1) 1202 .Default(-1); 1203 } 1204 1205 Regex ScriptParser::readFilePatterns() { 1206 std::vector<StringRef> V; 1207 while (!Error && !consume(")")) 1208 V.push_back(next()); 1209 return compileGlobPatterns(V); 1210 } 1211 1212 SortSectionPolicy ScriptParser::readSortKind() { 1213 if (consume("SORT") || consume("SORT_BY_NAME")) 1214 return SortSectionPolicy::Name; 1215 if (consume("SORT_BY_ALIGNMENT")) 1216 return SortSectionPolicy::Alignment; 1217 if (consume("SORT_BY_INIT_PRIORITY")) 1218 return SortSectionPolicy::Priority; 1219 if (consume("SORT_NONE")) 1220 return SortSectionPolicy::None; 1221 return SortSectionPolicy::Default; 1222 } 1223 1224 // Method reads a list of sequence of excluded files and section globs given in 1225 // a following form: ((EXCLUDE_FILE(file_pattern+))? section_pattern+)+ 1226 // Example: *(.foo.1 EXCLUDE_FILE (*a.o) .foo.2 EXCLUDE_FILE (*b.o) .foo.3) 1227 // The semantics of that is next: 1228 // * Include .foo.1 from every file. 1229 // * Include .foo.2 from every file but a.o 1230 // * Include .foo.3 from every file but b.o 1231 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 1232 std::vector<SectionPattern> Ret; 1233 while (!Error && peek() != ")") { 1234 Regex ExcludeFileRe; 1235 if (consume("EXCLUDE_FILE")) { 1236 expect("("); 1237 ExcludeFileRe = readFilePatterns(); 1238 } 1239 1240 std::vector<StringRef> V; 1241 while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") 1242 V.push_back(next()); 1243 1244 if (!V.empty()) 1245 Ret.push_back({std::move(ExcludeFileRe), compileGlobPatterns(V)}); 1246 else 1247 setError("section pattern is expected"); 1248 } 1249 return Ret; 1250 } 1251 1252 // Section pattern grammar can have complex expressions, for example: 1253 // *(SORT(.foo.* EXCLUDE_FILE (*file1.o) .bar.*) .bar.* SORT(.zed.*)) 1254 // Generally is a sequence of globs and excludes that may be wrapped in a SORT() 1255 // commands, like: SORT(glob0) glob1 glob2 SORT(glob4) 1256 // This methods handles wrapping sequences of excluded files and section globs 1257 // into SORT() if that needed and reads them all. 1258 InputSectionDescription * 1259 ScriptParser::readInputSectionRules(StringRef FilePattern) { 1260 auto *Cmd = new InputSectionDescription(FilePattern); 1261 expect("("); 1262 while (!HasError && !consume(")")) { 1263 SortSectionPolicy Outer = readSortKind(); 1264 SortSectionPolicy Inner = SortSectionPolicy::Default; 1265 std::vector<SectionPattern> V; 1266 if (Outer != SortSectionPolicy::Default) { 1267 expect("("); 1268 Inner = readSortKind(); 1269 if (Inner != SortSectionPolicy::Default) { 1270 expect("("); 1271 V = readInputSectionsList(); 1272 expect(")"); 1273 } else { 1274 V = readInputSectionsList(); 1275 } 1276 expect(")"); 1277 } else { 1278 V = readInputSectionsList(); 1279 } 1280 1281 for (SectionPattern &Pat : V) { 1282 Pat.SortInner = Inner; 1283 Pat.SortOuter = Outer; 1284 } 1285 1286 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 1287 } 1288 return Cmd; 1289 } 1290 1291 InputSectionDescription * 1292 ScriptParser::readInputSectionDescription(StringRef Tok) { 1293 // Input section wildcard can be surrounded by KEEP. 1294 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 1295 if (Tok == "KEEP") { 1296 expect("("); 1297 StringRef FilePattern = next(); 1298 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 1299 expect(")"); 1300 Opt.KeptSections.push_back(Cmd); 1301 return Cmd; 1302 } 1303 return readInputSectionRules(Tok); 1304 } 1305 1306 void ScriptParser::readSort() { 1307 expect("("); 1308 expect("CONSTRUCTORS"); 1309 expect(")"); 1310 } 1311 1312 Expr ScriptParser::readAssert() { 1313 expect("("); 1314 Expr E = readExpr(); 1315 expect(","); 1316 StringRef Msg = unquote(next()); 1317 expect(")"); 1318 return [=](uint64_t Dot) { 1319 uint64_t V = E(Dot); 1320 if (!V) 1321 error(Msg); 1322 return V; 1323 }; 1324 } 1325 1326 // Reads a FILL(expr) command. We handle the FILL command as an 1327 // alias for =fillexp section attribute, which is different from 1328 // what GNU linkers do. 1329 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 1330 std::vector<uint8_t> ScriptParser::readFill() { 1331 expect("("); 1332 std::vector<uint8_t> V = readOutputSectionFiller(next()); 1333 expect(")"); 1334 expect(";"); 1335 return V; 1336 } 1337 1338 OutputSectionCommand * 1339 ScriptParser::readOutputSectionDescription(StringRef OutSec) { 1340 OutputSectionCommand *Cmd = new OutputSectionCommand(OutSec); 1341 1342 // Read an address expression. 1343 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html#Output-Section-Address 1344 if (peek() != ":") 1345 Cmd->AddrExpr = readExpr(); 1346 1347 expect(":"); 1348 1349 if (consume("AT")) 1350 Cmd->LMAExpr = readParenExpr(); 1351 if (consume("ALIGN")) 1352 Cmd->AlignExpr = readParenExpr(); 1353 if (consume("SUBALIGN")) 1354 Cmd->SubalignExpr = readParenExpr(); 1355 1356 // Parse constraints. 1357 if (consume("ONLY_IF_RO")) 1358 Cmd->Constraint = ConstraintKind::ReadOnly; 1359 if (consume("ONLY_IF_RW")) 1360 Cmd->Constraint = ConstraintKind::ReadWrite; 1361 expect("{"); 1362 1363 while (!Error && !consume("}")) { 1364 StringRef Tok = next(); 1365 if (SymbolAssignment *Assignment = readProvideOrAssignment(Tok, false)) 1366 Cmd->Commands.emplace_back(Assignment); 1367 else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) 1368 Cmd->Commands.emplace_back(Data); 1369 else if (Tok == "FILL") 1370 Cmd->Filler = readFill(); 1371 else if (Tok == "SORT") 1372 readSort(); 1373 else if (peek() == "(") 1374 Cmd->Commands.emplace_back(readInputSectionDescription(Tok)); 1375 else 1376 setError("unknown command " + Tok); 1377 } 1378 Cmd->Phdrs = readOutputSectionPhdrs(); 1379 1380 if (consume("=")) 1381 Cmd->Filler = readOutputSectionFiller(next()); 1382 else if (peek().startswith("=")) 1383 Cmd->Filler = readOutputSectionFiller(next().drop_front()); 1384 1385 return Cmd; 1386 } 1387 1388 // Read "=<number>" where <number> is an octal/decimal/hexadecimal number. 1389 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 1390 // 1391 // ld.gold is not fully compatible with ld.bfd. ld.bfd handles 1392 // hexstrings as blobs of arbitrary sizes, while ld.gold handles them 1393 // as 32-bit big-endian values. We will do the same as ld.gold does 1394 // because it's simpler than what ld.bfd does. 1395 std::vector<uint8_t> ScriptParser::readOutputSectionFiller(StringRef Tok) { 1396 uint32_t V; 1397 if (Tok.getAsInteger(0, V)) { 1398 setError("invalid filler expression: " + Tok); 1399 return {}; 1400 } 1401 return {uint8_t(V >> 24), uint8_t(V >> 16), uint8_t(V >> 8), uint8_t(V)}; 1402 } 1403 1404 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { 1405 expect("("); 1406 SymbolAssignment *Cmd = readAssignment(next()); 1407 Cmd->Provide = Provide; 1408 Cmd->Hidden = Hidden; 1409 expect(")"); 1410 expect(";"); 1411 return Cmd; 1412 } 1413 1414 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok, 1415 bool MakeAbsolute) { 1416 SymbolAssignment *Cmd = nullptr; 1417 if (peek() == "=" || peek() == "+=") { 1418 Cmd = readAssignment(Tok); 1419 expect(";"); 1420 } else if (Tok == "PROVIDE") { 1421 Cmd = readProvideHidden(true, false); 1422 } else if (Tok == "HIDDEN") { 1423 Cmd = readProvideHidden(false, true); 1424 } else if (Tok == "PROVIDE_HIDDEN") { 1425 Cmd = readProvideHidden(true, true); 1426 } 1427 if (Cmd && MakeAbsolute) 1428 Cmd->IsAbsolute = true; 1429 return Cmd; 1430 } 1431 1432 static uint64_t getSymbolValue(StringRef S, uint64_t Dot) { 1433 if (S == ".") 1434 return Dot; 1435 return ScriptBase->getSymbolValue(S); 1436 } 1437 1438 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { 1439 StringRef Op = next(); 1440 bool IsAbsolute = false; 1441 Expr E; 1442 assert(Op == "=" || Op == "+="); 1443 if (consume("ABSOLUTE")) { 1444 // The RHS may be something like "ABSOLUTE(.) & 0xff". 1445 // Call readExpr1 to read the whole expression. 1446 E = readExpr1(readParenExpr(), 0); 1447 IsAbsolute = true; 1448 } else { 1449 E = readExpr(); 1450 } 1451 if (Op == "+=") 1452 E = [=](uint64_t Dot) { return getSymbolValue(Name, Dot) + E(Dot); }; 1453 return new SymbolAssignment(Name, E, IsAbsolute); 1454 } 1455 1456 // This is an operator-precedence parser to parse a linker 1457 // script expression. 1458 Expr ScriptParser::readExpr() { return readExpr1(readPrimary(), 0); } 1459 1460 static Expr combine(StringRef Op, Expr L, Expr R) { 1461 if (Op == "*") 1462 return [=](uint64_t Dot) { return L(Dot) * R(Dot); }; 1463 if (Op == "/") { 1464 return [=](uint64_t Dot) -> uint64_t { 1465 uint64_t RHS = R(Dot); 1466 if (RHS == 0) { 1467 error("division by zero"); 1468 return 0; 1469 } 1470 return L(Dot) / RHS; 1471 }; 1472 } 1473 if (Op == "+") 1474 return [=](uint64_t Dot) { return L(Dot) + R(Dot); }; 1475 if (Op == "-") 1476 return [=](uint64_t Dot) { return L(Dot) - R(Dot); }; 1477 if (Op == "<<") 1478 return [=](uint64_t Dot) { return L(Dot) << R(Dot); }; 1479 if (Op == ">>") 1480 return [=](uint64_t Dot) { return L(Dot) >> R(Dot); }; 1481 if (Op == "<") 1482 return [=](uint64_t Dot) { return L(Dot) < R(Dot); }; 1483 if (Op == ">") 1484 return [=](uint64_t Dot) { return L(Dot) > R(Dot); }; 1485 if (Op == ">=") 1486 return [=](uint64_t Dot) { return L(Dot) >= R(Dot); }; 1487 if (Op == "<=") 1488 return [=](uint64_t Dot) { return L(Dot) <= R(Dot); }; 1489 if (Op == "==") 1490 return [=](uint64_t Dot) { return L(Dot) == R(Dot); }; 1491 if (Op == "!=") 1492 return [=](uint64_t Dot) { return L(Dot) != R(Dot); }; 1493 if (Op == "&") 1494 return [=](uint64_t Dot) { return L(Dot) & R(Dot); }; 1495 if (Op == "|") 1496 return [=](uint64_t Dot) { return L(Dot) | R(Dot); }; 1497 llvm_unreachable("invalid operator"); 1498 } 1499 1500 // This is a part of the operator-precedence parser. This function 1501 // assumes that the remaining token stream starts with an operator. 1502 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { 1503 while (!atEOF() && !Error) { 1504 // Read an operator and an expression. 1505 StringRef Op1 = peek(); 1506 if (Op1 == "?") 1507 return readTernary(Lhs); 1508 if (precedence(Op1) < MinPrec) 1509 break; 1510 skip(); 1511 Expr Rhs = readPrimary(); 1512 1513 // Evaluate the remaining part of the expression first if the 1514 // next operator has greater precedence than the previous one. 1515 // For example, if we have read "+" and "3", and if the next 1516 // operator is "*", then we'll evaluate 3 * ... part first. 1517 while (!atEOF()) { 1518 StringRef Op2 = peek(); 1519 if (precedence(Op2) <= precedence(Op1)) 1520 break; 1521 Rhs = readExpr1(Rhs, precedence(Op2)); 1522 } 1523 1524 Lhs = combine(Op1, Lhs, Rhs); 1525 } 1526 return Lhs; 1527 } 1528 1529 uint64_t static getConstant(StringRef S) { 1530 if (S == "COMMONPAGESIZE") 1531 return Target->PageSize; 1532 if (S == "MAXPAGESIZE") 1533 return Config->MaxPageSize; 1534 error("unknown constant: " + S); 1535 return 0; 1536 } 1537 1538 // Parses Tok as an integer. Returns true if successful. 1539 // It recognizes hexadecimal (prefixed with "0x" or suffixed with "H") 1540 // and decimal numbers. Decimal numbers may have "K" (kilo) or 1541 // "M" (mega) prefixes. 1542 static bool readInteger(StringRef Tok, uint64_t &Result) { 1543 if (Tok.startswith("-")) { 1544 if (!readInteger(Tok.substr(1), Result)) 1545 return false; 1546 Result = -Result; 1547 return true; 1548 } 1549 if (Tok.startswith_lower("0x")) 1550 return !Tok.substr(2).getAsInteger(16, Result); 1551 if (Tok.endswith_lower("H")) 1552 return !Tok.drop_back().getAsInteger(16, Result); 1553 1554 int Suffix = 1; 1555 if (Tok.endswith_lower("K")) { 1556 Suffix = 1024; 1557 Tok = Tok.drop_back(); 1558 } else if (Tok.endswith_lower("M")) { 1559 Suffix = 1024 * 1024; 1560 Tok = Tok.drop_back(); 1561 } 1562 if (Tok.getAsInteger(10, Result)) 1563 return false; 1564 Result *= Suffix; 1565 return true; 1566 } 1567 1568 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { 1569 int Size = StringSwitch<unsigned>(Tok) 1570 .Case("BYTE", 1) 1571 .Case("SHORT", 2) 1572 .Case("LONG", 4) 1573 .Case("QUAD", 8) 1574 .Default(-1); 1575 if (Size == -1) 1576 return nullptr; 1577 1578 expect("("); 1579 uint64_t Val = 0; 1580 StringRef S = next(); 1581 if (!readInteger(S, Val)) 1582 setError("unexpected value: " + S); 1583 expect(")"); 1584 return new BytesDataCommand(Val, Size); 1585 } 1586 1587 StringRef ScriptParser::readParenLiteral() { 1588 expect("("); 1589 StringRef Tok = next(); 1590 expect(")"); 1591 return Tok; 1592 } 1593 1594 Expr ScriptParser::readPrimary() { 1595 if (peek() == "(") 1596 return readParenExpr(); 1597 1598 StringRef Tok = next(); 1599 1600 if (Tok == "~") { 1601 Expr E = readPrimary(); 1602 return [=](uint64_t Dot) { return ~E(Dot); }; 1603 } 1604 if (Tok == "-") { 1605 Expr E = readPrimary(); 1606 return [=](uint64_t Dot) { return -E(Dot); }; 1607 } 1608 1609 // Built-in functions are parsed here. 1610 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 1611 if (Tok == "ADDR") { 1612 StringRef Name = readParenLiteral(); 1613 return 1614 [=](uint64_t Dot) { return ScriptBase->getOutputSectionAddress(Name); }; 1615 } 1616 if (Tok == "LOADADDR") { 1617 StringRef Name = readParenLiteral(); 1618 return [=](uint64_t Dot) { return ScriptBase->getOutputSectionLMA(Name); }; 1619 } 1620 if (Tok == "ASSERT") 1621 return readAssert(); 1622 if (Tok == "ALIGN") { 1623 Expr E = readParenExpr(); 1624 return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; 1625 } 1626 if (Tok == "CONSTANT") { 1627 StringRef Name = readParenLiteral(); 1628 return [=](uint64_t Dot) { return getConstant(Name); }; 1629 } 1630 if (Tok == "DEFINED") { 1631 expect("("); 1632 StringRef Tok = next(); 1633 expect(")"); 1634 return [=](uint64_t Dot) { return ScriptBase->isDefined(Tok) ? 1 : 0; }; 1635 } 1636 if (Tok == "SEGMENT_START") { 1637 expect("("); 1638 skip(); 1639 expect(","); 1640 Expr E = readExpr(); 1641 expect(")"); 1642 return [=](uint64_t Dot) { return E(Dot); }; 1643 } 1644 if (Tok == "DATA_SEGMENT_ALIGN") { 1645 expect("("); 1646 Expr E = readExpr(); 1647 expect(","); 1648 readExpr(); 1649 expect(")"); 1650 return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; 1651 } 1652 if (Tok == "DATA_SEGMENT_END") { 1653 expect("("); 1654 expect("."); 1655 expect(")"); 1656 return [](uint64_t Dot) { return Dot; }; 1657 } 1658 // GNU linkers implements more complicated logic to handle 1659 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and just align to 1660 // the next page boundary for simplicity. 1661 if (Tok == "DATA_SEGMENT_RELRO_END") { 1662 expect("("); 1663 readExpr(); 1664 expect(","); 1665 readExpr(); 1666 expect(")"); 1667 return [](uint64_t Dot) { return alignTo(Dot, Target->PageSize); }; 1668 } 1669 if (Tok == "SIZEOF") { 1670 StringRef Name = readParenLiteral(); 1671 return [=](uint64_t Dot) { return ScriptBase->getOutputSectionSize(Name); }; 1672 } 1673 if (Tok == "ALIGNOF") { 1674 StringRef Name = readParenLiteral(); 1675 return 1676 [=](uint64_t Dot) { return ScriptBase->getOutputSectionAlign(Name); }; 1677 } 1678 if (Tok == "SIZEOF_HEADERS") 1679 return [=](uint64_t Dot) { return ScriptBase->getHeaderSize(); }; 1680 1681 // Tok is a literal number. 1682 uint64_t V; 1683 if (readInteger(Tok, V)) 1684 return [=](uint64_t Dot) { return V; }; 1685 1686 // Tok is a symbol name. 1687 if (Tok != "." && !isValidCIdentifier(Tok)) 1688 setError("malformed number: " + Tok); 1689 return [=](uint64_t Dot) { return getSymbolValue(Tok, Dot); }; 1690 } 1691 1692 Expr ScriptParser::readTernary(Expr Cond) { 1693 skip(); 1694 Expr L = readExpr(); 1695 expect(":"); 1696 Expr R = readExpr(); 1697 return [=](uint64_t Dot) { return Cond(Dot) ? L(Dot) : R(Dot); }; 1698 } 1699 1700 Expr ScriptParser::readParenExpr() { 1701 expect("("); 1702 Expr E = readExpr(); 1703 expect(")"); 1704 return E; 1705 } 1706 1707 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 1708 std::vector<StringRef> Phdrs; 1709 while (!Error && peek().startswith(":")) { 1710 StringRef Tok = next(); 1711 Tok = (Tok.size() == 1) ? next() : Tok.substr(1); 1712 if (Tok.empty()) { 1713 setError("section header name is empty"); 1714 break; 1715 } 1716 Phdrs.push_back(Tok); 1717 } 1718 return Phdrs; 1719 } 1720 1721 // Read a program header type name. The next token must be a 1722 // name of a program header type or a constant (e.g. "0x3"). 1723 unsigned ScriptParser::readPhdrType() { 1724 StringRef Tok = next(); 1725 uint64_t Val; 1726 if (readInteger(Tok, Val)) 1727 return Val; 1728 1729 unsigned Ret = StringSwitch<unsigned>(Tok) 1730 .Case("PT_NULL", PT_NULL) 1731 .Case("PT_LOAD", PT_LOAD) 1732 .Case("PT_DYNAMIC", PT_DYNAMIC) 1733 .Case("PT_INTERP", PT_INTERP) 1734 .Case("PT_NOTE", PT_NOTE) 1735 .Case("PT_SHLIB", PT_SHLIB) 1736 .Case("PT_PHDR", PT_PHDR) 1737 .Case("PT_TLS", PT_TLS) 1738 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1739 .Case("PT_GNU_STACK", PT_GNU_STACK) 1740 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1741 .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) 1742 .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) 1743 .Default(-1); 1744 1745 if (Ret == (unsigned)-1) { 1746 setError("invalid program header type: " + Tok); 1747 return PT_NULL; 1748 } 1749 return Ret; 1750 } 1751 1752 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1753 // Identifiers start at 2 because 0 and 1 are reserved 1754 // for VER_NDX_LOCAL and VER_NDX_GLOBAL constants. 1755 size_t VersionId = Config->VersionDefinitions.size() + 2; 1756 Config->VersionDefinitions.push_back({VerStr, VersionId}); 1757 1758 if (consume("global:") || peek() != "local:") 1759 readGlobal(VerStr); 1760 if (consume("local:")) 1761 readLocal(); 1762 expect("}"); 1763 1764 // Each version may have a parent version. For example, "Ver2" defined as 1765 // "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" as a parent. This 1766 // version hierarchy is, probably against your instinct, purely for human; the 1767 // runtime doesn't care about them at all. In LLD, we simply skip the token. 1768 if (!VerStr.empty() && peek() != ";") 1769 skip(); 1770 expect(";"); 1771 } 1772 1773 void ScriptParser::readLocal() { 1774 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1775 expect("*"); 1776 expect(";"); 1777 } 1778 1779 void ScriptParser::readExtern(std::vector<SymbolVersion> *Globals) { 1780 expect("\"C++\""); 1781 expect("{"); 1782 1783 for (;;) { 1784 if (peek() == "}" || Error) 1785 break; 1786 bool HasWildcard = !peek().startswith("\"") && hasWildcard(peek()); 1787 Globals->push_back({unquote(next()), true, HasWildcard}); 1788 expect(";"); 1789 } 1790 1791 expect("}"); 1792 expect(";"); 1793 } 1794 1795 void ScriptParser::readGlobal(StringRef VerStr) { 1796 std::vector<SymbolVersion> *Globals; 1797 if (VerStr.empty()) 1798 Globals = &Config->VersionScriptGlobals; 1799 else 1800 Globals = &Config->VersionDefinitions.back().Globals; 1801 1802 for (;;) { 1803 if (consume("extern")) 1804 readExtern(Globals); 1805 1806 StringRef Cur = peek(); 1807 if (Cur == "}" || Cur == "local:" || Error) 1808 return; 1809 skip(); 1810 Globals->push_back({unquote(Cur), false, hasWildcard(Cur)}); 1811 expect(";"); 1812 } 1813 } 1814 1815 static bool isUnderSysroot(StringRef Path) { 1816 if (Config->Sysroot == "") 1817 return false; 1818 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 1819 if (sys::fs::equivalent(Config->Sysroot, Path)) 1820 return true; 1821 return false; 1822 } 1823 1824 void elf::readLinkerScript(MemoryBufferRef MB) { 1825 StringRef Path = MB.getBufferIdentifier(); 1826 ScriptParser(MB.getBuffer(), isUnderSysroot(Path)).readLinkerScript(); 1827 } 1828 1829 void elf::readVersionScript(MemoryBufferRef MB) { 1830 ScriptParser(MB.getBuffer(), false).readVersionScript(); 1831 } 1832 1833 template class elf::LinkerScript<ELF32LE>; 1834 template class elf::LinkerScript<ELF32BE>; 1835 template class elf::LinkerScript<ELF64LE>; 1836 template class elf::LinkerScript<ELF64BE>; 1837