1 //===- LinkerScript.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the parser/evaluator of the linker script. 11 // It parses a linker script and write the result to Config or ScriptConfig 12 // objects. 13 // 14 // If SECTIONS command is used, a ScriptConfig contains an AST 15 // of the command which will later be consumed by createSections() and 16 // assignAddresses(). 17 // 18 //===----------------------------------------------------------------------===// 19 20 #include "LinkerScript.h" 21 #include "Config.h" 22 #include "Driver.h" 23 #include "InputSection.h" 24 #include "OutputSections.h" 25 #include "ScriptParser.h" 26 #include "Strings.h" 27 #include "Symbols.h" 28 #include "SymbolTable.h" 29 #include "Target.h" 30 #include "Writer.h" 31 #include "llvm/ADT/StringSwitch.h" 32 #include "llvm/Support/ELF.h" 33 #include "llvm/Support/FileSystem.h" 34 #include "llvm/Support/MemoryBuffer.h" 35 #include "llvm/Support/Path.h" 36 #include "llvm/Support/StringSaver.h" 37 38 using namespace llvm; 39 using namespace llvm::ELF; 40 using namespace llvm::object; 41 using namespace llvm::support::endian; 42 using namespace lld; 43 using namespace lld::elf; 44 45 LinkerScriptBase *elf::ScriptBase; 46 ScriptConfiguration *elf::ScriptConfig; 47 48 template <class ELFT> static void addRegular(SymbolAssignment *Cmd) { 49 Symbol *Sym = Symtab<ELFT>::X->addRegular(Cmd->Name, STB_GLOBAL, STV_DEFAULT); 50 Sym->Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT; 51 Cmd->Sym = Sym->body(); 52 53 // If we have no SECTIONS then we don't have '.' and don't call 54 // assignAddresses(). We calculate symbol value immediately in this case. 
55 if (!ScriptConfig->HasSections) 56 cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Cmd->Expression(0); 57 } 58 59 template <class ELFT> static void addSynthetic(SymbolAssignment *Cmd) { 60 Symbol *Sym = Symtab<ELFT>::X->addSynthetic( 61 Cmd->Name, nullptr, 0, Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT); 62 Cmd->Sym = Sym->body(); 63 } 64 65 template <class ELFT> static void addSymbol(SymbolAssignment *Cmd) { 66 if (Cmd->IsAbsolute) 67 addRegular<ELFT>(Cmd); 68 else 69 addSynthetic<ELFT>(Cmd); 70 } 71 // If a symbol was in PROVIDE(), we need to define it only when 72 // it is an undefined symbol. 73 template <class ELFT> static bool shouldDefine(SymbolAssignment *Cmd) { 74 if (Cmd->Name == ".") 75 return false; 76 if (!Cmd->Provide) 77 return true; 78 SymbolBody *B = Symtab<ELFT>::X->find(Cmd->Name); 79 return B && B->isUndefined(); 80 } 81 82 bool SymbolAssignment::classof(const BaseCommand *C) { 83 return C->Kind == AssignmentKind; 84 } 85 86 bool OutputSectionCommand::classof(const BaseCommand *C) { 87 return C->Kind == OutputSectionKind; 88 } 89 90 bool InputSectionDescription::classof(const BaseCommand *C) { 91 return C->Kind == InputSectionKind; 92 } 93 94 bool AssertCommand::classof(const BaseCommand *C) { 95 return C->Kind == AssertKind; 96 } 97 98 bool BytesDataCommand::classof(const BaseCommand *C) { 99 return C->Kind == BytesDataKind; 100 } 101 102 template <class ELFT> static bool isDiscarded(InputSectionBase<ELFT> *S) { 103 return !S || !S->Live; 104 } 105 106 template <class ELFT> LinkerScript<ELFT>::LinkerScript() {} 107 template <class ELFT> LinkerScript<ELFT>::~LinkerScript() {} 108 109 template <class ELFT> 110 bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) { 111 for (InputSectionDescription *ID : Opt.KeptSections) { 112 StringRef Filename = S->getFile()->getName(); 113 if (!ID->FileRe.match(sys::path::filename(Filename))) 114 continue; 115 116 for (SectionPattern &P : ID->SectionPatterns) 117 if (P.SectionRe.match(S->Name)) 118 return 
true; 119 } 120 return false; 121 } 122 123 static bool comparePriority(InputSectionData *A, InputSectionData *B) { 124 return getPriority(A->Name) < getPriority(B->Name); 125 } 126 127 static bool compareName(InputSectionData *A, InputSectionData *B) { 128 return A->Name < B->Name; 129 } 130 131 static bool compareAlignment(InputSectionData *A, InputSectionData *B) { 132 // ">" is not a mistake. Larger alignments are placed before smaller 133 // alignments in order to reduce the amount of padding necessary. 134 // This is compatible with GNU. 135 return A->Alignment > B->Alignment; 136 } 137 138 static std::function<bool(InputSectionData *, InputSectionData *)> 139 getComparator(SortSectionPolicy K) { 140 switch (K) { 141 case SortSectionPolicy::Alignment: 142 return compareAlignment; 143 case SortSectionPolicy::Name: 144 return compareName; 145 case SortSectionPolicy::Priority: 146 return comparePriority; 147 default: 148 llvm_unreachable("unknown sort policy"); 149 } 150 } 151 152 template <class ELFT> 153 static bool matchConstraints(ArrayRef<InputSectionBase<ELFT> *> Sections, 154 ConstraintKind Kind) { 155 if (Kind == ConstraintKind::NoConstraint) 156 return true; 157 bool IsRW = llvm::any_of(Sections, [=](InputSectionData *Sec2) { 158 auto *Sec = static_cast<InputSectionBase<ELFT> *>(Sec2); 159 return Sec->getSectionHdr()->sh_flags & SHF_WRITE; 160 }); 161 return (IsRW && Kind == ConstraintKind::ReadWrite) || 162 (!IsRW && Kind == ConstraintKind::ReadOnly); 163 } 164 165 static void sortSections(InputSectionData **Begin, InputSectionData **End, 166 SortSectionPolicy K) { 167 if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None) 168 std::stable_sort(Begin, End, getComparator(K)); 169 } 170 171 // Compute and remember which sections the InputSectionDescription matches. 
172 template <class ELFT> 173 void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) { 174 // Collects all sections that satisfy constraints of I 175 // and attach them to I. 176 for (SectionPattern &Pat : I->SectionPatterns) { 177 size_t SizeBefore = I->Sections.size(); 178 for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles()) { 179 StringRef Filename = sys::path::filename(F->getName()); 180 if (!I->FileRe.match(Filename) || Pat.ExcludedFileRe.match(Filename)) 181 continue; 182 183 for (InputSectionBase<ELFT> *S : F->getSections()) 184 if (!isDiscarded(S) && !S->OutSec && Pat.SectionRe.match(S->Name)) 185 I->Sections.push_back(S); 186 if (Pat.SectionRe.match("COMMON")) 187 I->Sections.push_back(CommonInputSection<ELFT>::X); 188 } 189 190 // Sort sections as instructed by SORT-family commands and --sort-section 191 // option. Because SORT-family commands can be nested at most two depth 192 // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command 193 // line option is respected even if a SORT command is given, the exact 194 // behavior we have here is a bit complicated. Here are the rules. 195 // 196 // 1. If two SORT commands are given, --sort-section is ignored. 197 // 2. If one SORT command is given, and if it is not SORT_NONE, 198 // --sort-section is handled as an inner SORT command. 199 // 3. If one SORT command is given, and if it is SORT_NONE, don't sort. 200 // 4. If no SORT command is given, sort according to --sort-section. 
201 InputSectionData **Begin = I->Sections.data() + SizeBefore; 202 InputSectionData **End = I->Sections.data() + I->Sections.size(); 203 if (Pat.SortOuter != SortSectionPolicy::None) { 204 if (Pat.SortInner == SortSectionPolicy::Default) 205 sortSections(Begin, End, Config->SortSection); 206 else 207 sortSections(Begin, End, Pat.SortInner); 208 sortSections(Begin, End, Pat.SortOuter); 209 } 210 } 211 212 // We do not add duplicate input sections, so mark them with a dummy output 213 // section for now. 214 for (InputSectionData *S : I->Sections) { 215 auto *S2 = static_cast<InputSectionBase<ELFT> *>(S); 216 S2->OutSec = (OutputSectionBase<ELFT> *)-1; 217 } 218 } 219 220 template <class ELFT> 221 void LinkerScript<ELFT>::discard(ArrayRef<InputSectionBase<ELFT> *> V) { 222 for (InputSectionBase<ELFT> *S : V) { 223 S->Live = false; 224 reportDiscarded(S); 225 } 226 } 227 228 template <class ELFT> 229 std::vector<InputSectionBase<ELFT> *> 230 LinkerScript<ELFT>::createInputSectionList(OutputSectionCommand &OutCmd) { 231 std::vector<InputSectionBase<ELFT> *> Ret; 232 233 for (const std::unique_ptr<BaseCommand> &Base : OutCmd.Commands) { 234 auto *Cmd = dyn_cast<InputSectionDescription>(Base.get()); 235 if (!Cmd) 236 continue; 237 computeInputSections(Cmd); 238 for (InputSectionData *S : Cmd->Sections) 239 Ret.push_back(static_cast<InputSectionBase<ELFT> *>(S)); 240 } 241 242 return Ret; 243 } 244 245 template <class ELFT> 246 static SectionKey<ELFT::Is64Bits> createKey(InputSectionBase<ELFT> *C, 247 StringRef OutsecName) { 248 // When using linker script the merge rules are different. 249 // Unfortunately, linker scripts are name based. This means that expressions 250 // like *(.foo*) can refer to multiple input sections that would normally be 251 // placed in different output sections. 
We cannot put them in different 252 // output sections or we would produce wrong results for 253 // start = .; *(.foo.*) end = .; *(.bar) 254 // and a mapping of .foo1 and .bar1 to one section and .foo2 and .bar2 to 255 // another. The problem is that there is no way to layout those output 256 // sections such that the .foo sections are the only thing between the 257 // start and end symbols. 258 259 // An extra annoyance is that we cannot simply disable merging of the contents 260 // of SHF_MERGE sections, but our implementation requires one output section 261 // per "kind" (string or not, which size/aligment). 262 // Fortunately, creating symbols in the middle of a merge section is not 263 // supported by bfd or gold, so we can just create multiple section in that 264 // case. 265 const typename ELFT::Shdr *H = C->getSectionHdr(); 266 typedef typename ELFT::uint uintX_t; 267 uintX_t Flags = H->sh_flags & (SHF_MERGE | SHF_STRINGS); 268 269 uintX_t Alignment = 0; 270 if (isa<MergeInputSection<ELFT>>(C)) 271 Alignment = std::max(H->sh_addralign, H->sh_entsize); 272 273 return SectionKey<ELFT::Is64Bits>{OutsecName, /*Type*/ 0, Flags, Alignment}; 274 } 275 276 template <class ELFT> 277 void LinkerScript<ELFT>::addSection(OutputSectionFactory<ELFT> &Factory, 278 InputSectionBase<ELFT> *Sec, 279 StringRef Name) { 280 OutputSectionBase<ELFT> *OutSec; 281 bool IsNew; 282 std::tie(OutSec, IsNew) = Factory.create(createKey(Sec, Name), Sec); 283 if (IsNew) 284 OutputSections->push_back(OutSec); 285 OutSec->addSection(Sec); 286 } 287 288 template <class ELFT> 289 void LinkerScript<ELFT>::processCommands(OutputSectionFactory<ELFT> &Factory) { 290 291 for (unsigned I = 0; I < Opt.Commands.size(); ++I) { 292 auto Iter = Opt.Commands.begin() + I; 293 const std::unique_ptr<BaseCommand> &Base1 = *Iter; 294 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base1.get())) { 295 if (shouldDefine<ELFT>(Cmd)) 296 addRegular<ELFT>(Cmd); 297 continue; 298 } 299 if (auto *Cmd = 
dyn_cast<AssertCommand>(Base1.get())) { 300 // If we don't have SECTIONS then output sections have already been 301 // created by Writer<ELFT>. The LinkerScript<ELFT>::assignAddresses 302 // will not be called, so ASSERT should be evaluated now. 303 if (!Opt.HasSections) 304 Cmd->Expression(0); 305 continue; 306 } 307 308 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base1.get())) { 309 std::vector<InputSectionBase<ELFT> *> V = createInputSectionList(*Cmd); 310 311 if (Cmd->Name == "/DISCARD/") { 312 discard(V); 313 continue; 314 } 315 316 if (!matchConstraints<ELFT>(V, Cmd->Constraint)) { 317 for (InputSectionBase<ELFT> *S : V) 318 S->OutSec = nullptr; 319 Opt.Commands.erase(Iter); 320 --I; 321 continue; 322 } 323 324 for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) 325 if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base.get())) 326 if (shouldDefine<ELFT>(OutCmd)) 327 addSymbol<ELFT>(OutCmd); 328 329 if (V.empty()) 330 continue; 331 332 for (InputSectionBase<ELFT> *Sec : V) { 333 addSection(Factory, Sec, Cmd->Name); 334 if (uint32_t Subalign = Cmd->SubalignExpr ? Cmd->SubalignExpr(0) : 0) 335 Sec->Alignment = Subalign; 336 } 337 } 338 } 339 } 340 341 template <class ELFT> 342 void LinkerScript<ELFT>::createSections(OutputSectionFactory<ELFT> &Factory) { 343 processCommands(Factory); 344 // Add orphan sections. 345 for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles()) 346 for (InputSectionBase<ELFT> *S : F->getSections()) 347 if (!isDiscarded(S) && !S->OutSec) 348 addSection(Factory, S, getOutputSectionName(S->Name)); 349 } 350 351 // Sets value of a section-defined symbol. Two kinds of 352 // symbols are processed: synthetic symbols, whose value 353 // is an offset from beginning of section and regular 354 // symbols whose value is absolute. 
355 template <class ELFT> 356 static void assignSectionSymbol(SymbolAssignment *Cmd, 357 OutputSectionBase<ELFT> *Sec, 358 typename ELFT::uint Off) { 359 if (!Cmd->Sym) 360 return; 361 362 if (auto *Body = dyn_cast<DefinedSynthetic<ELFT>>(Cmd->Sym)) { 363 Body->Section = Sec; 364 Body->Value = Cmd->Expression(Sec->getVA() + Off) - Sec->getVA(); 365 return; 366 } 367 auto *Body = cast<DefinedRegular<ELFT>>(Cmd->Sym); 368 Body->Value = Cmd->Expression(Sec->getVA() + Off); 369 } 370 371 template <class ELFT> static bool isTbss(OutputSectionBase<ELFT> *Sec) { 372 return (Sec->getFlags() & SHF_TLS) && Sec->getType() == SHT_NOBITS; 373 } 374 375 template <class ELFT> void LinkerScript<ELFT>::output(InputSection<ELFT> *S) { 376 if (!AlreadyOutputIS.insert(S).second) 377 return; 378 bool IsTbss = isTbss(CurOutSec); 379 380 uintX_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot; 381 Pos = alignTo(Pos, S->Alignment); 382 S->OutSecOff = Pos - CurOutSec->getVA(); 383 Pos += S->getSize(); 384 385 // Update output section size after adding each section. This is so that 386 // SIZEOF works correctly in the case below: 387 // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) } 388 CurOutSec->setSize(Pos - CurOutSec->getVA()); 389 390 if (IsTbss) 391 ThreadBssOffset = Pos - Dot; 392 else 393 Dot = Pos; 394 } 395 396 template <class ELFT> void LinkerScript<ELFT>::flush() { 397 if (!CurOutSec || !AlreadyOutputOS.insert(CurOutSec).second) 398 return; 399 if (auto *OutSec = dyn_cast<OutputSection<ELFT>>(CurOutSec)) { 400 for (InputSection<ELFT> *I : OutSec->Sections) 401 output(I); 402 } else { 403 Dot += CurOutSec->getSize(); 404 } 405 } 406 407 template <class ELFT> 408 void LinkerScript<ELFT>::switchTo(OutputSectionBase<ELFT> *Sec) { 409 if (CurOutSec == Sec) 410 return; 411 if (AlreadyOutputOS.count(Sec)) 412 return; 413 414 flush(); 415 CurOutSec = Sec; 416 417 Dot = alignTo(Dot, CurOutSec->getAlignment()); 418 CurOutSec->setVA(isTbss(CurOutSec) ? 
Dot + ThreadBssOffset : Dot); 419 420 // If neither AT nor AT> is specified for an allocatable section, the linker 421 // will set the LMA such that the difference between VMA and LMA for the 422 // section is the same as the preceding output section in the same region 423 // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html 424 CurOutSec->setLMAOffset(LMAOffset); 425 } 426 427 template <class ELFT> void LinkerScript<ELFT>::process(BaseCommand &Base) { 428 // This handles the assignments to symbol or to a location counter (.) 429 if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) { 430 if (AssignCmd->Name == ".") { 431 // Update to location counter means update to section size. 432 Dot = AssignCmd->Expression(Dot); 433 CurOutSec->setSize(Dot - CurOutSec->getVA()); 434 return; 435 } 436 assignSectionSymbol<ELFT>(AssignCmd, CurOutSec, Dot - CurOutSec->getVA()); 437 return; 438 } 439 440 // Handle BYTE(), SHORT(), LONG(), or QUAD(). 441 if (auto *DataCmd = dyn_cast<BytesDataCommand>(&Base)) { 442 DataCmd->Offset = Dot - CurOutSec->getVA(); 443 Dot += DataCmd->Size; 444 CurOutSec->setSize(Dot - CurOutSec->getVA()); 445 return; 446 } 447 448 // It handles single input section description command, 449 // calculates and assigns the offsets for each section and also 450 // updates the output section size. 
451 auto &ICmd = cast<InputSectionDescription>(Base); 452 for (InputSectionData *ID : ICmd.Sections) { 453 auto *IB = static_cast<InputSectionBase<ELFT> *>(ID); 454 switchTo(IB->OutSec); 455 if (auto *I = dyn_cast<InputSection<ELFT>>(IB)) 456 output(I); 457 else 458 flush(); 459 } 460 } 461 462 template <class ELFT> 463 static std::vector<OutputSectionBase<ELFT> *> 464 findSections(StringRef Name, 465 const std::vector<OutputSectionBase<ELFT> *> &Sections) { 466 std::vector<OutputSectionBase<ELFT> *> Ret; 467 for (OutputSectionBase<ELFT> *Sec : Sections) 468 if (Sec->getName() == Name) 469 Ret.push_back(Sec); 470 return Ret; 471 } 472 473 template <class ELFT> 474 void LinkerScript<ELFT>::assignOffsets(OutputSectionCommand *Cmd) { 475 if (Cmd->LMAExpr) 476 LMAOffset = Cmd->LMAExpr(Dot) - Dot; 477 std::vector<OutputSectionBase<ELFT> *> Sections = 478 findSections(Cmd->Name, *OutputSections); 479 if (Sections.empty()) 480 return; 481 switchTo(Sections[0]); 482 // Find the last section output location. We will output orphan sections 483 // there so that end symbols point to the correct location. 484 auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(), 485 [](const std::unique_ptr<BaseCommand> &Cmd) { 486 return !isa<SymbolAssignment>(*Cmd); 487 }) 488 .base(); 489 for (auto I = Cmd->Commands.begin(); I != E; ++I) 490 process(**I); 491 for (OutputSectionBase<ELFT> *Base : Sections) 492 switchTo(Base); 493 flush(); 494 std::for_each(E, Cmd->Commands.end(), 495 [this](std::unique_ptr<BaseCommand> &B) { process(*B.get()); }); 496 } 497 498 template <class ELFT> void LinkerScript<ELFT>::adjustSectionsBeforeSorting() { 499 // It is common practice to use very generic linker scripts. So for any 500 // given run some of the output sections in the script will be empty. 501 // We could create corresponding empty output sections, but that would 502 // clutter the output. 503 // We instead remove trivially empty sections. 
The bfd linker seems even 504 // more aggressive at removing them. 505 auto Pos = std::remove_if( 506 Opt.Commands.begin(), Opt.Commands.end(), 507 [&](const std::unique_ptr<BaseCommand> &Base) { 508 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 509 if (!Cmd) 510 return false; 511 std::vector<OutputSectionBase<ELFT> *> Secs = 512 findSections(Cmd->Name, *OutputSections); 513 if (!Secs.empty()) 514 return false; 515 for (const std::unique_ptr<BaseCommand> &I : Cmd->Commands) 516 if (!isa<InputSectionDescription>(I.get())) 517 return false; 518 return true; 519 }); 520 Opt.Commands.erase(Pos, Opt.Commands.end()); 521 522 // If the output section contains only symbol assignments, create a 523 // corresponding output section. The bfd linker seems to only create them if 524 // '.' is assigned to, but creating these section should not have any bad 525 // consequeces and gives us a section to put the symbol in. 526 uintX_t Flags = SHF_ALLOC; 527 uint32_t Type = 0; 528 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 529 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 530 if (!Cmd) 531 continue; 532 std::vector<OutputSectionBase<ELFT> *> Secs = 533 findSections(Cmd->Name, *OutputSections); 534 if (!Secs.empty()) { 535 Flags = Secs[0]->getFlags(); 536 Type = Secs[0]->getType(); 537 continue; 538 } 539 540 auto *OutSec = new OutputSection<ELFT>(Cmd->Name, Type, Flags); 541 Out<ELFT>::Pool.emplace_back(OutSec); 542 OutputSections->push_back(OutSec); 543 } 544 } 545 546 // When placing orphan sections, we want to place them after symbol assignments 547 // so that an orphan after 548 // begin_foo = .; 549 // foo : { *(foo) } 550 // end_foo = .; 551 // doesn't break the intended meaning of the begin/end symbols. 552 // We don't want to go over sections since Writer<ELFT>::sortSections is the 553 // one in charge of deciding the order of the sections. 
554 // We don't want to go over alignments, since doing so in 555 // rx_sec : { *(rx_sec) } 556 // . = ALIGN(0x1000); 557 // /* The RW PT_LOAD starts here*/ 558 // rw_sec : { *(rw_sec) } 559 // would mean that the RW PT_LOAD would become unaligned. 560 static bool shouldSkip(const BaseCommand &Cmd) { 561 if (isa<OutputSectionCommand>(Cmd)) 562 return false; 563 const auto *Assign = dyn_cast<SymbolAssignment>(&Cmd); 564 if (!Assign) 565 return true; 566 return Assign->Name != "."; 567 } 568 569 template <class ELFT> 570 void LinkerScript<ELFT>::assignAddresses(std::vector<PhdrEntry<ELFT>> &Phdrs) { 571 // Orphan sections are sections present in the input files which 572 // are not explicitly placed into the output file by the linker script. 573 // We place orphan sections at end of file. 574 // Other linkers places them using some heuristics as described in 575 // https://sourceware.org/binutils/docs/ld/Orphan-Sections.html#Orphan-Sections. 576 577 // The OutputSections are already in the correct order. 578 // This loops creates or moves commands as needed so that they are in the 579 // correct order. 580 int CmdIndex = 0; 581 for (OutputSectionBase<ELFT> *Sec : *OutputSections) { 582 StringRef Name = Sec->getName(); 583 584 // Find the last spot where we can insert a command and still get the 585 // correct result. 586 auto CmdIter = Opt.Commands.begin() + CmdIndex; 587 auto E = Opt.Commands.end(); 588 while (CmdIter != E && shouldSkip(**CmdIter)) { 589 ++CmdIter; 590 ++CmdIndex; 591 } 592 593 auto Pos = 594 std::find_if(CmdIter, E, [&](const std::unique_ptr<BaseCommand> &Base) { 595 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 596 return Cmd && Cmd->Name == Name; 597 }); 598 if (Pos == E) { 599 Opt.Commands.insert(CmdIter, 600 llvm::make_unique<OutputSectionCommand>(Name)); 601 ++CmdIndex; 602 continue; 603 } 604 605 // Continue from where we found it. 
606 CmdIndex = (Pos - Opt.Commands.begin()) + 1; 607 continue; 608 } 609 610 // Assign addresses as instructed by linker script SECTIONS sub-commands. 611 Dot = 0; 612 613 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 614 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) { 615 if (Cmd->Name == ".") { 616 Dot = Cmd->Expression(Dot); 617 } else if (Cmd->Sym) { 618 cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Cmd->Expression(Dot); 619 } 620 continue; 621 } 622 623 if (auto *Cmd = dyn_cast<AssertCommand>(Base.get())) { 624 Cmd->Expression(Dot); 625 continue; 626 } 627 628 auto *Cmd = cast<OutputSectionCommand>(Base.get()); 629 630 if (Cmd->AddrExpr) 631 Dot = Cmd->AddrExpr(Dot); 632 633 assignOffsets(Cmd); 634 } 635 636 uintX_t MinVA = std::numeric_limits<uintX_t>::max(); 637 for (OutputSectionBase<ELFT> *Sec : *OutputSections) { 638 if (Sec->getFlags() & SHF_ALLOC) 639 MinVA = std::min(MinVA, Sec->getVA()); 640 else 641 Sec->setVA(0); 642 } 643 644 uintX_t HeaderSize = getHeaderSize(); 645 auto FirstPTLoad = 646 std::find_if(Phdrs.begin(), Phdrs.end(), [](const PhdrEntry<ELFT> &E) { 647 return E.H.p_type == PT_LOAD; 648 }); 649 if (HeaderSize <= MinVA && FirstPTLoad != Phdrs.end()) { 650 // ELF and Program headers need to be right before the first section in 651 // memory. Set their addresses accordingly. 652 MinVA = alignDown(MinVA - HeaderSize, Target->PageSize); 653 Out<ELFT>::ElfHeader->setVA(MinVA); 654 Out<ELFT>::ProgramHeaders->setVA(Out<ELFT>::ElfHeader->getSize() + MinVA); 655 FirstPTLoad->First = Out<ELFT>::ElfHeader; 656 if (!FirstPTLoad->Last) 657 FirstPTLoad->Last = Out<ELFT>::ProgramHeaders; 658 } else if (!FirstPTLoad->First) { 659 // Sometimes the very first PT_LOAD segment can be empty. 660 // This happens if (all conditions met): 661 // - Linker script is used 662 // - First section in ELF image is not RO 663 // - Not enough space for program headers. 
664 // The code below removes empty PT_LOAD segment and updates 665 // program headers size. 666 Phdrs.erase(FirstPTLoad); 667 Out<ELFT>::ProgramHeaders->setSize(sizeof(typename ELFT::Phdr) * 668 Phdrs.size()); 669 } 670 } 671 672 // Creates program headers as instructed by PHDRS linker script command. 673 template <class ELFT> 674 std::vector<PhdrEntry<ELFT>> LinkerScript<ELFT>::createPhdrs() { 675 std::vector<PhdrEntry<ELFT>> Ret; 676 677 // Process PHDRS and FILEHDR keywords because they are not 678 // real output sections and cannot be added in the following loop. 679 for (const PhdrsCommand &Cmd : Opt.PhdrsCommands) { 680 Ret.emplace_back(Cmd.Type, Cmd.Flags == UINT_MAX ? PF_R : Cmd.Flags); 681 PhdrEntry<ELFT> &Phdr = Ret.back(); 682 683 if (Cmd.HasFilehdr) 684 Phdr.add(Out<ELFT>::ElfHeader); 685 if (Cmd.HasPhdrs) 686 Phdr.add(Out<ELFT>::ProgramHeaders); 687 688 if (Cmd.LMAExpr) { 689 Phdr.H.p_paddr = Cmd.LMAExpr(0); 690 Phdr.HasLMA = true; 691 } 692 } 693 694 // Add output sections to program headers. 695 PhdrEntry<ELFT> *Load = nullptr; 696 uintX_t Flags = PF_R; 697 for (OutputSectionBase<ELFT> *Sec : *OutputSections) { 698 if (!(Sec->getFlags() & SHF_ALLOC)) 699 break; 700 701 std::vector<size_t> PhdrIds = getPhdrIndices(Sec->getName()); 702 if (!PhdrIds.empty()) { 703 // Assign headers specified by linker script 704 for (size_t Id : PhdrIds) { 705 Ret[Id].add(Sec); 706 if (Opt.PhdrsCommands[Id].Flags == UINT_MAX) 707 Ret[Id].H.p_flags |= Sec->getPhdrFlags(); 708 } 709 } else { 710 // If we have no load segment or flags've changed then we want new load 711 // segment. 
712 uintX_t NewFlags = Sec->getPhdrFlags(); 713 if (Load == nullptr || Flags != NewFlags) { 714 Load = &*Ret.emplace(Ret.end(), PT_LOAD, NewFlags); 715 Flags = NewFlags; 716 } 717 Load->add(Sec); 718 } 719 } 720 return Ret; 721 } 722 723 template <class ELFT> bool LinkerScript<ELFT>::ignoreInterpSection() { 724 // Ignore .interp section in case we have PHDRS specification 725 // and PT_INTERP isn't listed. 726 return !Opt.PhdrsCommands.empty() && 727 llvm::find_if(Opt.PhdrsCommands, [](const PhdrsCommand &Cmd) { 728 return Cmd.Type == PT_INTERP; 729 }) == Opt.PhdrsCommands.end(); 730 } 731 732 template <class ELFT> 733 ArrayRef<uint8_t> LinkerScript<ELFT>::getFiller(StringRef Name) { 734 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 735 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 736 if (Cmd->Name == Name) 737 return Cmd->Filler; 738 return {}; 739 } 740 741 template <class ELFT> 742 static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) { 743 const endianness E = ELFT::TargetEndianness; 744 745 switch (Size) { 746 case 1: 747 *Buf = (uint8_t)Data; 748 break; 749 case 2: 750 write16<E>(Buf, Data); 751 break; 752 case 4: 753 write32<E>(Buf, Data); 754 break; 755 case 8: 756 write64<E>(Buf, Data); 757 break; 758 default: 759 llvm_unreachable("unsupported Size argument"); 760 } 761 } 762 763 template <class ELFT> 764 void LinkerScript<ELFT>::writeDataBytes(StringRef Name, uint8_t *Buf) { 765 int I = getSectionIndex(Name); 766 if (I == INT_MAX) 767 return; 768 769 OutputSectionCommand *Cmd = 770 dyn_cast<OutputSectionCommand>(Opt.Commands[I].get()); 771 for (const std::unique_ptr<BaseCommand> &Base2 : Cmd->Commands) 772 if (auto *DataCmd = dyn_cast<BytesDataCommand>(Base2.get())) 773 writeInt<ELFT>(&Buf[DataCmd->Offset], DataCmd->Data, DataCmd->Size); 774 } 775 776 template <class ELFT> bool LinkerScript<ELFT>::hasLMA(StringRef Name) { 777 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 778 if (auto *Cmd = 
dyn_cast<OutputSectionCommand>(Base.get())) 779 if (Cmd->LMAExpr && Cmd->Name == Name) 780 return true; 781 return false; 782 } 783 784 // Returns the index of the given section name in linker script 785 // SECTIONS commands. Sections are laid out as the same order as they 786 // were in the script. If a given name did not appear in the script, 787 // it returns INT_MAX, so that it will be laid out at end of file. 788 template <class ELFT> int LinkerScript<ELFT>::getSectionIndex(StringRef Name) { 789 int I = 0; 790 for (std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 791 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 792 if (Cmd->Name == Name) 793 return I; 794 ++I; 795 } 796 return INT_MAX; 797 } 798 799 template <class ELFT> bool LinkerScript<ELFT>::hasPhdrsCommands() { 800 return !Opt.PhdrsCommands.empty(); 801 } 802 803 template <class ELFT> 804 uint64_t LinkerScript<ELFT>::getOutputSectionAddress(StringRef Name) { 805 for (OutputSectionBase<ELFT> *Sec : *OutputSections) 806 if (Sec->getName() == Name) 807 return Sec->getVA(); 808 error("undefined section " + Name); 809 return 0; 810 } 811 812 template <class ELFT> 813 uint64_t LinkerScript<ELFT>::getOutputSectionLMA(StringRef Name) { 814 for (OutputSectionBase<ELFT> *Sec : *OutputSections) 815 if (Sec->getName() == Name) 816 return Sec->getLMA(); 817 error("undefined section " + Name); 818 return 0; 819 } 820 821 template <class ELFT> 822 uint64_t LinkerScript<ELFT>::getOutputSectionSize(StringRef Name) { 823 for (OutputSectionBase<ELFT> *Sec : *OutputSections) 824 if (Sec->getName() == Name) 825 return Sec->getSize(); 826 error("undefined section " + Name); 827 return 0; 828 } 829 830 template <class ELFT> 831 uint64_t LinkerScript<ELFT>::getOutputSectionAlign(StringRef Name) { 832 for (OutputSectionBase<ELFT> *Sec : *OutputSections) 833 if (Sec->getName() == Name) 834 return Sec->getAlignment(); 835 error("undefined section " + Name); 836 return 0; 837 } 838 839 template <class ELFT> 
uint64_t LinkerScript<ELFT>::getHeaderSize() { 840 return elf::getHeaderSize<ELFT>(); 841 } 842 843 template <class ELFT> uint64_t LinkerScript<ELFT>::getSymbolValue(StringRef S) { 844 if (SymbolBody *B = Symtab<ELFT>::X->find(S)) 845 return B->getVA<ELFT>(); 846 error("symbol not found: " + S); 847 return 0; 848 } 849 850 template <class ELFT> bool LinkerScript<ELFT>::isDefined(StringRef S) { 851 return Symtab<ELFT>::X->find(S) != nullptr; 852 } 853 854 // Returns indices of ELF headers containing specific section, identified 855 // by Name. Each index is a zero based number of ELF header listed within 856 // PHDRS {} script block. 857 template <class ELFT> 858 std::vector<size_t> LinkerScript<ELFT>::getPhdrIndices(StringRef SectionName) { 859 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 860 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 861 if (!Cmd || Cmd->Name != SectionName) 862 continue; 863 864 std::vector<size_t> Ret; 865 for (StringRef PhdrName : Cmd->Phdrs) 866 Ret.push_back(getPhdrIndex(PhdrName)); 867 return Ret; 868 } 869 return {}; 870 } 871 872 template <class ELFT> 873 size_t LinkerScript<ELFT>::getPhdrIndex(StringRef PhdrName) { 874 size_t I = 0; 875 for (PhdrsCommand &Cmd : Opt.PhdrsCommands) { 876 if (Cmd.Name == PhdrName) 877 return I; 878 ++I; 879 } 880 error("section header '" + PhdrName + "' is not listed in PHDRS"); 881 return 0; 882 } 883 884 class elf::ScriptParser : public ScriptParserBase { 885 typedef void (ScriptParser::*Handler)(); 886 887 public: 888 ScriptParser(StringRef S, bool B) : ScriptParserBase(S), IsUnderSysroot(B) {} 889 890 void readLinkerScript(); 891 void readVersionScript(); 892 893 private: 894 void addFile(StringRef Path); 895 896 void readAsNeeded(); 897 void readEntry(); 898 void readExtern(); 899 void readGroup(); 900 void readInclude(); 901 void readOutput(); 902 void readOutputArch(); 903 void readOutputFormat(); 904 void readPhdrs(); 905 void readSearchDir(); 906 void readSections(); 
907 void readVersion(); 908 void readVersionScriptCommand(); 909 910 SymbolAssignment *readAssignment(StringRef Name); 911 BytesDataCommand *readBytesDataCommand(StringRef Tok); 912 std::vector<uint8_t> readFill(); 913 OutputSectionCommand *readOutputSectionDescription(StringRef OutSec); 914 std::vector<uint8_t> readOutputSectionFiller(StringRef Tok); 915 std::vector<StringRef> readOutputSectionPhdrs(); 916 InputSectionDescription *readInputSectionDescription(StringRef Tok); 917 Regex readFilePatterns(); 918 std::vector<SectionPattern> readInputSectionsList(); 919 InputSectionDescription *readInputSectionRules(StringRef FilePattern); 920 unsigned readPhdrType(); 921 SortSectionPolicy readSortKind(); 922 SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); 923 SymbolAssignment *readProvideOrAssignment(StringRef Tok, bool MakeAbsolute); 924 void readSort(); 925 Expr readAssert(); 926 927 Expr readExpr(); 928 Expr readExpr1(Expr Lhs, int MinPrec); 929 StringRef readParenLiteral(); 930 Expr readPrimary(); 931 Expr readTernary(Expr Cond); 932 Expr readParenExpr(); 933 934 // For parsing version script. 
935 void readExtern(std::vector<SymbolVersion> *Globals); 936 void readVersionDeclaration(StringRef VerStr); 937 void readGlobal(StringRef VerStr); 938 void readLocal(); 939 940 ScriptConfiguration &Opt = *ScriptConfig; 941 StringSaver Saver = {ScriptConfig->Alloc}; 942 bool IsUnderSysroot; 943 }; 944 945 void ScriptParser::readVersionScript() { 946 readVersionScriptCommand(); 947 if (!atEOF()) 948 setError("EOF expected, but got " + next()); 949 } 950 951 void ScriptParser::readVersionScriptCommand() { 952 if (skip("{")) { 953 readVersionDeclaration(""); 954 return; 955 } 956 957 while (!atEOF() && !Error && peek() != "}") { 958 StringRef VerStr = next(); 959 if (VerStr == "{") { 960 setError("anonymous version definition is used in " 961 "combination with other version definitions"); 962 return; 963 } 964 expect("{"); 965 readVersionDeclaration(VerStr); 966 } 967 } 968 969 void ScriptParser::readVersion() { 970 expect("{"); 971 readVersionScriptCommand(); 972 expect("}"); 973 } 974 975 void ScriptParser::readLinkerScript() { 976 while (!atEOF()) { 977 StringRef Tok = next(); 978 if (Tok == ";") 979 continue; 980 981 if (Tok == "ASSERT") { 982 Opt.Commands.emplace_back(new AssertCommand(readAssert())); 983 } else if (Tok == "ENTRY") { 984 readEntry(); 985 } else if (Tok == "EXTERN") { 986 readExtern(); 987 } else if (Tok == "GROUP" || Tok == "INPUT") { 988 readGroup(); 989 } else if (Tok == "INCLUDE") { 990 readInclude(); 991 } else if (Tok == "OUTPUT") { 992 readOutput(); 993 } else if (Tok == "OUTPUT_ARCH") { 994 readOutputArch(); 995 } else if (Tok == "OUTPUT_FORMAT") { 996 readOutputFormat(); 997 } else if (Tok == "PHDRS") { 998 readPhdrs(); 999 } else if (Tok == "SEARCH_DIR") { 1000 readSearchDir(); 1001 } else if (Tok == "SECTIONS") { 1002 readSections(); 1003 } else if (Tok == "VERSION") { 1004 readVersion(); 1005 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok, true)) { 1006 Opt.Commands.emplace_back(Cmd); 1007 } else { 1008 
setError("unknown directive: " + Tok); 1009 } 1010 } 1011 } 1012 1013 void ScriptParser::addFile(StringRef S) { 1014 if (IsUnderSysroot && S.startswith("/")) { 1015 SmallString<128> Path; 1016 (Config->Sysroot + S).toStringRef(Path); 1017 if (sys::fs::exists(Path)) { 1018 Driver->addFile(Saver.save(Path.str())); 1019 return; 1020 } 1021 } 1022 1023 if (sys::path::is_absolute(S)) { 1024 Driver->addFile(S); 1025 } else if (S.startswith("=")) { 1026 if (Config->Sysroot.empty()) 1027 Driver->addFile(S.substr(1)); 1028 else 1029 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1))); 1030 } else if (S.startswith("-l")) { 1031 Driver->addLibrary(S.substr(2)); 1032 } else if (sys::fs::exists(S)) { 1033 Driver->addFile(S); 1034 } else { 1035 std::string Path = findFromSearchPaths(S); 1036 if (Path.empty()) 1037 setError("unable to find " + S); 1038 else 1039 Driver->addFile(Saver.save(Path)); 1040 } 1041 } 1042 1043 void ScriptParser::readAsNeeded() { 1044 expect("("); 1045 bool Orig = Config->AsNeeded; 1046 Config->AsNeeded = true; 1047 while (!Error && !skip(")")) 1048 addFile(unquote(next())); 1049 Config->AsNeeded = Orig; 1050 } 1051 1052 void ScriptParser::readEntry() { 1053 // -e <symbol> takes predecence over ENTRY(<symbol>). 
1054 expect("("); 1055 StringRef Tok = next(); 1056 if (Config->Entry.empty()) 1057 Config->Entry = Tok; 1058 expect(")"); 1059 } 1060 1061 void ScriptParser::readExtern() { 1062 expect("("); 1063 while (!Error && !skip(")")) 1064 Config->Undefined.push_back(next()); 1065 } 1066 1067 void ScriptParser::readGroup() { 1068 expect("("); 1069 while (!Error && !skip(")")) { 1070 StringRef Tok = next(); 1071 if (Tok == "AS_NEEDED") 1072 readAsNeeded(); 1073 else 1074 addFile(unquote(Tok)); 1075 } 1076 } 1077 1078 void ScriptParser::readInclude() { 1079 StringRef Tok = next(); 1080 auto MBOrErr = MemoryBuffer::getFile(unquote(Tok)); 1081 if (!MBOrErr) { 1082 setError("cannot open " + Tok); 1083 return; 1084 } 1085 std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; 1086 StringRef S = Saver.save(MB->getMemBufferRef().getBuffer()); 1087 std::vector<StringRef> V = tokenize(S); 1088 Tokens.insert(Tokens.begin() + Pos, V.begin(), V.end()); 1089 } 1090 1091 void ScriptParser::readOutput() { 1092 // -o <file> takes predecence over OUTPUT(<file>). 1093 expect("("); 1094 StringRef Tok = next(); 1095 if (Config->OutputFile.empty()) 1096 Config->OutputFile = unquote(Tok); 1097 expect(")"); 1098 } 1099 1100 void ScriptParser::readOutputArch() { 1101 // Error checking only for now. 1102 expect("("); 1103 next(); 1104 expect(")"); 1105 } 1106 1107 void ScriptParser::readOutputFormat() { 1108 // Error checking only for now. 
1109 expect("("); 1110 next(); 1111 StringRef Tok = next(); 1112 if (Tok == ")") 1113 return; 1114 if (Tok != ",") { 1115 setError("unexpected token: " + Tok); 1116 return; 1117 } 1118 next(); 1119 expect(","); 1120 next(); 1121 expect(")"); 1122 } 1123 1124 void ScriptParser::readPhdrs() { 1125 expect("{"); 1126 while (!Error && !skip("}")) { 1127 StringRef Tok = next(); 1128 Opt.PhdrsCommands.push_back( 1129 {Tok, PT_NULL, false, false, UINT_MAX, nullptr}); 1130 PhdrsCommand &PhdrCmd = Opt.PhdrsCommands.back(); 1131 1132 PhdrCmd.Type = readPhdrType(); 1133 do { 1134 Tok = next(); 1135 if (Tok == ";") 1136 break; 1137 if (Tok == "FILEHDR") 1138 PhdrCmd.HasFilehdr = true; 1139 else if (Tok == "PHDRS") 1140 PhdrCmd.HasPhdrs = true; 1141 else if (Tok == "AT") 1142 PhdrCmd.LMAExpr = readParenExpr(); 1143 else if (Tok == "FLAGS") { 1144 expect("("); 1145 // Passing 0 for the value of dot is a bit of a hack. It means that 1146 // we accept expressions like ".|1". 1147 PhdrCmd.Flags = readExpr()(0); 1148 expect(")"); 1149 } else 1150 setError("unexpected header attribute: " + Tok); 1151 } while (!Error); 1152 } 1153 } 1154 1155 void ScriptParser::readSearchDir() { 1156 expect("("); 1157 StringRef Tok = next(); 1158 if (!Config->Nostdlib) 1159 Config->SearchPaths.push_back(unquote(Tok)); 1160 expect(")"); 1161 } 1162 1163 void ScriptParser::readSections() { 1164 Opt.HasSections = true; 1165 expect("{"); 1166 while (!Error && !skip("}")) { 1167 StringRef Tok = next(); 1168 BaseCommand *Cmd = readProvideOrAssignment(Tok, true); 1169 if (!Cmd) { 1170 if (Tok == "ASSERT") 1171 Cmd = new AssertCommand(readAssert()); 1172 else 1173 Cmd = readOutputSectionDescription(Tok); 1174 } 1175 Opt.Commands.emplace_back(Cmd); 1176 } 1177 } 1178 1179 static int precedence(StringRef Op) { 1180 return StringSwitch<int>(Op) 1181 .Cases("*", "/", 5) 1182 .Cases("+", "-", 4) 1183 .Cases("<<", ">>", 3) 1184 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 1185 .Cases("&", "|", 1) 1186 .Default(-1); 
1187 } 1188 1189 Regex ScriptParser::readFilePatterns() { 1190 std::vector<StringRef> V; 1191 while (!Error && !skip(")")) 1192 V.push_back(next()); 1193 return compileGlobPatterns(V); 1194 } 1195 1196 SortSectionPolicy ScriptParser::readSortKind() { 1197 if (skip("SORT") || skip("SORT_BY_NAME")) 1198 return SortSectionPolicy::Name; 1199 if (skip("SORT_BY_ALIGNMENT")) 1200 return SortSectionPolicy::Alignment; 1201 if (skip("SORT_BY_INIT_PRIORITY")) 1202 return SortSectionPolicy::Priority; 1203 if (skip("SORT_NONE")) 1204 return SortSectionPolicy::None; 1205 return SortSectionPolicy::Default; 1206 } 1207 1208 // Method reads a list of sequence of excluded files and section globs given in 1209 // a following form: ((EXCLUDE_FILE(file_pattern+))? section_pattern+)+ 1210 // Example: *(.foo.1 EXCLUDE_FILE (*a.o) .foo.2 EXCLUDE_FILE (*b.o) .foo.3) 1211 // The semantics of that is next: 1212 // * Include .foo.1 from every file. 1213 // * Include .foo.2 from every file but a.o 1214 // * Include .foo.3 from every file but b.o 1215 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 1216 std::vector<SectionPattern> Ret; 1217 while (!Error && peek() != ")") { 1218 Regex ExcludeFileRe; 1219 if (skip("EXCLUDE_FILE")) { 1220 expect("("); 1221 ExcludeFileRe = readFilePatterns(); 1222 } 1223 1224 std::vector<StringRef> V; 1225 while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") 1226 V.push_back(next()); 1227 1228 if (!V.empty()) 1229 Ret.push_back({std::move(ExcludeFileRe), compileGlobPatterns(V)}); 1230 else 1231 setError("section pattern is expected"); 1232 } 1233 return Ret; 1234 } 1235 1236 // Section pattern grammar can have complex expressions, for example: 1237 // *(SORT(.foo.* EXCLUDE_FILE (*file1.o) .bar.*) .bar.* SORT(.zed.*)) 1238 // Generally is a sequence of globs and excludes that may be wrapped in a SORT() 1239 // commands, like: SORT(glob0) glob1 glob2 SORT(glob4) 1240 // This methods handles wrapping sequences of excluded files and section 
globs 1241 // into SORT() if that needed and reads them all. 1242 InputSectionDescription * 1243 ScriptParser::readInputSectionRules(StringRef FilePattern) { 1244 auto *Cmd = new InputSectionDescription(FilePattern); 1245 expect("("); 1246 while (!HasError && !skip(")")) { 1247 SortSectionPolicy Outer = readSortKind(); 1248 SortSectionPolicy Inner = SortSectionPolicy::Default; 1249 std::vector<SectionPattern> V; 1250 if (Outer != SortSectionPolicy::Default) { 1251 expect("("); 1252 Inner = readSortKind(); 1253 if (Inner != SortSectionPolicy::Default) { 1254 expect("("); 1255 V = readInputSectionsList(); 1256 expect(")"); 1257 } else { 1258 V = readInputSectionsList(); 1259 } 1260 expect(")"); 1261 } else { 1262 V = readInputSectionsList(); 1263 } 1264 1265 for (SectionPattern &Pat : V) { 1266 Pat.SortInner = Inner; 1267 Pat.SortOuter = Outer; 1268 } 1269 1270 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 1271 } 1272 return Cmd; 1273 } 1274 1275 InputSectionDescription * 1276 ScriptParser::readInputSectionDescription(StringRef Tok) { 1277 // Input section wildcard can be surrounded by KEEP. 1278 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 1279 if (Tok == "KEEP") { 1280 expect("("); 1281 StringRef FilePattern = next(); 1282 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 1283 expect(")"); 1284 Opt.KeptSections.push_back(Cmd); 1285 return Cmd; 1286 } 1287 return readInputSectionRules(Tok); 1288 } 1289 1290 void ScriptParser::readSort() { 1291 expect("("); 1292 expect("CONSTRUCTORS"); 1293 expect(")"); 1294 } 1295 1296 Expr ScriptParser::readAssert() { 1297 expect("("); 1298 Expr E = readExpr(); 1299 expect(","); 1300 StringRef Msg = unquote(next()); 1301 expect(")"); 1302 return [=](uint64_t Dot) { 1303 uint64_t V = E(Dot); 1304 if (!V) 1305 error(Msg); 1306 return V; 1307 }; 1308 } 1309 1310 // Reads a FILL(expr) command. 
We handle the FILL command as an 1311 // alias for =fillexp section attribute, which is different from 1312 // what GNU linkers do. 1313 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 1314 std::vector<uint8_t> ScriptParser::readFill() { 1315 expect("("); 1316 std::vector<uint8_t> V = readOutputSectionFiller(next()); 1317 expect(")"); 1318 expect(";"); 1319 return V; 1320 } 1321 1322 OutputSectionCommand * 1323 ScriptParser::readOutputSectionDescription(StringRef OutSec) { 1324 OutputSectionCommand *Cmd = new OutputSectionCommand(OutSec); 1325 1326 // Read an address expression. 1327 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html#Output-Section-Address 1328 if (peek() != ":") 1329 Cmd->AddrExpr = readExpr(); 1330 1331 expect(":"); 1332 1333 if (skip("AT")) 1334 Cmd->LMAExpr = readParenExpr(); 1335 if (skip("ALIGN")) 1336 Cmd->AlignExpr = readParenExpr(); 1337 if (skip("SUBALIGN")) 1338 Cmd->SubalignExpr = readParenExpr(); 1339 1340 // Parse constraints. 
1341 if (skip("ONLY_IF_RO")) 1342 Cmd->Constraint = ConstraintKind::ReadOnly; 1343 if (skip("ONLY_IF_RW")) 1344 Cmd->Constraint = ConstraintKind::ReadWrite; 1345 expect("{"); 1346 1347 while (!Error && !skip("}")) { 1348 StringRef Tok = next(); 1349 if (SymbolAssignment *Assignment = readProvideOrAssignment(Tok, false)) 1350 Cmd->Commands.emplace_back(Assignment); 1351 else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) 1352 Cmd->Commands.emplace_back(Data); 1353 else if (Tok == "FILL") 1354 Cmd->Filler = readFill(); 1355 else if (Tok == "SORT") 1356 readSort(); 1357 else if (peek() == "(") 1358 Cmd->Commands.emplace_back(readInputSectionDescription(Tok)); 1359 else 1360 setError("unknown command " + Tok); 1361 } 1362 Cmd->Phdrs = readOutputSectionPhdrs(); 1363 1364 if (skip("=")) 1365 Cmd->Filler = readOutputSectionFiller(next()); 1366 else if (peek().startswith("=")) 1367 Cmd->Filler = readOutputSectionFiller(next().drop_front()); 1368 1369 return Cmd; 1370 } 1371 1372 // Read "=<number>" where <number> is an octal/decimal/hexadecimal number. 1373 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 1374 // 1375 // ld.gold is not fully compatible with ld.bfd. ld.bfd handles 1376 // hexstrings as blobs of arbitrary sizes, while ld.gold handles them 1377 // as 32-bit big-endian values. We will do the same as ld.gold does 1378 // because it's simpler than what ld.bfd does. 
1379 std::vector<uint8_t> ScriptParser::readOutputSectionFiller(StringRef Tok) { 1380 uint32_t V; 1381 if (Tok.getAsInteger(0, V)) { 1382 setError("invalid filler expression: " + Tok); 1383 return {}; 1384 } 1385 return {uint8_t(V >> 24), uint8_t(V >> 16), uint8_t(V >> 8), uint8_t(V)}; 1386 } 1387 1388 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { 1389 expect("("); 1390 SymbolAssignment *Cmd = readAssignment(next()); 1391 Cmd->Provide = Provide; 1392 Cmd->Hidden = Hidden; 1393 expect(")"); 1394 expect(";"); 1395 return Cmd; 1396 } 1397 1398 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok, 1399 bool MakeAbsolute) { 1400 SymbolAssignment *Cmd = nullptr; 1401 if (peek() == "=" || peek() == "+=") { 1402 Cmd = readAssignment(Tok); 1403 expect(";"); 1404 } else if (Tok == "PROVIDE") { 1405 Cmd = readProvideHidden(true, false); 1406 } else if (Tok == "HIDDEN") { 1407 Cmd = readProvideHidden(false, true); 1408 } else if (Tok == "PROVIDE_HIDDEN") { 1409 Cmd = readProvideHidden(true, true); 1410 } 1411 if (Cmd && MakeAbsolute) 1412 Cmd->IsAbsolute = true; 1413 return Cmd; 1414 } 1415 1416 static uint64_t getSymbolValue(StringRef S, uint64_t Dot) { 1417 if (S == ".") 1418 return Dot; 1419 return ScriptBase->getSymbolValue(S); 1420 } 1421 1422 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { 1423 StringRef Op = next(); 1424 bool IsAbsolute = false; 1425 Expr E; 1426 assert(Op == "=" || Op == "+="); 1427 if (skip("ABSOLUTE")) { 1428 E = readParenExpr(); 1429 IsAbsolute = true; 1430 } else { 1431 E = readExpr(); 1432 } 1433 if (Op == "+=") 1434 E = [=](uint64_t Dot) { return getSymbolValue(Name, Dot) + E(Dot); }; 1435 return new SymbolAssignment(Name, E, IsAbsolute); 1436 } 1437 1438 // This is an operator-precedence parser to parse a linker 1439 // script expression. 
// Reads one primary expression and then folds in any binary operators that
// follow it, starting from the lowest precedence level (0).
Expr ScriptParser::readExpr() { return readExpr1(readPrimary(), 0); }

// Builds the expression "L Op R" for a binary operator. Both operands are
// evaluated with the same value of dot. Op must be one of the operators
// handled below; any other string hits llvm_unreachable.
static Expr combine(StringRef Op, Expr L, Expr R) {
  if (Op == "*")
    return [=](uint64_t Dot) { return L(Dot) * R(Dot); };
  if (Op == "/") {
    return [=](uint64_t Dot) -> uint64_t {
      // Division by zero is reported as an error and evaluates to 0.
      uint64_t RHS = R(Dot);
      if (RHS == 0) {
        error("division by zero");
        return 0;
      }
      return L(Dot) / RHS;
    };
  }
  if (Op == "+")
    return [=](uint64_t Dot) { return L(Dot) + R(Dot); };
  if (Op == "-")
    return [=](uint64_t Dot) { return L(Dot) - R(Dot); };
  if (Op == "<<")
    return [=](uint64_t Dot) { return L(Dot) << R(Dot); };
  if (Op == ">>")
    return [=](uint64_t Dot) { return L(Dot) >> R(Dot); };
  if (Op == "<")
    return [=](uint64_t Dot) { return L(Dot) < R(Dot); };
  if (Op == ">")
    return [=](uint64_t Dot) { return L(Dot) > R(Dot); };
  if (Op == ">=")
    return [=](uint64_t Dot) { return L(Dot) >= R(Dot); };
  if (Op == "<=")
    return [=](uint64_t Dot) { return L(Dot) <= R(Dot); };
  if (Op == "==")
    return [=](uint64_t Dot) { return L(Dot) == R(Dot); };
  if (Op == "!=")
    return [=](uint64_t Dot) { return L(Dot) != R(Dot); };
  if (Op == "&")
    return [=](uint64_t Dot) { return L(Dot) & R(Dot); };
  if (Op == "|")
    return [=](uint64_t Dot) { return L(Dot) | R(Dot); };
  llvm_unreachable("invalid operator");
}

// This is a part of the operator-precedence parser. This function
// assumes that the remaining token stream starts with an operator.
//
// Lhs is the operand parsed so far. Operators whose precedence is below
// MinPrec are left unconsumed for the caller. A token that is not a binary
// operator has precedence -1 and therefore always ends the loop.
Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) {
  while (!atEOF() && !Error) {
    // Read an operator and an expression.
    StringRef Op1 = peek();
    // "?" starts a ternary; everything parsed so far is its condition.
    if (Op1 == "?")
      return readTernary(Lhs);
    if (precedence(Op1) < MinPrec)
      break;
    next();
    Expr Rhs = readPrimary();

    // Evaluate the remaining part of the expression first if the
    // next operator has greater precedence than the previous one.
    // For example, if we have read "+" and "3", and if the next
    // operator is "*", then we'll evaluate 3 * ... part first.
    while (!atEOF()) {
      StringRef Op2 = peek();
      if (precedence(Op2) <= precedence(Op1))
        break;
      Rhs = readExpr1(Rhs, precedence(Op2));
    }

    Lhs = combine(Op1, Lhs, Rhs);
  }
  return Lhs;
}

// Returns the value of a CONSTANT(name) operand. COMMONPAGESIZE and
// MAXPAGESIZE are recognized; any other name is reported as an error and
// evaluates to 0.
uint64_t static getConstant(StringRef S) {
  if (S == "COMMONPAGESIZE")
    return Target->PageSize;
  if (S == "MAXPAGESIZE")
    return Config->MaxPageSize;
  error("unknown constant: " + S);
  return 0;
}

// Parses Tok as an integer. Returns true if successful.
// It recognizes hexadecimal (prefixed with "0x" or suffixed with "H")
// and decimal numbers. Decimal numbers may have "K" (kilo) or
// "M" (mega) suffixes.
1524 static bool readInteger(StringRef Tok, uint64_t &Result) { 1525 if (Tok.startswith("-")) { 1526 if (!readInteger(Tok.substr(1), Result)) 1527 return false; 1528 Result = -Result; 1529 return true; 1530 } 1531 if (Tok.startswith_lower("0x")) 1532 return !Tok.substr(2).getAsInteger(16, Result); 1533 if (Tok.endswith_lower("H")) 1534 return !Tok.drop_back().getAsInteger(16, Result); 1535 1536 int Suffix = 1; 1537 if (Tok.endswith_lower("K")) { 1538 Suffix = 1024; 1539 Tok = Tok.drop_back(); 1540 } else if (Tok.endswith_lower("M")) { 1541 Suffix = 1024 * 1024; 1542 Tok = Tok.drop_back(); 1543 } 1544 if (Tok.getAsInteger(10, Result)) 1545 return false; 1546 Result *= Suffix; 1547 return true; 1548 } 1549 1550 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { 1551 int Size = StringSwitch<unsigned>(Tok) 1552 .Case("BYTE", 1) 1553 .Case("SHORT", 2) 1554 .Case("LONG", 4) 1555 .Case("QUAD", 8) 1556 .Default(-1); 1557 if (Size == -1) 1558 return nullptr; 1559 1560 expect("("); 1561 uint64_t Val = 0; 1562 StringRef S = next(); 1563 if (!readInteger(S, Val)) 1564 setError("unexpected value: " + S); 1565 expect(")"); 1566 return new BytesDataCommand(Val, Size); 1567 } 1568 1569 StringRef ScriptParser::readParenLiteral() { 1570 expect("("); 1571 StringRef Tok = next(); 1572 expect(")"); 1573 return Tok; 1574 } 1575 1576 Expr ScriptParser::readPrimary() { 1577 if (peek() == "(") 1578 return readParenExpr(); 1579 1580 StringRef Tok = next(); 1581 1582 if (Tok == "~") { 1583 Expr E = readPrimary(); 1584 return [=](uint64_t Dot) { return ~E(Dot); }; 1585 } 1586 if (Tok == "-") { 1587 Expr E = readPrimary(); 1588 return [=](uint64_t Dot) { return -E(Dot); }; 1589 } 1590 1591 // Built-in functions are parsed here. 1592 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 
1593 if (Tok == "ADDR") { 1594 StringRef Name = readParenLiteral(); 1595 return 1596 [=](uint64_t Dot) { return ScriptBase->getOutputSectionAddress(Name); }; 1597 } 1598 if (Tok == "LOADADDR") { 1599 StringRef Name = readParenLiteral(); 1600 return [=](uint64_t Dot) { return ScriptBase->getOutputSectionLMA(Name); }; 1601 } 1602 if (Tok == "ASSERT") 1603 return readAssert(); 1604 if (Tok == "ALIGN") { 1605 Expr E = readParenExpr(); 1606 return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; 1607 } 1608 if (Tok == "CONSTANT") { 1609 StringRef Name = readParenLiteral(); 1610 return [=](uint64_t Dot) { return getConstant(Name); }; 1611 } 1612 if (Tok == "DEFINED") { 1613 expect("("); 1614 StringRef Tok = next(); 1615 expect(")"); 1616 return [=](uint64_t Dot) { return ScriptBase->isDefined(Tok) ? 1 : 0; }; 1617 } 1618 if (Tok == "SEGMENT_START") { 1619 expect("("); 1620 next(); 1621 expect(","); 1622 Expr E = readExpr(); 1623 expect(")"); 1624 return [=](uint64_t Dot) { return E(Dot); }; 1625 } 1626 if (Tok == "DATA_SEGMENT_ALIGN") { 1627 expect("("); 1628 Expr E = readExpr(); 1629 expect(","); 1630 readExpr(); 1631 expect(")"); 1632 return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; 1633 } 1634 if (Tok == "DATA_SEGMENT_END") { 1635 expect("("); 1636 expect("."); 1637 expect(")"); 1638 return [](uint64_t Dot) { return Dot; }; 1639 } 1640 // GNU linkers implements more complicated logic to handle 1641 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and just align to 1642 // the next page boundary for simplicity. 
1643 if (Tok == "DATA_SEGMENT_RELRO_END") { 1644 expect("("); 1645 readExpr(); 1646 expect(","); 1647 readExpr(); 1648 expect(")"); 1649 return [](uint64_t Dot) { return alignTo(Dot, Target->PageSize); }; 1650 } 1651 if (Tok == "SIZEOF") { 1652 StringRef Name = readParenLiteral(); 1653 return [=](uint64_t Dot) { return ScriptBase->getOutputSectionSize(Name); }; 1654 } 1655 if (Tok == "ALIGNOF") { 1656 StringRef Name = readParenLiteral(); 1657 return 1658 [=](uint64_t Dot) { return ScriptBase->getOutputSectionAlign(Name); }; 1659 } 1660 if (Tok == "SIZEOF_HEADERS") 1661 return [=](uint64_t Dot) { return ScriptBase->getHeaderSize(); }; 1662 1663 // Tok is a literal number. 1664 uint64_t V; 1665 if (readInteger(Tok, V)) 1666 return [=](uint64_t Dot) { return V; }; 1667 1668 // Tok is a symbol name. 1669 if (Tok != "." && !isValidCIdentifier(Tok)) 1670 setError("malformed number: " + Tok); 1671 return [=](uint64_t Dot) { return getSymbolValue(Tok, Dot); }; 1672 } 1673 1674 Expr ScriptParser::readTernary(Expr Cond) { 1675 next(); 1676 Expr L = readExpr(); 1677 expect(":"); 1678 Expr R = readExpr(); 1679 return [=](uint64_t Dot) { return Cond(Dot) ? L(Dot) : R(Dot); }; 1680 } 1681 1682 Expr ScriptParser::readParenExpr() { 1683 expect("("); 1684 Expr E = readExpr(); 1685 expect(")"); 1686 return E; 1687 } 1688 1689 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 1690 std::vector<StringRef> Phdrs; 1691 while (!Error && peek().startswith(":")) { 1692 StringRef Tok = next(); 1693 Tok = (Tok.size() == 1) ? 
next() : Tok.substr(1); 1694 if (Tok.empty()) { 1695 setError("section header name is empty"); 1696 break; 1697 } 1698 Phdrs.push_back(Tok); 1699 } 1700 return Phdrs; 1701 } 1702 1703 unsigned ScriptParser::readPhdrType() { 1704 StringRef Tok = next(); 1705 unsigned Ret = StringSwitch<unsigned>(Tok) 1706 .Case("PT_NULL", PT_NULL) 1707 .Case("PT_LOAD", PT_LOAD) 1708 .Case("PT_DYNAMIC", PT_DYNAMIC) 1709 .Case("PT_INTERP", PT_INTERP) 1710 .Case("PT_NOTE", PT_NOTE) 1711 .Case("PT_SHLIB", PT_SHLIB) 1712 .Case("PT_PHDR", PT_PHDR) 1713 .Case("PT_TLS", PT_TLS) 1714 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1715 .Case("PT_GNU_STACK", PT_GNU_STACK) 1716 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1717 .Default(-1); 1718 1719 if (Ret == (unsigned)-1) { 1720 setError("invalid program header type: " + Tok); 1721 return PT_NULL; 1722 } 1723 return Ret; 1724 } 1725 1726 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1727 // Identifiers start at 2 because 0 and 1 are reserved 1728 // for VER_NDX_LOCAL and VER_NDX_GLOBAL constants. 1729 size_t VersionId = Config->VersionDefinitions.size() + 2; 1730 Config->VersionDefinitions.push_back({VerStr, VersionId}); 1731 1732 if (skip("global:") || peek() != "local:") 1733 readGlobal(VerStr); 1734 if (skip("local:")) 1735 readLocal(); 1736 expect("}"); 1737 1738 // Each version may have a parent version. For example, "Ver2" defined as 1739 // "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" as a parent. This 1740 // version hierarchy is, probably against your instinct, purely for human; the 1741 // runtime doesn't care about them at all. In LLD, we simply skip the token. 
1742 if (!VerStr.empty() && peek() != ";") 1743 next(); 1744 expect(";"); 1745 } 1746 1747 void ScriptParser::readLocal() { 1748 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1749 expect("*"); 1750 expect(";"); 1751 } 1752 1753 void ScriptParser::readExtern(std::vector<SymbolVersion> *Globals) { 1754 expect("\"C++\""); 1755 expect("{"); 1756 1757 for (;;) { 1758 if (peek() == "}" || Error) 1759 break; 1760 bool HasWildcard = !peek().startswith("\"") && hasWildcard(peek()); 1761 Globals->push_back({unquote(next()), true, HasWildcard}); 1762 expect(";"); 1763 } 1764 1765 expect("}"); 1766 expect(";"); 1767 } 1768 1769 void ScriptParser::readGlobal(StringRef VerStr) { 1770 std::vector<SymbolVersion> *Globals; 1771 if (VerStr.empty()) 1772 Globals = &Config->VersionScriptGlobals; 1773 else 1774 Globals = &Config->VersionDefinitions.back().Globals; 1775 1776 for (;;) { 1777 if (skip("extern")) 1778 readExtern(Globals); 1779 1780 StringRef Cur = peek(); 1781 if (Cur == "}" || Cur == "local:" || Error) 1782 return; 1783 next(); 1784 Globals->push_back({unquote(Cur), false, hasWildcard(Cur)}); 1785 expect(";"); 1786 } 1787 } 1788 1789 static bool isUnderSysroot(StringRef Path) { 1790 if (Config->Sysroot == "") 1791 return false; 1792 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 1793 if (sys::fs::equivalent(Config->Sysroot, Path)) 1794 return true; 1795 return false; 1796 } 1797 1798 void elf::readLinkerScript(MemoryBufferRef MB) { 1799 StringRef Path = MB.getBufferIdentifier(); 1800 ScriptParser(MB.getBuffer(), isUnderSysroot(Path)).readLinkerScript(); 1801 } 1802 1803 void elf::readVersionScript(MemoryBufferRef MB) { 1804 ScriptParser(MB.getBuffer(), false).readVersionScript(); 1805 } 1806 1807 template class elf::LinkerScript<ELF32LE>; 1808 template class elf::LinkerScript<ELF32BE>; 1809 template class elf::LinkerScript<ELF64LE>; 1810 template class elf::LinkerScript<ELF64BE>; 1811