//===- LinkerScript.cpp ---------------------------------------------------===//
//
//                             The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the parser/evaluator of the linker script.
// It parses a linker script and writes the result to Config or ScriptConfig
// objects.
//
// If SECTIONS command is used, a ScriptConfig contains an AST
// of the command which will later be consumed by createSections() and
// assignAddresses().
//
//===----------------------------------------------------------------------===//

#include "LinkerScript.h"
#include "Config.h"
#include "Driver.h"
#include "InputSection.h"
#include "OutputSections.h"
#include "ScriptParser.h"
#include "Strings.h"
#include "Symbols.h"
#include "SymbolTable.h"
#include "Target.h"
#include "Writer.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/StringSaver.h"

using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
using namespace llvm::support::endian;
using namespace lld;
using namespace lld::elf;

// Definitions of the globals declared in namespace elf.
LinkerScriptBase *elf::ScriptBase;
ScriptConfiguration *elf::ScriptConfig;

// Defines Cmd->Name as a global regular symbol, applies the visibility
// requested by the command (hidden or default) and stores the resulting
// symbol body in Cmd->Sym.
template <class ELFT> static void addRegular(SymbolAssignment *Cmd) {
  Symbol *Sym = Symtab<ELFT>::X->addRegular(Cmd->Name, STB_GLOBAL, STV_DEFAULT);
  Sym->Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT;
  Cmd->Sym = Sym->body();

  // If we have no SECTIONS then we don't have '.' and don't call
  // assignAddresses(). We calculate symbol value immediately in this case.
  if (!ScriptConfig->HasSections)
    cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Cmd->Expression(0);
}

// Defines Cmd->Name as a synthetic symbol and stores its body in Cmd->Sym.
// The symbol is created with a null section and a zero value here;
// assignSectionSymbol() fills in both once addresses are known.
template <class ELFT> static void addSynthetic(SymbolAssignment *Cmd) {
  Symbol *Sym = Symtab<ELFT>::X->addSynthetic(
      Cmd->Name, nullptr, 0, Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT);
  Cmd->Sym = Sym->body();
}

// Creates the symbol for an assignment: a regular symbol when the command
// is marked absolute, a synthetic (section-relative) one otherwise.
template <class ELFT> static void addSymbol(SymbolAssignment *Cmd) {
  if (Cmd->IsAbsolute)
    addRegular<ELFT>(Cmd);
  else
    addSynthetic<ELFT>(Cmd);
}
// If a symbol was in PROVIDE(), we need to define it only when
// it is an undefined symbol.
// Assignments to the location counter "." never define a symbol.
template <class ELFT> static bool shouldDefine(SymbolAssignment *Cmd) {
  if (Cmd->Name == ".")
    return false;
  if (!Cmd->Provide)
    return true;
  SymbolBody *B = Symtab<ELFT>::X->find(Cmd->Name);
  return B && B->isUndefined();
}

// classof() implementations: each command class is identified by the Kind
// tag stored in BaseCommand, which is what isa<>/cast<>/dyn_cast<> query.
bool SymbolAssignment::classof(const BaseCommand *C) {
  return C->Kind == AssignmentKind;
}

bool OutputSectionCommand::classof(const BaseCommand *C) {
  return C->Kind == OutputSectionKind;
}

bool InputSectionDescription::classof(const BaseCommand *C) {
  return C->Kind == InputSectionKind;
}

bool AssertCommand::classof(const BaseCommand *C) {
  return C->Kind == AssertKind;
}

bool BytesDataCommand::classof(const BaseCommand *C) {
  return C->Kind == BytesDataKind;
}

// A section is treated as discarded when the pointer is null or the section
// is not marked Live.
template <class ELFT> static bool isDiscarded(InputSectionBase<ELFT> *S) {
  return !S || !S->Live;
}

template <class ELFT> LinkerScript<ELFT>::LinkerScript() {}
template <class ELFT> LinkerScript<ELFT>::~LinkerScript() {}

// Returns true if S matches one of the descriptions in Opt.KeptSections:
// the base name of the file S comes from must match the description's file
// regex and S's name must match one of its section patterns.
// NOTE(review): KeptSections is presumably filled from KEEP() commands by
// the parser -- confirm there.
template <class ELFT>
bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) {
  for (InputSectionDescription *ID : Opt.KeptSections) {
    StringRef Filename = S->getFile()->getName();
    if (!ID->FileRe.match(sys::path::filename(Filename)))
      continue;

    for (SectionPattern &P : ID->SectionPatterns)
      if (P.SectionRe.match(S->Name))
        return true;
  }
  return false;
}

// Orders sections by the value getPriority() computes from their names.
static bool comparePriority(InputSectionData *A, InputSectionData *B) {
  return getPriority(A->Name) < getPriority(B->Name);
}

// Orders sections by name.
static bool compareName(InputSectionData *A, InputSectionData *B) {
  return A->Name < B->Name;
}

static bool compareAlignment(InputSectionData *A, InputSectionData *B) {
  // ">" is not a mistake. Larger alignments are placed before smaller
  // alignments in order to reduce the amount of padding necessary.
  // This is compatible with GNU.
  return A->Alignment > B->Alignment;
}

// Maps a sort policy to its comparison function. Only Alignment, Name and
// Priority are valid here; any other policy is a programming error.
static std::function<bool(InputSectionData *, InputSectionData *)>
getComparator(SortSectionPolicy K) {
  switch (K) {
  case SortSectionPolicy::Alignment:
    return compareAlignment;
  case SortSectionPolicy::Name:
    return compareName;
  case SortSectionPolicy::Priority:
    return comparePriority;
  default:
    llvm_unreachable("unknown sort policy");
  }
}

// Checks an output section constraint against the matched input sections.
// The set counts as read-write if at least one section has SHF_WRITE.
// NoConstraint always matches; ReadWrite matches a read-write set and
// ReadOnly matches a set with no writable section.
template <class ELFT>
static bool matchConstraints(ArrayRef<InputSectionBase<ELFT> *> Sections,
                             ConstraintKind Kind) {
  if (Kind == ConstraintKind::NoConstraint)
    return true;
  bool IsRW = llvm::any_of(Sections, [=](InputSectionData *Sec2) {
    auto *Sec = static_cast<InputSectionBase<ELFT> *>(Sec2);
    return Sec->getFlags() & SHF_WRITE;
  });
  return (IsRW && Kind == ConstraintKind::ReadWrite) ||
         (!IsRW && Kind == ConstraintKind::ReadOnly);
}

// Stable-sorts [Begin, End) according to K. The Default and None policies
// leave the range untouched.
static void sortSections(InputSectionData **Begin, InputSectionData **End,
                         SortSectionPolicy K) {
  if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None)
    std::stable_sort(Begin, End, getComparator(K));
}

// Compute and remember which sections the InputSectionDescription matches.
template <class ELFT>
void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) {
  // Collects all sections that satisfy constraints of I
  // and attach them to I.
  for (SectionPattern &Pat : I->SectionPatterns) {
    // Remember where this pattern's matches start so that only they are
    // sorted below.
    size_t SizeBefore = I->Sections.size();
    for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles()) {
      StringRef Filename = sys::path::filename(F->getName());
      if (!I->FileRe.match(Filename) || Pat.ExcludedFileRe.match(Filename))
        continue;

      // Take live sections that have not been claimed yet (OutSec is still
      // null) and whose name matches the pattern.
      for (InputSectionBase<ELFT> *S : F->getSections())
        if (!isDiscarded(S) && !S->OutSec && Pat.SectionRe.match(S->Name))
          I->Sections.push_back(S);
      // NOTE(review): this check sits inside the per-file loop, so the
      // common section is appended once for every object file that passes
      // the file filter -- confirm that the duplicates are harmless.
      if (Pat.SectionRe.match("COMMON"))
        I->Sections.push_back(InputSection<ELFT>::CommonInputSection);
    }

    // Sort sections as instructed by SORT-family commands and --sort-section
    // option. Because SORT-family commands can be nested at most two depth
    // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command
    // line option is respected even if a SORT command is given, the exact
    // behavior we have here is a bit complicated. Here are the rules.
    //
    // 1. If two SORT commands are given, --sort-section is ignored.
    // 2. If one SORT command is given, and if it is not SORT_NONE,
    //    --sort-section is handled as an inner SORT command.
    // 3. If one SORT command is given, and if it is SORT_NONE, don't sort.
    // 4. If no SORT command is given, sort according to --sort-section.
    //
    // The inner key is applied first and the outer key second; because
    // sortSections() uses a stable sort, the result is ordered by the outer
    // key with ties kept in inner-key order.
    InputSectionData **Begin = I->Sections.data() + SizeBefore;
    InputSectionData **End = I->Sections.data() + I->Sections.size();
    if (Pat.SortOuter != SortSectionPolicy::None) {
      if (Pat.SortInner == SortSectionPolicy::Default)
        sortSections(Begin, End, Config->SortSection);
      else
        sortSections(Begin, End, Pat.SortInner);
      sortSections(Begin, End, Pat.SortOuter);
    }
  }

  // We do not add duplicate input sections, so mark them with a dummy output
  // section for now.
  // The non-null sentinel makes the "!S->OutSec" test above fail for these
  // sections when a later description is processed.
  for (InputSectionData *S : I->Sections) {
    auto *S2 = static_cast<InputSectionBase<ELFT> *>(S);
    S2->OutSec = (OutputSectionBase<ELFT> *)-1;
  }
}

// Marks every section in V as not live and reports each one as discarded.
template <class ELFT>
void LinkerScript<ELFT>::discard(ArrayRef<InputSectionBase<ELFT> *> V) {
  for (InputSectionBase<ELFT> *S : V) {
    S->Live = false;
    reportDiscarded(S);
  }
}

// Returns, in command order, all input sections matched by the input
// section descriptions of OutCmd. Other kinds of sub-commands are ignored.
template <class ELFT>
std::vector<InputSectionBase<ELFT> *>
LinkerScript<ELFT>::createInputSectionList(OutputSectionCommand &OutCmd) {
  std::vector<InputSectionBase<ELFT> *> Ret;

  for (const std::unique_ptr<BaseCommand> &Base : OutCmd.Commands) {
    auto *Cmd = dyn_cast<InputSectionDescription>(Base.get());
    if (!Cmd)
      continue;
    computeInputSections(Cmd);
    for (InputSectionData *S : Cmd->Sections)
      Ret.push_back(static_cast<InputSectionBase<ELFT> *>(S));
  }

  // After we created final list we should now set OutSec pointer to null,
  // instead of -1. Otherwise we may get a crash when writing relocs, in
  // case section is discarded by linker script
  for (InputSectionBase<ELFT> *S : Ret)
    S->OutSec = nullptr;

  return Ret;
}

// Builds the key used to look up or create the output section that input
// section C goes into when it is placed under the name OutsecName.
template <class ELFT>
static SectionKey<ELFT::Is64Bits> createKey(InputSectionBase<ELFT> *C,
                                            StringRef OutsecName) {
  // When using linker script the merge rules are different.
  // Unfortunately, linker scripts are name based. This means that expressions
  // like *(.foo*) can refer to multiple input sections that would normally be
  // placed in different output sections. We cannot put them in different
  // output sections or we would produce wrong results for
  // start = .; *(.foo.*) end = .; *(.bar)
  // and a mapping of .foo1 and .bar1 to one section and .foo2 and .bar2 to
  // another. The problem is that there is no way to layout those output
  // sections such that the .foo sections are the only thing between the
  // start and end symbols.

  // An extra annoyance is that we cannot simply disable merging of the contents
  // of SHF_MERGE sections, but our implementation requires one output section
  // per "kind" (string or not, which size/alignment).
  // Fortunately, creating symbols in the middle of a merge section is not
  // supported by bfd or gold, so we can just create multiple section in that
  // case.
  typedef typename ELFT::uint uintX_t;
  // Only the merge-related flags take part in the key.
  uintX_t Flags = C->getFlags() & (SHF_MERGE | SHF_STRINGS);

  // The alignment component stays 0 for everything except merge sections,
  // where it is the larger of the section alignment and the entry size.
  uintX_t Alignment = 0;
  if (isa<MergeInputSection<ELFT>>(C))
    Alignment = std::max<uintX_t>(C->Alignment, C->getEntsize());

  return SectionKey<ELFT::Is64Bits>{OutsecName, /*Type*/ 0, Flags, Alignment};
}

// Adds Sec to the output section named Name, asking Factory to find or
// create it. A newly created output section is appended to OutputSections.
template <class ELFT>
void LinkerScript<ELFT>::addSection(OutputSectionFactory<ELFT> &Factory,
                                    InputSectionBase<ELFT> *Sec,
                                    StringRef Name) {
  OutputSectionBase<ELFT> *OutSec;
  bool IsNew;
  std::tie(OutSec, IsNew) = Factory.create(createKey(Sec, Name), Sec);
  if (IsNew)
    OutputSections->push_back(OutSec);
  OutSec->addSection(Sec);
}

// Walks the script commands in order: defines symbols for assignments,
// evaluates ASSERTs when there is no SECTIONS command, and for each output
// section command collects its input sections and adds them to output
// sections. Commands whose constraint does not match are removed from
// Opt.Commands.
template <class ELFT>
void LinkerScript<ELFT>::processCommands(OutputSectionFactory<ELFT> &Factory) {

  // An index is used instead of an iterator because the loop erases from
  // Opt.Commands while walking it.
  for (unsigned I = 0; I < Opt.Commands.size(); ++I) {
    auto Iter = Opt.Commands.begin() + I;
    const std::unique_ptr<BaseCommand> &Base1 = *Iter;
    // Assignments outside of an output section always become regular
    // symbols.
    if (auto *Cmd = dyn_cast<SymbolAssignment>(Base1.get())) {
      if (shouldDefine<ELFT>(Cmd))
        addRegular<ELFT>(Cmd);
      continue;
    }
    if (auto *Cmd = dyn_cast<AssertCommand>(Base1.get())) {
      // If we don't have SECTIONS then output sections have already been
      // created by Writer<ELFT>. The LinkerScript<ELFT>::assignAddresses
      // will not be called, so ASSERT should be evaluated now.
      if (!Opt.HasSections)
        Cmd->Expression(0);
      continue;
    }

    if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base1.get())) {
      std::vector<InputSectionBase<ELFT> *> V = createInputSectionList(*Cmd);

      if (Cmd->Name == "/DISCARD/") {
        discard(V);
        continue;
      }

      if (!matchConstraints<ELFT>(V, Cmd->Constraint)) {
        // Release the sections so that a later command or the orphan pass
        // can pick them up, then drop this command altogether.
        for (InputSectionBase<ELFT> *S : V)
          S->OutSec = nullptr;
        Opt.Commands.erase(Iter);
        // Step back so that the loop's ++I revisits the slot the next
        // command moved into. I is unsigned: at index 0 the decrement wraps
        // around and the increment brings it back to 0.
        --I;
        continue;
      }

      // Symbols assigned inside the output section description may be
      // section-relative, so addSymbol() picks the symbol kind.
      for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands)
        if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base.get()))
          if (shouldDefine<ELFT>(OutCmd))
            addSymbol<ELFT>(OutCmd);

      if (V.empty())
        continue;

      for (InputSectionBase<ELFT> *Sec : V) {
        addSection(Factory, Sec, Cmd->Name);
        // A non-zero SUBALIGN value overrides the input section alignment.
        // NOTE(review): the override is applied after addSection(), so
        // whatever addSection() derives from Sec->Alignment sees the old
        // value -- confirm this ordering is intended.
        if (uint32_t Subalign = Cmd->SubalignExpr ? Cmd->SubalignExpr(0) : 0)
          Sec->Alignment = Subalign;
      }
    }
  }
}

// Creates output sections: first those requested by the script, then one
// for every remaining live input section that no command claimed.
template <class ELFT>
void LinkerScript<ELFT>::createSections(OutputSectionFactory<ELFT> &Factory) {
  processCommands(Factory);
  // Add orphan sections.
  for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles())
    for (InputSectionBase<ELFT> *S : F->getSections())
      if (!isDiscarded(S) && !S->OutSec)
        addSection(Factory, S, getOutputSectionName(S->Name, Opt.Alloc));
}

// Sets value of a section-defined symbol. Two kinds of
// symbols are processed: synthetic symbols, whose value
// is an offset from beginning of section and regular
// symbols whose value is absolute.
360 template <class ELFT> 361 static void assignSectionSymbol(SymbolAssignment *Cmd, 362 OutputSectionBase<ELFT> *Sec, 363 typename ELFT::uint Off) { 364 if (!Cmd->Sym) 365 return; 366 367 if (auto *Body = dyn_cast<DefinedSynthetic<ELFT>>(Cmd->Sym)) { 368 Body->Section = Sec; 369 Body->Value = Cmd->Expression(Sec->getVA() + Off) - Sec->getVA(); 370 return; 371 } 372 auto *Body = cast<DefinedRegular<ELFT>>(Cmd->Sym); 373 Body->Value = Cmd->Expression(Sec->getVA() + Off); 374 } 375 376 template <class ELFT> static bool isTbss(OutputSectionBase<ELFT> *Sec) { 377 return (Sec->getFlags() & SHF_TLS) && Sec->getType() == SHT_NOBITS; 378 } 379 380 template <class ELFT> void LinkerScript<ELFT>::output(InputSection<ELFT> *S) { 381 if (!AlreadyOutputIS.insert(S).second) 382 return; 383 bool IsTbss = isTbss(CurOutSec); 384 385 uintX_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot; 386 Pos = alignTo(Pos, S->Alignment); 387 S->OutSecOff = Pos - CurOutSec->getVA(); 388 Pos += S->getSize(); 389 390 // Update output section size after adding each section. This is so that 391 // SIZEOF works correctly in the case below: 392 // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) } 393 CurOutSec->setSize(Pos - CurOutSec->getVA()); 394 395 if (IsTbss) 396 ThreadBssOffset = Pos - Dot; 397 else 398 Dot = Pos; 399 } 400 401 template <class ELFT> void LinkerScript<ELFT>::flush() { 402 if (!CurOutSec || !AlreadyOutputOS.insert(CurOutSec).second) 403 return; 404 if (auto *OutSec = dyn_cast<OutputSection<ELFT>>(CurOutSec)) { 405 for (InputSection<ELFT> *I : OutSec->Sections) 406 output(I); 407 } else { 408 Dot += CurOutSec->getSize(); 409 } 410 } 411 412 template <class ELFT> 413 void LinkerScript<ELFT>::switchTo(OutputSectionBase<ELFT> *Sec) { 414 if (CurOutSec == Sec) 415 return; 416 if (AlreadyOutputOS.count(Sec)) 417 return; 418 419 flush(); 420 CurOutSec = Sec; 421 422 Dot = alignTo(Dot, CurOutSec->getAlignment()); 423 CurOutSec->setVA(isTbss(CurOutSec) ? 
Dot + ThreadBssOffset : Dot); 424 425 // If neither AT nor AT> is specified for an allocatable section, the linker 426 // will set the LMA such that the difference between VMA and LMA for the 427 // section is the same as the preceding output section in the same region 428 // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html 429 CurOutSec->setLMAOffset(LMAOffset); 430 } 431 432 template <class ELFT> void LinkerScript<ELFT>::process(BaseCommand &Base) { 433 // This handles the assignments to symbol or to a location counter (.) 434 if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) { 435 if (AssignCmd->Name == ".") { 436 // Update to location counter means update to section size. 437 Dot = AssignCmd->Expression(Dot); 438 CurOutSec->setSize(Dot - CurOutSec->getVA()); 439 return; 440 } 441 assignSectionSymbol<ELFT>(AssignCmd, CurOutSec, Dot - CurOutSec->getVA()); 442 return; 443 } 444 445 // Handle BYTE(), SHORT(), LONG(), or QUAD(). 446 if (auto *DataCmd = dyn_cast<BytesDataCommand>(&Base)) { 447 DataCmd->Offset = Dot - CurOutSec->getVA(); 448 Dot += DataCmd->Size; 449 CurOutSec->setSize(Dot - CurOutSec->getVA()); 450 return; 451 } 452 453 // It handles single input section description command, 454 // calculates and assigns the offsets for each section and also 455 // updates the output section size. 
456 auto &ICmd = cast<InputSectionDescription>(Base); 457 for (InputSectionData *ID : ICmd.Sections) { 458 auto *IB = static_cast<InputSectionBase<ELFT> *>(ID); 459 switchTo(IB->OutSec); 460 if (auto *I = dyn_cast<InputSection<ELFT>>(IB)) 461 output(I); 462 else 463 flush(); 464 } 465 } 466 467 template <class ELFT> 468 static std::vector<OutputSectionBase<ELFT> *> 469 findSections(StringRef Name, 470 const std::vector<OutputSectionBase<ELFT> *> &Sections) { 471 std::vector<OutputSectionBase<ELFT> *> Ret; 472 for (OutputSectionBase<ELFT> *Sec : Sections) 473 if (Sec->getName() == Name) 474 Ret.push_back(Sec); 475 return Ret; 476 } 477 478 template <class ELFT> 479 void LinkerScript<ELFT>::assignOffsets(OutputSectionCommand *Cmd) { 480 if (Cmd->LMAExpr) 481 LMAOffset = Cmd->LMAExpr(Dot) - Dot; 482 std::vector<OutputSectionBase<ELFT> *> Sections = 483 findSections(Cmd->Name, *OutputSections); 484 if (Sections.empty()) 485 return; 486 switchTo(Sections[0]); 487 // Find the last section output location. We will output orphan sections 488 // there so that end symbols point to the correct location. 489 auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(), 490 [](const std::unique_ptr<BaseCommand> &Cmd) { 491 return !isa<SymbolAssignment>(*Cmd); 492 }) 493 .base(); 494 for (auto I = Cmd->Commands.begin(); I != E; ++I) 495 process(**I); 496 for (OutputSectionBase<ELFT> *Base : Sections) 497 switchTo(Base); 498 flush(); 499 std::for_each(E, Cmd->Commands.end(), 500 [this](std::unique_ptr<BaseCommand> &B) { process(*B.get()); }); 501 } 502 503 template <class ELFT> void LinkerScript<ELFT>::adjustSectionsBeforeSorting() { 504 // It is common practice to use very generic linker scripts. So for any 505 // given run some of the output sections in the script will be empty. 506 // We could create corresponding empty output sections, but that would 507 // clutter the output. 508 // We instead remove trivially empty sections. 
The bfd linker seems even 509 // more aggressive at removing them. 510 auto Pos = std::remove_if( 511 Opt.Commands.begin(), Opt.Commands.end(), 512 [&](const std::unique_ptr<BaseCommand> &Base) { 513 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 514 if (!Cmd) 515 return false; 516 std::vector<OutputSectionBase<ELFT> *> Secs = 517 findSections(Cmd->Name, *OutputSections); 518 if (!Secs.empty()) 519 return false; 520 for (const std::unique_ptr<BaseCommand> &I : Cmd->Commands) 521 if (!isa<InputSectionDescription>(I.get())) 522 return false; 523 return true; 524 }); 525 Opt.Commands.erase(Pos, Opt.Commands.end()); 526 527 // If the output section contains only symbol assignments, create a 528 // corresponding output section. The bfd linker seems to only create them if 529 // '.' is assigned to, but creating these section should not have any bad 530 // consequeces and gives us a section to put the symbol in. 531 uintX_t Flags = SHF_ALLOC; 532 uint32_t Type = 0; 533 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 534 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 535 if (!Cmd) 536 continue; 537 std::vector<OutputSectionBase<ELFT> *> Secs = 538 findSections(Cmd->Name, *OutputSections); 539 if (!Secs.empty()) { 540 Flags = Secs[0]->getFlags(); 541 Type = Secs[0]->getType(); 542 continue; 543 } 544 545 auto *OutSec = new OutputSection<ELFT>(Cmd->Name, Type, Flags); 546 Out<ELFT>::Pool.emplace_back(OutSec); 547 OutputSections->push_back(OutSec); 548 } 549 } 550 551 // When placing orphan sections, we want to place them after symbol assignments 552 // so that an orphan after 553 // begin_foo = .; 554 // foo : { *(foo) } 555 // end_foo = .; 556 // doesn't break the intended meaning of the begin/end symbols. 557 // We don't want to go over sections since Writer<ELFT>::sortSections is the 558 // one in charge of deciding the order of the sections. 
559 // We don't want to go over alignments, since doing so in 560 // rx_sec : { *(rx_sec) } 561 // . = ALIGN(0x1000); 562 // /* The RW PT_LOAD starts here*/ 563 // rw_sec : { *(rw_sec) } 564 // would mean that the RW PT_LOAD would become unaligned. 565 static bool shouldSkip(const BaseCommand &Cmd) { 566 if (isa<OutputSectionCommand>(Cmd)) 567 return false; 568 const auto *Assign = dyn_cast<SymbolAssignment>(&Cmd); 569 if (!Assign) 570 return true; 571 return Assign->Name != "."; 572 } 573 574 template <class ELFT> 575 void LinkerScript<ELFT>::assignAddresses(std::vector<PhdrEntry<ELFT>> &Phdrs) { 576 // Orphan sections are sections present in the input files which 577 // are not explicitly placed into the output file by the linker script. 578 // We place orphan sections at end of file. 579 // Other linkers places them using some heuristics as described in 580 // https://sourceware.org/binutils/docs/ld/Orphan-Sections.html#Orphan-Sections. 581 582 // The OutputSections are already in the correct order. 583 // This loops creates or moves commands as needed so that they are in the 584 // correct order. 585 int CmdIndex = 0; 586 for (OutputSectionBase<ELFT> *Sec : *OutputSections) { 587 StringRef Name = Sec->getName(); 588 589 // Find the last spot where we can insert a command and still get the 590 // correct result. 591 auto CmdIter = Opt.Commands.begin() + CmdIndex; 592 auto E = Opt.Commands.end(); 593 while (CmdIter != E && shouldSkip(**CmdIter)) { 594 ++CmdIter; 595 ++CmdIndex; 596 } 597 598 auto Pos = 599 std::find_if(CmdIter, E, [&](const std::unique_ptr<BaseCommand> &Base) { 600 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 601 return Cmd && Cmd->Name == Name; 602 }); 603 if (Pos == E) { 604 Opt.Commands.insert(CmdIter, 605 llvm::make_unique<OutputSectionCommand>(Name)); 606 ++CmdIndex; 607 continue; 608 } 609 610 // Continue from where we found it. 
611 CmdIndex = (Pos - Opt.Commands.begin()) + 1; 612 continue; 613 } 614 615 // Assign addresses as instructed by linker script SECTIONS sub-commands. 616 Dot = 0; 617 618 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 619 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) { 620 if (Cmd->Name == ".") { 621 Dot = Cmd->Expression(Dot); 622 } else if (Cmd->Sym) { 623 cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Cmd->Expression(Dot); 624 } 625 continue; 626 } 627 628 if (auto *Cmd = dyn_cast<AssertCommand>(Base.get())) { 629 Cmd->Expression(Dot); 630 continue; 631 } 632 633 auto *Cmd = cast<OutputSectionCommand>(Base.get()); 634 635 if (Cmd->AddrExpr) 636 Dot = Cmd->AddrExpr(Dot); 637 638 assignOffsets(Cmd); 639 } 640 641 uintX_t MinVA = std::numeric_limits<uintX_t>::max(); 642 for (OutputSectionBase<ELFT> *Sec : *OutputSections) { 643 if (Sec->getFlags() & SHF_ALLOC) 644 MinVA = std::min(MinVA, Sec->getVA()); 645 else 646 Sec->setVA(0); 647 } 648 649 uintX_t HeaderSize = getHeaderSize(); 650 auto FirstPTLoad = 651 std::find_if(Phdrs.begin(), Phdrs.end(), [](const PhdrEntry<ELFT> &E) { 652 return E.H.p_type == PT_LOAD; 653 }); 654 655 if (HeaderSize <= MinVA && FirstPTLoad != Phdrs.end()) { 656 // If linker script specifies program headers and first PT_LOAD doesn't 657 // have both PHDRS and FILEHDR attributes then do nothing 658 if (!Opt.PhdrsCommands.empty()) { 659 size_t SegNum = std::distance(Phdrs.begin(), FirstPTLoad); 660 if (!Opt.PhdrsCommands[SegNum].HasPhdrs || 661 !Opt.PhdrsCommands[SegNum].HasFilehdr) 662 return; 663 } 664 // ELF and Program headers need to be right before the first section in 665 // memory. Set their addresses accordingly. 
666 MinVA = alignDown(MinVA - HeaderSize, Target->PageSize); 667 Out<ELFT>::ElfHeader->setVA(MinVA); 668 Out<ELFT>::ProgramHeaders->setVA(Out<ELFT>::ElfHeader->getSize() + MinVA); 669 FirstPTLoad->First = Out<ELFT>::ElfHeader; 670 if (!FirstPTLoad->Last) 671 FirstPTLoad->Last = Out<ELFT>::ProgramHeaders; 672 } else if (!FirstPTLoad->First) { 673 // Sometimes the very first PT_LOAD segment can be empty. 674 // This happens if (all conditions met): 675 // - Linker script is used 676 // - First section in ELF image is not RO 677 // - Not enough space for program headers. 678 // The code below removes empty PT_LOAD segment and updates 679 // program headers size. 680 Phdrs.erase(FirstPTLoad); 681 Out<ELFT>::ProgramHeaders->setSize(sizeof(typename ELFT::Phdr) * 682 Phdrs.size()); 683 } 684 } 685 686 // Creates program headers as instructed by PHDRS linker script command. 687 template <class ELFT> 688 std::vector<PhdrEntry<ELFT>> LinkerScript<ELFT>::createPhdrs() { 689 std::vector<PhdrEntry<ELFT>> Ret; 690 691 // Process PHDRS and FILEHDR keywords because they are not 692 // real output sections and cannot be added in the following loop. 693 std::vector<size_t> DefPhdrIds; 694 for (const PhdrsCommand &Cmd : Opt.PhdrsCommands) { 695 Ret.emplace_back(Cmd.Type, Cmd.Flags == UINT_MAX ? PF_R : Cmd.Flags); 696 PhdrEntry<ELFT> &Phdr = Ret.back(); 697 698 if (Cmd.HasFilehdr) 699 Phdr.add(Out<ELFT>::ElfHeader); 700 if (Cmd.HasPhdrs) 701 Phdr.add(Out<ELFT>::ProgramHeaders); 702 703 if (Cmd.LMAExpr) { 704 Phdr.H.p_paddr = Cmd.LMAExpr(0); 705 Phdr.HasLMA = true; 706 } 707 708 // If output section command doesn't specify any segments, 709 // and we haven't previously assigned any section to segment, 710 // then we simply assign section to the very first load segment. 
711 // Below is an example of such linker script: 712 // PHDRS { seg PT_LOAD; } 713 // SECTIONS { .aaa : { *(.aaa) } } 714 if (DefPhdrIds.empty() && Phdr.H.p_type == PT_LOAD) 715 DefPhdrIds.push_back(Ret.size() - 1); 716 } 717 718 // Add output sections to program headers. 719 for (OutputSectionBase<ELFT> *Sec : *OutputSections) { 720 if (!(Sec->getFlags() & SHF_ALLOC)) 721 break; 722 723 std::vector<size_t> PhdrIds = getPhdrIndices(Sec->getName()); 724 if (PhdrIds.empty()) 725 PhdrIds = std::move(DefPhdrIds); 726 727 // Assign headers specified by linker script 728 for (size_t Id : PhdrIds) { 729 Ret[Id].add(Sec); 730 if (Opt.PhdrsCommands[Id].Flags == UINT_MAX) 731 Ret[Id].H.p_flags |= Sec->getPhdrFlags(); 732 } 733 DefPhdrIds = std::move(PhdrIds); 734 } 735 return Ret; 736 } 737 738 template <class ELFT> bool LinkerScript<ELFT>::ignoreInterpSection() { 739 // Ignore .interp section in case we have PHDRS specification 740 // and PT_INTERP isn't listed. 741 return !Opt.PhdrsCommands.empty() && 742 llvm::find_if(Opt.PhdrsCommands, [](const PhdrsCommand &Cmd) { 743 return Cmd.Type == PT_INTERP; 744 }) == Opt.PhdrsCommands.end(); 745 } 746 747 template <class ELFT> 748 ArrayRef<uint8_t> LinkerScript<ELFT>::getFiller(StringRef Name) { 749 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 750 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 751 if (Cmd->Name == Name) 752 return Cmd->Filler; 753 return {}; 754 } 755 756 template <class ELFT> 757 static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) { 758 const endianness E = ELFT::TargetEndianness; 759 760 switch (Size) { 761 case 1: 762 *Buf = (uint8_t)Data; 763 break; 764 case 2: 765 write16<E>(Buf, Data); 766 break; 767 case 4: 768 write32<E>(Buf, Data); 769 break; 770 case 8: 771 write64<E>(Buf, Data); 772 break; 773 default: 774 llvm_unreachable("unsupported Size argument"); 775 } 776 } 777 778 template <class ELFT> 779 void LinkerScript<ELFT>::writeDataBytes(StringRef Name, 
uint8_t *Buf) { 780 int I = getSectionIndex(Name); 781 if (I == INT_MAX) 782 return; 783 784 OutputSectionCommand *Cmd = 785 dyn_cast<OutputSectionCommand>(Opt.Commands[I].get()); 786 for (const std::unique_ptr<BaseCommand> &Base2 : Cmd->Commands) 787 if (auto *DataCmd = dyn_cast<BytesDataCommand>(Base2.get())) 788 writeInt<ELFT>(&Buf[DataCmd->Offset], DataCmd->Data, DataCmd->Size); 789 } 790 791 template <class ELFT> bool LinkerScript<ELFT>::hasLMA(StringRef Name) { 792 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 793 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 794 if (Cmd->LMAExpr && Cmd->Name == Name) 795 return true; 796 return false; 797 } 798 799 // Returns the index of the given section name in linker script 800 // SECTIONS commands. Sections are laid out as the same order as they 801 // were in the script. If a given name did not appear in the script, 802 // it returns INT_MAX, so that it will be laid out at end of file. 803 template <class ELFT> int LinkerScript<ELFT>::getSectionIndex(StringRef Name) { 804 int I = 0; 805 for (std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 806 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 807 if (Cmd->Name == Name) 808 return I; 809 ++I; 810 } 811 return INT_MAX; 812 } 813 814 template <class ELFT> bool LinkerScript<ELFT>::hasPhdrsCommands() { 815 return !Opt.PhdrsCommands.empty(); 816 } 817 818 template <class ELFT> 819 uint64_t LinkerScript<ELFT>::getOutputSectionAddress(StringRef Name) { 820 for (OutputSectionBase<ELFT> *Sec : *OutputSections) 821 if (Sec->getName() == Name) 822 return Sec->getVA(); 823 error("undefined section " + Name); 824 return 0; 825 } 826 827 template <class ELFT> 828 uint64_t LinkerScript<ELFT>::getOutputSectionLMA(StringRef Name) { 829 for (OutputSectionBase<ELFT> *Sec : *OutputSections) 830 if (Sec->getName() == Name) 831 return Sec->getLMA(); 832 error("undefined section " + Name); 833 return 0; 834 } 835 836 template <class ELFT> 837 
uint64_t LinkerScript<ELFT>::getOutputSectionSize(StringRef Name) { 838 for (OutputSectionBase<ELFT> *Sec : *OutputSections) 839 if (Sec->getName() == Name) 840 return Sec->getSize(); 841 error("undefined section " + Name); 842 return 0; 843 } 844 845 template <class ELFT> 846 uint64_t LinkerScript<ELFT>::getOutputSectionAlign(StringRef Name) { 847 for (OutputSectionBase<ELFT> *Sec : *OutputSections) 848 if (Sec->getName() == Name) 849 return Sec->getAlignment(); 850 error("undefined section " + Name); 851 return 0; 852 } 853 854 template <class ELFT> uint64_t LinkerScript<ELFT>::getHeaderSize() { 855 return elf::getHeaderSize<ELFT>(); 856 } 857 858 template <class ELFT> uint64_t LinkerScript<ELFT>::getSymbolValue(StringRef S) { 859 if (SymbolBody *B = Symtab<ELFT>::X->find(S)) 860 return B->getVA<ELFT>(); 861 error("symbol not found: " + S); 862 return 0; 863 } 864 865 template <class ELFT> bool LinkerScript<ELFT>::isDefined(StringRef S) { 866 return Symtab<ELFT>::X->find(S) != nullptr; 867 } 868 869 // Returns indices of ELF headers containing specific section, identified 870 // by Name. Each index is a zero based number of ELF header listed within 871 // PHDRS {} script block. 
// Looks up the first output section command named SectionName and returns
// the index of every program header it lists. Returns an empty vector if no
// command has that name.
template <class ELFT>
std::vector<size_t> LinkerScript<ELFT>::getPhdrIndices(StringRef SectionName) {
  for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
    auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
    if (!Cmd || Cmd->Name != SectionName)
      continue;

    std::vector<size_t> Ret;
    for (StringRef PhdrName : Cmd->Phdrs)
      Ret.push_back(getPhdrIndex(PhdrName));
    return Ret;
  }
  return {};
}

// Returns the zero-based position of the PHDRS entry called PhdrName.
// If there is no such entry an error is reported and 0 is returned, so the
// result is only meaningful when no error was raised.
// NOTE(review): the message says "section header" although the lookup is
// over program headers (Opt.PhdrsCommands) -- confirm whether the wording
// is intentional before changing it.
template <class ELFT>
size_t LinkerScript<ELFT>::getPhdrIndex(StringRef PhdrName) {
  size_t I = 0;
  for (PhdrsCommand &Cmd : Opt.PhdrsCommands) {
    if (Cmd.Name == PhdrName)
      return I;
    ++I;
  }
  error("section header '" + PhdrName + "' is not listed in PHDRS");
  return 0;
}

// Parser for linker scripts and version scripts, built on the tokenizer
// helpers of ScriptParserBase. Results of parsing a linker script are
// written to the global script configuration (see Opt below).
class elf::ScriptParser : public ScriptParserBase {
  typedef void (ScriptParser::*Handler)();

public:
  // S is the script text; B initializes IsUnderSysroot.
  ScriptParser(StringRef S, bool B) : ScriptParserBase(S), IsUnderSysroot(B) {}

  // Entry points: parse the whole input as a linker script or as a version
  // script respectively.
  void readLinkerScript();
  void readVersionScript();

private:
  void addFile(StringRef Path);

  // Readers for top-level commands.
  void readAsNeeded();
  void readEntry();
  void readExtern();
  void readGroup();
  void readInclude();
  void readOutput();
  void readOutputArch();
  void readOutputFormat();
  void readPhdrs();
  void readSearchDir();
  void readSections();
  void readVersion();
  void readVersionScriptCommand();

  // Readers for the pieces of a SECTIONS command.
  SymbolAssignment *readAssignment(StringRef Name);
  BytesDataCommand *readBytesDataCommand(StringRef Tok);
  std::vector<uint8_t> readFill();
  OutputSectionCommand *readOutputSectionDescription(StringRef OutSec);
  std::vector<uint8_t> readOutputSectionFiller(StringRef Tok);
  std::vector<StringRef> readOutputSectionPhdrs();
  InputSectionDescription *readInputSectionDescription(StringRef Tok);
  Regex readFilePatterns();
  std::vector<SectionPattern> readInputSectionsList();
  InputSectionDescription *readInputSectionRules(StringRef FilePattern);
  unsigned readPhdrType();
  SortSectionPolicy readSortKind();
  SymbolAssignment *readProvideHidden(bool Provide, bool Hidden);
  SymbolAssignment *readProvideOrAssignment(StringRef Tok, bool MakeAbsolute);
  void readSort();
  Expr readAssert();

  // Expression parsing.
  Expr readExpr();
  Expr readExpr1(Expr Lhs, int MinPrec);
  StringRef readParenLiteral();
  Expr readPrimary();
  Expr readTernary(Expr Cond);
  Expr readParenExpr();

  // For parsing version script.
  void readExtern(std::vector<SymbolVersion> *Globals);
  void readVersionDeclaration(StringRef VerStr);
  void readGlobal(StringRef VerStr);
  void readLocal();

  // Shorthand for the global script configuration; bound when the parser is
  // constructed, so ScriptConfig must point to a valid object by then.
  ScriptConfiguration &Opt = *ScriptConfig;
  // String saver backed by the allocator of the script configuration.
  StringSaver Saver = {ScriptConfig->Alloc};
  bool IsUnderSysroot;
};

// Parses a complete version script. Anything left over after the version
// definitions is an error.
void ScriptParser::readVersionScript() {
  readVersionScriptCommand();
  if (!atEOF())
    setError("EOF expected, but got " + next());
}

// Parses either a single anonymous version definition ("{ ... }") or a
// sequence of named ones ("VER { ... }"). Parsing stops at end of input, on
// error, or in front of a "}" token, which is left for the caller.
void ScriptParser::readVersionScriptCommand() {
  if (consume("{")) {
    readVersionDeclaration("");
    return;
  }

  while (!atEOF() && !Error && peek() != "}") {
    StringRef VerStr = next();
    // A "{" where a version name is expected means an anonymous definition
    // follows named ones, which is not allowed.
    if (VerStr == "{") {
      setError("anonymous version definition is used in "
               "combination with other version definitions");
      return;
    }
    expect("{");
    readVersionDeclaration(VerStr);
  }
}

// Parses the body of a VERSION command: version definitions enclosed in
// braces.
void ScriptParser::readVersion() {
  expect("{");
  readVersionScriptCommand();
  expect("}");
}

// Top-level loop of the linker script parser: reads tokens until end of
// input, skips stray semicolons and dispatches on the command keyword.
void ScriptParser::readLinkerScript() {
  while (!atEOF()) {
    StringRef Tok = next();
    if (Tok == ";")
      continue;

    if (Tok == "ASSERT") {
      Opt.Commands.emplace_back(new AssertCommand(readAssert()));
    } else if (Tok == "ENTRY") {
      readEntry();
    } else if (Tok == "EXTERN") {
      readExtern();
    } else if (Tok == "GROUP" || Tok == "INPUT") {
      readGroup();
    } else if (Tok == "INCLUDE") {
      readInclude();
    } else if (Tok == "OUTPUT") {
      readOutput();
} else if (Tok == "OUTPUT_ARCH") { 1009 readOutputArch(); 1010 } else if (Tok == "OUTPUT_FORMAT") { 1011 readOutputFormat(); 1012 } else if (Tok == "PHDRS") { 1013 readPhdrs(); 1014 } else if (Tok == "SEARCH_DIR") { 1015 readSearchDir(); 1016 } else if (Tok == "SECTIONS") { 1017 readSections(); 1018 } else if (Tok == "VERSION") { 1019 readVersion(); 1020 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok, true)) { 1021 Opt.Commands.emplace_back(Cmd); 1022 } else { 1023 setError("unknown directive: " + Tok); 1024 } 1025 } 1026 } 1027 1028 void ScriptParser::addFile(StringRef S) { 1029 if (IsUnderSysroot && S.startswith("/")) { 1030 SmallString<128> PathData; 1031 StringRef Path = (Config->Sysroot + S).toStringRef(PathData); 1032 if (sys::fs::exists(Path)) { 1033 Driver->addFile(Saver.save(Path)); 1034 return; 1035 } 1036 } 1037 1038 if (sys::path::is_absolute(S)) { 1039 Driver->addFile(S); 1040 } else if (S.startswith("=")) { 1041 if (Config->Sysroot.empty()) 1042 Driver->addFile(S.substr(1)); 1043 else 1044 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1))); 1045 } else if (S.startswith("-l")) { 1046 Driver->addLibrary(S.substr(2)); 1047 } else if (sys::fs::exists(S)) { 1048 Driver->addFile(S); 1049 } else { 1050 std::string Path = findFromSearchPaths(S); 1051 if (Path.empty()) 1052 setError("unable to find " + S); 1053 else 1054 Driver->addFile(Saver.save(Path)); 1055 } 1056 } 1057 1058 void ScriptParser::readAsNeeded() { 1059 expect("("); 1060 bool Orig = Config->AsNeeded; 1061 Config->AsNeeded = true; 1062 while (!Error && !consume(")")) 1063 addFile(unquote(next())); 1064 Config->AsNeeded = Orig; 1065 } 1066 1067 void ScriptParser::readEntry() { 1068 // -e <symbol> takes predecence over ENTRY(<symbol>). 
1069 expect("("); 1070 StringRef Tok = next(); 1071 if (Config->Entry.empty()) 1072 Config->Entry = Tok; 1073 expect(")"); 1074 } 1075 1076 void ScriptParser::readExtern() { 1077 expect("("); 1078 while (!Error && !consume(")")) 1079 Config->Undefined.push_back(next()); 1080 } 1081 1082 void ScriptParser::readGroup() { 1083 expect("("); 1084 while (!Error && !consume(")")) { 1085 StringRef Tok = next(); 1086 if (Tok == "AS_NEEDED") 1087 readAsNeeded(); 1088 else 1089 addFile(unquote(Tok)); 1090 } 1091 } 1092 1093 void ScriptParser::readInclude() { 1094 StringRef Tok = next(); 1095 auto MBOrErr = MemoryBuffer::getFile(unquote(Tok)); 1096 if (!MBOrErr) { 1097 setError("cannot open " + Tok); 1098 return; 1099 } 1100 std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; 1101 StringRef S = Saver.save(MB->getMemBufferRef().getBuffer()); 1102 std::vector<StringRef> V = tokenize(S); 1103 Tokens.insert(Tokens.begin() + Pos, V.begin(), V.end()); 1104 } 1105 1106 void ScriptParser::readOutput() { 1107 // -o <file> takes predecence over OUTPUT(<file>). 1108 expect("("); 1109 StringRef Tok = next(); 1110 if (Config->OutputFile.empty()) 1111 Config->OutputFile = unquote(Tok); 1112 expect(")"); 1113 } 1114 1115 void ScriptParser::readOutputArch() { 1116 // Error checking only for now. 1117 expect("("); 1118 skip(); 1119 expect(")"); 1120 } 1121 1122 void ScriptParser::readOutputFormat() { 1123 // Error checking only for now. 
1124 expect("("); 1125 skip(); 1126 StringRef Tok = next(); 1127 if (Tok == ")") 1128 return; 1129 if (Tok != ",") { 1130 setError("unexpected token: " + Tok); 1131 return; 1132 } 1133 skip(); 1134 expect(","); 1135 skip(); 1136 expect(")"); 1137 } 1138 1139 void ScriptParser::readPhdrs() { 1140 expect("{"); 1141 while (!Error && !consume("}")) { 1142 StringRef Tok = next(); 1143 Opt.PhdrsCommands.push_back( 1144 {Tok, PT_NULL, false, false, UINT_MAX, nullptr}); 1145 PhdrsCommand &PhdrCmd = Opt.PhdrsCommands.back(); 1146 1147 PhdrCmd.Type = readPhdrType(); 1148 do { 1149 Tok = next(); 1150 if (Tok == ";") 1151 break; 1152 if (Tok == "FILEHDR") 1153 PhdrCmd.HasFilehdr = true; 1154 else if (Tok == "PHDRS") 1155 PhdrCmd.HasPhdrs = true; 1156 else if (Tok == "AT") 1157 PhdrCmd.LMAExpr = readParenExpr(); 1158 else if (Tok == "FLAGS") { 1159 expect("("); 1160 // Passing 0 for the value of dot is a bit of a hack. It means that 1161 // we accept expressions like ".|1". 1162 PhdrCmd.Flags = readExpr()(0); 1163 expect(")"); 1164 } else 1165 setError("unexpected header attribute: " + Tok); 1166 } while (!Error); 1167 } 1168 } 1169 1170 void ScriptParser::readSearchDir() { 1171 expect("("); 1172 StringRef Tok = next(); 1173 if (!Config->Nostdlib) 1174 Config->SearchPaths.push_back(unquote(Tok)); 1175 expect(")"); 1176 } 1177 1178 void ScriptParser::readSections() { 1179 Opt.HasSections = true; 1180 expect("{"); 1181 while (!Error && !consume("}")) { 1182 StringRef Tok = next(); 1183 BaseCommand *Cmd = readProvideOrAssignment(Tok, true); 1184 if (!Cmd) { 1185 if (Tok == "ASSERT") 1186 Cmd = new AssertCommand(readAssert()); 1187 else 1188 Cmd = readOutputSectionDescription(Tok); 1189 } 1190 Opt.Commands.emplace_back(Cmd); 1191 } 1192 } 1193 1194 static int precedence(StringRef Op) { 1195 return StringSwitch<int>(Op) 1196 .Cases("*", "/", 5) 1197 .Cases("+", "-", 4) 1198 .Cases("<<", ">>", 3) 1199 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 1200 .Cases("&", "|", 1) 1201 
.Default(-1); 1202 } 1203 1204 Regex ScriptParser::readFilePatterns() { 1205 std::vector<StringRef> V; 1206 while (!Error && !consume(")")) 1207 V.push_back(next()); 1208 return compileGlobPatterns(V); 1209 } 1210 1211 SortSectionPolicy ScriptParser::readSortKind() { 1212 if (consume("SORT") || consume("SORT_BY_NAME")) 1213 return SortSectionPolicy::Name; 1214 if (consume("SORT_BY_ALIGNMENT")) 1215 return SortSectionPolicy::Alignment; 1216 if (consume("SORT_BY_INIT_PRIORITY")) 1217 return SortSectionPolicy::Priority; 1218 if (consume("SORT_NONE")) 1219 return SortSectionPolicy::None; 1220 return SortSectionPolicy::Default; 1221 } 1222 1223 // Method reads a list of sequence of excluded files and section globs given in 1224 // a following form: ((EXCLUDE_FILE(file_pattern+))? section_pattern+)+ 1225 // Example: *(.foo.1 EXCLUDE_FILE (*a.o) .foo.2 EXCLUDE_FILE (*b.o) .foo.3) 1226 // The semantics of that is next: 1227 // * Include .foo.1 from every file. 1228 // * Include .foo.2 from every file but a.o 1229 // * Include .foo.3 from every file but b.o 1230 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 1231 std::vector<SectionPattern> Ret; 1232 while (!Error && peek() != ")") { 1233 Regex ExcludeFileRe; 1234 if (consume("EXCLUDE_FILE")) { 1235 expect("("); 1236 ExcludeFileRe = readFilePatterns(); 1237 } 1238 1239 std::vector<StringRef> V; 1240 while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") 1241 V.push_back(next()); 1242 1243 if (!V.empty()) 1244 Ret.push_back({std::move(ExcludeFileRe), compileGlobPatterns(V)}); 1245 else 1246 setError("section pattern is expected"); 1247 } 1248 return Ret; 1249 } 1250 1251 // Section pattern grammar can have complex expressions, for example: 1252 // *(SORT(.foo.* EXCLUDE_FILE (*file1.o) .bar.*) .bar.* SORT(.zed.*)) 1253 // Generally is a sequence of globs and excludes that may be wrapped in a SORT() 1254 // commands, like: SORT(glob0) glob1 glob2 SORT(glob4) 1255 // This methods handles wrapping 
sequences of excluded files and section globs 1256 // into SORT() if that needed and reads them all. 1257 InputSectionDescription * 1258 ScriptParser::readInputSectionRules(StringRef FilePattern) { 1259 auto *Cmd = new InputSectionDescription(FilePattern); 1260 expect("("); 1261 while (!HasError && !consume(")")) { 1262 SortSectionPolicy Outer = readSortKind(); 1263 SortSectionPolicy Inner = SortSectionPolicy::Default; 1264 std::vector<SectionPattern> V; 1265 if (Outer != SortSectionPolicy::Default) { 1266 expect("("); 1267 Inner = readSortKind(); 1268 if (Inner != SortSectionPolicy::Default) { 1269 expect("("); 1270 V = readInputSectionsList(); 1271 expect(")"); 1272 } else { 1273 V = readInputSectionsList(); 1274 } 1275 expect(")"); 1276 } else { 1277 V = readInputSectionsList(); 1278 } 1279 1280 for (SectionPattern &Pat : V) { 1281 Pat.SortInner = Inner; 1282 Pat.SortOuter = Outer; 1283 } 1284 1285 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 1286 } 1287 return Cmd; 1288 } 1289 1290 InputSectionDescription * 1291 ScriptParser::readInputSectionDescription(StringRef Tok) { 1292 // Input section wildcard can be surrounded by KEEP. 1293 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 1294 if (Tok == "KEEP") { 1295 expect("("); 1296 StringRef FilePattern = next(); 1297 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 1298 expect(")"); 1299 Opt.KeptSections.push_back(Cmd); 1300 return Cmd; 1301 } 1302 return readInputSectionRules(Tok); 1303 } 1304 1305 void ScriptParser::readSort() { 1306 expect("("); 1307 expect("CONSTRUCTORS"); 1308 expect(")"); 1309 } 1310 1311 Expr ScriptParser::readAssert() { 1312 expect("("); 1313 Expr E = readExpr(); 1314 expect(","); 1315 StringRef Msg = unquote(next()); 1316 expect(")"); 1317 return [=](uint64_t Dot) { 1318 uint64_t V = E(Dot); 1319 if (!V) 1320 error(Msg); 1321 return V; 1322 }; 1323 } 1324 1325 // Reads a FILL(expr) command. 
We handle the FILL command as an 1326 // alias for =fillexp section attribute, which is different from 1327 // what GNU linkers do. 1328 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 1329 std::vector<uint8_t> ScriptParser::readFill() { 1330 expect("("); 1331 std::vector<uint8_t> V = readOutputSectionFiller(next()); 1332 expect(")"); 1333 expect(";"); 1334 return V; 1335 } 1336 1337 OutputSectionCommand * 1338 ScriptParser::readOutputSectionDescription(StringRef OutSec) { 1339 OutputSectionCommand *Cmd = new OutputSectionCommand(OutSec); 1340 1341 // Read an address expression. 1342 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html#Output-Section-Address 1343 if (peek() != ":") 1344 Cmd->AddrExpr = readExpr(); 1345 1346 expect(":"); 1347 1348 if (consume("AT")) 1349 Cmd->LMAExpr = readParenExpr(); 1350 if (consume("ALIGN")) 1351 Cmd->AlignExpr = readParenExpr(); 1352 if (consume("SUBALIGN")) 1353 Cmd->SubalignExpr = readParenExpr(); 1354 1355 // Parse constraints. 
1356 if (consume("ONLY_IF_RO")) 1357 Cmd->Constraint = ConstraintKind::ReadOnly; 1358 if (consume("ONLY_IF_RW")) 1359 Cmd->Constraint = ConstraintKind::ReadWrite; 1360 expect("{"); 1361 1362 while (!Error && !consume("}")) { 1363 StringRef Tok = next(); 1364 if (SymbolAssignment *Assignment = readProvideOrAssignment(Tok, false)) 1365 Cmd->Commands.emplace_back(Assignment); 1366 else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) 1367 Cmd->Commands.emplace_back(Data); 1368 else if (Tok == "FILL") 1369 Cmd->Filler = readFill(); 1370 else if (Tok == "SORT") 1371 readSort(); 1372 else if (peek() == "(") 1373 Cmd->Commands.emplace_back(readInputSectionDescription(Tok)); 1374 else 1375 setError("unknown command " + Tok); 1376 } 1377 Cmd->Phdrs = readOutputSectionPhdrs(); 1378 1379 if (consume("=")) 1380 Cmd->Filler = readOutputSectionFiller(next()); 1381 else if (peek().startswith("=")) 1382 Cmd->Filler = readOutputSectionFiller(next().drop_front()); 1383 1384 return Cmd; 1385 } 1386 1387 // Read "=<number>" where <number> is an octal/decimal/hexadecimal number. 1388 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 1389 // 1390 // ld.gold is not fully compatible with ld.bfd. ld.bfd handles 1391 // hexstrings as blobs of arbitrary sizes, while ld.gold handles them 1392 // as 32-bit big-endian values. We will do the same as ld.gold does 1393 // because it's simpler than what ld.bfd does. 
1394 std::vector<uint8_t> ScriptParser::readOutputSectionFiller(StringRef Tok) { 1395 uint32_t V; 1396 if (Tok.getAsInteger(0, V)) { 1397 setError("invalid filler expression: " + Tok); 1398 return {}; 1399 } 1400 return {uint8_t(V >> 24), uint8_t(V >> 16), uint8_t(V >> 8), uint8_t(V)}; 1401 } 1402 1403 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { 1404 expect("("); 1405 SymbolAssignment *Cmd = readAssignment(next()); 1406 Cmd->Provide = Provide; 1407 Cmd->Hidden = Hidden; 1408 expect(")"); 1409 expect(";"); 1410 return Cmd; 1411 } 1412 1413 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok, 1414 bool MakeAbsolute) { 1415 SymbolAssignment *Cmd = nullptr; 1416 if (peek() == "=" || peek() == "+=") { 1417 Cmd = readAssignment(Tok); 1418 expect(";"); 1419 } else if (Tok == "PROVIDE") { 1420 Cmd = readProvideHidden(true, false); 1421 } else if (Tok == "HIDDEN") { 1422 Cmd = readProvideHidden(false, true); 1423 } else if (Tok == "PROVIDE_HIDDEN") { 1424 Cmd = readProvideHidden(true, true); 1425 } 1426 if (Cmd && MakeAbsolute) 1427 Cmd->IsAbsolute = true; 1428 return Cmd; 1429 } 1430 1431 static uint64_t getSymbolValue(StringRef S, uint64_t Dot) { 1432 if (S == ".") 1433 return Dot; 1434 return ScriptBase->getSymbolValue(S); 1435 } 1436 1437 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { 1438 StringRef Op = next(); 1439 bool IsAbsolute = false; 1440 Expr E; 1441 assert(Op == "=" || Op == "+="); 1442 if (consume("ABSOLUTE")) { 1443 // The RHS may be something like "ABSOLUTE(.) & 0xff". 1444 // Call readExpr1 to read the whole expression. 1445 E = readExpr1(readParenExpr(), 0); 1446 IsAbsolute = true; 1447 } else { 1448 E = readExpr(); 1449 } 1450 if (Op == "+=") 1451 E = [=](uint64_t Dot) { return getSymbolValue(Name, Dot) + E(Dot); }; 1452 return new SymbolAssignment(Name, E, IsAbsolute); 1453 } 1454 1455 // This is an operator-precedence parser to parse a linker 1456 // script expression. 
1457 Expr ScriptParser::readExpr() { return readExpr1(readPrimary(), 0); } 1458 1459 static Expr combine(StringRef Op, Expr L, Expr R) { 1460 if (Op == "*") 1461 return [=](uint64_t Dot) { return L(Dot) * R(Dot); }; 1462 if (Op == "/") { 1463 return [=](uint64_t Dot) -> uint64_t { 1464 uint64_t RHS = R(Dot); 1465 if (RHS == 0) { 1466 error("division by zero"); 1467 return 0; 1468 } 1469 return L(Dot) / RHS; 1470 }; 1471 } 1472 if (Op == "+") 1473 return [=](uint64_t Dot) { return L(Dot) + R(Dot); }; 1474 if (Op == "-") 1475 return [=](uint64_t Dot) { return L(Dot) - R(Dot); }; 1476 if (Op == "<<") 1477 return [=](uint64_t Dot) { return L(Dot) << R(Dot); }; 1478 if (Op == ">>") 1479 return [=](uint64_t Dot) { return L(Dot) >> R(Dot); }; 1480 if (Op == "<") 1481 return [=](uint64_t Dot) { return L(Dot) < R(Dot); }; 1482 if (Op == ">") 1483 return [=](uint64_t Dot) { return L(Dot) > R(Dot); }; 1484 if (Op == ">=") 1485 return [=](uint64_t Dot) { return L(Dot) >= R(Dot); }; 1486 if (Op == "<=") 1487 return [=](uint64_t Dot) { return L(Dot) <= R(Dot); }; 1488 if (Op == "==") 1489 return [=](uint64_t Dot) { return L(Dot) == R(Dot); }; 1490 if (Op == "!=") 1491 return [=](uint64_t Dot) { return L(Dot) != R(Dot); }; 1492 if (Op == "&") 1493 return [=](uint64_t Dot) { return L(Dot) & R(Dot); }; 1494 if (Op == "|") 1495 return [=](uint64_t Dot) { return L(Dot) | R(Dot); }; 1496 llvm_unreachable("invalid operator"); 1497 } 1498 1499 // This is a part of the operator-precedence parser. This function 1500 // assumes that the remaining token stream starts with an operator. 1501 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { 1502 while (!atEOF() && !Error) { 1503 // Read an operator and an expression. 
1504 StringRef Op1 = peek(); 1505 if (Op1 == "?") 1506 return readTernary(Lhs); 1507 if (precedence(Op1) < MinPrec) 1508 break; 1509 skip(); 1510 Expr Rhs = readPrimary(); 1511 1512 // Evaluate the remaining part of the expression first if the 1513 // next operator has greater precedence than the previous one. 1514 // For example, if we have read "+" and "3", and if the next 1515 // operator is "*", then we'll evaluate 3 * ... part first. 1516 while (!atEOF()) { 1517 StringRef Op2 = peek(); 1518 if (precedence(Op2) <= precedence(Op1)) 1519 break; 1520 Rhs = readExpr1(Rhs, precedence(Op2)); 1521 } 1522 1523 Lhs = combine(Op1, Lhs, Rhs); 1524 } 1525 return Lhs; 1526 } 1527 1528 uint64_t static getConstant(StringRef S) { 1529 if (S == "COMMONPAGESIZE") 1530 return Target->PageSize; 1531 if (S == "MAXPAGESIZE") 1532 return Config->MaxPageSize; 1533 error("unknown constant: " + S); 1534 return 0; 1535 } 1536 1537 // Parses Tok as an integer. Returns true if successful. 1538 // It recognizes hexadecimal (prefixed with "0x" or suffixed with "H") 1539 // and decimal numbers. Decimal numbers may have "K" (kilo) or 1540 // "M" (mega) prefixes. 
1541 static bool readInteger(StringRef Tok, uint64_t &Result) { 1542 if (Tok.startswith("-")) { 1543 if (!readInteger(Tok.substr(1), Result)) 1544 return false; 1545 Result = -Result; 1546 return true; 1547 } 1548 if (Tok.startswith_lower("0x")) 1549 return !Tok.substr(2).getAsInteger(16, Result); 1550 if (Tok.endswith_lower("H")) 1551 return !Tok.drop_back().getAsInteger(16, Result); 1552 1553 int Suffix = 1; 1554 if (Tok.endswith_lower("K")) { 1555 Suffix = 1024; 1556 Tok = Tok.drop_back(); 1557 } else if (Tok.endswith_lower("M")) { 1558 Suffix = 1024 * 1024; 1559 Tok = Tok.drop_back(); 1560 } 1561 if (Tok.getAsInteger(10, Result)) 1562 return false; 1563 Result *= Suffix; 1564 return true; 1565 } 1566 1567 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { 1568 int Size = StringSwitch<unsigned>(Tok) 1569 .Case("BYTE", 1) 1570 .Case("SHORT", 2) 1571 .Case("LONG", 4) 1572 .Case("QUAD", 8) 1573 .Default(-1); 1574 if (Size == -1) 1575 return nullptr; 1576 1577 expect("("); 1578 uint64_t Val = 0; 1579 StringRef S = next(); 1580 if (!readInteger(S, Val)) 1581 setError("unexpected value: " + S); 1582 expect(")"); 1583 return new BytesDataCommand(Val, Size); 1584 } 1585 1586 StringRef ScriptParser::readParenLiteral() { 1587 expect("("); 1588 StringRef Tok = next(); 1589 expect(")"); 1590 return Tok; 1591 } 1592 1593 Expr ScriptParser::readPrimary() { 1594 if (peek() == "(") 1595 return readParenExpr(); 1596 1597 StringRef Tok = next(); 1598 1599 if (Tok == "~") { 1600 Expr E = readPrimary(); 1601 return [=](uint64_t Dot) { return ~E(Dot); }; 1602 } 1603 if (Tok == "-") { 1604 Expr E = readPrimary(); 1605 return [=](uint64_t Dot) { return -E(Dot); }; 1606 } 1607 1608 // Built-in functions are parsed here. 1609 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 
1610 if (Tok == "ADDR") { 1611 StringRef Name = readParenLiteral(); 1612 return 1613 [=](uint64_t Dot) { return ScriptBase->getOutputSectionAddress(Name); }; 1614 } 1615 if (Tok == "LOADADDR") { 1616 StringRef Name = readParenLiteral(); 1617 return [=](uint64_t Dot) { return ScriptBase->getOutputSectionLMA(Name); }; 1618 } 1619 if (Tok == "ASSERT") 1620 return readAssert(); 1621 if (Tok == "ALIGN") { 1622 Expr E = readParenExpr(); 1623 return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; 1624 } 1625 if (Tok == "CONSTANT") { 1626 StringRef Name = readParenLiteral(); 1627 return [=](uint64_t Dot) { return getConstant(Name); }; 1628 } 1629 if (Tok == "DEFINED") { 1630 expect("("); 1631 StringRef Tok = next(); 1632 expect(")"); 1633 return [=](uint64_t Dot) { return ScriptBase->isDefined(Tok) ? 1 : 0; }; 1634 } 1635 if (Tok == "SEGMENT_START") { 1636 expect("("); 1637 skip(); 1638 expect(","); 1639 Expr E = readExpr(); 1640 expect(")"); 1641 return [=](uint64_t Dot) { return E(Dot); }; 1642 } 1643 if (Tok == "DATA_SEGMENT_ALIGN") { 1644 expect("("); 1645 Expr E = readExpr(); 1646 expect(","); 1647 readExpr(); 1648 expect(")"); 1649 return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; 1650 } 1651 if (Tok == "DATA_SEGMENT_END") { 1652 expect("("); 1653 expect("."); 1654 expect(")"); 1655 return [](uint64_t Dot) { return Dot; }; 1656 } 1657 // GNU linkers implements more complicated logic to handle 1658 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and just align to 1659 // the next page boundary for simplicity. 
1660 if (Tok == "DATA_SEGMENT_RELRO_END") { 1661 expect("("); 1662 readExpr(); 1663 expect(","); 1664 readExpr(); 1665 expect(")"); 1666 return [](uint64_t Dot) { return alignTo(Dot, Target->PageSize); }; 1667 } 1668 if (Tok == "SIZEOF") { 1669 StringRef Name = readParenLiteral(); 1670 return [=](uint64_t Dot) { return ScriptBase->getOutputSectionSize(Name); }; 1671 } 1672 if (Tok == "ALIGNOF") { 1673 StringRef Name = readParenLiteral(); 1674 return 1675 [=](uint64_t Dot) { return ScriptBase->getOutputSectionAlign(Name); }; 1676 } 1677 if (Tok == "SIZEOF_HEADERS") 1678 return [=](uint64_t Dot) { return ScriptBase->getHeaderSize(); }; 1679 1680 // Tok is a literal number. 1681 uint64_t V; 1682 if (readInteger(Tok, V)) 1683 return [=](uint64_t Dot) { return V; }; 1684 1685 // Tok is a symbol name. 1686 if (Tok != "." && !isValidCIdentifier(Tok)) 1687 setError("malformed number: " + Tok); 1688 return [=](uint64_t Dot) { return getSymbolValue(Tok, Dot); }; 1689 } 1690 1691 Expr ScriptParser::readTernary(Expr Cond) { 1692 skip(); 1693 Expr L = readExpr(); 1694 expect(":"); 1695 Expr R = readExpr(); 1696 return [=](uint64_t Dot) { return Cond(Dot) ? L(Dot) : R(Dot); }; 1697 } 1698 1699 Expr ScriptParser::readParenExpr() { 1700 expect("("); 1701 Expr E = readExpr(); 1702 expect(")"); 1703 return E; 1704 } 1705 1706 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 1707 std::vector<StringRef> Phdrs; 1708 while (!Error && peek().startswith(":")) { 1709 StringRef Tok = next(); 1710 Tok = (Tok.size() == 1) ? next() : Tok.substr(1); 1711 if (Tok.empty()) { 1712 setError("section header name is empty"); 1713 break; 1714 } 1715 Phdrs.push_back(Tok); 1716 } 1717 return Phdrs; 1718 } 1719 1720 // Read a program header type name. The next token must be a 1721 // name of a program header type or a constant (e.g. "0x3"). 
1722 unsigned ScriptParser::readPhdrType() { 1723 StringRef Tok = next(); 1724 uint64_t Val; 1725 if (readInteger(Tok, Val)) 1726 return Val; 1727 1728 unsigned Ret = StringSwitch<unsigned>(Tok) 1729 .Case("PT_NULL", PT_NULL) 1730 .Case("PT_LOAD", PT_LOAD) 1731 .Case("PT_DYNAMIC", PT_DYNAMIC) 1732 .Case("PT_INTERP", PT_INTERP) 1733 .Case("PT_NOTE", PT_NOTE) 1734 .Case("PT_SHLIB", PT_SHLIB) 1735 .Case("PT_PHDR", PT_PHDR) 1736 .Case("PT_TLS", PT_TLS) 1737 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1738 .Case("PT_GNU_STACK", PT_GNU_STACK) 1739 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1740 .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) 1741 .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) 1742 .Default(-1); 1743 1744 if (Ret == (unsigned)-1) { 1745 setError("invalid program header type: " + Tok); 1746 return PT_NULL; 1747 } 1748 return Ret; 1749 } 1750 1751 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1752 // Identifiers start at 2 because 0 and 1 are reserved 1753 // for VER_NDX_LOCAL and VER_NDX_GLOBAL constants. 1754 size_t VersionId = Config->VersionDefinitions.size() + 2; 1755 Config->VersionDefinitions.push_back({VerStr, VersionId}); 1756 1757 if (consume("global:") || peek() != "local:") 1758 readGlobal(VerStr); 1759 if (consume("local:")) 1760 readLocal(); 1761 expect("}"); 1762 1763 // Each version may have a parent version. For example, "Ver2" defined as 1764 // "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" as a parent. This 1765 // version hierarchy is, probably against your instinct, purely for human; the 1766 // runtime doesn't care about them at all. In LLD, we simply skip the token. 
1767 if (!VerStr.empty() && peek() != ";") 1768 skip(); 1769 expect(";"); 1770 } 1771 1772 void ScriptParser::readLocal() { 1773 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1774 expect("*"); 1775 expect(";"); 1776 } 1777 1778 void ScriptParser::readExtern(std::vector<SymbolVersion> *Globals) { 1779 expect("\"C++\""); 1780 expect("{"); 1781 1782 for (;;) { 1783 if (peek() == "}" || Error) 1784 break; 1785 bool HasWildcard = !peek().startswith("\"") && hasWildcard(peek()); 1786 Globals->push_back({unquote(next()), true, HasWildcard}); 1787 expect(";"); 1788 } 1789 1790 expect("}"); 1791 expect(";"); 1792 } 1793 1794 void ScriptParser::readGlobal(StringRef VerStr) { 1795 std::vector<SymbolVersion> *Globals; 1796 if (VerStr.empty()) 1797 Globals = &Config->VersionScriptGlobals; 1798 else 1799 Globals = &Config->VersionDefinitions.back().Globals; 1800 1801 for (;;) { 1802 if (consume("extern")) 1803 readExtern(Globals); 1804 1805 StringRef Cur = peek(); 1806 if (Cur == "}" || Cur == "local:" || Error) 1807 return; 1808 skip(); 1809 Globals->push_back({unquote(Cur), false, hasWildcard(Cur)}); 1810 expect(";"); 1811 } 1812 } 1813 1814 static bool isUnderSysroot(StringRef Path) { 1815 if (Config->Sysroot == "") 1816 return false; 1817 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 1818 if (sys::fs::equivalent(Config->Sysroot, Path)) 1819 return true; 1820 return false; 1821 } 1822 1823 void elf::readLinkerScript(MemoryBufferRef MB) { 1824 StringRef Path = MB.getBufferIdentifier(); 1825 ScriptParser(MB.getBuffer(), isUnderSysroot(Path)).readLinkerScript(); 1826 } 1827 1828 void elf::readVersionScript(MemoryBufferRef MB) { 1829 ScriptParser(MB.getBuffer(), false).readVersionScript(); 1830 } 1831 1832 template class elf::LinkerScript<ELF32LE>; 1833 template class elf::LinkerScript<ELF32BE>; 1834 template class elf::LinkerScript<ELF64LE>; 1835 template class elf::LinkerScript<ELF64BE>; 1836