1 //===- LinkerScript.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the parser/evaluator of the linker script. 11 // It parses a linker script and write the result to Config or ScriptConfig 12 // objects. 13 // 14 // If SECTIONS command is used, a ScriptConfig contains an AST 15 // of the command which will later be consumed by createSections() and 16 // assignAddresses(). 17 // 18 //===----------------------------------------------------------------------===// 19 20 #include "LinkerScript.h" 21 #include "Config.h" 22 #include "Driver.h" 23 #include "InputSection.h" 24 #include "OutputSections.h" 25 #include "ScriptParser.h" 26 #include "Strings.h" 27 #include "Symbols.h" 28 #include "SymbolTable.h" 29 #include "Target.h" 30 #include "Writer.h" 31 #include "llvm/ADT/StringSwitch.h" 32 #include "llvm/Support/ELF.h" 33 #include "llvm/Support/FileSystem.h" 34 #include "llvm/Support/MemoryBuffer.h" 35 #include "llvm/Support/Path.h" 36 #include "llvm/Support/StringSaver.h" 37 38 using namespace llvm; 39 using namespace llvm::ELF; 40 using namespace llvm::object; 41 using namespace llvm::support::endian; 42 using namespace lld; 43 using namespace lld::elf; 44 45 LinkerScriptBase *elf::ScriptBase; 46 ScriptConfiguration *elf::ScriptConfig; 47 48 template <class ELFT> static void addRegular(SymbolAssignment *Cmd) { 49 Symbol *Sym = Symtab<ELFT>::X->addRegular(Cmd->Name, STB_GLOBAL, STV_DEFAULT); 50 Sym->Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT; 51 Cmd->Sym = Sym->body(); 52 53 // If we have no SECTIONS then we don't have '.' and don't call 54 // assignAddresses(). We calculate symbol value immediately in this case. 55 if (!ScriptConfig->HasSections) 56 cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Cmd->Expression(0); 57 } 58 59 template <class ELFT> static void addSynthetic(SymbolAssignment *Cmd) { 60 Symbol *Sym = Symtab<ELFT>::X->addSynthetic( 61 Cmd->Name, nullptr, 0, Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT); 62 Cmd->Sym = Sym->body(); 63 } 64 65 template <class ELFT> static void addSymbol(SymbolAssignment *Cmd) { 66 if (Cmd->IsAbsolute) 67 addRegular<ELFT>(Cmd); 68 else 69 addSynthetic<ELFT>(Cmd); 70 } 71 // If a symbol was in PROVIDE(), we need to define it only when 72 // it is an undefined symbol. 73 template <class ELFT> static bool shouldDefine(SymbolAssignment *Cmd) { 74 if (Cmd->Name == ".") 75 return false; 76 if (!Cmd->Provide) 77 return true; 78 SymbolBody *B = Symtab<ELFT>::X->find(Cmd->Name); 79 return B && B->isUndefined(); 80 } 81 82 bool SymbolAssignment::classof(const BaseCommand *C) { 83 return C->Kind == AssignmentKind; 84 } 85 86 bool OutputSectionCommand::classof(const BaseCommand *C) { 87 return C->Kind == OutputSectionKind; 88 } 89 90 bool InputSectionDescription::classof(const BaseCommand *C) { 91 return C->Kind == InputSectionKind; 92 } 93 94 bool AssertCommand::classof(const BaseCommand *C) { 95 return C->Kind == AssertKind; 96 } 97 98 bool BytesDataCommand::classof(const BaseCommand *C) { 99 return C->Kind == BytesDataKind; 100 } 101 102 template <class ELFT> static bool isDiscarded(InputSectionBase<ELFT> *S) { 103 return !S || !S->Live; 104 } 105 106 template <class ELFT> LinkerScript<ELFT>::LinkerScript() {} 107 template <class ELFT> LinkerScript<ELFT>::~LinkerScript() {} 108 109 template <class ELFT> 110 bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) { 111 for (Regex *Re : Opt.KeptSections) 112 if (Re->match(S->Name)) 113 return true; 114 return false; 115 } 116 117 static bool comparePriority(InputSectionData *A, InputSectionData *B) { 118 return getPriority(A->Name) < getPriority(B->Name); 119 } 120 121 static bool compareName(InputSectionData *A, InputSectionData *B) { 122 return A->Name < B->Name; 123 } 124 125 static bool compareAlignment(InputSectionData *A, InputSectionData *B) { 126 // ">" is not a mistake. Larger alignments are placed before smaller 127 // alignments in order to reduce the amount of padding necessary. 128 // This is compatible with GNU. 129 return A->Alignment > B->Alignment; 130 } 131 132 static std::function<bool(InputSectionData *, InputSectionData *)> 133 getComparator(SortSectionPolicy K) { 134 switch (K) { 135 case SortSectionPolicy::Alignment: 136 return compareAlignment; 137 case SortSectionPolicy::Name: 138 return compareName; 139 case SortSectionPolicy::Priority: 140 return comparePriority; 141 default: 142 llvm_unreachable("unknown sort policy"); 143 } 144 } 145 146 template <class ELFT> 147 static bool matchConstraints(ArrayRef<InputSectionBase<ELFT> *> Sections, 148 ConstraintKind Kind) { 149 if (Kind == ConstraintKind::NoConstraint) 150 return true; 151 bool IsRW = llvm::any_of(Sections, [=](InputSectionData *Sec2) { 152 auto *Sec = static_cast<InputSectionBase<ELFT> *>(Sec2); 153 return Sec->getSectionHdr()->sh_flags & SHF_WRITE; 154 }); 155 return (IsRW && Kind == ConstraintKind::ReadWrite) || 156 (!IsRW && Kind == ConstraintKind::ReadOnly); 157 } 158 159 static void sortSections(InputSectionData **Begin, InputSectionData **End, 160 SortSectionPolicy K) { 161 if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None) 162 std::stable_sort(Begin, End, getComparator(K)); 163 } 164 165 // Compute and remember which sections the InputSectionDescription matches. 166 template <class ELFT> 167 void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) { 168 // Collects all sections that satisfy constraints of I 169 // and attach them to I. 170 for (SectionPattern &Pat : I->SectionPatterns) { 171 size_t SizeBefore = I->Sections.size(); 172 for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles()) { 173 StringRef Filename = sys::path::filename(F->getName()); 174 if (!I->FileRe.match(Filename) || Pat.ExcludedFileRe.match(Filename)) 175 continue; 176 177 for (InputSectionBase<ELFT> *S : F->getSections()) 178 if (!isDiscarded(S) && !S->OutSec && Pat.SectionRe.match(S->Name)) 179 I->Sections.push_back(S); 180 if (Pat.SectionRe.match("COMMON")) 181 I->Sections.push_back(CommonInputSection<ELFT>::X); 182 } 183 184 // Sort sections as instructed by SORT-family commands and --sort-section 185 // option. Because SORT-family commands can be nested at most two depth 186 // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command 187 // line option is respected even if a SORT command is given, the exact 188 // behavior we have here is a bit complicated. Here are the rules. 189 // 190 // 1. If two SORT commands are given, --sort-section is ignored. 191 // 2. If one SORT command is given, and if it is not SORT_NONE, 192 // --sort-section is handled as an inner SORT command. 193 // 3. If one SORT command is given, and if it is SORT_NONE, don't sort. 194 // 4. If no SORT command is given, sort according to --sort-section. 195 InputSectionData **Begin = I->Sections.data() + SizeBefore; 196 InputSectionData **End = I->Sections.data() + I->Sections.size(); 197 if (Pat.SortOuter != SortSectionPolicy::None) { 198 if (Pat.SortInner == SortSectionPolicy::Default) 199 sortSections(Begin, End, Config->SortSection); 200 else 201 sortSections(Begin, End, Pat.SortInner); 202 sortSections(Begin, End, Pat.SortOuter); 203 } 204 } 205 206 // We do not add duplicate input sections, so mark them with a dummy output 207 // section for now. 208 for (InputSectionData *S : I->Sections) { 209 auto *S2 = static_cast<InputSectionBase<ELFT> *>(S); 210 S2->OutSec = (OutputSectionBase<ELFT> *)-1; 211 } 212 } 213 214 template <class ELFT> 215 void LinkerScript<ELFT>::discard(ArrayRef<InputSectionBase<ELFT> *> V) { 216 for (InputSectionBase<ELFT> *S : V) { 217 S->Live = false; 218 reportDiscarded(S); 219 } 220 } 221 222 template <class ELFT> 223 std::vector<InputSectionBase<ELFT> *> 224 LinkerScript<ELFT>::createInputSectionList(OutputSectionCommand &OutCmd) { 225 std::vector<InputSectionBase<ELFT> *> Ret; 226 227 for (const std::unique_ptr<BaseCommand> &Base : OutCmd.Commands) { 228 auto *Cmd = dyn_cast<InputSectionDescription>(Base.get()); 229 if (!Cmd) 230 continue; 231 computeInputSections(Cmd); 232 for (InputSectionData *S : Cmd->Sections) 233 Ret.push_back(static_cast<InputSectionBase<ELFT> *>(S)); 234 } 235 236 return Ret; 237 } 238 239 template <class ELFT> 240 static SectionKey<ELFT::Is64Bits> createKey(InputSectionBase<ELFT> *C, 241 StringRef OutsecName) { 242 // When using linker script the merge rules are different. 243 // Unfortunately, linker scripts are name based. This means that expressions 244 // like *(.foo*) can refer to multiple input sections that would normally be 245 // placed in different output sections. We cannot put them in different 246 // output sections or we would produce wrong results for 247 // start = .; *(.foo.*) end = .; *(.bar) 248 // and a mapping of .foo1 and .bar1 to one section and .foo2 and .bar2 to 249 // another. The problem is that there is no way to layout those output 250 // sections such that the .foo sections are the only thing between the 251 // start and end symbols. 252 253 // An extra annoyance is that we cannot simply disable merging of the contents 254 // of SHF_MERGE sections, but our implementation requires one output section 255 // per "kind" (string or not, which size/aligment). 256 // Fortunately, creating symbols in the middle of a merge section is not 257 // supported by bfd or gold, so we can just create multiple section in that 258 // case. 259 const typename ELFT::Shdr *H = C->getSectionHdr(); 260 typedef typename ELFT::uint uintX_t; 261 uintX_t Flags = H->sh_flags & (SHF_MERGE | SHF_STRINGS); 262 263 uintX_t Alignment = 0; 264 if (isa<MergeInputSection<ELFT>>(C)) 265 Alignment = std::max(H->sh_addralign, H->sh_entsize); 266 267 return SectionKey<ELFT::Is64Bits>{OutsecName, /*Type*/ 0, Flags, Alignment}; 268 } 269 270 template <class ELFT> 271 void LinkerScript<ELFT>::addSection(OutputSectionFactory<ELFT> &Factory, 272 InputSectionBase<ELFT> *Sec, 273 StringRef Name) { 274 OutputSectionBase<ELFT> *OutSec; 275 bool IsNew; 276 std::tie(OutSec, IsNew) = Factory.create(createKey(Sec, Name), Sec); 277 if (IsNew) 278 OutputSections->push_back(OutSec); 279 OutSec->addSection(Sec); 280 } 281 282 template <class ELFT> 283 void LinkerScript<ELFT>::processCommands(OutputSectionFactory<ELFT> &Factory) { 284 285 for (unsigned I = 0; I < Opt.Commands.size(); ++I) { 286 auto Iter = Opt.Commands.begin() + I; 287 const std::unique_ptr<BaseCommand> &Base1 = *Iter; 288 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base1.get())) { 289 if (shouldDefine<ELFT>(Cmd)) 290 addRegular<ELFT>(Cmd); 291 continue; 292 } 293 if (auto *Cmd = dyn_cast<AssertCommand>(Base1.get())) { 294 // If we don't have SECTIONS then output sections have already been 295 // created by Writer<ELFT>. The LinkerScript<ELFT>::assignAddresses 296 // will not be called, so ASSERT should be evaluated now. 297 if (!Opt.HasSections) 298 Cmd->Expression(0); 299 continue; 300 } 301 302 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base1.get())) { 303 std::vector<InputSectionBase<ELFT> *> V = createInputSectionList(*Cmd); 304 305 if (Cmd->Name == "/DISCARD/") { 306 discard(V); 307 continue; 308 } 309 310 if (!matchConstraints<ELFT>(V, Cmd->Constraint)) { 311 for (InputSectionBase<ELFT> *S : V) 312 S->OutSec = nullptr; 313 Opt.Commands.erase(Iter); 314 --I; 315 continue; 316 } 317 318 for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) 319 if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base.get())) 320 if (shouldDefine<ELFT>(OutCmd)) 321 addSymbol<ELFT>(OutCmd); 322 323 if (V.empty()) 324 continue; 325 326 for (InputSectionBase<ELFT> *Sec : V) { 327 addSection(Factory, Sec, Cmd->Name); 328 if (uint32_t Subalign = Cmd->SubalignExpr ? Cmd->SubalignExpr(0) : 0) 329 Sec->Alignment = Subalign; 330 } 331 } 332 } 333 } 334 335 template <class ELFT> 336 void LinkerScript<ELFT>::createSections(OutputSectionFactory<ELFT> &Factory) { 337 processCommands(Factory); 338 // Add orphan sections. 339 for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles()) 340 for (InputSectionBase<ELFT> *S : F->getSections()) 341 if (!isDiscarded(S) && !S->OutSec) 342 addSection(Factory, S, getOutputSectionName(S)); 343 } 344 345 // Sets value of a section-defined symbol. Two kinds of 346 // symbols are processed: synthetic symbols, whose value 347 // is an offset from beginning of section and regular 348 // symbols whose value is absolute. 349 template <class ELFT> 350 static void assignSectionSymbol(SymbolAssignment *Cmd, 351 OutputSectionBase<ELFT> *Sec, 352 typename ELFT::uint Off) { 353 if (!Cmd->Sym) 354 return; 355 356 if (auto *Body = dyn_cast<DefinedSynthetic<ELFT>>(Cmd->Sym)) { 357 Body->Section = Sec; 358 Body->Value = Cmd->Expression(Sec->getVA() + Off) - Sec->getVA(); 359 return; 360 } 361 auto *Body = cast<DefinedRegular<ELFT>>(Cmd->Sym); 362 Body->Value = Cmd->Expression(Sec->getVA() + Off); 363 } 364 365 template <class ELFT> static bool isTbss(OutputSectionBase<ELFT> *Sec) { 366 return (Sec->getFlags() & SHF_TLS) && Sec->getType() == SHT_NOBITS; 367 } 368 369 template <class ELFT> void LinkerScript<ELFT>::output(InputSection<ELFT> *S) { 370 if (!AlreadyOutputIS.insert(S).second) 371 return; 372 bool IsTbss = isTbss(CurOutSec); 373 374 uintX_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot; 375 Pos = alignTo(Pos, S->Alignment); 376 S->OutSecOff = Pos - CurOutSec->getVA(); 377 Pos += S->getSize(); 378 379 // Update output section size after adding each section. This is so that 380 // SIZEOF works correctly in the case below: 381 // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) } 382 CurOutSec->setSize(Pos - CurOutSec->getVA()); 383 384 if (IsTbss) 385 ThreadBssOffset = Pos - Dot; 386 else 387 Dot = Pos; 388 } 389 390 template <class ELFT> void LinkerScript<ELFT>::flush() { 391 if (!CurOutSec || !AlreadyOutputOS.insert(CurOutSec).second) 392 return; 393 if (auto *OutSec = dyn_cast<OutputSection<ELFT>>(CurOutSec)) { 394 for (InputSection<ELFT> *I : OutSec->Sections) 395 output(I); 396 } else { 397 Dot += CurOutSec->getSize(); 398 } 399 } 400 401 template <class ELFT> 402 void LinkerScript<ELFT>::switchTo(OutputSectionBase<ELFT> *Sec) { 403 if (CurOutSec == Sec) 404 return; 405 if (AlreadyOutputOS.count(Sec)) 406 return; 407 408 flush(); 409 CurOutSec = Sec; 410 411 Dot = alignTo(Dot, CurOutSec->getAlignment()); 412 CurOutSec->setVA(isTbss(CurOutSec) ? Dot + ThreadBssOffset : Dot); 413 } 414 415 template <class ELFT> void LinkerScript<ELFT>::process(BaseCommand &Base) { 416 // This handles the assignments to symbol or to a location counter (.) 417 if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) { 418 if (AssignCmd->Name == ".") { 419 // Update to location counter means update to section size. 420 Dot = AssignCmd->Expression(Dot); 421 CurOutSec->setSize(Dot - CurOutSec->getVA()); 422 return; 423 } 424 assignSectionSymbol<ELFT>(AssignCmd, CurOutSec, Dot - CurOutSec->getVA()); 425 return; 426 } 427 428 // Handle BYTE(), SHORT(), LONG(), or QUAD(). 429 if (auto *DataCmd = dyn_cast<BytesDataCommand>(&Base)) { 430 DataCmd->Offset = Dot - CurOutSec->getVA(); 431 Dot += DataCmd->Size; 432 CurOutSec->setSize(Dot - CurOutSec->getVA()); 433 return; 434 } 435 436 // It handles single input section description command, 437 // calculates and assigns the offsets for each section and also 438 // updates the output section size. 439 auto &ICmd = cast<InputSectionDescription>(Base); 440 for (InputSectionData *ID : ICmd.Sections) { 441 auto *IB = static_cast<InputSectionBase<ELFT> *>(ID); 442 switchTo(IB->OutSec); 443 if (auto *I = dyn_cast<InputSection<ELFT>>(IB)) 444 output(I); 445 else 446 flush(); 447 } 448 } 449 450 template <class ELFT> 451 static std::vector<OutputSectionBase<ELFT> *> 452 findSections(OutputSectionCommand &Cmd, 453 const std::vector<OutputSectionBase<ELFT> *> &Sections) { 454 std::vector<OutputSectionBase<ELFT> *> Ret; 455 for (OutputSectionBase<ELFT> *Sec : Sections) 456 if (Sec->getName() == Cmd.Name) 457 Ret.push_back(Sec); 458 return Ret; 459 } 460 461 template <class ELFT> 462 void LinkerScript<ELFT>::assignOffsets(OutputSectionCommand *Cmd) { 463 std::vector<OutputSectionBase<ELFT> *> Sections = 464 findSections(*Cmd, *OutputSections); 465 if (Sections.empty()) 466 return; 467 switchTo(Sections[0]); 468 469 // Find the last section output location. We will output orphan sections 470 // there so that end symbols point to the correct location. 471 auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(), 472 [](const std::unique_ptr<BaseCommand> &Cmd) { 473 return !isa<SymbolAssignment>(*Cmd); 474 }) 475 .base(); 476 for (auto I = Cmd->Commands.begin(); I != E; ++I) 477 process(**I); 478 for (OutputSectionBase<ELFT> *Base : Sections) 479 switchTo(Base); 480 flush(); 481 std::for_each(E, Cmd->Commands.end(), 482 [this](std::unique_ptr<BaseCommand> &B) { process(*B.get()); }); 483 } 484 485 template <class ELFT> void LinkerScript<ELFT>::adjustSectionsBeforeSorting() { 486 // It is common practice to use very generic linker scripts. So for any 487 // given run some of the output sections in the script will be empty. 488 // We could create corresponding empty output sections, but that would 489 // clutter the output. 490 // We instead remove trivially empty sections. The bfd linker seems even 491 // more aggressive at removing them. 492 auto Pos = std::remove_if( 493 Opt.Commands.begin(), Opt.Commands.end(), 494 [&](const std::unique_ptr<BaseCommand> &Base) { 495 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 496 if (!Cmd) 497 return false; 498 std::vector<OutputSectionBase<ELFT> *> Secs = 499 findSections(*Cmd, *OutputSections); 500 if (!Secs.empty()) 501 return false; 502 for (const std::unique_ptr<BaseCommand> &I : Cmd->Commands) 503 if (!isa<InputSectionDescription>(I.get())) 504 return false; 505 return true; 506 }); 507 Opt.Commands.erase(Pos, Opt.Commands.end()); 508 509 // If the output section contains only symbol assignments, create a 510 // corresponding output section. The bfd linker seems to only create them if 511 // '.' is assigned to, but creating these section should not have any bad 512 // consequeces and gives us a section to put the symbol in. 513 uintX_t Flags = SHF_ALLOC; 514 uint32_t Type = 0; 515 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 516 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 517 if (!Cmd) 518 continue; 519 std::vector<OutputSectionBase<ELFT> *> Secs = 520 findSections(*Cmd, *OutputSections); 521 if (!Secs.empty()) { 522 Flags = Secs[0]->getFlags(); 523 Type = Secs[0]->getType(); 524 continue; 525 } 526 527 auto *OutSec = new OutputSection<ELFT>(Cmd->Name, Type, Flags); 528 Out<ELFT>::Pool.emplace_back(OutSec); 529 OutputSections->push_back(OutSec); 530 } 531 } 532 533 // When placing orphan sections, we want to place them after symbol assignments 534 // so that an orphan after 535 // begin_foo = .; 536 // foo : { *(foo) } 537 // end_foo = .; 538 // doesn't break the intended meaning of the begin/end symbols. 539 // We don't want to go over sections since Writer<ELFT>::sortSections is the 540 // one in charge of deciding the order of the sections. 541 // We don't want to go over alignments, since doing so in 542 // rx_sec : { *(rx_sec) } 543 // . = ALIGN(0x1000); 544 // /* The RW PT_LOAD starts here*/ 545 // rw_sec : { *(rw_sec) } 546 // would mean that the RW PT_LOAD would become unaligned. 547 static bool shouldSkip(const BaseCommand &Cmd) { 548 if (isa<OutputSectionCommand>(Cmd)) 549 return false; 550 const auto *Assign = dyn_cast<SymbolAssignment>(&Cmd); 551 if (!Assign) 552 return true; 553 return Assign->Name != "."; 554 } 555 556 template <class ELFT> 557 void LinkerScript<ELFT>::assignAddresses(std::vector<PhdrEntry<ELFT>> &Phdrs) { 558 // Orphan sections are sections present in the input files which 559 // are not explicitly placed into the output file by the linker script. 560 // We place orphan sections at end of file. 561 // Other linkers places them using some heuristics as described in 562 // https://sourceware.org/binutils/docs/ld/Orphan-Sections.html#Orphan-Sections. 563 564 // The OutputSections are already in the correct order. 565 // This loops creates or moves commands as needed so that they are in the 566 // correct order. 567 int CmdIndex = 0; 568 for (OutputSectionBase<ELFT> *Sec : *OutputSections) { 569 StringRef Name = Sec->getName(); 570 571 // Find the last spot where we can insert a command and still get the 572 // correct result. 573 auto CmdIter = Opt.Commands.begin() + CmdIndex; 574 auto E = Opt.Commands.end(); 575 while (CmdIter != E && shouldSkip(**CmdIter)) { 576 ++CmdIter; 577 ++CmdIndex; 578 } 579 580 auto Pos = 581 std::find_if(CmdIter, E, [&](const std::unique_ptr<BaseCommand> &Base) { 582 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 583 return Cmd && Cmd->Name == Name; 584 }); 585 if (Pos == E) { 586 Opt.Commands.insert(CmdIter, 587 llvm::make_unique<OutputSectionCommand>(Name)); 588 ++CmdIndex; 589 continue; 590 } 591 592 // Continue from where we found it. 593 CmdIndex = (Pos - Opt.Commands.begin()) + 1; 594 continue; 595 } 596 597 // Assign addresses as instructed by linker script SECTIONS sub-commands. 598 Dot = 0; 599 600 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 601 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) { 602 if (Cmd->Name == ".") { 603 Dot = Cmd->Expression(Dot); 604 } else if (Cmd->Sym) { 605 cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Cmd->Expression(Dot); 606 } 607 continue; 608 } 609 610 if (auto *Cmd = dyn_cast<AssertCommand>(Base.get())) { 611 Cmd->Expression(Dot); 612 continue; 613 } 614 615 auto *Cmd = cast<OutputSectionCommand>(Base.get()); 616 617 if (Cmd->AddrExpr) 618 Dot = Cmd->AddrExpr(Dot); 619 620 assignOffsets(Cmd); 621 } 622 623 uintX_t MinVA = std::numeric_limits<uintX_t>::max(); 624 for (OutputSectionBase<ELFT> *Sec : *OutputSections) { 625 if (Sec->getFlags() & SHF_ALLOC) 626 MinVA = std::min(MinVA, Sec->getVA()); 627 else 628 Sec->setVA(0); 629 } 630 631 uintX_t HeaderSize = getHeaderSize(); 632 auto FirstPTLoad = 633 std::find_if(Phdrs.begin(), Phdrs.end(), [](const PhdrEntry<ELFT> &E) { 634 return E.H.p_type == PT_LOAD; 635 }); 636 if (HeaderSize <= MinVA && FirstPTLoad != Phdrs.end()) { 637 // ELF and Program headers need to be right before the first section in 638 // memory. Set their addresses accordingly. 639 MinVA = alignDown(MinVA - HeaderSize, Target->PageSize); 640 Out<ELFT>::ElfHeader->setVA(MinVA); 641 Out<ELFT>::ProgramHeaders->setVA(Out<ELFT>::ElfHeader->getSize() + MinVA); 642 FirstPTLoad->First = Out<ELFT>::ElfHeader; 643 if (!FirstPTLoad->Last) 644 FirstPTLoad->Last = Out<ELFT>::ProgramHeaders; 645 } 646 } 647 648 // Creates program headers as instructed by PHDRS linker script command. 649 template <class ELFT> 650 std::vector<PhdrEntry<ELFT>> LinkerScript<ELFT>::createPhdrs() { 651 std::vector<PhdrEntry<ELFT>> Ret; 652 653 // Process PHDRS and FILEHDR keywords because they are not 654 // real output sections and cannot be added in the following loop. 655 for (const PhdrsCommand &Cmd : Opt.PhdrsCommands) { 656 Ret.emplace_back(Cmd.Type, Cmd.Flags == UINT_MAX ? PF_R : Cmd.Flags); 657 PhdrEntry<ELFT> &Phdr = Ret.back(); 658 659 if (Cmd.HasFilehdr) 660 Phdr.add(Out<ELFT>::ElfHeader); 661 if (Cmd.HasPhdrs) 662 Phdr.add(Out<ELFT>::ProgramHeaders); 663 664 if (Cmd.LMAExpr) { 665 Phdr.H.p_paddr = Cmd.LMAExpr(0); 666 Phdr.HasLMA = true; 667 } 668 } 669 670 // Add output sections to program headers. 671 PhdrEntry<ELFT> *Load = nullptr; 672 uintX_t Flags = PF_R; 673 for (OutputSectionBase<ELFT> *Sec : *OutputSections) { 674 if (!(Sec->getFlags() & SHF_ALLOC)) 675 break; 676 677 std::vector<size_t> PhdrIds = getPhdrIndices(Sec->getName()); 678 if (!PhdrIds.empty()) { 679 // Assign headers specified by linker script 680 for (size_t Id : PhdrIds) { 681 Ret[Id].add(Sec); 682 if (Opt.PhdrsCommands[Id].Flags == UINT_MAX) 683 Ret[Id].H.p_flags |= Sec->getPhdrFlags(); 684 } 685 } else { 686 // If we have no load segment or flags've changed then we want new load 687 // segment. 688 uintX_t NewFlags = Sec->getPhdrFlags(); 689 if (Load == nullptr || Flags != NewFlags) { 690 Load = &*Ret.emplace(Ret.end(), PT_LOAD, NewFlags); 691 Flags = NewFlags; 692 } 693 Load->add(Sec); 694 } 695 } 696 return Ret; 697 } 698 699 template <class ELFT> bool LinkerScript<ELFT>::ignoreInterpSection() { 700 // Ignore .interp section in case we have PHDRS specification 701 // and PT_INTERP isn't listed. 702 return !Opt.PhdrsCommands.empty() && 703 llvm::find_if(Opt.PhdrsCommands, [](const PhdrsCommand &Cmd) { 704 return Cmd.Type == PT_INTERP; 705 }) == Opt.PhdrsCommands.end(); 706 } 707 708 template <class ELFT> 709 ArrayRef<uint8_t> LinkerScript<ELFT>::getFiller(StringRef Name) { 710 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 711 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 712 if (Cmd->Name == Name) 713 return Cmd->Filler; 714 return {}; 715 } 716 717 template <class ELFT> 718 static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) { 719 const endianness E = ELFT::TargetEndianness; 720 721 switch (Size) { 722 case 1: 723 *Buf = (uint8_t)Data; 724 break; 725 case 2: 726 write16<E>(Buf, Data); 727 break; 728 case 4: 729 write32<E>(Buf, Data); 730 break; 731 case 8: 732 write64<E>(Buf, Data); 733 break; 734 default: 735 llvm_unreachable("unsupported Size argument"); 736 } 737 } 738 739 template <class ELFT> 740 void LinkerScript<ELFT>::writeDataBytes(StringRef Name, uint8_t *Buf) { 741 int I = getSectionIndex(Name); 742 if (I == INT_MAX) 743 return; 744 745 OutputSectionCommand *Cmd = 746 dyn_cast<OutputSectionCommand>(Opt.Commands[I].get()); 747 for (const std::unique_ptr<BaseCommand> &Base2 : Cmd->Commands) 748 if (auto *DataCmd = dyn_cast<BytesDataCommand>(Base2.get())) 749 writeInt<ELFT>(&Buf[DataCmd->Offset], DataCmd->Data, DataCmd->Size); 750 } 751 752 template <class ELFT> Expr LinkerScript<ELFT>::getLma(StringRef Name) { 753 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) 754 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 755 if (Cmd->LmaExpr && Cmd->Name == Name) 756 return Cmd->LmaExpr; 757 return {}; 758 } 759 760 // Returns the index of the given section name in linker script 761 // SECTIONS commands. Sections are laid out as the same order as they 762 // were in the script. If a given name did not appear in the script, 763 // it returns INT_MAX, so that it will be laid out at end of file. 764 template <class ELFT> int LinkerScript<ELFT>::getSectionIndex(StringRef Name) { 765 int I = 0; 766 for (std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 767 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 768 if (Cmd->Name == Name) 769 return I; 770 ++I; 771 } 772 return INT_MAX; 773 } 774 775 template <class ELFT> bool LinkerScript<ELFT>::hasPhdrsCommands() { 776 return !Opt.PhdrsCommands.empty(); 777 } 778 779 template <class ELFT> 780 uint64_t LinkerScript<ELFT>::getOutputSectionAddress(StringRef Name) { 781 for (OutputSectionBase<ELFT> *Sec : *OutputSections) 782 if (Sec->getName() == Name) 783 return Sec->getVA(); 784 error("undefined section " + Name); 785 return 0; 786 } 787 788 template <class ELFT> 789 uint64_t LinkerScript<ELFT>::getOutputSectionSize(StringRef Name) { 790 for (OutputSectionBase<ELFT> *Sec : *OutputSections) 791 if (Sec->getName() == Name) 792 return Sec->getSize(); 793 error("undefined section " + Name); 794 return 0; 795 } 796 797 template <class ELFT> 798 uint64_t LinkerScript<ELFT>::getOutputSectionAlign(StringRef Name) { 799 for (OutputSectionBase<ELFT> *Sec : *OutputSections) 800 if (Sec->getName() == Name) 801 return Sec->getAlignment(); 802 error("undefined section " + Name); 803 return 0; 804 } 805 806 template <class ELFT> uint64_t LinkerScript<ELFT>::getHeaderSize() { 807 return elf::getHeaderSize<ELFT>(); 808 } 809 810 template <class ELFT> uint64_t LinkerScript<ELFT>::getSymbolValue(StringRef S) { 811 if (SymbolBody *B = Symtab<ELFT>::X->find(S)) 812 return B->getVA<ELFT>(); 813 error("symbol not found: " + S); 814 return 0; 815 } 816 817 template <class ELFT> bool LinkerScript<ELFT>::isDefined(StringRef S) { 818 return Symtab<ELFT>::X->find(S) != nullptr; 819 } 820 821 // Returns indices of ELF headers containing specific section, identified 822 // by Name. Each index is a zero based number of ELF header listed within 823 // PHDRS {} script block. 824 template <class ELFT> 825 std::vector<size_t> LinkerScript<ELFT>::getPhdrIndices(StringRef SectionName) { 826 for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 827 auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); 828 if (!Cmd || Cmd->Name != SectionName) 829 continue; 830 831 std::vector<size_t> Ret; 832 for (StringRef PhdrName : Cmd->Phdrs) 833 Ret.push_back(getPhdrIndex(PhdrName)); 834 return Ret; 835 } 836 return {}; 837 } 838 839 template <class ELFT> 840 size_t LinkerScript<ELFT>::getPhdrIndex(StringRef PhdrName) { 841 size_t I = 0; 842 for (PhdrsCommand &Cmd : Opt.PhdrsCommands) { 843 if (Cmd.Name == PhdrName) 844 return I; 845 ++I; 846 } 847 error("section header '" + PhdrName + "' is not listed in PHDRS"); 848 return 0; 849 } 850 851 class elf::ScriptParser : public ScriptParserBase { 852 typedef void (ScriptParser::*Handler)(); 853 854 public: 855 ScriptParser(StringRef S, bool B) : ScriptParserBase(S), IsUnderSysroot(B) {} 856 857 void readLinkerScript(); 858 void readVersionScript(); 859 860 private: 861 void addFile(StringRef Path); 862 863 void readAsNeeded(); 864 void readEntry(); 865 void readExtern(); 866 void readGroup(); 867 void readInclude(); 868 void readOutput(); 869 void readOutputArch(); 870 void readOutputFormat(); 871 void readPhdrs(); 872 void readSearchDir(); 873 void readSections(); 874 void readVersion(); 875 void readVersionScriptCommand(); 876 877 SymbolAssignment *readAssignment(StringRef Name); 878 BytesDataCommand *readBytesDataCommand(StringRef Tok); 879 std::vector<uint8_t> readFill(); 880 OutputSectionCommand *readOutputSectionDescription(StringRef OutSec); 881 std::vector<uint8_t> readOutputSectionFiller(StringRef Tok); 882 std::vector<StringRef> readOutputSectionPhdrs(); 883 InputSectionDescription *readInputSectionDescription(StringRef Tok); 884 Regex readFilePatterns(); 885 std::vector<SectionPattern> readInputSectionsList(); 886 InputSectionDescription *readInputSectionRules(StringRef FilePattern); 887 unsigned readPhdrType(); 888 SortSectionPolicy readSortKind(); 889 SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); 890 SymbolAssignment *readProvideOrAssignment(StringRef Tok, bool MakeAbsolute); 891 void readSort(); 892 Expr readAssert(); 893 894 Expr readExpr(); 895 Expr readExpr1(Expr Lhs, int MinPrec); 896 Expr readPrimary(); 897 Expr readTernary(Expr Cond); 898 Expr readParenExpr(); 899 900 // For parsing version script. 901 void readExtern(std::vector<SymbolVersion> *Globals); 902 void readVersionDeclaration(StringRef VerStr); 903 void readGlobal(StringRef VerStr); 904 void readLocal(); 905 906 ScriptConfiguration &Opt = *ScriptConfig; 907 StringSaver Saver = {ScriptConfig->Alloc}; 908 bool IsUnderSysroot; 909 }; 910 911 void ScriptParser::readVersionScript() { 912 readVersionScriptCommand(); 913 if (!atEOF()) 914 setError("EOF expected, but got " + next()); 915 } 916 917 void ScriptParser::readVersionScriptCommand() { 918 if (skip("{")) { 919 readVersionDeclaration(""); 920 return; 921 } 922 923 while (!atEOF() && !Error && peek() != "}") { 924 StringRef VerStr = next(); 925 if (VerStr == "{") { 926 setError("anonymous version definition is used in " 927 "combination with other version definitions"); 928 return; 929 } 930 expect("{"); 931 readVersionDeclaration(VerStr); 932 } 933 } 934 935 void ScriptParser::readVersion() { 936 expect("{"); 937 readVersionScriptCommand(); 938 expect("}"); 939 } 940 941 void ScriptParser::readLinkerScript() { 942 while (!atEOF()) { 943 StringRef Tok = next(); 944 if (Tok == ";") 945 continue; 946 947 if (Tok == "ASSERT") { 948 Opt.Commands.emplace_back(new AssertCommand(readAssert())); 949 } else if (Tok == "ENTRY") { 950 readEntry(); 951 } else if (Tok == "EXTERN") { 952 readExtern(); 953 } else if (Tok == "GROUP" || Tok == "INPUT") { 954 readGroup(); 955 } else if (Tok == "INCLUDE") { 956 readInclude(); 957 } else if (Tok == "OUTPUT") { 958 readOutput(); 959 } else if (Tok == "OUTPUT_ARCH") { 960 readOutputArch(); 961 } else if (Tok == "OUTPUT_FORMAT") { 962 readOutputFormat(); 963 } else if (Tok == "PHDRS") { 964 readPhdrs(); 965 } else if (Tok == "SEARCH_DIR") { 966 readSearchDir(); 967 } else if (Tok == "SECTIONS") { 968 readSections(); 969 } else if (Tok == "VERSION") { 970 readVersion(); 971 } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok, true)) { 972 Opt.Commands.emplace_back(Cmd); 973 } else { 974 setError("unknown directive: " + Tok); 975 } 976 } 977 } 978 979 void ScriptParser::addFile(StringRef S) { 980 if (IsUnderSysroot && S.startswith("/")) { 981 SmallString<128> Path; 982 (Config->Sysroot + S).toStringRef(Path); 983 if (sys::fs::exists(Path)) { 984 Driver->addFile(Saver.save(Path.str())); 985 return; 986 } 987 } 988 989 if (sys::path::is_absolute(S)) { 990 Driver->addFile(S); 991 } else if (S.startswith("=")) { 992 if (Config->Sysroot.empty()) 993 Driver->addFile(S.substr(1)); 994 else 995 Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1))); 996 } else if (S.startswith("-l")) { 997 Driver->addLibrary(S.substr(2)); 998 } else if (sys::fs::exists(S)) { 999 Driver->addFile(S); 1000 } else { 1001 std::string Path = findFromSearchPaths(S); 1002 if (Path.empty()) 1003 setError("unable to find " + S); 1004 else 1005 Driver->addFile(Saver.save(Path)); 1006 } 1007 } 1008 1009 void ScriptParser::readAsNeeded() { 1010 expect("("); 1011 bool Orig = Config->AsNeeded; 1012 Config->AsNeeded = true; 1013 while (!Error && !skip(")")) 1014 addFile(unquote(next())); 1015 Config->AsNeeded = Orig; 1016 } 1017 1018 void ScriptParser::readEntry() { 1019 // -e <symbol> takes predecence over ENTRY(<symbol>). 1020 expect("("); 1021 StringRef Tok = next(); 1022 if (Config->Entry.empty()) 1023 Config->Entry = Tok; 1024 expect(")"); 1025 } 1026 1027 void ScriptParser::readExtern() { 1028 expect("("); 1029 while (!Error && !skip(")")) 1030 Config->Undefined.push_back(next()); 1031 } 1032 1033 void ScriptParser::readGroup() { 1034 expect("("); 1035 while (!Error && !skip(")")) { 1036 StringRef Tok = next(); 1037 if (Tok == "AS_NEEDED") 1038 readAsNeeded(); 1039 else 1040 addFile(unquote(Tok)); 1041 } 1042 } 1043 1044 void ScriptParser::readInclude() { 1045 StringRef Tok = next(); 1046 auto MBOrErr = MemoryBuffer::getFile(unquote(Tok)); 1047 if (!MBOrErr) { 1048 setError("cannot open " + Tok); 1049 return; 1050 } 1051 std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; 1052 StringRef S = Saver.save(MB->getMemBufferRef().getBuffer()); 1053 std::vector<StringRef> V = tokenize(S); 1054 Tokens.insert(Tokens.begin() + Pos, V.begin(), V.end()); 1055 } 1056 1057 void ScriptParser::readOutput() { 1058 // -o <file> takes predecence over OUTPUT(<file>). 1059 expect("("); 1060 StringRef Tok = next(); 1061 if (Config->OutputFile.empty()) 1062 Config->OutputFile = unquote(Tok); 1063 expect(")"); 1064 } 1065 1066 void ScriptParser::readOutputArch() { 1067 // Error checking only for now. 1068 expect("("); 1069 next(); 1070 expect(")"); 1071 } 1072 1073 void ScriptParser::readOutputFormat() { 1074 // Error checking only for now. 1075 expect("("); 1076 next(); 1077 StringRef Tok = next(); 1078 if (Tok == ")") 1079 return; 1080 if (Tok != ",") { 1081 setError("unexpected token: " + Tok); 1082 return; 1083 } 1084 next(); 1085 expect(","); 1086 next(); 1087 expect(")"); 1088 } 1089 1090 void ScriptParser::readPhdrs() { 1091 expect("{"); 1092 while (!Error && !skip("}")) { 1093 StringRef Tok = next(); 1094 Opt.PhdrsCommands.push_back( 1095 {Tok, PT_NULL, false, false, UINT_MAX, nullptr}); 1096 PhdrsCommand &PhdrCmd = Opt.PhdrsCommands.back(); 1097 1098 PhdrCmd.Type = readPhdrType(); 1099 do { 1100 Tok = next(); 1101 if (Tok == ";") 1102 break; 1103 if (Tok == "FILEHDR") 1104 PhdrCmd.HasFilehdr = true; 1105 else if (Tok == "PHDRS") 1106 PhdrCmd.HasPhdrs = true; 1107 else if (Tok == "AT") 1108 PhdrCmd.LMAExpr = readParenExpr(); 1109 else if (Tok == "FLAGS") { 1110 expect("("); 1111 // Passing 0 for the value of dot is a bit of a hack. It means that 1112 // we accept expressions like ".|1". 1113 PhdrCmd.Flags = readExpr()(0); 1114 expect(")"); 1115 } else 1116 setError("unexpected header attribute: " + Tok); 1117 } while (!Error); 1118 } 1119 } 1120 1121 void ScriptParser::readSearchDir() { 1122 expect("("); 1123 StringRef Tok = next(); 1124 if (!Config->Nostdlib) 1125 Config->SearchPaths.push_back(unquote(Tok)); 1126 expect(")"); 1127 } 1128 1129 void ScriptParser::readSections() { 1130 Opt.HasSections = true; 1131 expect("{"); 1132 while (!Error && !skip("}")) { 1133 StringRef Tok = next(); 1134 BaseCommand *Cmd = readProvideOrAssignment(Tok, true); 1135 if (!Cmd) { 1136 if (Tok == "ASSERT") 1137 Cmd = new AssertCommand(readAssert()); 1138 else 1139 Cmd = readOutputSectionDescription(Tok); 1140 } 1141 Opt.Commands.emplace_back(Cmd); 1142 } 1143 } 1144 1145 static int precedence(StringRef Op) { 1146 return StringSwitch<int>(Op) 1147 .Cases("*", "/", 5) 1148 .Cases("+", "-", 4) 1149 .Cases("<<", ">>", 3) 1150 .Cases("<", "<=", ">", ">=", "==", "!=", 2) 1151 .Cases("&", "|", 1) 1152 .Default(-1); 1153 } 1154 1155 Regex ScriptParser::readFilePatterns() { 1156 std::vector<StringRef> V; 1157 while (!Error && !skip(")")) 1158 V.push_back(next()); 1159 return compileGlobPatterns(V); 1160 } 1161 1162 SortSectionPolicy ScriptParser::readSortKind() { 1163 if (skip("SORT") || skip("SORT_BY_NAME")) 1164 return SortSectionPolicy::Name; 1165 if (skip("SORT_BY_ALIGNMENT")) 1166 return SortSectionPolicy::Alignment; 1167 if (skip("SORT_BY_INIT_PRIORITY")) 1168 return SortSectionPolicy::Priority; 1169 if (skip("SORT_NONE")) 1170 return SortSectionPolicy::None; 1171 return SortSectionPolicy::Default; 1172 } 1173 1174 // Method reads a list of sequence of excluded files and section globs given in 1175 // a following form: ((EXCLUDE_FILE(file_pattern+))? section_pattern+)+ 1176 // Example: *(.foo.1 EXCLUDE_FILE (*a.o) .foo.2 EXCLUDE_FILE (*b.o) .foo.3) 1177 // The semantics of that is next: 1178 // * Include .foo.1 from every file. 1179 // * Include .foo.2 from every file but a.o 1180 // * Include .foo.3 from every file but b.o 1181 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 1182 std::vector<SectionPattern> Ret; 1183 while (!Error && peek() != ")") { 1184 Regex ExcludeFileRe; 1185 if (skip("EXCLUDE_FILE")) { 1186 expect("("); 1187 ExcludeFileRe = readFilePatterns(); 1188 } 1189 1190 std::vector<StringRef> V; 1191 while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") 1192 V.push_back(next()); 1193 1194 if (!V.empty()) 1195 Ret.push_back({std::move(ExcludeFileRe), compileGlobPatterns(V)}); 1196 else 1197 setError("section pattern is expected"); 1198 } 1199 return Ret; 1200 } 1201 1202 // Section pattern grammar can have complex expressions, for example: 1203 // *(SORT(.foo.* EXCLUDE_FILE (*file1.o) .bar.*) .bar.* SORT(.zed.*)) 1204 // Generally is a sequence of globs and excludes that may be wrapped in a SORT() 1205 // commands, like: SORT(glob0) glob1 glob2 SORT(glob4) 1206 // This methods handles wrapping sequences of excluded files and section globs 1207 // into SORT() if that needed and reads them all. 1208 InputSectionDescription * 1209 ScriptParser::readInputSectionRules(StringRef FilePattern) { 1210 auto *Cmd = new InputSectionDescription(FilePattern); 1211 expect("("); 1212 while (!HasError && !skip(")")) { 1213 SortSectionPolicy Outer = readSortKind(); 1214 SortSectionPolicy Inner = SortSectionPolicy::Default; 1215 std::vector<SectionPattern> V; 1216 if (Outer != SortSectionPolicy::Default) { 1217 expect("("); 1218 Inner = readSortKind(); 1219 if (Inner != SortSectionPolicy::Default) { 1220 expect("("); 1221 V = readInputSectionsList(); 1222 expect(")"); 1223 } else { 1224 V = readInputSectionsList(); 1225 } 1226 expect(")"); 1227 } else { 1228 V = readInputSectionsList(); 1229 } 1230 1231 for (SectionPattern &Pat : V) { 1232 Pat.SortInner = Inner; 1233 Pat.SortOuter = Outer; 1234 } 1235 1236 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 1237 } 1238 return Cmd; 1239 } 1240 1241 InputSectionDescription * 1242 ScriptParser::readInputSectionDescription(StringRef Tok) { 1243 // Input section wildcard can be surrounded by KEEP. 1244 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 1245 if (Tok == "KEEP") { 1246 expect("("); 1247 StringRef FilePattern = next(); 1248 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 1249 expect(")"); 1250 for (SectionPattern &Pat : Cmd->SectionPatterns) 1251 Opt.KeptSections.push_back(&Pat.SectionRe); 1252 return Cmd; 1253 } 1254 return readInputSectionRules(Tok); 1255 } 1256 1257 void ScriptParser::readSort() { 1258 expect("("); 1259 expect("CONSTRUCTORS"); 1260 expect(")"); 1261 } 1262 1263 Expr ScriptParser::readAssert() { 1264 expect("("); 1265 Expr E = readExpr(); 1266 expect(","); 1267 StringRef Msg = unquote(next()); 1268 expect(")"); 1269 return [=](uint64_t Dot) { 1270 uint64_t V = E(Dot); 1271 if (!V) 1272 error(Msg); 1273 return V; 1274 }; 1275 } 1276 1277 // Reads a FILL(expr) command. We handle the FILL command as an 1278 // alias for =fillexp section attribute, which is different from 1279 // what GNU linkers do. 1280 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 1281 std::vector<uint8_t> ScriptParser::readFill() { 1282 expect("("); 1283 std::vector<uint8_t> V = readOutputSectionFiller(next()); 1284 expect(")"); 1285 expect(";"); 1286 return V; 1287 } 1288 1289 OutputSectionCommand * 1290 ScriptParser::readOutputSectionDescription(StringRef OutSec) { 1291 OutputSectionCommand *Cmd = new OutputSectionCommand(OutSec); 1292 1293 // Read an address expression. 1294 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html#Output-Section-Address 1295 if (peek() != ":") 1296 Cmd->AddrExpr = readExpr(); 1297 1298 expect(":"); 1299 1300 if (skip("AT")) 1301 Cmd->LmaExpr = readParenExpr(); 1302 if (skip("ALIGN")) 1303 Cmd->AlignExpr = readParenExpr(); 1304 if (skip("SUBALIGN")) 1305 Cmd->SubalignExpr = readParenExpr(); 1306 1307 // Parse constraints. 1308 if (skip("ONLY_IF_RO")) 1309 Cmd->Constraint = ConstraintKind::ReadOnly; 1310 if (skip("ONLY_IF_RW")) 1311 Cmd->Constraint = ConstraintKind::ReadWrite; 1312 expect("{"); 1313 1314 while (!Error && !skip("}")) { 1315 StringRef Tok = next(); 1316 if (SymbolAssignment *Assignment = readProvideOrAssignment(Tok, false)) 1317 Cmd->Commands.emplace_back(Assignment); 1318 else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) 1319 Cmd->Commands.emplace_back(Data); 1320 else if (Tok == "FILL") 1321 Cmd->Filler = readFill(); 1322 else if (Tok == "SORT") 1323 readSort(); 1324 else if (peek() == "(") 1325 Cmd->Commands.emplace_back(readInputSectionDescription(Tok)); 1326 else 1327 setError("unknown command " + Tok); 1328 } 1329 Cmd->Phdrs = readOutputSectionPhdrs(); 1330 1331 if (skip("=")) 1332 Cmd->Filler = readOutputSectionFiller(next()); 1333 else if (peek().startswith("=")) 1334 Cmd->Filler = readOutputSectionFiller(next().drop_front()); 1335 1336 return Cmd; 1337 } 1338 1339 // Read "=<number>" where <number> is an octal/decimal/hexadecimal number. 1340 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 1341 // 1342 // ld.gold is not fully compatible with ld.bfd. ld.bfd handles 1343 // hexstrings as blobs of arbitrary sizes, while ld.gold handles them 1344 // as 32-bit big-endian values. We will do the same as ld.gold does 1345 // because it's simpler than what ld.bfd does. 1346 std::vector<uint8_t> ScriptParser::readOutputSectionFiller(StringRef Tok) { 1347 uint32_t V; 1348 if (Tok.getAsInteger(0, V)) { 1349 setError("invalid filler expression: " + Tok); 1350 return {}; 1351 } 1352 return {uint8_t(V >> 24), uint8_t(V >> 16), uint8_t(V >> 8), uint8_t(V)}; 1353 } 1354 1355 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { 1356 expect("("); 1357 SymbolAssignment *Cmd = readAssignment(next()); 1358 Cmd->Provide = Provide; 1359 Cmd->Hidden = Hidden; 1360 expect(")"); 1361 expect(";"); 1362 return Cmd; 1363 } 1364 1365 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok, 1366 bool MakeAbsolute) { 1367 SymbolAssignment *Cmd = nullptr; 1368 if (peek() == "=" || peek() == "+=") { 1369 Cmd = readAssignment(Tok); 1370 expect(";"); 1371 } else if (Tok == "PROVIDE") { 1372 Cmd = readProvideHidden(true, false); 1373 } else if (Tok == "HIDDEN") { 1374 Cmd = readProvideHidden(false, true); 1375 } else if (Tok == "PROVIDE_HIDDEN") { 1376 Cmd = readProvideHidden(true, true); 1377 } 1378 if (Cmd && MakeAbsolute) 1379 Cmd->IsAbsolute = true; 1380 return Cmd; 1381 } 1382 1383 static uint64_t getSymbolValue(StringRef S, uint64_t Dot) { 1384 if (S == ".") 1385 return Dot; 1386 return ScriptBase->getSymbolValue(S); 1387 } 1388 1389 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { 1390 StringRef Op = next(); 1391 bool IsAbsolute = false; 1392 Expr E; 1393 assert(Op == "=" || Op == "+="); 1394 if (skip("ABSOLUTE")) { 1395 E = readParenExpr(); 1396 IsAbsolute = true; 1397 } else { 1398 E = readExpr(); 1399 } 1400 if (Op == "+=") 1401 E = [=](uint64_t Dot) { return getSymbolValue(Name, Dot) + E(Dot); }; 1402 return new SymbolAssignment(Name, E, IsAbsolute); 1403 } 1404 1405 // This is an operator-precedence parser to parse a linker 1406 // script expression. 1407 Expr ScriptParser::readExpr() { return readExpr1(readPrimary(), 0); } 1408 1409 static Expr combine(StringRef Op, Expr L, Expr R) { 1410 if (Op == "*") 1411 return [=](uint64_t Dot) { return L(Dot) * R(Dot); }; 1412 if (Op == "/") { 1413 return [=](uint64_t Dot) -> uint64_t { 1414 uint64_t RHS = R(Dot); 1415 if (RHS == 0) { 1416 error("division by zero"); 1417 return 0; 1418 } 1419 return L(Dot) / RHS; 1420 }; 1421 } 1422 if (Op == "+") 1423 return [=](uint64_t Dot) { return L(Dot) + R(Dot); }; 1424 if (Op == "-") 1425 return [=](uint64_t Dot) { return L(Dot) - R(Dot); }; 1426 if (Op == "<<") 1427 return [=](uint64_t Dot) { return L(Dot) << R(Dot); }; 1428 if (Op == ">>") 1429 return [=](uint64_t Dot) { return L(Dot) >> R(Dot); }; 1430 if (Op == "<") 1431 return [=](uint64_t Dot) { return L(Dot) < R(Dot); }; 1432 if (Op == ">") 1433 return [=](uint64_t Dot) { return L(Dot) > R(Dot); }; 1434 if (Op == ">=") 1435 return [=](uint64_t Dot) { return L(Dot) >= R(Dot); }; 1436 if (Op == "<=") 1437 return [=](uint64_t Dot) { return L(Dot) <= R(Dot); }; 1438 if (Op == "==") 1439 return [=](uint64_t Dot) { return L(Dot) == R(Dot); }; 1440 if (Op == "!=") 1441 return [=](uint64_t Dot) { return L(Dot) != R(Dot); }; 1442 if (Op == "&") 1443 return [=](uint64_t Dot) { return L(Dot) & R(Dot); }; 1444 if (Op == "|") 1445 return [=](uint64_t Dot) { return L(Dot) | R(Dot); }; 1446 llvm_unreachable("invalid operator"); 1447 } 1448 1449 // This is a part of the operator-precedence parser. This function 1450 // assumes that the remaining token stream starts with an operator. 1451 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { 1452 while (!atEOF() && !Error) { 1453 // Read an operator and an expression. 1454 StringRef Op1 = peek(); 1455 if (Op1 == "?") 1456 return readTernary(Lhs); 1457 if (precedence(Op1) < MinPrec) 1458 break; 1459 next(); 1460 Expr Rhs = readPrimary(); 1461 1462 // Evaluate the remaining part of the expression first if the 1463 // next operator has greater precedence than the previous one. 1464 // For example, if we have read "+" and "3", and if the next 1465 // operator is "*", then we'll evaluate 3 * ... part first. 1466 while (!atEOF()) { 1467 StringRef Op2 = peek(); 1468 if (precedence(Op2) <= precedence(Op1)) 1469 break; 1470 Rhs = readExpr1(Rhs, precedence(Op2)); 1471 } 1472 1473 Lhs = combine(Op1, Lhs, Rhs); 1474 } 1475 return Lhs; 1476 } 1477 1478 uint64_t static getConstant(StringRef S) { 1479 if (S == "COMMONPAGESIZE") 1480 return Target->PageSize; 1481 if (S == "MAXPAGESIZE") 1482 return Config->MaxPageSize; 1483 error("unknown constant: " + S); 1484 return 0; 1485 } 1486 1487 // Parses Tok as an integer. Returns true if successful. 1488 // It recognizes hexadecimal (prefixed with "0x" or suffixed with "H") 1489 // and decimal numbers. Decimal numbers may have "K" (kilo) or 1490 // "M" (mega) prefixes. 1491 static bool readInteger(StringRef Tok, uint64_t &Result) { 1492 if (Tok.startswith("-")) { 1493 if (!readInteger(Tok.substr(1), Result)) 1494 return false; 1495 Result = -Result; 1496 return true; 1497 } 1498 if (Tok.startswith_lower("0x")) 1499 return !Tok.substr(2).getAsInteger(16, Result); 1500 if (Tok.endswith_lower("H")) 1501 return !Tok.drop_back().getAsInteger(16, Result); 1502 1503 int Suffix = 1; 1504 if (Tok.endswith_lower("K")) { 1505 Suffix = 1024; 1506 Tok = Tok.drop_back(); 1507 } else if (Tok.endswith_lower("M")) { 1508 Suffix = 1024 * 1024; 1509 Tok = Tok.drop_back(); 1510 } 1511 if (Tok.getAsInteger(10, Result)) 1512 return false; 1513 Result *= Suffix; 1514 return true; 1515 } 1516 1517 BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { 1518 int Size = StringSwitch<unsigned>(Tok) 1519 .Case("BYTE", 1) 1520 .Case("SHORT", 2) 1521 .Case("LONG", 4) 1522 .Case("QUAD", 8) 1523 .Default(-1); 1524 if (Size == -1) 1525 return nullptr; 1526 1527 expect("("); 1528 uint64_t Val = 0; 1529 StringRef S = next(); 1530 if (!readInteger(S, Val)) 1531 setError("unexpected value: " + S); 1532 expect(")"); 1533 return new BytesDataCommand(Val, Size); 1534 } 1535 1536 Expr ScriptParser::readPrimary() { 1537 if (peek() == "(") 1538 return readParenExpr(); 1539 1540 StringRef Tok = next(); 1541 1542 if (Tok == "~") { 1543 Expr E = readPrimary(); 1544 return [=](uint64_t Dot) { return ~E(Dot); }; 1545 } 1546 if (Tok == "-") { 1547 Expr E = readPrimary(); 1548 return [=](uint64_t Dot) { return -E(Dot); }; 1549 } 1550 1551 // Built-in functions are parsed here. 1552 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 1553 if (Tok == "ADDR") { 1554 expect("("); 1555 StringRef Name = next(); 1556 expect(")"); 1557 return 1558 [=](uint64_t Dot) { return ScriptBase->getOutputSectionAddress(Name); }; 1559 } 1560 if (Tok == "ASSERT") 1561 return readAssert(); 1562 if (Tok == "ALIGN") { 1563 Expr E = readParenExpr(); 1564 return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; 1565 } 1566 if (Tok == "CONSTANT") { 1567 expect("("); 1568 StringRef Tok = next(); 1569 expect(")"); 1570 return [=](uint64_t Dot) { return getConstant(Tok); }; 1571 } 1572 if (Tok == "DEFINED") { 1573 expect("("); 1574 StringRef Tok = next(); 1575 expect(")"); 1576 return [=](uint64_t Dot) { return ScriptBase->isDefined(Tok) ? 1 : 0; }; 1577 } 1578 if (Tok == "SEGMENT_START") { 1579 expect("("); 1580 next(); 1581 expect(","); 1582 Expr E = readExpr(); 1583 expect(")"); 1584 return [=](uint64_t Dot) { return E(Dot); }; 1585 } 1586 if (Tok == "DATA_SEGMENT_ALIGN") { 1587 expect("("); 1588 Expr E = readExpr(); 1589 expect(","); 1590 readExpr(); 1591 expect(")"); 1592 return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; 1593 } 1594 if (Tok == "DATA_SEGMENT_END") { 1595 expect("("); 1596 expect("."); 1597 expect(")"); 1598 return [](uint64_t Dot) { return Dot; }; 1599 } 1600 // GNU linkers implements more complicated logic to handle 1601 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and just align to 1602 // the next page boundary for simplicity. 1603 if (Tok == "DATA_SEGMENT_RELRO_END") { 1604 expect("("); 1605 readExpr(); 1606 expect(","); 1607 readExpr(); 1608 expect(")"); 1609 return [](uint64_t Dot) { return alignTo(Dot, Target->PageSize); }; 1610 } 1611 if (Tok == "SIZEOF") { 1612 expect("("); 1613 StringRef Name = next(); 1614 expect(")"); 1615 return [=](uint64_t Dot) { return ScriptBase->getOutputSectionSize(Name); }; 1616 } 1617 if (Tok == "ALIGNOF") { 1618 expect("("); 1619 StringRef Name = next(); 1620 expect(")"); 1621 return 1622 [=](uint64_t Dot) { return ScriptBase->getOutputSectionAlign(Name); }; 1623 } 1624 if (Tok == "SIZEOF_HEADERS") 1625 return [=](uint64_t Dot) { return ScriptBase->getHeaderSize(); }; 1626 1627 // Tok is a literal number. 1628 uint64_t V; 1629 if (readInteger(Tok, V)) 1630 return [=](uint64_t Dot) { return V; }; 1631 1632 // Tok is a symbol name. 1633 if (Tok != "." && !isValidCIdentifier(Tok)) 1634 setError("malformed number: " + Tok); 1635 return [=](uint64_t Dot) { return getSymbolValue(Tok, Dot); }; 1636 } 1637 1638 Expr ScriptParser::readTernary(Expr Cond) { 1639 next(); 1640 Expr L = readExpr(); 1641 expect(":"); 1642 Expr R = readExpr(); 1643 return [=](uint64_t Dot) { return Cond(Dot) ? L(Dot) : R(Dot); }; 1644 } 1645 1646 Expr ScriptParser::readParenExpr() { 1647 expect("("); 1648 Expr E = readExpr(); 1649 expect(")"); 1650 return E; 1651 } 1652 1653 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 1654 std::vector<StringRef> Phdrs; 1655 while (!Error && peek().startswith(":")) { 1656 StringRef Tok = next(); 1657 Tok = (Tok.size() == 1) ? next() : Tok.substr(1); 1658 if (Tok.empty()) { 1659 setError("section header name is empty"); 1660 break; 1661 } 1662 Phdrs.push_back(Tok); 1663 } 1664 return Phdrs; 1665 } 1666 1667 unsigned ScriptParser::readPhdrType() { 1668 StringRef Tok = next(); 1669 unsigned Ret = StringSwitch<unsigned>(Tok) 1670 .Case("PT_NULL", PT_NULL) 1671 .Case("PT_LOAD", PT_LOAD) 1672 .Case("PT_DYNAMIC", PT_DYNAMIC) 1673 .Case("PT_INTERP", PT_INTERP) 1674 .Case("PT_NOTE", PT_NOTE) 1675 .Case("PT_SHLIB", PT_SHLIB) 1676 .Case("PT_PHDR", PT_PHDR) 1677 .Case("PT_TLS", PT_TLS) 1678 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1679 .Case("PT_GNU_STACK", PT_GNU_STACK) 1680 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1681 .Default(-1); 1682 1683 if (Ret == (unsigned)-1) { 1684 setError("invalid program header type: " + Tok); 1685 return PT_NULL; 1686 } 1687 return Ret; 1688 } 1689 1690 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1691 // Identifiers start at 2 because 0 and 1 are reserved 1692 // for VER_NDX_LOCAL and VER_NDX_GLOBAL constants. 1693 size_t VersionId = Config->VersionDefinitions.size() + 2; 1694 Config->VersionDefinitions.push_back({VerStr, VersionId}); 1695 1696 if (skip("global:") || peek() != "local:") 1697 readGlobal(VerStr); 1698 if (skip("local:")) 1699 readLocal(); 1700 expect("}"); 1701 1702 // Each version may have a parent version. For example, "Ver2" defined as 1703 // "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" as a parent. This 1704 // version hierarchy is, probably against your instinct, purely for human; the 1705 // runtime doesn't care about them at all. In LLD, we simply skip the token. 1706 if (!VerStr.empty() && peek() != ";") 1707 next(); 1708 expect(";"); 1709 } 1710 1711 void ScriptParser::readLocal() { 1712 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1713 expect("*"); 1714 expect(";"); 1715 } 1716 1717 void ScriptParser::readExtern(std::vector<SymbolVersion> *Globals) { 1718 expect("\"C++\""); 1719 expect("{"); 1720 1721 for (;;) { 1722 if (peek() == "}" || Error) 1723 break; 1724 bool HasWildcard = !peek().startswith("\"") && hasWildcard(peek()); 1725 Globals->push_back({unquote(next()), true, HasWildcard}); 1726 expect(";"); 1727 } 1728 1729 expect("}"); 1730 expect(";"); 1731 } 1732 1733 void ScriptParser::readGlobal(StringRef VerStr) { 1734 std::vector<SymbolVersion> *Globals; 1735 if (VerStr.empty()) 1736 Globals = &Config->VersionScriptGlobals; 1737 else 1738 Globals = &Config->VersionDefinitions.back().Globals; 1739 1740 for (;;) { 1741 if (skip("extern")) 1742 readExtern(Globals); 1743 1744 StringRef Cur = peek(); 1745 if (Cur == "}" || Cur == "local:" || Error) 1746 return; 1747 next(); 1748 Globals->push_back({unquote(Cur), false, hasWildcard(Cur)}); 1749 expect(";"); 1750 } 1751 } 1752 1753 static bool isUnderSysroot(StringRef Path) { 1754 if (Config->Sysroot == "") 1755 return false; 1756 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 1757 if (sys::fs::equivalent(Config->Sysroot, Path)) 1758 return true; 1759 return false; 1760 } 1761 1762 void elf::readLinkerScript(MemoryBufferRef MB) { 1763 StringRef Path = MB.getBufferIdentifier(); 1764 ScriptParser(MB.getBuffer(), isUnderSysroot(Path)).readLinkerScript(); 1765 } 1766 1767 void elf::readVersionScript(MemoryBufferRef MB) { 1768 ScriptParser(MB.getBuffer(), false).readVersionScript(); 1769 } 1770 1771 template class elf::LinkerScript<ELF32LE>; 1772 template class elf::LinkerScript<ELF32BE>; 1773 template class elf::LinkerScript<ELF64LE>; 1774 template class elf::LinkerScript<ELF64BE>; 1775