//===- LinkerScript.cpp ---------------------------------------------------===//
//
//                             The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the parser/evaluator of the linker script.
// It parses a linker script and writes the result to Config or ScriptConfig
// objects.
//
// If SECTIONS command is used, a ScriptConfig contains an AST
// of the command which will later be consumed by createSections() and
// assignAddresses().
//
//===----------------------------------------------------------------------===//

#include "LinkerScript.h"
#include "Config.h"
#include "Driver.h"
#include "InputSection.h"
#include "OutputSections.h"
#include "ScriptParser.h"
#include "Strings.h"
#include "Symbols.h"
#include "SymbolTable.h"
#include "Target.h"
#include "Writer.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/StringSaver.h"

using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
using namespace lld;
using namespace lld::elf;

LinkerScriptBase *elf::ScriptBase;
ScriptConfiguration *elf::ScriptConfig;

// Defines Cmd->Name as a global regular symbol in the symbol table, applies
// hidden or default visibility as requested by the command, and remembers
// the resulting symbol body in Cmd->Sym.
template <class ELFT> static void addRegular(SymbolAssignment *Cmd) {
  Symbol *Sym = Symtab<ELFT>::X->addRegular(Cmd->Name, STB_GLOBAL, STV_DEFAULT);
  Sym->Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT;
  Cmd->Sym = Sym->body();

  // If we have no SECTIONS then we don't have '.' and don't call
  // assignAddresses(). We calculate symbol value immediately in this case.
  if (!ScriptConfig->HasSections)
    cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Cmd->Expression(0);
}

// Defines Cmd->Name as a synthetic symbol and remembers the resulting symbol
// body in Cmd->Sym. The nullptr/0 arguments are presumably section and value
// placeholders; assignSectionSymbol() sets both fields later.
template <class ELFT> static void addSynthetic(SymbolAssignment *Cmd) {
  Symbol *Sym = Symtab<ELFT>::X->addSynthetic(
      Cmd->Name, nullptr, 0, Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT);
  Cmd->Sym = Sym->body();
}

// Defines the symbol of an assignment: absolute assignments become regular
// symbols, all other assignments become synthetic symbols.
template <class ELFT> static void addSymbol(SymbolAssignment *Cmd) {
  if (Cmd->IsAbsolute)
    addRegular<ELFT>(Cmd);
  else
    addSynthetic<ELFT>(Cmd);
}

// If a symbol was in PROVIDE(), we need to define it only when
// it is an undefined symbol.
// An assignment to the location counter "." never defines a symbol.
template <class ELFT> static bool shouldDefine(SymbolAssignment *Cmd) {
  if (Cmd->Name == ".")
    return false;
  if (!Cmd->Provide)
    return true;
  SymbolBody *B = Symtab<ELFT>::X->find(Cmd->Name);
  return B && B->isUndefined();
}

// classof() implementations for the command hierarchy. They let isa<>,
// cast<> and dyn_cast<> identify a BaseCommand by its Kind tag.
bool SymbolAssignment::classof(const BaseCommand *C) {
  return C->Kind == AssignmentKind;
}

bool OutputSectionCommand::classof(const BaseCommand *C) {
  return C->Kind == OutputSectionKind;
}

bool InputSectionDescription::classof(const BaseCommand *C) {
  return C->Kind == InputSectionKind;
}

bool AssertCommand::classof(const BaseCommand *C) {
  return C->Kind == AssertKind;
}

// A section counts as discarded if it is null or is not live.
template <class ELFT> static bool isDiscarded(InputSectionBase<ELFT> *S) {
  return !S || !S->Live;
}

template <class ELFT> LinkerScript<ELFT>::LinkerScript() {}
template <class ELFT> LinkerScript<ELFT>::~LinkerScript() {}

// Returns true if the name of S matches any pattern in Opt.KeptSections.
template <class ELFT>
bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) {
  for (Regex *Re : Opt.KeptSections)
    if (Re->match(S->Name))
      return true;
  return false;
}

// Orders sections by the priority that getPriority() derives from the name.
static bool comparePriority(InputSectionData *A, InputSectionData *B) {
  return getPriority(A->Name) < getPriority(B->Name);
}

// Orders sections by name.
static bool compareName(InputSectionData *A, InputSectionData *B) {
  return A->Name < B->Name;
}

static bool compareAlignment(InputSectionData *A, InputSectionData *B) {
  // ">" is not a mistake. Larger alignments are placed before smaller
  // alignments in order to reduce the amount of padding necessary.
  // This is compatible with GNU.
  return A->Alignment > B->Alignment;
}

// Maps a sort policy to its comparison function. Only Alignment, Name and
// Priority are handled; any other policy is a programming error here, so
// callers must filter out the remaining policies first (see sortSections()).
static std::function<bool(InputSectionData *, InputSectionData *)>
getComparator(SortSectionPolicy K) {
  switch (K) {
  case SortSectionPolicy::Alignment:
    return compareAlignment;
  case SortSectionPolicy::Name:
    return compareName;
  case SortSectionPolicy::Priority:
    return comparePriority;
  default:
    llvm_unreachable("unknown sort policy");
  }
}

// Returns true if Sections satisfy the output section constraint Kind.
// NoConstraint always matches. ReadWrite matches when at least one section
// has SHF_WRITE set, ReadOnly matches when none of them has it.
template <class ELFT>
static bool matchConstraints(ArrayRef<InputSectionBase<ELFT> *> Sections,
                             ConstraintKind Kind) {
  if (Kind == ConstraintKind::NoConstraint)
    return true;
  bool IsRW = llvm::any_of(Sections, [=](InputSectionData *Sec2) {
    auto *Sec = static_cast<InputSectionBase<ELFT> *>(Sec2);
    return Sec->getSectionHdr()->sh_flags & SHF_WRITE;
  });
  return (IsRW && Kind == ConstraintKind::ReadWrite) ||
         (!IsRW && Kind == ConstraintKind::ReadOnly);
}

// Stable-sorts [Begin, End) according to K. The Default and None policies
// leave the range untouched.
static void sortSections(InputSectionData **Begin, InputSectionData **End,
                         SortSectionPolicy K) {
  if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None)
    std::stable_sort(Begin, End, getComparator(K));
}

// Compute and remember which sections the InputSectionDescription matches.
template <class ELFT>
void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) {
  // Collects all sections that satisfy constraints of I
  // and attach them to I.
  for (SectionPattern &Pat : I->SectionPatterns) {
    // Remember where this pattern's matches start so that only they are
    // sorted below.
    size_t SizeBefore = I->Sections.size();
    for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles()) {
      // File patterns are matched against the file name without directories.
      StringRef Filename = sys::path::filename(F->getName());
      if (!I->FileRe.match(Filename) || Pat.ExcludedFileRe.match(Filename))
        continue;

      // Skip sections that are discarded or already claimed by an earlier
      // description (OutSec is non-null for those, see the end of this
      // function).
      for (InputSectionBase<ELFT> *S : F->getSections())
        if (!isDiscarded(S) && !S->OutSec && Pat.SectionRe.match(S->Name))
          I->Sections.push_back(S);
      // NOTE(review): this runs once per matching object file, so the common
      // section can be appended several times -- confirm this is intended.
      if (Pat.SectionRe.match("COMMON"))
        I->Sections.push_back(CommonInputSection<ELFT>::X);
    }

    // Sort sections as instructed by SORT-family commands and --sort-section
    // option. Because SORT-family commands can be nested at most two depth
    // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command
    // line option is respected even if a SORT command is given, the exact
    // behavior we have here is a bit complicated. Here are the rules.
    //
    // 1. If two SORT commands are given, --sort-section is ignored.
    // 2. If one SORT command is given, and if it is not SORT_NONE,
    //    --sort-section is handled as an inner SORT command.
    // 3. If one SORT command is given, and if it is SORT_NONE, don't sort.
    // 4. If no SORT command is given, sort according to --sort-section.
    InputSectionData **Begin = I->Sections.data() + SizeBefore;
    InputSectionData **End = I->Sections.data() + I->Sections.size();
    if (Pat.SortOuter != SortSectionPolicy::None) {
      // The inner sort runs first; the outer sort is stable, so the inner
      // order survives among elements that compare equal in the outer sort.
      if (Pat.SortInner == SortSectionPolicy::Default)
        sortSections(Begin, End, Config->SortSection);
      else
        sortSections(Begin, End, Pat.SortInner);
      sortSections(Begin, End, Pat.SortOuter);
    }
  }

  // We do not add duplicate input sections, so mark them with a dummy output
  // section for now.
  for (InputSectionData *S : I->Sections) {
    auto *S2 = static_cast<InputSectionBase<ELFT> *>(S);
    S2->OutSec = (OutputSectionBase<ELFT> *)-1;
  }
}

// Marks every section in V as not live and reports it as discarded.
template <class ELFT>
void LinkerScript<ELFT>::discard(ArrayRef<InputSectionBase<ELFT> *> V) {
  for (InputSectionBase<ELFT> *S : V) {
    S->Live = false;
    reportDiscarded(S);
  }
}

// Computes the matches of every input section description inside OutCmd and
// returns them concatenated in command order. Other sub-commands are skipped.
template <class ELFT>
std::vector<InputSectionBase<ELFT> *>
LinkerScript<ELFT>::createInputSectionList(OutputSectionCommand &OutCmd) {
  std::vector<InputSectionBase<ELFT> *> Ret;

  for (const std::unique_ptr<BaseCommand> &Base : OutCmd.Commands) {
    auto *Cmd = dyn_cast<InputSectionDescription>(Base.get());
    if (!Cmd)
      continue;
    computeInputSections(Cmd);
    for (InputSectionData *S : Cmd->Sections)
      Ret.push_back(static_cast<InputSectionBase<ELFT> *>(S));
  }

  return Ret;
}

// Builds the key that selects the output section for input section C when
// it is placed into the script-named output section OutsecName.
template <class ELFT>
static SectionKey<ELFT::Is64Bits> createKey(InputSectionBase<ELFT> *C,
                                            StringRef OutsecName) {
  // When using linker script the merge rules are different.
  // Unfortunately, linker scripts are name based. This means that expressions
  // like *(.foo*) can refer to multiple input sections that would normally be
  // placed in different output sections. We cannot put them in different
  // output sections or we would produce wrong results for
  // start = .; *(.foo.*) end = .; *(.bar)
  // and a mapping of .foo1 and .bar1 to one section and .foo2 and .bar2 to
  // another. The problem is that there is no way to layout those output
  // sections such that the .foo sections are the only thing between the
  // start and end symbols.

  // An extra annoyance is that we cannot simply disable merging of the contents
  // of SHF_MERGE sections, but our implementation requires one output section
  // per "kind" (string or not, which size/alignment).
  // Fortunately, creating symbols in the middle of a merge section is not
  // supported by bfd or gold, so we can just create multiple section in that
  // case.
  const typename ELFT::Shdr *H = C->getSectionHdr();
  typedef typename ELFT::uint uintX_t;
  // Only the merge-related flags take part in the key.
  uintX_t Flags = H->sh_flags & (SHF_MERGE | SHF_STRINGS);

  // The alignment distinguishes keys only for mergeable input sections; it
  // stays 0 for everything else.
  uintX_t Alignment = 0;
  if (isa<MergeInputSection<ELFT>>(C))
    Alignment = std::max(H->sh_addralign, H->sh_entsize);

  return SectionKey<ELFT::Is64Bits>{OutsecName, /*Type*/ 0, Flags, Alignment};
}

// Adds Sec to the output section that Factory returns for the key built from
// Sec and Name. An output section that Factory reports as newly created is
// also appended to OutputSections.
template <class ELFT>
void LinkerScript<ELFT>::addSection(OutputSectionFactory<ELFT> &Factory,
                                    InputSectionBase<ELFT> *Sec,
                                    StringRef Name) {
  OutputSectionBase<ELFT> *OutSec;
  bool IsNew;
  std::tie(OutSec, IsNew) = Factory.create(createKey(Sec, Name), Sec);
  if (IsNew)
    OutputSections->push_back(OutSec);
  OutSec->addSection(Sec);
}

// Walks the script commands in order. Top-level assignments define regular
// symbols, ASSERTs are evaluated right away when there is no SECTIONS
// command, and each output section description gets its input sections
// collected and added to output sections.
template <class ELFT>
void LinkerScript<ELFT>::processCommands(OutputSectionFactory<ELFT> &Factory) {

  // An index is used instead of a range-for because commands may be erased
  // from Opt.Commands while iterating.
  for (unsigned I = 0; I < Opt.Commands.size(); ++I) {
    auto Iter = Opt.Commands.begin() + I;
    const std::unique_ptr<BaseCommand> &Base1 = *Iter;
    if (auto *Cmd = dyn_cast<SymbolAssignment>(Base1.get())) {
      if (shouldDefine<ELFT>(Cmd))
        addRegular<ELFT>(Cmd);
      continue;
    }
    if (auto *Cmd = dyn_cast<AssertCommand>(Base1.get())) {
      // If we don't have SECTIONS then output sections have already been
      // created by Writer<ELFT>. The LinkerScript<ELFT>::assignAddresses
      // will not be called, so ASSERT should be evaluated now.
      if (!Opt.HasSections)
        Cmd->Expression(0);
      continue;
    }

    if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base1.get())) {
      std::vector<InputSectionBase<ELFT> *> V = createInputSectionList(*Cmd);

      if (Cmd->Name == "/DISCARD/") {
        discard(V);
        continue;
      }

      if (!matchConstraints<ELFT>(V, Cmd->Constraint)) {
        // Release the sections claimed by computeInputSections() so that
        // later descriptions (or orphan handling) can pick them up, then
        // drop the command.
        for (InputSectionBase<ELFT> *S : V)
          S->OutSec = nullptr;
        Opt.Commands.erase(Iter);
        // Erasing shifted the following commands down by one; step back so
        // that the loop increment revisits this slot.
        --I;
        continue;
      }

      // Define symbols assigned inside the output section description.
      for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands)
        if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base.get()))
          if (shouldDefine<ELFT>(OutCmd))
            addSymbol<ELFT>(OutCmd);

      if (V.empty())
        continue;

      for (InputSectionBase<ELFT> *Sec : V) {
        addSection(Factory, Sec, Cmd->Name);
        // A non-zero SUBALIGN value replaces the section's own alignment.
        if (uint32_t Subalign = Cmd->SubalignExpr ? Cmd->SubalignExpr(0) : 0)
          Sec->Alignment = Subalign;
      }
    }
  }
}

// Creates output sections for everything the script places, then for the
// remaining (orphan) input sections.
template <class ELFT>
void LinkerScript<ELFT>::createSections(OutputSectionFactory<ELFT> &Factory) {
  processCommands(Factory);
  // Add orphan sections.
  for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles())
    for (InputSectionBase<ELFT> *S : F->getSections())
      if (!isDiscarded(S) && !S->OutSec)
        addSection(Factory, S, getOutputSectionName(S));
}

// Sets value of a section-defined symbol. Two kinds of
// symbols are processed: synthetic symbols, whose value
// is an offset from beginning of section and regular
// symbols whose value is absolute.
template <class ELFT>
static void assignSectionSymbol(SymbolAssignment *Cmd,
                                OutputSectionBase<ELFT> *Sec,
                                typename ELFT::uint Off) {
  // No symbol was defined for this assignment, so there is nothing to set.
  if (!Cmd->Sym)
    return;

  // The expression is evaluated with '.' equal to the address Sec+Off.
  // A synthetic symbol stores the result relative to the section start.
  if (auto *Body = dyn_cast<DefinedSynthetic<ELFT>>(Cmd->Sym)) {
    Body->Section = Sec;
    Body->Value = Cmd->Expression(Sec->getVA() + Off) - Sec->getVA();
    return;
  }
  auto *Body = cast<DefinedRegular<ELFT>>(Cmd->Sym);
  Body->Value = Cmd->Expression(Sec->getVA() + Off);
}

// Returns true for an output section that is both thread-local (SHF_TLS)
// and of type SHT_NOBITS.
template <class ELFT> static bool isTbss(OutputSectionBase<ELFT> *Sec) {
  return (Sec->getFlags() & SHF_TLS) && Sec->getType() == SHT_NOBITS;
}

// Assigns the offset of input section S within the current output section
// and advances the location counter. Each input section is handled once.
template <class ELFT> void LinkerScript<ELFT>::output(InputSection<ELFT> *S) {
  if (!AlreadyOutputIS.insert(S).second)
    return;
  bool IsTbss = isTbss(CurOutSec);

  // For a TLS NOBITS section the position is tracked in ThreadBssOffset
  // relative to Dot, and Dot itself is left unchanged (see below).
  uintX_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot;
  Pos = alignTo(Pos, S->Alignment);
  S->OutSecOff = Pos - CurOutSec->getVA();
  Pos += S->getSize();

  // Update output section size after adding each section. This is so that
  // SIZEOF works correctly in the case below:
  // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) }
  CurOutSec->setSize(Pos - CurOutSec->getVA());

  if (IsTbss)
    ThreadBssOffset = Pos - Dot;
  else
    Dot = Pos;
}

// Outputs every input section of the current output section that has not
// been output yet and marks the output section as done. Does nothing when
// there is no current section or it is not an OutputSection<ELFT>.
template <class ELFT> void LinkerScript<ELFT>::flush() {
  if (auto *OutSec = dyn_cast_or_null<OutputSection<ELFT>>(CurOutSec)) {
    for (InputSection<ELFT> *I : OutSec->Sections)
      output(I);
    AlreadyOutputOS.insert(CurOutSec);
  }
}

// Makes Sec the current output section: flushes the previous one, aligns
// Dot to the alignment of Sec and sets the address of Sec. Does nothing if
// Sec is already current or has already been output.
template <class ELFT>
void LinkerScript<ELFT>::switchTo(OutputSectionBase<ELFT> *Sec) {
  if (CurOutSec == Sec)
    return;
  if (AlreadyOutputOS.count(Sec))
    return;

  flush();
  CurOutSec = Sec;

  Dot = alignTo(Dot, CurOutSec->getAlignment());
  CurOutSec->setVA(isTbss(CurOutSec) ? Dot + ThreadBssOffset : Dot);
}

// Processes one sub-command of an output section description. Base must be
// either a symbol assignment or an input section description.
template <class ELFT> void LinkerScript<ELFT>::process(BaseCommand &Base) {
  if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) {
    if (AssignCmd->Name == ".") {
      // Update to location counter means update to section size.
      Dot = AssignCmd->Expression(Dot);
      CurOutSec->setSize(Dot - CurOutSec->getVA());
      return;
    }
    assignSectionSymbol<ELFT>(AssignCmd, CurOutSec, Dot - CurOutSec->getVA());
    return;
  }
  auto &ICmd = cast<InputSectionDescription>(Base);
  for (InputSectionData *ID : ICmd.Sections) {
    auto *IB = static_cast<InputSectionBase<ELFT> *>(ID);
    switchTo(IB->OutSec);
    if (auto *I = dyn_cast<InputSection<ELFT>>(IB))
      output(I);
    // Other kinds of input sections are not laid out one by one; the whole
    // output section size is added to Dot once.
    else if (AlreadyOutputOS.insert(CurOutSec).second)
      Dot += CurOutSec->getSize();
  }
}

// Returns all output sections in Sections whose name equals Cmd.Name.
template <class ELFT>
static std::vector<OutputSectionBase<ELFT> *>
findSections(OutputSectionCommand &Cmd,
             const std::vector<OutputSectionBase<ELFT> *> &Sections) {
  std::vector<OutputSectionBase<ELFT> *> Ret;
  for (OutputSectionBase<ELFT> *Sec : Sections)
    if (Sec->getName() == Cmd.Name)
      Ret.push_back(Sec);
  return Ret;
}

// Lays out the output sections named by Cmd: runs the sub-commands of Cmd,
// places same-named output sections that no sub-command reached, and finally
// evaluates the trailing symbol assignments.
template <class ELFT>
void LinkerScript<ELFT>::assignOffsets(OutputSectionCommand *Cmd) {
  std::vector<OutputSectionBase<ELFT> *> Sections =
      findSections(*Cmd, *OutputSections);
  if (Sections.empty())
    return;
  switchTo(Sections[0]);

  // Find the last section output location. We will output orphan sections
  // there so that end symbols point to the correct location.
  // E points just past the last sub-command that is not a symbol assignment,
  // so [E, end) holds only trailing assignments.
  auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(),
                        [](const std::unique_ptr<BaseCommand> &Cmd) {
                          return !isa<SymbolAssignment>(*Cmd);
                        })
               .base();
  for (auto I = Cmd->Commands.begin(); I != E; ++I)
    process(**I);
  flush();
  // Place the output sections with this name that were not output above.
  for (OutputSectionBase<ELFT> *Base : Sections) {
    if (AlreadyOutputOS.count(Base))
      continue;
    switchTo(Base);
    Dot += CurOutSec->getSize();
    flush();
  }
  // Evaluate the trailing assignments after everything has been placed.
  std::for_each(E, Cmd->Commands.end(),
                [this](std::unique_ptr<BaseCommand> &B) { process(*B.get()); });
}

template <class ELFT> void LinkerScript<ELFT>::adjustSectionsBeforeSorting() {
  // It is common practice to use very generic linker scripts. So for any
  // given run some of the output sections in the script will be empty.
  // We could create corresponding empty output sections, but that would
  // clutter the output.
  // We instead remove trivially empty sections. The bfd linker seems even
  // more aggressive at removing them.
  // A command is removed only if no output section carries its name and all
  // of its sub-commands are input section descriptions.
  auto Pos = std::remove_if(
      Opt.Commands.begin(), Opt.Commands.end(),
      [&](const std::unique_ptr<BaseCommand> &Base) {
        auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
        if (!Cmd)
          return false;
        std::vector<OutputSectionBase<ELFT> *> Secs =
            findSections(*Cmd, *OutputSections);
        if (!Secs.empty())
          return false;
        for (const std::unique_ptr<BaseCommand> &I : Cmd->Commands)
          if (!isa<InputSectionDescription>(I.get()))
            return false;
        return true;
      });
  Opt.Commands.erase(Pos, Opt.Commands.end());

  // If the output section contains only symbol assignments, create a
  // corresponding output section. The bfd linker seems to only create them if
  // '.' is assigned to, but creating these section should not have any bad
  // consequences and gives us a section to put the symbol in.
  // A created section takes the type and flags of the closest preceding
  // command that has an output section (SHF_ALLOC and type 0 if none).
  uintX_t Flags = SHF_ALLOC;
  uint32_t Type = 0;
  for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
    auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
    if (!Cmd)
      continue;
    std::vector<OutputSectionBase<ELFT> *> Secs =
        findSections(*Cmd, *OutputSections);
    if (!Secs.empty()) {
      Flags = Secs[0]->getFlags();
      Type = Secs[0]->getType();
      continue;
    }

    auto *OutSec = new OutputSection<ELFT>(Cmd->Name, Type, Flags);
    // The pool takes ownership of the new section.
    Out<ELFT>::Pool.emplace_back(OutSec);
    OutputSections->push_back(OutSec);
  }
}

// When placing orphan sections, we want to place them after symbol assignments
// so that an orphan after
//   begin_foo = .;
//   foo : { *(foo) }
//   end_foo = .;
// doesn't break the intended meaning of the begin/end symbols.
// We don't want to go over sections since Writer<ELFT>::sortSections is the
// one in charge of deciding the order of the sections.
// We don't want to go over alignments, since doing so in
//   rx_sec : { *(rx_sec) }
//   . = ALIGN(0x1000);
//   /* The RW PT_LOAD starts here*/
//   rw_sec : { *(rw_sec) }
// would mean that the RW PT_LOAD would become unaligned.
static bool shouldSkip(const BaseCommand &Cmd) {
  if (isa<OutputSectionCommand>(Cmd))
    return false;
  const auto *Assign = dyn_cast<SymbolAssignment>(&Cmd);
  // Anything that is neither an output section nor an assignment is skipped.
  if (!Assign)
    return true;
  // Assignments are skipped unless they assign to the location counter.
  return Assign->Name != ".";
}

template <class ELFT> void LinkerScript<ELFT>::assignAddresses() {
  // Orphan sections are sections present in the input files which
  // are not explicitly placed into the output file by the linker script.
  // We place orphan sections at end of file.
  // Other linkers place them using some heuristics as described in
  // https://sourceware.org/binutils/docs/ld/Orphan-Sections.html#Orphan-Sections.

  // The OutputSections are already in the correct order.
  // This loop creates or moves commands as needed so that they are in the
  // correct order.
  int CmdIndex = 0;
  for (OutputSectionBase<ELFT> *Sec : *OutputSections) {
    StringRef Name = Sec->getName();

    // Find the last spot where we can insert a command and still get the
    // correct result.
    auto CmdIter = Opt.Commands.begin() + CmdIndex;
    auto E = Opt.Commands.end();
    while (CmdIter != E && shouldSkip(**CmdIter)) {
      ++CmdIter;
      ++CmdIndex;
    }

    auto Pos =
        std::find_if(CmdIter, E, [&](const std::unique_ptr<BaseCommand> &Base) {
          auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
          return Cmd && Cmd->Name == Name;
        });
    // No command names this section: insert a new one at the chosen spot.
    if (Pos == E) {
      Opt.Commands.insert(CmdIter,
                          llvm::make_unique<OutputSectionCommand>(Name));
      ++CmdIndex;
      continue;
    }

    // Continue from where we found it.
    CmdIndex = (Pos - Opt.Commands.begin()) + 1;
    continue;
  }

  // Assign addresses as instructed by linker script SECTIONS sub-commands.
  Dot = getHeaderSize();

  for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
    if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) {
      if (Cmd->Name == ".") {
        Dot = Cmd->Expression(Dot);
      } else if (Cmd->Sym) {
        cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Cmd->Expression(Dot);
      }
      continue;
    }

    if (auto *Cmd = dyn_cast<AssertCommand>(Base.get())) {
      Cmd->Expression(Dot);
      continue;
    }

    auto *Cmd = cast<OutputSectionCommand>(Base.get());

    // An explicit address expression on the output section overrides Dot.
    if (Cmd->AddrExpr)
      Dot = Cmd->AddrExpr(Dot);

    assignOffsets(Cmd);
  }

  // Find the lowest address among SHF_ALLOC sections; sections without
  // SHF_ALLOC get address 0.
  uintX_t MinVA = std::numeric_limits<uintX_t>::max();
  for (OutputSectionBase<ELFT> *Sec : *OutputSections) {
    if (Sec->getFlags() & SHF_ALLOC)
      MinVA = std::min(MinVA, Sec->getVA());
    else
      Sec->setVA(0);
  }

  uintX_t HeaderSize = getHeaderSize();
  if (HeaderSize > MinVA)
    fatal("Not enough space for ELF and program headers");

  // ELF and Program headers need to be right before the first section in
  // memory. Set their addresses accordingly.
  MinVA = alignDown(MinVA - HeaderSize, Target->PageSize);
  Out<ELFT>::ElfHeader->setVA(MinVA);
  Out<ELFT>::ProgramHeaders->setVA(Out<ELFT>::ElfHeader->getSize() + MinVA);
}

// Creates program headers as instructed by PHDRS linker script command.
template <class ELFT>
std::vector<PhdrEntry<ELFT>> LinkerScript<ELFT>::createPhdrs() {
  std::vector<PhdrEntry<ELFT>> Ret;

  // Process PHDRS and FILEHDR keywords because they are not
  // real output sections and cannot be added in the following loop.
  for (const PhdrsCommand &Cmd : Opt.PhdrsCommands) {
    // Flags equal to UINT_MAX means no FLAGS were given in the script (see
    // readPhdrs()); start from PF_R and add section flags in the loop below.
    Ret.emplace_back(Cmd.Type, Cmd.Flags == UINT_MAX ? PF_R : Cmd.Flags);
    PhdrEntry<ELFT> &Phdr = Ret.back();

    if (Cmd.HasFilehdr)
      Phdr.add(Out<ELFT>::ElfHeader);
    if (Cmd.HasPhdrs)
      Phdr.add(Out<ELFT>::ProgramHeaders);

    if (Cmd.LMAExpr) {
      Phdr.H.p_paddr = Cmd.LMAExpr(0);
      Phdr.HasLMA = true;
    }
  }

  // Add output sections to program headers.
  PhdrEntry<ELFT> *Load = nullptr;
  uintX_t Flags = PF_R;
  for (OutputSectionBase<ELFT> *Sec : *OutputSections) {
    // Stop at the first section without SHF_ALLOC.
    if (!(Sec->getFlags() & SHF_ALLOC))
      break;

    std::vector<size_t> PhdrIds = getPhdrIndices(Sec->getName());
    if (!PhdrIds.empty()) {
      // Assign headers specified by linker script
      for (size_t Id : PhdrIds) {
        Ret[Id].add(Sec);
        if (Opt.PhdrsCommands[Id].Flags == UINT_MAX)
          Ret[Id].H.p_flags |= Sec->getPhdrFlags();
      }
    } else {
      // If we have no load segment or flags've changed then we want new load
      // segment.
      uintX_t NewFlags = Sec->getPhdrFlags();
      if (Load == nullptr || Flags != NewFlags) {
        // Load points into Ret, but it is refreshed on every insertion made
        // by this loop, so it never dangles across a reallocation.
        Load = &*Ret.emplace(Ret.end(), PT_LOAD, NewFlags);
        Flags = NewFlags;
      }
      Load->add(Sec);
    }
  }
  return Ret;
}

template <class ELFT> bool LinkerScript<ELFT>::ignoreInterpSection() {
  // Ignore .interp section in case we have PHDRS specification
  // and PT_INTERP isn't listed.
  return !Opt.PhdrsCommands.empty() &&
         llvm::find_if(Opt.PhdrsCommands, [](const PhdrsCommand &Cmd) {
           return Cmd.Type == PT_INTERP;
         }) == Opt.PhdrsCommands.end();
}

// Returns the filler of the first output section command named Name, or an
// empty array if the script has no such command.
template <class ELFT>
ArrayRef<uint8_t> LinkerScript<ELFT>::getFiller(StringRef Name) {
  for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands)
    if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()))
      if (Cmd->Name == Name)
        return Cmd->Filler;
  return {};
}

// Returns the LMA expression of the first output section command named Name
// that has one, or an empty Expr if there is none.
template <class ELFT> Expr LinkerScript<ELFT>::getLma(StringRef Name) {
  for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands)
    if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()))
      if (Cmd->LmaExpr && Cmd->Name == Name)
        return Cmd->LmaExpr;
  return {};
}

// Returns the index of the given section name in linker script
// SECTIONS commands. Sections are laid out as the same order as they
// were in the script. If a given name did not appear in the script,
// it returns INT_MAX, so that it will be laid out at end of file.
706 template <class ELFT> int LinkerScript<ELFT>::getSectionIndex(StringRef Name) { 707 int I = 0; 708 for (std::unique_ptr<BaseCommand> &Base : Opt.Commands) { 709 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) 710 if (Cmd->Name == Name) 711 return I; 712 ++I; 713 } 714 return INT_MAX; 715 } 716 717 template <class ELFT> bool LinkerScript<ELFT>::hasPhdrsCommands() { 718 return !Opt.PhdrsCommands.empty(); 719 } 720 721 template <class ELFT> 722 uint64_t LinkerScript<ELFT>::getOutputSectionAddress(StringRef Name) { 723 for (OutputSectionBase<ELFT> *Sec : *OutputSections) 724 if (Sec->getName() == Name) 725 return Sec->getVA(); 726 error("undefined section " + Name); 727 return 0; 728 } 729 730 template <class ELFT> 731 uint64_t LinkerScript<ELFT>::getOutputSectionSize(StringRef Name) { 732 for (OutputSectionBase<ELFT> *Sec : *OutputSections) 733 if (Sec->getName() == Name) 734 return Sec->getSize(); 735 error("undefined section " + Name); 736 return 0; 737 } 738 739 template <class ELFT> 740 uint64_t LinkerScript<ELFT>::getOutputSectionAlign(StringRef Name) { 741 for (OutputSectionBase<ELFT> *Sec : *OutputSections) 742 if (Sec->getName() == Name) 743 return Sec->getAlignment(); 744 error("undefined section " + Name); 745 return 0; 746 } 747 748 template <class ELFT> uint64_t LinkerScript<ELFT>::getHeaderSize() { 749 return elf::getHeaderSize<ELFT>(); 750 } 751 752 template <class ELFT> uint64_t LinkerScript<ELFT>::getSymbolValue(StringRef S) { 753 if (SymbolBody *B = Symtab<ELFT>::X->find(S)) 754 return B->getVA<ELFT>(); 755 error("symbol not found: " + S); 756 return 0; 757 } 758 759 template <class ELFT> bool LinkerScript<ELFT>::isDefined(StringRef S) { 760 return Symtab<ELFT>::X->find(S) != nullptr; 761 } 762 763 // Returns indices of ELF headers containing specific section, identified 764 // by Name. Each index is a zero based number of ELF header listed within 765 // PHDRS {} script block. 
template <class ELFT>
std::vector<size_t> LinkerScript<ELFT>::getPhdrIndices(StringRef SectionName) {
  // Only the first output section command with a matching name is consulted.
  for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
    auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
    if (!Cmd || Cmd->Name != SectionName)
      continue;

    std::vector<size_t> Ret;
    for (StringRef PhdrName : Cmd->Phdrs)
      Ret.push_back(getPhdrIndex(PhdrName));
    return Ret;
  }
  return {};
}

// Returns the zero-based position of the PHDRS entry named PhdrName.
// Reports an error and returns 0 if no entry has that name.
template <class ELFT>
size_t LinkerScript<ELFT>::getPhdrIndex(StringRef PhdrName) {
  size_t I = 0;
  for (PhdrsCommand &Cmd : Opt.PhdrsCommands) {
    if (Cmd.Name == PhdrName)
      return I;
    ++I;
  }
  error("section header '" + PhdrName + "' is not listed in PHDRS");
  return 0;
}

// Parser for linker scripts and version scripts. Results are written to
// Config and to the ScriptConfiguration referenced by Opt.
class elf::ScriptParser : public ScriptParserBase {
  typedef void (ScriptParser::*Handler)();

public:
  // S is the script text handed to the tokenizing base class. B is stored in
  // IsUnderSysroot and affects how addFile() resolves absolute paths.
  ScriptParser(StringRef S, bool B) : ScriptParserBase(S), IsUnderSysroot(B) {}

  void readLinkerScript();
  void readVersionScript();

private:
  void addFile(StringRef Path);

  // Handlers for top-level linker script commands.
  void readAsNeeded();
  void readEntry();
  void readExtern();
  void readGroup();
  void readInclude();
  void readOutput();
  void readOutputArch();
  void readOutputFormat();
  void readPhdrs();
  void readSearchDir();
  void readSections();
  void readVersion();
  void readVersionScriptCommand();

  SymbolAssignment *readAssignment(StringRef Name);
  std::vector<uint8_t> readFill();
  OutputSectionCommand *readOutputSectionDescription(StringRef OutSec);
  std::vector<uint8_t> readOutputSectionFiller(StringRef Tok);
  std::vector<StringRef> readOutputSectionPhdrs();
  InputSectionDescription *readInputSectionDescription(StringRef Tok);
  Regex readFilePatterns();
  std::vector<SectionPattern> readInputSectionsList();
  InputSectionDescription *readInputSectionRules(StringRef FilePattern);
  unsigned readPhdrType();
  SortSectionPolicy readSortKind();
  SymbolAssignment *readProvideHidden(bool Provide, bool Hidden);
  SymbolAssignment *readProvideOrAssignment(StringRef Tok, bool MakeAbsolute);
  void readSort();
  Expr readAssert();

  // Expression parsing.
  Expr readExpr();
  Expr readExpr1(Expr Lhs, int MinPrec);
  Expr readPrimary();
  Expr readTernary(Expr Cond);
  Expr readParenExpr();

  // For parsing version script.
  void readExtern(std::vector<SymbolVersion> *Globals);
  void readVersionDeclaration(StringRef VerStr);
  void readGlobal(StringRef VerStr);
  void readLocal();

  ScriptConfiguration &Opt = *ScriptConfig;
  // Strings saved through Saver are allocated from ScriptConfig->Alloc.
  StringSaver Saver = {ScriptConfig->Alloc};
  bool IsUnderSysroot;
};

// Parses a whole version script; anything left after the version commands
// is an error.
void ScriptParser::readVersionScript() {
  readVersionScriptCommand();
  if (!atEOF())
    setError("EOF expected, but got " + next());
}

// Parses either one anonymous version declaration ("{ ... }") or a sequence
// of named declarations ("NAME { ... }"). The two forms cannot be mixed.
void ScriptParser::readVersionScriptCommand() {
  if (skip("{")) {
    readVersionDeclaration("");
    return;
  }

  while (!atEOF() && !Error && peek() != "}") {
    StringRef VerStr = next();
    if (VerStr == "{") {
      setError("anonymous version definition is used in "
               "combination with other version definitions");
      return;
    }
    expect("{");
    readVersionDeclaration(VerStr);
  }
}

// Parses the body of a VERSION command: version script commands in braces.
void ScriptParser::readVersion() {
  expect("{");
  readVersionScriptCommand();
  expect("}");
}

// Parses a linker script by dispatching on each top-level token. A token
// that is not a known command is tried as a symbol assignment before it is
// reported as an unknown directive.
void ScriptParser::readLinkerScript() {
  while (!atEOF()) {
    StringRef Tok = next();
    if (Tok == ";")
      continue;

    if (Tok == "ASSERT") {
      Opt.Commands.emplace_back(new AssertCommand(readAssert()));
    } else if (Tok == "ENTRY") {
      readEntry();
    } else if (Tok == "EXTERN") {
      readExtern();
    } else if (Tok == "GROUP" || Tok == "INPUT") {
      readGroup();
    } else if (Tok == "INCLUDE") {
      readInclude();
    } else if (Tok == "OUTPUT") {
      readOutput();
    } else if (Tok == "OUTPUT_ARCH") {
      readOutputArch();
    } else if (Tok == "OUTPUT_FORMAT") {
      readOutputFormat();
    } else if (Tok == "PHDRS") {
      readPhdrs();
    } else if (Tok == "SEARCH_DIR") {
      readSearchDir();
    } else if (Tok == "SECTIONS") {
      readSections();
    } else if (Tok == "VERSION") {
      readVersion();
    } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok, true)) {
      Opt.Commands.emplace_back(Cmd);
    } else {
      setError("unknown directive: " + Tok);
    }
  }
}

// Adds the input file or library named by S to the link.
void ScriptParser::addFile(StringRef S) {
  // When IsUnderSysroot is set, an absolute path is first tried relative to
  // the sysroot; if that file does not exist, fall through to the cases below.
  if (IsUnderSysroot && S.startswith("/")) {
    SmallString<128> Path;
    (Config->Sysroot + S).toStringRef(Path);
    if (sys::fs::exists(Path)) {
      Driver->addFile(Saver.save(Path.str()));
      return;
    }
  }

  if (sys::path::is_absolute(S)) {
    Driver->addFile(S);
  } else if (S.startswith("=")) {
    // A leading "=" stands for the sysroot directory.
    if (Config->Sysroot.empty())
      Driver->addFile(S.substr(1));
    else
      Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)));
  } else if (S.startswith("-l")) {
    Driver->addLibrary(S.substr(2));
  } else if (sys::fs::exists(S)) {
    Driver->addFile(S);
  } else {
    // Last resort: look the name up in the search paths.
    std::string Path = findFromSearchPaths(S);
    if (Path.empty())
      setError("unable to find " + S);
    else
      Driver->addFile(Saver.save(Path));
  }
}

// Parses "( file... )" after AS_NEEDED. Config->AsNeeded is set while the
// listed files are added and restored afterwards.
void ScriptParser::readAsNeeded() {
  expect("(");
  bool Orig = Config->AsNeeded;
  Config->AsNeeded = true;
  while (!Error && !skip(")"))
    addFile(unquote(next()));
  Config->AsNeeded = Orig;
}

void ScriptParser::readEntry() {
  // -e <symbol> takes precedence over ENTRY(<symbol>).
  expect("(");
  StringRef Tok = next();
  if (Config->Entry.empty())
    Config->Entry = Tok;
  expect(")");
}

// Parses "EXTERN ( symbol... )" and records every symbol in
// Config->Undefined.
void ScriptParser::readExtern() {
  expect("(");
  while (!Error && !skip(")"))
    Config->Undefined.push_back(next());
}

// Parses the file list of GROUP or INPUT, including nested AS_NEEDED lists.
void ScriptParser::readGroup() {
  expect("(");
  while (!Error && !skip(")")) {
    StringRef Tok = next();
    if (Tok == "AS_NEEDED")
      readAsNeeded();
    else
      addFile(unquote(Tok));
  }
}

// Handles INCLUDE: tokenizes the named file and splices its tokens into the
// token stream at the current position.
void ScriptParser::readInclude() {
  StringRef Tok = next();
  auto MBOrErr = MemoryBuffer::getFile(unquote(Tok));
  if (!MBOrErr) {
    setError("cannot open " + Tok);
    return;
  }
  std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
  // Copy the contents through Saver because the tokens refer to this text
  // and MB is released when this function returns.
  StringRef S = Saver.save(MB->getMemBufferRef().getBuffer());
  std::vector<StringRef> V = tokenize(S);
  Tokens.insert(Tokens.begin() + Pos, V.begin(), V.end());
}

void ScriptParser::readOutput() {
  // -o <file> takes precedence over OUTPUT(<file>).
  expect("(");
  StringRef Tok = next();
  if (Config->OutputFile.empty())
    Config->OutputFile = unquote(Tok);
  expect(")");
}

void ScriptParser::readOutputArch() {
  // Error checking only for now.
  expect("(");
  next();
  expect(")");
}

void ScriptParser::readOutputFormat() {
  // Error checking only for now.
  // Accepts either one name or three comma-separated names in parentheses.
  expect("(");
  next();
  StringRef Tok = next();
  if (Tok == ")")
    return;
  if (Tok != ",") {
    setError("unexpected token: " + Tok);
    return;
  }
  next();
  expect(",");
  next();
  expect(")");
}

// Parses the PHDRS block. Each entry is a name, a type and a list of
// attributes terminated by ";".
void ScriptParser::readPhdrs() {
  expect("{");
  while (!Error && !skip("}")) {
    StringRef Tok = next();
    // UINT_MAX marks the flags as not given by the script; createPhdrs()
    // tests for this value.
    Opt.PhdrsCommands.push_back(
        {Tok, PT_NULL, false, false, UINT_MAX, nullptr});
    PhdrsCommand &PhdrCmd = Opt.PhdrsCommands.back();

    PhdrCmd.Type = readPhdrType();
    do {
      Tok = next();
      if (Tok == ";")
        break;
      if (Tok == "FILEHDR")
        PhdrCmd.HasFilehdr = true;
      else if (Tok == "PHDRS")
        PhdrCmd.HasPhdrs = true;
      else if (Tok == "AT")
        PhdrCmd.LMAExpr = readParenExpr();
      else if (Tok == "FLAGS") {
        expect("(");
        // Passing 0 for the value of dot is a bit of a hack. It means that
        // we accept expressions like ".|1".
        PhdrCmd.Flags = readExpr()(0);
        expect(")");
      } else
        setError("unexpected header attribute: " + Tok);
    } while (!Error);
  }
}

// Parses "SEARCH_DIR ( path )". The path is ignored when Config->Nostdlib
// is set.
void ScriptParser::readSearchDir() {
  expect("(");
  StringRef Tok = next();
  if (!Config->Nostdlib)
    Config->SearchPaths.push_back(unquote(Tok));
  expect(")");
}

// Parses the SECTIONS block. Each element is a symbol assignment, an ASSERT
// or an output section description; all of them are appended to
// Opt.Commands in script order.
void ScriptParser::readSections() {
  Opt.HasSections = true;
  expect("{");
  while (!Error && !skip("}")) {
    StringRef Tok = next();
    BaseCommand *Cmd = readProvideOrAssignment(Tok, true);
    if (!Cmd) {
      if (Tok == "ASSERT")
        Cmd = new AssertCommand(readAssert());
      else
        Cmd = readOutputSectionDescription(Tok);
    }
    Opt.Commands.emplace_back(Cmd);
  }
}

static int precedence(StringRef Op) {
  return StringSwitch<int>(Op)
      .Case("*", 5)
      .Case("/", 5)
      .Case("+", 4)
      .Case("-", 4)
      .Case("<<", 3)
      .Case(">>", 3)
      .Case("<", 2)
      .Case(">", 2)
      .Case(">=",
2) 1097 .Case("<=", 2) 1098 .Case("==", 2) 1099 .Case("!=", 2) 1100 .Case("&", 1) 1101 .Case("|", 1) 1102 .Default(-1); 1103 } 1104 1105 Regex ScriptParser::readFilePatterns() { 1106 std::vector<StringRef> V; 1107 while (!Error && !skip(")")) 1108 V.push_back(next()); 1109 return compileGlobPatterns(V); 1110 } 1111 1112 SortSectionPolicy ScriptParser::readSortKind() { 1113 if (skip("SORT") || skip("SORT_BY_NAME")) 1114 return SortSectionPolicy::Name; 1115 if (skip("SORT_BY_ALIGNMENT")) 1116 return SortSectionPolicy::Alignment; 1117 if (skip("SORT_BY_INIT_PRIORITY")) 1118 return SortSectionPolicy::Priority; 1119 if (skip("SORT_NONE")) 1120 return SortSectionPolicy::None; 1121 return SortSectionPolicy::Default; 1122 } 1123 1124 // Method reads a list of sequence of excluded files and section globs given in 1125 // a following form: ((EXCLUDE_FILE(file_pattern+))? section_pattern+)+ 1126 // Example: *(.foo.1 EXCLUDE_FILE (*a.o) .foo.2 EXCLUDE_FILE (*b.o) .foo.3) 1127 // The semantics of that is next: 1128 // * Include .foo.1 from every file. 
1129 // * Include .foo.2 from every file but a.o 1130 // * Include .foo.3 from every file but b.o 1131 std::vector<SectionPattern> ScriptParser::readInputSectionsList() { 1132 std::vector<SectionPattern> Ret; 1133 while (!Error && peek() != ")") { 1134 Regex ExcludeFileRe; 1135 if (skip("EXCLUDE_FILE")) { 1136 expect("("); 1137 ExcludeFileRe = readFilePatterns(); 1138 } 1139 1140 std::vector<StringRef> V; 1141 while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") 1142 V.push_back(next()); 1143 1144 if (!V.empty()) 1145 Ret.push_back({std::move(ExcludeFileRe), compileGlobPatterns(V)}); 1146 else 1147 setError("section pattern is expected"); 1148 } 1149 return Ret; 1150 } 1151 1152 // Section pattern grammar can have complex expressions, for example: 1153 // *(SORT(.foo.* EXCLUDE_FILE (*file1.o) .bar.*) .bar.* SORT(.zed.*)) 1154 // Generally is a sequence of globs and excludes that may be wrapped in a SORT() 1155 // commands, like: SORT(glob0) glob1 glob2 SORT(glob4) 1156 // This methods handles wrapping sequences of excluded files and section globs 1157 // into SORT() if that needed and reads them all. 
1158 InputSectionDescription * 1159 ScriptParser::readInputSectionRules(StringRef FilePattern) { 1160 auto *Cmd = new InputSectionDescription(FilePattern); 1161 expect("("); 1162 while (!HasError && !skip(")")) { 1163 SortSectionPolicy Outer = readSortKind(); 1164 SortSectionPolicy Inner = SortSectionPolicy::Default; 1165 std::vector<SectionPattern> V; 1166 if (Outer != SortSectionPolicy::Default) { 1167 expect("("); 1168 Inner = readSortKind(); 1169 if (Inner != SortSectionPolicy::Default) { 1170 expect("("); 1171 V = readInputSectionsList(); 1172 expect(")"); 1173 } else { 1174 V = readInputSectionsList(); 1175 } 1176 expect(")"); 1177 } else { 1178 V = readInputSectionsList(); 1179 } 1180 1181 for (SectionPattern &Pat : V) { 1182 Pat.SortInner = Inner; 1183 Pat.SortOuter = Outer; 1184 } 1185 1186 std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); 1187 } 1188 return Cmd; 1189 } 1190 1191 InputSectionDescription * 1192 ScriptParser::readInputSectionDescription(StringRef Tok) { 1193 // Input section wildcard can be surrounded by KEEP. 1194 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 1195 if (Tok == "KEEP") { 1196 expect("("); 1197 StringRef FilePattern = next(); 1198 InputSectionDescription *Cmd = readInputSectionRules(FilePattern); 1199 expect(")"); 1200 for (SectionPattern &Pat : Cmd->SectionPatterns) 1201 Opt.KeptSections.push_back(&Pat.SectionRe); 1202 return Cmd; 1203 } 1204 return readInputSectionRules(Tok); 1205 } 1206 1207 void ScriptParser::readSort() { 1208 expect("("); 1209 expect("CONSTRUCTORS"); 1210 expect(")"); 1211 } 1212 1213 Expr ScriptParser::readAssert() { 1214 expect("("); 1215 Expr E = readExpr(); 1216 expect(","); 1217 StringRef Msg = unquote(next()); 1218 expect(")"); 1219 return [=](uint64_t Dot) { 1220 uint64_t V = E(Dot); 1221 if (!V) 1222 error(Msg); 1223 return V; 1224 }; 1225 } 1226 1227 // Reads a FILL(expr) command. 
We handle the FILL command as an 1228 // alias for =fillexp section attribute, which is different from 1229 // what GNU linkers do. 1230 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 1231 std::vector<uint8_t> ScriptParser::readFill() { 1232 expect("("); 1233 std::vector<uint8_t> V = readOutputSectionFiller(next()); 1234 expect(")"); 1235 expect(";"); 1236 return V; 1237 } 1238 1239 OutputSectionCommand * 1240 ScriptParser::readOutputSectionDescription(StringRef OutSec) { 1241 OutputSectionCommand *Cmd = new OutputSectionCommand(OutSec); 1242 1243 // Read an address expression. 1244 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html#Output-Section-Address 1245 if (peek() != ":") 1246 Cmd->AddrExpr = readExpr(); 1247 1248 expect(":"); 1249 1250 if (skip("AT")) 1251 Cmd->LmaExpr = readParenExpr(); 1252 if (skip("ALIGN")) 1253 Cmd->AlignExpr = readParenExpr(); 1254 if (skip("SUBALIGN")) 1255 Cmd->SubalignExpr = readParenExpr(); 1256 1257 // Parse constraints. 1258 if (skip("ONLY_IF_RO")) 1259 Cmd->Constraint = ConstraintKind::ReadOnly; 1260 if (skip("ONLY_IF_RW")) 1261 Cmd->Constraint = ConstraintKind::ReadWrite; 1262 expect("{"); 1263 1264 while (!Error && !skip("}")) { 1265 StringRef Tok = next(); 1266 if (SymbolAssignment *Assignment = readProvideOrAssignment(Tok, false)) 1267 Cmd->Commands.emplace_back(Assignment); 1268 else if (Tok == "FILL") 1269 Cmd->Filler = readFill(); 1270 else if (Tok == "SORT") 1271 readSort(); 1272 else if (peek() == "(") 1273 Cmd->Commands.emplace_back(readInputSectionDescription(Tok)); 1274 else 1275 setError("unknown command " + Tok); 1276 } 1277 Cmd->Phdrs = readOutputSectionPhdrs(); 1278 1279 if (skip("=")) 1280 Cmd->Filler = readOutputSectionFiller(next()); 1281 else if (peek().startswith("=")) 1282 Cmd->Filler = readOutputSectionFiller(next().drop_front()); 1283 1284 return Cmd; 1285 } 1286 1287 // Read "=<number>" where <number> is an octal/decimal/hexadecimal number. 
1288 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 1289 // 1290 // ld.gold is not fully compatible with ld.bfd. ld.bfd handles 1291 // hexstrings as blobs of arbitrary sizes, while ld.gold handles them 1292 // as 32-bit big-endian values. We will do the same as ld.gold does 1293 // because it's simpler than what ld.bfd does. 1294 std::vector<uint8_t> ScriptParser::readOutputSectionFiller(StringRef Tok) { 1295 uint32_t V; 1296 if (Tok.getAsInteger(0, V)) { 1297 setError("invalid filler expression: " + Tok); 1298 return {}; 1299 } 1300 return {uint8_t(V >> 24), uint8_t(V >> 16), uint8_t(V >> 8), uint8_t(V)}; 1301 } 1302 1303 SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { 1304 expect("("); 1305 SymbolAssignment *Cmd = readAssignment(next()); 1306 Cmd->Provide = Provide; 1307 Cmd->Hidden = Hidden; 1308 expect(")"); 1309 expect(";"); 1310 return Cmd; 1311 } 1312 1313 SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok, 1314 bool MakeAbsolute) { 1315 SymbolAssignment *Cmd = nullptr; 1316 if (peek() == "=" || peek() == "+=") { 1317 Cmd = readAssignment(Tok); 1318 expect(";"); 1319 } else if (Tok == "PROVIDE") { 1320 Cmd = readProvideHidden(true, false); 1321 } else if (Tok == "HIDDEN") { 1322 Cmd = readProvideHidden(false, true); 1323 } else if (Tok == "PROVIDE_HIDDEN") { 1324 Cmd = readProvideHidden(true, true); 1325 } 1326 if (Cmd && MakeAbsolute) 1327 Cmd->IsAbsolute = true; 1328 return Cmd; 1329 } 1330 1331 static uint64_t getSymbolValue(StringRef S, uint64_t Dot) { 1332 if (S == ".") 1333 return Dot; 1334 return ScriptBase->getSymbolValue(S); 1335 } 1336 1337 SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { 1338 StringRef Op = next(); 1339 bool IsAbsolute = false; 1340 Expr E; 1341 assert(Op == "=" || Op == "+="); 1342 if (skip("ABSOLUTE")) { 1343 E = readParenExpr(); 1344 IsAbsolute = true; 1345 } else { 1346 E = readExpr(); 1347 } 1348 if (Op == "+=") 1349 E = [=](uint64_t Dot) 
{ return getSymbolValue(Name, Dot) + E(Dot); }; 1350 return new SymbolAssignment(Name, E, IsAbsolute); 1351 } 1352 1353 // This is an operator-precedence parser to parse a linker 1354 // script expression. 1355 Expr ScriptParser::readExpr() { return readExpr1(readPrimary(), 0); } 1356 1357 static Expr combine(StringRef Op, Expr L, Expr R) { 1358 if (Op == "*") 1359 return [=](uint64_t Dot) { return L(Dot) * R(Dot); }; 1360 if (Op == "/") { 1361 return [=](uint64_t Dot) -> uint64_t { 1362 uint64_t RHS = R(Dot); 1363 if (RHS == 0) { 1364 error("division by zero"); 1365 return 0; 1366 } 1367 return L(Dot) / RHS; 1368 }; 1369 } 1370 if (Op == "+") 1371 return [=](uint64_t Dot) { return L(Dot) + R(Dot); }; 1372 if (Op == "-") 1373 return [=](uint64_t Dot) { return L(Dot) - R(Dot); }; 1374 if (Op == "<<") 1375 return [=](uint64_t Dot) { return L(Dot) << R(Dot); }; 1376 if (Op == ">>") 1377 return [=](uint64_t Dot) { return L(Dot) >> R(Dot); }; 1378 if (Op == "<") 1379 return [=](uint64_t Dot) { return L(Dot) < R(Dot); }; 1380 if (Op == ">") 1381 return [=](uint64_t Dot) { return L(Dot) > R(Dot); }; 1382 if (Op == ">=") 1383 return [=](uint64_t Dot) { return L(Dot) >= R(Dot); }; 1384 if (Op == "<=") 1385 return [=](uint64_t Dot) { return L(Dot) <= R(Dot); }; 1386 if (Op == "==") 1387 return [=](uint64_t Dot) { return L(Dot) == R(Dot); }; 1388 if (Op == "!=") 1389 return [=](uint64_t Dot) { return L(Dot) != R(Dot); }; 1390 if (Op == "&") 1391 return [=](uint64_t Dot) { return L(Dot) & R(Dot); }; 1392 if (Op == "|") 1393 return [=](uint64_t Dot) { return L(Dot) | R(Dot); }; 1394 llvm_unreachable("invalid operator"); 1395 } 1396 1397 // This is a part of the operator-precedence parser. This function 1398 // assumes that the remaining token stream starts with an operator. 1399 Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { 1400 while (!atEOF() && !Error) { 1401 // Read an operator and an expression. 
1402 StringRef Op1 = peek(); 1403 if (Op1 == "?") 1404 return readTernary(Lhs); 1405 if (precedence(Op1) < MinPrec) 1406 break; 1407 next(); 1408 Expr Rhs = readPrimary(); 1409 1410 // Evaluate the remaining part of the expression first if the 1411 // next operator has greater precedence than the previous one. 1412 // For example, if we have read "+" and "3", and if the next 1413 // operator is "*", then we'll evaluate 3 * ... part first. 1414 while (!atEOF()) { 1415 StringRef Op2 = peek(); 1416 if (precedence(Op2) <= precedence(Op1)) 1417 break; 1418 Rhs = readExpr1(Rhs, precedence(Op2)); 1419 } 1420 1421 Lhs = combine(Op1, Lhs, Rhs); 1422 } 1423 return Lhs; 1424 } 1425 1426 uint64_t static getConstant(StringRef S) { 1427 if (S == "COMMONPAGESIZE") 1428 return Target->PageSize; 1429 if (S == "MAXPAGESIZE") 1430 return Target->MaxPageSize; 1431 error("unknown constant: " + S); 1432 return 0; 1433 } 1434 1435 // Parses Tok as an integer. Returns true if successful. 1436 // It recognizes hexadecimal (prefixed with "0x" or suffixed with "H") 1437 // and decimal numbers. Decimal numbers may have "K" (kilo) or 1438 // "M" (mega) prefixes. 
1439 static bool readInteger(StringRef Tok, uint64_t &Result) { 1440 if (Tok.startswith("-")) { 1441 if (!readInteger(Tok.substr(1), Result)) 1442 return false; 1443 Result = -Result; 1444 return true; 1445 } 1446 if (Tok.startswith_lower("0x")) 1447 return !Tok.substr(2).getAsInteger(16, Result); 1448 if (Tok.endswith_lower("H")) 1449 return !Tok.drop_back().getAsInteger(16, Result); 1450 1451 int Suffix = 1; 1452 if (Tok.endswith_lower("K")) { 1453 Suffix = 1024; 1454 Tok = Tok.drop_back(); 1455 } else if (Tok.endswith_lower("M")) { 1456 Suffix = 1024 * 1024; 1457 Tok = Tok.drop_back(); 1458 } 1459 if (Tok.getAsInteger(10, Result)) 1460 return false; 1461 Result *= Suffix; 1462 return true; 1463 } 1464 1465 Expr ScriptParser::readPrimary() { 1466 if (peek() == "(") 1467 return readParenExpr(); 1468 1469 StringRef Tok = next(); 1470 1471 if (Tok == "~") { 1472 Expr E = readPrimary(); 1473 return [=](uint64_t Dot) { return ~E(Dot); }; 1474 } 1475 if (Tok == "-") { 1476 Expr E = readPrimary(); 1477 return [=](uint64_t Dot) { return -E(Dot); }; 1478 } 1479 1480 // Built-in functions are parsed here. 1481 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 1482 if (Tok == "ADDR") { 1483 expect("("); 1484 StringRef Name = next(); 1485 expect(")"); 1486 return 1487 [=](uint64_t Dot) { return ScriptBase->getOutputSectionAddress(Name); }; 1488 } 1489 if (Tok == "ASSERT") 1490 return readAssert(); 1491 if (Tok == "ALIGN") { 1492 Expr E = readParenExpr(); 1493 return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; 1494 } 1495 if (Tok == "CONSTANT") { 1496 expect("("); 1497 StringRef Tok = next(); 1498 expect(")"); 1499 return [=](uint64_t Dot) { return getConstant(Tok); }; 1500 } 1501 if (Tok == "DEFINED") { 1502 expect("("); 1503 StringRef Tok = next(); 1504 expect(")"); 1505 return [=](uint64_t Dot) { 1506 return ScriptBase->isDefined(Tok) ? 
1 : 0; 1507 }; 1508 } 1509 if (Tok == "SEGMENT_START") { 1510 expect("("); 1511 next(); 1512 expect(","); 1513 Expr E = readExpr(); 1514 expect(")"); 1515 return [=](uint64_t Dot) { return E(Dot); }; 1516 } 1517 if (Tok == "DATA_SEGMENT_ALIGN") { 1518 expect("("); 1519 Expr E = readExpr(); 1520 expect(","); 1521 readExpr(); 1522 expect(")"); 1523 return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; 1524 } 1525 if (Tok == "DATA_SEGMENT_END") { 1526 expect("("); 1527 expect("."); 1528 expect(")"); 1529 return [](uint64_t Dot) { return Dot; }; 1530 } 1531 // GNU linkers implements more complicated logic to handle 1532 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and just align to 1533 // the next page boundary for simplicity. 1534 if (Tok == "DATA_SEGMENT_RELRO_END") { 1535 expect("("); 1536 readExpr(); 1537 expect(","); 1538 readExpr(); 1539 expect(")"); 1540 return [](uint64_t Dot) { return alignTo(Dot, Target->PageSize); }; 1541 } 1542 if (Tok == "SIZEOF") { 1543 expect("("); 1544 StringRef Name = next(); 1545 expect(")"); 1546 return [=](uint64_t Dot) { return ScriptBase->getOutputSectionSize(Name); }; 1547 } 1548 if (Tok == "ALIGNOF") { 1549 expect("("); 1550 StringRef Name = next(); 1551 expect(")"); 1552 return 1553 [=](uint64_t Dot) { return ScriptBase->getOutputSectionAlign(Name); }; 1554 } 1555 if (Tok == "SIZEOF_HEADERS") 1556 return [=](uint64_t Dot) { return ScriptBase->getHeaderSize(); }; 1557 1558 // Tok is a literal number. 1559 uint64_t V; 1560 if (readInteger(Tok, V)) 1561 return [=](uint64_t Dot) { return V; }; 1562 1563 // Tok is a symbol name. 1564 if (Tok != "." && !isValidCIdentifier(Tok)) 1565 setError("malformed number: " + Tok); 1566 return [=](uint64_t Dot) { return getSymbolValue(Tok, Dot); }; 1567 } 1568 1569 Expr ScriptParser::readTernary(Expr Cond) { 1570 next(); 1571 Expr L = readExpr(); 1572 expect(":"); 1573 Expr R = readExpr(); 1574 return [=](uint64_t Dot) { return Cond(Dot) ? 
L(Dot) : R(Dot); }; 1575 } 1576 1577 Expr ScriptParser::readParenExpr() { 1578 expect("("); 1579 Expr E = readExpr(); 1580 expect(")"); 1581 return E; 1582 } 1583 1584 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { 1585 std::vector<StringRef> Phdrs; 1586 while (!Error && peek().startswith(":")) { 1587 StringRef Tok = next(); 1588 Tok = (Tok.size() == 1) ? next() : Tok.substr(1); 1589 if (Tok.empty()) { 1590 setError("section header name is empty"); 1591 break; 1592 } 1593 Phdrs.push_back(Tok); 1594 } 1595 return Phdrs; 1596 } 1597 1598 unsigned ScriptParser::readPhdrType() { 1599 StringRef Tok = next(); 1600 unsigned Ret = StringSwitch<unsigned>(Tok) 1601 .Case("PT_NULL", PT_NULL) 1602 .Case("PT_LOAD", PT_LOAD) 1603 .Case("PT_DYNAMIC", PT_DYNAMIC) 1604 .Case("PT_INTERP", PT_INTERP) 1605 .Case("PT_NOTE", PT_NOTE) 1606 .Case("PT_SHLIB", PT_SHLIB) 1607 .Case("PT_PHDR", PT_PHDR) 1608 .Case("PT_TLS", PT_TLS) 1609 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1610 .Case("PT_GNU_STACK", PT_GNU_STACK) 1611 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1612 .Default(-1); 1613 1614 if (Ret == (unsigned)-1) { 1615 setError("invalid program header type: " + Tok); 1616 return PT_NULL; 1617 } 1618 return Ret; 1619 } 1620 1621 void ScriptParser::readVersionDeclaration(StringRef VerStr) { 1622 // Identifiers start at 2 because 0 and 1 are reserved 1623 // for VER_NDX_LOCAL and VER_NDX_GLOBAL constants. 1624 size_t VersionId = Config->VersionDefinitions.size() + 2; 1625 Config->VersionDefinitions.push_back({VerStr, VersionId}); 1626 1627 if (skip("global:") || peek() != "local:") 1628 readGlobal(VerStr); 1629 if (skip("local:")) 1630 readLocal(); 1631 expect("}"); 1632 1633 // Each version may have a parent version. For example, "Ver2" defined as 1634 // "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" as a parent. This 1635 // version hierarchy is, probably against your instinct, purely for human; the 1636 // runtime doesn't care about them at all. 
In LLD, we simply skip the token. 1637 if (!VerStr.empty() && peek() != ";") 1638 next(); 1639 expect(";"); 1640 } 1641 1642 void ScriptParser::readLocal() { 1643 Config->DefaultSymbolVersion = VER_NDX_LOCAL; 1644 expect("*"); 1645 expect(";"); 1646 } 1647 1648 void ScriptParser::readExtern(std::vector<SymbolVersion> *Globals) { 1649 expect("\"C++\""); 1650 expect("{"); 1651 1652 for (;;) { 1653 if (peek() == "}" || Error) 1654 break; 1655 bool HasWildcard = !peek().startswith("\"") && hasWildcard(peek()); 1656 Globals->push_back({unquote(next()), true, HasWildcard}); 1657 expect(";"); 1658 } 1659 1660 expect("}"); 1661 expect(";"); 1662 } 1663 1664 void ScriptParser::readGlobal(StringRef VerStr) { 1665 std::vector<SymbolVersion> *Globals; 1666 if (VerStr.empty()) 1667 Globals = &Config->VersionScriptGlobals; 1668 else 1669 Globals = &Config->VersionDefinitions.back().Globals; 1670 1671 for (;;) { 1672 if (skip("extern")) 1673 readExtern(Globals); 1674 1675 StringRef Cur = peek(); 1676 if (Cur == "}" || Cur == "local:" || Error) 1677 return; 1678 next(); 1679 Globals->push_back({unquote(Cur), false, hasWildcard(Cur)}); 1680 expect(";"); 1681 } 1682 } 1683 1684 static bool isUnderSysroot(StringRef Path) { 1685 if (Config->Sysroot == "") 1686 return false; 1687 for (; !Path.empty(); Path = sys::path::parent_path(Path)) 1688 if (sys::fs::equivalent(Config->Sysroot, Path)) 1689 return true; 1690 return false; 1691 } 1692 1693 void elf::readLinkerScript(MemoryBufferRef MB) { 1694 StringRef Path = MB.getBufferIdentifier(); 1695 ScriptParser(MB.getBuffer(), isUnderSysroot(Path)).readLinkerScript(); 1696 } 1697 1698 void elf::readVersionScript(MemoryBufferRef MB) { 1699 ScriptParser(MB.getBuffer(), false).readVersionScript(); 1700 } 1701 1702 template class elf::LinkerScript<ELF32LE>; 1703 template class elf::LinkerScript<ELF32BE>; 1704 template class elf::LinkerScript<ELF64LE>; 1705 template class elf::LinkerScript<ELF64BE>; 1706