1 //===- LinkerScript.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the parser/evaluator of the linker script. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "LinkerScript.h" 15 #include "Config.h" 16 #include "InputSection.h" 17 #include "Memory.h" 18 #include "OutputSections.h" 19 #include "Strings.h" 20 #include "SymbolTable.h" 21 #include "Symbols.h" 22 #include "SyntheticSections.h" 23 #include "Writer.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/Support/Casting.h" 27 #include "llvm/Support/ELF.h" 28 #include "llvm/Support/Endian.h" 29 #include "llvm/Support/ErrorHandling.h" 30 #include "llvm/Support/FileSystem.h" 31 #include "llvm/Support/Path.h" 32 #include <algorithm> 33 #include <cassert> 34 #include <cstddef> 35 #include <cstdint> 36 #include <iterator> 37 #include <limits> 38 #include <string> 39 #include <vector> 40 41 using namespace llvm; 42 using namespace llvm::ELF; 43 using namespace llvm::object; 44 using namespace llvm::support::endian; 45 using namespace lld; 46 using namespace lld::elf; 47 48 LinkerScript *elf::Script; 49 50 uint64_t ExprValue::getValue() const { 51 if (Sec) { 52 if (Sec->getOutputSection()) 53 return Sec->getOffset(Val) + Sec->getOutputSection()->Addr; 54 error("unable to evaluate expression: input section " + Sec->Name + 55 " has no output section assigned"); 56 } 57 return Val; 58 } 59 60 uint64_t ExprValue::getSecAddr() const { 61 if (Sec) 62 return Sec->getOffset(0) + Sec->getOutputSection()->Addr; 63 return 0; 64 } 65 66 template <class ELFT> static SymbolBody *addRegular(SymbolAssignment *Cmd) { 67 Symbol *Sym; 68 uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT; 69 std::tie(Sym, std::ignore) = Symtab<ELFT>::X->insert( 70 Cmd->Name, /*Type*/ 0, Visibility, /*CanOmitFromDynSym*/ false, 71 /*File*/ nullptr); 72 Sym->Binding = STB_GLOBAL; 73 ExprValue Value = Cmd->Expression(); 74 SectionBase *Sec = Value.isAbsolute() ? nullptr : Value.Sec; 75 76 // We want to set symbol values early if we can. This allows us to use symbols 77 // as variables in linker scripts. Doing so allows us to write expressions 78 // like this: `alignment = 16; . = ALIGN(., alignment)` 79 uint64_t SymValue = Value.isAbsolute() ? Value.getValue() : 0; 80 replaceBody<DefinedRegular>(Sym, Cmd->Name, /*IsLocal=*/false, Visibility, 81 STT_NOTYPE, SymValue, 0, Sec, nullptr); 82 return Sym->body(); 83 } 84 85 OutputSection *LinkerScript::getOutputSection(const Twine &Loc, 86 StringRef Name) { 87 for (OutputSection *Sec : *OutputSections) 88 if (Sec->Name == Name) 89 return Sec; 90 91 static OutputSection Dummy("", 0, 0); 92 if (ErrorOnMissingSection) 93 error(Loc + ": undefined section " + Name); 94 return &Dummy; 95 } 96 97 // This function is essentially the same as getOutputSection(Name)->Size, 98 // but it won't print out an error message if a given section is not found. 99 // 100 // Linker script does not create an output section if its content is empty. 101 // We want to allow SIZEOF(.foo) where .foo is a section which happened to 102 // be empty. That is why this function is different from getOutputSection(). 103 uint64_t LinkerScript::getOutputSectionSize(StringRef Name) { 104 for (OutputSection *Sec : *OutputSections) 105 if (Sec->Name == Name) 106 return Sec->Size; 107 return 0; 108 } 109 110 void LinkerScript::setDot(Expr E, const Twine &Loc, bool InSec) { 111 uint64_t Val = E().getValue(); 112 if (Val < Dot) { 113 if (InSec) 114 error(Loc + ": unable to move location counter backward for: " + 115 CurOutSec->Name); 116 else 117 error(Loc + ": unable to move location counter backward"); 118 } 119 Dot = Val; 120 // Update to location counter means update to section size. 121 if (InSec) 122 CurOutSec->Size = Dot - CurOutSec->Addr; 123 } 124 125 // Sets value of a symbol. Two kinds of symbols are processed: synthetic 126 // symbols, whose value is an offset from beginning of section and regular 127 // symbols whose value is absolute. 128 void LinkerScript::assignSymbol(SymbolAssignment *Cmd, bool InSec) { 129 if (Cmd->Name == ".") { 130 setDot(Cmd->Expression, Cmd->Location, InSec); 131 return; 132 } 133 134 if (!Cmd->Sym) 135 return; 136 137 auto *Sym = cast<DefinedRegular>(Cmd->Sym); 138 ExprValue V = Cmd->Expression(); 139 if (V.isAbsolute()) { 140 Sym->Value = V.getValue(); 141 } else { 142 Sym->Section = V.Sec; 143 if (Sym->Section->Flags & SHF_ALLOC) 144 Sym->Value = V.Val; 145 else 146 Sym->Value = V.getValue(); 147 } 148 } 149 150 static SymbolBody *findSymbol(StringRef S) { 151 switch (Config->EKind) { 152 case ELF32LEKind: 153 return Symtab<ELF32LE>::X->find(S); 154 case ELF32BEKind: 155 return Symtab<ELF32BE>::X->find(S); 156 case ELF64LEKind: 157 return Symtab<ELF64LE>::X->find(S); 158 case ELF64BEKind: 159 return Symtab<ELF64BE>::X->find(S); 160 default: 161 llvm_unreachable("unknown Config->EKind"); 162 } 163 } 164 165 static SymbolBody *addRegularSymbol(SymbolAssignment *Cmd) { 166 switch (Config->EKind) { 167 case ELF32LEKind: 168 return addRegular<ELF32LE>(Cmd); 169 case ELF32BEKind: 170 return addRegular<ELF32BE>(Cmd); 171 case ELF64LEKind: 172 return addRegular<ELF64LE>(Cmd); 173 case ELF64BEKind: 174 return addRegular<ELF64BE>(Cmd); 175 default: 176 llvm_unreachable("unknown Config->EKind"); 177 } 178 } 179 180 void LinkerScript::addSymbol(SymbolAssignment *Cmd) { 181 if (Cmd->Name == ".") 182 return; 183 184 // If a symbol was in PROVIDE(), we need to define it only when 185 // it is a referenced undefined symbol. 186 SymbolBody *B = findSymbol(Cmd->Name); 187 if (Cmd->Provide && (!B || B->isDefined())) 188 return; 189 190 Cmd->Sym = addRegularSymbol(Cmd); 191 } 192 193 bool SymbolAssignment::classof(const BaseCommand *C) { 194 return C->Kind == AssignmentKind; 195 } 196 197 bool OutputSectionCommand::classof(const BaseCommand *C) { 198 return C->Kind == OutputSectionKind; 199 } 200 201 bool InputSectionDescription::classof(const BaseCommand *C) { 202 return C->Kind == InputSectionKind; 203 } 204 205 bool AssertCommand::classof(const BaseCommand *C) { 206 return C->Kind == AssertKind; 207 } 208 209 bool BytesDataCommand::classof(const BaseCommand *C) { 210 return C->Kind == BytesDataKind; 211 } 212 213 static StringRef basename(InputSectionBase *S) { 214 if (S->File) 215 return sys::path::filename(S->File->getName()); 216 return ""; 217 } 218 219 bool LinkerScript::shouldKeep(InputSectionBase *S) { 220 for (InputSectionDescription *ID : Opt.KeptSections) 221 if (ID->FilePat.match(basename(S))) 222 for (SectionPattern &P : ID->SectionPatterns) 223 if (P.SectionPat.match(S->Name)) 224 return true; 225 return false; 226 } 227 228 // A helper function for the SORT() command. 229 static std::function<bool(InputSectionBase *, InputSectionBase *)> 230 getComparator(SortSectionPolicy K) { 231 switch (K) { 232 case SortSectionPolicy::Alignment: 233 return [](InputSectionBase *A, InputSectionBase *B) { 234 // ">" is not a mistake. Sections with larger alignments are placed 235 // before sections with smaller alignments in order to reduce the 236 // amount of padding necessary. This is compatible with GNU. 237 return A->Alignment > B->Alignment; 238 }; 239 case SortSectionPolicy::Name: 240 return [](InputSectionBase *A, InputSectionBase *B) { 241 return A->Name < B->Name; 242 }; 243 case SortSectionPolicy::Priority: 244 return [](InputSectionBase *A, InputSectionBase *B) { 245 return getPriority(A->Name) < getPriority(B->Name); 246 }; 247 default: 248 llvm_unreachable("unknown sort policy"); 249 } 250 } 251 252 // A helper function for the SORT() command. 253 static bool matchConstraints(ArrayRef<InputSectionBase *> Sections, 254 ConstraintKind Kind) { 255 if (Kind == ConstraintKind::NoConstraint) 256 return true; 257 258 bool IsRW = llvm::any_of(Sections, [](InputSectionBase *Sec) { 259 return static_cast<InputSectionBase *>(Sec)->Flags & SHF_WRITE; 260 }); 261 262 return (IsRW && Kind == ConstraintKind::ReadWrite) || 263 (!IsRW && Kind == ConstraintKind::ReadOnly); 264 } 265 266 static void sortSections(InputSectionBase **Begin, InputSectionBase **End, 267 SortSectionPolicy K) { 268 if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None) 269 std::stable_sort(Begin, End, getComparator(K)); 270 } 271 272 // Compute and remember which sections the InputSectionDescription matches. 273 std::vector<InputSectionBase *> 274 LinkerScript::computeInputSections(const InputSectionDescription *Cmd) { 275 std::vector<InputSectionBase *> Ret; 276 277 // Collects all sections that satisfy constraints of Cmd. 278 for (const SectionPattern &Pat : Cmd->SectionPatterns) { 279 size_t SizeBefore = Ret.size(); 280 281 for (InputSectionBase *Sec : InputSections) { 282 if (Sec->Assigned) 283 continue; 284 285 // For -emit-relocs we have to ignore entries like 286 // .rela.dyn : { *(.rela.data) } 287 // which are common because they are in the default bfd script. 288 if (Sec->Type == SHT_REL || Sec->Type == SHT_RELA) 289 continue; 290 291 StringRef Filename = basename(Sec); 292 if (!Cmd->FilePat.match(Filename) || 293 Pat.ExcludedFilePat.match(Filename) || 294 !Pat.SectionPat.match(Sec->Name)) 295 continue; 296 297 Ret.push_back(Sec); 298 Sec->Assigned = true; 299 } 300 301 // Sort sections as instructed by SORT-family commands and --sort-section 302 // option. Because SORT-family commands can be nested at most two depth 303 // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command 304 // line option is respected even if a SORT command is given, the exact 305 // behavior we have here is a bit complicated. Here are the rules. 306 // 307 // 1. If two SORT commands are given, --sort-section is ignored. 308 // 2. If one SORT command is given, and if it is not SORT_NONE, 309 // --sort-section is handled as an inner SORT command. 310 // 3. If one SORT command is given, and if it is SORT_NONE, don't sort. 311 // 4. If no SORT command is given, sort according to --sort-section. 312 InputSectionBase **Begin = Ret.data() + SizeBefore; 313 InputSectionBase **End = Ret.data() + Ret.size(); 314 if (Pat.SortOuter != SortSectionPolicy::None) { 315 if (Pat.SortInner == SortSectionPolicy::Default) 316 sortSections(Begin, End, Config->SortSection); 317 else 318 sortSections(Begin, End, Pat.SortInner); 319 sortSections(Begin, End, Pat.SortOuter); 320 } 321 } 322 return Ret; 323 } 324 325 void LinkerScript::discard(ArrayRef<InputSectionBase *> V) { 326 for (InputSectionBase *S : V) { 327 S->Live = false; 328 if (S == InX::ShStrTab) 329 error("discarding .shstrtab section is not allowed"); 330 discard(S->DependentSections); 331 } 332 } 333 334 std::vector<InputSectionBase *> 335 LinkerScript::createInputSectionList(OutputSectionCommand &OutCmd) { 336 std::vector<InputSectionBase *> Ret; 337 338 for (BaseCommand *Base : OutCmd.Commands) { 339 auto *Cmd = dyn_cast<InputSectionDescription>(Base); 340 if (!Cmd) 341 continue; 342 343 Cmd->Sections = computeInputSections(Cmd); 344 Ret.insert(Ret.end(), Cmd->Sections.begin(), Cmd->Sections.end()); 345 } 346 347 return Ret; 348 } 349 350 void LinkerScript::processCommands(OutputSectionFactory &Factory) { 351 // A symbol can be assigned before any section is mentioned in the linker 352 // script. In an DSO, the symbol values are addresses, so the only important 353 // section values are: 354 // * SHN_UNDEF 355 // * SHN_ABS 356 // * Any value meaning a regular section. 357 // To handle that, create a dummy aether section that fills the void before 358 // the linker scripts switches to another section. It has an index of one 359 // which will map to whatever the first actual section is. 360 Aether = make<OutputSection>("", 0, SHF_ALLOC); 361 Aether->SectionIndex = 1; 362 CurOutSec = Aether; 363 Dot = 0; 364 365 for (size_t I = 0; I < Opt.Commands.size(); ++I) { 366 // Handle symbol assignments outside of any output section. 367 if (auto *Cmd = dyn_cast<SymbolAssignment>(Opt.Commands[I])) { 368 addSymbol(Cmd); 369 continue; 370 } 371 372 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I])) { 373 std::vector<InputSectionBase *> V = createInputSectionList(*Cmd); 374 375 // The output section name `/DISCARD/' is special. 376 // Any input section assigned to it is discarded. 377 if (Cmd->Name == "/DISCARD/") { 378 discard(V); 379 continue; 380 } 381 382 // This is for ONLY_IF_RO and ONLY_IF_RW. An output section directive 383 // ".foo : ONLY_IF_R[OW] { ... }" is handled only if all member input 384 // sections satisfy a given constraint. If not, a directive is handled 385 // as if it wasn't present from the beginning. 386 // 387 // Because we'll iterate over Commands many more times, the easiest 388 // way to "make it as if it wasn't present" is to just remove it. 389 if (!matchConstraints(V, Cmd->Constraint)) { 390 for (InputSectionBase *S : V) 391 S->Assigned = false; 392 Opt.Commands.erase(Opt.Commands.begin() + I); 393 --I; 394 continue; 395 } 396 397 // A directive may contain symbol definitions like this: 398 // ".foo : { ...; bar = .; }". Handle them. 399 for (BaseCommand *Base : Cmd->Commands) 400 if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base)) 401 addSymbol(OutCmd); 402 403 // Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign 404 // is given, input sections are aligned to that value, whether the 405 // given value is larger or smaller than the original section alignment. 406 if (Cmd->SubalignExpr) { 407 uint32_t Subalign = Cmd->SubalignExpr().getValue(); 408 for (InputSectionBase *S : V) 409 S->Alignment = Subalign; 410 } 411 412 // Add input sections to an output section. 413 for (InputSectionBase *S : V) 414 Factory.addInputSec(S, Cmd->Name, Cmd->Sec); 415 if (OutputSection *Sec = Cmd->Sec) { 416 assert(Sec->SectionIndex == INT_MAX); 417 Sec->SectionIndex = I; 418 SecToCommand[Sec] = Cmd; 419 } 420 } 421 } 422 CurOutSec = nullptr; 423 } 424 425 void LinkerScript::fabricateDefaultCommands() { 426 std::vector<BaseCommand *> Commands; 427 428 // Define start address 429 uint64_t StartAddr = Config->ImageBase + elf::getHeaderSize(); 430 431 // The Sections with -T<section> have been sorted in order of ascending 432 // address. We must lower StartAddr if the lowest -T<section address> as 433 // calls to setDot() must be monotonically increasing. 434 for (auto& KV : Config->SectionStartMap) 435 StartAddr = std::min(StartAddr, KV.second); 436 437 Commands.push_back( 438 make<SymbolAssignment>(".", [=] { return StartAddr; }, "")); 439 440 // For each OutputSection that needs a VA fabricate an OutputSectionCommand 441 // with an InputSectionDescription describing the InputSections 442 for (OutputSection *Sec : *OutputSections) { 443 if (!(Sec->Flags & SHF_ALLOC)) 444 continue; 445 446 auto *OSCmd = make<OutputSectionCommand>(Sec->Name); 447 OSCmd->Sec = Sec; 448 SecToCommand[Sec] = OSCmd; 449 450 // Prefer user supplied address over additional alignment constraint 451 auto I = Config->SectionStartMap.find(Sec->Name); 452 if (I != Config->SectionStartMap.end()) 453 Commands.push_back( 454 make<SymbolAssignment>(".", [=] { return I->second; }, "")); 455 else if (Sec->PageAlign) 456 OSCmd->AddrExpr = [=] { 457 return alignTo(Script->getDot(), Config->MaxPageSize); 458 }; 459 460 Commands.push_back(OSCmd); 461 if (Sec->Sections.size()) { 462 auto *ISD = make<InputSectionDescription>(""); 463 OSCmd->Commands.push_back(ISD); 464 for (InputSection *ISec : Sec->Sections) { 465 ISD->Sections.push_back(ISec); 466 ISec->Assigned = true; 467 } 468 } 469 } 470 // SECTIONS commands run before other non SECTIONS commands 471 Commands.insert(Commands.end(), Opt.Commands.begin(), Opt.Commands.end()); 472 Opt.Commands = std::move(Commands); 473 } 474 475 // Add sections that didn't match any sections command. 476 void LinkerScript::addOrphanSections(OutputSectionFactory &Factory) { 477 for (InputSectionBase *S : InputSections) { 478 if (!S->Live || S->OutSec) 479 continue; 480 StringRef Name = getOutputSectionName(S->Name); 481 auto I = std::find_if( 482 Opt.Commands.begin(), Opt.Commands.end(), [&](BaseCommand *Base) { 483 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base)) 484 return Cmd->Name == Name; 485 return false; 486 }); 487 if (I == Opt.Commands.end()) { 488 Factory.addInputSec(S, Name); 489 } else { 490 auto *Cmd = cast<OutputSectionCommand>(*I); 491 Factory.addInputSec(S, Name, Cmd->Sec); 492 if (OutputSection *Sec = Cmd->Sec) { 493 SecToCommand[Sec] = Cmd; 494 unsigned Index = std::distance(Opt.Commands.begin(), I); 495 assert(Sec->SectionIndex == INT_MAX || Sec->SectionIndex == Index); 496 Sec->SectionIndex = Index; 497 } 498 auto *ISD = make<InputSectionDescription>(""); 499 ISD->Sections.push_back(S); 500 Cmd->Commands.push_back(ISD); 501 } 502 } 503 } 504 505 uint64_t LinkerScript::advance(uint64_t Size, unsigned Align) { 506 bool IsTbss = (CurOutSec->Flags & SHF_TLS) && CurOutSec->Type == SHT_NOBITS; 507 uint64_t Start = IsTbss ? Dot + ThreadBssOffset : Dot; 508 Start = alignTo(Start, Align); 509 uint64_t End = Start + Size; 510 511 if (IsTbss) 512 ThreadBssOffset = End - Dot; 513 else 514 Dot = End; 515 return End; 516 } 517 518 void LinkerScript::output(InputSection *S) { 519 uint64_t Pos = advance(S->getSize(), S->Alignment); 520 S->OutSecOff = Pos - S->getSize() - CurOutSec->Addr; 521 522 // Update output section size after adding each section. This is so that 523 // SIZEOF works correctly in the case below: 524 // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) } 525 CurOutSec->Size = Pos - CurOutSec->Addr; 526 527 // If there is a memory region associated with this input section, then 528 // place the section in that region and update the region index. 529 if (CurMemRegion) { 530 CurMemRegion->Offset += CurOutSec->Size; 531 uint64_t CurSize = CurMemRegion->Offset - CurMemRegion->Origin; 532 if (CurSize > CurMemRegion->Length) { 533 uint64_t OverflowAmt = CurSize - CurMemRegion->Length; 534 error("section '" + CurOutSec->Name + "' will not fit in region '" + 535 CurMemRegion->Name + "': overflowed by " + Twine(OverflowAmt) + 536 " bytes"); 537 } 538 } 539 } 540 541 void LinkerScript::switchTo(OutputSection *Sec) { 542 if (CurOutSec == Sec) 543 return; 544 545 CurOutSec = Sec; 546 CurOutSec->Addr = advance(0, CurOutSec->Alignment); 547 548 // If neither AT nor AT> is specified for an allocatable section, the linker 549 // will set the LMA such that the difference between VMA and LMA for the 550 // section is the same as the preceding output section in the same region 551 // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html 552 if (LMAOffset) 553 CurOutSec->LMAOffset = LMAOffset(); 554 } 555 556 void LinkerScript::process(BaseCommand &Base) { 557 // This handles the assignments to symbol or to the dot. 558 if (auto *Cmd = dyn_cast<SymbolAssignment>(&Base)) { 559 assignSymbol(Cmd, true); 560 return; 561 } 562 563 // Handle BYTE(), SHORT(), LONG(), or QUAD(). 564 if (auto *Cmd = dyn_cast<BytesDataCommand>(&Base)) { 565 Cmd->Offset = Dot - CurOutSec->Addr; 566 Dot += Cmd->Size; 567 CurOutSec->Size = Dot - CurOutSec->Addr; 568 return; 569 } 570 571 // Handle ASSERT(). 572 if (auto *Cmd = dyn_cast<AssertCommand>(&Base)) { 573 Cmd->Expression(); 574 return; 575 } 576 577 // Handle a single input section description command. 578 // It calculates and assigns the offsets for each section and also 579 // updates the output section size. 580 auto &Cmd = cast<InputSectionDescription>(Base); 581 for (InputSectionBase *Sec : Cmd.Sections) { 582 // We tentatively added all synthetic sections at the beginning and removed 583 // empty ones afterwards (because there is no way to know whether they were 584 // going be empty or not other than actually running linker scripts.) 585 // We need to ignore remains of empty sections. 586 if (auto *S = dyn_cast<SyntheticSection>(Sec)) 587 if (S->empty()) 588 continue; 589 590 if (!Sec->Live) 591 continue; 592 assert(CurOutSec == Sec->OutSec); 593 output(cast<InputSection>(Sec)); 594 } 595 } 596 597 // This function searches for a memory region to place the given output 598 // section in. If found, a pointer to the appropriate memory region is 599 // returned. Otherwise, a nullptr is returned. 600 MemoryRegion *LinkerScript::findMemoryRegion(OutputSectionCommand *Cmd) { 601 // If a memory region name was specified in the output section command, 602 // then try to find that region first. 603 if (!Cmd->MemoryRegionName.empty()) { 604 auto It = Opt.MemoryRegions.find(Cmd->MemoryRegionName); 605 if (It != Opt.MemoryRegions.end()) 606 return &It->second; 607 error("memory region '" + Cmd->MemoryRegionName + "' not declared"); 608 return nullptr; 609 } 610 611 // If at least one memory region is defined, all sections must 612 // belong to some memory region. Otherwise, we don't need to do 613 // anything for memory regions. 614 if (Opt.MemoryRegions.empty()) 615 return nullptr; 616 617 OutputSection *Sec = Cmd->Sec; 618 // See if a region can be found by matching section flags. 619 for (auto &Pair : Opt.MemoryRegions) { 620 MemoryRegion &M = Pair.second; 621 if ((M.Flags & Sec->Flags) && (M.NegFlags & Sec->Flags) == 0) 622 return &M; 623 } 624 625 // Otherwise, no suitable region was found. 626 if (Sec->Flags & SHF_ALLOC) 627 error("no memory region specified for section '" + Sec->Name + "'"); 628 return nullptr; 629 } 630 631 // This function assigns offsets to input sections and an output section 632 // for a single sections command (e.g. ".text { *(.text); }"). 633 void LinkerScript::assignOffsets(OutputSectionCommand *Cmd) { 634 OutputSection *Sec = Cmd->Sec; 635 if (!Sec) 636 return; 637 638 if (Cmd->AddrExpr && (Sec->Flags & SHF_ALLOC)) 639 setDot(Cmd->AddrExpr, Cmd->Location, false); 640 641 if (Cmd->LMAExpr) { 642 uint64_t D = Dot; 643 LMAOffset = [=] { return Cmd->LMAExpr().getValue() - D; }; 644 } 645 646 CurMemRegion = Cmd->MemRegion; 647 if (CurMemRegion) 648 Dot = CurMemRegion->Offset; 649 switchTo(Sec); 650 651 // We do not support custom layout for compressed debug sectons. 652 // At this point we already know their size and have compressed content. 653 if (CurOutSec->Flags & SHF_COMPRESSED) 654 return; 655 656 for (BaseCommand *C : Cmd->Commands) 657 process(*C); 658 } 659 660 void LinkerScript::removeEmptyCommands() { 661 // It is common practice to use very generic linker scripts. So for any 662 // given run some of the output sections in the script will be empty. 663 // We could create corresponding empty output sections, but that would 664 // clutter the output. 665 // We instead remove trivially empty sections. The bfd linker seems even 666 // more aggressive at removing them. 667 auto Pos = std::remove_if( 668 Opt.Commands.begin(), Opt.Commands.end(), [&](BaseCommand *Base) { 669 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base)) 670 return std::find(OutputSections->begin(), OutputSections->end(), 671 Cmd->Sec) == OutputSections->end(); 672 return false; 673 }); 674 Opt.Commands.erase(Pos, Opt.Commands.end()); 675 } 676 677 static bool isAllSectionDescription(const OutputSectionCommand &Cmd) { 678 for (BaseCommand *Base : Cmd.Commands) 679 if (!isa<InputSectionDescription>(*Base)) 680 return false; 681 return true; 682 } 683 684 void LinkerScript::adjustSectionsBeforeSorting() { 685 // If the output section contains only symbol assignments, create a 686 // corresponding output section. The bfd linker seems to only create them if 687 // '.' is assigned to, but creating these section should not have any bad 688 // consequeces and gives us a section to put the symbol in. 689 uint64_t Flags = SHF_ALLOC; 690 uint32_t Type = SHT_PROGBITS; 691 692 for (int I = 0, E = Opt.Commands.size(); I != E; ++I) { 693 auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I]); 694 if (!Cmd) 695 continue; 696 if (OutputSection *Sec = Cmd->Sec) { 697 Flags = Sec->Flags; 698 Type = Sec->Type; 699 continue; 700 } 701 702 if (isAllSectionDescription(*Cmd)) 703 continue; 704 705 auto *OutSec = make<OutputSection>(Cmd->Name, Type, Flags); 706 OutSec->SectionIndex = I; 707 OutputSections->push_back(OutSec); 708 Cmd->Sec = OutSec; 709 SecToCommand[OutSec] = Cmd; 710 } 711 } 712 713 void LinkerScript::adjustSectionsAfterSorting() { 714 placeOrphanSections(); 715 716 // Try and find an appropriate memory region to assign offsets in. 717 for (BaseCommand *Base : Opt.Commands) { 718 if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base)) { 719 Cmd->MemRegion = findMemoryRegion(Cmd); 720 // Handle align (e.g. ".foo : ALIGN(16) { ... }"). 721 if (Cmd->AlignExpr) 722 Cmd->Sec->updateAlignment(Cmd->AlignExpr().getValue()); 723 } 724 } 725 726 // If output section command doesn't specify any segments, 727 // and we haven't previously assigned any section to segment, 728 // then we simply assign section to the very first load segment. 729 // Below is an example of such linker script: 730 // PHDRS { seg PT_LOAD; } 731 // SECTIONS { .aaa : { *(.aaa) } } 732 std::vector<StringRef> DefPhdrs; 733 auto FirstPtLoad = 734 std::find_if(Opt.PhdrsCommands.begin(), Opt.PhdrsCommands.end(), 735 [](const PhdrsCommand &Cmd) { return Cmd.Type == PT_LOAD; }); 736 if (FirstPtLoad != Opt.PhdrsCommands.end()) 737 DefPhdrs.push_back(FirstPtLoad->Name); 738 739 // Walk the commands and propagate the program headers to commands that don't 740 // explicitly specify them. 741 for (BaseCommand *Base : Opt.Commands) { 742 auto *Cmd = dyn_cast<OutputSectionCommand>(Base); 743 if (!Cmd) 744 continue; 745 746 if (Cmd->Phdrs.empty()) 747 Cmd->Phdrs = DefPhdrs; 748 else 749 DefPhdrs = Cmd->Phdrs; 750 } 751 752 removeEmptyCommands(); 753 } 754 755 // When placing orphan sections, we want to place them after symbol assignments 756 // so that an orphan after 757 // begin_foo = .; 758 // foo : { *(foo) } 759 // end_foo = .; 760 // doesn't break the intended meaning of the begin/end symbols. 761 // We don't want to go over sections since Writer<ELFT>::sortSections is the 762 // one in charge of deciding the order of the sections. 763 // We don't want to go over alignments, since doing so in 764 // rx_sec : { *(rx_sec) } 765 // . = ALIGN(0x1000); 766 // /* The RW PT_LOAD starts here*/ 767 // rw_sec : { *(rw_sec) } 768 // would mean that the RW PT_LOAD would become unaligned. 769 static bool shouldSkip(BaseCommand *Cmd) { 770 if (isa<OutputSectionCommand>(Cmd)) 771 return false; 772 if (auto *Assign = dyn_cast<SymbolAssignment>(Cmd)) 773 return Assign->Name != "."; 774 return true; 775 } 776 777 // Orphan sections are sections present in the input files which are 778 // not explicitly placed into the output file by the linker script. 779 // 780 // When the control reaches this function, Opt.Commands contains 781 // output section commands for non-orphan sections only. This function 782 // adds new elements for orphan sections so that all sections are 783 // explicitly handled by Opt.Commands. 784 // 785 // Writer<ELFT>::sortSections has already sorted output sections. 786 // What we need to do is to scan OutputSections vector and 787 // Opt.Commands in parallel to find orphan sections. If there is an 788 // output section that doesn't have a corresponding entry in 789 // Opt.Commands, we will insert a new entry to Opt.Commands. 790 // 791 // There is some ambiguity as to where exactly a new entry should be 792 // inserted, because Opt.Commands contains not only output section 793 // commands but also other types of commands such as symbol assignment 794 // expressions. There's no correct answer here due to the lack of the 795 // formal specification of the linker script. We use heuristics to 796 // determine whether a new output command should be added before or 797 // after another commands. For the details, look at shouldSkip 798 // function. 799 void LinkerScript::placeOrphanSections() { 800 // The OutputSections are already in the correct order. 801 // This loops creates or moves commands as needed so that they are in the 802 // correct order. 803 int CmdIndex = 0; 804 805 // As a horrible special case, skip the first . assignment if it is before any 806 // section. We do this because it is common to set a load address by starting 807 // the script with ". = 0xabcd" and the expectation is that every section is 808 // after that. 809 auto FirstSectionOrDotAssignment = 810 std::find_if(Opt.Commands.begin(), Opt.Commands.end(), 811 [](BaseCommand *Cmd) { return !shouldSkip(Cmd); }); 812 if (FirstSectionOrDotAssignment != Opt.Commands.end()) { 813 CmdIndex = FirstSectionOrDotAssignment - Opt.Commands.begin(); 814 if (isa<SymbolAssignment>(**FirstSectionOrDotAssignment)) 815 ++CmdIndex; 816 } 817 818 for (OutputSection *Sec : *OutputSections) { 819 StringRef Name = Sec->Name; 820 821 // Find the last spot where we can insert a command and still get the 822 // correct result. 823 auto CmdIter = Opt.Commands.begin() + CmdIndex; 824 auto E = Opt.Commands.end(); 825 while (CmdIter != E && shouldSkip(*CmdIter)) { 826 ++CmdIter; 827 ++CmdIndex; 828 } 829 830 // If there is no command corresponding to this output section, 831 // create one and put a InputSectionDescription in it so that both 832 // representations agree on which input sections to use. 833 OutputSectionCommand *Cmd = getCmd(Sec); 834 if (!Cmd) { 835 Cmd = make<OutputSectionCommand>(Name); 836 Opt.Commands.insert(CmdIter, Cmd); 837 ++CmdIndex; 838 839 Cmd->Sec = Sec; 840 SecToCommand[Sec] = Cmd; 841 auto *ISD = make<InputSectionDescription>(""); 842 for (InputSection *IS : Sec->Sections) 843 ISD->Sections.push_back(IS); 844 Cmd->Commands.push_back(ISD); 845 846 continue; 847 } 848 849 // Continue from where we found it. 850 while (*CmdIter != Cmd) { 851 ++CmdIter; 852 ++CmdIndex; 853 } 854 ++CmdIndex; 855 } 856 } 857 858 void LinkerScript::processNonSectionCommands() { 859 for (BaseCommand *Base : Opt.Commands) { 860 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base)) 861 assignSymbol(Cmd, false); 862 else if (auto *Cmd = dyn_cast<AssertCommand>(Base)) 863 Cmd->Expression(); 864 } 865 } 866 867 // Do a last effort at synchronizing the linker script "AST" and the section 868 // list. This is needed to account for last minute changes, like adding a 869 // .ARM.exidx terminator and sorting SHF_LINK_ORDER sections. 870 // 871 // FIXME: We should instead create the "AST" earlier and the above changes would 872 // be done directly in the "AST". 873 // 874 // This can only handle new sections being added and sections being reordered. 875 void LinkerScript::synchronize() { 876 for (BaseCommand *Base : Opt.Commands) { 877 auto *Cmd = dyn_cast<OutputSectionCommand>(Base); 878 if (!Cmd) 879 continue; 880 ArrayRef<InputSection *> Sections = Cmd->Sec->Sections; 881 std::vector<InputSectionBase **> ScriptSections; 882 DenseSet<InputSectionBase *> ScriptSectionsSet; 883 for (BaseCommand *Base : Cmd->Commands) { 884 auto *ISD = dyn_cast<InputSectionDescription>(Base); 885 if (!ISD) 886 continue; 887 for (InputSectionBase *&IS : ISD->Sections) { 888 if (IS->Live) { 889 ScriptSections.push_back(&IS); 890 ScriptSectionsSet.insert(IS); 891 } 892 } 893 } 894 std::vector<InputSectionBase *> Missing; 895 for (InputSection *IS : Sections) 896 if (!ScriptSectionsSet.count(IS)) 897 Missing.push_back(IS); 898 if (!Missing.empty()) { 899 auto ISD = make<InputSectionDescription>(""); 900 ISD->Sections = Missing; 901 Cmd->Commands.push_back(ISD); 902 for (InputSectionBase *&IS : ISD->Sections) 903 if (IS->Live) 904 ScriptSections.push_back(&IS); 905 } 906 assert(ScriptSections.size() == Sections.size()); 907 for (int I = 0, N = Sections.size(); I < N; ++I) 908 *ScriptSections[I] = Sections[I]; 909 } 910 } 911 912 static bool allocateHeaders(std::vector<PhdrEntry> &Phdrs, 913 ArrayRef<OutputSection *> OutputSections, 914 uint64_t Min) { 915 auto FirstPTLoad = 916 std::find_if(Phdrs.begin(), Phdrs.end(), 917 [](const PhdrEntry &E) { return E.p_type == PT_LOAD; }); 918 if (FirstPTLoad == Phdrs.end()) 919 return false; 920 921 uint64_t HeaderSize = getHeaderSize(); 922 if (HeaderSize <= Min || Script->hasPhdrsCommands()) { 923 Min = alignDown(Min - HeaderSize, Config->MaxPageSize); 924 Out::ElfHeader->Addr = Min; 925 Out::ProgramHeaders->Addr = Min + Out::ElfHeader->Size; 926 return true; 927 } 928 929 assert(FirstPTLoad->First == Out::ElfHeader); 930 OutputSection *ActualFirst = nullptr; 931 for (OutputSection *Sec : OutputSections) { 932 if (Sec->FirstInPtLoad == Out::ElfHeader) { 933 ActualFirst = Sec; 934 break; 935 } 936 } 937 if (ActualFirst) { 938 for (OutputSection *Sec : OutputSections) 939 if (Sec->FirstInPtLoad == Out::ElfHeader) 940 Sec->FirstInPtLoad = ActualFirst; 941 FirstPTLoad->First = ActualFirst; 942 } else { 943 Phdrs.erase(FirstPTLoad); 944 } 945 946 auto PhdrI = std::find_if(Phdrs.begin(), Phdrs.end(), [](const PhdrEntry &E) { 947 return E.p_type == PT_PHDR; 948 }); 949 if (PhdrI != Phdrs.end()) 950 Phdrs.erase(PhdrI); 951 return false; 952 } 953 954 void LinkerScript::assignAddresses(std::vector<PhdrEntry> &Phdrs) { 955 // Assign addresses as instructed by linker script SECTIONS sub-commands. 956 Dot = 0; 957 ErrorOnMissingSection = true; 958 switchTo(Aether); 959 960 for (BaseCommand *Base : Opt.Commands) { 961 if (auto *Cmd = dyn_cast<SymbolAssignment>(Base)) { 962 assignSymbol(Cmd, false); 963 continue; 964 } 965 966 if (auto *Cmd = dyn_cast<AssertCommand>(Base)) { 967 Cmd->Expression(); 968 continue; 969 } 970 971 auto *Cmd = cast<OutputSectionCommand>(Base); 972 assignOffsets(Cmd); 973 } 974 975 uint64_t MinVA = std::numeric_limits<uint64_t>::max(); 976 for (OutputSection *Sec : *OutputSections) { 977 if (Sec->Flags & SHF_ALLOC) 978 MinVA = std::min<uint64_t>(MinVA, Sec->Addr); 979 else 980 Sec->Addr = 0; 981 } 982 983 allocateHeaders(Phdrs, *OutputSections, MinVA); 984 } 985 986 // Creates program headers as instructed by PHDRS linker script command. 987 std::vector<PhdrEntry> LinkerScript::createPhdrs() { 988 std::vector<PhdrEntry> Ret; 989 990 // Process PHDRS and FILEHDR keywords because they are not 991 // real output sections and cannot be added in the following loop. 992 for (const PhdrsCommand &Cmd : Opt.PhdrsCommands) { 993 Ret.emplace_back(Cmd.Type, Cmd.Flags == UINT_MAX ? PF_R : Cmd.Flags); 994 PhdrEntry &Phdr = Ret.back(); 995 996 if (Cmd.HasFilehdr) 997 Phdr.add(Out::ElfHeader); 998 if (Cmd.HasPhdrs) 999 Phdr.add(Out::ProgramHeaders); 1000 1001 if (Cmd.LMAExpr) { 1002 Phdr.p_paddr = Cmd.LMAExpr().getValue(); 1003 Phdr.HasLMA = true; 1004 } 1005 } 1006 1007 // Add output sections to program headers. 1008 for (OutputSection *Sec : *OutputSections) { 1009 if (!(Sec->Flags & SHF_ALLOC)) 1010 break; 1011 1012 // Assign headers specified by linker script 1013 for (size_t Id : getPhdrIndices(Sec)) { 1014 Ret[Id].add(Sec); 1015 if (Opt.PhdrsCommands[Id].Flags == UINT_MAX) 1016 Ret[Id].p_flags |= Sec->getPhdrFlags(); 1017 } 1018 } 1019 return Ret; 1020 } 1021 1022 bool LinkerScript::ignoreInterpSection() { 1023 // Ignore .interp section in case we have PHDRS specification 1024 // and PT_INTERP isn't listed. 1025 if (Opt.PhdrsCommands.empty()) 1026 return false; 1027 for (PhdrsCommand &Cmd : Opt.PhdrsCommands) 1028 if (Cmd.Type == PT_INTERP) 1029 return false; 1030 return true; 1031 } 1032 1033 OutputSectionCommand *LinkerScript::getCmd(OutputSection *Sec) const { 1034 auto I = SecToCommand.find(Sec); 1035 if (I == SecToCommand.end()) 1036 return nullptr; 1037 return I->second; 1038 } 1039 1040 Optional<uint32_t> LinkerScript::getFiller(OutputSection *Sec) { 1041 if (OutputSectionCommand *Cmd = getCmd(Sec)) 1042 return Cmd->Filler; 1043 return None; 1044 } 1045 1046 static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) { 1047 if (Size == 1) 1048 *Buf = Data; 1049 else if (Size == 2) 1050 write16(Buf, Data, Config->Endianness); 1051 else if (Size == 4) 1052 write32(Buf, Data, Config->Endianness); 1053 else if (Size == 8) 1054 write64(Buf, Data, Config->Endianness); 1055 else 1056 llvm_unreachable("unsupported Size argument"); 1057 } 1058 1059 void LinkerScript::writeDataBytes(OutputSection *Sec, uint8_t *Buf) { 1060 if (OutputSectionCommand *Cmd = getCmd(Sec)) 1061 for (BaseCommand *Base : Cmd->Commands) 1062 if (auto *Data = dyn_cast<BytesDataCommand>(Base)) 1063 writeInt(Buf + Data->Offset, Data->Expression().getValue(), Data->Size); 1064 } 1065 1066 bool LinkerScript::hasLMA(OutputSection *Sec) { 1067 if (OutputSectionCommand *Cmd = getCmd(Sec)) 1068 if (Cmd->LMAExpr) 1069 return true; 1070 return false; 1071 } 1072 1073 ExprValue LinkerScript::getSymbolValue(const Twine &Loc, StringRef S) { 1074 if (S == ".") 1075 return {CurOutSec, Dot - CurOutSec->Addr}; 1076 if (SymbolBody *B = findSymbol(S)) { 1077 if (auto *D = dyn_cast<DefinedRegular>(B)) 1078 return {D->Section, D->Value}; 1079 if (auto *C = dyn_cast<DefinedCommon>(B)) 1080 return {InX::Common, C->Offset}; 1081 } 1082 error(Loc + ": symbol not found: " + S); 1083 return 0; 1084 } 1085 1086 bool LinkerScript::isDefined(StringRef S) { return findSymbol(S) != nullptr; } 1087 1088 // Returns indices of ELF headers containing specific section. Each index is a 1089 // zero based number of ELF header listed within PHDRS {} script block. 1090 std::vector<size_t> LinkerScript::getPhdrIndices(OutputSection *Sec) { 1091 if (OutputSectionCommand *Cmd = getCmd(Sec)) { 1092 std::vector<size_t> Ret; 1093 for (StringRef PhdrName : Cmd->Phdrs) 1094 Ret.push_back(getPhdrIndex(Cmd->Location, PhdrName)); 1095 return Ret; 1096 } 1097 return {}; 1098 } 1099 1100 size_t LinkerScript::getPhdrIndex(const Twine &Loc, StringRef PhdrName) { 1101 size_t I = 0; 1102 for (PhdrsCommand &Cmd : Opt.PhdrsCommands) { 1103 if (Cmd.Name == PhdrName) 1104 return I; 1105 ++I; 1106 } 1107 error(Loc + ": section header '" + PhdrName + "' is not listed in PHDRS"); 1108 return 0; 1109 } 1110