1 //===- InputSection.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "InputSection.h" 11 #include "Config.h" 12 #include "EhFrame.h" 13 #include "InputFiles.h" 14 #include "LinkerScript.h" 15 #include "OutputSections.h" 16 #include "Relocations.h" 17 #include "Symbols.h" 18 #include "SyntheticSections.h" 19 #include "Target.h" 20 #include "Thunks.h" 21 #include "lld/Common/ErrorHandler.h" 22 #include "lld/Common/Memory.h" 23 #include "llvm/Object/Decompressor.h" 24 #include "llvm/Support/Compiler.h" 25 #include "llvm/Support/Compression.h" 26 #include "llvm/Support/Endian.h" 27 #include "llvm/Support/Threading.h" 28 #include "llvm/Support/xxhash.h" 29 #include <mutex> 30 31 using namespace llvm; 32 using namespace llvm::ELF; 33 using namespace llvm::object; 34 using namespace llvm::support; 35 using namespace llvm::support::endian; 36 using namespace llvm::sys; 37 38 using namespace lld; 39 using namespace lld::elf; 40 41 std::vector<InputSectionBase *> elf::InputSections; 42 43 // Returns a string to construct an error message. 44 std::string lld::toString(const InputSectionBase *Sec) { 45 return (toString(Sec->File) + ":(" + Sec->Name + ")").str(); 46 } 47 48 template <class ELFT> 49 static ArrayRef<uint8_t> getSectionContents(ObjFile<ELFT> &File, 50 const typename ELFT::Shdr &Hdr) { 51 if (Hdr.sh_type == SHT_NOBITS) 52 return makeArrayRef<uint8_t>(nullptr, Hdr.sh_size); 53 return check(File.getObj().getSectionContents(&Hdr)); 54 } 55 56 InputSectionBase::InputSectionBase(InputFile *File, uint64_t Flags, 57 uint32_t Type, uint64_t Entsize, 58 uint32_t Link, uint32_t Info, 59 uint32_t Alignment, ArrayRef<uint8_t> Data, 60 StringRef Name, Kind SectionKind) 61 : SectionBase(SectionKind, Name, Flags, Entsize, Alignment, Type, Info, 62 Link), 63 File(File), Data(Data) { 64 // In order to reduce memory allocation, we assume that mergeable 65 // sections are smaller than 4 GiB, which is not an unreasonable 66 // assumption as of 2017. 67 if (SectionKind == SectionBase::Merge && Data.size() > UINT32_MAX) 68 error(toString(this) + ": section too large"); 69 70 NumRelocations = 0; 71 AreRelocsRela = false; 72 73 // The ELF spec states that a value of 0 means the section has 74 // no alignment constraits. 75 uint32_t V = std::max<uint64_t>(Alignment, 1); 76 if (!isPowerOf2_64(V)) 77 fatal(toString(File) + ": section sh_addralign is not a power of 2"); 78 this->Alignment = V; 79 } 80 81 // Drop SHF_GROUP bit unless we are producing a re-linkable object file. 82 // SHF_GROUP is a marker that a section belongs to some comdat group. 83 // That flag doesn't make sense in an executable. 84 static uint64_t getFlags(uint64_t Flags) { 85 Flags &= ~(uint64_t)SHF_INFO_LINK; 86 if (!Config->Relocatable) 87 Flags &= ~(uint64_t)SHF_GROUP; 88 return Flags; 89 } 90 91 // GNU assembler 2.24 and LLVM 4.0.0's MC (the newest release as of 92 // March 2017) fail to infer section types for sections starting with 93 // ".init_array." or ".fini_array.". They set SHT_PROGBITS instead of 94 // SHF_INIT_ARRAY. As a result, the following assembler directive 95 // creates ".init_array.100" with SHT_PROGBITS, for example. 96 // 97 // .section .init_array.100, "aw" 98 // 99 // This function forces SHT_{INIT,FINI}_ARRAY so that we can handle 100 // incorrect inputs as if they were correct from the beginning. 101 static uint64_t getType(uint64_t Type, StringRef Name) { 102 if (Type == SHT_PROGBITS && Name.startswith(".init_array.")) 103 return SHT_INIT_ARRAY; 104 if (Type == SHT_PROGBITS && Name.startswith(".fini_array.")) 105 return SHT_FINI_ARRAY; 106 return Type; 107 } 108 109 template <class ELFT> 110 InputSectionBase::InputSectionBase(ObjFile<ELFT> &File, 111 const typename ELFT::Shdr &Hdr, 112 StringRef Name, Kind SectionKind) 113 : InputSectionBase(&File, getFlags(Hdr.sh_flags), 114 getType(Hdr.sh_type, Name), Hdr.sh_entsize, Hdr.sh_link, 115 Hdr.sh_info, Hdr.sh_addralign, 116 getSectionContents(File, Hdr), Name, SectionKind) { 117 // We reject object files having insanely large alignments even though 118 // they are allowed by the spec. I think 4GB is a reasonable limitation. 119 // We might want to relax this in the future. 120 if (Hdr.sh_addralign > UINT32_MAX) 121 fatal(toString(&File) + ": section sh_addralign is too large"); 122 } 123 124 size_t InputSectionBase::getSize() const { 125 if (auto *S = dyn_cast<SyntheticSection>(this)) 126 return S->getSize(); 127 128 return Data.size(); 129 } 130 131 uint64_t InputSectionBase::getOffsetInFile() const { 132 const uint8_t *FileStart = (const uint8_t *)File->MB.getBufferStart(); 133 const uint8_t *SecStart = Data.begin(); 134 return SecStart - FileStart; 135 } 136 137 uint64_t SectionBase::getOffset(uint64_t Offset) const { 138 switch (kind()) { 139 case Output: { 140 auto *OS = cast<OutputSection>(this); 141 // For output sections we treat offset -1 as the end of the section. 142 return Offset == uint64_t(-1) ? OS->Size : Offset; 143 } 144 case Regular: 145 return cast<InputSection>(this)->OutSecOff + Offset; 146 case Synthetic: { 147 auto *IS = cast<InputSection>(this); 148 // For synthetic sections we treat offset -1 as the end of the section. 149 return IS->OutSecOff + (Offset == uint64_t(-1) ? IS->getSize() : Offset); 150 } 151 case EHFrame: 152 // The file crtbeginT.o has relocations pointing to the start of an empty 153 // .eh_frame that is known to be the first in the link. It does that to 154 // identify the start of the output .eh_frame. 155 return Offset; 156 case Merge: 157 const MergeInputSection *MS = cast<MergeInputSection>(this); 158 if (InputSection *IS = MS->getParent()) 159 return IS->OutSecOff + MS->getOffset(Offset); 160 return MS->getOffset(Offset); 161 } 162 llvm_unreachable("invalid section kind"); 163 } 164 165 OutputSection *SectionBase::getOutputSection() { 166 InputSection *Sec; 167 if (auto *IS = dyn_cast<InputSection>(this)) 168 return IS->getParent(); 169 else if (auto *MS = dyn_cast<MergeInputSection>(this)) 170 Sec = MS->getParent(); 171 else if (auto *EH = dyn_cast<EhInputSection>(this)) 172 Sec = EH->getParent(); 173 else 174 return cast<OutputSection>(this); 175 return Sec ? Sec->getParent() : nullptr; 176 } 177 178 // Uncompress section contents if required. Note that this function 179 // is called from parallelForEach, so it must be thread-safe. 180 void InputSectionBase::maybeUncompress() { 181 if (UncompressBuf || !Decompressor::isCompressedELFSection(Flags, Name)) 182 return; 183 184 Decompressor Dec = check(Decompressor::create(Name, toStringRef(Data), 185 Config->IsLE, Config->Is64)); 186 187 size_t Size = Dec.getDecompressedSize(); 188 UncompressBuf.reset(new char[Size]()); 189 if (Error E = Dec.decompress({UncompressBuf.get(), Size})) 190 fatal(toString(this) + 191 ": decompress failed: " + llvm::toString(std::move(E))); 192 193 Data = makeArrayRef((uint8_t *)UncompressBuf.get(), Size); 194 Flags &= ~(uint64_t)SHF_COMPRESSED; 195 } 196 197 InputSection *InputSectionBase::getLinkOrderDep() const { 198 if ((Flags & SHF_LINK_ORDER) && Link != 0) { 199 InputSectionBase *L = File->getSections()[Link]; 200 if (auto *IS = dyn_cast<InputSection>(L)) 201 return IS; 202 error("a section with SHF_LINK_ORDER should not refer a non-regular " 203 "section: " + 204 toString(L)); 205 } 206 return nullptr; 207 } 208 209 // Returns a source location string. Used to construct an error message. 210 template <class ELFT> 211 std::string InputSectionBase::getLocation(uint64_t Offset) { 212 // We don't have file for synthetic sections. 213 if (getFile<ELFT>() == nullptr) 214 return (Config->OutputFile + ":(" + Name + "+0x" + utohexstr(Offset) + ")") 215 .str(); 216 217 // First check if we can get desired values from debugging information. 218 std::string LineInfo = getFile<ELFT>()->getLineInfo(this, Offset); 219 if (!LineInfo.empty()) 220 return LineInfo; 221 222 // File->SourceFile contains STT_FILE symbol that contains a 223 // source file name. If it's missing, we use an object file name. 224 std::string SrcFile = getFile<ELFT>()->SourceFile; 225 if (SrcFile.empty()) 226 SrcFile = toString(File); 227 228 // Find a function symbol that encloses a given location. 229 for (Symbol *B : File->getSymbols()) 230 if (auto *D = dyn_cast<Defined>(B)) 231 if (D->Section == this && D->Type == STT_FUNC) 232 if (D->Value <= Offset && Offset < D->Value + D->Size) 233 return SrcFile + ":(function " + toString(*D) + ")"; 234 235 // If there's no symbol, print out the offset in the section. 236 return (SrcFile + ":(" + Name + "+0x" + utohexstr(Offset) + ")").str(); 237 } 238 239 // This function is intended to be used for constructing an error message. 240 // The returned message looks like this: 241 // 242 // foo.c:42 (/home/alice/possibly/very/long/path/foo.c:42) 243 // 244 // Returns an empty string if there's no way to get line info. 245 std::string InputSectionBase::getSrcMsg(const Symbol &Sym, uint64_t Offset) { 246 // Synthetic sections don't have input files. 247 if (!File) 248 return ""; 249 return File->getSrcMsg(Sym, *this, Offset); 250 } 251 252 // Returns a filename string along with an optional section name. This 253 // function is intended to be used for constructing an error 254 // message. The returned message looks like this: 255 // 256 // path/to/foo.o:(function bar) 257 // 258 // or 259 // 260 // path/to/foo.o:(function bar) in archive path/to/bar.a 261 std::string InputSectionBase::getObjMsg(uint64_t Off) { 262 // Synthetic sections don't have input files. 263 if (!File) 264 return ("<internal>:(" + Name + "+0x" + utohexstr(Off) + ")").str(); 265 std::string Filename = File->getName(); 266 267 std::string Archive; 268 if (!File->ArchiveName.empty()) 269 Archive = (" in archive " + File->ArchiveName).str(); 270 271 // Find a symbol that encloses a given location. 272 for (Symbol *B : File->getSymbols()) 273 if (auto *D = dyn_cast<Defined>(B)) 274 if (D->Section == this && D->Value <= Off && Off < D->Value + D->Size) 275 return Filename + ":(" + toString(*D) + ")" + Archive; 276 277 // If there's no symbol, print out the offset in the section. 278 return (Filename + ":(" + Name + "+0x" + utohexstr(Off) + ")" + Archive) 279 .str(); 280 } 281 282 InputSection InputSection::Discarded(nullptr, 0, 0, 0, ArrayRef<uint8_t>(), ""); 283 284 InputSection::InputSection(InputFile *F, uint64_t Flags, uint32_t Type, 285 uint32_t Alignment, ArrayRef<uint8_t> Data, 286 StringRef Name, Kind K) 287 : InputSectionBase(F, Flags, Type, 288 /*Entsize*/ 0, /*Link*/ 0, /*Info*/ 0, Alignment, Data, 289 Name, K) {} 290 291 template <class ELFT> 292 InputSection::InputSection(ObjFile<ELFT> &F, const typename ELFT::Shdr &Header, 293 StringRef Name) 294 : InputSectionBase(F, Header, Name, InputSectionBase::Regular) {} 295 296 bool InputSection::classof(const SectionBase *S) { 297 return S->kind() == SectionBase::Regular || 298 S->kind() == SectionBase::Synthetic; 299 } 300 301 OutputSection *InputSection::getParent() const { 302 return cast_or_null<OutputSection>(Parent); 303 } 304 305 // Copy SHT_GROUP section contents. Used only for the -r option. 306 template <class ELFT> void InputSection::copyShtGroup(uint8_t *Buf) { 307 // ELFT::Word is the 32-bit integral type in the target endianness. 308 typedef typename ELFT::Word u32; 309 ArrayRef<u32> From = getDataAs<u32>(); 310 auto *To = reinterpret_cast<u32 *>(Buf); 311 312 // The first entry is not a section number but a flag. 313 *To++ = From[0]; 314 315 // Adjust section numbers because section numbers in an input object 316 // files are different in the output. 317 ArrayRef<InputSectionBase *> Sections = File->getSections(); 318 for (uint32_t Idx : From.slice(1)) 319 *To++ = Sections[Idx]->getOutputSection()->SectionIndex; 320 } 321 322 InputSectionBase *InputSection::getRelocatedSection() { 323 assert(Type == SHT_RELA || Type == SHT_REL); 324 ArrayRef<InputSectionBase *> Sections = File->getSections(); 325 return Sections[Info]; 326 } 327 328 // This is used for -r and --emit-relocs. We can't use memcpy to copy 329 // relocations because we need to update symbol table offset and section index 330 // for each relocation. So we copy relocations one by one. 331 template <class ELFT, class RelTy> 332 void InputSection::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) { 333 InputSectionBase *Sec = getRelocatedSection(); 334 335 for (const RelTy &Rel : Rels) { 336 RelType Type = Rel.getType(Config->IsMips64EL); 337 Symbol &Sym = getFile<ELFT>()->getRelocTargetSym(Rel); 338 339 auto *P = reinterpret_cast<typename ELFT::Rela *>(Buf); 340 Buf += sizeof(RelTy); 341 342 if (RelTy::IsRela) 343 P->r_addend = getAddend<ELFT>(Rel); 344 345 // Output section VA is zero for -r, so r_offset is an offset within the 346 // section, but for --emit-relocs it is an virtual address. 347 P->r_offset = Sec->getOutputSection()->Addr + Sec->getOffset(Rel.r_offset); 348 P->setSymbolAndType(InX::SymTab->getSymbolIndex(&Sym), Type, 349 Config->IsMips64EL); 350 351 if (Sym.Type == STT_SECTION) { 352 // We combine multiple section symbols into only one per 353 // section. This means we have to update the addend. That is 354 // trivial for Elf_Rela, but for Elf_Rel we have to write to the 355 // section data. We do that by adding to the Relocation vector. 356 357 // .eh_frame is horribly special and can reference discarded sections. To 358 // avoid having to parse and recreate .eh_frame, we just replace any 359 // relocation in it pointing to discarded sections with R_*_NONE, which 360 // hopefully creates a frame that is ignored at runtime. 361 auto *D = dyn_cast<Defined>(&Sym); 362 if (!D) { 363 error("STT_SECTION symbol should be defined"); 364 continue; 365 } 366 SectionBase *Section = D->Section; 367 if (Section == &InputSection::Discarded) { 368 P->setSymbolAndType(0, 0, false); 369 continue; 370 } 371 372 if (RelTy::IsRela) { 373 P->r_addend = 374 Sym.getVA(getAddend<ELFT>(Rel)) - Section->getOutputSection()->Addr; 375 } else if (Config->Relocatable) { 376 const uint8_t *BufLoc = Sec->Data.begin() + Rel.r_offset; 377 Sec->Relocations.push_back({R_ABS, Type, Rel.r_offset, 378 Target->getImplicitAddend(BufLoc, Type), 379 &Sym}); 380 } 381 } 382 383 } 384 } 385 386 // The ARM and AArch64 ABI handle pc-relative relocations to undefined weak 387 // references specially. The general rule is that the value of the symbol in 388 // this context is the address of the place P. A further special case is that 389 // branch relocations to an undefined weak reference resolve to the next 390 // instruction. 391 static uint32_t getARMUndefinedRelativeWeakVA(RelType Type, uint32_t A, 392 uint32_t P) { 393 switch (Type) { 394 // Unresolved branch relocations to weak references resolve to next 395 // instruction, this will be either 2 or 4 bytes on from P. 396 case R_ARM_THM_JUMP11: 397 return P + 2 + A; 398 case R_ARM_CALL: 399 case R_ARM_JUMP24: 400 case R_ARM_PC24: 401 case R_ARM_PLT32: 402 case R_ARM_PREL31: 403 case R_ARM_THM_JUMP19: 404 case R_ARM_THM_JUMP24: 405 return P + 4 + A; 406 case R_ARM_THM_CALL: 407 // We don't want an interworking BLX to ARM 408 return P + 5 + A; 409 // Unresolved non branch pc-relative relocations 410 // R_ARM_TARGET2 which can be resolved relatively is not present as it never 411 // targets a weak-reference. 412 case R_ARM_MOVW_PREL_NC: 413 case R_ARM_MOVT_PREL: 414 case R_ARM_REL32: 415 case R_ARM_THM_MOVW_PREL_NC: 416 case R_ARM_THM_MOVT_PREL: 417 return P + A; 418 } 419 llvm_unreachable("ARM pc-relative relocation expected\n"); 420 } 421 422 // The comment above getARMUndefinedRelativeWeakVA applies to this function. 423 static uint64_t getAArch64UndefinedRelativeWeakVA(uint64_t Type, uint64_t A, 424 uint64_t P) { 425 switch (Type) { 426 // Unresolved branch relocations to weak references resolve to next 427 // instruction, this is 4 bytes on from P. 428 case R_AARCH64_CALL26: 429 case R_AARCH64_CONDBR19: 430 case R_AARCH64_JUMP26: 431 case R_AARCH64_TSTBR14: 432 return P + 4 + A; 433 // Unresolved non branch pc-relative relocations 434 case R_AARCH64_PREL16: 435 case R_AARCH64_PREL32: 436 case R_AARCH64_PREL64: 437 case R_AARCH64_ADR_PREL_LO21: 438 case R_AARCH64_LD_PREL_LO19: 439 return P + A; 440 } 441 llvm_unreachable("AArch64 pc-relative relocation expected\n"); 442 } 443 444 // ARM SBREL relocations are of the form S + A - B where B is the static base 445 // The ARM ABI defines base to be "addressing origin of the output segment 446 // defining the symbol S". We defined the "addressing origin"/static base to be 447 // the base of the PT_LOAD segment containing the Sym. 448 // The procedure call standard only defines a Read Write Position Independent 449 // RWPI variant so in practice we should expect the static base to be the base 450 // of the RW segment. 451 static uint64_t getARMStaticBase(const Symbol &Sym) { 452 OutputSection *OS = Sym.getOutputSection(); 453 if (!OS || !OS->PtLoad || !OS->PtLoad->FirstSec) 454 fatal("SBREL relocation to " + Sym.getName() + " without static base"); 455 return OS->PtLoad->FirstSec->Addr; 456 } 457 458 static uint64_t getRelocTargetVA(RelType Type, int64_t A, uint64_t P, 459 const Symbol &Sym, RelExpr Expr) { 460 switch (Expr) { 461 case R_INVALID: 462 return 0; 463 case R_ABS: 464 case R_RELAX_GOT_PC_NOPIC: 465 return Sym.getVA(A); 466 case R_ARM_SBREL: 467 return Sym.getVA(A) - getARMStaticBase(Sym); 468 case R_GOT: 469 case R_RELAX_TLS_GD_TO_IE_ABS: 470 return Sym.getGotVA() + A; 471 case R_GOTONLY_PC: 472 return InX::Got->getVA() + A - P; 473 case R_GOTONLY_PC_FROM_END: 474 return InX::Got->getVA() + A - P + InX::Got->getSize(); 475 case R_GOTREL: 476 return Sym.getVA(A) - InX::Got->getVA(); 477 case R_GOTREL_FROM_END: 478 return Sym.getVA(A) - InX::Got->getVA() - InX::Got->getSize(); 479 case R_GOT_FROM_END: 480 case R_RELAX_TLS_GD_TO_IE_END: 481 return Sym.getGotOffset() + A - InX::Got->getSize(); 482 case R_GOT_OFF: 483 return Sym.getGotOffset() + A; 484 case R_GOT_PAGE_PC: 485 case R_RELAX_TLS_GD_TO_IE_PAGE_PC: 486 return getAArch64Page(Sym.getGotVA() + A) - getAArch64Page(P); 487 case R_GOT_PC: 488 case R_RELAX_TLS_GD_TO_IE: 489 return Sym.getGotVA() + A - P; 490 case R_HINT: 491 case R_NONE: 492 case R_TLSDESC_CALL: 493 llvm_unreachable("cannot relocate hint relocs"); 494 case R_MIPS_GOTREL: 495 return Sym.getVA(A) - InX::MipsGot->getGp(); 496 case R_MIPS_GOT_GP: 497 return InX::MipsGot->getGp() + A; 498 case R_MIPS_GOT_GP_PC: { 499 // R_MIPS_LO16 expression has R_MIPS_GOT_GP_PC type iif the target 500 // is _gp_disp symbol. In that case we should use the following 501 // formula for calculation "AHL + GP - P + 4". For details see p. 4-19 at 502 // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf 503 // microMIPS variants of these relocations use slightly different 504 // expressions: AHL + GP - P + 3 for %lo() and AHL + GP - P - 1 for %hi() 505 // to correctly handle less-sugnificant bit of the microMIPS symbol. 506 uint64_t V = InX::MipsGot->getGp() + A - P; 507 if (Type == R_MIPS_LO16 || Type == R_MICROMIPS_LO16) 508 V += 4; 509 if (Type == R_MICROMIPS_LO16 || Type == R_MICROMIPS_HI16) 510 V -= 1; 511 return V; 512 } 513 case R_MIPS_GOT_LOCAL_PAGE: 514 // If relocation against MIPS local symbol requires GOT entry, this entry 515 // should be initialized by 'page address'. This address is high 16-bits 516 // of sum the symbol's value and the addend. 517 return InX::MipsGot->getVA() + InX::MipsGot->getPageEntryOffset(Sym, A) - 518 InX::MipsGot->getGp(); 519 case R_MIPS_GOT_OFF: 520 case R_MIPS_GOT_OFF32: 521 // In case of MIPS if a GOT relocation has non-zero addend this addend 522 // should be applied to the GOT entry content not to the GOT entry offset. 523 // That is why we use separate expression type. 524 return InX::MipsGot->getVA() + InX::MipsGot->getSymEntryOffset(Sym, A) - 525 InX::MipsGot->getGp(); 526 case R_MIPS_TLSGD: 527 return InX::MipsGot->getVA() + InX::MipsGot->getTlsOffset() + 528 InX::MipsGot->getGlobalDynOffset(Sym) - InX::MipsGot->getGp(); 529 case R_MIPS_TLSLD: 530 return InX::MipsGot->getVA() + InX::MipsGot->getTlsOffset() + 531 InX::MipsGot->getTlsIndexOff() - InX::MipsGot->getGp(); 532 case R_PAGE_PC: 533 case R_PLT_PAGE_PC: { 534 uint64_t Dest; 535 if (Sym.isUndefWeak()) 536 Dest = getAArch64Page(A); 537 else 538 Dest = getAArch64Page(Sym.getVA(A)); 539 return Dest - getAArch64Page(P); 540 } 541 case R_PC: { 542 uint64_t Dest; 543 if (Sym.isUndefWeak()) { 544 // On ARM and AArch64 a branch to an undefined weak resolves to the 545 // next instruction, otherwise the place. 546 if (Config->EMachine == EM_ARM) 547 Dest = getARMUndefinedRelativeWeakVA(Type, A, P); 548 else if (Config->EMachine == EM_AARCH64) 549 Dest = getAArch64UndefinedRelativeWeakVA(Type, A, P); 550 else 551 Dest = Sym.getVA(A); 552 } else { 553 Dest = Sym.getVA(A); 554 } 555 return Dest - P; 556 } 557 case R_PLT: 558 return Sym.getPltVA() + A; 559 case R_PLT_PC: 560 case R_PPC_PLT_OPD: 561 return Sym.getPltVA() + A - P; 562 case R_PPC_OPD: { 563 uint64_t SymVA = Sym.getVA(A); 564 // If we have an undefined weak symbol, we might get here with a symbol 565 // address of zero. That could overflow, but the code must be unreachable, 566 // so don't bother doing anything at all. 567 if (!SymVA) 568 return 0; 569 if (Out::Opd) { 570 // If this is a local call, and we currently have the address of a 571 // function-descriptor, get the underlying code address instead. 572 uint64_t OpdStart = Out::Opd->Addr; 573 uint64_t OpdEnd = OpdStart + Out::Opd->Size; 574 bool InOpd = OpdStart <= SymVA && SymVA < OpdEnd; 575 if (InOpd) 576 SymVA = read64be(&Out::OpdBuf[SymVA - OpdStart]); 577 } 578 return SymVA - P; 579 } 580 case R_PPC_TOC: 581 return getPPC64TocBase() + A; 582 case R_RELAX_GOT_PC: 583 return Sym.getVA(A) - P; 584 case R_RELAX_TLS_GD_TO_LE: 585 case R_RELAX_TLS_IE_TO_LE: 586 case R_RELAX_TLS_LD_TO_LE: 587 case R_TLS: 588 // A weak undefined TLS symbol resolves to the base of the TLS 589 // block, i.e. gets a value of zero. If we pass --gc-sections to 590 // lld and .tbss is not referenced, it gets reclaimed and we don't 591 // create a TLS program header. Therefore, we resolve this 592 // statically to zero. 593 if (Sym.isTls() && Sym.isUndefWeak()) 594 return 0; 595 if (Target->TcbSize) 596 return Sym.getVA(A) + alignTo(Target->TcbSize, Out::TlsPhdr->p_align); 597 return Sym.getVA(A) - Out::TlsPhdr->p_memsz; 598 case R_RELAX_TLS_GD_TO_LE_NEG: 599 case R_NEG_TLS: 600 return Out::TlsPhdr->p_memsz - Sym.getVA(A); 601 case R_SIZE: 602 return Sym.getSize() + A; 603 case R_TLSDESC: 604 return InX::Got->getGlobalDynAddr(Sym) + A; 605 case R_TLSDESC_PAGE: 606 return getAArch64Page(InX::Got->getGlobalDynAddr(Sym) + A) - 607 getAArch64Page(P); 608 case R_TLSGD: 609 return InX::Got->getGlobalDynOffset(Sym) + A - InX::Got->getSize(); 610 case R_TLSGD_PC: 611 return InX::Got->getGlobalDynAddr(Sym) + A - P; 612 case R_TLSLD: 613 return InX::Got->getTlsIndexOff() + A - InX::Got->getSize(); 614 case R_TLSLD_PC: 615 return InX::Got->getTlsIndexVA() + A - P; 616 } 617 llvm_unreachable("Invalid expression"); 618 } 619 620 // This function applies relocations to sections without SHF_ALLOC bit. 621 // Such sections are never mapped to memory at runtime. Debug sections are 622 // an example. Relocations in non-alloc sections are much easier to 623 // handle than in allocated sections because it will never need complex 624 // treatement such as GOT or PLT (because at runtime no one refers them). 625 // So, we handle relocations for non-alloc sections directly in this 626 // function as a performance optimization. 627 template <class ELFT, class RelTy> 628 void InputSection::relocateNonAlloc(uint8_t *Buf, ArrayRef<RelTy> Rels) { 629 const unsigned Bits = sizeof(typename ELFT::uint) * 8; 630 631 for (const RelTy &Rel : Rels) { 632 RelType Type = Rel.getType(Config->IsMips64EL); 633 uint64_t Offset = getOffset(Rel.r_offset); 634 uint8_t *BufLoc = Buf + Offset; 635 int64_t Addend = getAddend<ELFT>(Rel); 636 if (!RelTy::IsRela) 637 Addend += Target->getImplicitAddend(BufLoc, Type); 638 639 Symbol &Sym = getFile<ELFT>()->getRelocTargetSym(Rel); 640 RelExpr Expr = Target->getRelExpr(Type, Sym, BufLoc); 641 if (Expr == R_NONE) 642 continue; 643 if (Expr != R_ABS) { 644 // GCC 8.0 or earlier have a bug that it emits R_386_GOTPC relocations 645 // against _GLOBAL_OFFSET_TABLE for .debug_info. The bug seems to have 646 // been fixed in 2017: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82630, 647 // but we need to keep this bug-compatible code for a while. 648 if (Config->EMachine == EM_386 && Type == R_386_GOTPC) 649 continue; 650 651 error(getLocation<ELFT>(Offset) + ": has non-ABS relocation " + 652 toString(Type) + " against symbol '" + toString(Sym) + "'"); 653 return; 654 } 655 656 if (Sym.isTls() && !Out::TlsPhdr) 657 Target->relocateOne(BufLoc, Type, 0); 658 else 659 Target->relocateOne(BufLoc, Type, SignExtend64<Bits>(Sym.getVA(Addend))); 660 } 661 } 662 663 template <class ELFT> 664 void InputSectionBase::relocate(uint8_t *Buf, uint8_t *BufEnd) { 665 if (Flags & SHF_ALLOC) { 666 relocateAlloc(Buf, BufEnd); 667 return; 668 } 669 670 auto *Sec = cast<InputSection>(this); 671 if (Sec->AreRelocsRela) 672 Sec->relocateNonAlloc<ELFT>(Buf, Sec->template relas<ELFT>()); 673 else 674 Sec->relocateNonAlloc<ELFT>(Buf, Sec->template rels<ELFT>()); 675 } 676 677 void InputSectionBase::relocateAlloc(uint8_t *Buf, uint8_t *BufEnd) { 678 assert(Flags & SHF_ALLOC); 679 const unsigned Bits = Config->Wordsize * 8; 680 681 for (const Relocation &Rel : Relocations) { 682 uint64_t Offset = getOffset(Rel.Offset); 683 uint8_t *BufLoc = Buf + Offset; 684 RelType Type = Rel.Type; 685 686 uint64_t AddrLoc = getOutputSection()->Addr + Offset; 687 RelExpr Expr = Rel.Expr; 688 uint64_t TargetVA = SignExtend64( 689 getRelocTargetVA(Type, Rel.Addend, AddrLoc, *Rel.Sym, Expr), Bits); 690 691 switch (Expr) { 692 case R_RELAX_GOT_PC: 693 case R_RELAX_GOT_PC_NOPIC: 694 Target->relaxGot(BufLoc, TargetVA); 695 break; 696 case R_RELAX_TLS_IE_TO_LE: 697 Target->relaxTlsIeToLe(BufLoc, Type, TargetVA); 698 break; 699 case R_RELAX_TLS_LD_TO_LE: 700 Target->relaxTlsLdToLe(BufLoc, Type, TargetVA); 701 break; 702 case R_RELAX_TLS_GD_TO_LE: 703 case R_RELAX_TLS_GD_TO_LE_NEG: 704 Target->relaxTlsGdToLe(BufLoc, Type, TargetVA); 705 break; 706 case R_RELAX_TLS_GD_TO_IE: 707 case R_RELAX_TLS_GD_TO_IE_ABS: 708 case R_RELAX_TLS_GD_TO_IE_PAGE_PC: 709 case R_RELAX_TLS_GD_TO_IE_END: 710 Target->relaxTlsGdToIe(BufLoc, Type, TargetVA); 711 break; 712 case R_PPC_PLT_OPD: 713 // Patch a nop (0x60000000) to a ld. 714 if (BufLoc + 8 <= BufEnd && read32be(BufLoc + 4) == 0x60000000) 715 write32be(BufLoc + 4, 0xe8410028); // ld %r2, 40(%r1) 716 LLVM_FALLTHROUGH; 717 default: 718 Target->relocateOne(BufLoc, Type, TargetVA); 719 break; 720 } 721 } 722 } 723 724 template <class ELFT> void InputSection::writeTo(uint8_t *Buf) { 725 if (Type == SHT_NOBITS) 726 return; 727 728 if (auto *S = dyn_cast<SyntheticSection>(this)) { 729 S->writeTo(Buf + OutSecOff); 730 return; 731 } 732 733 // If -r or --emit-relocs is given, then an InputSection 734 // may be a relocation section. 735 if (Type == SHT_RELA) { 736 copyRelocations<ELFT>(Buf + OutSecOff, getDataAs<typename ELFT::Rela>()); 737 return; 738 } 739 if (Type == SHT_REL) { 740 copyRelocations<ELFT>(Buf + OutSecOff, getDataAs<typename ELFT::Rel>()); 741 return; 742 } 743 744 // If -r is given, we may have a SHT_GROUP section. 745 if (Type == SHT_GROUP) { 746 copyShtGroup<ELFT>(Buf + OutSecOff); 747 return; 748 } 749 750 // Copy section contents from source object file to output file 751 // and then apply relocations. 752 memcpy(Buf + OutSecOff, Data.data(), Data.size()); 753 uint8_t *BufEnd = Buf + OutSecOff + Data.size(); 754 relocate<ELFT>(Buf, BufEnd); 755 } 756 757 void InputSection::replace(InputSection *Other) { 758 Alignment = std::max(Alignment, Other->Alignment); 759 Other->Repl = Repl; 760 Other->Live = false; 761 } 762 763 template <class ELFT> 764 EhInputSection::EhInputSection(ObjFile<ELFT> &F, 765 const typename ELFT::Shdr &Header, 766 StringRef Name) 767 : InputSectionBase(F, Header, Name, InputSectionBase::EHFrame) {} 768 769 SyntheticSection *EhInputSection::getParent() const { 770 return cast_or_null<SyntheticSection>(Parent); 771 } 772 773 // Returns the index of the first relocation that points to a region between 774 // Begin and Begin+Size. 775 template <class IntTy, class RelTy> 776 static unsigned getReloc(IntTy Begin, IntTy Size, const ArrayRef<RelTy> &Rels, 777 unsigned &RelocI) { 778 // Start search from RelocI for fast access. That works because the 779 // relocations are sorted in .eh_frame. 780 for (unsigned N = Rels.size(); RelocI < N; ++RelocI) { 781 const RelTy &Rel = Rels[RelocI]; 782 if (Rel.r_offset < Begin) 783 continue; 784 785 if (Rel.r_offset < Begin + Size) 786 return RelocI; 787 return -1; 788 } 789 return -1; 790 } 791 792 // .eh_frame is a sequence of CIE or FDE records. 793 // This function splits an input section into records and returns them. 794 template <class ELFT> void EhInputSection::split() { 795 // Early exit if already split. 796 if (!Pieces.empty()) 797 return; 798 799 if (AreRelocsRela) 800 split<ELFT>(relas<ELFT>()); 801 else 802 split<ELFT>(rels<ELFT>()); 803 } 804 805 template <class ELFT, class RelTy> 806 void EhInputSection::split(ArrayRef<RelTy> Rels) { 807 unsigned RelI = 0; 808 for (size_t Off = 0, End = Data.size(); Off != End;) { 809 size_t Size = readEhRecordSize(this, Off); 810 Pieces.emplace_back(Off, this, Size, getReloc(Off, Size, Rels, RelI)); 811 // The empty record is the end marker. 812 if (Size == 4) 813 break; 814 Off += Size; 815 } 816 } 817 818 static size_t findNull(StringRef S, size_t EntSize) { 819 // Optimize the common case. 820 if (EntSize == 1) 821 return S.find(0); 822 823 for (unsigned I = 0, N = S.size(); I != N; I += EntSize) { 824 const char *B = S.begin() + I; 825 if (std::all_of(B, B + EntSize, [](char C) { return C == 0; })) 826 return I; 827 } 828 return StringRef::npos; 829 } 830 831 SyntheticSection *MergeInputSection::getParent() const { 832 return cast_or_null<SyntheticSection>(Parent); 833 } 834 835 // Split SHF_STRINGS section. Such section is a sequence of 836 // null-terminated strings. 837 void MergeInputSection::splitStrings(ArrayRef<uint8_t> Data, size_t EntSize) { 838 size_t Off = 0; 839 bool IsAlloc = Flags & SHF_ALLOC; 840 StringRef S = toStringRef(Data); 841 842 while (!S.empty()) { 843 size_t End = findNull(S, EntSize); 844 if (End == StringRef::npos) 845 fatal(toString(this) + ": string is not null terminated"); 846 size_t Size = End + EntSize; 847 848 Pieces.emplace_back(Off, xxHash64(S.substr(0, Size)), !IsAlloc); 849 S = S.substr(Size); 850 Off += Size; 851 } 852 } 853 854 // Split non-SHF_STRINGS section. Such section is a sequence of 855 // fixed size records. 856 void MergeInputSection::splitNonStrings(ArrayRef<uint8_t> Data, 857 size_t EntSize) { 858 size_t Size = Data.size(); 859 assert((Size % EntSize) == 0); 860 bool IsAlloc = Flags & SHF_ALLOC; 861 862 for (size_t I = 0; I != Size; I += EntSize) 863 Pieces.emplace_back(I, xxHash64(toStringRef(Data.slice(I, EntSize))), 864 !IsAlloc); 865 } 866 867 template <class ELFT> 868 MergeInputSection::MergeInputSection(ObjFile<ELFT> &F, 869 const typename ELFT::Shdr &Header, 870 StringRef Name) 871 : InputSectionBase(F, Header, Name, InputSectionBase::Merge) {} 872 873 MergeInputSection::MergeInputSection(uint64_t Flags, uint32_t Type, 874 uint64_t Entsize, ArrayRef<uint8_t> Data, 875 StringRef Name) 876 : InputSectionBase(nullptr, Flags, Type, Entsize, /*Link*/ 0, /*Info*/ 0, 877 /*Alignment*/ Entsize, Data, Name, SectionBase::Merge) {} 878 879 // This function is called after we obtain a complete list of input sections 880 // that need to be linked. This is responsible to split section contents 881 // into small chunks for further processing. 882 // 883 // Note that this function is called from parallelForEach. This must be 884 // thread-safe (i.e. no memory allocation from the pools). 885 void MergeInputSection::splitIntoPieces() { 886 assert(Pieces.empty()); 887 888 if (Flags & SHF_STRINGS) 889 splitStrings(Data, Entsize); 890 else 891 splitNonStrings(Data, Entsize); 892 893 if (Config->GcSections && (Flags & SHF_ALLOC)) 894 for (uint64_t Off : LiveOffsets) 895 getSectionPiece(Off)->Live = true; 896 } 897 898 // Do binary search to get a section piece at a given input offset. 899 SectionPiece *MergeInputSection::getSectionPiece(uint64_t Offset) { 900 auto *This = static_cast<const MergeInputSection *>(this); 901 return const_cast<SectionPiece *>(This->getSectionPiece(Offset)); 902 } 903 904 template <class It, class T, class Compare> 905 static It fastUpperBound(It First, It Last, const T &Value, Compare Comp) { 906 size_t Size = std::distance(First, Last); 907 assert(Size != 0); 908 while (Size != 1) { 909 size_t H = Size / 2; 910 const It MI = First + H; 911 Size -= H; 912 First = Comp(Value, *MI) ? First : First + H; 913 } 914 return Comp(Value, *First) ? First : First + 1; 915 } 916 917 const SectionPiece *MergeInputSection::getSectionPiece(uint64_t Offset) const { 918 if (Data.size() <= Offset) 919 fatal(toString(this) + ": entry is past the end of the section"); 920 921 // Find the element this offset points to. 922 auto I = fastUpperBound( 923 Pieces.begin(), Pieces.end(), Offset, 924 [](const uint64_t &A, const SectionPiece &B) { return A < B.InputOff; }); 925 --I; 926 return &*I; 927 } 928 929 // Returns the offset in an output section for a given input offset. 930 // Because contents of a mergeable section is not contiguous in output, 931 // it is not just an addition to a base output offset. 932 uint64_t MergeInputSection::getOffset(uint64_t Offset) const { 933 if (!Live) 934 return 0; 935 936 // Initialize OffsetMap lazily. 937 llvm::call_once(InitOffsetMap, [&] { 938 OffsetMap.reserve(Pieces.size()); 939 for (size_t I = 0; I < Pieces.size(); ++I) 940 OffsetMap[Pieces[I].InputOff] = I; 941 }); 942 943 // Find a string starting at a given offset. 944 auto It = OffsetMap.find(Offset); 945 if (It != OffsetMap.end()) 946 return Pieces[It->second].OutputOff; 947 948 // If Offset is not at beginning of a section piece, it is not in the map. 949 // In that case we need to search from the original section piece vector. 950 const SectionPiece &Piece = *getSectionPiece(Offset); 951 if (!Piece.Live) 952 return 0; 953 954 uint64_t Addend = Offset - Piece.InputOff; 955 return Piece.OutputOff + Addend; 956 } 957 958 template InputSection::InputSection(ObjFile<ELF32LE> &, const ELF32LE::Shdr &, 959 StringRef); 960 template InputSection::InputSection(ObjFile<ELF32BE> &, const ELF32BE::Shdr &, 961 StringRef); 962 template InputSection::InputSection(ObjFile<ELF64LE> &, const ELF64LE::Shdr &, 963 StringRef); 964 template InputSection::InputSection(ObjFile<ELF64BE> &, const ELF64BE::Shdr &, 965 StringRef); 966 967 template std::string InputSectionBase::getLocation<ELF32LE>(uint64_t); 968 template std::string InputSectionBase::getLocation<ELF32BE>(uint64_t); 969 template std::string InputSectionBase::getLocation<ELF64LE>(uint64_t); 970 template std::string InputSectionBase::getLocation<ELF64BE>(uint64_t); 971 972 template void InputSection::writeTo<ELF32LE>(uint8_t *); 973 template void InputSection::writeTo<ELF32BE>(uint8_t *); 974 template void InputSection::writeTo<ELF64LE>(uint8_t *); 975 template void InputSection::writeTo<ELF64BE>(uint8_t *); 976 977 template MergeInputSection::MergeInputSection(ObjFile<ELF32LE> &, 978 const ELF32LE::Shdr &, StringRef); 979 template MergeInputSection::MergeInputSection(ObjFile<ELF32BE> &, 980 const ELF32BE::Shdr &, StringRef); 981 template MergeInputSection::MergeInputSection(ObjFile<ELF64LE> &, 982 const ELF64LE::Shdr &, StringRef); 983 template MergeInputSection::MergeInputSection(ObjFile<ELF64BE> &, 984 const ELF64BE::Shdr &, StringRef); 985 986 template EhInputSection::EhInputSection(ObjFile<ELF32LE> &, 987 const ELF32LE::Shdr &, StringRef); 988 template EhInputSection::EhInputSection(ObjFile<ELF32BE> &, 989 const ELF32BE::Shdr &, StringRef); 990 template EhInputSection::EhInputSection(ObjFile<ELF64LE> &, 991 const ELF64LE::Shdr &, StringRef); 992 template EhInputSection::EhInputSection(ObjFile<ELF64BE> &, 993 const ELF64BE::Shdr &, StringRef); 994 995 template void EhInputSection::split<ELF32LE>(); 996 template void EhInputSection::split<ELF32BE>(); 997 template void EhInputSection::split<ELF64LE>(); 998 template void EhInputSection::split<ELF64BE>(); 999