1 //===- InputFiles.cpp -----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "InputFiles.h" 10 #include "Driver.h" 11 #include "InputSection.h" 12 #include "LinkerScript.h" 13 #include "SymbolTable.h" 14 #include "Symbols.h" 15 #include "SyntheticSections.h" 16 #include "lld/Common/ErrorHandler.h" 17 #include "lld/Common/Memory.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/CodeGen/Analysis.h" 20 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 21 #include "llvm/IR/LLVMContext.h" 22 #include "llvm/IR/Module.h" 23 #include "llvm/LTO/LTO.h" 24 #include "llvm/MC/StringTableBuilder.h" 25 #include "llvm/Object/ELFObjectFile.h" 26 #include "llvm/Support/ARMAttributeParser.h" 27 #include "llvm/Support/ARMBuildAttributes.h" 28 #include "llvm/Support/Path.h" 29 #include "llvm/Support/TarWriter.h" 30 #include "llvm/Support/raw_ostream.h" 31 32 using namespace llvm; 33 using namespace llvm::ELF; 34 using namespace llvm::object; 35 using namespace llvm::sys; 36 using namespace llvm::sys::fs; 37 38 using namespace lld; 39 using namespace lld::elf; 40 41 bool InputFile::IsInGroup; 42 uint32_t InputFile::NextGroupId; 43 std::vector<BinaryFile *> elf::BinaryFiles; 44 std::vector<BitcodeFile *> elf::BitcodeFiles; 45 std::vector<LazyObjFile *> elf::LazyObjFiles; 46 std::vector<InputFile *> elf::ObjectFiles; 47 std::vector<SharedFile *> elf::SharedFiles; 48 49 std::unique_ptr<TarWriter> elf::Tar; 50 51 static ELFKind getELFKind(MemoryBufferRef MB, StringRef ArchiveName) { 52 unsigned char Size; 53 unsigned char Endian; 54 std::tie(Size, Endian) = getElfArchType(MB.getBuffer()); 55 56 auto Fatal = [&](StringRef Msg) { 57 StringRef Filename = MB.getBufferIdentifier(); 58 if (ArchiveName.empty()) 59 fatal(Filename + ": " + Msg); 60 else 61 fatal(ArchiveName + "(" + Filename + "): " + Msg); 62 }; 63 64 if (!MB.getBuffer().startswith(ElfMagic)) 65 Fatal("not an ELF file"); 66 if (Endian != ELFDATA2LSB && Endian != ELFDATA2MSB) 67 Fatal("corrupted ELF file: invalid data encoding"); 68 if (Size != ELFCLASS32 && Size != ELFCLASS64) 69 Fatal("corrupted ELF file: invalid file class"); 70 71 size_t BufSize = MB.getBuffer().size(); 72 if ((Size == ELFCLASS32 && BufSize < sizeof(Elf32_Ehdr)) || 73 (Size == ELFCLASS64 && BufSize < sizeof(Elf64_Ehdr))) 74 Fatal("corrupted ELF file: file is too short"); 75 76 if (Size == ELFCLASS32) 77 return (Endian == ELFDATA2LSB) ? ELF32LEKind : ELF32BEKind; 78 return (Endian == ELFDATA2LSB) ? ELF64LEKind : ELF64BEKind; 79 } 80 81 InputFile::InputFile(Kind K, MemoryBufferRef M) 82 : MB(M), GroupId(NextGroupId), FileKind(K) { 83 // All files within the same --{start,end}-group get the same group ID. 84 // Otherwise, a new file will get a new group ID. 85 if (!IsInGroup) 86 ++NextGroupId; 87 } 88 89 Optional<MemoryBufferRef> elf::readFile(StringRef Path) { 90 // The --chroot option changes our virtual root directory. 91 // This is useful when you are dealing with files created by --reproduce. 92 if (!Config->Chroot.empty() && Path.startswith("/")) 93 Path = Saver.save(Config->Chroot + Path); 94 95 log(Path); 96 97 auto MBOrErr = MemoryBuffer::getFile(Path, -1, false); 98 if (auto EC = MBOrErr.getError()) { 99 error("cannot open " + Path + ": " + EC.message()); 100 return None; 101 } 102 103 std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; 104 MemoryBufferRef MBRef = MB->getMemBufferRef(); 105 make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership 106 107 if (Tar) 108 Tar->append(relativeToRoot(Path), MBRef.getBuffer()); 109 return MBRef; 110 } 111 112 // All input object files must be for the same architecture 113 // (e.g. it does not make sense to link x86 object files with 114 // MIPS object files.) This function checks for that error. 115 static bool isCompatible(InputFile *File) { 116 if (!File->isElf() && !isa<BitcodeFile>(File)) 117 return true; 118 119 if (File->EKind == Config->EKind && File->EMachine == Config->EMachine) { 120 if (Config->EMachine != EM_MIPS) 121 return true; 122 if (isMipsN32Abi(File) == Config->MipsN32Abi) 123 return true; 124 } 125 126 if (!Config->Emulation.empty()) { 127 error(toString(File) + " is incompatible with " + Config->Emulation); 128 } else { 129 InputFile *Existing; 130 if (!ObjectFiles.empty()) 131 Existing = ObjectFiles[0]; 132 else if (!SharedFiles.empty()) 133 Existing = SharedFiles[0]; 134 else 135 Existing = BitcodeFiles[0]; 136 137 error(toString(File) + " is incompatible with " + toString(Existing)); 138 } 139 140 return false; 141 } 142 143 template <class ELFT> static void doParseFile(InputFile *File) { 144 if (!isCompatible(File)) 145 return; 146 147 // Binary file 148 if (auto *F = dyn_cast<BinaryFile>(File)) { 149 BinaryFiles.push_back(F); 150 F->parse(); 151 return; 152 } 153 154 // .a file 155 if (auto *F = dyn_cast<ArchiveFile>(File)) { 156 F->parse(); 157 return; 158 } 159 160 // Lazy object file 161 if (auto *F = dyn_cast<LazyObjFile>(File)) { 162 LazyObjFiles.push_back(F); 163 F->parse<ELFT>(); 164 return; 165 } 166 167 if (Config->Trace) 168 message(toString(File)); 169 170 // .so file 171 if (auto *F = dyn_cast<SharedFile>(File)) { 172 F->parse<ELFT>(); 173 return; 174 } 175 176 // LLVM bitcode file 177 if (auto *F = dyn_cast<BitcodeFile>(File)) { 178 BitcodeFiles.push_back(F); 179 F->parse<ELFT>(Symtab->ComdatGroups); 180 return; 181 } 182 183 // Regular object file 184 ObjectFiles.push_back(File); 185 cast<ObjFile<ELFT>>(File)->parse(Symtab->ComdatGroups); 186 } 187 188 // Add symbols in File to the symbol table. 189 void elf::parseFile(InputFile *File) { 190 switch (Config->EKind) { 191 case ELF32LEKind: 192 doParseFile<ELF32LE>(File); 193 return; 194 case ELF32BEKind: 195 doParseFile<ELF32BE>(File); 196 return; 197 case ELF64LEKind: 198 doParseFile<ELF64LE>(File); 199 return; 200 case ELF64BEKind: 201 doParseFile<ELF64BE>(File); 202 return; 203 default: 204 llvm_unreachable("unknown ELFT"); 205 } 206 } 207 208 // Concatenates arguments to construct a string representing an error location. 209 static std::string createFileLineMsg(StringRef Path, unsigned Line) { 210 std::string Filename = path::filename(Path); 211 std::string Lineno = ":" + std::to_string(Line); 212 if (Filename == Path) 213 return Filename + Lineno; 214 return Filename + Lineno + " (" + Path.str() + Lineno + ")"; 215 } 216 217 template <class ELFT> 218 static std::string getSrcMsgAux(ObjFile<ELFT> &File, const Symbol &Sym, 219 InputSectionBase &Sec, uint64_t Offset) { 220 // In DWARF, functions and variables are stored to different places. 221 // First, lookup a function for a given offset. 222 if (Optional<DILineInfo> Info = File.getDILineInfo(&Sec, Offset)) 223 return createFileLineMsg(Info->FileName, Info->Line); 224 225 // If it failed, lookup again as a variable. 226 if (Optional<std::pair<std::string, unsigned>> FileLine = 227 File.getVariableLoc(Sym.getName())) 228 return createFileLineMsg(FileLine->first, FileLine->second); 229 230 // File.SourceFile contains STT_FILE symbol, and that is a last resort. 231 return File.SourceFile; 232 } 233 234 std::string InputFile::getSrcMsg(const Symbol &Sym, InputSectionBase &Sec, 235 uint64_t Offset) { 236 if (kind() != ObjKind) 237 return ""; 238 switch (Config->EKind) { 239 default: 240 llvm_unreachable("Invalid kind"); 241 case ELF32LEKind: 242 return getSrcMsgAux(cast<ObjFile<ELF32LE>>(*this), Sym, Sec, Offset); 243 case ELF32BEKind: 244 return getSrcMsgAux(cast<ObjFile<ELF32BE>>(*this), Sym, Sec, Offset); 245 case ELF64LEKind: 246 return getSrcMsgAux(cast<ObjFile<ELF64LE>>(*this), Sym, Sec, Offset); 247 case ELF64BEKind: 248 return getSrcMsgAux(cast<ObjFile<ELF64BE>>(*this), Sym, Sec, Offset); 249 } 250 } 251 252 template <class ELFT> void ObjFile<ELFT>::initializeDwarf() { 253 Dwarf = llvm::make_unique<DWARFContext>(make_unique<LLDDwarfObj<ELFT>>(this)); 254 for (std::unique_ptr<DWARFUnit> &CU : Dwarf->compile_units()) { 255 auto Report = [](Error Err) { 256 handleAllErrors(std::move(Err), 257 [](ErrorInfoBase &Info) { warn(Info.message()); }); 258 }; 259 Expected<const DWARFDebugLine::LineTable *> ExpectedLT = 260 Dwarf->getLineTableForUnit(CU.get(), Report); 261 const DWARFDebugLine::LineTable *LT = nullptr; 262 if (ExpectedLT) 263 LT = *ExpectedLT; 264 else 265 Report(ExpectedLT.takeError()); 266 if (!LT) 267 continue; 268 LineTables.push_back(LT); 269 270 // Loop over variable records and insert them to VariableLoc. 271 for (const auto &Entry : CU->dies()) { 272 DWARFDie Die(CU.get(), &Entry); 273 // Skip all tags that are not variables. 274 if (Die.getTag() != dwarf::DW_TAG_variable) 275 continue; 276 277 // Skip if a local variable because we don't need them for generating 278 // error messages. In general, only non-local symbols can fail to be 279 // linked. 280 if (!dwarf::toUnsigned(Die.find(dwarf::DW_AT_external), 0)) 281 continue; 282 283 // Get the source filename index for the variable. 284 unsigned File = dwarf::toUnsigned(Die.find(dwarf::DW_AT_decl_file), 0); 285 if (!LT->hasFileAtIndex(File)) 286 continue; 287 288 // Get the line number on which the variable is declared. 289 unsigned Line = dwarf::toUnsigned(Die.find(dwarf::DW_AT_decl_line), 0); 290 291 // Here we want to take the variable name to add it into VariableLoc. 292 // Variable can have regular and linkage name associated. At first, we try 293 // to get linkage name as it can be different, for example when we have 294 // two variables in different namespaces of the same object. Use common 295 // name otherwise, but handle the case when it also absent in case if the 296 // input object file lacks some debug info. 297 StringRef Name = 298 dwarf::toString(Die.find(dwarf::DW_AT_linkage_name), 299 dwarf::toString(Die.find(dwarf::DW_AT_name), "")); 300 if (!Name.empty()) 301 VariableLoc.insert({Name, {LT, File, Line}}); 302 } 303 } 304 } 305 306 // Returns the pair of file name and line number describing location of data 307 // object (variable, array, etc) definition. 308 template <class ELFT> 309 Optional<std::pair<std::string, unsigned>> 310 ObjFile<ELFT>::getVariableLoc(StringRef Name) { 311 llvm::call_once(InitDwarfLine, [this]() { initializeDwarf(); }); 312 313 // Return if we have no debug information about data object. 314 auto It = VariableLoc.find(Name); 315 if (It == VariableLoc.end()) 316 return None; 317 318 // Take file name string from line table. 319 std::string FileName; 320 if (!It->second.LT->getFileNameByIndex( 321 It->second.File, nullptr, 322 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, FileName)) 323 return None; 324 325 return std::make_pair(FileName, It->second.Line); 326 } 327 328 // Returns source line information for a given offset 329 // using DWARF debug info. 330 template <class ELFT> 331 Optional<DILineInfo> ObjFile<ELFT>::getDILineInfo(InputSectionBase *S, 332 uint64_t Offset) { 333 llvm::call_once(InitDwarfLine, [this]() { initializeDwarf(); }); 334 335 // Detect SectionIndex for specified section. 336 uint64_t SectionIndex = object::SectionedAddress::UndefSection; 337 ArrayRef<InputSectionBase *> Sections = S->File->getSections(); 338 for (uint64_t CurIndex = 0; CurIndex < Sections.size(); ++CurIndex) { 339 if (S == Sections[CurIndex]) { 340 SectionIndex = CurIndex; 341 break; 342 } 343 } 344 345 // Use fake address calcuated by adding section file offset and offset in 346 // section. See comments for ObjectInfo class. 347 DILineInfo Info; 348 for (const llvm::DWARFDebugLine::LineTable *LT : LineTables) { 349 if (LT->getFileLineInfoForAddress( 350 {S->getOffsetInFile() + Offset, SectionIndex}, nullptr, 351 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, Info)) 352 return Info; 353 } 354 return None; 355 } 356 357 // Returns "<internal>", "foo.a(bar.o)" or "baz.o". 358 std::string lld::toString(const InputFile *F) { 359 if (!F) 360 return "<internal>"; 361 362 if (F->ToStringCache.empty()) { 363 if (F->ArchiveName.empty()) 364 F->ToStringCache = F->getName(); 365 else 366 F->ToStringCache = (F->ArchiveName + "(" + F->getName() + ")").str(); 367 } 368 return F->ToStringCache; 369 } 370 371 ELFFileBase::ELFFileBase(Kind K, MemoryBufferRef MB) : InputFile(K, MB) {} 372 373 template <class ELFT> void ELFFileBase::parseHeader() { 374 if (ELFT::TargetEndianness == support::little) 375 EKind = ELFT::Is64Bits ? ELF64LEKind : ELF32LEKind; 376 else 377 EKind = ELFT::Is64Bits ? ELF64BEKind : ELF32BEKind; 378 379 EMachine = getObj<ELFT>().getHeader()->e_machine; 380 OSABI = getObj<ELFT>().getHeader()->e_ident[llvm::ELF::EI_OSABI]; 381 ABIVersion = getObj<ELFT>().getHeader()->e_ident[llvm::ELF::EI_ABIVERSION]; 382 } 383 384 template <class ELFT> 385 void ELFFileBase::initSymtab(ArrayRef<typename ELFT::Shdr> Sections, 386 const typename ELFT::Shdr *Symtab) { 387 FirstGlobal = Symtab->sh_info; 388 ArrayRef<typename ELFT::Sym> ELFSyms = 389 CHECK(getObj<ELFT>().symbols(Symtab), this); 390 if (FirstGlobal == 0 || FirstGlobal > ELFSyms.size()) 391 fatal(toString(this) + ": invalid sh_info in symbol table"); 392 this->ELFSyms = reinterpret_cast<const void *>(ELFSyms.data()); 393 this->NumELFSyms = ELFSyms.size(); 394 395 StringTable = 396 CHECK(getObj<ELFT>().getStringTableForSymtab(*Symtab, Sections), this); 397 } 398 399 template <class ELFT> 400 ObjFile<ELFT>::ObjFile(MemoryBufferRef M, StringRef ArchiveName) 401 : ELFFileBase(ObjKind, M) { 402 parseHeader<ELFT>(); 403 this->ArchiveName = ArchiveName; 404 } 405 406 template <class ELFT> 407 uint32_t ObjFile<ELFT>::getSectionIndex(const Elf_Sym &Sym) const { 408 return CHECK( 409 this->getObj().getSectionIndex(&Sym, getELFSyms<ELFT>(), ShndxTable), 410 this); 411 } 412 413 template <class ELFT> ArrayRef<Symbol *> ObjFile<ELFT>::getLocalSymbols() { 414 if (this->Symbols.empty()) 415 return {}; 416 return makeArrayRef(this->Symbols).slice(1, this->FirstGlobal - 1); 417 } 418 419 template <class ELFT> ArrayRef<Symbol *> ObjFile<ELFT>::getGlobalSymbols() { 420 return makeArrayRef(this->Symbols).slice(this->FirstGlobal); 421 } 422 423 template <class ELFT> 424 void ObjFile<ELFT>::parse( 425 DenseMap<CachedHashStringRef, const InputFile *> &ComdatGroups) { 426 // Read a section table. JustSymbols is usually false. 427 if (this->JustSymbols) 428 initializeJustSymbols(); 429 else 430 initializeSections(ComdatGroups); 431 432 // Read a symbol table. 433 initializeSymbols(); 434 } 435 436 // Sections with SHT_GROUP and comdat bits define comdat section groups. 437 // They are identified and deduplicated by group name. This function 438 // returns a group name. 439 template <class ELFT> 440 StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> Sections, 441 const Elf_Shdr &Sec) { 442 // Group signatures are stored as symbol names in object files. 443 // sh_info contains a symbol index, so we fetch a symbol and read its name. 444 if (this->getELFSyms<ELFT>().empty()) 445 this->initSymtab<ELFT>( 446 Sections, CHECK(object::getSection<ELFT>(Sections, Sec.sh_link), this)); 447 448 const Elf_Sym *Sym = 449 CHECK(object::getSymbol<ELFT>(this->getELFSyms<ELFT>(), Sec.sh_info), this); 450 StringRef Signature = CHECK(Sym->getName(this->StringTable), this); 451 452 // As a special case, if a symbol is a section symbol and has no name, 453 // we use a section name as a signature. 454 // 455 // Such SHT_GROUP sections are invalid from the perspective of the ELF 456 // standard, but GNU gold 1.14 (the newest version as of July 2017) or 457 // older produce such sections as outputs for the -r option, so we need 458 // a bug-compatibility. 459 if (Signature.empty() && Sym->getType() == STT_SECTION) 460 return getSectionName(Sec); 461 return Signature; 462 } 463 464 template <class ELFT> bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &Sec) { 465 // On a regular link we don't merge sections if -O0 (default is -O1). This 466 // sometimes makes the linker significantly faster, although the output will 467 // be bigger. 468 // 469 // Doing the same for -r would create a problem as it would combine sections 470 // with different sh_entsize. One option would be to just copy every SHF_MERGE 471 // section as is to the output. While this would produce a valid ELF file with 472 // usable SHF_MERGE sections, tools like (llvm-)?dwarfdump get confused when 473 // they see two .debug_str. We could have separate logic for combining 474 // SHF_MERGE sections based both on their name and sh_entsize, but that seems 475 // to be more trouble than it is worth. Instead, we just use the regular (-O1) 476 // logic for -r. 477 if (Config->Optimize == 0 && !Config->Relocatable) 478 return false; 479 480 // A mergeable section with size 0 is useless because they don't have 481 // any data to merge. A mergeable string section with size 0 can be 482 // argued as invalid because it doesn't end with a null character. 483 // We'll avoid a mess by handling them as if they were non-mergeable. 484 if (Sec.sh_size == 0) 485 return false; 486 487 // Check for sh_entsize. The ELF spec is not clear about the zero 488 // sh_entsize. It says that "the member [sh_entsize] contains 0 if 489 // the section does not hold a table of fixed-size entries". We know 490 // that Rust 1.13 produces a string mergeable section with a zero 491 // sh_entsize. Here we just accept it rather than being picky about it. 492 uint64_t EntSize = Sec.sh_entsize; 493 if (EntSize == 0) 494 return false; 495 if (Sec.sh_size % EntSize) 496 fatal(toString(this) + 497 ": SHF_MERGE section size must be a multiple of sh_entsize"); 498 499 uint64_t Flags = Sec.sh_flags; 500 if (!(Flags & SHF_MERGE)) 501 return false; 502 if (Flags & SHF_WRITE) 503 fatal(toString(this) + ": writable SHF_MERGE section is not supported"); 504 505 return true; 506 } 507 508 // This is for --just-symbols. 509 // 510 // --just-symbols is a very minor feature that allows you to link your 511 // output against other existing program, so that if you load both your 512 // program and the other program into memory, your output can refer the 513 // other program's symbols. 514 // 515 // When the option is given, we link "just symbols". The section table is 516 // initialized with null pointers. 517 template <class ELFT> void ObjFile<ELFT>::initializeJustSymbols() { 518 ArrayRef<Elf_Shdr> ObjSections = CHECK(this->getObj().sections(), this); 519 this->Sections.resize(ObjSections.size()); 520 521 for (const Elf_Shdr &Sec : ObjSections) { 522 if (Sec.sh_type != SHT_SYMTAB) 523 continue; 524 this->initSymtab<ELFT>(ObjSections, &Sec); 525 return; 526 } 527 } 528 529 // An ELF object file may contain a `.deplibs` section. If it exists, the 530 // section contains a list of library specifiers such as `m` for libm. This 531 // function resolves a given name by finding the first matching library checking 532 // the various ways that a library can be specified to LLD. This ELF extension 533 // is a form of autolinking and is called `dependent libraries`. It is currently 534 // unique to LLVM and lld. 535 static void addDependentLibrary(StringRef Specifier, const InputFile *F) { 536 if (!Config->DependentLibraries) 537 return; 538 if (fs::exists(Specifier)) 539 Driver->addFile(Specifier, /*WithLOption=*/false); 540 else if (Optional<std::string> S = findFromSearchPaths(Specifier)) 541 Driver->addFile(*S, /*WithLOption=*/true); 542 else if (Optional<std::string> S = searchLibraryBaseName(Specifier)) 543 Driver->addFile(*S, /*WithLOption=*/true); 544 else 545 error(toString(F) + 546 ": unable to find library from dependent library specifier: " + 547 Specifier); 548 } 549 550 template <class ELFT> 551 void ObjFile<ELFT>::initializeSections( 552 DenseMap<CachedHashStringRef, const InputFile *> &ComdatGroups) { 553 const ELFFile<ELFT> &Obj = this->getObj(); 554 555 ArrayRef<Elf_Shdr> ObjSections = CHECK(Obj.sections(), this); 556 uint64_t Size = ObjSections.size(); 557 this->Sections.resize(Size); 558 this->SectionStringTable = 559 CHECK(Obj.getSectionStringTable(ObjSections), this); 560 561 for (size_t I = 0, E = ObjSections.size(); I < E; I++) { 562 if (this->Sections[I] == &InputSection::Discarded) 563 continue; 564 const Elf_Shdr &Sec = ObjSections[I]; 565 566 if (Sec.sh_type == ELF::SHT_LLVM_CALL_GRAPH_PROFILE) 567 CGProfile = 568 check(Obj.template getSectionContentsAsArray<Elf_CGProfile>(&Sec)); 569 570 // SHF_EXCLUDE'ed sections are discarded by the linker. However, 571 // if -r is given, we'll let the final link discard such sections. 572 // This is compatible with GNU. 573 if ((Sec.sh_flags & SHF_EXCLUDE) && !Config->Relocatable) { 574 if (Sec.sh_type == SHT_LLVM_ADDRSIG) { 575 // We ignore the address-significance table if we know that the object 576 // file was created by objcopy or ld -r. This is because these tools 577 // will reorder the symbols in the symbol table, invalidating the data 578 // in the address-significance table, which refers to symbols by index. 579 if (Sec.sh_link != 0) 580 this->AddrsigSec = &Sec; 581 else if (Config->ICF == ICFLevel::Safe) 582 warn(toString(this) + ": --icf=safe is incompatible with object " 583 "files created using objcopy or ld -r"); 584 } 585 this->Sections[I] = &InputSection::Discarded; 586 continue; 587 } 588 589 switch (Sec.sh_type) { 590 case SHT_GROUP: { 591 // De-duplicate section groups by their signatures. 592 StringRef Signature = getShtGroupSignature(ObjSections, Sec); 593 this->Sections[I] = &InputSection::Discarded; 594 595 596 ArrayRef<Elf_Word> Entries = 597 CHECK(Obj.template getSectionContentsAsArray<Elf_Word>(&Sec), this); 598 if (Entries.empty()) 599 fatal(toString(this) + ": empty SHT_GROUP"); 600 601 // The first word of a SHT_GROUP section contains flags. Currently, 602 // the standard defines only "GRP_COMDAT" flag for the COMDAT group. 603 // An group with the empty flag doesn't define anything; such sections 604 // are just skipped. 605 if (Entries[0] == 0) 606 continue; 607 608 if (Entries[0] != GRP_COMDAT) 609 fatal(toString(this) + ": unsupported SHT_GROUP format"); 610 611 bool IsNew = 612 ComdatGroups.try_emplace(CachedHashStringRef(Signature), this).second; 613 if (IsNew) { 614 if (Config->Relocatable) 615 this->Sections[I] = createInputSection(Sec); 616 continue; 617 } 618 619 // Otherwise, discard group members. 620 for (uint32_t SecIndex : Entries.slice(1)) { 621 if (SecIndex >= Size) 622 fatal(toString(this) + 623 ": invalid section index in group: " + Twine(SecIndex)); 624 this->Sections[SecIndex] = &InputSection::Discarded; 625 } 626 break; 627 } 628 case SHT_SYMTAB: 629 this->initSymtab<ELFT>(ObjSections, &Sec); 630 break; 631 case SHT_SYMTAB_SHNDX: 632 ShndxTable = CHECK(Obj.getSHNDXTable(Sec, ObjSections), this); 633 break; 634 case SHT_STRTAB: 635 case SHT_NULL: 636 break; 637 default: 638 this->Sections[I] = createInputSection(Sec); 639 } 640 641 // .ARM.exidx sections have a reverse dependency on the InputSection they 642 // have a SHF_LINK_ORDER dependency, this is identified by the sh_link. 643 if (Sec.sh_flags & SHF_LINK_ORDER) { 644 InputSectionBase *LinkSec = nullptr; 645 if (Sec.sh_link < this->Sections.size()) 646 LinkSec = this->Sections[Sec.sh_link]; 647 if (!LinkSec) 648 fatal(toString(this) + 649 ": invalid sh_link index: " + Twine(Sec.sh_link)); 650 651 InputSection *IS = cast<InputSection>(this->Sections[I]); 652 LinkSec->DependentSections.push_back(IS); 653 if (!isa<InputSection>(LinkSec)) 654 error("a section " + IS->Name + 655 " with SHF_LINK_ORDER should not refer a non-regular " 656 "section: " + 657 toString(LinkSec)); 658 } 659 } 660 } 661 662 // For ARM only, to set the EF_ARM_ABI_FLOAT_SOFT or EF_ARM_ABI_FLOAT_HARD 663 // flag in the ELF Header we need to look at Tag_ABI_VFP_args to find out how 664 // the input objects have been compiled. 665 static void updateARMVFPArgs(const ARMAttributeParser &Attributes, 666 const InputFile *F) { 667 if (!Attributes.hasAttribute(ARMBuildAttrs::ABI_VFP_args)) 668 // If an ABI tag isn't present then it is implicitly given the value of 0 669 // which maps to ARMBuildAttrs::BaseAAPCS. However many assembler files, 670 // including some in glibc that don't use FP args (and should have value 3) 671 // don't have the attribute so we do not consider an implicit value of 0 672 // as a clash. 673 return; 674 675 unsigned VFPArgs = Attributes.getAttributeValue(ARMBuildAttrs::ABI_VFP_args); 676 ARMVFPArgKind Arg; 677 switch (VFPArgs) { 678 case ARMBuildAttrs::BaseAAPCS: 679 Arg = ARMVFPArgKind::Base; 680 break; 681 case ARMBuildAttrs::HardFPAAPCS: 682 Arg = ARMVFPArgKind::VFP; 683 break; 684 case ARMBuildAttrs::ToolChainFPPCS: 685 // Tool chain specific convention that conforms to neither AAPCS variant. 686 Arg = ARMVFPArgKind::ToolChain; 687 break; 688 case ARMBuildAttrs::CompatibleFPAAPCS: 689 // Object compatible with all conventions. 690 return; 691 default: 692 error(toString(F) + ": unknown Tag_ABI_VFP_args value: " + Twine(VFPArgs)); 693 return; 694 } 695 // Follow ld.bfd and error if there is a mix of calling conventions. 696 if (Config->ARMVFPArgs != Arg && Config->ARMVFPArgs != ARMVFPArgKind::Default) 697 error(toString(F) + ": incompatible Tag_ABI_VFP_args"); 698 else 699 Config->ARMVFPArgs = Arg; 700 } 701 702 // The ARM support in lld makes some use of instructions that are not available 703 // on all ARM architectures. Namely: 704 // - Use of BLX instruction for interworking between ARM and Thumb state. 705 // - Use of the extended Thumb branch encoding in relocation. 706 // - Use of the MOVT/MOVW instructions in Thumb Thunks. 707 // The ARM Attributes section contains information about the architecture chosen 708 // at compile time. We follow the convention that if at least one input object 709 // is compiled with an architecture that supports these features then lld is 710 // permitted to use them. 711 static void updateSupportedARMFeatures(const ARMAttributeParser &Attributes) { 712 if (!Attributes.hasAttribute(ARMBuildAttrs::CPU_arch)) 713 return; 714 auto Arch = Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch); 715 switch (Arch) { 716 case ARMBuildAttrs::Pre_v4: 717 case ARMBuildAttrs::v4: 718 case ARMBuildAttrs::v4T: 719 // Architectures prior to v5 do not support BLX instruction 720 break; 721 case ARMBuildAttrs::v5T: 722 case ARMBuildAttrs::v5TE: 723 case ARMBuildAttrs::v5TEJ: 724 case ARMBuildAttrs::v6: 725 case ARMBuildAttrs::v6KZ: 726 case ARMBuildAttrs::v6K: 727 Config->ARMHasBlx = true; 728 // Architectures used in pre-Cortex processors do not support 729 // The J1 = 1 J2 = 1 Thumb branch range extension, with the exception 730 // of Architecture v6T2 (arm1156t2-s and arm1156t2f-s) that do. 731 break; 732 default: 733 // All other Architectures have BLX and extended branch encoding 734 Config->ARMHasBlx = true; 735 Config->ARMJ1J2BranchEncoding = true; 736 if (Arch != ARMBuildAttrs::v6_M && Arch != ARMBuildAttrs::v6S_M) 737 // All Architectures used in Cortex processors with the exception 738 // of v6-M and v6S-M have the MOVT and MOVW instructions. 739 Config->ARMHasMovtMovw = true; 740 break; 741 } 742 } 743 744 template <class ELFT> 745 InputSectionBase *ObjFile<ELFT>::getRelocTarget(const Elf_Shdr &Sec) { 746 uint32_t Idx = Sec.sh_info; 747 if (Idx >= this->Sections.size()) 748 fatal(toString(this) + ": invalid relocated section index: " + Twine(Idx)); 749 InputSectionBase *Target = this->Sections[Idx]; 750 751 // Strictly speaking, a relocation section must be included in the 752 // group of the section it relocates. However, LLVM 3.3 and earlier 753 // would fail to do so, so we gracefully handle that case. 754 if (Target == &InputSection::Discarded) 755 return nullptr; 756 757 if (!Target) 758 fatal(toString(this) + ": unsupported relocation reference"); 759 return Target; 760 } 761 762 // Create a regular InputSection class that has the same contents 763 // as a given section. 764 static InputSection *toRegularSection(MergeInputSection *Sec) { 765 return make<InputSection>(Sec->File, Sec->Flags, Sec->Type, Sec->Alignment, 766 Sec->data(), Sec->Name); 767 } 768 769 template <class ELFT> 770 InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &Sec) { 771 StringRef Name = getSectionName(Sec); 772 773 switch (Sec.sh_type) { 774 case SHT_ARM_ATTRIBUTES: { 775 if (Config->EMachine != EM_ARM) 776 break; 777 ARMAttributeParser Attributes; 778 ArrayRef<uint8_t> Contents = check(this->getObj().getSectionContents(&Sec)); 779 Attributes.Parse(Contents, /*isLittle*/ Config->EKind == ELF32LEKind); 780 updateSupportedARMFeatures(Attributes); 781 updateARMVFPArgs(Attributes, this); 782 783 // FIXME: Retain the first attribute section we see. The eglibc ARM 784 // dynamic loaders require the presence of an attribute section for dlopen 785 // to work. In a full implementation we would merge all attribute sections. 786 if (In.ARMAttributes == nullptr) { 787 In.ARMAttributes = make<InputSection>(*this, Sec, Name); 788 return In.ARMAttributes; 789 } 790 return &InputSection::Discarded; 791 } 792 case SHT_LLVM_DEPENDENT_LIBRARIES: { 793 if (Config->Relocatable) 794 break; 795 ArrayRef<char> Data = 796 CHECK(this->getObj().template getSectionContentsAsArray<char>(&Sec), this); 797 if (!Data.empty() && Data.back() != '\0') { 798 error(toString(this) + 799 ": corrupted dependent libraries section (unterminated string): " + 800 Name); 801 return &InputSection::Discarded; 802 } 803 for (const char *D = Data.begin(), *E = Data.end(); D < E;) { 804 StringRef S(D); 805 addDependentLibrary(S, this); 806 D += S.size() + 1; 807 } 808 return &InputSection::Discarded; 809 } 810 case SHT_RELA: 811 case SHT_REL: { 812 // Find a relocation target section and associate this section with that. 813 // Target may have been discarded if it is in a different section group 814 // and the group is discarded, even though it's a violation of the 815 // spec. We handle that situation gracefully by discarding dangling 816 // relocation sections. 817 InputSectionBase *Target = getRelocTarget(Sec); 818 if (!Target) 819 return nullptr; 820 821 // This section contains relocation information. 822 // If -r is given, we do not interpret or apply relocation 823 // but just copy relocation sections to output. 824 if (Config->Relocatable) { 825 InputSection *RelocSec = make<InputSection>(*this, Sec, Name); 826 // We want to add a dependency to target, similar like we do for 827 // -emit-relocs below. This is useful for the case when linker script 828 // contains the "/DISCARD/". It is perhaps uncommon to use a script with 829 // -r, but we faced it in the Linux kernel and have to handle such case 830 // and not to crash. 831 Target->DependentSections.push_back(RelocSec); 832 return RelocSec; 833 } 834 835 if (Target->FirstRelocation) 836 fatal(toString(this) + 837 ": multiple relocation sections to one section are not supported"); 838 839 // ELF spec allows mergeable sections with relocations, but they are 840 // rare, and it is in practice hard to merge such sections by contents, 841 // because applying relocations at end of linking changes section 842 // contents. So, we simply handle such sections as non-mergeable ones. 843 // Degrading like this is acceptable because section merging is optional. 844 if (auto *MS = dyn_cast<MergeInputSection>(Target)) { 845 Target = toRegularSection(MS); 846 this->Sections[Sec.sh_info] = Target; 847 } 848 849 if (Sec.sh_type == SHT_RELA) { 850 ArrayRef<Elf_Rela> Rels = CHECK(getObj().relas(&Sec), this); 851 Target->FirstRelocation = Rels.begin(); 852 Target->NumRelocations = Rels.size(); 853 Target->AreRelocsRela = true; 854 } else { 855 ArrayRef<Elf_Rel> Rels = CHECK(getObj().rels(&Sec), this); 856 Target->FirstRelocation = Rels.begin(); 857 Target->NumRelocations = Rels.size(); 858 Target->AreRelocsRela = false; 859 } 860 assert(isUInt<31>(Target->NumRelocations)); 861 862 // Relocation sections processed by the linker are usually removed 863 // from the output, so returning `nullptr` for the normal case. 864 // However, if -emit-relocs is given, we need to leave them in the output. 865 // (Some post link analysis tools need this information.) 866 if (Config->EmitRelocs) { 867 InputSection *RelocSec = make<InputSection>(*this, Sec, Name); 868 // We will not emit relocation section if target was discarded. 869 Target->DependentSections.push_back(RelocSec); 870 return RelocSec; 871 } 872 return nullptr; 873 } 874 } 875 876 // The GNU linker uses .note.GNU-stack section as a marker indicating 877 // that the code in the object file does not expect that the stack is 878 // executable (in terms of NX bit). If all input files have the marker, 879 // the GNU linker adds a PT_GNU_STACK segment to tells the loader to 880 // make the stack non-executable. Most object files have this section as 881 // of 2017. 882 // 883 // But making the stack non-executable is a norm today for security 884 // reasons. Failure to do so may result in a serious security issue. 885 // Therefore, we make LLD always add PT_GNU_STACK unless it is 886 // explicitly told to do otherwise (by -z execstack). Because the stack 887 // executable-ness is controlled solely by command line options, 888 // .note.GNU-stack sections are simply ignored. 889 if (Name == ".note.GNU-stack") 890 return &InputSection::Discarded; 891 892 // Split stacks is a feature to support a discontiguous stack, 893 // commonly used in the programming language Go. For the details, 894 // see https://gcc.gnu.org/wiki/SplitStacks. An object file compiled 895 // for split stack will include a .note.GNU-split-stack section. 896 if (Name == ".note.GNU-split-stack") { 897 if (Config->Relocatable) { 898 error("cannot mix split-stack and non-split-stack in a relocatable link"); 899 return &InputSection::Discarded; 900 } 901 this->SplitStack = true; 902 return &InputSection::Discarded; 903 } 904 905 // An object file cmpiled for split stack, but where some of the 906 // functions were compiled with the no_split_stack_attribute will 907 // include a .note.GNU-no-split-stack section. 908 if (Name == ".note.GNU-no-split-stack") { 909 this->SomeNoSplitStack = true; 910 return &InputSection::Discarded; 911 } 912 913 // The linkonce feature is a sort of proto-comdat. Some glibc i386 object 914 // files contain definitions of symbol "__x86.get_pc_thunk.bx" in linkonce 915 // sections. Drop those sections to avoid duplicate symbol errors. 916 // FIXME: This is glibc PR20543, we should remove this hack once that has been 917 // fixed for a while. 918 if (Name == ".gnu.linkonce.t.__x86.get_pc_thunk.bx" || 919 Name == ".gnu.linkonce.t.__i686.get_pc_thunk.bx") 920 return &InputSection::Discarded; 921 922 // If we are creating a new .build-id section, strip existing .build-id 923 // sections so that the output won't have more than one .build-id. 924 // This is not usually a problem because input object files normally don't 925 // have .build-id sections, but you can create such files by 926 // "ld.{bfd,gold,lld} -r --build-id", and we want to guard against it. 927 if (Name == ".note.gnu.build-id" && Config->BuildId != BuildIdKind::None) 928 return &InputSection::Discarded; 929 930 // The linker merges EH (exception handling) frames and creates a 931 // .eh_frame_hdr section for runtime. So we handle them with a special 932 // class. For relocatable outputs, they are just passed through. 933 if (Name == ".eh_frame" && !Config->Relocatable) 934 return make<EhInputSection>(*this, Sec, Name); 935 936 if (shouldMerge(Sec)) 937 return make<MergeInputSection>(*this, Sec, Name); 938 return make<InputSection>(*this, Sec, Name); 939 } 940 941 template <class ELFT> 942 StringRef ObjFile<ELFT>::getSectionName(const Elf_Shdr &Sec) { 943 return CHECK(getObj().getSectionName(&Sec, SectionStringTable), this); 944 } 945 946 // Initialize this->Symbols. this->Symbols is a parallel array as 947 // its corresponding ELF symbol table. 948 template <class ELFT> void ObjFile<ELFT>::initializeSymbols() { 949 ArrayRef<Elf_Sym> ESyms = this->getELFSyms<ELFT>(); 950 this->Symbols.resize(ESyms.size()); 951 952 // Our symbol table may have already been partially initialized 953 // because of LazyObjFile. 954 for (size_t I = 0, End = ESyms.size(); I != End; ++I) 955 if (!this->Symbols[I] && ESyms[I].getBinding() != STB_LOCAL) 956 this->Symbols[I] = 957 Symtab->insert(CHECK(ESyms[I].getName(this->StringTable), this)); 958 959 // Fill this->Symbols. A symbol is either local or global. 960 for (size_t I = 0, End = ESyms.size(); I != End; ++I) { 961 const Elf_Sym &ESym = ESyms[I]; 962 963 // Read symbol attributes. 964 uint32_t SecIdx = getSectionIndex(ESym); 965 if (SecIdx >= this->Sections.size()) 966 fatal(toString(this) + ": invalid section index: " + Twine(SecIdx)); 967 968 InputSectionBase *Sec = this->Sections[SecIdx]; 969 uint8_t Binding = ESym.getBinding(); 970 uint8_t StOther = ESym.st_other; 971 uint8_t Type = ESym.getType(); 972 uint64_t Value = ESym.st_value; 973 uint64_t Size = ESym.st_size; 974 StringRefZ Name = this->StringTable.data() + ESym.st_name; 975 976 // Handle local symbols. Local symbols are not added to the symbol 977 // table because they are not visible from other object files. We 978 // allocate symbol instances and add their pointers to Symbols. 979 if (Binding == STB_LOCAL) { 980 if (ESym.getType() == STT_FILE) 981 SourceFile = CHECK(ESym.getName(this->StringTable), this); 982 983 if (this->StringTable.size() <= ESym.st_name) 984 fatal(toString(this) + ": invalid symbol name offset"); 985 986 if (ESym.st_shndx == SHN_UNDEF) 987 this->Symbols[I] = make<Undefined>(this, Name, Binding, StOther, Type); 988 else 989 this->Symbols[I] = 990 make<Defined>(this, Name, Binding, StOther, Type, Value, Size, Sec); 991 continue; 992 } 993 994 // Handle global undefined symbols. 995 if (ESym.st_shndx == SHN_UNDEF) { 996 this->Symbols[I]->resolve(Undefined{this, Name, Binding, StOther, Type}); 997 continue; 998 } 999 1000 // Handle global common symbols. 1001 if (ESym.st_shndx == SHN_COMMON) { 1002 if (Value == 0 || Value >= UINT32_MAX) 1003 fatal(toString(this) + ": common symbol '" + StringRef(Name.Data) + 1004 "' has invalid alignment: " + Twine(Value)); 1005 this->Symbols[I]->resolve( 1006 CommonSymbol{this, Name, Binding, StOther, Type, Value, Size}); 1007 continue; 1008 } 1009 1010 // If a defined symbol is in a discarded section, handle it as if it 1011 // were an undefined symbol. Such symbol doesn't comply with the 1012 // standard, but in practice, a .eh_frame often directly refer 1013 // COMDAT member sections, and if a comdat group is discarded, some 1014 // defined symbol in a .eh_frame becomes dangling symbols. 1015 if (Sec == &InputSection::Discarded) { 1016 this->Symbols[I]->resolve( 1017 Undefined{this, Name, Binding, StOther, Type, SecIdx}); 1018 continue; 1019 } 1020 1021 // Handle global defined symbols. 1022 if (Binding == STB_GLOBAL || Binding == STB_WEAK || 1023 Binding == STB_GNU_UNIQUE) { 1024 this->Symbols[I]->resolve( 1025 Defined{this, Name, Binding, StOther, Type, Value, Size, Sec}); 1026 continue; 1027 } 1028 1029 fatal(toString(this) + ": unexpected binding: " + Twine((int)Binding)); 1030 } 1031 } 1032 1033 ArchiveFile::ArchiveFile(std::unique_ptr<Archive> &&File) 1034 : InputFile(ArchiveKind, File->getMemoryBufferRef()), 1035 File(std::move(File)) {} 1036 1037 void ArchiveFile::parse() { 1038 for (const Archive::Symbol &Sym : File->symbols()) 1039 Symtab->addSymbol(LazyArchive{*this, Sym}); 1040 } 1041 1042 // Returns a buffer pointing to a member file containing a given symbol. 1043 void ArchiveFile::fetch(const Archive::Symbol &Sym) { 1044 Archive::Child C = 1045 CHECK(Sym.getMember(), toString(this) + 1046 ": could not get the member for symbol " + 1047 Sym.getName()); 1048 1049 if (!Seen.insert(C.getChildOffset()).second) 1050 return; 1051 1052 MemoryBufferRef MB = 1053 CHECK(C.getMemoryBufferRef(), 1054 toString(this) + 1055 ": could not get the buffer for the member defining symbol " + 1056 Sym.getName()); 1057 1058 if (Tar && C.getParent()->isThin()) 1059 Tar->append(relativeToRoot(CHECK(C.getFullName(), this)), MB.getBuffer()); 1060 1061 InputFile *File = createObjectFile( 1062 MB, getName(), C.getParent()->isThin() ? 0 : C.getChildOffset()); 1063 File->GroupId = GroupId; 1064 parseFile(File); 1065 } 1066 1067 unsigned SharedFile::VernauxNum; 1068 1069 SharedFile::SharedFile(MemoryBufferRef M, StringRef DefaultSoName) 1070 : ELFFileBase(SharedKind, M), SoName(DefaultSoName), 1071 IsNeeded(!Config->AsNeeded) { 1072 switch (getELFKind(MB, "")) { 1073 case ELF32LEKind: 1074 parseHeader<ELF32LE>(); 1075 break; 1076 case ELF32BEKind: 1077 parseHeader<ELF32BE>(); 1078 break; 1079 case ELF64LEKind: 1080 parseHeader<ELF64LE>(); 1081 break; 1082 case ELF64BEKind: 1083 parseHeader<ELF64BE>(); 1084 break; 1085 default: 1086 llvm_unreachable("getELFKind"); 1087 } 1088 } 1089 1090 // Parse the version definitions in the object file if present, and return a 1091 // vector whose nth element contains a pointer to the Elf_Verdef for version 1092 // identifier n. Version identifiers that are not definitions map to nullptr. 1093 template <typename ELFT> 1094 static std::vector<const void *> parseVerdefs(const uint8_t *Base, 1095 const typename ELFT::Shdr *Sec) { 1096 if (!Sec) 1097 return {}; 1098 1099 // We cannot determine the largest verdef identifier without inspecting 1100 // every Elf_Verdef, but both bfd and gold assign verdef identifiers 1101 // sequentially starting from 1, so we predict that the largest identifier 1102 // will be VerdefCount. 1103 unsigned VerdefCount = Sec->sh_info; 1104 std::vector<const void *> Verdefs(VerdefCount + 1); 1105 1106 // Build the Verdefs array by following the chain of Elf_Verdef objects 1107 // from the start of the .gnu.version_d section. 1108 const uint8_t *Verdef = Base + Sec->sh_offset; 1109 for (unsigned I = 0; I != VerdefCount; ++I) { 1110 auto *CurVerdef = reinterpret_cast<const typename ELFT::Verdef *>(Verdef); 1111 Verdef += CurVerdef->vd_next; 1112 unsigned VerdefIndex = CurVerdef->vd_ndx; 1113 Verdefs.resize(VerdefIndex + 1); 1114 Verdefs[VerdefIndex] = CurVerdef; 1115 } 1116 return Verdefs; 1117 } 1118 1119 // We do not usually care about alignments of data in shared object 1120 // files because the loader takes care of it. However, if we promote a 1121 // DSO symbol to point to .bss due to copy relocation, we need to keep 1122 // the original alignment requirements. We infer it in this function. 1123 template <typename ELFT> 1124 static uint64_t getAlignment(ArrayRef<typename ELFT::Shdr> Sections, 1125 const typename ELFT::Sym &Sym) { 1126 uint64_t Ret = UINT64_MAX; 1127 if (Sym.st_value) 1128 Ret = 1ULL << countTrailingZeros((uint64_t)Sym.st_value); 1129 if (0 < Sym.st_shndx && Sym.st_shndx < Sections.size()) 1130 Ret = std::min<uint64_t>(Ret, Sections[Sym.st_shndx].sh_addralign); 1131 return (Ret > UINT32_MAX) ? 0 : Ret; 1132 } 1133 1134 // Fully parse the shared object file. 1135 // 1136 // This function parses symbol versions. If a DSO has version information, 1137 // the file has a ".gnu.version_d" section which contains symbol version 1138 // definitions. Each symbol is associated to one version through a table in 1139 // ".gnu.version" section. That table is a parallel array for the symbol 1140 // table, and each table entry contains an index in ".gnu.version_d". 1141 // 1142 // The special index 0 is reserved for VERF_NDX_LOCAL and 1 is for 1143 // VER_NDX_GLOBAL. There's no table entry for these special versions in 1144 // ".gnu.version_d". 1145 // 1146 // The file format for symbol versioning is perhaps a bit more complicated 1147 // than necessary, but you can easily understand the code if you wrap your 1148 // head around the data structure described above. 1149 template <class ELFT> void SharedFile::parse() { 1150 using Elf_Dyn = typename ELFT::Dyn; 1151 using Elf_Shdr = typename ELFT::Shdr; 1152 using Elf_Sym = typename ELFT::Sym; 1153 using Elf_Verdef = typename ELFT::Verdef; 1154 using Elf_Versym = typename ELFT::Versym; 1155 1156 ArrayRef<Elf_Dyn> DynamicTags; 1157 const ELFFile<ELFT> Obj = this->getObj<ELFT>(); 1158 ArrayRef<Elf_Shdr> Sections = CHECK(Obj.sections(), this); 1159 1160 const Elf_Shdr *VersymSec = nullptr; 1161 const Elf_Shdr *VerdefSec = nullptr; 1162 1163 // Search for .dynsym, .dynamic, .symtab, .gnu.version and .gnu.version_d. 1164 for (const Elf_Shdr &Sec : Sections) { 1165 switch (Sec.sh_type) { 1166 default: 1167 continue; 1168 case SHT_DYNSYM: 1169 this->initSymtab<ELFT>(Sections, &Sec); 1170 break; 1171 case SHT_DYNAMIC: 1172 DynamicTags = 1173 CHECK(Obj.template getSectionContentsAsArray<Elf_Dyn>(&Sec), this); 1174 break; 1175 case SHT_GNU_versym: 1176 VersymSec = &Sec; 1177 break; 1178 case SHT_GNU_verdef: 1179 VerdefSec = &Sec; 1180 break; 1181 } 1182 } 1183 1184 if (VersymSec && this->getELFSyms<ELFT>().empty()) { 1185 error("SHT_GNU_versym should be associated with symbol table"); 1186 return; 1187 } 1188 1189 // Search for a DT_SONAME tag to initialize this->SoName. 1190 for (const Elf_Dyn &Dyn : DynamicTags) { 1191 if (Dyn.d_tag == DT_NEEDED) { 1192 uint64_t Val = Dyn.getVal(); 1193 if (Val >= this->StringTable.size()) 1194 fatal(toString(this) + ": invalid DT_NEEDED entry"); 1195 DtNeeded.push_back(this->StringTable.data() + Val); 1196 } else if (Dyn.d_tag == DT_SONAME) { 1197 uint64_t Val = Dyn.getVal(); 1198 if (Val >= this->StringTable.size()) 1199 fatal(toString(this) + ": invalid DT_SONAME entry"); 1200 SoName = this->StringTable.data() + Val; 1201 } 1202 } 1203 1204 // DSOs are uniquified not by filename but by soname. 1205 DenseMap<StringRef, SharedFile *>::iterator It; 1206 bool WasInserted; 1207 std::tie(It, WasInserted) = Symtab->SoNames.try_emplace(SoName, this); 1208 1209 // If a DSO appears more than once on the command line with and without 1210 // --as-needed, --no-as-needed takes precedence over --as-needed because a 1211 // user can add an extra DSO with --no-as-needed to force it to be added to 1212 // the dependency list. 1213 It->second->IsNeeded |= IsNeeded; 1214 if (!WasInserted) 1215 return; 1216 1217 SharedFiles.push_back(this); 1218 1219 Verdefs = parseVerdefs<ELFT>(Obj.base(), VerdefSec); 1220 1221 // Parse ".gnu.version" section which is a parallel array for the symbol 1222 // table. If a given file doesn't have a ".gnu.version" section, we use 1223 // VER_NDX_GLOBAL. 1224 size_t Size = this->getELFSyms<ELFT>().size() - this->FirstGlobal; 1225 std::vector<uint32_t> Versyms(Size, VER_NDX_GLOBAL); 1226 if (VersymSec) { 1227 ArrayRef<Elf_Versym> Versym = 1228 CHECK(Obj.template getSectionContentsAsArray<Elf_Versym>(VersymSec), 1229 this) 1230 .slice(FirstGlobal); 1231 for (size_t I = 0; I < Size; ++I) 1232 Versyms[I] = Versym[I].vs_index; 1233 } 1234 1235 // System libraries can have a lot of symbols with versions. Using a 1236 // fixed buffer for computing the versions name (foo@ver) can save a 1237 // lot of allocations. 1238 SmallString<0> VersionedNameBuffer; 1239 1240 // Add symbols to the symbol table. 1241 ArrayRef<Elf_Sym> Syms = this->getGlobalELFSyms<ELFT>(); 1242 for (size_t I = 0; I < Syms.size(); ++I) { 1243 const Elf_Sym &Sym = Syms[I]; 1244 1245 // ELF spec requires that all local symbols precede weak or global 1246 // symbols in each symbol table, and the index of first non-local symbol 1247 // is stored to sh_info. If a local symbol appears after some non-local 1248 // symbol, that's a violation of the spec. 1249 StringRef Name = CHECK(Sym.getName(this->StringTable), this); 1250 if (Sym.getBinding() == STB_LOCAL) { 1251 warn("found local symbol '" + Name + 1252 "' in global part of symbol table in file " + toString(this)); 1253 continue; 1254 } 1255 1256 if (Sym.isUndefined()) { 1257 Symbol *S = Symtab->addSymbol( 1258 Undefined{this, Name, Sym.getBinding(), Sym.st_other, Sym.getType()}); 1259 S->ExportDynamic = true; 1260 continue; 1261 } 1262 1263 // MIPS BFD linker puts _gp_disp symbol into DSO files and incorrectly 1264 // assigns VER_NDX_LOCAL to this section global symbol. Here is a 1265 // workaround for this bug. 1266 uint32_t Idx = Versyms[I] & ~VERSYM_HIDDEN; 1267 if (Config->EMachine == EM_MIPS && Idx == VER_NDX_LOCAL && 1268 Name == "_gp_disp") 1269 continue; 1270 1271 uint32_t Alignment = getAlignment<ELFT>(Sections, Sym); 1272 if (!(Versyms[I] & VERSYM_HIDDEN)) { 1273 Symtab->addSymbol(SharedSymbol{*this, Name, Sym.getBinding(), 1274 Sym.st_other, Sym.getType(), Sym.st_value, 1275 Sym.st_size, Alignment, Idx}); 1276 } 1277 1278 // Also add the symbol with the versioned name to handle undefined symbols 1279 // with explicit versions. 1280 if (Idx == VER_NDX_GLOBAL) 1281 continue; 1282 1283 if (Idx >= Verdefs.size() || Idx == VER_NDX_LOCAL) { 1284 error("corrupt input file: version definition index " + Twine(Idx) + 1285 " for symbol " + Name + " is out of bounds\n>>> defined in " + 1286 toString(this)); 1287 continue; 1288 } 1289 1290 StringRef VerName = 1291 this->StringTable.data() + 1292 reinterpret_cast<const Elf_Verdef *>(Verdefs[Idx])->getAux()->vda_name; 1293 VersionedNameBuffer.clear(); 1294 Name = (Name + "@" + VerName).toStringRef(VersionedNameBuffer); 1295 Symtab->addSymbol(SharedSymbol{*this, Saver.save(Name), Sym.getBinding(), 1296 Sym.st_other, Sym.getType(), Sym.st_value, 1297 Sym.st_size, Alignment, Idx}); 1298 } 1299 } 1300 1301 static ELFKind getBitcodeELFKind(const Triple &T) { 1302 if (T.isLittleEndian()) 1303 return T.isArch64Bit() ? ELF64LEKind : ELF32LEKind; 1304 return T.isArch64Bit() ? ELF64BEKind : ELF32BEKind; 1305 } 1306 1307 static uint8_t getBitcodeMachineKind(StringRef Path, const Triple &T) { 1308 switch (T.getArch()) { 1309 case Triple::aarch64: 1310 return EM_AARCH64; 1311 case Triple::amdgcn: 1312 case Triple::r600: 1313 return EM_AMDGPU; 1314 case Triple::arm: 1315 case Triple::thumb: 1316 return EM_ARM; 1317 case Triple::avr: 1318 return EM_AVR; 1319 case Triple::mips: 1320 case Triple::mipsel: 1321 case Triple::mips64: 1322 case Triple::mips64el: 1323 return EM_MIPS; 1324 case Triple::msp430: 1325 return EM_MSP430; 1326 case Triple::ppc: 1327 return EM_PPC; 1328 case Triple::ppc64: 1329 case Triple::ppc64le: 1330 return EM_PPC64; 1331 case Triple::x86: 1332 return T.isOSIAMCU() ? EM_IAMCU : EM_386; 1333 case Triple::x86_64: 1334 return EM_X86_64; 1335 default: 1336 error(Path + ": could not infer e_machine from bitcode target triple " + 1337 T.str()); 1338 return EM_NONE; 1339 } 1340 } 1341 1342 BitcodeFile::BitcodeFile(MemoryBufferRef MB, StringRef ArchiveName, 1343 uint64_t OffsetInArchive) 1344 : InputFile(BitcodeKind, MB) { 1345 this->ArchiveName = ArchiveName; 1346 1347 std::string Path = MB.getBufferIdentifier().str(); 1348 if (Config->ThinLTOIndexOnly) 1349 Path = replaceThinLTOSuffix(MB.getBufferIdentifier()); 1350 1351 // ThinLTO assumes that all MemoryBufferRefs given to it have a unique 1352 // name. If two archives define two members with the same name, this 1353 // causes a collision which result in only one of the objects being taken 1354 // into consideration at LTO time (which very likely causes undefined 1355 // symbols later in the link stage). So we append file offset to make 1356 // filename unique. 1357 StringRef Name = ArchiveName.empty() 1358 ? Saver.save(Path) 1359 : Saver.save(ArchiveName + "(" + Path + " at " + 1360 utostr(OffsetInArchive) + ")"); 1361 MemoryBufferRef MBRef(MB.getBuffer(), Name); 1362 1363 Obj = CHECK(lto::InputFile::create(MBRef), this); 1364 1365 Triple T(Obj->getTargetTriple()); 1366 EKind = getBitcodeELFKind(T); 1367 EMachine = getBitcodeMachineKind(MB.getBufferIdentifier(), T); 1368 } 1369 1370 static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) { 1371 switch (GvVisibility) { 1372 case GlobalValue::DefaultVisibility: 1373 return STV_DEFAULT; 1374 case GlobalValue::HiddenVisibility: 1375 return STV_HIDDEN; 1376 case GlobalValue::ProtectedVisibility: 1377 return STV_PROTECTED; 1378 } 1379 llvm_unreachable("unknown visibility"); 1380 } 1381 1382 template <class ELFT> 1383 static Symbol *createBitcodeSymbol(const std::vector<bool> &KeptComdats, 1384 const lto::InputFile::Symbol &ObjSym, 1385 BitcodeFile &F) { 1386 StringRef Name = Saver.save(ObjSym.getName()); 1387 uint8_t Binding = ObjSym.isWeak() ? STB_WEAK : STB_GLOBAL; 1388 uint8_t Type = ObjSym.isTLS() ? STT_TLS : STT_NOTYPE; 1389 uint8_t Visibility = mapVisibility(ObjSym.getVisibility()); 1390 bool CanOmitFromDynSym = ObjSym.canBeOmittedFromSymbolTable(); 1391 1392 int C = ObjSym.getComdatIndex(); 1393 if (ObjSym.isUndefined() || (C != -1 && !KeptComdats[C])) { 1394 Undefined New(&F, Name, Binding, Visibility, Type); 1395 if (CanOmitFromDynSym) 1396 New.ExportDynamic = false; 1397 return Symtab->addSymbol(New); 1398 } 1399 1400 if (ObjSym.isCommon()) 1401 return Symtab->addSymbol( 1402 CommonSymbol{&F, Name, Binding, Visibility, STT_OBJECT, 1403 ObjSym.getCommonAlignment(), ObjSym.getCommonSize()}); 1404 1405 Defined New(&F, Name, Binding, Visibility, Type, 0, 0, nullptr); 1406 if (CanOmitFromDynSym) 1407 New.ExportDynamic = false; 1408 return Symtab->addSymbol(New); 1409 } 1410 1411 template <class ELFT> 1412 void BitcodeFile::parse( 1413 DenseMap<CachedHashStringRef, const InputFile *> &ComdatGroups) { 1414 std::vector<bool> KeptComdats; 1415 for (StringRef S : Obj->getComdatTable()) 1416 KeptComdats.push_back( 1417 ComdatGroups.try_emplace(CachedHashStringRef(S), this).second); 1418 1419 for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) 1420 Symbols.push_back(createBitcodeSymbol<ELFT>(KeptComdats, ObjSym, *this)); 1421 1422 for (auto L : Obj->getDependentLibraries()) 1423 addDependentLibrary(L, this); 1424 } 1425 1426 void BinaryFile::parse() { 1427 ArrayRef<uint8_t> Data = arrayRefFromStringRef(MB.getBuffer()); 1428 auto *Section = make<InputSection>(this, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, 1429 8, Data, ".data"); 1430 Sections.push_back(Section); 1431 1432 // For each input file foo that is embedded to a result as a binary 1433 // blob, we define _binary_foo_{start,end,size} symbols, so that 1434 // user programs can access blobs by name. Non-alphanumeric 1435 // characters in a filename are replaced with underscore. 1436 std::string S = "_binary_" + MB.getBufferIdentifier().str(); 1437 for (size_t I = 0; I < S.size(); ++I) 1438 if (!isAlnum(S[I])) 1439 S[I] = '_'; 1440 1441 Symtab->addSymbol(Defined{nullptr, Saver.save(S + "_start"), STB_GLOBAL, 1442 STV_DEFAULT, STT_OBJECT, 0, 0, Section}); 1443 Symtab->addSymbol(Defined{nullptr, Saver.save(S + "_end"), STB_GLOBAL, 1444 STV_DEFAULT, STT_OBJECT, Data.size(), 0, Section}); 1445 Symtab->addSymbol(Defined{nullptr, Saver.save(S + "_size"), STB_GLOBAL, 1446 STV_DEFAULT, STT_OBJECT, Data.size(), 0, nullptr}); 1447 } 1448 1449 InputFile *elf::createObjectFile(MemoryBufferRef MB, StringRef ArchiveName, 1450 uint64_t OffsetInArchive) { 1451 if (isBitcode(MB)) 1452 return make<BitcodeFile>(MB, ArchiveName, OffsetInArchive); 1453 1454 switch (getELFKind(MB, ArchiveName)) { 1455 case ELF32LEKind: 1456 return make<ObjFile<ELF32LE>>(MB, ArchiveName); 1457 case ELF32BEKind: 1458 return make<ObjFile<ELF32BE>>(MB, ArchiveName); 1459 case ELF64LEKind: 1460 return make<ObjFile<ELF64LE>>(MB, ArchiveName); 1461 case ELF64BEKind: 1462 return make<ObjFile<ELF64BE>>(MB, ArchiveName); 1463 default: 1464 llvm_unreachable("getELFKind"); 1465 } 1466 } 1467 1468 void LazyObjFile::fetch() { 1469 if (MB.getBuffer().empty()) 1470 return; 1471 1472 InputFile *File = createObjectFile(MB, ArchiveName, OffsetInArchive); 1473 File->GroupId = GroupId; 1474 1475 MB = {}; 1476 1477 // Copy symbol vector so that the new InputFile doesn't have to 1478 // insert the same defined symbols to the symbol table again. 1479 File->Symbols = std::move(Symbols); 1480 1481 parseFile(File); 1482 } 1483 1484 template <class ELFT> void LazyObjFile::parse() { 1485 using Elf_Sym = typename ELFT::Sym; 1486 1487 // A lazy object file wraps either a bitcode file or an ELF file. 1488 if (isBitcode(this->MB)) { 1489 std::unique_ptr<lto::InputFile> Obj = 1490 CHECK(lto::InputFile::create(this->MB), this); 1491 for (const lto::InputFile::Symbol &Sym : Obj->symbols()) { 1492 if (Sym.isUndefined()) 1493 continue; 1494 Symtab->addSymbol(LazyObject{*this, Saver.save(Sym.getName())}); 1495 } 1496 return; 1497 } 1498 1499 if (getELFKind(this->MB, ArchiveName) != Config->EKind) { 1500 error("incompatible file: " + this->MB.getBufferIdentifier()); 1501 return; 1502 } 1503 1504 // Find a symbol table. 1505 ELFFile<ELFT> Obj = check(ELFFile<ELFT>::create(MB.getBuffer())); 1506 ArrayRef<typename ELFT::Shdr> Sections = CHECK(Obj.sections(), this); 1507 1508 for (const typename ELFT::Shdr &Sec : Sections) { 1509 if (Sec.sh_type != SHT_SYMTAB) 1510 continue; 1511 1512 // A symbol table is found. 1513 ArrayRef<Elf_Sym> ESyms = CHECK(Obj.symbols(&Sec), this); 1514 uint32_t FirstGlobal = Sec.sh_info; 1515 StringRef Strtab = CHECK(Obj.getStringTableForSymtab(Sec, Sections), this); 1516 this->Symbols.resize(ESyms.size()); 1517 1518 // Get existing symbols or insert placeholder symbols. 1519 for (size_t I = FirstGlobal, End = ESyms.size(); I != End; ++I) 1520 if (ESyms[I].st_shndx != SHN_UNDEF) 1521 this->Symbols[I] = Symtab->insert(CHECK(ESyms[I].getName(Strtab), this)); 1522 1523 // Replace existing symbols with LazyObject symbols. 1524 // 1525 // resolve() may trigger this->fetch() if an existing symbol is an 1526 // undefined symbol. If that happens, this LazyObjFile has served 1527 // its purpose, and we can exit from the loop early. 1528 for (Symbol *Sym : this->Symbols) { 1529 if (!Sym) 1530 continue; 1531 Sym->resolve(LazyObject{*this, Sym->getName()}); 1532 1533 // MemoryBuffer is emptied if this file is instantiated as ObjFile. 1534 if (MB.getBuffer().empty()) 1535 return; 1536 } 1537 return; 1538 } 1539 } 1540 1541 std::string elf::replaceThinLTOSuffix(StringRef Path) { 1542 StringRef Suffix = Config->ThinLTOObjectSuffixReplace.first; 1543 StringRef Repl = Config->ThinLTOObjectSuffixReplace.second; 1544 1545 if (Path.consume_back(Suffix)) 1546 return (Path + Repl).str(); 1547 return Path; 1548 } 1549 1550 template void 1551 BitcodeFile::parse<ELF32LE>(DenseMap<CachedHashStringRef, const InputFile *> &); 1552 template void 1553 BitcodeFile::parse<ELF32BE>(DenseMap<CachedHashStringRef, const InputFile *> &); 1554 template void 1555 BitcodeFile::parse<ELF64LE>(DenseMap<CachedHashStringRef, const InputFile *> &); 1556 template void 1557 BitcodeFile::parse<ELF64BE>(DenseMap<CachedHashStringRef, const InputFile *> &); 1558 1559 template void LazyObjFile::parse<ELF32LE>(); 1560 template void LazyObjFile::parse<ELF32BE>(); 1561 template void LazyObjFile::parse<ELF64LE>(); 1562 template void LazyObjFile::parse<ELF64BE>(); 1563 1564 template class elf::ObjFile<ELF32LE>; 1565 template class elf::ObjFile<ELF32BE>; 1566 template class elf::ObjFile<ELF64LE>; 1567 template class elf::ObjFile<ELF64BE>; 1568 1569 template void SharedFile::parse<ELF32LE>(); 1570 template void SharedFile::parse<ELF32BE>(); 1571 template void SharedFile::parse<ELF64LE>(); 1572 template void SharedFile::parse<ELF64BE>(); 1573