1 //===- InputFiles.cpp -----------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "InputFiles.h" 11 #include "Error.h" 12 #include "InputSection.h" 13 #include "LinkerScript.h" 14 #include "Memory.h" 15 #include "SymbolTable.h" 16 #include "Symbols.h" 17 #include "SyntheticSections.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/CodeGen/Analysis.h" 20 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 21 #include "llvm/IR/LLVMContext.h" 22 #include "llvm/IR/Module.h" 23 #include "llvm/LTO/LTO.h" 24 #include "llvm/MC/StringTableBuilder.h" 25 #include "llvm/Object/ELFObjectFile.h" 26 #include "llvm/Support/Path.h" 27 #include "llvm/Support/TarWriter.h" 28 #include "llvm/Support/raw_ostream.h" 29 30 using namespace llvm; 31 using namespace llvm::ELF; 32 using namespace llvm::object; 33 using namespace llvm::sys::fs; 34 35 using namespace lld; 36 using namespace lld::elf; 37 38 TarWriter *elf::Tar; 39 40 InputFile::InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {} 41 42 namespace { 43 // In ELF object file all section addresses are zero. If we have multiple 44 // .text sections (when using -ffunction-section or comdat group) then 45 // LLVM DWARF parser will not be able to parse .debug_line correctly, unless 46 // we assign each section some unique address. This callback method assigns 47 // each section an address equal to its offset in ELF object file. 48 class ObjectInfo : public LoadedObjectInfoHelper<ObjectInfo> { 49 public: 50 uint64_t getSectionLoadAddress(const object::SectionRef &Sec) const override { 51 return static_cast<const ELFSectionRef &>(Sec).getOffset(); 52 } 53 }; 54 } 55 56 Optional<MemoryBufferRef> elf::readFile(StringRef Path) { 57 log(Path); 58 auto MBOrErr = MemoryBuffer::getFile(Path); 59 if (auto EC = MBOrErr.getError()) { 60 error("cannot open " + Path + ": " + EC.message()); 61 return None; 62 } 63 64 std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; 65 MemoryBufferRef MBRef = MB->getMemBufferRef(); 66 make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership 67 68 if (Tar) 69 Tar->append(relativeToRoot(Path), MBRef.getBuffer()); 70 return MBRef; 71 } 72 73 template <class ELFT> void elf::ObjectFile<ELFT>::initializeDwarfLine() { 74 std::unique_ptr<object::ObjectFile> Obj = 75 check(object::ObjectFile::createObjectFile(this->MB), toString(this)); 76 77 ObjectInfo ObjInfo; 78 DWARFContextInMemory Dwarf(*Obj, &ObjInfo); 79 DwarfLine.reset(new DWARFDebugLine); 80 DWARFDataExtractor LineData(Dwarf.getLineSection(), Config->IsLE, 81 Config->Wordsize); 82 83 // The second parameter is offset in .debug_line section 84 // for compilation unit (CU) of interest. We have only one 85 // CU (object file), so offset is always 0. 86 DwarfLine->getOrParseLineTable(LineData, 0); 87 } 88 89 // Returns source line information for a given offset 90 // using DWARF debug info. 91 template <class ELFT> 92 Optional<DILineInfo> elf::ObjectFile<ELFT>::getDILineInfo(InputSectionBase *S, 93 uint64_t Offset) { 94 if (!DwarfLine) 95 initializeDwarfLine(); 96 97 // The offset to CU is 0. 98 const DWARFDebugLine::LineTable *Tbl = DwarfLine->getLineTable(0); 99 if (!Tbl) 100 return None; 101 102 // Use fake address calcuated by adding section file offset and offset in 103 // section. See comments for ObjectInfo class. 104 DILineInfo Info; 105 Tbl->getFileLineInfoForAddress( 106 S->getOffsetInFile() + Offset, nullptr, 107 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, Info); 108 if (Info.Line == 0) 109 return None; 110 return Info; 111 } 112 113 // Returns source line information for a given offset 114 // using DWARF debug info. 115 template <class ELFT> 116 std::string elf::ObjectFile<ELFT>::getLineInfo(InputSectionBase *S, 117 uint64_t Offset) { 118 if (Optional<DILineInfo> Info = getDILineInfo(S, Offset)) 119 return Info->FileName + ":" + std::to_string(Info->Line); 120 return ""; 121 } 122 123 // Returns "<internal>", "foo.a(bar.o)" or "baz.o". 124 std::string lld::toString(const InputFile *F) { 125 if (!F) 126 return "<internal>"; 127 128 if (F->ToStringCache.empty()) { 129 if (F->ArchiveName.empty()) 130 F->ToStringCache = F->getName(); 131 else 132 F->ToStringCache = (F->ArchiveName + "(" + F->getName() + ")").str(); 133 } 134 return F->ToStringCache; 135 } 136 137 template <class ELFT> 138 ELFFileBase<ELFT>::ELFFileBase(Kind K, MemoryBufferRef MB) : InputFile(K, MB) { 139 if (ELFT::TargetEndianness == support::little) 140 EKind = ELFT::Is64Bits ? ELF64LEKind : ELF32LEKind; 141 else 142 EKind = ELFT::Is64Bits ? ELF64BEKind : ELF32BEKind; 143 144 EMachine = getObj().getHeader()->e_machine; 145 OSABI = getObj().getHeader()->e_ident[llvm::ELF::EI_OSABI]; 146 } 147 148 template <class ELFT> 149 typename ELFT::SymRange ELFFileBase<ELFT>::getGlobalSymbols() { 150 return makeArrayRef(Symbols.begin() + FirstNonLocal, Symbols.end()); 151 } 152 153 template <class ELFT> 154 uint32_t ELFFileBase<ELFT>::getSectionIndex(const Elf_Sym &Sym) const { 155 return check(getObj().getSectionIndex(&Sym, Symbols, SymtabSHNDX), 156 toString(this)); 157 } 158 159 template <class ELFT> 160 void ELFFileBase<ELFT>::initSymtab(ArrayRef<Elf_Shdr> Sections, 161 const Elf_Shdr *Symtab) { 162 FirstNonLocal = Symtab->sh_info; 163 Symbols = check(getObj().symbols(Symtab), toString(this)); 164 if (FirstNonLocal == 0 || FirstNonLocal > Symbols.size()) 165 fatal(toString(this) + ": invalid sh_info in symbol table"); 166 167 StringTable = check(getObj().getStringTableForSymtab(*Symtab, Sections), 168 toString(this)); 169 } 170 171 template <class ELFT> 172 elf::ObjectFile<ELFT>::ObjectFile(MemoryBufferRef M, StringRef ArchiveName) 173 : ELFFileBase<ELFT>(Base::ObjectKind, M) { 174 this->ArchiveName = ArchiveName; 175 } 176 177 template <class ELFT> 178 ArrayRef<SymbolBody *> elf::ObjectFile<ELFT>::getLocalSymbols() { 179 if (this->SymbolBodies.empty()) 180 return this->SymbolBodies; 181 return makeArrayRef(this->SymbolBodies).slice(1, this->FirstNonLocal - 1); 182 } 183 184 template <class ELFT> 185 ArrayRef<SymbolBody *> elf::ObjectFile<ELFT>::getSymbols() { 186 if (this->SymbolBodies.empty()) 187 return this->SymbolBodies; 188 return makeArrayRef(this->SymbolBodies).slice(1); 189 } 190 191 template <class ELFT> 192 void elf::ObjectFile<ELFT>::parse(DenseSet<CachedHashStringRef> &ComdatGroups) { 193 // Read section and symbol tables. 194 initializeSections(ComdatGroups); 195 initializeSymbols(); 196 } 197 198 // Sections with SHT_GROUP and comdat bits define comdat section groups. 199 // They are identified and deduplicated by group name. This function 200 // returns a group name. 201 template <class ELFT> 202 StringRef 203 elf::ObjectFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> Sections, 204 const Elf_Shdr &Sec) { 205 // Group signatures are stored as symbol names in object files. 206 // sh_info contains a symbol index, so we fetch a symbol and read its name. 207 if (this->Symbols.empty()) 208 this->initSymtab( 209 Sections, 210 check(object::getSection<ELFT>(Sections, Sec.sh_link), toString(this))); 211 212 const Elf_Sym *Sym = check( 213 object::getSymbol<ELFT>(this->Symbols, Sec.sh_info), toString(this)); 214 StringRef Signature = check(Sym->getName(this->StringTable), toString(this)); 215 216 // As a special case, if a symbol is a section symbol and has no name, 217 // we use a section name as a signature. 218 // 219 // Such SHT_GROUP sections are invalid from the perspective of the ELF 220 // standard, but GNU gold 1.14 (the neweset version as of July 2017) or 221 // older produce such sections as outputs for the -r option, so we need 222 // a bug-compatibility. 223 if (Signature.empty() && Sym->getType() == STT_SECTION) 224 return getSectionName(Sec); 225 return Signature; 226 } 227 228 template <class ELFT> 229 ArrayRef<typename elf::ObjectFile<ELFT>::Elf_Word> 230 elf::ObjectFile<ELFT>::getShtGroupEntries(const Elf_Shdr &Sec) { 231 const ELFFile<ELFT> &Obj = this->getObj(); 232 ArrayRef<Elf_Word> Entries = check( 233 Obj.template getSectionContentsAsArray<Elf_Word>(&Sec), toString(this)); 234 if (Entries.empty() || Entries[0] != GRP_COMDAT) 235 fatal(toString(this) + ": unsupported SHT_GROUP format"); 236 return Entries.slice(1); 237 } 238 239 template <class ELFT> 240 bool elf::ObjectFile<ELFT>::shouldMerge(const Elf_Shdr &Sec) { 241 // We don't merge sections if -O0 (default is -O1). This makes sometimes 242 // the linker significantly faster, although the output will be bigger. 243 if (Config->Optimize == 0) 244 return false; 245 246 // Do not merge sections if generating a relocatable object. It makes 247 // the code simpler because we do not need to update relocation addends 248 // to reflect changes introduced by merging. Instead of that we write 249 // such "merge" sections into separate OutputSections and keep SHF_MERGE 250 // / SHF_STRINGS flags and sh_entsize value to be able to perform merging 251 // later during a final linking. 252 if (Config->Relocatable) 253 return false; 254 255 // A mergeable section with size 0 is useless because they don't have 256 // any data to merge. A mergeable string section with size 0 can be 257 // argued as invalid because it doesn't end with a null character. 258 // We'll avoid a mess by handling them as if they were non-mergeable. 259 if (Sec.sh_size == 0) 260 return false; 261 262 // Check for sh_entsize. The ELF spec is not clear about the zero 263 // sh_entsize. It says that "the member [sh_entsize] contains 0 if 264 // the section does not hold a table of fixed-size entries". We know 265 // that Rust 1.13 produces a string mergeable section with a zero 266 // sh_entsize. Here we just accept it rather than being picky about it. 267 uint64_t EntSize = Sec.sh_entsize; 268 if (EntSize == 0) 269 return false; 270 if (Sec.sh_size % EntSize) 271 fatal(toString(this) + 272 ": SHF_MERGE section size must be a multiple of sh_entsize"); 273 274 uint64_t Flags = Sec.sh_flags; 275 if (!(Flags & SHF_MERGE)) 276 return false; 277 if (Flags & SHF_WRITE) 278 fatal(toString(this) + ": writable SHF_MERGE section is not supported"); 279 280 // Don't try to merge if the alignment is larger than the sh_entsize and this 281 // is not SHF_STRINGS. 282 // 283 // Since this is not a SHF_STRINGS, we would need to pad after every entity. 284 // It would be equivalent for the producer of the .o to just set a larger 285 // sh_entsize. 286 if (Flags & SHF_STRINGS) 287 return true; 288 289 return Sec.sh_addralign <= EntSize; 290 } 291 292 template <class ELFT> 293 void elf::ObjectFile<ELFT>::initializeSections( 294 DenseSet<CachedHashStringRef> &ComdatGroups) { 295 const ELFFile<ELFT> &Obj = this->getObj(); 296 297 ArrayRef<Elf_Shdr> ObjSections = 298 check(this->getObj().sections(), toString(this)); 299 uint64_t Size = ObjSections.size(); 300 this->Sections.resize(Size); 301 this->SectionStringTable = 302 check(Obj.getSectionStringTable(ObjSections), toString(this)); 303 304 for (size_t I = 0, E = ObjSections.size(); I < E; I++) { 305 if (this->Sections[I] == &InputSection::Discarded) 306 continue; 307 const Elf_Shdr &Sec = ObjSections[I]; 308 309 // SHF_EXCLUDE'ed sections are discarded by the linker. However, 310 // if -r is given, we'll let the final link discard such sections. 311 // This is compatible with GNU. 312 if ((Sec.sh_flags & SHF_EXCLUDE) && !Config->Relocatable) { 313 this->Sections[I] = &InputSection::Discarded; 314 continue; 315 } 316 317 switch (Sec.sh_type) { 318 case SHT_GROUP: { 319 // De-duplicate section groups by their signatures. 320 StringRef Signature = getShtGroupSignature(ObjSections, Sec); 321 bool IsNew = ComdatGroups.insert(CachedHashStringRef(Signature)).second; 322 this->Sections[I] = &InputSection::Discarded; 323 324 // If it is a new section group, we want to keep group members. 325 // Group leader sections, which contain indices of group members, are 326 // discarded because they are useless beyond this point. The only 327 // exception is the -r option because in order to produce re-linkable 328 // object files, we want to pass through basically everything. 329 if (IsNew) { 330 if (Config->Relocatable) 331 this->Sections[I] = createInputSection(Sec); 332 continue; 333 } 334 335 // Otherwise, discard group members. 336 for (uint32_t SecIndex : getShtGroupEntries(Sec)) { 337 if (SecIndex >= Size) 338 fatal(toString(this) + 339 ": invalid section index in group: " + Twine(SecIndex)); 340 this->Sections[SecIndex] = &InputSection::Discarded; 341 } 342 break; 343 } 344 case SHT_SYMTAB: 345 this->initSymtab(ObjSections, &Sec); 346 break; 347 case SHT_SYMTAB_SHNDX: 348 this->SymtabSHNDX = 349 check(Obj.getSHNDXTable(Sec, ObjSections), toString(this)); 350 break; 351 case SHT_STRTAB: 352 case SHT_NULL: 353 break; 354 default: 355 this->Sections[I] = createInputSection(Sec); 356 } 357 358 // .ARM.exidx sections have a reverse dependency on the InputSection they 359 // have a SHF_LINK_ORDER dependency, this is identified by the sh_link. 360 if (Sec.sh_flags & SHF_LINK_ORDER) { 361 if (Sec.sh_link >= this->Sections.size()) 362 fatal(toString(this) + ": invalid sh_link index: " + 363 Twine(Sec.sh_link)); 364 this->Sections[Sec.sh_link]->DependentSections.push_back( 365 this->Sections[I]); 366 } 367 } 368 } 369 370 template <class ELFT> 371 InputSectionBase *elf::ObjectFile<ELFT>::getRelocTarget(const Elf_Shdr &Sec) { 372 uint32_t Idx = Sec.sh_info; 373 if (Idx >= this->Sections.size()) 374 fatal(toString(this) + ": invalid relocated section index: " + Twine(Idx)); 375 InputSectionBase *Target = this->Sections[Idx]; 376 377 // Strictly speaking, a relocation section must be included in the 378 // group of the section it relocates. However, LLVM 3.3 and earlier 379 // would fail to do so, so we gracefully handle that case. 380 if (Target == &InputSection::Discarded) 381 return nullptr; 382 383 if (!Target) 384 fatal(toString(this) + ": unsupported relocation reference"); 385 return Target; 386 } 387 388 // Create a regular InputSection class that has the same contents 389 // as a given section. 390 InputSectionBase *toRegularSection(MergeInputSection *Sec) { 391 auto *Ret = make<InputSection>(Sec->Flags, Sec->Type, Sec->Alignment, 392 Sec->Data, Sec->Name); 393 Ret->File = Sec->File; 394 return Ret; 395 } 396 397 template <class ELFT> 398 InputSectionBase * 399 elf::ObjectFile<ELFT>::createInputSection(const Elf_Shdr &Sec) { 400 StringRef Name = getSectionName(Sec); 401 402 switch (Sec.sh_type) { 403 case SHT_ARM_ATTRIBUTES: 404 // FIXME: ARM meta-data section. Retain the first attribute section 405 // we see. The eglibc ARM dynamic loaders require the presence of an 406 // attribute section for dlopen to work. 407 // In a full implementation we would merge all attribute sections. 408 if (InX::ARMAttributes == nullptr) { 409 InX::ARMAttributes = make<InputSection>(this, &Sec, Name); 410 return InX::ARMAttributes; 411 } 412 return &InputSection::Discarded; 413 case SHT_RELA: 414 case SHT_REL: { 415 // Find the relocation target section and associate this 416 // section with it. Target can be discarded, for example 417 // if it is a duplicated member of SHT_GROUP section, we 418 // do not create or proccess relocatable sections then. 419 InputSectionBase *Target = getRelocTarget(Sec); 420 if (!Target) 421 return nullptr; 422 423 // This section contains relocation information. 424 // If -r is given, we do not interpret or apply relocation 425 // but just copy relocation sections to output. 426 if (Config->Relocatable) 427 return make<InputSection>(this, &Sec, Name); 428 429 if (Target->FirstRelocation) 430 fatal(toString(this) + 431 ": multiple relocation sections to one section are not supported"); 432 433 // Mergeable sections with relocations are tricky because relocations 434 // need to be taken into account when comparing section contents for 435 // merging. It's not worth supporting such mergeable sections because 436 // they are rare and it'd complicates the internal design (we usually 437 // have to determine if two sections are mergeable early in the link 438 // process much before applying relocations). We simply handle mergeable 439 // sections with relocations as non-mergeable. 440 if (auto *MS = dyn_cast<MergeInputSection>(Target)) { 441 Target = toRegularSection(MS); 442 this->Sections[Sec.sh_info] = Target; 443 } 444 445 size_t NumRelocations; 446 if (Sec.sh_type == SHT_RELA) { 447 ArrayRef<Elf_Rela> Rels = 448 check(this->getObj().relas(&Sec), toString(this)); 449 Target->FirstRelocation = Rels.begin(); 450 NumRelocations = Rels.size(); 451 Target->AreRelocsRela = true; 452 } else { 453 ArrayRef<Elf_Rel> Rels = check(this->getObj().rels(&Sec), toString(this)); 454 Target->FirstRelocation = Rels.begin(); 455 NumRelocations = Rels.size(); 456 Target->AreRelocsRela = false; 457 } 458 assert(isUInt<31>(NumRelocations)); 459 Target->NumRelocations = NumRelocations; 460 461 // Relocation sections processed by the linker are usually removed 462 // from the output, so returning `nullptr` for the normal case. 463 // However, if -emit-relocs is given, we need to leave them in the output. 464 // (Some post link analysis tools need this information.) 465 if (Config->EmitRelocs) { 466 InputSection *RelocSec = make<InputSection>(this, &Sec, Name); 467 // We will not emit relocation section if target was discarded. 468 Target->DependentSections.push_back(RelocSec); 469 return RelocSec; 470 } 471 return nullptr; 472 } 473 } 474 475 // The GNU linker uses .note.GNU-stack section as a marker indicating 476 // that the code in the object file does not expect that the stack is 477 // executable (in terms of NX bit). If all input files have the marker, 478 // the GNU linker adds a PT_GNU_STACK segment to tells the loader to 479 // make the stack non-executable. Most object files have this section as 480 // of 2017. 481 // 482 // But making the stack non-executable is a norm today for security 483 // reasons. Failure to do so may result in a serious security issue. 484 // Therefore, we make LLD always add PT_GNU_STACK unless it is 485 // explicitly told to do otherwise (by -z execstack). Because the stack 486 // executable-ness is controlled solely by command line options, 487 // .note.GNU-stack sections are simply ignored. 488 if (Name == ".note.GNU-stack") 489 return &InputSection::Discarded; 490 491 // Split stacks is a feature to support a discontiguous stack. At least 492 // as of 2017, it seems that the feature is not being used widely. 493 // Only GNU gold supports that. We don't. For the details about that, 494 // see https://gcc.gnu.org/wiki/SplitStacks 495 if (Name == ".note.GNU-split-stack") { 496 error(toString(this) + 497 ": object file compiled with -fsplit-stack is not supported"); 498 return &InputSection::Discarded; 499 } 500 501 if (Config->Strip != StripPolicy::None && Name.startswith(".debug")) 502 return &InputSection::Discarded; 503 504 // If -gdb-index is given, LLD creates .gdb_index section, and that 505 // section serves the same purpose as .debug_gnu_pub{names,types} sections. 506 // If that's the case, we want to eliminate .debug_gnu_pub{names,types} 507 // because they are redundant and can waste large amount of disk space 508 // (for example, they are about 400 MiB in total for a clang debug build.) 509 if (Config->GdbIndex && 510 (Name == ".debug_gnu_pubnames" || Name == ".debug_gnu_pubtypes")) 511 return &InputSection::Discarded; 512 513 // The linkonce feature is a sort of proto-comdat. Some glibc i386 object 514 // files contain definitions of symbol "__x86.get_pc_thunk.bx" in linkonce 515 // sections. Drop those sections to avoid duplicate symbol errors. 516 // FIXME: This is glibc PR20543, we should remove this hack once that has been 517 // fixed for a while. 518 if (Name.startswith(".gnu.linkonce.")) 519 return &InputSection::Discarded; 520 521 // The linker merges EH (exception handling) frames and creates a 522 // .eh_frame_hdr section for runtime. So we handle them with a special 523 // class. For relocatable outputs, they are just passed through. 524 if (Name == ".eh_frame" && !Config->Relocatable) 525 return make<EhInputSection>(this, &Sec, Name); 526 527 if (shouldMerge(Sec)) 528 return make<MergeInputSection>(this, &Sec, Name); 529 return make<InputSection>(this, &Sec, Name); 530 } 531 532 template <class ELFT> 533 StringRef elf::ObjectFile<ELFT>::getSectionName(const Elf_Shdr &Sec) { 534 return check(this->getObj().getSectionName(&Sec, SectionStringTable), 535 toString(this)); 536 } 537 538 template <class ELFT> void elf::ObjectFile<ELFT>::initializeSymbols() { 539 SymbolBodies.reserve(this->Symbols.size()); 540 for (const Elf_Sym &Sym : this->Symbols) 541 SymbolBodies.push_back(createSymbolBody(&Sym)); 542 } 543 544 template <class ELFT> 545 InputSectionBase *elf::ObjectFile<ELFT>::getSection(const Elf_Sym &Sym) const { 546 uint32_t Index = this->getSectionIndex(Sym); 547 if (Index >= this->Sections.size()) 548 fatal(toString(this) + ": invalid section index: " + Twine(Index)); 549 InputSectionBase *S = this->Sections[Index]; 550 551 // We found that GNU assembler 2.17.50 [FreeBSD] 2007-07-03 could 552 // generate broken objects. STT_SECTION/STT_NOTYPE symbols can be 553 // associated with SHT_REL[A]/SHT_SYMTAB/SHT_STRTAB sections. 554 // In this case it is fine for section to be null here as we do not 555 // allocate sections of these types. 556 if (!S) { 557 if (Index == 0 || Sym.getType() == STT_SECTION || 558 Sym.getType() == STT_NOTYPE) 559 return nullptr; 560 fatal(toString(this) + ": invalid section index: " + Twine(Index)); 561 } 562 563 if (S == &InputSection::Discarded) 564 return S; 565 return S->Repl; 566 } 567 568 template <class ELFT> 569 SymbolBody *elf::ObjectFile<ELFT>::createSymbolBody(const Elf_Sym *Sym) { 570 int Binding = Sym->getBinding(); 571 InputSectionBase *Sec = getSection(*Sym); 572 573 uint8_t StOther = Sym->st_other; 574 uint8_t Type = Sym->getType(); 575 uint64_t Value = Sym->st_value; 576 uint64_t Size = Sym->st_size; 577 578 if (Binding == STB_LOCAL) { 579 if (Sym->getType() == STT_FILE) 580 SourceFile = check(Sym->getName(this->StringTable), toString(this)); 581 582 if (this->StringTable.size() <= Sym->st_name) 583 fatal(toString(this) + ": invalid symbol name offset"); 584 585 StringRefZ Name = this->StringTable.data() + Sym->st_name; 586 if (Sym->st_shndx == SHN_UNDEF) 587 return make<Undefined>(Name, /*IsLocal=*/true, StOther, Type, this); 588 589 return make<DefinedRegular>(Name, /*IsLocal=*/true, StOther, Type, Value, 590 Size, Sec, this); 591 } 592 593 StringRef Name = check(Sym->getName(this->StringTable), toString(this)); 594 595 switch (Sym->st_shndx) { 596 case SHN_UNDEF: 597 return elf::Symtab<ELFT>::X 598 ->addUndefined(Name, /*IsLocal=*/false, Binding, StOther, Type, 599 /*CanOmitFromDynSym=*/false, this) 600 ->body(); 601 case SHN_COMMON: 602 if (Value == 0 || Value >= UINT32_MAX) 603 fatal(toString(this) + ": common symbol '" + Name + 604 "' has invalid alignment: " + Twine(Value)); 605 return elf::Symtab<ELFT>::X 606 ->addCommon(Name, Size, Value, Binding, StOther, Type, this) 607 ->body(); 608 } 609 610 switch (Binding) { 611 default: 612 fatal(toString(this) + ": unexpected binding: " + Twine(Binding)); 613 case STB_GLOBAL: 614 case STB_WEAK: 615 case STB_GNU_UNIQUE: 616 if (Sec == &InputSection::Discarded) 617 return elf::Symtab<ELFT>::X 618 ->addUndefined(Name, /*IsLocal=*/false, Binding, StOther, Type, 619 /*CanOmitFromDynSym=*/false, this) 620 ->body(); 621 return elf::Symtab<ELFT>::X 622 ->addRegular(Name, StOther, Type, Value, Size, Binding, Sec, this) 623 ->body(); 624 } 625 } 626 627 ArchiveFile::ArchiveFile(std::unique_ptr<Archive> &&File) 628 : InputFile(ArchiveKind, File->getMemoryBufferRef()), 629 File(std::move(File)) {} 630 631 template <class ELFT> void ArchiveFile::parse() { 632 Symbols.reserve(File->getNumberOfSymbols()); 633 for (const Archive::Symbol &Sym : File->symbols()) 634 Symbols.push_back(Symtab<ELFT>::X->addLazyArchive(this, Sym)); 635 } 636 637 // Returns a buffer pointing to a member file containing a given symbol. 638 std::pair<MemoryBufferRef, uint64_t> 639 ArchiveFile::getMember(const Archive::Symbol *Sym) { 640 Archive::Child C = 641 check(Sym->getMember(), toString(this) + 642 ": could not get the member for symbol " + 643 Sym->getName()); 644 645 if (!Seen.insert(C.getChildOffset()).second) 646 return {MemoryBufferRef(), 0}; 647 648 MemoryBufferRef Ret = 649 check(C.getMemoryBufferRef(), 650 toString(this) + 651 ": could not get the buffer for the member defining symbol " + 652 Sym->getName()); 653 654 if (C.getParent()->isThin() && Tar) 655 Tar->append(relativeToRoot(check(C.getFullName(), toString(this))), 656 Ret.getBuffer()); 657 if (C.getParent()->isThin()) 658 return {Ret, 0}; 659 return {Ret, C.getChildOffset()}; 660 } 661 662 template <class ELFT> 663 SharedFile<ELFT>::SharedFile(MemoryBufferRef M, StringRef DefaultSoName) 664 : ELFFileBase<ELFT>(Base::SharedKind, M), SoName(DefaultSoName), 665 AsNeeded(Config->AsNeeded) {} 666 667 template <class ELFT> 668 const typename ELFT::Shdr * 669 SharedFile<ELFT>::getSection(const Elf_Sym &Sym) const { 670 return check( 671 this->getObj().getSection(&Sym, this->Symbols, this->SymtabSHNDX), 672 toString(this)); 673 } 674 675 // Partially parse the shared object file so that we can call 676 // getSoName on this object. 677 template <class ELFT> void SharedFile<ELFT>::parseSoName() { 678 const Elf_Shdr *DynamicSec = nullptr; 679 const ELFFile<ELFT> Obj = this->getObj(); 680 ArrayRef<Elf_Shdr> Sections = check(Obj.sections(), toString(this)); 681 682 // Search for .dynsym, .dynamic, .symtab, .gnu.version and .gnu.version_d. 683 for (const Elf_Shdr &Sec : Sections) { 684 switch (Sec.sh_type) { 685 default: 686 continue; 687 case SHT_DYNSYM: 688 this->initSymtab(Sections, &Sec); 689 break; 690 case SHT_DYNAMIC: 691 DynamicSec = &Sec; 692 break; 693 case SHT_SYMTAB_SHNDX: 694 this->SymtabSHNDX = 695 check(Obj.getSHNDXTable(Sec, Sections), toString(this)); 696 break; 697 case SHT_GNU_versym: 698 this->VersymSec = &Sec; 699 break; 700 case SHT_GNU_verdef: 701 this->VerdefSec = &Sec; 702 break; 703 } 704 } 705 706 if (this->VersymSec && this->Symbols.empty()) 707 error("SHT_GNU_versym should be associated with symbol table"); 708 709 // Search for a DT_SONAME tag to initialize this->SoName. 710 if (!DynamicSec) 711 return; 712 ArrayRef<Elf_Dyn> Arr = 713 check(Obj.template getSectionContentsAsArray<Elf_Dyn>(DynamicSec), 714 toString(this)); 715 for (const Elf_Dyn &Dyn : Arr) { 716 if (Dyn.d_tag == DT_SONAME) { 717 uint64_t Val = Dyn.getVal(); 718 if (Val >= this->StringTable.size()) 719 fatal(toString(this) + ": invalid DT_SONAME entry"); 720 SoName = this->StringTable.data() + Val; 721 return; 722 } 723 } 724 } 725 726 // Parse the version definitions in the object file if present. Returns a vector 727 // whose nth element contains a pointer to the Elf_Verdef for version identifier 728 // n. Version identifiers that are not definitions map to nullptr. The array 729 // always has at least length 1. 730 template <class ELFT> 731 std::vector<const typename ELFT::Verdef *> 732 SharedFile<ELFT>::parseVerdefs(const Elf_Versym *&Versym) { 733 std::vector<const Elf_Verdef *> Verdefs(1); 734 // We only need to process symbol versions for this DSO if it has both a 735 // versym and a verdef section, which indicates that the DSO contains symbol 736 // version definitions. 737 if (!VersymSec || !VerdefSec) 738 return Verdefs; 739 740 // The location of the first global versym entry. 741 const char *Base = this->MB.getBuffer().data(); 742 Versym = reinterpret_cast<const Elf_Versym *>(Base + VersymSec->sh_offset) + 743 this->FirstNonLocal; 744 745 // We cannot determine the largest verdef identifier without inspecting 746 // every Elf_Verdef, but both bfd and gold assign verdef identifiers 747 // sequentially starting from 1, so we predict that the largest identifier 748 // will be VerdefCount. 749 unsigned VerdefCount = VerdefSec->sh_info; 750 Verdefs.resize(VerdefCount + 1); 751 752 // Build the Verdefs array by following the chain of Elf_Verdef objects 753 // from the start of the .gnu.version_d section. 754 const char *Verdef = Base + VerdefSec->sh_offset; 755 for (unsigned I = 0; I != VerdefCount; ++I) { 756 auto *CurVerdef = reinterpret_cast<const Elf_Verdef *>(Verdef); 757 Verdef += CurVerdef->vd_next; 758 unsigned VerdefIndex = CurVerdef->vd_ndx; 759 if (Verdefs.size() <= VerdefIndex) 760 Verdefs.resize(VerdefIndex + 1); 761 Verdefs[VerdefIndex] = CurVerdef; 762 } 763 764 return Verdefs; 765 } 766 767 // Fully parse the shared object file. This must be called after parseSoName(). 768 template <class ELFT> void SharedFile<ELFT>::parseRest() { 769 // Create mapping from version identifiers to Elf_Verdef entries. 770 const Elf_Versym *Versym = nullptr; 771 std::vector<const Elf_Verdef *> Verdefs = parseVerdefs(Versym); 772 773 Elf_Sym_Range Syms = this->getGlobalSymbols(); 774 for (const Elf_Sym &Sym : Syms) { 775 unsigned VersymIndex = 0; 776 if (Versym) { 777 VersymIndex = Versym->vs_index; 778 ++Versym; 779 } 780 bool Hidden = VersymIndex & VERSYM_HIDDEN; 781 VersymIndex = VersymIndex & ~VERSYM_HIDDEN; 782 783 StringRef Name = check(Sym.getName(this->StringTable), toString(this)); 784 if (Sym.isUndefined()) { 785 Undefs.push_back(Name); 786 continue; 787 } 788 789 // Ignore local symbols. 790 if (Versym && VersymIndex == VER_NDX_LOCAL) 791 continue; 792 793 const Elf_Verdef *V = 794 VersymIndex == VER_NDX_GLOBAL ? nullptr : Verdefs[VersymIndex]; 795 796 if (!Hidden) 797 elf::Symtab<ELFT>::X->addShared(this, Name, Sym, V); 798 799 // Also add the symbol with the versioned name to handle undefined symbols 800 // with explicit versions. 801 if (V) { 802 StringRef VerName = this->StringTable.data() + V->getAux()->vda_name; 803 Name = Saver.save(Name + "@" + VerName); 804 elf::Symtab<ELFT>::X->addShared(this, Name, Sym, V); 805 } 806 } 807 } 808 809 static ELFKind getBitcodeELFKind(const Triple &T) { 810 if (T.isLittleEndian()) 811 return T.isArch64Bit() ? ELF64LEKind : ELF32LEKind; 812 return T.isArch64Bit() ? ELF64BEKind : ELF32BEKind; 813 } 814 815 static uint8_t getBitcodeMachineKind(StringRef Path, const Triple &T) { 816 switch (T.getArch()) { 817 case Triple::aarch64: 818 return EM_AARCH64; 819 case Triple::arm: 820 case Triple::thumb: 821 return EM_ARM; 822 case Triple::avr: 823 return EM_AVR; 824 case Triple::mips: 825 case Triple::mipsel: 826 case Triple::mips64: 827 case Triple::mips64el: 828 return EM_MIPS; 829 case Triple::ppc: 830 return EM_PPC; 831 case Triple::ppc64: 832 return EM_PPC64; 833 case Triple::x86: 834 return T.isOSIAMCU() ? EM_IAMCU : EM_386; 835 case Triple::x86_64: 836 return EM_X86_64; 837 default: 838 fatal(Path + ": could not infer e_machine from bitcode target triple " + 839 T.str()); 840 } 841 } 842 843 BitcodeFile::BitcodeFile(MemoryBufferRef MB, StringRef ArchiveName, 844 uint64_t OffsetInArchive) 845 : InputFile(BitcodeKind, MB) { 846 this->ArchiveName = ArchiveName; 847 848 // Here we pass a new MemoryBufferRef which is identified by ArchiveName 849 // (the fully resolved path of the archive) + member name + offset of the 850 // member in the archive. 851 // ThinLTO uses the MemoryBufferRef identifier to access its internal 852 // data structures and if two archives define two members with the same name, 853 // this causes a collision which result in only one of the objects being 854 // taken into consideration at LTO time (which very likely causes undefined 855 // symbols later in the link stage). 856 MemoryBufferRef MBRef(MB.getBuffer(), 857 Saver.save(ArchiveName + MB.getBufferIdentifier() + 858 utostr(OffsetInArchive))); 859 Obj = check(lto::InputFile::create(MBRef), toString(this)); 860 861 Triple T(Obj->getTargetTriple()); 862 EKind = getBitcodeELFKind(T); 863 EMachine = getBitcodeMachineKind(MB.getBufferIdentifier(), T); 864 } 865 866 static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) { 867 switch (GvVisibility) { 868 case GlobalValue::DefaultVisibility: 869 return STV_DEFAULT; 870 case GlobalValue::HiddenVisibility: 871 return STV_HIDDEN; 872 case GlobalValue::ProtectedVisibility: 873 return STV_PROTECTED; 874 } 875 llvm_unreachable("unknown visibility"); 876 } 877 878 template <class ELFT> 879 static Symbol *createBitcodeSymbol(const std::vector<bool> &KeptComdats, 880 const lto::InputFile::Symbol &ObjSym, 881 BitcodeFile *F) { 882 StringRef NameRef = Saver.save(ObjSym.getName()); 883 uint32_t Binding = ObjSym.isWeak() ? STB_WEAK : STB_GLOBAL; 884 885 uint8_t Type = ObjSym.isTLS() ? STT_TLS : STT_NOTYPE; 886 uint8_t Visibility = mapVisibility(ObjSym.getVisibility()); 887 bool CanOmitFromDynSym = ObjSym.canBeOmittedFromSymbolTable(); 888 889 int C = ObjSym.getComdatIndex(); 890 if (C != -1 && !KeptComdats[C]) 891 return Symtab<ELFT>::X->addUndefined(NameRef, /*IsLocal=*/false, Binding, 892 Visibility, Type, CanOmitFromDynSym, 893 F); 894 895 if (ObjSym.isUndefined()) 896 return Symtab<ELFT>::X->addUndefined(NameRef, /*IsLocal=*/false, Binding, 897 Visibility, Type, CanOmitFromDynSym, 898 F); 899 900 if (ObjSym.isCommon()) 901 return Symtab<ELFT>::X->addCommon(NameRef, ObjSym.getCommonSize(), 902 ObjSym.getCommonAlignment(), Binding, 903 Visibility, STT_OBJECT, F); 904 905 return Symtab<ELFT>::X->addBitcode(NameRef, Binding, Visibility, Type, 906 CanOmitFromDynSym, F); 907 } 908 909 template <class ELFT> 910 void BitcodeFile::parse(DenseSet<CachedHashStringRef> &ComdatGroups) { 911 std::vector<bool> KeptComdats; 912 for (StringRef S : Obj->getComdatTable()) 913 KeptComdats.push_back(ComdatGroups.insert(CachedHashStringRef(S)).second); 914 915 for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) 916 Symbols.push_back(createBitcodeSymbol<ELFT>(KeptComdats, ObjSym, this)); 917 } 918 919 static ELFKind getELFKind(MemoryBufferRef MB) { 920 unsigned char Size; 921 unsigned char Endian; 922 std::tie(Size, Endian) = getElfArchType(MB.getBuffer()); 923 924 if (Endian != ELFDATA2LSB && Endian != ELFDATA2MSB) 925 fatal(MB.getBufferIdentifier() + ": invalid data encoding"); 926 if (Size != ELFCLASS32 && Size != ELFCLASS64) 927 fatal(MB.getBufferIdentifier() + ": invalid file class"); 928 929 size_t BufSize = MB.getBuffer().size(); 930 if ((Size == ELFCLASS32 && BufSize < sizeof(Elf32_Ehdr)) || 931 (Size == ELFCLASS64 && BufSize < sizeof(Elf64_Ehdr))) 932 fatal(MB.getBufferIdentifier() + ": file is too short"); 933 934 if (Size == ELFCLASS32) 935 return (Endian == ELFDATA2LSB) ? ELF32LEKind : ELF32BEKind; 936 return (Endian == ELFDATA2LSB) ? ELF64LEKind : ELF64BEKind; 937 } 938 939 template <class ELFT> void BinaryFile::parse() { 940 ArrayRef<uint8_t> Data = toArrayRef(MB.getBuffer()); 941 auto *Section = 942 make<InputSection>(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, 8, Data, ".data"); 943 Sections.push_back(Section); 944 945 // For each input file foo that is embedded to a result as a binary 946 // blob, we define _binary_foo_{start,end,size} symbols, so that 947 // user programs can access blobs by name. Non-alphanumeric 948 // characters in a filename are replaced with underscore. 949 std::string S = "_binary_" + MB.getBufferIdentifier().str(); 950 for (size_t I = 0; I < S.size(); ++I) 951 if (!isalnum(S[I])) 952 S[I] = '_'; 953 954 elf::Symtab<ELFT>::X->addRegular(Saver.save(S + "_start"), STV_DEFAULT, 955 STT_OBJECT, 0, 0, STB_GLOBAL, Section, 956 nullptr); 957 elf::Symtab<ELFT>::X->addRegular(Saver.save(S + "_end"), STV_DEFAULT, 958 STT_OBJECT, Data.size(), 0, STB_GLOBAL, 959 Section, nullptr); 960 elf::Symtab<ELFT>::X->addRegular(Saver.save(S + "_size"), STV_DEFAULT, 961 STT_OBJECT, Data.size(), 0, STB_GLOBAL, 962 nullptr, nullptr); 963 } 964 965 static bool isBitcode(MemoryBufferRef MB) { 966 using namespace sys::fs; 967 return identify_magic(MB.getBuffer()) == file_magic::bitcode; 968 } 969 970 InputFile *elf::createObjectFile(MemoryBufferRef MB, StringRef ArchiveName, 971 uint64_t OffsetInArchive) { 972 if (isBitcode(MB)) 973 return make<BitcodeFile>(MB, ArchiveName, OffsetInArchive); 974 975 switch (getELFKind(MB)) { 976 case ELF32LEKind: 977 return make<ObjectFile<ELF32LE>>(MB, ArchiveName); 978 case ELF32BEKind: 979 return make<ObjectFile<ELF32BE>>(MB, ArchiveName); 980 case ELF64LEKind: 981 return make<ObjectFile<ELF64LE>>(MB, ArchiveName); 982 case ELF64BEKind: 983 return make<ObjectFile<ELF64BE>>(MB, ArchiveName); 984 default: 985 llvm_unreachable("getELFKind"); 986 } 987 } 988 989 InputFile *elf::createSharedFile(MemoryBufferRef MB, StringRef DefaultSoName) { 990 switch (getELFKind(MB)) { 991 case ELF32LEKind: 992 return make<SharedFile<ELF32LE>>(MB, DefaultSoName); 993 case ELF32BEKind: 994 return make<SharedFile<ELF32BE>>(MB, DefaultSoName); 995 case ELF64LEKind: 996 return make<SharedFile<ELF64LE>>(MB, DefaultSoName); 997 case ELF64BEKind: 998 return make<SharedFile<ELF64BE>>(MB, DefaultSoName); 999 default: 1000 llvm_unreachable("getELFKind"); 1001 } 1002 } 1003 1004 MemoryBufferRef LazyObjectFile::getBuffer() { 1005 if (Seen) 1006 return MemoryBufferRef(); 1007 Seen = true; 1008 return MB; 1009 } 1010 1011 InputFile *LazyObjectFile::fetch() { 1012 MemoryBufferRef MBRef = getBuffer(); 1013 if (MBRef.getBuffer().empty()) 1014 return nullptr; 1015 return createObjectFile(MBRef, ArchiveName, OffsetInArchive); 1016 } 1017 1018 template <class ELFT> void LazyObjectFile::parse() { 1019 for (StringRef Sym : getSymbols()) 1020 Symtab<ELFT>::X->addLazyObject(Sym, *this); 1021 } 1022 1023 template <class ELFT> std::vector<StringRef> LazyObjectFile::getElfSymbols() { 1024 typedef typename ELFT::Shdr Elf_Shdr; 1025 typedef typename ELFT::Sym Elf_Sym; 1026 typedef typename ELFT::SymRange Elf_Sym_Range; 1027 1028 const ELFFile<ELFT> Obj(this->MB.getBuffer()); 1029 ArrayRef<Elf_Shdr> Sections = check(Obj.sections(), toString(this)); 1030 for (const Elf_Shdr &Sec : Sections) { 1031 if (Sec.sh_type != SHT_SYMTAB) 1032 continue; 1033 1034 Elf_Sym_Range Syms = check(Obj.symbols(&Sec), toString(this)); 1035 uint32_t FirstNonLocal = Sec.sh_info; 1036 StringRef StringTable = 1037 check(Obj.getStringTableForSymtab(Sec, Sections), toString(this)); 1038 std::vector<StringRef> V; 1039 1040 for (const Elf_Sym &Sym : Syms.slice(FirstNonLocal)) 1041 if (Sym.st_shndx != SHN_UNDEF) 1042 V.push_back(check(Sym.getName(StringTable), toString(this))); 1043 return V; 1044 } 1045 return {}; 1046 } 1047 1048 std::vector<StringRef> LazyObjectFile::getBitcodeSymbols() { 1049 std::unique_ptr<lto::InputFile> Obj = 1050 check(lto::InputFile::create(this->MB), toString(this)); 1051 std::vector<StringRef> V; 1052 for (const lto::InputFile::Symbol &Sym : Obj->symbols()) 1053 if (!Sym.isUndefined()) 1054 V.push_back(Saver.save(Sym.getName())); 1055 return V; 1056 } 1057 1058 // Returns a vector of globally-visible defined symbol names. 1059 std::vector<StringRef> LazyObjectFile::getSymbols() { 1060 if (isBitcode(this->MB)) 1061 return getBitcodeSymbols(); 1062 1063 switch (getELFKind(this->MB)) { 1064 case ELF32LEKind: 1065 return getElfSymbols<ELF32LE>(); 1066 case ELF32BEKind: 1067 return getElfSymbols<ELF32BE>(); 1068 case ELF64LEKind: 1069 return getElfSymbols<ELF64LE>(); 1070 case ELF64BEKind: 1071 return getElfSymbols<ELF64BE>(); 1072 default: 1073 llvm_unreachable("getELFKind"); 1074 } 1075 } 1076 1077 template void ArchiveFile::parse<ELF32LE>(); 1078 template void ArchiveFile::parse<ELF32BE>(); 1079 template void ArchiveFile::parse<ELF64LE>(); 1080 template void ArchiveFile::parse<ELF64BE>(); 1081 1082 template void BitcodeFile::parse<ELF32LE>(DenseSet<CachedHashStringRef> &); 1083 template void BitcodeFile::parse<ELF32BE>(DenseSet<CachedHashStringRef> &); 1084 template void BitcodeFile::parse<ELF64LE>(DenseSet<CachedHashStringRef> &); 1085 template void BitcodeFile::parse<ELF64BE>(DenseSet<CachedHashStringRef> &); 1086 1087 template void LazyObjectFile::parse<ELF32LE>(); 1088 template void LazyObjectFile::parse<ELF32BE>(); 1089 template void LazyObjectFile::parse<ELF64LE>(); 1090 template void LazyObjectFile::parse<ELF64BE>(); 1091 1092 template class elf::ELFFileBase<ELF32LE>; 1093 template class elf::ELFFileBase<ELF32BE>; 1094 template class elf::ELFFileBase<ELF64LE>; 1095 template class elf::ELFFileBase<ELF64BE>; 1096 1097 template class elf::ObjectFile<ELF32LE>; 1098 template class elf::ObjectFile<ELF32BE>; 1099 template class elf::ObjectFile<ELF64LE>; 1100 template class elf::ObjectFile<ELF64BE>; 1101 1102 template class elf::SharedFile<ELF32LE>; 1103 template class elf::SharedFile<ELF32BE>; 1104 template class elf::SharedFile<ELF64LE>; 1105 template class elf::SharedFile<ELF64BE>; 1106 1107 template void BinaryFile::parse<ELF32LE>(); 1108 template void BinaryFile::parse<ELF32BE>(); 1109 template void BinaryFile::parse<ELF64LE>(); 1110 template void BinaryFile::parse<ELF64BE>(); 1111