1 //===- InputFiles.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "InputFiles.h"
10 #include "Driver.h"
11 #include "InputSection.h"
12 #include "LinkerScript.h"
13 #include "SymbolTable.h"
14 #include "Symbols.h"
15 #include "SyntheticSections.h"
16 #include "lld/Common/ErrorHandler.h"
17 #include "lld/Common/Memory.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/CodeGen/Analysis.h"
20 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
21 #include "llvm/IR/LLVMContext.h"
22 #include "llvm/IR/Module.h"
23 #include "llvm/LTO/LTO.h"
24 #include "llvm/MC/StringTableBuilder.h"
25 #include "llvm/Object/ELFObjectFile.h"
26 #include "llvm/Support/ARMAttributeParser.h"
27 #include "llvm/Support/ARMBuildAttributes.h"
28 #include "llvm/Support/Path.h"
29 #include "llvm/Support/TarWriter.h"
30 #include "llvm/Support/raw_ostream.h"
31 
32 using namespace llvm;
33 using namespace llvm::ELF;
34 using namespace llvm::object;
35 using namespace llvm::sys;
36 using namespace llvm::sys::fs;
37 
38 using namespace lld;
39 using namespace lld::elf;
40 
41 bool InputFile::IsInGroup;
42 uint32_t InputFile::NextGroupId;
43 std::vector<BinaryFile *> elf::BinaryFiles;
44 std::vector<BitcodeFile *> elf::BitcodeFiles;
45 std::vector<LazyObjFile *> elf::LazyObjFiles;
46 std::vector<InputFile *> elf::ObjectFiles;
47 std::vector<SharedFile *> elf::SharedFiles;
48 
49 std::unique_ptr<TarWriter> elf::Tar;
50 
51 static ELFKind getELFKind(MemoryBufferRef MB, StringRef ArchiveName) {
52   unsigned char Size;
53   unsigned char Endian;
54   std::tie(Size, Endian) = getElfArchType(MB.getBuffer());
55 
56   auto Fatal = [&](StringRef Msg) {
57     StringRef Filename = MB.getBufferIdentifier();
58     if (ArchiveName.empty())
59       fatal(Filename + ": " + Msg);
60     else
61       fatal(ArchiveName + "(" + Filename + "): " + Msg);
62   };
63 
64   if (!MB.getBuffer().startswith(ElfMagic))
65     Fatal("not an ELF file");
66   if (Endian != ELFDATA2LSB && Endian != ELFDATA2MSB)
67     Fatal("corrupted ELF file: invalid data encoding");
68   if (Size != ELFCLASS32 && Size != ELFCLASS64)
69     Fatal("corrupted ELF file: invalid file class");
70 
71   size_t BufSize = MB.getBuffer().size();
72   if ((Size == ELFCLASS32 && BufSize < sizeof(Elf32_Ehdr)) ||
73       (Size == ELFCLASS64 && BufSize < sizeof(Elf64_Ehdr)))
74     Fatal("corrupted ELF file: file is too short");
75 
76   if (Size == ELFCLASS32)
77     return (Endian == ELFDATA2LSB) ? ELF32LEKind : ELF32BEKind;
78   return (Endian == ELFDATA2LSB) ? ELF64LEKind : ELF64BEKind;
79 }
80 
81 InputFile::InputFile(Kind K, MemoryBufferRef M)
82     : MB(M), GroupId(NextGroupId), FileKind(K) {
83   // All files within the same --{start,end}-group get the same group ID.
84   // Otherwise, a new file will get a new group ID.
85   if (!IsInGroup)
86     ++NextGroupId;
87 }
88 
89 Optional<MemoryBufferRef> elf::readFile(StringRef Path) {
90   // The --chroot option changes our virtual root directory.
91   // This is useful when you are dealing with files created by --reproduce.
92   if (!Config->Chroot.empty() && Path.startswith("/"))
93     Path = Saver.save(Config->Chroot + Path);
94 
95   log(Path);
96 
97   auto MBOrErr = MemoryBuffer::getFile(Path, -1, false);
98   if (auto EC = MBOrErr.getError()) {
99     error("cannot open " + Path + ": " + EC.message());
100     return None;
101   }
102 
103   std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
104   MemoryBufferRef MBRef = MB->getMemBufferRef();
105   make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership
106 
107   if (Tar)
108     Tar->append(relativeToRoot(Path), MBRef.getBuffer());
109   return MBRef;
110 }
111 
112 // All input object files must be for the same architecture
113 // (e.g. it does not make sense to link x86 object files with
114 // MIPS object files.) This function checks for that error.
115 static bool isCompatible(InputFile *File) {
116   if (!File->isElf() && !isa<BitcodeFile>(File))
117     return true;
118 
119   if (File->EKind == Config->EKind && File->EMachine == Config->EMachine) {
120     if (Config->EMachine != EM_MIPS)
121       return true;
122     if (isMipsN32Abi(File) == Config->MipsN32Abi)
123       return true;
124   }
125 
126   if (!Config->Emulation.empty()) {
127     error(toString(File) + " is incompatible with " + Config->Emulation);
128   } else {
129     InputFile *Existing;
130     if (!ObjectFiles.empty())
131       Existing = ObjectFiles[0];
132     else if (!SharedFiles.empty())
133       Existing = SharedFiles[0];
134     else
135       Existing = BitcodeFiles[0];
136 
137     error(toString(File) + " is incompatible with " + toString(Existing));
138   }
139 
140   return false;
141 }
142 
143 template <class ELFT> static void doParseFile(InputFile *File) {
144   if (!isCompatible(File))
145     return;
146 
147   // Binary file
148   if (auto *F = dyn_cast<BinaryFile>(File)) {
149     BinaryFiles.push_back(F);
150     F->parse();
151     return;
152   }
153 
154   // .a file
155   if (auto *F = dyn_cast<ArchiveFile>(File)) {
156     F->parse();
157     return;
158   }
159 
160   // Lazy object file
161   if (auto *F = dyn_cast<LazyObjFile>(File)) {
162     LazyObjFiles.push_back(F);
163     F->parse<ELFT>();
164     return;
165   }
166 
167   if (Config->Trace)
168     message(toString(File));
169 
170   // .so file
171   if (auto *F = dyn_cast<SharedFile>(File)) {
172     F->parse<ELFT>();
173     return;
174   }
175 
176   // LLVM bitcode file
177   if (auto *F = dyn_cast<BitcodeFile>(File)) {
178     BitcodeFiles.push_back(F);
179     F->parse<ELFT>(Symtab->ComdatGroups);
180     return;
181   }
182 
183   // Regular object file
184   ObjectFiles.push_back(File);
185   cast<ObjFile<ELFT>>(File)->parse(Symtab->ComdatGroups);
186 }
187 
188 // Add symbols in File to the symbol table.
189 void elf::parseFile(InputFile *File) {
190   switch (Config->EKind) {
191   case ELF32LEKind:
192     doParseFile<ELF32LE>(File);
193     return;
194   case ELF32BEKind:
195     doParseFile<ELF32BE>(File);
196     return;
197   case ELF64LEKind:
198     doParseFile<ELF64LE>(File);
199     return;
200   case ELF64BEKind:
201     doParseFile<ELF64BE>(File);
202     return;
203   default:
204     llvm_unreachable("unknown ELFT");
205   }
206 }
207 
208 // Concatenates arguments to construct a string representing an error location.
209 static std::string createFileLineMsg(StringRef Path, unsigned Line) {
210   std::string Filename = path::filename(Path);
211   std::string Lineno = ":" + std::to_string(Line);
212   if (Filename == Path)
213     return Filename + Lineno;
214   return Filename + Lineno + " (" + Path.str() + Lineno + ")";
215 }
216 
217 template <class ELFT>
218 static std::string getSrcMsgAux(ObjFile<ELFT> &File, const Symbol &Sym,
219                                 InputSectionBase &Sec, uint64_t Offset) {
220   // In DWARF, functions and variables are stored to different places.
221   // First, lookup a function for a given offset.
222   if (Optional<DILineInfo> Info = File.getDILineInfo(&Sec, Offset))
223     return createFileLineMsg(Info->FileName, Info->Line);
224 
225   // If it failed, lookup again as a variable.
226   if (Optional<std::pair<std::string, unsigned>> FileLine =
227           File.getVariableLoc(Sym.getName()))
228     return createFileLineMsg(FileLine->first, FileLine->second);
229 
230   // File.SourceFile contains STT_FILE symbol, and that is a last resort.
231   return File.SourceFile;
232 }
233 
234 std::string InputFile::getSrcMsg(const Symbol &Sym, InputSectionBase &Sec,
235                                  uint64_t Offset) {
236   if (kind() != ObjKind)
237     return "";
238   switch (Config->EKind) {
239   default:
240     llvm_unreachable("Invalid kind");
241   case ELF32LEKind:
242     return getSrcMsgAux(cast<ObjFile<ELF32LE>>(*this), Sym, Sec, Offset);
243   case ELF32BEKind:
244     return getSrcMsgAux(cast<ObjFile<ELF32BE>>(*this), Sym, Sec, Offset);
245   case ELF64LEKind:
246     return getSrcMsgAux(cast<ObjFile<ELF64LE>>(*this), Sym, Sec, Offset);
247   case ELF64BEKind:
248     return getSrcMsgAux(cast<ObjFile<ELF64BE>>(*this), Sym, Sec, Offset);
249   }
250 }
251 
252 template <class ELFT> void ObjFile<ELFT>::initializeDwarf() {
253   Dwarf = llvm::make_unique<DWARFContext>(make_unique<LLDDwarfObj<ELFT>>(this));
254   for (std::unique_ptr<DWARFUnit> &CU : Dwarf->compile_units()) {
255     auto Report = [](Error Err) {
256       handleAllErrors(std::move(Err),
257                       [](ErrorInfoBase &Info) { warn(Info.message()); });
258     };
259     Expected<const DWARFDebugLine::LineTable *> ExpectedLT =
260         Dwarf->getLineTableForUnit(CU.get(), Report);
261     const DWARFDebugLine::LineTable *LT = nullptr;
262     if (ExpectedLT)
263       LT = *ExpectedLT;
264     else
265       Report(ExpectedLT.takeError());
266     if (!LT)
267       continue;
268     LineTables.push_back(LT);
269 
270     // Loop over variable records and insert them to VariableLoc.
271     for (const auto &Entry : CU->dies()) {
272       DWARFDie Die(CU.get(), &Entry);
273       // Skip all tags that are not variables.
274       if (Die.getTag() != dwarf::DW_TAG_variable)
275         continue;
276 
277       // Skip if a local variable because we don't need them for generating
278       // error messages. In general, only non-local symbols can fail to be
279       // linked.
280       if (!dwarf::toUnsigned(Die.find(dwarf::DW_AT_external), 0))
281         continue;
282 
283       // Get the source filename index for the variable.
284       unsigned File = dwarf::toUnsigned(Die.find(dwarf::DW_AT_decl_file), 0);
285       if (!LT->hasFileAtIndex(File))
286         continue;
287 
288       // Get the line number on which the variable is declared.
289       unsigned Line = dwarf::toUnsigned(Die.find(dwarf::DW_AT_decl_line), 0);
290 
291       // Here we want to take the variable name to add it into VariableLoc.
292       // Variable can have regular and linkage name associated. At first, we try
293       // to get linkage name as it can be different, for example when we have
294       // two variables in different namespaces of the same object. Use common
295       // name otherwise, but handle the case when it also absent in case if the
296       // input object file lacks some debug info.
297       StringRef Name =
298           dwarf::toString(Die.find(dwarf::DW_AT_linkage_name),
299                           dwarf::toString(Die.find(dwarf::DW_AT_name), ""));
300       if (!Name.empty())
301         VariableLoc.insert({Name, {LT, File, Line}});
302     }
303   }
304 }
305 
306 // Returns the pair of file name and line number describing location of data
307 // object (variable, array, etc) definition.
308 template <class ELFT>
309 Optional<std::pair<std::string, unsigned>>
310 ObjFile<ELFT>::getVariableLoc(StringRef Name) {
311   llvm::call_once(InitDwarfLine, [this]() { initializeDwarf(); });
312 
313   // Return if we have no debug information about data object.
314   auto It = VariableLoc.find(Name);
315   if (It == VariableLoc.end())
316     return None;
317 
318   // Take file name string from line table.
319   std::string FileName;
320   if (!It->second.LT->getFileNameByIndex(
321           It->second.File, nullptr,
322           DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, FileName))
323     return None;
324 
325   return std::make_pair(FileName, It->second.Line);
326 }
327 
328 // Returns source line information for a given offset
329 // using DWARF debug info.
330 template <class ELFT>
331 Optional<DILineInfo> ObjFile<ELFT>::getDILineInfo(InputSectionBase *S,
332                                                   uint64_t Offset) {
333   llvm::call_once(InitDwarfLine, [this]() { initializeDwarf(); });
334 
335   // Detect SectionIndex for specified section.
336   uint64_t SectionIndex = object::SectionedAddress::UndefSection;
337   ArrayRef<InputSectionBase *> Sections = S->File->getSections();
338   for (uint64_t CurIndex = 0; CurIndex < Sections.size(); ++CurIndex) {
339     if (S == Sections[CurIndex]) {
340       SectionIndex = CurIndex;
341       break;
342     }
343   }
344 
345   // Use fake address calcuated by adding section file offset and offset in
346   // section. See comments for ObjectInfo class.
347   DILineInfo Info;
348   for (const llvm::DWARFDebugLine::LineTable *LT : LineTables) {
349     if (LT->getFileLineInfoForAddress(
350             {S->getOffsetInFile() + Offset, SectionIndex}, nullptr,
351             DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, Info))
352       return Info;
353   }
354   return None;
355 }
356 
357 // Returns "<internal>", "foo.a(bar.o)" or "baz.o".
358 std::string lld::toString(const InputFile *F) {
359   if (!F)
360     return "<internal>";
361 
362   if (F->ToStringCache.empty()) {
363     if (F->ArchiveName.empty())
364       F->ToStringCache = F->getName();
365     else
366       F->ToStringCache = (F->ArchiveName + "(" + F->getName() + ")").str();
367   }
368   return F->ToStringCache;
369 }
370 
371 ELFFileBase::ELFFileBase(Kind K, MemoryBufferRef MB) : InputFile(K, MB) {}
372 
373 template <class ELFT> void ELFFileBase::parseHeader() {
374   if (ELFT::TargetEndianness == support::little)
375     EKind = ELFT::Is64Bits ? ELF64LEKind : ELF32LEKind;
376   else
377     EKind = ELFT::Is64Bits ? ELF64BEKind : ELF32BEKind;
378 
379   EMachine = getObj<ELFT>().getHeader()->e_machine;
380   OSABI = getObj<ELFT>().getHeader()->e_ident[llvm::ELF::EI_OSABI];
381   ABIVersion = getObj<ELFT>().getHeader()->e_ident[llvm::ELF::EI_ABIVERSION];
382 }
383 
384 template <class ELFT>
385 void ELFFileBase::initSymtab(ArrayRef<typename ELFT::Shdr> Sections,
386                              const typename ELFT::Shdr *Symtab) {
387   FirstGlobal = Symtab->sh_info;
388   ArrayRef<typename ELFT::Sym> ELFSyms =
389       CHECK(getObj<ELFT>().symbols(Symtab), this);
390   if (FirstGlobal == 0 || FirstGlobal > ELFSyms.size())
391     fatal(toString(this) + ": invalid sh_info in symbol table");
392   this->ELFSyms = reinterpret_cast<const void *>(ELFSyms.data());
393   this->NumELFSyms = ELFSyms.size();
394 
395   StringTable =
396       CHECK(getObj<ELFT>().getStringTableForSymtab(*Symtab, Sections), this);
397 }
398 
399 template <class ELFT>
400 ObjFile<ELFT>::ObjFile(MemoryBufferRef M, StringRef ArchiveName)
401     : ELFFileBase(ObjKind, M) {
402   parseHeader<ELFT>();
403   this->ArchiveName = ArchiveName;
404 }
405 
406 template <class ELFT>
407 uint32_t ObjFile<ELFT>::getSectionIndex(const Elf_Sym &Sym) const {
408   return CHECK(
409       this->getObj().getSectionIndex(&Sym, getELFSyms<ELFT>(), ShndxTable),
410       this);
411 }
412 
413 template <class ELFT> ArrayRef<Symbol *> ObjFile<ELFT>::getLocalSymbols() {
414   if (this->Symbols.empty())
415     return {};
416   return makeArrayRef(this->Symbols).slice(1, this->FirstGlobal - 1);
417 }
418 
419 template <class ELFT> ArrayRef<Symbol *> ObjFile<ELFT>::getGlobalSymbols() {
420   return makeArrayRef(this->Symbols).slice(this->FirstGlobal);
421 }
422 
423 template <class ELFT>
424 void ObjFile<ELFT>::parse(
425     DenseMap<CachedHashStringRef, const InputFile *> &ComdatGroups) {
426   // Read a section table. JustSymbols is usually false.
427   if (this->JustSymbols)
428     initializeJustSymbols();
429   else
430     initializeSections(ComdatGroups);
431 
432   // Read a symbol table.
433   initializeSymbols();
434 }
435 
436 // Sections with SHT_GROUP and comdat bits define comdat section groups.
437 // They are identified and deduplicated by group name. This function
438 // returns a group name.
439 template <class ELFT>
440 StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> Sections,
441                                               const Elf_Shdr &Sec) {
442   // Group signatures are stored as symbol names in object files.
443   // sh_info contains a symbol index, so we fetch a symbol and read its name.
444   if (this->getELFSyms<ELFT>().empty())
445     this->initSymtab<ELFT>(
446         Sections, CHECK(object::getSection<ELFT>(Sections, Sec.sh_link), this));
447 
448   const Elf_Sym *Sym =
449       CHECK(object::getSymbol<ELFT>(this->getELFSyms<ELFT>(), Sec.sh_info), this);
450   StringRef Signature = CHECK(Sym->getName(this->StringTable), this);
451 
452   // As a special case, if a symbol is a section symbol and has no name,
453   // we use a section name as a signature.
454   //
455   // Such SHT_GROUP sections are invalid from the perspective of the ELF
456   // standard, but GNU gold 1.14 (the newest version as of July 2017) or
457   // older produce such sections as outputs for the -r option, so we need
458   // a bug-compatibility.
459   if (Signature.empty() && Sym->getType() == STT_SECTION)
460     return getSectionName(Sec);
461   return Signature;
462 }
463 
464 template <class ELFT> bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &Sec) {
465   // On a regular link we don't merge sections if -O0 (default is -O1). This
466   // sometimes makes the linker significantly faster, although the output will
467   // be bigger.
468   //
469   // Doing the same for -r would create a problem as it would combine sections
470   // with different sh_entsize. One option would be to just copy every SHF_MERGE
471   // section as is to the output. While this would produce a valid ELF file with
472   // usable SHF_MERGE sections, tools like (llvm-)?dwarfdump get confused when
473   // they see two .debug_str. We could have separate logic for combining
474   // SHF_MERGE sections based both on their name and sh_entsize, but that seems
475   // to be more trouble than it is worth. Instead, we just use the regular (-O1)
476   // logic for -r.
477   if (Config->Optimize == 0 && !Config->Relocatable)
478     return false;
479 
480   // A mergeable section with size 0 is useless because they don't have
481   // any data to merge. A mergeable string section with size 0 can be
482   // argued as invalid because it doesn't end with a null character.
483   // We'll avoid a mess by handling them as if they were non-mergeable.
484   if (Sec.sh_size == 0)
485     return false;
486 
487   // Check for sh_entsize. The ELF spec is not clear about the zero
488   // sh_entsize. It says that "the member [sh_entsize] contains 0 if
489   // the section does not hold a table of fixed-size entries". We know
490   // that Rust 1.13 produces a string mergeable section with a zero
491   // sh_entsize. Here we just accept it rather than being picky about it.
492   uint64_t EntSize = Sec.sh_entsize;
493   if (EntSize == 0)
494     return false;
495   if (Sec.sh_size % EntSize)
496     fatal(toString(this) +
497           ": SHF_MERGE section size must be a multiple of sh_entsize");
498 
499   uint64_t Flags = Sec.sh_flags;
500   if (!(Flags & SHF_MERGE))
501     return false;
502   if (Flags & SHF_WRITE)
503     fatal(toString(this) + ": writable SHF_MERGE section is not supported");
504 
505   return true;
506 }
507 
508 // This is for --just-symbols.
509 //
510 // --just-symbols is a very minor feature that allows you to link your
511 // output against other existing program, so that if you load both your
512 // program and the other program into memory, your output can refer the
513 // other program's symbols.
514 //
515 // When the option is given, we link "just symbols". The section table is
516 // initialized with null pointers.
517 template <class ELFT> void ObjFile<ELFT>::initializeJustSymbols() {
518   ArrayRef<Elf_Shdr> ObjSections = CHECK(this->getObj().sections(), this);
519   this->Sections.resize(ObjSections.size());
520 
521   for (const Elf_Shdr &Sec : ObjSections) {
522     if (Sec.sh_type != SHT_SYMTAB)
523       continue;
524     this->initSymtab<ELFT>(ObjSections, &Sec);
525     return;
526   }
527 }
528 
529 // An ELF object file may contain a `.deplibs` section. If it exists, the
530 // section contains a list of library specifiers such as `m` for libm. This
531 // function resolves a given name by finding the first matching library checking
532 // the various ways that a library can be specified to LLD. This ELF extension
533 // is a form of autolinking and is called `dependent libraries`. It is currently
534 // unique to LLVM and lld.
535 static void addDependentLibrary(StringRef Specifier, const InputFile *F) {
536   if (!Config->DependentLibraries)
537     return;
538   if (fs::exists(Specifier))
539     Driver->addFile(Specifier, /*WithLOption=*/false);
540   else if (Optional<std::string> S = findFromSearchPaths(Specifier))
541     Driver->addFile(*S, /*WithLOption=*/true);
542   else if (Optional<std::string> S = searchLibraryBaseName(Specifier))
543     Driver->addFile(*S, /*WithLOption=*/true);
544   else
545     error(toString(F) +
546           ": unable to find library from dependent library specifier: " +
547           Specifier);
548 }
549 
550 template <class ELFT>
551 void ObjFile<ELFT>::initializeSections(
552     DenseMap<CachedHashStringRef, const InputFile *> &ComdatGroups) {
553   const ELFFile<ELFT> &Obj = this->getObj();
554 
555   ArrayRef<Elf_Shdr> ObjSections = CHECK(Obj.sections(), this);
556   uint64_t Size = ObjSections.size();
557   this->Sections.resize(Size);
558   this->SectionStringTable =
559       CHECK(Obj.getSectionStringTable(ObjSections), this);
560 
561   for (size_t I = 0, E = ObjSections.size(); I < E; I++) {
562     if (this->Sections[I] == &InputSection::Discarded)
563       continue;
564     const Elf_Shdr &Sec = ObjSections[I];
565 
566     if (Sec.sh_type == ELF::SHT_LLVM_CALL_GRAPH_PROFILE)
567       CGProfile =
568           check(Obj.template getSectionContentsAsArray<Elf_CGProfile>(&Sec));
569 
570     // SHF_EXCLUDE'ed sections are discarded by the linker. However,
571     // if -r is given, we'll let the final link discard such sections.
572     // This is compatible with GNU.
573     if ((Sec.sh_flags & SHF_EXCLUDE) && !Config->Relocatable) {
574       if (Sec.sh_type == SHT_LLVM_ADDRSIG) {
575         // We ignore the address-significance table if we know that the object
576         // file was created by objcopy or ld -r. This is because these tools
577         // will reorder the symbols in the symbol table, invalidating the data
578         // in the address-significance table, which refers to symbols by index.
579         if (Sec.sh_link != 0)
580           this->AddrsigSec = &Sec;
581         else if (Config->ICF == ICFLevel::Safe)
582           warn(toString(this) + ": --icf=safe is incompatible with object "
583                                 "files created using objcopy or ld -r");
584       }
585       this->Sections[I] = &InputSection::Discarded;
586       continue;
587     }
588 
589     switch (Sec.sh_type) {
590     case SHT_GROUP: {
591       // De-duplicate section groups by their signatures.
592       StringRef Signature = getShtGroupSignature(ObjSections, Sec);
593       this->Sections[I] = &InputSection::Discarded;
594 
595 
596       ArrayRef<Elf_Word> Entries =
597           CHECK(Obj.template getSectionContentsAsArray<Elf_Word>(&Sec), this);
598       if (Entries.empty())
599         fatal(toString(this) + ": empty SHT_GROUP");
600 
601       // The first word of a SHT_GROUP section contains flags. Currently,
602       // the standard defines only "GRP_COMDAT" flag for the COMDAT group.
603       // An group with the empty flag doesn't define anything; such sections
604       // are just skipped.
605       if (Entries[0] == 0)
606         continue;
607 
608       if (Entries[0] != GRP_COMDAT)
609         fatal(toString(this) + ": unsupported SHT_GROUP format");
610 
611       bool IsNew =
612           ComdatGroups.try_emplace(CachedHashStringRef(Signature), this).second;
613       if (IsNew) {
614         if (Config->Relocatable)
615           this->Sections[I] = createInputSection(Sec);
616         continue;
617       }
618 
619       // Otherwise, discard group members.
620       for (uint32_t SecIndex : Entries.slice(1)) {
621         if (SecIndex >= Size)
622           fatal(toString(this) +
623                 ": invalid section index in group: " + Twine(SecIndex));
624         this->Sections[SecIndex] = &InputSection::Discarded;
625       }
626       break;
627     }
628     case SHT_SYMTAB:
629       this->initSymtab<ELFT>(ObjSections, &Sec);
630       break;
631     case SHT_SYMTAB_SHNDX:
632       ShndxTable = CHECK(Obj.getSHNDXTable(Sec, ObjSections), this);
633       break;
634     case SHT_STRTAB:
635     case SHT_NULL:
636       break;
637     default:
638       this->Sections[I] = createInputSection(Sec);
639     }
640 
641     // .ARM.exidx sections have a reverse dependency on the InputSection they
642     // have a SHF_LINK_ORDER dependency, this is identified by the sh_link.
643     if (Sec.sh_flags & SHF_LINK_ORDER) {
644       InputSectionBase *LinkSec = nullptr;
645       if (Sec.sh_link < this->Sections.size())
646         LinkSec = this->Sections[Sec.sh_link];
647       if (!LinkSec)
648         fatal(toString(this) +
649               ": invalid sh_link index: " + Twine(Sec.sh_link));
650 
651       InputSection *IS = cast<InputSection>(this->Sections[I]);
652       LinkSec->DependentSections.push_back(IS);
653       if (!isa<InputSection>(LinkSec))
654         error("a section " + IS->Name +
655               " with SHF_LINK_ORDER should not refer a non-regular "
656               "section: " +
657               toString(LinkSec));
658     }
659   }
660 }
661 
662 // For ARM only, to set the EF_ARM_ABI_FLOAT_SOFT or EF_ARM_ABI_FLOAT_HARD
663 // flag in the ELF Header we need to look at Tag_ABI_VFP_args to find out how
664 // the input objects have been compiled.
665 static void updateARMVFPArgs(const ARMAttributeParser &Attributes,
666                              const InputFile *F) {
667   if (!Attributes.hasAttribute(ARMBuildAttrs::ABI_VFP_args))
668     // If an ABI tag isn't present then it is implicitly given the value of 0
669     // which maps to ARMBuildAttrs::BaseAAPCS. However many assembler files,
670     // including some in glibc that don't use FP args (and should have value 3)
671     // don't have the attribute so we do not consider an implicit value of 0
672     // as a clash.
673     return;
674 
675   unsigned VFPArgs = Attributes.getAttributeValue(ARMBuildAttrs::ABI_VFP_args);
676   ARMVFPArgKind Arg;
677   switch (VFPArgs) {
678   case ARMBuildAttrs::BaseAAPCS:
679     Arg = ARMVFPArgKind::Base;
680     break;
681   case ARMBuildAttrs::HardFPAAPCS:
682     Arg = ARMVFPArgKind::VFP;
683     break;
684   case ARMBuildAttrs::ToolChainFPPCS:
685     // Tool chain specific convention that conforms to neither AAPCS variant.
686     Arg = ARMVFPArgKind::ToolChain;
687     break;
688   case ARMBuildAttrs::CompatibleFPAAPCS:
689     // Object compatible with all conventions.
690     return;
691   default:
692     error(toString(F) + ": unknown Tag_ABI_VFP_args value: " + Twine(VFPArgs));
693     return;
694   }
695   // Follow ld.bfd and error if there is a mix of calling conventions.
696   if (Config->ARMVFPArgs != Arg && Config->ARMVFPArgs != ARMVFPArgKind::Default)
697     error(toString(F) + ": incompatible Tag_ABI_VFP_args");
698   else
699     Config->ARMVFPArgs = Arg;
700 }
701 
702 // The ARM support in lld makes some use of instructions that are not available
703 // on all ARM architectures. Namely:
704 // - Use of BLX instruction for interworking between ARM and Thumb state.
705 // - Use of the extended Thumb branch encoding in relocation.
706 // - Use of the MOVT/MOVW instructions in Thumb Thunks.
707 // The ARM Attributes section contains information about the architecture chosen
708 // at compile time. We follow the convention that if at least one input object
709 // is compiled with an architecture that supports these features then lld is
710 // permitted to use them.
711 static void updateSupportedARMFeatures(const ARMAttributeParser &Attributes) {
712   if (!Attributes.hasAttribute(ARMBuildAttrs::CPU_arch))
713     return;
714   auto Arch = Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch);
715   switch (Arch) {
716   case ARMBuildAttrs::Pre_v4:
717   case ARMBuildAttrs::v4:
718   case ARMBuildAttrs::v4T:
719     // Architectures prior to v5 do not support BLX instruction
720     break;
721   case ARMBuildAttrs::v5T:
722   case ARMBuildAttrs::v5TE:
723   case ARMBuildAttrs::v5TEJ:
724   case ARMBuildAttrs::v6:
725   case ARMBuildAttrs::v6KZ:
726   case ARMBuildAttrs::v6K:
727     Config->ARMHasBlx = true;
728     // Architectures used in pre-Cortex processors do not support
729     // The J1 = 1 J2 = 1 Thumb branch range extension, with the exception
730     // of Architecture v6T2 (arm1156t2-s and arm1156t2f-s) that do.
731     break;
732   default:
733     // All other Architectures have BLX and extended branch encoding
734     Config->ARMHasBlx = true;
735     Config->ARMJ1J2BranchEncoding = true;
736     if (Arch != ARMBuildAttrs::v6_M && Arch != ARMBuildAttrs::v6S_M)
737       // All Architectures used in Cortex processors with the exception
738       // of v6-M and v6S-M have the MOVT and MOVW instructions.
739       Config->ARMHasMovtMovw = true;
740     break;
741   }
742 }
743 
744 template <class ELFT>
745 InputSectionBase *ObjFile<ELFT>::getRelocTarget(const Elf_Shdr &Sec) {
746   uint32_t Idx = Sec.sh_info;
747   if (Idx >= this->Sections.size())
748     fatal(toString(this) + ": invalid relocated section index: " + Twine(Idx));
749   InputSectionBase *Target = this->Sections[Idx];
750 
751   // Strictly speaking, a relocation section must be included in the
752   // group of the section it relocates. However, LLVM 3.3 and earlier
753   // would fail to do so, so we gracefully handle that case.
754   if (Target == &InputSection::Discarded)
755     return nullptr;
756 
757   if (!Target)
758     fatal(toString(this) + ": unsupported relocation reference");
759   return Target;
760 }
761 
762 // Create a regular InputSection class that has the same contents
763 // as a given section.
764 static InputSection *toRegularSection(MergeInputSection *Sec) {
765   return make<InputSection>(Sec->File, Sec->Flags, Sec->Type, Sec->Alignment,
766                             Sec->data(), Sec->Name);
767 }
768 
769 template <class ELFT>
770 InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &Sec) {
771   StringRef Name = getSectionName(Sec);
772 
773   switch (Sec.sh_type) {
774   case SHT_ARM_ATTRIBUTES: {
775     if (Config->EMachine != EM_ARM)
776       break;
777     ARMAttributeParser Attributes;
778     ArrayRef<uint8_t> Contents = check(this->getObj().getSectionContents(&Sec));
779     Attributes.Parse(Contents, /*isLittle*/ Config->EKind == ELF32LEKind);
780     updateSupportedARMFeatures(Attributes);
781     updateARMVFPArgs(Attributes, this);
782 
783     // FIXME: Retain the first attribute section we see. The eglibc ARM
784     // dynamic loaders require the presence of an attribute section for dlopen
785     // to work. In a full implementation we would merge all attribute sections.
786     if (In.ARMAttributes == nullptr) {
787       In.ARMAttributes = make<InputSection>(*this, Sec, Name);
788       return In.ARMAttributes;
789     }
790     return &InputSection::Discarded;
791   }
792   case SHT_LLVM_DEPENDENT_LIBRARIES: {
793     if (Config->Relocatable)
794       break;
795     ArrayRef<char> Data =
796         CHECK(this->getObj().template getSectionContentsAsArray<char>(&Sec), this);
797     if (!Data.empty() && Data.back() != '\0') {
798       error(toString(this) +
799             ": corrupted dependent libraries section (unterminated string): " +
800             Name);
801       return &InputSection::Discarded;
802     }
803     for (const char *D = Data.begin(), *E = Data.end(); D < E;) {
804       StringRef S(D);
805       addDependentLibrary(S, this);
806       D += S.size() + 1;
807     }
808     return &InputSection::Discarded;
809   }
810   case SHT_RELA:
811   case SHT_REL: {
812     // Find a relocation target section and associate this section with that.
813     // Target may have been discarded if it is in a different section group
814     // and the group is discarded, even though it's a violation of the
815     // spec. We handle that situation gracefully by discarding dangling
816     // relocation sections.
817     InputSectionBase *Target = getRelocTarget(Sec);
818     if (!Target)
819       return nullptr;
820 
821     // This section contains relocation information.
822     // If -r is given, we do not interpret or apply relocation
823     // but just copy relocation sections to output.
824     if (Config->Relocatable) {
825       InputSection *RelocSec = make<InputSection>(*this, Sec, Name);
826       // We want to add a dependency to target, similar like we do for
827       // -emit-relocs below. This is useful for the case when linker script
828       // contains the "/DISCARD/". It is perhaps uncommon to use a script with
829       // -r, but we faced it in the Linux kernel and have to handle such case
830       // and not to crash.
831       Target->DependentSections.push_back(RelocSec);
832       return RelocSec;
833     }
834 
835     if (Target->FirstRelocation)
836       fatal(toString(this) +
837             ": multiple relocation sections to one section are not supported");
838 
839     // ELF spec allows mergeable sections with relocations, but they are
840     // rare, and it is in practice hard to merge such sections by contents,
841     // because applying relocations at end of linking changes section
842     // contents. So, we simply handle such sections as non-mergeable ones.
843     // Degrading like this is acceptable because section merging is optional.
844     if (auto *MS = dyn_cast<MergeInputSection>(Target)) {
845       Target = toRegularSection(MS);
846       this->Sections[Sec.sh_info] = Target;
847     }
848 
849     if (Sec.sh_type == SHT_RELA) {
850       ArrayRef<Elf_Rela> Rels = CHECK(getObj().relas(&Sec), this);
851       Target->FirstRelocation = Rels.begin();
852       Target->NumRelocations = Rels.size();
853       Target->AreRelocsRela = true;
854     } else {
855       ArrayRef<Elf_Rel> Rels = CHECK(getObj().rels(&Sec), this);
856       Target->FirstRelocation = Rels.begin();
857       Target->NumRelocations = Rels.size();
858       Target->AreRelocsRela = false;
859     }
860     assert(isUInt<31>(Target->NumRelocations));
861 
862     // Relocation sections processed by the linker are usually removed
863     // from the output, so returning `nullptr` for the normal case.
864     // However, if -emit-relocs is given, we need to leave them in the output.
865     // (Some post link analysis tools need this information.)
866     if (Config->EmitRelocs) {
867       InputSection *RelocSec = make<InputSection>(*this, Sec, Name);
868       // We will not emit relocation section if target was discarded.
869       Target->DependentSections.push_back(RelocSec);
870       return RelocSec;
871     }
872     return nullptr;
873   }
874   }
875 
876   // The GNU linker uses .note.GNU-stack section as a marker indicating
877   // that the code in the object file does not expect that the stack is
878   // executable (in terms of NX bit). If all input files have the marker,
879   // the GNU linker adds a PT_GNU_STACK segment to tells the loader to
880   // make the stack non-executable. Most object files have this section as
881   // of 2017.
882   //
883   // But making the stack non-executable is a norm today for security
884   // reasons. Failure to do so may result in a serious security issue.
885   // Therefore, we make LLD always add PT_GNU_STACK unless it is
886   // explicitly told to do otherwise (by -z execstack). Because the stack
887   // executable-ness is controlled solely by command line options,
888   // .note.GNU-stack sections are simply ignored.
889   if (Name == ".note.GNU-stack")
890     return &InputSection::Discarded;
891 
892   // Split stacks is a feature to support a discontiguous stack,
893   // commonly used in the programming language Go. For the details,
894   // see https://gcc.gnu.org/wiki/SplitStacks. An object file compiled
895   // for split stack will include a .note.GNU-split-stack section.
896   if (Name == ".note.GNU-split-stack") {
897     if (Config->Relocatable) {
898       error("cannot mix split-stack and non-split-stack in a relocatable link");
899       return &InputSection::Discarded;
900     }
901     this->SplitStack = true;
902     return &InputSection::Discarded;
903   }
904 
905   // An object file cmpiled for split stack, but where some of the
906   // functions were compiled with the no_split_stack_attribute will
907   // include a .note.GNU-no-split-stack section.
908   if (Name == ".note.GNU-no-split-stack") {
909     this->SomeNoSplitStack = true;
910     return &InputSection::Discarded;
911   }
912 
913   // The linkonce feature is a sort of proto-comdat. Some glibc i386 object
914   // files contain definitions of symbol "__x86.get_pc_thunk.bx" in linkonce
915   // sections. Drop those sections to avoid duplicate symbol errors.
916   // FIXME: This is glibc PR20543, we should remove this hack once that has been
917   // fixed for a while.
918   if (Name == ".gnu.linkonce.t.__x86.get_pc_thunk.bx" ||
919       Name == ".gnu.linkonce.t.__i686.get_pc_thunk.bx")
920     return &InputSection::Discarded;
921 
922   // If we are creating a new .build-id section, strip existing .build-id
923   // sections so that the output won't have more than one .build-id.
924   // This is not usually a problem because input object files normally don't
925   // have .build-id sections, but you can create such files by
926   // "ld.{bfd,gold,lld} -r --build-id", and we want to guard against it.
927   if (Name == ".note.gnu.build-id" && Config->BuildId != BuildIdKind::None)
928     return &InputSection::Discarded;
929 
930   // The linker merges EH (exception handling) frames and creates a
931   // .eh_frame_hdr section for runtime. So we handle them with a special
932   // class. For relocatable outputs, they are just passed through.
933   if (Name == ".eh_frame" && !Config->Relocatable)
934     return make<EhInputSection>(*this, Sec, Name);
935 
936   if (shouldMerge(Sec))
937     return make<MergeInputSection>(*this, Sec, Name);
938   return make<InputSection>(*this, Sec, Name);
939 }
940 
941 template <class ELFT>
942 StringRef ObjFile<ELFT>::getSectionName(const Elf_Shdr &Sec) {
943   return CHECK(getObj().getSectionName(&Sec, SectionStringTable), this);
944 }
945 
946 // Initialize this->Symbols. this->Symbols is a parallel array as
947 // its corresponding ELF symbol table.
948 template <class ELFT> void ObjFile<ELFT>::initializeSymbols() {
949   ArrayRef<Elf_Sym> ESyms = this->getELFSyms<ELFT>();
950   this->Symbols.resize(ESyms.size());
951 
952   // Our symbol table may have already been partially initialized
953   // because of LazyObjFile.
954   for (size_t I = 0, End = ESyms.size(); I != End; ++I)
955     if (!this->Symbols[I] && ESyms[I].getBinding() != STB_LOCAL)
956       this->Symbols[I] =
957           Symtab->insert(CHECK(ESyms[I].getName(this->StringTable), this));
958 
959   // Fill this->Symbols. A symbol is either local or global.
960   for (size_t I = 0, End = ESyms.size(); I != End; ++I) {
961     const Elf_Sym &ESym = ESyms[I];
962 
963     // Read symbol attributes.
964     uint32_t SecIdx = getSectionIndex(ESym);
965     if (SecIdx >= this->Sections.size())
966       fatal(toString(this) + ": invalid section index: " + Twine(SecIdx));
967 
968     InputSectionBase *Sec = this->Sections[SecIdx];
969     uint8_t Binding = ESym.getBinding();
970     uint8_t StOther = ESym.st_other;
971     uint8_t Type = ESym.getType();
972     uint64_t Value = ESym.st_value;
973     uint64_t Size = ESym.st_size;
974     StringRefZ Name = this->StringTable.data() + ESym.st_name;
975 
976     // Handle local symbols. Local symbols are not added to the symbol
977     // table because they are not visible from other object files. We
978     // allocate symbol instances and add their pointers to Symbols.
979     if (Binding == STB_LOCAL) {
980       if (ESym.getType() == STT_FILE)
981         SourceFile = CHECK(ESym.getName(this->StringTable), this);
982 
983       if (this->StringTable.size() <= ESym.st_name)
984         fatal(toString(this) + ": invalid symbol name offset");
985 
986       if (ESym.st_shndx == SHN_UNDEF)
987         this->Symbols[I] = make<Undefined>(this, Name, Binding, StOther, Type);
988       else
989         this->Symbols[I] =
990             make<Defined>(this, Name, Binding, StOther, Type, Value, Size, Sec);
991       continue;
992     }
993 
994     // Handle global undefined symbols.
995     if (ESym.st_shndx == SHN_UNDEF) {
996       this->Symbols[I]->resolve(Undefined{this, Name, Binding, StOther, Type});
997       continue;
998     }
999 
1000     // Handle global common symbols.
1001     if (ESym.st_shndx == SHN_COMMON) {
1002       if (Value == 0 || Value >= UINT32_MAX)
1003         fatal(toString(this) + ": common symbol '" + StringRef(Name.Data) +
1004               "' has invalid alignment: " + Twine(Value));
1005       this->Symbols[I]->resolve(
1006           CommonSymbol{this, Name, Binding, StOther, Type, Value, Size});
1007       continue;
1008     }
1009 
1010     // If a defined symbol is in a discarded section, handle it as if it
1011     // were an undefined symbol. Such symbol doesn't comply with the
1012     // standard, but in practice, a .eh_frame often directly refer
1013     // COMDAT member sections, and if a comdat group is discarded, some
1014     // defined symbol in a .eh_frame becomes dangling symbols.
1015     if (Sec == &InputSection::Discarded) {
1016       this->Symbols[I]->resolve(
1017           Undefined{this, Name, Binding, StOther, Type, SecIdx});
1018       continue;
1019     }
1020 
1021     // Handle global defined symbols.
1022     if (Binding == STB_GLOBAL || Binding == STB_WEAK ||
1023         Binding == STB_GNU_UNIQUE) {
1024       this->Symbols[I]->resolve(
1025           Defined{this, Name, Binding, StOther, Type, Value, Size, Sec});
1026       continue;
1027     }
1028 
1029     fatal(toString(this) + ": unexpected binding: " + Twine((int)Binding));
1030   }
1031 }
1032 
// Take ownership of an already-opened archive. The InputFile base is
// initialized before any member, so File->getMemoryBufferRef() reads through
// the constructor parameter before it is moved into the member of the same
// name.
ArchiveFile::ArchiveFile(std::unique_ptr<Archive> &&File)
    : InputFile(ArchiveKind, File->getMemoryBufferRef()),
      File(std::move(File)) {}
1036 
1037 void ArchiveFile::parse() {
1038   for (const Archive::Symbol &Sym : File->symbols())
1039     Symtab->addSymbol(LazyArchive{*this, Sym});
1040 }
1041 
1042 // Returns a buffer pointing to a member file containing a given symbol.
1043 void ArchiveFile::fetch(const Archive::Symbol &Sym) {
1044   Archive::Child C =
1045       CHECK(Sym.getMember(), toString(this) +
1046                                  ": could not get the member for symbol " +
1047                                  Sym.getName());
1048 
1049   if (!Seen.insert(C.getChildOffset()).second)
1050     return;
1051 
1052   MemoryBufferRef MB =
1053       CHECK(C.getMemoryBufferRef(),
1054             toString(this) +
1055                 ": could not get the buffer for the member defining symbol " +
1056                 Sym.getName());
1057 
1058   if (Tar && C.getParent()->isThin())
1059     Tar->append(relativeToRoot(CHECK(C.getFullName(), this)), MB.getBuffer());
1060 
1061   InputFile *File = createObjectFile(
1062       MB, getName(), C.getParent()->isThin() ? 0 : C.getChildOffset());
1063   File->GroupId = GroupId;
1064   parseFile(File);
1065 }
1066 
// Out-of-class definition of the static data member SharedFile::VernauxNum.
// It has no initializer, so it starts out zero-initialized.
unsigned SharedFile::VernauxNum;
1068 
1069 SharedFile::SharedFile(MemoryBufferRef M, StringRef DefaultSoName)
1070     : ELFFileBase(SharedKind, M), SoName(DefaultSoName),
1071       IsNeeded(!Config->AsNeeded) {
1072   switch (getELFKind(MB, "")) {
1073   case ELF32LEKind:
1074     parseHeader<ELF32LE>();
1075     break;
1076   case ELF32BEKind:
1077     parseHeader<ELF32BE>();
1078     break;
1079   case ELF64LEKind:
1080     parseHeader<ELF64LE>();
1081     break;
1082   case ELF64BEKind:
1083     parseHeader<ELF64BE>();
1084     break;
1085   default:
1086     llvm_unreachable("getELFKind");
1087   }
1088 }
1089 
// Parse the version definitions in the object file if present, and return a
// vector whose nth element contains a pointer to the Elf_Verdef for version
// identifier n. Version identifiers that are not definitions map to nullptr.
//
// Base is the start of the file image and Sec is the .gnu.version_d section
// header, or nullptr if the file has none (an empty vector is returned then).
template <typename ELFT>
static std::vector<const void *> parseVerdefs(const uint8_t *Base,
                                              const typename ELFT::Shdr *Sec) {
  if (!Sec)
    return {};

  // We cannot determine the largest verdef identifier without inspecting
  // every Elf_Verdef, but both bfd and gold assign verdef identifiers
  // sequentially starting from 1, so we predict that the largest identifier
  // will be VerdefCount.
  unsigned VerdefCount = Sec->sh_info;
  std::vector<const void *> Verdefs(VerdefCount + 1);

  // Build the Verdefs array by following the chain of Elf_Verdef objects
  // from the start of the .gnu.version_d section.
  const uint8_t *Verdef = Base + Sec->sh_offset;
  for (unsigned I = 0; I != VerdefCount; ++I) {
    auto *CurVerdef = reinterpret_cast<const typename ELFT::Verdef *>(Verdef);
    Verdef += CurVerdef->vd_next;
    unsigned VerdefIndex = CurVerdef->vd_ndx;
    // Only ever grow the table. An unconditional resize would shrink it
    // whenever an identifier is smaller than one seen earlier, discarding
    // the definitions recorded for the larger identifiers.
    if (VerdefIndex >= Verdefs.size())
      Verdefs.resize(VerdefIndex + 1);
    Verdefs[VerdefIndex] = CurVerdef;
  }
  return Verdefs;
}
1118 
1119 // We do not usually care about alignments of data in shared object
1120 // files because the loader takes care of it. However, if we promote a
1121 // DSO symbol to point to .bss due to copy relocation, we need to keep
1122 // the original alignment requirements. We infer it in this function.
1123 template <typename ELFT>
1124 static uint64_t getAlignment(ArrayRef<typename ELFT::Shdr> Sections,
1125                              const typename ELFT::Sym &Sym) {
1126   uint64_t Ret = UINT64_MAX;
1127   if (Sym.st_value)
1128     Ret = 1ULL << countTrailingZeros((uint64_t)Sym.st_value);
1129   if (0 < Sym.st_shndx && Sym.st_shndx < Sections.size())
1130     Ret = std::min<uint64_t>(Ret, Sections[Sym.st_shndx].sh_addralign);
1131   return (Ret > UINT32_MAX) ? 0 : Ret;
1132 }
1133 
1134 // Fully parse the shared object file.
1135 //
1136 // This function parses symbol versions. If a DSO has version information,
1137 // the file has a ".gnu.version_d" section which contains symbol version
1138 // definitions. Each symbol is associated to one version through a table in
1139 // ".gnu.version" section. That table is a parallel array for the symbol
1140 // table, and each table entry contains an index in ".gnu.version_d".
1141 //
1142 // The special index 0 is reserved for VERF_NDX_LOCAL and 1 is for
1143 // VER_NDX_GLOBAL. There's no table entry for these special versions in
1144 // ".gnu.version_d".
1145 //
1146 // The file format for symbol versioning is perhaps a bit more complicated
1147 // than necessary, but you can easily understand the code if you wrap your
1148 // head around the data structure described above.
1149 template <class ELFT> void SharedFile::parse() {
1150   using Elf_Dyn = typename ELFT::Dyn;
1151   using Elf_Shdr = typename ELFT::Shdr;
1152   using Elf_Sym = typename ELFT::Sym;
1153   using Elf_Verdef = typename ELFT::Verdef;
1154   using Elf_Versym = typename ELFT::Versym;
1155 
1156   ArrayRef<Elf_Dyn> DynamicTags;
1157   const ELFFile<ELFT> Obj = this->getObj<ELFT>();
1158   ArrayRef<Elf_Shdr> Sections = CHECK(Obj.sections(), this);
1159 
1160   const Elf_Shdr *VersymSec = nullptr;
1161   const Elf_Shdr *VerdefSec = nullptr;
1162 
1163   // Search for .dynsym, .dynamic, .symtab, .gnu.version and .gnu.version_d.
1164   for (const Elf_Shdr &Sec : Sections) {
1165     switch (Sec.sh_type) {
1166     default:
1167       continue;
1168     case SHT_DYNSYM:
1169       this->initSymtab<ELFT>(Sections, &Sec);
1170       break;
1171     case SHT_DYNAMIC:
1172       DynamicTags =
1173           CHECK(Obj.template getSectionContentsAsArray<Elf_Dyn>(&Sec), this);
1174       break;
1175     case SHT_GNU_versym:
1176       VersymSec = &Sec;
1177       break;
1178     case SHT_GNU_verdef:
1179       VerdefSec = &Sec;
1180       break;
1181     }
1182   }
1183 
1184   if (VersymSec && this->getELFSyms<ELFT>().empty()) {
1185     error("SHT_GNU_versym should be associated with symbol table");
1186     return;
1187   }
1188 
1189   // Search for a DT_SONAME tag to initialize this->SoName.
1190   for (const Elf_Dyn &Dyn : DynamicTags) {
1191     if (Dyn.d_tag == DT_NEEDED) {
1192       uint64_t Val = Dyn.getVal();
1193       if (Val >= this->StringTable.size())
1194         fatal(toString(this) + ": invalid DT_NEEDED entry");
1195       DtNeeded.push_back(this->StringTable.data() + Val);
1196     } else if (Dyn.d_tag == DT_SONAME) {
1197       uint64_t Val = Dyn.getVal();
1198       if (Val >= this->StringTable.size())
1199         fatal(toString(this) + ": invalid DT_SONAME entry");
1200       SoName = this->StringTable.data() + Val;
1201     }
1202   }
1203 
1204   // DSOs are uniquified not by filename but by soname.
1205   DenseMap<StringRef, SharedFile *>::iterator It;
1206   bool WasInserted;
1207   std::tie(It, WasInserted) = Symtab->SoNames.try_emplace(SoName, this);
1208 
1209   // If a DSO appears more than once on the command line with and without
1210   // --as-needed, --no-as-needed takes precedence over --as-needed because a
1211   // user can add an extra DSO with --no-as-needed to force it to be added to
1212   // the dependency list.
1213   It->second->IsNeeded |= IsNeeded;
1214   if (!WasInserted)
1215     return;
1216 
1217   SharedFiles.push_back(this);
1218 
1219   Verdefs = parseVerdefs<ELFT>(Obj.base(), VerdefSec);
1220 
1221   // Parse ".gnu.version" section which is a parallel array for the symbol
1222   // table. If a given file doesn't have a ".gnu.version" section, we use
1223   // VER_NDX_GLOBAL.
1224   size_t Size = this->getELFSyms<ELFT>().size() - this->FirstGlobal;
1225   std::vector<uint32_t> Versyms(Size, VER_NDX_GLOBAL);
1226   if (VersymSec) {
1227     ArrayRef<Elf_Versym> Versym =
1228         CHECK(Obj.template getSectionContentsAsArray<Elf_Versym>(VersymSec),
1229               this)
1230             .slice(FirstGlobal);
1231     for (size_t I = 0; I < Size; ++I)
1232       Versyms[I] = Versym[I].vs_index;
1233   }
1234 
1235   // System libraries can have a lot of symbols with versions. Using a
1236   // fixed buffer for computing the versions name (foo@ver) can save a
1237   // lot of allocations.
1238   SmallString<0> VersionedNameBuffer;
1239 
1240   // Add symbols to the symbol table.
1241   ArrayRef<Elf_Sym> Syms = this->getGlobalELFSyms<ELFT>();
1242   for (size_t I = 0; I < Syms.size(); ++I) {
1243     const Elf_Sym &Sym = Syms[I];
1244 
1245     // ELF spec requires that all local symbols precede weak or global
1246     // symbols in each symbol table, and the index of first non-local symbol
1247     // is stored to sh_info. If a local symbol appears after some non-local
1248     // symbol, that's a violation of the spec.
1249     StringRef Name = CHECK(Sym.getName(this->StringTable), this);
1250     if (Sym.getBinding() == STB_LOCAL) {
1251       warn("found local symbol '" + Name +
1252            "' in global part of symbol table in file " + toString(this));
1253       continue;
1254     }
1255 
1256     if (Sym.isUndefined()) {
1257       Symbol *S = Symtab->addSymbol(
1258           Undefined{this, Name, Sym.getBinding(), Sym.st_other, Sym.getType()});
1259       S->ExportDynamic = true;
1260       continue;
1261     }
1262 
1263     // MIPS BFD linker puts _gp_disp symbol into DSO files and incorrectly
1264     // assigns VER_NDX_LOCAL to this section global symbol. Here is a
1265     // workaround for this bug.
1266     uint32_t Idx = Versyms[I] & ~VERSYM_HIDDEN;
1267     if (Config->EMachine == EM_MIPS && Idx == VER_NDX_LOCAL &&
1268         Name == "_gp_disp")
1269       continue;
1270 
1271     uint32_t Alignment = getAlignment<ELFT>(Sections, Sym);
1272     if (!(Versyms[I] & VERSYM_HIDDEN)) {
1273       Symtab->addSymbol(SharedSymbol{*this, Name, Sym.getBinding(),
1274                                      Sym.st_other, Sym.getType(), Sym.st_value,
1275                                      Sym.st_size, Alignment, Idx});
1276     }
1277 
1278     // Also add the symbol with the versioned name to handle undefined symbols
1279     // with explicit versions.
1280     if (Idx == VER_NDX_GLOBAL)
1281       continue;
1282 
1283     if (Idx >= Verdefs.size() || Idx == VER_NDX_LOCAL) {
1284       error("corrupt input file: version definition index " + Twine(Idx) +
1285             " for symbol " + Name + " is out of bounds\n>>> defined in " +
1286             toString(this));
1287       continue;
1288     }
1289 
1290     StringRef VerName =
1291         this->StringTable.data() +
1292         reinterpret_cast<const Elf_Verdef *>(Verdefs[Idx])->getAux()->vda_name;
1293     VersionedNameBuffer.clear();
1294     Name = (Name + "@" + VerName).toStringRef(VersionedNameBuffer);
1295     Symtab->addSymbol(SharedSymbol{*this, Saver.save(Name), Sym.getBinding(),
1296                                    Sym.st_other, Sym.getType(), Sym.st_value,
1297                                    Sym.st_size, Alignment, Idx});
1298   }
1299 }
1300 
1301 static ELFKind getBitcodeELFKind(const Triple &T) {
1302   if (T.isLittleEndian())
1303     return T.isArch64Bit() ? ELF64LEKind : ELF32LEKind;
1304   return T.isArch64Bit() ? ELF64BEKind : ELF32BEKind;
1305 }
1306 
1307 static uint8_t getBitcodeMachineKind(StringRef Path, const Triple &T) {
1308   switch (T.getArch()) {
1309   case Triple::aarch64:
1310     return EM_AARCH64;
1311   case Triple::amdgcn:
1312   case Triple::r600:
1313     return EM_AMDGPU;
1314   case Triple::arm:
1315   case Triple::thumb:
1316     return EM_ARM;
1317   case Triple::avr:
1318     return EM_AVR;
1319   case Triple::mips:
1320   case Triple::mipsel:
1321   case Triple::mips64:
1322   case Triple::mips64el:
1323     return EM_MIPS;
1324   case Triple::msp430:
1325     return EM_MSP430;
1326   case Triple::ppc:
1327     return EM_PPC;
1328   case Triple::ppc64:
1329   case Triple::ppc64le:
1330     return EM_PPC64;
1331   case Triple::x86:
1332     return T.isOSIAMCU() ? EM_IAMCU : EM_386;
1333   case Triple::x86_64:
1334     return EM_X86_64;
1335   default:
1336     error(Path + ": could not infer e_machine from bitcode target triple " +
1337           T.str());
1338     return EM_NONE;
1339   }
1340 }
1341 
1342 BitcodeFile::BitcodeFile(MemoryBufferRef MB, StringRef ArchiveName,
1343                          uint64_t OffsetInArchive)
1344     : InputFile(BitcodeKind, MB) {
1345   this->ArchiveName = ArchiveName;
1346 
1347   std::string Path = MB.getBufferIdentifier().str();
1348   if (Config->ThinLTOIndexOnly)
1349     Path = replaceThinLTOSuffix(MB.getBufferIdentifier());
1350 
1351   // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
1352   // name. If two archives define two members with the same name, this
1353   // causes a collision which result in only one of the objects being taken
1354   // into consideration at LTO time (which very likely causes undefined
1355   // symbols later in the link stage). So we append file offset to make
1356   // filename unique.
1357   StringRef Name = ArchiveName.empty()
1358                        ? Saver.save(Path)
1359                        : Saver.save(ArchiveName + "(" + Path + " at " +
1360                                     utostr(OffsetInArchive) + ")");
1361   MemoryBufferRef MBRef(MB.getBuffer(), Name);
1362 
1363   Obj = CHECK(lto::InputFile::create(MBRef), this);
1364 
1365   Triple T(Obj->getTargetTriple());
1366   EKind = getBitcodeELFKind(T);
1367   EMachine = getBitcodeMachineKind(MB.getBufferIdentifier(), T);
1368 }
1369 
1370 static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) {
1371   switch (GvVisibility) {
1372   case GlobalValue::DefaultVisibility:
1373     return STV_DEFAULT;
1374   case GlobalValue::HiddenVisibility:
1375     return STV_HIDDEN;
1376   case GlobalValue::ProtectedVisibility:
1377     return STV_PROTECTED;
1378   }
1379   llvm_unreachable("unknown visibility");
1380 }
1381 
1382 template <class ELFT>
1383 static Symbol *createBitcodeSymbol(const std::vector<bool> &KeptComdats,
1384                                    const lto::InputFile::Symbol &ObjSym,
1385                                    BitcodeFile &F) {
1386   StringRef Name = Saver.save(ObjSym.getName());
1387   uint8_t Binding = ObjSym.isWeak() ? STB_WEAK : STB_GLOBAL;
1388   uint8_t Type = ObjSym.isTLS() ? STT_TLS : STT_NOTYPE;
1389   uint8_t Visibility = mapVisibility(ObjSym.getVisibility());
1390   bool CanOmitFromDynSym = ObjSym.canBeOmittedFromSymbolTable();
1391 
1392   int C = ObjSym.getComdatIndex();
1393   if (ObjSym.isUndefined() || (C != -1 && !KeptComdats[C])) {
1394     Undefined New(&F, Name, Binding, Visibility, Type);
1395     if (CanOmitFromDynSym)
1396       New.ExportDynamic = false;
1397     return Symtab->addSymbol(New);
1398   }
1399 
1400   if (ObjSym.isCommon())
1401     return Symtab->addSymbol(
1402         CommonSymbol{&F, Name, Binding, Visibility, STT_OBJECT,
1403                      ObjSym.getCommonAlignment(), ObjSym.getCommonSize()});
1404 
1405   Defined New(&F, Name, Binding, Visibility, Type, 0, 0, nullptr);
1406   if (CanOmitFromDynSym)
1407     New.ExportDynamic = false;
1408   return Symtab->addSymbol(New);
1409 }
1410 
1411 template <class ELFT>
1412 void BitcodeFile::parse(
1413     DenseMap<CachedHashStringRef, const InputFile *> &ComdatGroups) {
1414   std::vector<bool> KeptComdats;
1415   for (StringRef S : Obj->getComdatTable())
1416     KeptComdats.push_back(
1417         ComdatGroups.try_emplace(CachedHashStringRef(S), this).second);
1418 
1419   for (const lto::InputFile::Symbol &ObjSym : Obj->symbols())
1420     Symbols.push_back(createBitcodeSymbol<ELFT>(KeptComdats, ObjSym, *this));
1421 
1422   for (auto L : Obj->getDependentLibraries())
1423     addDependentLibrary(L, this);
1424 }
1425 
1426 void BinaryFile::parse() {
1427   ArrayRef<uint8_t> Data = arrayRefFromStringRef(MB.getBuffer());
1428   auto *Section = make<InputSection>(this, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS,
1429                                      8, Data, ".data");
1430   Sections.push_back(Section);
1431 
1432   // For each input file foo that is embedded to a result as a binary
1433   // blob, we define _binary_foo_{start,end,size} symbols, so that
1434   // user programs can access blobs by name. Non-alphanumeric
1435   // characters in a filename are replaced with underscore.
1436   std::string S = "_binary_" + MB.getBufferIdentifier().str();
1437   for (size_t I = 0; I < S.size(); ++I)
1438     if (!isAlnum(S[I]))
1439       S[I] = '_';
1440 
1441   Symtab->addSymbol(Defined{nullptr, Saver.save(S + "_start"), STB_GLOBAL,
1442                             STV_DEFAULT, STT_OBJECT, 0, 0, Section});
1443   Symtab->addSymbol(Defined{nullptr, Saver.save(S + "_end"), STB_GLOBAL,
1444                             STV_DEFAULT, STT_OBJECT, Data.size(), 0, Section});
1445   Symtab->addSymbol(Defined{nullptr, Saver.save(S + "_size"), STB_GLOBAL,
1446                             STV_DEFAULT, STT_OBJECT, Data.size(), 0, nullptr});
1447 }
1448 
1449 InputFile *elf::createObjectFile(MemoryBufferRef MB, StringRef ArchiveName,
1450                                  uint64_t OffsetInArchive) {
1451   if (isBitcode(MB))
1452     return make<BitcodeFile>(MB, ArchiveName, OffsetInArchive);
1453 
1454   switch (getELFKind(MB, ArchiveName)) {
1455   case ELF32LEKind:
1456     return make<ObjFile<ELF32LE>>(MB, ArchiveName);
1457   case ELF32BEKind:
1458     return make<ObjFile<ELF32BE>>(MB, ArchiveName);
1459   case ELF64LEKind:
1460     return make<ObjFile<ELF64LE>>(MB, ArchiveName);
1461   case ELF64BEKind:
1462     return make<ObjFile<ELF64BE>>(MB, ArchiveName);
1463   default:
1464     llvm_unreachable("getELFKind");
1465   }
1466 }
1467 
1468 void LazyObjFile::fetch() {
1469   if (MB.getBuffer().empty())
1470     return;
1471 
1472   InputFile *File = createObjectFile(MB, ArchiveName, OffsetInArchive);
1473   File->GroupId = GroupId;
1474 
1475   MB = {};
1476 
1477   // Copy symbol vector so that the new InputFile doesn't have to
1478   // insert the same defined symbols to the symbol table again.
1479   File->Symbols = std::move(Symbols);
1480 
1481   parseFile(File);
1482 }
1483 
1484 template <class ELFT> void LazyObjFile::parse() {
1485   using Elf_Sym = typename ELFT::Sym;
1486 
1487   // A lazy object file wraps either a bitcode file or an ELF file.
1488   if (isBitcode(this->MB)) {
1489     std::unique_ptr<lto::InputFile> Obj =
1490         CHECK(lto::InputFile::create(this->MB), this);
1491     for (const lto::InputFile::Symbol &Sym : Obj->symbols()) {
1492       if (Sym.isUndefined())
1493         continue;
1494       Symtab->addSymbol(LazyObject{*this, Saver.save(Sym.getName())});
1495     }
1496     return;
1497   }
1498 
1499   if (getELFKind(this->MB, ArchiveName) != Config->EKind) {
1500     error("incompatible file: " + this->MB.getBufferIdentifier());
1501     return;
1502   }
1503 
1504   // Find a symbol table.
1505   ELFFile<ELFT> Obj = check(ELFFile<ELFT>::create(MB.getBuffer()));
1506   ArrayRef<typename ELFT::Shdr> Sections = CHECK(Obj.sections(), this);
1507 
1508   for (const typename ELFT::Shdr &Sec : Sections) {
1509     if (Sec.sh_type != SHT_SYMTAB)
1510       continue;
1511 
1512     // A symbol table is found.
1513     ArrayRef<Elf_Sym> ESyms = CHECK(Obj.symbols(&Sec), this);
1514     uint32_t FirstGlobal = Sec.sh_info;
1515     StringRef Strtab = CHECK(Obj.getStringTableForSymtab(Sec, Sections), this);
1516     this->Symbols.resize(ESyms.size());
1517 
1518     // Get existing symbols or insert placeholder symbols.
1519     for (size_t I = FirstGlobal, End = ESyms.size(); I != End; ++I)
1520       if (ESyms[I].st_shndx != SHN_UNDEF)
1521         this->Symbols[I] = Symtab->insert(CHECK(ESyms[I].getName(Strtab), this));
1522 
1523     // Replace existing symbols with LazyObject symbols.
1524     //
1525     // resolve() may trigger this->fetch() if an existing symbol is an
1526     // undefined symbol. If that happens, this LazyObjFile has served
1527     // its purpose, and we can exit from the loop early.
1528     for (Symbol *Sym : this->Symbols) {
1529       if (!Sym)
1530         continue;
1531       Sym->resolve(LazyObject{*this, Sym->getName()});
1532 
1533       // MemoryBuffer is emptied if this file is instantiated as ObjFile.
1534       if (MB.getBuffer().empty())
1535         return;
1536     }
1537     return;
1538   }
1539 }
1540 
1541 std::string elf::replaceThinLTOSuffix(StringRef Path) {
1542   StringRef Suffix = Config->ThinLTOObjectSuffixReplace.first;
1543   StringRef Repl = Config->ThinLTOObjectSuffixReplace.second;
1544 
1545   if (Path.consume_back(Suffix))
1546     return (Path + Repl).str();
1547   return Path;
1548 }
1549 
// Explicit instantiations of the templates defined in this file, one per
// combination of ELF class (32/64-bit) and endianness (LE/BE).
template void
BitcodeFile::parse<ELF32LE>(DenseMap<CachedHashStringRef, const InputFile *> &);
template void
BitcodeFile::parse<ELF32BE>(DenseMap<CachedHashStringRef, const InputFile *> &);
template void
BitcodeFile::parse<ELF64LE>(DenseMap<CachedHashStringRef, const InputFile *> &);
template void
BitcodeFile::parse<ELF64BE>(DenseMap<CachedHashStringRef, const InputFile *> &);

template void LazyObjFile::parse<ELF32LE>();
template void LazyObjFile::parse<ELF32BE>();
template void LazyObjFile::parse<ELF64LE>();
template void LazyObjFile::parse<ELF64BE>();

template class elf::ObjFile<ELF32LE>;
template class elf::ObjFile<ELF32BE>;
template class elf::ObjFile<ELF64LE>;
template class elf::ObjFile<ELF64BE>;

template void SharedFile::parse<ELF32LE>();
template void SharedFile::parse<ELF32BE>();
template void SharedFile::parse<ELF64LE>();
template void SharedFile::parse<ELF64BE>();
1573