1 //===- InputFiles.cpp -----------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "InputFiles.h"
11 #include "Error.h"
12 #include "InputSection.h"
13 #include "LinkerScript.h"
14 #include "Memory.h"
15 #include "SymbolTable.h"
16 #include "Symbols.h"
17 #include "SyntheticSections.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/CodeGen/Analysis.h"
20 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
21 #include "llvm/IR/LLVMContext.h"
22 #include "llvm/IR/Module.h"
23 #include "llvm/LTO/LTO.h"
24 #include "llvm/MC/StringTableBuilder.h"
25 #include "llvm/Object/ELFObjectFile.h"
26 #include "llvm/Support/Path.h"
27 #include "llvm/Support/TarWriter.h"
28 #include "llvm/Support/raw_ostream.h"
29 
30 using namespace llvm;
31 using namespace llvm::ELF;
32 using namespace llvm::object;
33 using namespace llvm::sys::fs;
34 
35 using namespace lld;
36 using namespace lld::elf;
37 
// Tar writer shared by the ELF linker. When it is non-null, readFile()
// appends every file it loads to it, and ArchiveFile::getMember() appends
// the members it extracts from thin archives.
TarWriter *elf::Tar;
39 
// Records the buffer backing this input file (M) together with the kind of
// file it represents (K).
InputFile::InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {}
41 
42 Optional<MemoryBufferRef> elf::readFile(StringRef Path) {
43   // The --chroot option changes our virtual root directory.
44   // This is useful when you are dealing with files created by --reproduce.
45   if (!Config->Chroot.empty() && Path.startswith("/"))
46     Path = Saver.save(Config->Chroot + Path);
47 
48   log(Path);
49 
50   auto MBOrErr = MemoryBuffer::getFile(Path);
51   if (auto EC = MBOrErr.getError()) {
52     error("cannot open " + Path + ": " + EC.message());
53     return None;
54   }
55 
56   std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
57   MemoryBufferRef MBRef = MB->getMemBufferRef();
58   make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership
59 
60   if (Tar)
61     Tar->append(relativeToRoot(Path), MBRef.getBuffer());
62   return MBRef;
63 }
64 
65 template <class ELFT> void ObjFile<ELFT>::initializeDwarfLine() {
66   DWARFContext Dwarf(make_unique<LLDDwarfObj<ELFT>>(this));
67   const DWARFObject &Obj = Dwarf.getDWARFObj();
68   DwarfLine.reset(new DWARFDebugLine);
69   DWARFDataExtractor LineData(Obj, Obj.getLineSection(), Config->IsLE,
70                               Config->Wordsize);
71 
72   // The second parameter is offset in .debug_line section
73   // for compilation unit (CU) of interest. We have only one
74   // CU (object file), so offset is always 0.
75   DwarfLine->getOrParseLineTable(LineData, 0);
76 }
77 
78 // Returns source line information for a given offset
79 // using DWARF debug info.
80 template <class ELFT>
81 Optional<DILineInfo> ObjFile<ELFT>::getDILineInfo(InputSectionBase *S,
82                                                   uint64_t Offset) {
83   llvm::call_once(InitDwarfLine, [this]() { initializeDwarfLine(); });
84 
85   // The offset to CU is 0.
86   const DWARFDebugLine::LineTable *Tbl = DwarfLine->getLineTable(0);
87   if (!Tbl)
88     return None;
89 
90   // Use fake address calcuated by adding section file offset and offset in
91   // section. See comments for ObjectInfo class.
92   DILineInfo Info;
93   Tbl->getFileLineInfoForAddress(
94       S->getOffsetInFile() + Offset, nullptr,
95       DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, Info);
96   if (Info.Line == 0)
97     return None;
98   return Info;
99 }
100 
101 // Returns source line information for a given offset
102 // using DWARF debug info.
103 template <class ELFT>
104 std::string ObjFile<ELFT>::getLineInfo(InputSectionBase *S, uint64_t Offset) {
105   if (Optional<DILineInfo> Info = getDILineInfo(S, Offset))
106     return Info->FileName + ":" + std::to_string(Info->Line);
107   return "";
108 }
109 
110 // Returns "<internal>", "foo.a(bar.o)" or "baz.o".
111 std::string lld::toString(const InputFile *F) {
112   if (!F)
113     return "<internal>";
114 
115   if (F->ToStringCache.empty()) {
116     if (F->ArchiveName.empty())
117       F->ToStringCache = F->getName();
118     else
119       F->ToStringCache = (F->ArchiveName + "(" + F->getName() + ")").str();
120   }
121   return F->ToStringCache;
122 }
123 
124 template <class ELFT>
125 ELFFileBase<ELFT>::ELFFileBase(Kind K, MemoryBufferRef MB) : InputFile(K, MB) {
126   if (ELFT::TargetEndianness == support::little)
127     EKind = ELFT::Is64Bits ? ELF64LEKind : ELF32LEKind;
128   else
129     EKind = ELFT::Is64Bits ? ELF64BEKind : ELF32BEKind;
130 
131   EMachine = getObj().getHeader()->e_machine;
132   OSABI = getObj().getHeader()->e_ident[llvm::ELF::EI_OSABI];
133 }
134 
135 template <class ELFT>
136 typename ELFT::SymRange ELFFileBase<ELFT>::getGlobalSymbols() {
137   return makeArrayRef(Symbols.begin() + FirstNonLocal, Symbols.end());
138 }
139 
140 template <class ELFT>
141 uint32_t ELFFileBase<ELFT>::getSectionIndex(const Elf_Sym &Sym) const {
142   return check(getObj().getSectionIndex(&Sym, Symbols, SymtabSHNDX),
143                toString(this));
144 }
145 
146 template <class ELFT>
147 void ELFFileBase<ELFT>::initSymtab(ArrayRef<Elf_Shdr> Sections,
148                                    const Elf_Shdr *Symtab) {
149   FirstNonLocal = Symtab->sh_info;
150   Symbols = check(getObj().symbols(Symtab), toString(this));
151   if (FirstNonLocal == 0 || FirstNonLocal > Symbols.size())
152     fatal(toString(this) + ": invalid sh_info in symbol table");
153 
154   StringTable = check(getObj().getStringTableForSymtab(*Symtab, Sections),
155                       toString(this));
156 }
157 
// Creates an object file backed by buffer M. ArchiveName is stored as is;
// toString() uses it to print the file as "archive(member)" when it is not
// empty.
template <class ELFT>
ObjFile<ELFT>::ObjFile(MemoryBufferRef M, StringRef ArchiveName)
    : ELFFileBase<ELFT>(Base::ObjectKind, M) {
  this->ArchiveName = ArchiveName;
}
163 
164 template <class ELFT> ArrayRef<SymbolBody *> ObjFile<ELFT>::getLocalSymbols() {
165   if (this->SymbolBodies.empty())
166     return this->SymbolBodies;
167   return makeArrayRef(this->SymbolBodies).slice(1, this->FirstNonLocal - 1);
168 }
169 
170 template <class ELFT> ArrayRef<SymbolBody *> ObjFile<ELFT>::getSymbols() {
171   if (this->SymbolBodies.empty())
172     return this->SymbolBodies;
173   return makeArrayRef(this->SymbolBodies).slice(1);
174 }
175 
// Parses the object file. ComdatGroups is the set of comdat group
// signatures seen so far; it is passed on to initializeSections(), which
// uses it to de-duplicate section groups.
template <class ELFT>
void ObjFile<ELFT>::parse(DenseSet<CachedHashStringRef> &ComdatGroups) {
  // Read section and symbol tables. Sections come first because creating a
  // symbol body looks up the section the symbol is defined in.
  initializeSections(ComdatGroups);
  initializeSymbols();
}
182 
183 // Sections with SHT_GROUP and comdat bits define comdat section groups.
184 // They are identified and deduplicated by group name. This function
185 // returns a group name.
186 template <class ELFT>
187 StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> Sections,
188                                               const Elf_Shdr &Sec) {
189   // Group signatures are stored as symbol names in object files.
190   // sh_info contains a symbol index, so we fetch a symbol and read its name.
191   if (this->Symbols.empty())
192     this->initSymtab(
193         Sections,
194         check(object::getSection<ELFT>(Sections, Sec.sh_link), toString(this)));
195 
196   const Elf_Sym *Sym = check(
197       object::getSymbol<ELFT>(this->Symbols, Sec.sh_info), toString(this));
198   StringRef Signature = check(Sym->getName(this->StringTable), toString(this));
199 
200   // As a special case, if a symbol is a section symbol and has no name,
201   // we use a section name as a signature.
202   //
203   // Such SHT_GROUP sections are invalid from the perspective of the ELF
204   // standard, but GNU gold 1.14 (the neweset version as of July 2017) or
205   // older produce such sections as outputs for the -r option, so we need
206   // a bug-compatibility.
207   if (Signature.empty() && Sym->getType() == STT_SECTION)
208     return getSectionName(Sec);
209   return Signature;
210 }
211 
212 template <class ELFT>
213 ArrayRef<typename ObjFile<ELFT>::Elf_Word>
214 ObjFile<ELFT>::getShtGroupEntries(const Elf_Shdr &Sec) {
215   const ELFFile<ELFT> &Obj = this->getObj();
216   ArrayRef<Elf_Word> Entries = check(
217       Obj.template getSectionContentsAsArray<Elf_Word>(&Sec), toString(this));
218   if (Entries.empty() || Entries[0] != GRP_COMDAT)
219     fatal(toString(this) + ": unsupported SHT_GROUP format");
220   return Entries.slice(1);
221 }
222 
223 template <class ELFT> bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &Sec) {
224   // We don't merge sections if -O0 (default is -O1). This makes sometimes
225   // the linker significantly faster, although the output will be bigger.
226   if (Config->Optimize == 0)
227     return false;
228 
229   // Do not merge sections if generating a relocatable object. It makes
230   // the code simpler because we do not need to update relocation addends
231   // to reflect changes introduced by merging. Instead of that we write
232   // such "merge" sections into separate OutputSections and keep SHF_MERGE
233   // / SHF_STRINGS flags and sh_entsize value to be able to perform merging
234   // later during a final linking.
235   if (Config->Relocatable)
236     return false;
237 
238   // A mergeable section with size 0 is useless because they don't have
239   // any data to merge. A mergeable string section with size 0 can be
240   // argued as invalid because it doesn't end with a null character.
241   // We'll avoid a mess by handling them as if they were non-mergeable.
242   if (Sec.sh_size == 0)
243     return false;
244 
245   // Check for sh_entsize. The ELF spec is not clear about the zero
246   // sh_entsize. It says that "the member [sh_entsize] contains 0 if
247   // the section does not hold a table of fixed-size entries". We know
248   // that Rust 1.13 produces a string mergeable section with a zero
249   // sh_entsize. Here we just accept it rather than being picky about it.
250   uint64_t EntSize = Sec.sh_entsize;
251   if (EntSize == 0)
252     return false;
253   if (Sec.sh_size % EntSize)
254     fatal(toString(this) +
255           ": SHF_MERGE section size must be a multiple of sh_entsize");
256 
257   uint64_t Flags = Sec.sh_flags;
258   if (!(Flags & SHF_MERGE))
259     return false;
260   if (Flags & SHF_WRITE)
261     fatal(toString(this) + ": writable SHF_MERGE section is not supported");
262 
263   // Don't try to merge if the alignment is larger than the sh_entsize and this
264   // is not SHF_STRINGS.
265   //
266   // Since this is not a SHF_STRINGS, we would need to pad after every entity.
267   // It would be equivalent for the producer of the .o to just set a larger
268   // sh_entsize.
269   if (Flags & SHF_STRINGS)
270     return true;
271 
272   return Sec.sh_addralign <= EntSize;
273 }
274 
275 template <class ELFT>
276 void ObjFile<ELFT>::initializeSections(
277     DenseSet<CachedHashStringRef> &ComdatGroups) {
278   const ELFFile<ELFT> &Obj = this->getObj();
279 
280   ArrayRef<Elf_Shdr> ObjSections =
281       check(this->getObj().sections(), toString(this));
282   uint64_t Size = ObjSections.size();
283   this->Sections.resize(Size);
284   this->SectionStringTable =
285       check(Obj.getSectionStringTable(ObjSections), toString(this));
286 
287   for (size_t I = 0, E = ObjSections.size(); I < E; I++) {
288     if (this->Sections[I] == &InputSection::Discarded)
289       continue;
290     const Elf_Shdr &Sec = ObjSections[I];
291 
292     // SHF_EXCLUDE'ed sections are discarded by the linker. However,
293     // if -r is given, we'll let the final link discard such sections.
294     // This is compatible with GNU.
295     if ((Sec.sh_flags & SHF_EXCLUDE) && !Config->Relocatable) {
296       this->Sections[I] = &InputSection::Discarded;
297       continue;
298     }
299 
300     switch (Sec.sh_type) {
301     case SHT_GROUP: {
302       // De-duplicate section groups by their signatures.
303       StringRef Signature = getShtGroupSignature(ObjSections, Sec);
304       bool IsNew = ComdatGroups.insert(CachedHashStringRef(Signature)).second;
305       this->Sections[I] = &InputSection::Discarded;
306 
307       // If it is a new section group, we want to keep group members.
308       // Group leader sections, which contain indices of group members, are
309       // discarded because they are useless beyond this point. The only
310       // exception is the -r option because in order to produce re-linkable
311       // object files, we want to pass through basically everything.
312       if (IsNew) {
313         if (Config->Relocatable)
314           this->Sections[I] = createInputSection(Sec);
315         continue;
316       }
317 
318       // Otherwise, discard group members.
319       for (uint32_t SecIndex : getShtGroupEntries(Sec)) {
320         if (SecIndex >= Size)
321           fatal(toString(this) +
322                 ": invalid section index in group: " + Twine(SecIndex));
323         this->Sections[SecIndex] = &InputSection::Discarded;
324       }
325       break;
326     }
327     case SHT_SYMTAB:
328       this->initSymtab(ObjSections, &Sec);
329       break;
330     case SHT_SYMTAB_SHNDX:
331       this->SymtabSHNDX =
332           check(Obj.getSHNDXTable(Sec, ObjSections), toString(this));
333       break;
334     case SHT_STRTAB:
335     case SHT_NULL:
336       break;
337     default:
338       this->Sections[I] = createInputSection(Sec);
339     }
340 
341     // .ARM.exidx sections have a reverse dependency on the InputSection they
342     // have a SHF_LINK_ORDER dependency, this is identified by the sh_link.
343     if (Sec.sh_flags & SHF_LINK_ORDER) {
344       if (Sec.sh_link >= this->Sections.size())
345         fatal(toString(this) + ": invalid sh_link index: " +
346               Twine(Sec.sh_link));
347       this->Sections[Sec.sh_link]->DependentSections.push_back(
348           this->Sections[I]);
349     }
350   }
351 }
352 
353 template <class ELFT>
354 InputSectionBase *ObjFile<ELFT>::getRelocTarget(const Elf_Shdr &Sec) {
355   uint32_t Idx = Sec.sh_info;
356   if (Idx >= this->Sections.size())
357     fatal(toString(this) + ": invalid relocated section index: " + Twine(Idx));
358   InputSectionBase *Target = this->Sections[Idx];
359 
360   // Strictly speaking, a relocation section must be included in the
361   // group of the section it relocates. However, LLVM 3.3 and earlier
362   // would fail to do so, so we gracefully handle that case.
363   if (Target == &InputSection::Discarded)
364     return nullptr;
365 
366   if (!Target)
367     fatal(toString(this) + ": unsupported relocation reference");
368   return Target;
369 }
370 
371 // Create a regular InputSection class that has the same contents
372 // as a given section.
373 InputSectionBase *toRegularSection(MergeInputSection *Sec) {
374   auto *Ret = make<InputSection>(Sec->Flags, Sec->Type, Sec->Alignment,
375                                  Sec->Data, Sec->Name);
376   Ret->File = Sec->File;
377   return Ret;
378 }
379 
// Creates the input section object for section header Sec.
//
// Returns one of:
//  - a newly created InputSection, EhInputSection or MergeInputSection,
//  - &InputSection::Discarded if the section is to be dropped from the link,
//  - nullptr for a relocation section that has been attached to its target
//    (or whose target was discarded) and is not emitted itself.
template <class ELFT>
InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &Sec) {
  StringRef Name = getSectionName(Sec);

  switch (Sec.sh_type) {
  case SHT_ARM_ATTRIBUTES:
    // FIXME: ARM meta-data section. Retain the first attribute section
    // we see. The eglibc ARM dynamic loaders require the presence of an
    // attribute section for dlopen to work.
    // In a full implementation we would merge all attribute sections.
    if (InX::ARMAttributes == nullptr) {
      InX::ARMAttributes = make<InputSection>(this, &Sec, Name);
      return InX::ARMAttributes;
    }
    return &InputSection::Discarded;
  case SHT_RELA:
  case SHT_REL: {
    // Find the relocation target section and associate this
    // section with it. Target can be discarded, for example
    // if it is a duplicated member of SHT_GROUP section, we
    // do not create or process relocatable sections then.
    InputSectionBase *Target = getRelocTarget(Sec);
    if (!Target)
      return nullptr;

    // This section contains relocation information.
    // If -r is given, we do not interpret or apply relocation
    // but just copy relocation sections to output.
    if (Config->Relocatable)
      return make<InputSection>(this, &Sec, Name);

    // A non-null FirstRelocation means an earlier relocation section has
    // already been attached to this target.
    if (Target->FirstRelocation)
      fatal(toString(this) +
            ": multiple relocation sections to one section are not supported");

    // Mergeable sections with relocations are tricky because relocations
    // need to be taken into account when comparing section contents for
    // merging. It's not worth supporting such mergeable sections because
    // they are rare and it would complicate the internal design (we usually
    // have to determine if two sections are mergeable early in the link
    // process much before applying relocations). We simply handle mergeable
    // sections with relocations as non-mergeable.
    if (auto *MS = dyn_cast<MergeInputSection>(Target)) {
      Target = toRegularSection(MS);
      this->Sections[Sec.sh_info] = Target;
    }

    // Record where the relocations start, how many there are and whether
    // they are RELA or REL entries.
    size_t NumRelocations;
    if (Sec.sh_type == SHT_RELA) {
      ArrayRef<Elf_Rela> Rels =
          check(this->getObj().relas(&Sec), toString(this));
      Target->FirstRelocation = Rels.begin();
      NumRelocations = Rels.size();
      Target->AreRelocsRela = true;
    } else {
      ArrayRef<Elf_Rel> Rels = check(this->getObj().rels(&Sec), toString(this));
      Target->FirstRelocation = Rels.begin();
      NumRelocations = Rels.size();
      Target->AreRelocsRela = false;
    }
    assert(isUInt<31>(NumRelocations));
    Target->NumRelocations = NumRelocations;

    // Relocation sections processed by the linker are usually removed
    // from the output, so returning `nullptr` for the normal case.
    // However, if -emit-relocs is given, we need to leave them in the output.
    // (Some post link analysis tools need this information.)
    if (Config->EmitRelocs) {
      InputSection *RelocSec = make<InputSection>(this, &Sec, Name);
      // We will not emit relocation section if target was discarded.
      Target->DependentSections.push_back(RelocSec);
      return RelocSec;
    }
    return nullptr;
  }
  }

  // The GNU linker uses .note.GNU-stack section as a marker indicating
  // that the code in the object file does not expect that the stack is
  // executable (in terms of NX bit). If all input files have the marker,
  // the GNU linker adds a PT_GNU_STACK segment to tell the loader to
  // make the stack non-executable. Most object files have this section as
  // of 2017.
  //
  // But making the stack non-executable is a norm today for security
  // reasons. Failure to do so may result in a serious security issue.
  // Therefore, we make LLD always add PT_GNU_STACK unless it is
  // explicitly told to do otherwise (by -z execstack). Because the stack
  // executable-ness is controlled solely by command line options,
  // .note.GNU-stack sections are simply ignored.
  if (Name == ".note.GNU-stack")
    return &InputSection::Discarded;

  // Split stacks is a feature to support a discontiguous stack. At least
  // as of 2017, it seems that the feature is not being used widely.
  // Only GNU gold supports that. We don't. For the details about that,
  // see https://gcc.gnu.org/wiki/SplitStacks
  if (Name == ".note.GNU-split-stack") {
    error(toString(this) +
          ": object file compiled with -fsplit-stack is not supported");
    return &InputSection::Discarded;
  }

  // Any strip policy other than "none" drops the debug sections.
  if (Config->Strip != StripPolicy::None && Name.startswith(".debug"))
    return &InputSection::Discarded;

  // If -gdb-index is given, LLD creates .gdb_index section, and that
  // section serves the same purpose as .debug_gnu_pub{names,types} sections.
  // If that's the case, we want to eliminate .debug_gnu_pub{names,types}
  // because they are redundant and can waste large amount of disk space
  // (for example, they are about 400 MiB in total for a clang debug build.)
  // We still create the section and mark it dead so that the gdb index code
  // can use the InputSection to access the data.
  if (Config->GdbIndex &&
      (Name == ".debug_gnu_pubnames" || Name == ".debug_gnu_pubtypes")) {
    auto *Ret = make<InputSection>(this, &Sec, Name);
    Script->discard({Ret});
    return Ret;
  }

  // The linkonce feature is a sort of proto-comdat. Some glibc i386 object
  // files contain definitions of symbol "__x86.get_pc_thunk.bx" in linkonce
  // sections. Drop those sections to avoid duplicate symbol errors.
  // FIXME: This is glibc PR20543, we should remove this hack once that has been
  // fixed for a while.
  if (Name.startswith(".gnu.linkonce."))
    return &InputSection::Discarded;

  // The linker merges EH (exception handling) frames and creates a
  // .eh_frame_hdr section for runtime. So we handle them with a special
  // class. For relocatable outputs, they are just passed through.
  if (Name == ".eh_frame" && !Config->Relocatable)
    return make<EhInputSection>(this, &Sec, Name);

  // Everything else becomes either a mergeable or a regular input section.
  if (shouldMerge(Sec))
    return make<MergeInputSection>(this, &Sec, Name);
  return make<InputSection>(this, &Sec, Name);
}
518 
519 template <class ELFT>
520 StringRef ObjFile<ELFT>::getSectionName(const Elf_Shdr &Sec) {
521   return check(this->getObj().getSectionName(&Sec, SectionStringTable),
522                toString(this));
523 }
524 
525 template <class ELFT> void ObjFile<ELFT>::initializeSymbols() {
526   SymbolBodies.reserve(this->Symbols.size());
527   for (const Elf_Sym &Sym : this->Symbols)
528     SymbolBodies.push_back(createSymbolBody(&Sym));
529 }
530 
531 template <class ELFT>
532 InputSectionBase *ObjFile<ELFT>::getSection(const Elf_Sym &Sym) const {
533   uint32_t Index = this->getSectionIndex(Sym);
534   if (Index >= this->Sections.size())
535     fatal(toString(this) + ": invalid section index: " + Twine(Index));
536   InputSectionBase *S = this->Sections[Index];
537 
538   // We found that GNU assembler 2.17.50 [FreeBSD] 2007-07-03 could
539   // generate broken objects. STT_SECTION/STT_NOTYPE symbols can be
540   // associated with SHT_REL[A]/SHT_SYMTAB/SHT_STRTAB sections.
541   // In this case it is fine for section to be null here as we do not
542   // allocate sections of these types.
543   if (!S) {
544     if (Index == 0 || Sym.getType() == STT_SECTION ||
545         Sym.getType() == STT_NOTYPE)
546       return nullptr;
547     fatal(toString(this) + ": invalid section index: " + Twine(Index));
548   }
549 
550   if (S == &InputSection::Discarded)
551     return S;
552   return S->Repl;
553 }
554 
555 template <class ELFT>
556 SymbolBody *ObjFile<ELFT>::createSymbolBody(const Elf_Sym *Sym) {
557   int Binding = Sym->getBinding();
558   InputSectionBase *Sec = getSection(*Sym);
559 
560   uint8_t StOther = Sym->st_other;
561   uint8_t Type = Sym->getType();
562   uint64_t Value = Sym->st_value;
563   uint64_t Size = Sym->st_size;
564 
565   if (Binding == STB_LOCAL) {
566     if (Sym->getType() == STT_FILE)
567       SourceFile = check(Sym->getName(this->StringTable), toString(this));
568 
569     if (this->StringTable.size() <= Sym->st_name)
570       fatal(toString(this) + ": invalid symbol name offset");
571 
572     StringRefZ Name = this->StringTable.data() + Sym->st_name;
573     if (Sym->st_shndx == SHN_UNDEF)
574       return make<Undefined>(Name, /*IsLocal=*/true, StOther, Type, this);
575 
576     return make<DefinedRegular>(Name, /*IsLocal=*/true, StOther, Type, Value,
577                                 Size, Sec, this);
578   }
579 
580   StringRef Name = check(Sym->getName(this->StringTable), toString(this));
581 
582   switch (Sym->st_shndx) {
583   case SHN_UNDEF:
584     return Symtab
585         ->addUndefined<ELFT>(Name, /*IsLocal=*/false, Binding, StOther, Type,
586                              /*CanOmitFromDynSym=*/false, this)
587         ->body();
588   case SHN_COMMON:
589     if (Value == 0 || Value >= UINT32_MAX)
590       fatal(toString(this) + ": common symbol '" + Name +
591             "' has invalid alignment: " + Twine(Value));
592     return Symtab->addCommon(Name, Size, Value, Binding, StOther, Type, this)
593         ->body();
594   }
595 
596   switch (Binding) {
597   default:
598     fatal(toString(this) + ": unexpected binding: " + Twine(Binding));
599   case STB_GLOBAL:
600   case STB_WEAK:
601   case STB_GNU_UNIQUE:
602     if (Sec == &InputSection::Discarded)
603       return Symtab
604           ->addUndefined<ELFT>(Name, /*IsLocal=*/false, Binding, StOther, Type,
605                                /*CanOmitFromDynSym=*/false, this)
606           ->body();
607     return Symtab
608         ->addRegular<ELFT>(Name, StOther, Type, Value, Size, Binding, Sec, this)
609         ->body();
610   }
611 }
612 
// Takes ownership of the parsed archive. The InputFile base class is
// initialized first, so the parameter File is still intact when its memory
// buffer reference is read; only afterwards is it moved into the member of
// the same name.
ArchiveFile::ArchiveFile(std::unique_ptr<Archive> &&File)
    : InputFile(ArchiveKind, File->getMemoryBufferRef()),
      File(std::move(File)) {}
616 
// Registers every symbol listed in the archive's symbol table with the
// global symbol table as a lazy archive symbol, and remembers the results
// in Symbols.
template <class ELFT> void ArchiveFile::parse() {
  Symbols.reserve(File->getNumberOfSymbols());
  for (const Archive::Symbol &Sym : File->symbols())
    Symbols.push_back(Symtab->addLazyArchive<ELFT>(this, Sym));
}
622 
623 // Returns a buffer pointing to a member file containing a given symbol.
624 std::pair<MemoryBufferRef, uint64_t>
625 ArchiveFile::getMember(const Archive::Symbol *Sym) {
626   Archive::Child C =
627       check(Sym->getMember(), toString(this) +
628                                   ": could not get the member for symbol " +
629                                   Sym->getName());
630 
631   if (!Seen.insert(C.getChildOffset()).second)
632     return {MemoryBufferRef(), 0};
633 
634   MemoryBufferRef Ret =
635       check(C.getMemoryBufferRef(),
636             toString(this) +
637                 ": could not get the buffer for the member defining symbol " +
638                 Sym->getName());
639 
640   if (C.getParent()->isThin() && Tar)
641     Tar->append(relativeToRoot(check(C.getFullName(), toString(this))),
642                 Ret.getBuffer());
643   if (C.getParent()->isThin())
644     return {Ret, 0};
645   return {Ret, C.getChildOffset()};
646 }
647 
// Creates a shared object file backed by buffer M. SoName starts out as
// DefaultSoName; parseSoName() replaces it if the file has a DT_SONAME
// entry. AsNeeded captures the value of Config->AsNeeded at construction
// time.
template <class ELFT>
SharedFile<ELFT>::SharedFile(MemoryBufferRef M, StringRef DefaultSoName)
    : ELFFileBase<ELFT>(Base::SharedKind, M), SoName(DefaultSoName),
      AsNeeded(Config->AsNeeded) {}
652 
// Returns the section header of the section Sym is defined in, using the
// SHT_SYMTAB_SHNDX table when needed. A failed lookup is reported through
// check(), prefixed with this file's name.
template <class ELFT>
const typename ELFT::Shdr *
SharedFile<ELFT>::getSection(const Elf_Sym &Sym) const {
  return check(
      this->getObj().getSection(&Sym, this->Symbols, this->SymtabSHNDX),
      toString(this));
}
660 
661 // Partially parse the shared object file so that we can call
662 // getSoName on this object.
663 template <class ELFT> void SharedFile<ELFT>::parseSoName() {
664   const Elf_Shdr *DynamicSec = nullptr;
665   const ELFFile<ELFT> Obj = this->getObj();
666   ArrayRef<Elf_Shdr> Sections = check(Obj.sections(), toString(this));
667 
668   // Search for .dynsym, .dynamic, .symtab, .gnu.version and .gnu.version_d.
669   for (const Elf_Shdr &Sec : Sections) {
670     switch (Sec.sh_type) {
671     default:
672       continue;
673     case SHT_DYNSYM:
674       this->initSymtab(Sections, &Sec);
675       break;
676     case SHT_DYNAMIC:
677       DynamicSec = &Sec;
678       break;
679     case SHT_SYMTAB_SHNDX:
680       this->SymtabSHNDX =
681           check(Obj.getSHNDXTable(Sec, Sections), toString(this));
682       break;
683     case SHT_GNU_versym:
684       this->VersymSec = &Sec;
685       break;
686     case SHT_GNU_verdef:
687       this->VerdefSec = &Sec;
688       break;
689     }
690   }
691 
692   if (this->VersymSec && this->Symbols.empty())
693     error("SHT_GNU_versym should be associated with symbol table");
694 
695   // Search for a DT_SONAME tag to initialize this->SoName.
696   if (!DynamicSec)
697     return;
698   ArrayRef<Elf_Dyn> Arr =
699       check(Obj.template getSectionContentsAsArray<Elf_Dyn>(DynamicSec),
700             toString(this));
701   for (const Elf_Dyn &Dyn : Arr) {
702     if (Dyn.d_tag == DT_SONAME) {
703       uint64_t Val = Dyn.getVal();
704       if (Val >= this->StringTable.size())
705         fatal(toString(this) + ": invalid DT_SONAME entry");
706       SoName = this->StringTable.data() + Val;
707       return;
708     }
709   }
710 }
711 
712 // Parse the version definitions in the object file if present. Returns a vector
713 // whose nth element contains a pointer to the Elf_Verdef for version identifier
714 // n. Version identifiers that are not definitions map to nullptr. The array
715 // always has at least length 1.
716 template <class ELFT>
717 std::vector<const typename ELFT::Verdef *>
718 SharedFile<ELFT>::parseVerdefs(const Elf_Versym *&Versym) {
719   std::vector<const Elf_Verdef *> Verdefs(1);
720   // We only need to process symbol versions for this DSO if it has both a
721   // versym and a verdef section, which indicates that the DSO contains symbol
722   // version definitions.
723   if (!VersymSec || !VerdefSec)
724     return Verdefs;
725 
726   // The location of the first global versym entry.
727   const char *Base = this->MB.getBuffer().data();
728   Versym = reinterpret_cast<const Elf_Versym *>(Base + VersymSec->sh_offset) +
729            this->FirstNonLocal;
730 
731   // We cannot determine the largest verdef identifier without inspecting
732   // every Elf_Verdef, but both bfd and gold assign verdef identifiers
733   // sequentially starting from 1, so we predict that the largest identifier
734   // will be VerdefCount.
735   unsigned VerdefCount = VerdefSec->sh_info;
736   Verdefs.resize(VerdefCount + 1);
737 
738   // Build the Verdefs array by following the chain of Elf_Verdef objects
739   // from the start of the .gnu.version_d section.
740   const char *Verdef = Base + VerdefSec->sh_offset;
741   for (unsigned I = 0; I != VerdefCount; ++I) {
742     auto *CurVerdef = reinterpret_cast<const Elf_Verdef *>(Verdef);
743     Verdef += CurVerdef->vd_next;
744     unsigned VerdefIndex = CurVerdef->vd_ndx;
745     if (Verdefs.size() <= VerdefIndex)
746       Verdefs.resize(VerdefIndex + 1);
747     Verdefs[VerdefIndex] = CurVerdef;
748   }
749 
750   return Verdefs;
751 }
752 
753 // Fully parse the shared object file. This must be called after parseSoName().
754 template <class ELFT> void SharedFile<ELFT>::parseRest() {
755   // Create mapping from version identifiers to Elf_Verdef entries.
756   const Elf_Versym *Versym = nullptr;
757   std::vector<const Elf_Verdef *> Verdefs = parseVerdefs(Versym);
758 
759   Elf_Sym_Range Syms = this->getGlobalSymbols();
760   for (const Elf_Sym &Sym : Syms) {
761     unsigned VersymIndex = 0;
762     if (Versym) {
763       VersymIndex = Versym->vs_index;
764       ++Versym;
765     }
766     bool Hidden = VersymIndex & VERSYM_HIDDEN;
767     VersymIndex = VersymIndex & ~VERSYM_HIDDEN;
768 
769     StringRef Name = check(Sym.getName(this->StringTable), toString(this));
770     if (Sym.isUndefined()) {
771       Undefs.push_back(Name);
772       continue;
773     }
774 
775     // Ignore local symbols.
776     if (Versym && VersymIndex == VER_NDX_LOCAL)
777       continue;
778 
779     const Elf_Verdef *V =
780         VersymIndex == VER_NDX_GLOBAL ? nullptr : Verdefs[VersymIndex];
781 
782     if (!Hidden)
783       Symtab->addShared(this, Name, Sym, V);
784 
785     // Also add the symbol with the versioned name to handle undefined symbols
786     // with explicit versions.
787     if (V) {
788       StringRef VerName = this->StringTable.data() + V->getAux()->vda_name;
789       Name = Saver.save(Name + "@" + VerName);
790       Symtab->addShared(this, Name, Sym, V);
791     }
792   }
793 }
794 
795 static ELFKind getBitcodeELFKind(const Triple &T) {
796   if (T.isLittleEndian())
797     return T.isArch64Bit() ? ELF64LEKind : ELF32LEKind;
798   return T.isArch64Bit() ? ELF64BEKind : ELF32BEKind;
799 }
800 
801 static uint8_t getBitcodeMachineKind(StringRef Path, const Triple &T) {
802   switch (T.getArch()) {
803   case Triple::aarch64:
804     return EM_AARCH64;
805   case Triple::arm:
806   case Triple::thumb:
807     return EM_ARM;
808   case Triple::avr:
809     return EM_AVR;
810   case Triple::mips:
811   case Triple::mipsel:
812   case Triple::mips64:
813   case Triple::mips64el:
814     return EM_MIPS;
815   case Triple::ppc:
816     return EM_PPC;
817   case Triple::ppc64:
818     return EM_PPC64;
819   case Triple::x86:
820     return T.isOSIAMCU() ? EM_IAMCU : EM_386;
821   case Triple::x86_64:
822     return EM_X86_64;
823   default:
824     fatal(Path + ": could not infer e_machine from bitcode target triple " +
825           T.str());
826   }
827 }
828 
// Out-of-class definition of the static BitcodeFile::Instances list.
// NOTE(review): nothing in this part of the file appends to it; presumably it
// is filled where bitcode inputs are registered -- confirm against callers.
std::vector<BitcodeFile *> BitcodeFile::Instances;
830 
831 BitcodeFile::BitcodeFile(MemoryBufferRef MB, StringRef ArchiveName,
832                          uint64_t OffsetInArchive)
833     : InputFile(BitcodeKind, MB) {
834   this->ArchiveName = ArchiveName;
835 
836   // Here we pass a new MemoryBufferRef which is identified by ArchiveName
837   // (the fully resolved path of the archive) + member name + offset of the
838   // member in the archive.
839   // ThinLTO uses the MemoryBufferRef identifier to access its internal
840   // data structures and if two archives define two members with the same name,
841   // this causes a collision which result in only one of the objects being
842   // taken into consideration at LTO time (which very likely causes undefined
843   // symbols later in the link stage).
844   MemoryBufferRef MBRef(MB.getBuffer(),
845                         Saver.save(ArchiveName + MB.getBufferIdentifier() +
846                                    utostr(OffsetInArchive)));
847   Obj = check(lto::InputFile::create(MBRef), toString(this));
848 
849   Triple T(Obj->getTargetTriple());
850   EKind = getBitcodeELFKind(T);
851   EMachine = getBitcodeMachineKind(MB.getBufferIdentifier(), T);
852 }
853 
// Translates an LLVM IR visibility into the corresponding ELF STV_* value.
// The switch deliberately has no default label; the llvm_unreachable after it
// is hit only if GvVisibility holds none of the three enumerators below.
static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) {
  switch (GvVisibility) {
  case GlobalValue::DefaultVisibility:
    return STV_DEFAULT;
  case GlobalValue::HiddenVisibility:
    return STV_HIDDEN;
  case GlobalValue::ProtectedVisibility:
    return STV_PROTECTED;
  }
  llvm_unreachable("unknown visibility");
}
865 
866 template <class ELFT>
867 static Symbol *createBitcodeSymbol(const std::vector<bool> &KeptComdats,
868                                    const lto::InputFile::Symbol &ObjSym,
869                                    BitcodeFile *F) {
870   StringRef NameRef = Saver.save(ObjSym.getName());
871   uint32_t Binding = ObjSym.isWeak() ? STB_WEAK : STB_GLOBAL;
872 
873   uint8_t Type = ObjSym.isTLS() ? STT_TLS : STT_NOTYPE;
874   uint8_t Visibility = mapVisibility(ObjSym.getVisibility());
875   bool CanOmitFromDynSym = ObjSym.canBeOmittedFromSymbolTable();
876 
877   int C = ObjSym.getComdatIndex();
878   if (C != -1 && !KeptComdats[C])
879     return Symtab->addUndefined<ELFT>(NameRef, /*IsLocal=*/false, Binding,
880                                       Visibility, Type, CanOmitFromDynSym, F);
881 
882   if (ObjSym.isUndefined())
883     return Symtab->addUndefined<ELFT>(NameRef, /*IsLocal=*/false, Binding,
884                                       Visibility, Type, CanOmitFromDynSym, F);
885 
886   if (ObjSym.isCommon())
887     return Symtab->addCommon(NameRef, ObjSym.getCommonSize(),
888                              ObjSym.getCommonAlignment(), Binding, Visibility,
889                              STT_OBJECT, F);
890 
891   return Symtab->addBitcode(NameRef, Binding, Visibility, Type,
892                             CanOmitFromDynSym, F);
893 }
894 
895 template <class ELFT>
896 void BitcodeFile::parse(DenseSet<CachedHashStringRef> &ComdatGroups) {
897   std::vector<bool> KeptComdats;
898   for (StringRef S : Obj->getComdatTable())
899     KeptComdats.push_back(ComdatGroups.insert(CachedHashStringRef(S)).second);
900 
901   for (const lto::InputFile::Symbol &ObjSym : Obj->symbols())
902     Symbols.push_back(createBitcodeSymbol<ELFT>(KeptComdats, ObjSym, this));
903 }
904 
905 static ELFKind getELFKind(MemoryBufferRef MB) {
906   unsigned char Size;
907   unsigned char Endian;
908   std::tie(Size, Endian) = getElfArchType(MB.getBuffer());
909 
910   if (Endian != ELFDATA2LSB && Endian != ELFDATA2MSB)
911     fatal(MB.getBufferIdentifier() + ": invalid data encoding");
912   if (Size != ELFCLASS32 && Size != ELFCLASS64)
913     fatal(MB.getBufferIdentifier() + ": invalid file class");
914 
915   size_t BufSize = MB.getBuffer().size();
916   if ((Size == ELFCLASS32 && BufSize < sizeof(Elf32_Ehdr)) ||
917       (Size == ELFCLASS64 && BufSize < sizeof(Elf64_Ehdr)))
918     fatal(MB.getBufferIdentifier() + ": file is too short");
919 
920   if (Size == ELFCLASS32)
921     return (Endian == ELFDATA2LSB) ? ELF32LEKind : ELF32BEKind;
922   return (Endian == ELFDATA2LSB) ? ELF64LEKind : ELF64BEKind;
923 }
924 
// Out-of-class definition of the static BinaryFile::Instances list.
// NOTE(review): nothing in this part of the file appends to it; presumably it
// is filled where binary inputs are registered -- confirm against callers.
std::vector<BinaryFile *> BinaryFile::Instances;
926 
927 template <class ELFT> void BinaryFile::parse() {
928   ArrayRef<uint8_t> Data = toArrayRef(MB.getBuffer());
929   auto *Section =
930       make<InputSection>(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, 8, Data, ".data");
931   Sections.push_back(Section);
932 
933   // For each input file foo that is embedded to a result as a binary
934   // blob, we define _binary_foo_{start,end,size} symbols, so that
935   // user programs can access blobs by name. Non-alphanumeric
936   // characters in a filename are replaced with underscore.
937   std::string S = "_binary_" + MB.getBufferIdentifier().str();
938   for (size_t I = 0; I < S.size(); ++I)
939     if (!isalnum(S[I]))
940       S[I] = '_';
941 
942   Symtab->addRegular<ELFT>(Saver.save(S + "_start"), STV_DEFAULT, STT_OBJECT,
943                            0, 0, STB_GLOBAL, Section, nullptr);
944   Symtab->addRegular<ELFT>(Saver.save(S + "_end"), STV_DEFAULT, STT_OBJECT,
945                            Data.size(), 0, STB_GLOBAL, Section, nullptr);
946   Symtab->addRegular<ELFT>(Saver.save(S + "_size"), STV_DEFAULT, STT_OBJECT,
947                            Data.size(), 0, STB_GLOBAL, nullptr, nullptr);
948 }
949 
950 static bool isBitcode(MemoryBufferRef MB) {
951   using namespace sys::fs;
952   return identify_magic(MB.getBuffer()) == file_magic::bitcode;
953 }
954 
955 InputFile *elf::createObjectFile(MemoryBufferRef MB, StringRef ArchiveName,
956                                  uint64_t OffsetInArchive) {
957   if (isBitcode(MB))
958     return make<BitcodeFile>(MB, ArchiveName, OffsetInArchive);
959 
960   switch (getELFKind(MB)) {
961   case ELF32LEKind:
962     return make<ObjFile<ELF32LE>>(MB, ArchiveName);
963   case ELF32BEKind:
964     return make<ObjFile<ELF32BE>>(MB, ArchiveName);
965   case ELF64LEKind:
966     return make<ObjFile<ELF64LE>>(MB, ArchiveName);
967   case ELF64BEKind:
968     return make<ObjFile<ELF64BE>>(MB, ArchiveName);
969   default:
970     llvm_unreachable("getELFKind");
971   }
972 }
973 
974 InputFile *elf::createSharedFile(MemoryBufferRef MB, StringRef DefaultSoName) {
975   switch (getELFKind(MB)) {
976   case ELF32LEKind:
977     return make<SharedFile<ELF32LE>>(MB, DefaultSoName);
978   case ELF32BEKind:
979     return make<SharedFile<ELF32BE>>(MB, DefaultSoName);
980   case ELF64LEKind:
981     return make<SharedFile<ELF64LE>>(MB, DefaultSoName);
982   case ELF64BEKind:
983     return make<SharedFile<ELF64BE>>(MB, DefaultSoName);
984   default:
985     llvm_unreachable("getELFKind");
986   }
987 }
988 
989 MemoryBufferRef LazyObjFile::getBuffer() {
990   if (Seen)
991     return MemoryBufferRef();
992   Seen = true;
993   return MB;
994 }
995 
996 InputFile *LazyObjFile::fetch() {
997   MemoryBufferRef MBRef = getBuffer();
998   if (MBRef.getBuffer().empty())
999     return nullptr;
1000   return createObjectFile(MBRef, ArchiveName, OffsetInArchive);
1001 }
1002 
1003 template <class ELFT> void LazyObjFile::parse() {
1004   for (StringRef Sym : getSymbols())
1005     Symtab->addLazyObject<ELFT>(Sym, *this);
1006 }
1007 
1008 template <class ELFT> std::vector<StringRef> LazyObjFile::getElfSymbols() {
1009   typedef typename ELFT::Shdr Elf_Shdr;
1010   typedef typename ELFT::Sym Elf_Sym;
1011   typedef typename ELFT::SymRange Elf_Sym_Range;
1012 
1013   const ELFFile<ELFT> Obj(this->MB.getBuffer());
1014   ArrayRef<Elf_Shdr> Sections = check(Obj.sections(), toString(this));
1015   for (const Elf_Shdr &Sec : Sections) {
1016     if (Sec.sh_type != SHT_SYMTAB)
1017       continue;
1018 
1019     Elf_Sym_Range Syms = check(Obj.symbols(&Sec), toString(this));
1020     uint32_t FirstNonLocal = Sec.sh_info;
1021     StringRef StringTable =
1022         check(Obj.getStringTableForSymtab(Sec, Sections), toString(this));
1023     std::vector<StringRef> V;
1024 
1025     for (const Elf_Sym &Sym : Syms.slice(FirstNonLocal))
1026       if (Sym.st_shndx != SHN_UNDEF)
1027         V.push_back(check(Sym.getName(StringTable), toString(this)));
1028     return V;
1029   }
1030   return {};
1031 }
1032 
1033 std::vector<StringRef> LazyObjFile::getBitcodeSymbols() {
1034   std::unique_ptr<lto::InputFile> Obj =
1035       check(lto::InputFile::create(this->MB), toString(this));
1036   std::vector<StringRef> V;
1037   for (const lto::InputFile::Symbol &Sym : Obj->symbols())
1038     if (!Sym.isUndefined())
1039       V.push_back(Saver.save(Sym.getName()));
1040   return V;
1041 }
1042 
1043 // Returns a vector of globally-visible defined symbol names.
1044 std::vector<StringRef> LazyObjFile::getSymbols() {
1045   if (isBitcode(this->MB))
1046     return getBitcodeSymbols();
1047 
1048   switch (getELFKind(this->MB)) {
1049   case ELF32LEKind:
1050     return getElfSymbols<ELF32LE>();
1051   case ELF32BEKind:
1052     return getElfSymbols<ELF32BE>();
1053   case ELF64LEKind:
1054     return getElfSymbols<ELF64LE>();
1055   case ELF64BEKind:
1056     return getElfSymbols<ELF64BE>();
1057   default:
1058     llvm_unreachable("getELFKind");
1059   }
1060 }
1061 
// Explicit template instantiations. Each template below is instantiated for
// the four ELF flavours (32/64-bit, little/big endian).

// ArchiveFile::parse
template void ArchiveFile::parse<ELF32LE>();
template void ArchiveFile::parse<ELF32BE>();
template void ArchiveFile::parse<ELF64LE>();
template void ArchiveFile::parse<ELF64BE>();

// BitcodeFile::parse
template void BitcodeFile::parse<ELF32LE>(DenseSet<CachedHashStringRef> &);
template void BitcodeFile::parse<ELF32BE>(DenseSet<CachedHashStringRef> &);
template void BitcodeFile::parse<ELF64LE>(DenseSet<CachedHashStringRef> &);
template void BitcodeFile::parse<ELF64BE>(DenseSet<CachedHashStringRef> &);

// LazyObjFile::parse
template void LazyObjFile::parse<ELF32LE>();
template void LazyObjFile::parse<ELF32BE>();
template void LazyObjFile::parse<ELF64LE>();
template void LazyObjFile::parse<ELF64BE>();

// ELFFileBase (whole class)
template class elf::ELFFileBase<ELF32LE>;
template class elf::ELFFileBase<ELF32BE>;
template class elf::ELFFileBase<ELF64LE>;
template class elf::ELFFileBase<ELF64BE>;

// ObjFile (whole class)
template class elf::ObjFile<ELF32LE>;
template class elf::ObjFile<ELF32BE>;
template class elf::ObjFile<ELF64LE>;
template class elf::ObjFile<ELF64BE>;

// SharedFile (whole class)
template class elf::SharedFile<ELF32LE>;
template class elf::SharedFile<ELF32BE>;
template class elf::SharedFile<ELF64LE>;
template class elf::SharedFile<ELF64BE>;

// BinaryFile::parse
template void BinaryFile::parse<ELF32LE>();
template void BinaryFile::parse<ELF32BE>();
template void BinaryFile::parse<ELF64LE>();
template void BinaryFile::parse<ELF64BE>();
1096