1 //===- InputFiles.cpp -----------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "InputFiles.h"
11 #include "InputSection.h"
12 #include "LinkerScript.h"
13 #include "Memory.h"
14 #include "SymbolTable.h"
15 #include "Symbols.h"
16 #include "SyntheticSections.h"
17 #include "lld/Common/ErrorHandler.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/CodeGen/Analysis.h"
20 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
21 #include "llvm/IR/LLVMContext.h"
22 #include "llvm/IR/Module.h"
23 #include "llvm/LTO/LTO.h"
24 #include "llvm/MC/StringTableBuilder.h"
25 #include "llvm/Object/ELFObjectFile.h"
26 #include "llvm/Support/Path.h"
27 #include "llvm/Support/TarWriter.h"
28 #include "llvm/Support/raw_ostream.h"
29 
30 using namespace llvm;
31 using namespace llvm::ELF;
32 using namespace llvm::object;
33 using namespace llvm::sys::fs;
34 
35 using namespace lld;
36 using namespace lld::elf;
37 
// Global lists of pointers to the input files of each kind. They are only
// defined here; the code that fills them is not part of this section.
std::vector<BinaryFile *> elf::BinaryFiles;
std::vector<BitcodeFile *> elf::BitcodeFiles;
std::vector<InputFile *> elf::ObjectFiles;
std::vector<InputFile *> elf::SharedFiles;

// When non-null, readFile() (for every file it opens) and
// ArchiveFile::getMember() (for thin archive members) append the file
// contents to this tar writer.
TarWriter *elf::Tar;
44 
45 InputFile::InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {}
46 
47 Optional<MemoryBufferRef> elf::readFile(StringRef Path) {
48   // The --chroot option changes our virtual root directory.
49   // This is useful when you are dealing with files created by --reproduce.
50   if (!Config->Chroot.empty() && Path.startswith("/"))
51     Path = Saver.save(Config->Chroot + Path);
52 
53   log(Path);
54 
55   auto MBOrErr = MemoryBuffer::getFile(Path);
56   if (auto EC = MBOrErr.getError()) {
57     error("cannot open " + Path + ": " + EC.message());
58     return None;
59   }
60 
61   std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
62   MemoryBufferRef MBRef = MB->getMemBufferRef();
63   make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership
64 
65   if (Tar)
66     Tar->append(relativeToRoot(Path), MBRef.getBuffer());
67   return MBRef;
68 }
69 
70 template <class ELFT> void ObjFile<ELFT>::initializeDwarf() {
71   DWARFContext Dwarf(make_unique<LLDDwarfObj<ELFT>>(this));
72   const DWARFObject &Obj = Dwarf.getDWARFObj();
73   DwarfLine.reset(new DWARFDebugLine);
74   DWARFDataExtractor LineData(Obj, Obj.getLineSection(), Config->IsLE,
75                               Config->Wordsize);
76 
77   // The second parameter is offset in .debug_line section
78   // for compilation unit (CU) of interest. We have only one
79   // CU (object file), so offset is always 0.
80   // FIXME: Provide the associated DWARFUnit if there is one.  DWARF v5
81   // needs it in order to find indirect strings.
82   const DWARFDebugLine::LineTable *LT =
83       DwarfLine->getOrParseLineTable(LineData, 0, nullptr);
84 
85   // Return if there is no debug information about CU available.
86   if (!Dwarf.getNumCompileUnits())
87     return;
88 
89   // Loop over variable records and insert them to VariableLoc.
90   DWARFCompileUnit *CU = Dwarf.getCompileUnitAtIndex(0);
91   for (const auto &Entry : CU->dies()) {
92     DWARFDie Die(CU, &Entry);
93     // Skip all tags that are not variables.
94     if (Die.getTag() != dwarf::DW_TAG_variable)
95       continue;
96 
97     // Skip if a local variable because we don't need them for generating error
98     // messages. In general, only non-local symbols can fail to be linked.
99     if (!dwarf::toUnsigned(Die.find(dwarf::DW_AT_external), 0))
100       continue;
101 
102     // Get the source filename index for the variable.
103     unsigned File = dwarf::toUnsigned(Die.find(dwarf::DW_AT_decl_file), 0);
104     if (!LT->hasFileAtIndex(File))
105       continue;
106 
107     // Get the line number on which the variable is declared.
108     unsigned Line = dwarf::toUnsigned(Die.find(dwarf::DW_AT_decl_line), 0);
109 
110     // Get the name of the variable and add the collected information to
111     // VariableLoc. Usually Name is non-empty, but it can be empty if the input
112     // object file lacks some debug info.
113     StringRef Name = dwarf::toString(Die.find(dwarf::DW_AT_name), "");
114     if (!Name.empty())
115       VariableLoc.insert({Name, {File, Line}});
116   }
117 }
118 
119 // Returns the pair of file name and line number describing location of data
120 // object (variable, array, etc) definition.
121 template <class ELFT>
122 Optional<std::pair<std::string, unsigned>>
123 ObjFile<ELFT>::getVariableLoc(StringRef Name) {
124   llvm::call_once(InitDwarfLine, [this]() { initializeDwarf(); });
125 
126   // There is always only one CU so it's offset is 0.
127   const DWARFDebugLine::LineTable *LT = DwarfLine->getLineTable(0);
128   if (!LT)
129     return None;
130 
131   // Return if we have no debug information about data object.
132   auto It = VariableLoc.find(Name);
133   if (It == VariableLoc.end())
134     return None;
135 
136   // Take file name string from line table.
137   std::string FileName;
138   if (!LT->getFileNameByIndex(
139           It->second.first /* File */, nullptr,
140           DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, FileName))
141     return None;
142 
143   return std::make_pair(FileName, It->second.second /*Line*/);
144 }
145 
146 // Returns source line information for a given offset
147 // using DWARF debug info.
148 template <class ELFT>
149 Optional<DILineInfo> ObjFile<ELFT>::getDILineInfo(InputSectionBase *S,
150                                                   uint64_t Offset) {
151   llvm::call_once(InitDwarfLine, [this]() { initializeDwarf(); });
152 
153   // The offset to CU is 0.
154   const DWARFDebugLine::LineTable *Tbl = DwarfLine->getLineTable(0);
155   if (!Tbl)
156     return None;
157 
158   // Use fake address calcuated by adding section file offset and offset in
159   // section. See comments for ObjectInfo class.
160   DILineInfo Info;
161   Tbl->getFileLineInfoForAddress(
162       S->getOffsetInFile() + Offset, nullptr,
163       DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, Info);
164   if (Info.Line == 0)
165     return None;
166   return Info;
167 }
168 
169 // Returns source line information for a given offset
170 // using DWARF debug info.
171 template <class ELFT>
172 std::string ObjFile<ELFT>::getLineInfo(InputSectionBase *S, uint64_t Offset) {
173   if (Optional<DILineInfo> Info = getDILineInfo(S, Offset))
174     return Info->FileName + ":" + std::to_string(Info->Line);
175   return "";
176 }
177 
178 // Returns "<internal>", "foo.a(bar.o)" or "baz.o".
179 std::string lld::toString(const InputFile *F) {
180   if (!F)
181     return "<internal>";
182 
183   if (F->ToStringCache.empty()) {
184     if (F->ArchiveName.empty())
185       F->ToStringCache = F->getName();
186     else
187       F->ToStringCache = (F->ArchiveName + "(" + F->getName() + ")").str();
188   }
189   return F->ToStringCache;
190 }
191 
192 template <class ELFT>
193 ELFFileBase<ELFT>::ELFFileBase(Kind K, MemoryBufferRef MB) : InputFile(K, MB) {
194   if (ELFT::TargetEndianness == support::little)
195     EKind = ELFT::Is64Bits ? ELF64LEKind : ELF32LEKind;
196   else
197     EKind = ELFT::Is64Bits ? ELF64BEKind : ELF32BEKind;
198 
199   EMachine = getObj().getHeader()->e_machine;
200   OSABI = getObj().getHeader()->e_ident[llvm::ELF::EI_OSABI];
201 }
202 
203 template <class ELFT>
204 typename ELFT::SymRange ELFFileBase<ELFT>::getGlobalELFSyms() {
205   return makeArrayRef(ELFSyms.begin() + FirstNonLocal, ELFSyms.end());
206 }
207 
208 template <class ELFT>
209 uint32_t ELFFileBase<ELFT>::getSectionIndex(const Elf_Sym &Sym) const {
210   return check(getObj().getSectionIndex(&Sym, ELFSyms, SymtabSHNDX),
211                toString(this));
212 }
213 
214 template <class ELFT>
215 void ELFFileBase<ELFT>::initSymtab(ArrayRef<Elf_Shdr> Sections,
216                                    const Elf_Shdr *Symtab) {
217   FirstNonLocal = Symtab->sh_info;
218   ELFSyms = check(getObj().symbols(Symtab), toString(this));
219   if (FirstNonLocal == 0 || FirstNonLocal > ELFSyms.size())
220     fatal(toString(this) + ": invalid sh_info in symbol table");
221 
222   StringTable = check(getObj().getStringTableForSymtab(*Symtab, Sections),
223                       toString(this));
224 }
225 
226 template <class ELFT>
227 ObjFile<ELFT>::ObjFile(MemoryBufferRef M, StringRef ArchiveName)
228     : ELFFileBase<ELFT>(Base::ObjKind, M) {
229   this->ArchiveName = ArchiveName;
230 }
231 
232 template <class ELFT> ArrayRef<Symbol *> ObjFile<ELFT>::getLocalSymbols() {
233   if (this->Symbols.empty())
234     return {};
235   return makeArrayRef(this->Symbols).slice(1, this->FirstNonLocal - 1);
236 }
237 
238 template <class ELFT>
239 void ObjFile<ELFT>::parse(DenseSet<CachedHashStringRef> &ComdatGroups) {
240   // Read section and symbol tables.
241   initializeSections(ComdatGroups);
242   initializeSymbols();
243 }
244 
245 // Sections with SHT_GROUP and comdat bits define comdat section groups.
246 // They are identified and deduplicated by group name. This function
247 // returns a group name.
248 template <class ELFT>
249 StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> Sections,
250                                               const Elf_Shdr &Sec) {
251   // Group signatures are stored as symbol names in object files.
252   // sh_info contains a symbol index, so we fetch a symbol and read its name.
253   if (this->ELFSyms.empty())
254     this->initSymtab(
255         Sections,
256         check(object::getSection<ELFT>(Sections, Sec.sh_link), toString(this)));
257 
258   const Elf_Sym *Sym = check(
259       object::getSymbol<ELFT>(this->ELFSyms, Sec.sh_info), toString(this));
260   StringRef Signature = check(Sym->getName(this->StringTable), toString(this));
261 
262   // As a special case, if a symbol is a section symbol and has no name,
263   // we use a section name as a signature.
264   //
265   // Such SHT_GROUP sections are invalid from the perspective of the ELF
266   // standard, but GNU gold 1.14 (the neweset version as of July 2017) or
267   // older produce such sections as outputs for the -r option, so we need
268   // a bug-compatibility.
269   if (Signature.empty() && Sym->getType() == STT_SECTION)
270     return getSectionName(Sec);
271   return Signature;
272 }
273 
274 template <class ELFT>
275 ArrayRef<typename ObjFile<ELFT>::Elf_Word>
276 ObjFile<ELFT>::getShtGroupEntries(const Elf_Shdr &Sec) {
277   const ELFFile<ELFT> &Obj = this->getObj();
278   ArrayRef<Elf_Word> Entries = check(
279       Obj.template getSectionContentsAsArray<Elf_Word>(&Sec), toString(this));
280   if (Entries.empty() || Entries[0] != GRP_COMDAT)
281     fatal(toString(this) + ": unsupported SHT_GROUP format");
282   return Entries.slice(1);
283 }
284 
285 template <class ELFT> bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &Sec) {
286   // We don't merge sections if -O0 (default is -O1). This makes sometimes
287   // the linker significantly faster, although the output will be bigger.
288   if (Config->Optimize == 0)
289     return false;
290 
291   // A mergeable section with size 0 is useless because they don't have
292   // any data to merge. A mergeable string section with size 0 can be
293   // argued as invalid because it doesn't end with a null character.
294   // We'll avoid a mess by handling them as if they were non-mergeable.
295   if (Sec.sh_size == 0)
296     return false;
297 
298   // Check for sh_entsize. The ELF spec is not clear about the zero
299   // sh_entsize. It says that "the member [sh_entsize] contains 0 if
300   // the section does not hold a table of fixed-size entries". We know
301   // that Rust 1.13 produces a string mergeable section with a zero
302   // sh_entsize. Here we just accept it rather than being picky about it.
303   uint64_t EntSize = Sec.sh_entsize;
304   if (EntSize == 0)
305     return false;
306   if (Sec.sh_size % EntSize)
307     fatal(toString(this) +
308           ": SHF_MERGE section size must be a multiple of sh_entsize");
309 
310   uint64_t Flags = Sec.sh_flags;
311   if (!(Flags & SHF_MERGE))
312     return false;
313   if (Flags & SHF_WRITE)
314     fatal(toString(this) + ": writable SHF_MERGE section is not supported");
315 
316   return true;
317 }
318 
319 template <class ELFT>
320 void ObjFile<ELFT>::initializeSections(
321     DenseSet<CachedHashStringRef> &ComdatGroups) {
322   const ELFFile<ELFT> &Obj = this->getObj();
323 
324   ArrayRef<Elf_Shdr> ObjSections =
325       check(this->getObj().sections(), toString(this));
326   uint64_t Size = ObjSections.size();
327   this->Sections.resize(Size);
328   this->SectionStringTable =
329       check(Obj.getSectionStringTable(ObjSections), toString(this));
330 
331   for (size_t I = 0, E = ObjSections.size(); I < E; I++) {
332     if (this->Sections[I] == &InputSection::Discarded)
333       continue;
334     const Elf_Shdr &Sec = ObjSections[I];
335 
336     // SHF_EXCLUDE'ed sections are discarded by the linker. However,
337     // if -r is given, we'll let the final link discard such sections.
338     // This is compatible with GNU.
339     if ((Sec.sh_flags & SHF_EXCLUDE) && !Config->Relocatable) {
340       this->Sections[I] = &InputSection::Discarded;
341       continue;
342     }
343 
344     switch (Sec.sh_type) {
345     case SHT_GROUP: {
346       // De-duplicate section groups by their signatures.
347       StringRef Signature = getShtGroupSignature(ObjSections, Sec);
348       bool IsNew = ComdatGroups.insert(CachedHashStringRef(Signature)).second;
349       this->Sections[I] = &InputSection::Discarded;
350 
351       // If it is a new section group, we want to keep group members.
352       // Group leader sections, which contain indices of group members, are
353       // discarded because they are useless beyond this point. The only
354       // exception is the -r option because in order to produce re-linkable
355       // object files, we want to pass through basically everything.
356       if (IsNew) {
357         if (Config->Relocatable)
358           this->Sections[I] = createInputSection(Sec);
359         continue;
360       }
361 
362       // Otherwise, discard group members.
363       for (uint32_t SecIndex : getShtGroupEntries(Sec)) {
364         if (SecIndex >= Size)
365           fatal(toString(this) +
366                 ": invalid section index in group: " + Twine(SecIndex));
367         this->Sections[SecIndex] = &InputSection::Discarded;
368       }
369       break;
370     }
371     case SHT_SYMTAB:
372       this->initSymtab(ObjSections, &Sec);
373       break;
374     case SHT_SYMTAB_SHNDX:
375       this->SymtabSHNDX =
376           check(Obj.getSHNDXTable(Sec, ObjSections), toString(this));
377       break;
378     case SHT_STRTAB:
379     case SHT_NULL:
380       break;
381     default:
382       this->Sections[I] = createInputSection(Sec);
383     }
384 
385     // .ARM.exidx sections have a reverse dependency on the InputSection they
386     // have a SHF_LINK_ORDER dependency, this is identified by the sh_link.
387     if (Sec.sh_flags & SHF_LINK_ORDER) {
388       if (Sec.sh_link >= this->Sections.size())
389         fatal(toString(this) + ": invalid sh_link index: " +
390               Twine(Sec.sh_link));
391       this->Sections[Sec.sh_link]->DependentSections.push_back(
392           cast<InputSection>(this->Sections[I]));
393     }
394   }
395 }
396 
397 template <class ELFT>
398 InputSectionBase *ObjFile<ELFT>::getRelocTarget(const Elf_Shdr &Sec) {
399   uint32_t Idx = Sec.sh_info;
400   if (Idx >= this->Sections.size())
401     fatal(toString(this) + ": invalid relocated section index: " + Twine(Idx));
402   InputSectionBase *Target = this->Sections[Idx];
403 
404   // Strictly speaking, a relocation section must be included in the
405   // group of the section it relocates. However, LLVM 3.3 and earlier
406   // would fail to do so, so we gracefully handle that case.
407   if (Target == &InputSection::Discarded)
408     return nullptr;
409 
410   if (!Target)
411     fatal(toString(this) + ": unsupported relocation reference");
412   return Target;
413 }
414 
415 // Create a regular InputSection class that has the same contents
416 // as a given section.
417 InputSectionBase *toRegularSection(MergeInputSection *Sec) {
418   auto *Ret = make<InputSection>(Sec->Flags, Sec->Type, Sec->Alignment,
419                                  Sec->Data, Sec->Name);
420   Ret->File = Sec->File;
421   return Ret;
422 }
423 
424 template <class ELFT>
425 InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &Sec) {
426   StringRef Name = getSectionName(Sec);
427 
428   switch (Sec.sh_type) {
429   case SHT_ARM_ATTRIBUTES:
430     // FIXME: ARM meta-data section. Retain the first attribute section
431     // we see. The eglibc ARM dynamic loaders require the presence of an
432     // attribute section for dlopen to work.
433     // In a full implementation we would merge all attribute sections.
434     if (InX::ARMAttributes == nullptr) {
435       InX::ARMAttributes = make<InputSection>(this, &Sec, Name);
436       return InX::ARMAttributes;
437     }
438     return &InputSection::Discarded;
439   case SHT_RELA:
440   case SHT_REL: {
441     // Find the relocation target section and associate this
442     // section with it. Target can be discarded, for example
443     // if it is a duplicated member of SHT_GROUP section, we
444     // do not create or proccess relocatable sections then.
445     InputSectionBase *Target = getRelocTarget(Sec);
446     if (!Target)
447       return nullptr;
448 
449     // This section contains relocation information.
450     // If -r is given, we do not interpret or apply relocation
451     // but just copy relocation sections to output.
452     if (Config->Relocatable)
453       return make<InputSection>(this, &Sec, Name);
454 
455     if (Target->FirstRelocation)
456       fatal(toString(this) +
457             ": multiple relocation sections to one section are not supported");
458 
459     // Mergeable sections with relocations are tricky because relocations
460     // need to be taken into account when comparing section contents for
461     // merging. It's not worth supporting such mergeable sections because
462     // they are rare and it'd complicates the internal design (we usually
463     // have to determine if two sections are mergeable early in the link
464     // process much before applying relocations). We simply handle mergeable
465     // sections with relocations as non-mergeable.
466     if (auto *MS = dyn_cast<MergeInputSection>(Target)) {
467       Target = toRegularSection(MS);
468       this->Sections[Sec.sh_info] = Target;
469     }
470 
471     size_t NumRelocations;
472     if (Sec.sh_type == SHT_RELA) {
473       ArrayRef<Elf_Rela> Rels =
474           check(this->getObj().relas(&Sec), toString(this));
475       Target->FirstRelocation = Rels.begin();
476       NumRelocations = Rels.size();
477       Target->AreRelocsRela = true;
478     } else {
479       ArrayRef<Elf_Rel> Rels = check(this->getObj().rels(&Sec), toString(this));
480       Target->FirstRelocation = Rels.begin();
481       NumRelocations = Rels.size();
482       Target->AreRelocsRela = false;
483     }
484     assert(isUInt<31>(NumRelocations));
485     Target->NumRelocations = NumRelocations;
486 
487     // Relocation sections processed by the linker are usually removed
488     // from the output, so returning `nullptr` for the normal case.
489     // However, if -emit-relocs is given, we need to leave them in the output.
490     // (Some post link analysis tools need this information.)
491     if (Config->EmitRelocs) {
492       InputSection *RelocSec = make<InputSection>(this, &Sec, Name);
493       // We will not emit relocation section if target was discarded.
494       Target->DependentSections.push_back(RelocSec);
495       return RelocSec;
496     }
497     return nullptr;
498   }
499   }
500 
501   // The GNU linker uses .note.GNU-stack section as a marker indicating
502   // that the code in the object file does not expect that the stack is
503   // executable (in terms of NX bit). If all input files have the marker,
504   // the GNU linker adds a PT_GNU_STACK segment to tells the loader to
505   // make the stack non-executable. Most object files have this section as
506   // of 2017.
507   //
508   // But making the stack non-executable is a norm today for security
509   // reasons. Failure to do so may result in a serious security issue.
510   // Therefore, we make LLD always add PT_GNU_STACK unless it is
511   // explicitly told to do otherwise (by -z execstack). Because the stack
512   // executable-ness is controlled solely by command line options,
513   // .note.GNU-stack sections are simply ignored.
514   if (Name == ".note.GNU-stack")
515     return &InputSection::Discarded;
516 
517   // Split stacks is a feature to support a discontiguous stack. At least
518   // as of 2017, it seems that the feature is not being used widely.
519   // Only GNU gold supports that. We don't. For the details about that,
520   // see https://gcc.gnu.org/wiki/SplitStacks
521   if (Name == ".note.GNU-split-stack") {
522     error(toString(this) +
523           ": object file compiled with -fsplit-stack is not supported");
524     return &InputSection::Discarded;
525   }
526 
527   // The linkonce feature is a sort of proto-comdat. Some glibc i386 object
528   // files contain definitions of symbol "__x86.get_pc_thunk.bx" in linkonce
529   // sections. Drop those sections to avoid duplicate symbol errors.
530   // FIXME: This is glibc PR20543, we should remove this hack once that has been
531   // fixed for a while.
532   if (Name.startswith(".gnu.linkonce."))
533     return &InputSection::Discarded;
534 
535   // The linker merges EH (exception handling) frames and creates a
536   // .eh_frame_hdr section for runtime. So we handle them with a special
537   // class. For relocatable outputs, they are just passed through.
538   if (Name == ".eh_frame" && !Config->Relocatable)
539     return make<EhInputSection>(this, &Sec, Name);
540 
541   if (shouldMerge(Sec))
542     return make<MergeInputSection>(this, &Sec, Name);
543   return make<InputSection>(this, &Sec, Name);
544 }
545 
546 template <class ELFT>
547 StringRef ObjFile<ELFT>::getSectionName(const Elf_Shdr &Sec) {
548   return check(this->getObj().getSectionName(&Sec, SectionStringTable),
549                toString(this));
550 }
551 
552 template <class ELFT> void ObjFile<ELFT>::initializeSymbols() {
553   this->Symbols.reserve(this->ELFSyms.size());
554   for (const Elf_Sym &Sym : this->ELFSyms)
555     this->Symbols.push_back(createSymbol(&Sym));
556 }
557 
558 template <class ELFT>
559 InputSectionBase *ObjFile<ELFT>::getSection(uint32_t Index) const {
560   if (Index == 0)
561     return nullptr;
562   if (Index >= this->Sections.size())
563     fatal(toString(this) + ": invalid section index: " + Twine(Index));
564 
565   if (InputSectionBase *Sec = this->Sections[Index])
566     return Sec->Repl;
567   return nullptr;
568 }
569 
570 template <class ELFT> Symbol *ObjFile<ELFT>::createSymbol(const Elf_Sym *Sym) {
571   int Binding = Sym->getBinding();
572   InputSectionBase *Sec = getSection(this->getSectionIndex(*Sym));
573 
574   uint8_t StOther = Sym->st_other;
575   uint8_t Type = Sym->getType();
576   uint64_t Value = Sym->st_value;
577   uint64_t Size = Sym->st_size;
578 
579   if (Binding == STB_LOCAL) {
580     if (Sym->getType() == STT_FILE)
581       SourceFile = check(Sym->getName(this->StringTable), toString(this));
582 
583     if (this->StringTable.size() <= Sym->st_name)
584       fatal(toString(this) + ": invalid symbol name offset");
585 
586     StringRefZ Name = this->StringTable.data() + Sym->st_name;
587     if (Sym->st_shndx == SHN_UNDEF)
588       return make<Undefined>(Name, Binding, StOther, Type);
589 
590     return make<Defined>(Name, Binding, StOther, Type, Value, Size, Sec);
591   }
592 
593   StringRef Name = check(Sym->getName(this->StringTable), toString(this));
594 
595   switch (Sym->st_shndx) {
596   case SHN_UNDEF:
597     return Symtab->addUndefined<ELFT>(Name, Binding, StOther, Type,
598                                       /*CanOmitFromDynSym=*/false, this);
599   case SHN_COMMON:
600     if (Value == 0 || Value >= UINT32_MAX)
601       fatal(toString(this) + ": common symbol '" + Name +
602             "' has invalid alignment: " + Twine(Value));
603     return Symtab->addCommon(Name, Size, Value, Binding, StOther, Type, this);
604   }
605 
606   switch (Binding) {
607   default:
608     fatal(toString(this) + ": unexpected binding: " + Twine(Binding));
609   case STB_GLOBAL:
610   case STB_WEAK:
611   case STB_GNU_UNIQUE:
612     if (Sec == &InputSection::Discarded)
613       return Symtab->addUndefined<ELFT>(Name, Binding, StOther, Type,
614                                         /*CanOmitFromDynSym=*/false, this);
615     return Symtab->addRegular<ELFT>(Name, StOther, Type, Value, Size, Binding,
616                                     Sec, this);
617   }
618 }
619 
620 ArchiveFile::ArchiveFile(std::unique_ptr<Archive> &&File)
621     : InputFile(ArchiveKind, File->getMemoryBufferRef()),
622       File(std::move(File)) {}
623 
624 template <class ELFT> void ArchiveFile::parse() {
625   Symbols.reserve(File->getNumberOfSymbols());
626   for (const Archive::Symbol &Sym : File->symbols())
627     Symbols.push_back(Symtab->addLazyArchive<ELFT>(Sym.getName(), this, Sym));
628 }
629 
630 // Returns a buffer pointing to a member file containing a given symbol.
631 std::pair<MemoryBufferRef, uint64_t>
632 ArchiveFile::getMember(const Archive::Symbol *Sym) {
633   Archive::Child C =
634       check(Sym->getMember(), toString(this) +
635                                   ": could not get the member for symbol " +
636                                   Sym->getName());
637 
638   if (!Seen.insert(C.getChildOffset()).second)
639     return {MemoryBufferRef(), 0};
640 
641   MemoryBufferRef Ret =
642       check(C.getMemoryBufferRef(),
643             toString(this) +
644                 ": could not get the buffer for the member defining symbol " +
645                 Sym->getName());
646 
647   if (C.getParent()->isThin() && Tar)
648     Tar->append(relativeToRoot(check(C.getFullName(), toString(this))),
649                 Ret.getBuffer());
650   if (C.getParent()->isThin())
651     return {Ret, 0};
652   return {Ret, C.getChildOffset()};
653 }
654 
655 template <class ELFT>
656 SharedFile<ELFT>::SharedFile(MemoryBufferRef M, StringRef DefaultSoName)
657     : ELFFileBase<ELFT>(Base::SharedKind, M), SoName(DefaultSoName),
658       AsNeeded(Config->AsNeeded) {}
659 
660 // Partially parse the shared object file so that we can call
661 // getSoName on this object.
662 template <class ELFT> void SharedFile<ELFT>::parseSoName() {
663   const Elf_Shdr *DynamicSec = nullptr;
664   const ELFFile<ELFT> Obj = this->getObj();
665   ArrayRef<Elf_Shdr> Sections = check(Obj.sections(), toString(this));
666 
667   // Search for .dynsym, .dynamic, .symtab, .gnu.version and .gnu.version_d.
668   for (const Elf_Shdr &Sec : Sections) {
669     switch (Sec.sh_type) {
670     default:
671       continue;
672     case SHT_DYNSYM:
673       this->initSymtab(Sections, &Sec);
674       break;
675     case SHT_DYNAMIC:
676       DynamicSec = &Sec;
677       break;
678     case SHT_SYMTAB_SHNDX:
679       this->SymtabSHNDX =
680           check(Obj.getSHNDXTable(Sec, Sections), toString(this));
681       break;
682     case SHT_GNU_versym:
683       this->VersymSec = &Sec;
684       break;
685     case SHT_GNU_verdef:
686       this->VerdefSec = &Sec;
687       break;
688     }
689   }
690 
691   if (this->VersymSec && this->ELFSyms.empty())
692     error("SHT_GNU_versym should be associated with symbol table");
693 
694   // Search for a DT_SONAME tag to initialize this->SoName.
695   if (!DynamicSec)
696     return;
697   ArrayRef<Elf_Dyn> Arr =
698       check(Obj.template getSectionContentsAsArray<Elf_Dyn>(DynamicSec),
699             toString(this));
700   for (const Elf_Dyn &Dyn : Arr) {
701     if (Dyn.d_tag == DT_SONAME) {
702       uint64_t Val = Dyn.getVal();
703       if (Val >= this->StringTable.size())
704         fatal(toString(this) + ": invalid DT_SONAME entry");
705       SoName = this->StringTable.data() + Val;
706       return;
707     }
708   }
709 }
710 
711 // Parse the version definitions in the object file if present. Returns a vector
712 // whose nth element contains a pointer to the Elf_Verdef for version identifier
713 // n. Version identifiers that are not definitions map to nullptr. The array
714 // always has at least length 1.
715 template <class ELFT>
716 std::vector<const typename ELFT::Verdef *>
717 SharedFile<ELFT>::parseVerdefs(const Elf_Versym *&Versym) {
718   std::vector<const Elf_Verdef *> Verdefs(1);
719   // We only need to process symbol versions for this DSO if it has both a
720   // versym and a verdef section, which indicates that the DSO contains symbol
721   // version definitions.
722   if (!VersymSec || !VerdefSec)
723     return Verdefs;
724 
725   // The location of the first global versym entry.
726   const char *Base = this->MB.getBuffer().data();
727   Versym = reinterpret_cast<const Elf_Versym *>(Base + VersymSec->sh_offset) +
728            this->FirstNonLocal;
729 
730   // We cannot determine the largest verdef identifier without inspecting
731   // every Elf_Verdef, but both bfd and gold assign verdef identifiers
732   // sequentially starting from 1, so we predict that the largest identifier
733   // will be VerdefCount.
734   unsigned VerdefCount = VerdefSec->sh_info;
735   Verdefs.resize(VerdefCount + 1);
736 
737   // Build the Verdefs array by following the chain of Elf_Verdef objects
738   // from the start of the .gnu.version_d section.
739   const char *Verdef = Base + VerdefSec->sh_offset;
740   for (unsigned I = 0; I != VerdefCount; ++I) {
741     auto *CurVerdef = reinterpret_cast<const Elf_Verdef *>(Verdef);
742     Verdef += CurVerdef->vd_next;
743     unsigned VerdefIndex = CurVerdef->vd_ndx;
744     if (Verdefs.size() <= VerdefIndex)
745       Verdefs.resize(VerdefIndex + 1);
746     Verdefs[VerdefIndex] = CurVerdef;
747   }
748 
749   return Verdefs;
750 }
751 
752 // Fully parse the shared object file. This must be called after parseSoName().
753 template <class ELFT> void SharedFile<ELFT>::parseRest() {
754   // Create mapping from version identifiers to Elf_Verdef entries.
755   const Elf_Versym *Versym = nullptr;
756   std::vector<const Elf_Verdef *> Verdefs = parseVerdefs(Versym);
757 
758   ArrayRef<Elf_Shdr> Sections =
759       check(this->getObj().sections(), toString(this));
760 
761   // Add symbols to the symbol table.
762   Elf_Sym_Range Syms = this->getGlobalELFSyms();
763   for (const Elf_Sym &Sym : Syms) {
764     unsigned VersymIndex = 0;
765     if (Versym) {
766       VersymIndex = Versym->vs_index;
767       ++Versym;
768     }
769     bool Hidden = VersymIndex & VERSYM_HIDDEN;
770     VersymIndex = VersymIndex & ~VERSYM_HIDDEN;
771 
772     StringRef Name = check(Sym.getName(this->StringTable), toString(this));
773     if (Sym.isUndefined()) {
774       Undefs.push_back(Name);
775       continue;
776     }
777 
778     // Ignore local symbols.
779     if (Versym && VersymIndex == VER_NDX_LOCAL)
780       continue;
781     const Elf_Verdef *Ver = nullptr;
782     if (VersymIndex != VER_NDX_GLOBAL) {
783       if (VersymIndex >= Verdefs.size()) {
784         error("corrupt input file: version definition index " +
785               Twine(VersymIndex) + " for symbol " + Name +
786               " is out of bounds\n>>> defined in " + toString(this));
787         continue;
788       }
789       Ver = Verdefs[VersymIndex];
790     }
791 
792     // We do not usually care about alignments of data in shared object
793     // files because the loader takes care of it. However, if we promote a
794     // DSO symbol to point to .bss due to copy relocation, we need to keep
795     // the original alignment requirements. We infer it here.
796     uint64_t Alignment = 1;
797     if (Sym.st_value)
798       Alignment = 1ULL << countTrailingZeros((uint64_t)Sym.st_value);
799     if (0 < Sym.st_shndx && Sym.st_shndx < Sections.size()) {
800       uint64_t SecAlign = Sections[Sym.st_shndx].sh_addralign;
801       Alignment = std::min(Alignment, SecAlign);
802     }
803     if (Alignment > UINT32_MAX)
804       error(toString(this) + ": alignment too large: " + Name);
805 
806     if (!Hidden)
807       Symtab->addShared(Name, this, Sym, Alignment, Ver);
808 
809     // Also add the symbol with the versioned name to handle undefined symbols
810     // with explicit versions.
811     if (Ver) {
812       StringRef VerName = this->StringTable.data() + Ver->getAux()->vda_name;
813       Name = Saver.save(Name + "@" + VerName);
814       Symtab->addShared(Name, this, Sym, Alignment, Ver);
815     }
816   }
817 }
818 
819 static ELFKind getBitcodeELFKind(const Triple &T) {
820   if (T.isLittleEndian())
821     return T.isArch64Bit() ? ELF64LEKind : ELF32LEKind;
822   return T.isArch64Bit() ? ELF64BEKind : ELF32BEKind;
823 }
824 
825 static uint8_t getBitcodeMachineKind(StringRef Path, const Triple &T) {
826   switch (T.getArch()) {
827   case Triple::aarch64:
828     return EM_AARCH64;
829   case Triple::arm:
830   case Triple::thumb:
831     return EM_ARM;
832   case Triple::avr:
833     return EM_AVR;
834   case Triple::mips:
835   case Triple::mipsel:
836   case Triple::mips64:
837   case Triple::mips64el:
838     return EM_MIPS;
839   case Triple::ppc:
840     return EM_PPC;
841   case Triple::ppc64:
842     return EM_PPC64;
843   case Triple::x86:
844     return T.isOSIAMCU() ? EM_IAMCU : EM_386;
845   case Triple::x86_64:
846     return EM_X86_64;
847   default:
848     fatal(Path + ": could not infer e_machine from bitcode target triple " +
849           T.str());
850   }
851 }
852 
853 BitcodeFile::BitcodeFile(MemoryBufferRef MB, StringRef ArchiveName,
854                          uint64_t OffsetInArchive)
855     : InputFile(BitcodeKind, MB) {
856   this->ArchiveName = ArchiveName;
857 
858   // Here we pass a new MemoryBufferRef which is identified by ArchiveName
859   // (the fully resolved path of the archive) + member name + offset of the
860   // member in the archive.
861   // ThinLTO uses the MemoryBufferRef identifier to access its internal
862   // data structures and if two archives define two members with the same name,
863   // this causes a collision which result in only one of the objects being
864   // taken into consideration at LTO time (which very likely causes undefined
865   // symbols later in the link stage).
866   MemoryBufferRef MBRef(MB.getBuffer(),
867                         Saver.save(ArchiveName + MB.getBufferIdentifier() +
868                                    utostr(OffsetInArchive)));
869   Obj = check(lto::InputFile::create(MBRef), toString(this));
870 
871   Triple T(Obj->getTargetTriple());
872   EKind = getBitcodeELFKind(T);
873   EMachine = getBitcodeMachineKind(MB.getBufferIdentifier(), T);
874 }
875 
876 static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) {
877   switch (GvVisibility) {
878   case GlobalValue::DefaultVisibility:
879     return STV_DEFAULT;
880   case GlobalValue::HiddenVisibility:
881     return STV_HIDDEN;
882   case GlobalValue::ProtectedVisibility:
883     return STV_PROTECTED;
884   }
885   llvm_unreachable("unknown visibility");
886 }
887 
888 template <class ELFT>
889 static Symbol *createBitcodeSymbol(const std::vector<bool> &KeptComdats,
890                                    const lto::InputFile::Symbol &ObjSym,
891                                    BitcodeFile *F) {
892   StringRef NameRef = Saver.save(ObjSym.getName());
893   uint32_t Binding = ObjSym.isWeak() ? STB_WEAK : STB_GLOBAL;
894 
895   uint8_t Type = ObjSym.isTLS() ? STT_TLS : STT_NOTYPE;
896   uint8_t Visibility = mapVisibility(ObjSym.getVisibility());
897   bool CanOmitFromDynSym = ObjSym.canBeOmittedFromSymbolTable();
898 
899   int C = ObjSym.getComdatIndex();
900   if (C != -1 && !KeptComdats[C])
901     return Symtab->addUndefined<ELFT>(NameRef, Binding, Visibility, Type,
902                                       CanOmitFromDynSym, F);
903 
904   if (ObjSym.isUndefined())
905     return Symtab->addUndefined<ELFT>(NameRef, Binding, Visibility, Type,
906                                       CanOmitFromDynSym, F);
907 
908   if (ObjSym.isCommon())
909     return Symtab->addCommon(NameRef, ObjSym.getCommonSize(),
910                              ObjSym.getCommonAlignment(), Binding, Visibility,
911                              STT_OBJECT, F);
912 
913   return Symtab->addBitcode(NameRef, Binding, Visibility, Type,
914                             CanOmitFromDynSym, F);
915 }
916 
917 template <class ELFT>
918 void BitcodeFile::parse(DenseSet<CachedHashStringRef> &ComdatGroups) {
919   std::vector<bool> KeptComdats;
920   for (StringRef S : Obj->getComdatTable())
921     KeptComdats.push_back(ComdatGroups.insert(CachedHashStringRef(S)).second);
922 
923   for (const lto::InputFile::Symbol &ObjSym : Obj->symbols())
924     Symbols.push_back(createBitcodeSymbol<ELFT>(KeptComdats, ObjSym, this));
925 }
926 
927 static ELFKind getELFKind(MemoryBufferRef MB) {
928   unsigned char Size;
929   unsigned char Endian;
930   std::tie(Size, Endian) = getElfArchType(MB.getBuffer());
931 
932   if (Endian != ELFDATA2LSB && Endian != ELFDATA2MSB)
933     fatal(MB.getBufferIdentifier() + ": invalid data encoding");
934   if (Size != ELFCLASS32 && Size != ELFCLASS64)
935     fatal(MB.getBufferIdentifier() + ": invalid file class");
936 
937   size_t BufSize = MB.getBuffer().size();
938   if ((Size == ELFCLASS32 && BufSize < sizeof(Elf32_Ehdr)) ||
939       (Size == ELFCLASS64 && BufSize < sizeof(Elf64_Ehdr)))
940     fatal(MB.getBufferIdentifier() + ": file is too short");
941 
942   if (Size == ELFCLASS32)
943     return (Endian == ELFDATA2LSB) ? ELF32LEKind : ELF32BEKind;
944   return (Endian == ELFDATA2LSB) ? ELF64LEKind : ELF64BEKind;
945 }
946 
947 template <class ELFT> void BinaryFile::parse() {
948   ArrayRef<uint8_t> Data = toArrayRef(MB.getBuffer());
949   auto *Section =
950       make<InputSection>(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, 8, Data, ".data");
951   Sections.push_back(Section);
952 
953   // For each input file foo that is embedded to a result as a binary
954   // blob, we define _binary_foo_{start,end,size} symbols, so that
955   // user programs can access blobs by name. Non-alphanumeric
956   // characters in a filename are replaced with underscore.
957   std::string S = "_binary_" + MB.getBufferIdentifier().str();
958   for (size_t I = 0; I < S.size(); ++I)
959     if (!isAlnum(S[I]))
960       S[I] = '_';
961 
962   Symtab->addRegular<ELFT>(Saver.save(S + "_start"), STV_DEFAULT, STT_OBJECT,
963                            0, 0, STB_GLOBAL, Section, nullptr);
964   Symtab->addRegular<ELFT>(Saver.save(S + "_end"), STV_DEFAULT, STT_OBJECT,
965                            Data.size(), 0, STB_GLOBAL, Section, nullptr);
966   Symtab->addRegular<ELFT>(Saver.save(S + "_size"), STV_DEFAULT, STT_OBJECT,
967                            Data.size(), 0, STB_GLOBAL, nullptr, nullptr);
968 }
969 
970 static bool isBitcode(MemoryBufferRef MB) {
971   using namespace sys::fs;
972   return identify_magic(MB.getBuffer()) == file_magic::bitcode;
973 }
974 
975 InputFile *elf::createObjectFile(MemoryBufferRef MB, StringRef ArchiveName,
976                                  uint64_t OffsetInArchive) {
977   if (isBitcode(MB))
978     return make<BitcodeFile>(MB, ArchiveName, OffsetInArchive);
979 
980   switch (getELFKind(MB)) {
981   case ELF32LEKind:
982     return make<ObjFile<ELF32LE>>(MB, ArchiveName);
983   case ELF32BEKind:
984     return make<ObjFile<ELF32BE>>(MB, ArchiveName);
985   case ELF64LEKind:
986     return make<ObjFile<ELF64LE>>(MB, ArchiveName);
987   case ELF64BEKind:
988     return make<ObjFile<ELF64BE>>(MB, ArchiveName);
989   default:
990     llvm_unreachable("getELFKind");
991   }
992 }
993 
994 InputFile *elf::createSharedFile(MemoryBufferRef MB, StringRef DefaultSoName) {
995   switch (getELFKind(MB)) {
996   case ELF32LEKind:
997     return make<SharedFile<ELF32LE>>(MB, DefaultSoName);
998   case ELF32BEKind:
999     return make<SharedFile<ELF32BE>>(MB, DefaultSoName);
1000   case ELF64LEKind:
1001     return make<SharedFile<ELF64LE>>(MB, DefaultSoName);
1002   case ELF64BEKind:
1003     return make<SharedFile<ELF64BE>>(MB, DefaultSoName);
1004   default:
1005     llvm_unreachable("getELFKind");
1006   }
1007 }
1008 
1009 MemoryBufferRef LazyObjFile::getBuffer() {
1010   if (Seen)
1011     return MemoryBufferRef();
1012   Seen = true;
1013   return MB;
1014 }
1015 
1016 InputFile *LazyObjFile::fetch() {
1017   MemoryBufferRef MBRef = getBuffer();
1018   if (MBRef.getBuffer().empty())
1019     return nullptr;
1020   return createObjectFile(MBRef, ArchiveName, OffsetInArchive);
1021 }
1022 
1023 template <class ELFT> void LazyObjFile::parse() {
1024   for (StringRef Sym : getSymbolNames())
1025     Symtab->addLazyObject<ELFT>(Sym, *this);
1026 }
1027 
1028 template <class ELFT> std::vector<StringRef> LazyObjFile::getElfSymbols() {
1029   typedef typename ELFT::Shdr Elf_Shdr;
1030   typedef typename ELFT::Sym Elf_Sym;
1031   typedef typename ELFT::SymRange Elf_Sym_Range;
1032 
1033   ELFFile<ELFT> Obj = check(ELFFile<ELFT>::create(this->MB.getBuffer()));
1034   ArrayRef<Elf_Shdr> Sections = check(Obj.sections(), toString(this));
1035   for (const Elf_Shdr &Sec : Sections) {
1036     if (Sec.sh_type != SHT_SYMTAB)
1037       continue;
1038 
1039     Elf_Sym_Range Syms = check(Obj.symbols(&Sec), toString(this));
1040     uint32_t FirstNonLocal = Sec.sh_info;
1041     StringRef StringTable =
1042         check(Obj.getStringTableForSymtab(Sec, Sections), toString(this));
1043     std::vector<StringRef> V;
1044 
1045     for (const Elf_Sym &Sym : Syms.slice(FirstNonLocal))
1046       if (Sym.st_shndx != SHN_UNDEF)
1047         V.push_back(check(Sym.getName(StringTable), toString(this)));
1048     return V;
1049   }
1050   return {};
1051 }
1052 
1053 std::vector<StringRef> LazyObjFile::getBitcodeSymbols() {
1054   std::unique_ptr<lto::InputFile> Obj =
1055       check(lto::InputFile::create(this->MB), toString(this));
1056   std::vector<StringRef> V;
1057   for (const lto::InputFile::Symbol &Sym : Obj->symbols())
1058     if (!Sym.isUndefined())
1059       V.push_back(Saver.save(Sym.getName()));
1060   return V;
1061 }
1062 
1063 // Returns a vector of globally-visible defined symbol names.
1064 std::vector<StringRef> LazyObjFile::getSymbolNames() {
1065   if (isBitcode(this->MB))
1066     return getBitcodeSymbols();
1067 
1068   switch (getELFKind(this->MB)) {
1069   case ELF32LEKind:
1070     return getElfSymbols<ELF32LE>();
1071   case ELF32BEKind:
1072     return getElfSymbols<ELF32BE>();
1073   case ELF64LEKind:
1074     return getElfSymbols<ELF64LE>();
1075   case ELF64BEKind:
1076     return getElfSymbols<ELF64BE>();
1077   default:
1078     llvm_unreachable("getELFKind");
1079   }
1080 }
1081 
// Explicit template instantiations. Each templated entity below is
// instantiated once per ELF flavour: 32/64-bit, little/big endian.

// ArchiveFile::parse
template void ArchiveFile::parse<ELF32LE>();
template void ArchiveFile::parse<ELF32BE>();
template void ArchiveFile::parse<ELF64LE>();
template void ArchiveFile::parse<ELF64BE>();

// BitcodeFile::parse
template void BitcodeFile::parse<ELF32LE>(DenseSet<CachedHashStringRef> &);
template void BitcodeFile::parse<ELF32BE>(DenseSet<CachedHashStringRef> &);
template void BitcodeFile::parse<ELF64LE>(DenseSet<CachedHashStringRef> &);
template void BitcodeFile::parse<ELF64BE>(DenseSet<CachedHashStringRef> &);

// LazyObjFile::parse
template void LazyObjFile::parse<ELF32LE>();
template void LazyObjFile::parse<ELF32BE>();
template void LazyObjFile::parse<ELF64LE>();
template void LazyObjFile::parse<ELF64BE>();

// ELFFileBase class template
template class elf::ELFFileBase<ELF32LE>;
template class elf::ELFFileBase<ELF32BE>;
template class elf::ELFFileBase<ELF64LE>;
template class elf::ELFFileBase<ELF64BE>;

// ObjFile class template
template class elf::ObjFile<ELF32LE>;
template class elf::ObjFile<ELF32BE>;
template class elf::ObjFile<ELF64LE>;
template class elf::ObjFile<ELF64BE>;

// SharedFile class template
template class elf::SharedFile<ELF32LE>;
template class elf::SharedFile<ELF32BE>;
template class elf::SharedFile<ELF64LE>;
template class elf::SharedFile<ELF64BE>;

// BinaryFile::parse
template void BinaryFile::parse<ELF32LE>();
template void BinaryFile::parse<ELF32BE>();
template void BinaryFile::parse<ELF64LE>();
template void BinaryFile::parse<ELF64BE>();
1116