xref: /llvm-project-15.0.7/lld/ELF/InputFiles.h (revision 4e00a192)
1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_ELF_INPUT_FILES_H
10 #define LLD_ELF_INPUT_FILES_H
11 
12 #include "Config.h"
13 #include "lld/Common/ErrorHandler.h"
14 #include "lld/Common/LLVM.h"
15 #include "lld/Common/Reproduce.h"
16 #include "llvm/ADT/CachedHashString.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/IR/Comdat.h"
20 #include "llvm/Object/Archive.h"
21 #include "llvm/Object/ELF.h"
22 #include "llvm/Object/IRObjectFile.h"
23 #include "llvm/Support/Threading.h"
24 #include <map>
25 
// Forward declarations of LLVM types that this header only names.
namespace llvm {
class TarWriter;
struct DILineInfo;
namespace lto {
class InputFile;
} // namespace lto
} // namespace llvm
33 
34 namespace lld {
35 class DWARFCache;
36 
37 // Returns "<internal>", "foo.a(bar.o)" or "baz.o".
38 std::string toString(const elf::InputFile *f);
39 
40 namespace elf {
41 
42 using llvm::object::Archive;
43 
44 class InputSection;
45 class Symbol;
46 
47 // If --reproduce is specified, all input files are written to this tar archive.
48 extern std::unique_ptr<llvm::TarWriter> tar;
49 
50 // Opens a given file.
51 llvm::Optional<MemoryBufferRef> readFile(StringRef path);
52 
53 // Add symbols in File to the symbol table.
54 void parseFile(InputFile *file);
55 
56 // The root class of input files.
57 class InputFile {
58 private:
59   // Cache for getNameForScript().
60   mutable SmallString<0> nameForScriptCache;
61 
62 protected:
63   SmallVector<InputSectionBase *, 0> sections;
64 
65 public:
66   enum Kind : uint8_t {
67     ObjKind,
68     SharedKind,
69     ArchiveKind,
70     BitcodeKind,
71     BinaryKind,
72   };
73 
74   Kind kind() const { return fileKind; }
75 
76   bool isElf() const {
77     Kind k = kind();
78     return k == ObjKind || k == SharedKind;
79   }
80 
81   StringRef getName() const { return mb.getBufferIdentifier(); }
82   MemoryBufferRef mb;
83 
84   // Returns sections. It is a runtime error to call this function
85   // on files that don't have the notion of sections.
86   ArrayRef<InputSectionBase *> getSections() const {
87     assert(fileKind == ObjKind || fileKind == BinaryKind);
88     return sections;
89   }
90 
91   // Returns object file symbols. It is a runtime error to call this
92   // function on files of other types.
93   ArrayRef<Symbol *> getSymbols() const {
94     assert(fileKind == BinaryKind || fileKind == ObjKind ||
95            fileKind == BitcodeKind);
96     return symbols;
97   }
98 
99   // Get filename to use for linker script processing.
100   StringRef getNameForScript() const;
101 
102   // Check if a non-common symbol should be extracted to override a common
103   // definition.
104   bool shouldExtractForCommon(StringRef name);
105 
106   // If not empty, this stores the name of the archive containing this file.
107   // We use this string for creating error messages.
108   SmallString<0> archiveName;
109 
110   // Cache for toString(). Only toString() should use this member.
111   mutable SmallString<0> toStringCache;
112 
113   SmallVector<Symbol *, 0> symbols;
114 
115   // .got2 in the current file. This is used by PPC32 -fPIC/-fPIE to compute
116   // offsets in PLT call stubs.
117   InputSection *ppc32Got2 = nullptr;
118 
119   // Index of MIPS GOT built for this file.
120   uint32_t mipsGotIndex = -1;
121 
122   // groupId is used for --warn-backrefs which is an optional error
123   // checking feature. All files within the same --{start,end}-group or
124   // --{start,end}-lib get the same group ID. Otherwise, each file gets a new
125   // group ID. For more info, see checkDependency() in SymbolTable.cpp.
126   uint32_t groupId;
127   static bool isInGroup;
128   static uint32_t nextGroupId;
129 
130   // If this is an architecture-specific file, the following members
131   // have ELF type (i.e. ELF{32,64}{LE,BE}) and target machine type.
132   uint16_t emachine = llvm::ELF::EM_NONE;
133   const Kind fileKind;
134   ELFKind ekind = ELFNoneKind;
135   uint8_t osabi = 0;
136   uint8_t abiVersion = 0;
137 
138   // True if this is a relocatable object file/bitcode file between --start-lib
139   // and --end-lib.
140   bool lazy = false;
141 
142   // True if this is an argument for --just-symbols. Usually false.
143   bool justSymbols = false;
144 
145   std::string getSrcMsg(const Symbol &sym, InputSectionBase &sec,
146                         uint64_t offset);
147 
148   // On PPC64 we need to keep track of which files contain small code model
149   // relocations that access the .toc section. To minimize the chance of a
150   // relocation overflow, files that do contain said relocations should have
151   // their .toc sections sorted closer to the .got section than files that do
152   // not contain any small code model relocations. Thats because the toc-pointer
153   // is defined to point at .got + 0x8000 and the instructions used with small
154   // code model relocations support immediates in the range [-0x8000, 0x7FFC],
155   // making the addressable range relative to the toc pointer
156   // [.got, .got + 0xFFFC].
157   bool ppc64SmallCodeModelTocRelocs = false;
158 
159   // True if the file has TLSGD/TLSLD GOT relocations without R_PPC64_TLSGD or
160   // R_PPC64_TLSLD. Disable TLS relaxation to avoid bad code generation.
161   bool ppc64DisableTLSRelax = false;
162 
163 protected:
164   InputFile(Kind k, MemoryBufferRef m);
165 };
166 
167 class ELFFileBase : public InputFile {
168 public:
169   ELFFileBase(Kind k, MemoryBufferRef m);
170   static bool classof(const InputFile *f) { return f->isElf(); }
171 
172   template <typename ELFT> llvm::object::ELFFile<ELFT> getObj() const {
173     return check(llvm::object::ELFFile<ELFT>::create(mb.getBuffer()));
174   }
175 
176   StringRef getStringTable() const { return stringTable; }
177 
178   ArrayRef<Symbol *> getLocalSymbols() {
179     if (symbols.empty())
180       return {};
181     return llvm::makeArrayRef(symbols).slice(1, firstGlobal - 1);
182   }
183   ArrayRef<Symbol *> getGlobalSymbols() {
184     return llvm::makeArrayRef(symbols).slice(firstGlobal);
185   }
186   MutableArrayRef<Symbol *> getMutableGlobalSymbols() {
187     return llvm::makeMutableArrayRef(symbols.data(), symbols.size())
188         .slice(firstGlobal);
189   }
190 
191   template <typename ELFT> typename ELFT::ShdrRange getELFShdrs() const {
192     return typename ELFT::ShdrRange(
193         reinterpret_cast<const typename ELFT::Shdr *>(elfShdrs), numELFShdrs);
194   }
195   template <typename ELFT> typename ELFT::SymRange getELFSyms() const {
196     return typename ELFT::SymRange(
197         reinterpret_cast<const typename ELFT::Sym *>(elfSyms), numELFSyms);
198   }
199   template <typename ELFT> typename ELFT::SymRange getGlobalELFSyms() const {
200     return getELFSyms<ELFT>().slice(firstGlobal);
201   }
202 
203 protected:
204   // Initializes this class's member variables.
205   template <typename ELFT> void init();
206 
207   StringRef stringTable;
208   const void *elfShdrs = nullptr;
209   const void *elfSyms = nullptr;
210   uint32_t numELFShdrs = 0;
211   uint32_t numELFSyms = 0;
212   uint32_t firstGlobal = 0;
213 
214 public:
215   bool hasCommonSyms = false;
216 };
217 
218 // .o file.
219 template <class ELFT> class ObjFile : public ELFFileBase {
220   LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
221 
222 public:
223   static bool classof(const InputFile *f) { return f->kind() == ObjKind; }
224 
225   llvm::object::ELFFile<ELFT> getObj() const {
226     return this->ELFFileBase::getObj<ELFT>();
227   }
228 
229   ObjFile(MemoryBufferRef m, StringRef archiveName) : ELFFileBase(ObjKind, m) {
230     this->archiveName = archiveName;
231   }
232 
233   void parse(bool ignoreComdats = false);
234   void parseLazy();
235 
236   StringRef getShtGroupSignature(ArrayRef<Elf_Shdr> sections,
237                                  const Elf_Shdr &sec);
238 
239   Symbol &getSymbol(uint32_t symbolIndex) const {
240     if (symbolIndex >= this->symbols.size())
241       fatal(toString(this) + ": invalid symbol index");
242     return *this->symbols[symbolIndex];
243   }
244 
245   uint32_t getSectionIndex(const Elf_Sym &sym) const;
246 
247   template <typename RelT> Symbol &getRelocTargetSym(const RelT &rel) const {
248     uint32_t symIndex = rel.getSymbol(config->isMips64EL);
249     return getSymbol(symIndex);
250   }
251 
252   llvm::Optional<llvm::DILineInfo> getDILineInfo(InputSectionBase *, uint64_t);
253   llvm::Optional<std::pair<std::string, unsigned>> getVariableLoc(StringRef name);
254 
255   // Name of source file obtained from STT_FILE symbol value,
256   // or empty string if there is no such symbol in object file
257   // symbol table.
258   StringRef sourceFile;
259 
260   // Pointer to this input file's .llvm_addrsig section, if it has one.
261   const Elf_Shdr *addrsigSec = nullptr;
262 
263   // SHT_LLVM_CALL_GRAPH_PROFILE section index.
264   uint32_t cgProfileSectionIndex = 0;
265 
266   // MIPS GP0 value defined by this file. This value represents the gp value
267   // used to create the relocatable object and required to support
268   // R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations.
269   uint32_t mipsGp0 = 0;
270 
271   uint32_t andFeatures = 0;
272 
273   // True if the file defines functions compiled with
274   // -fsplit-stack. Usually false.
275   bool splitStack = false;
276 
277   // True if the file defines functions compiled with -fsplit-stack,
278   // but had one or more functions with the no_split_stack attribute.
279   bool someNoSplitStack = false;
280 
281   // Get cached DWARF information.
282   DWARFCache *getDwarf();
283 
284 private:
285   void initializeSections(bool ignoreComdats);
286   void initializeSymbols();
287   void initializeJustSymbols();
288 
289   InputSectionBase *getRelocTarget(uint32_t idx, StringRef name,
290                                    const Elf_Shdr &sec);
291   InputSectionBase *createInputSection(uint32_t idx, const Elf_Shdr &sec,
292                                        StringRef shstrtab);
293 
294   bool shouldMerge(const Elf_Shdr &sec, StringRef name);
295 
296   // Each ELF symbol contains a section index which the symbol belongs to.
297   // However, because the number of bits dedicated for that is limited, a
298   // symbol can directly point to a section only when the section index is
299   // equal to or smaller than 65280.
300   //
301   // If an object file contains more than 65280 sections, the file must
302   // contain .symtab_shndx section. The section contains an array of
303   // 32-bit integers whose size is the same as the number of symbols.
304   // Nth symbol's section index is in the Nth entry of .symtab_shndx.
305   //
306   // The following variable contains the contents of .symtab_shndx.
307   // If the section does not exist (which is common), the array is empty.
308   ArrayRef<Elf_Word> shndxTable;
309 
310   // Debugging information to retrieve source file and line for error
311   // reporting. Linker may find reasonable number of errors in a
312   // single object file, so we cache debugging information in order to
313   // parse it only once for each object file we link.
314   std::unique_ptr<DWARFCache> dwarf;
315   llvm::once_flag initDwarf;
316 };
317 
318 // An ArchiveFile object represents a .a file.
319 class ArchiveFile : public InputFile {
320 public:
321   explicit ArchiveFile(std::unique_ptr<Archive> &&file);
322   static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
323   void parse();
324 
325   // Pulls out an object file that contains a definition for Sym and
326   // returns it. If the same file was instantiated before, this
327   // function does nothing (so we don't instantiate the same file
328   // more than once.)
329   void extract(const Archive::Symbol &sym);
330 
331   // Check if a non-common symbol should be extracted to override a common
332   // definition.
333   bool shouldExtractForCommon(const Archive::Symbol &sym);
334 
335   size_t getMemberCount() const;
336   size_t getExtractedMemberCount() const { return seen.size(); }
337 
338   bool parsed = false;
339 
340 private:
341   std::unique_ptr<Archive> file;
342   llvm::DenseSet<uint64_t> seen;
343 };
344 
345 class BitcodeFile : public InputFile {
346 public:
347   BitcodeFile(MemoryBufferRef m, StringRef archiveName,
348               uint64_t offsetInArchive, bool lazy);
349   static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
350   template <class ELFT> void parse();
351   void parseLazy();
352   std::unique_ptr<llvm::lto::InputFile> obj;
353 };
354 
355 // .so file.
356 class SharedFile : public ELFFileBase {
357 public:
358   SharedFile(MemoryBufferRef m, StringRef defaultSoName)
359       : ELFFileBase(SharedKind, m), soName(defaultSoName),
360         isNeeded(!config->asNeeded) {}
361 
362   // This is actually a vector of Elf_Verdef pointers.
363   SmallVector<const void *, 0> verdefs;
364 
365   // If the output file needs Elf_Verneed data structures for this file, this is
366   // a vector of Elf_Vernaux version identifiers that map onto the entries in
367   // Verdefs, otherwise it is empty.
368   SmallVector<uint32_t, 0> vernauxs;
369 
370   static unsigned vernauxNum;
371 
372   SmallVector<StringRef, 0> dtNeeded;
373   StringRef soName;
374 
375   static bool classof(const InputFile *f) { return f->kind() == SharedKind; }
376 
377   template <typename ELFT> void parse();
378 
379   // Used for --as-needed
380   bool isNeeded;
381 
382   // Non-weak undefined symbols which are not yet resolved when the SO is
383   // parsed. Only filled for `--no-allow-shlib-undefined`.
384   SmallVector<Symbol *, 0> requiredSymbols;
385 
386 private:
387   template <typename ELFT>
388   std::vector<uint32_t> parseVerneed(const llvm::object::ELFFile<ELFT> &obj,
389                                      const typename ELFT::Shdr *sec);
390 };
391 
392 class BinaryFile : public InputFile {
393 public:
394   explicit BinaryFile(MemoryBufferRef m) : InputFile(BinaryKind, m) {}
395   static bool classof(const InputFile *f) { return f->kind() == BinaryKind; }
396   void parse();
397 };
398 
399 InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName = "",
400                             uint64_t offsetInArchive = 0);
401 InputFile *createLazyFile(MemoryBufferRef mb, StringRef archiveName,
402                           uint64_t offsetInArchive);
403 
404 inline bool isBitcode(MemoryBufferRef mb) {
405   return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode;
406 }
407 
408 std::string replaceThinLTOSuffix(StringRef path);
409 
410 extern SmallVector<std::unique_ptr<MemoryBuffer>> memoryBuffers;
411 extern SmallVector<ArchiveFile *, 0> archiveFiles;
412 extern SmallVector<BinaryFile *, 0> binaryFiles;
413 extern SmallVector<BitcodeFile *, 0> bitcodeFiles;
414 extern SmallVector<BitcodeFile *, 0> lazyBitcodeFiles;
415 extern SmallVector<ELFFileBase *, 0> objectFiles;
416 extern SmallVector<SharedFile *, 0> sharedFiles;
417 
418 } // namespace elf
419 } // namespace lld
420 
421 #endif
422