1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_ELF_INPUT_FILES_H 10 #define LLD_ELF_INPUT_FILES_H 11 12 #include "Config.h" 13 #include "lld/Common/ErrorHandler.h" 14 #include "lld/Common/LLVM.h" 15 #include "lld/Common/Reproduce.h" 16 #include "llvm/ADT/CachedHashString.h" 17 #include "llvm/ADT/DenseSet.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/IR/Comdat.h" 20 #include "llvm/Object/Archive.h" 21 #include "llvm/Object/ELF.h" 22 #include "llvm/Object/IRObjectFile.h" 23 #include "llvm/Support/Threading.h" 24 #include <map> 25 26 namespace llvm { 27 struct DILineInfo; 28 class TarWriter; 29 namespace lto { 30 class InputFile; 31 } 32 } // namespace llvm 33 34 namespace lld { 35 class DWARFCache; 36 37 // Returns "<internal>", "foo.a(bar.o)" or "baz.o". 38 std::string toString(const elf::InputFile *f); 39 40 namespace elf { 41 42 using llvm::object::Archive; 43 44 class InputSection; 45 class Symbol; 46 47 // If --reproduce is specified, all input files are written to this tar archive. 48 extern std::unique_ptr<llvm::TarWriter> tar; 49 50 // Opens a given file. 51 llvm::Optional<MemoryBufferRef> readFile(StringRef path); 52 53 // Add symbols in File to the symbol table. 54 void parseFile(InputFile *file); 55 56 // The root class of input files. 57 class InputFile { 58 private: 59 // Cache for getNameForScript(). 60 mutable SmallString<0> nameForScriptCache; 61 62 protected: 63 SmallVector<InputSectionBase *, 0> sections; 64 65 public: 66 enum Kind : uint8_t { 67 ObjKind, 68 SharedKind, 69 ArchiveKind, 70 BitcodeKind, 71 BinaryKind, 72 }; 73 74 Kind kind() const { return fileKind; } 75 76 bool isElf() const { 77 Kind k = kind(); 78 return k == ObjKind || k == SharedKind; 79 } 80 81 StringRef getName() const { return mb.getBufferIdentifier(); } 82 MemoryBufferRef mb; 83 84 // Returns sections. It is a runtime error to call this function 85 // on files that don't have the notion of sections. 86 ArrayRef<InputSectionBase *> getSections() const { 87 assert(fileKind == ObjKind || fileKind == BinaryKind); 88 return sections; 89 } 90 91 // Returns object file symbols. It is a runtime error to call this 92 // function on files of other types. 93 ArrayRef<Symbol *> getSymbols() const { 94 assert(fileKind == BinaryKind || fileKind == ObjKind || 95 fileKind == BitcodeKind); 96 return symbols; 97 } 98 99 // Get filename to use for linker script processing. 100 StringRef getNameForScript() const; 101 102 // Check if a non-common symbol should be extracted to override a common 103 // definition. 104 bool shouldExtractForCommon(StringRef name); 105 106 // If not empty, this stores the name of the archive containing this file. 107 // We use this string for creating error messages. 108 SmallString<0> archiveName; 109 110 // Cache for toString(). Only toString() should use this member. 111 mutable SmallString<0> toStringCache; 112 113 SmallVector<Symbol *, 0> symbols; 114 115 // .got2 in the current file. This is used by PPC32 -fPIC/-fPIE to compute 116 // offsets in PLT call stubs. 117 InputSection *ppc32Got2 = nullptr; 118 119 // Index of MIPS GOT built for this file. 120 uint32_t mipsGotIndex = -1; 121 122 // groupId is used for --warn-backrefs which is an optional error 123 // checking feature. All files within the same --{start,end}-group or 124 // --{start,end}-lib get the same group ID. Otherwise, each file gets a new 125 // group ID. For more info, see checkDependency() in SymbolTable.cpp. 126 uint32_t groupId; 127 static bool isInGroup; 128 static uint32_t nextGroupId; 129 130 // If this is an architecture-specific file, the following members 131 // have ELF type (i.e. ELF{32,64}{LE,BE}) and target machine type. 132 uint16_t emachine = llvm::ELF::EM_NONE; 133 const Kind fileKind; 134 ELFKind ekind = ELFNoneKind; 135 uint8_t osabi = 0; 136 uint8_t abiVersion = 0; 137 138 // True if this is a relocatable object file/bitcode file between --start-lib 139 // and --end-lib. 140 bool lazy = false; 141 142 // True if this is an argument for --just-symbols. Usually false. 143 bool justSymbols = false; 144 145 std::string getSrcMsg(const Symbol &sym, InputSectionBase &sec, 146 uint64_t offset); 147 148 // On PPC64 we need to keep track of which files contain small code model 149 // relocations that access the .toc section. To minimize the chance of a 150 // relocation overflow, files that do contain said relocations should have 151 // their .toc sections sorted closer to the .got section than files that do 152 // not contain any small code model relocations. Thats because the toc-pointer 153 // is defined to point at .got + 0x8000 and the instructions used with small 154 // code model relocations support immediates in the range [-0x8000, 0x7FFC], 155 // making the addressable range relative to the toc pointer 156 // [.got, .got + 0xFFFC]. 157 bool ppc64SmallCodeModelTocRelocs = false; 158 159 // True if the file has TLSGD/TLSLD GOT relocations without R_PPC64_TLSGD or 160 // R_PPC64_TLSLD. Disable TLS relaxation to avoid bad code generation. 161 bool ppc64DisableTLSRelax = false; 162 163 protected: 164 InputFile(Kind k, MemoryBufferRef m); 165 }; 166 167 class ELFFileBase : public InputFile { 168 public: 169 ELFFileBase(Kind k, MemoryBufferRef m); 170 static bool classof(const InputFile *f) { return f->isElf(); } 171 172 template <typename ELFT> llvm::object::ELFFile<ELFT> getObj() const { 173 return check(llvm::object::ELFFile<ELFT>::create(mb.getBuffer())); 174 } 175 176 StringRef getStringTable() const { return stringTable; } 177 178 ArrayRef<Symbol *> getLocalSymbols() { 179 if (symbols.empty()) 180 return {}; 181 return llvm::makeArrayRef(symbols).slice(1, firstGlobal - 1); 182 } 183 ArrayRef<Symbol *> getGlobalSymbols() { 184 return llvm::makeArrayRef(symbols).slice(firstGlobal); 185 } 186 MutableArrayRef<Symbol *> getMutableGlobalSymbols() { 187 return llvm::makeMutableArrayRef(symbols.data(), symbols.size()) 188 .slice(firstGlobal); 189 } 190 191 template <typename ELFT> typename ELFT::ShdrRange getELFShdrs() const { 192 return typename ELFT::ShdrRange( 193 reinterpret_cast<const typename ELFT::Shdr *>(elfShdrs), numELFShdrs); 194 } 195 template <typename ELFT> typename ELFT::SymRange getELFSyms() const { 196 return typename ELFT::SymRange( 197 reinterpret_cast<const typename ELFT::Sym *>(elfSyms), numELFSyms); 198 } 199 template <typename ELFT> typename ELFT::SymRange getGlobalELFSyms() const { 200 return getELFSyms<ELFT>().slice(firstGlobal); 201 } 202 203 protected: 204 // Initializes this class's member variables. 205 template <typename ELFT> void init(); 206 207 StringRef stringTable; 208 const void *elfShdrs = nullptr; 209 const void *elfSyms = nullptr; 210 uint32_t numELFShdrs = 0; 211 uint32_t numELFSyms = 0; 212 uint32_t firstGlobal = 0; 213 214 public: 215 bool hasCommonSyms = false; 216 }; 217 218 // .o file. 219 template <class ELFT> class ObjFile : public ELFFileBase { 220 LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) 221 222 public: 223 static bool classof(const InputFile *f) { return f->kind() == ObjKind; } 224 225 llvm::object::ELFFile<ELFT> getObj() const { 226 return this->ELFFileBase::getObj<ELFT>(); 227 } 228 229 ObjFile(MemoryBufferRef m, StringRef archiveName) : ELFFileBase(ObjKind, m) { 230 this->archiveName = archiveName; 231 } 232 233 void parse(bool ignoreComdats = false); 234 void parseLazy(); 235 236 StringRef getShtGroupSignature(ArrayRef<Elf_Shdr> sections, 237 const Elf_Shdr &sec); 238 239 Symbol &getSymbol(uint32_t symbolIndex) const { 240 if (symbolIndex >= this->symbols.size()) 241 fatal(toString(this) + ": invalid symbol index"); 242 return *this->symbols[symbolIndex]; 243 } 244 245 uint32_t getSectionIndex(const Elf_Sym &sym) const; 246 247 template <typename RelT> Symbol &getRelocTargetSym(const RelT &rel) const { 248 uint32_t symIndex = rel.getSymbol(config->isMips64EL); 249 return getSymbol(symIndex); 250 } 251 252 llvm::Optional<llvm::DILineInfo> getDILineInfo(InputSectionBase *, uint64_t); 253 llvm::Optional<std::pair<std::string, unsigned>> getVariableLoc(StringRef name); 254 255 // Name of source file obtained from STT_FILE symbol value, 256 // or empty string if there is no such symbol in object file 257 // symbol table. 258 StringRef sourceFile; 259 260 // Pointer to this input file's .llvm_addrsig section, if it has one. 261 const Elf_Shdr *addrsigSec = nullptr; 262 263 // SHT_LLVM_CALL_GRAPH_PROFILE section index. 264 uint32_t cgProfileSectionIndex = 0; 265 266 // MIPS GP0 value defined by this file. This value represents the gp value 267 // used to create the relocatable object and required to support 268 // R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations. 269 uint32_t mipsGp0 = 0; 270 271 uint32_t andFeatures = 0; 272 273 // True if the file defines functions compiled with 274 // -fsplit-stack. Usually false. 275 bool splitStack = false; 276 277 // True if the file defines functions compiled with -fsplit-stack, 278 // but had one or more functions with the no_split_stack attribute. 279 bool someNoSplitStack = false; 280 281 // Get cached DWARF information. 282 DWARFCache *getDwarf(); 283 284 private: 285 void initializeSections(bool ignoreComdats); 286 void initializeSymbols(); 287 void initializeJustSymbols(); 288 289 InputSectionBase *getRelocTarget(uint32_t idx, StringRef name, 290 const Elf_Shdr &sec); 291 InputSectionBase *createInputSection(uint32_t idx, const Elf_Shdr &sec, 292 StringRef shstrtab); 293 294 bool shouldMerge(const Elf_Shdr &sec, StringRef name); 295 296 // Each ELF symbol contains a section index which the symbol belongs to. 297 // However, because the number of bits dedicated for that is limited, a 298 // symbol can directly point to a section only when the section index is 299 // equal to or smaller than 65280. 300 // 301 // If an object file contains more than 65280 sections, the file must 302 // contain .symtab_shndx section. The section contains an array of 303 // 32-bit integers whose size is the same as the number of symbols. 304 // Nth symbol's section index is in the Nth entry of .symtab_shndx. 305 // 306 // The following variable contains the contents of .symtab_shndx. 307 // If the section does not exist (which is common), the array is empty. 308 ArrayRef<Elf_Word> shndxTable; 309 310 // Debugging information to retrieve source file and line for error 311 // reporting. Linker may find reasonable number of errors in a 312 // single object file, so we cache debugging information in order to 313 // parse it only once for each object file we link. 314 std::unique_ptr<DWARFCache> dwarf; 315 llvm::once_flag initDwarf; 316 }; 317 318 // An ArchiveFile object represents a .a file. 319 class ArchiveFile : public InputFile { 320 public: 321 explicit ArchiveFile(std::unique_ptr<Archive> &&file); 322 static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } 323 void parse(); 324 325 // Pulls out an object file that contains a definition for Sym and 326 // returns it. If the same file was instantiated before, this 327 // function does nothing (so we don't instantiate the same file 328 // more than once.) 329 void extract(const Archive::Symbol &sym); 330 331 // Check if a non-common symbol should be extracted to override a common 332 // definition. 333 bool shouldExtractForCommon(const Archive::Symbol &sym); 334 335 size_t getMemberCount() const; 336 size_t getExtractedMemberCount() const { return seen.size(); } 337 338 bool parsed = false; 339 340 private: 341 std::unique_ptr<Archive> file; 342 llvm::DenseSet<uint64_t> seen; 343 }; 344 345 class BitcodeFile : public InputFile { 346 public: 347 BitcodeFile(MemoryBufferRef m, StringRef archiveName, 348 uint64_t offsetInArchive, bool lazy); 349 static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } 350 template <class ELFT> void parse(); 351 void parseLazy(); 352 std::unique_ptr<llvm::lto::InputFile> obj; 353 }; 354 355 // .so file. 356 class SharedFile : public ELFFileBase { 357 public: 358 SharedFile(MemoryBufferRef m, StringRef defaultSoName) 359 : ELFFileBase(SharedKind, m), soName(defaultSoName), 360 isNeeded(!config->asNeeded) {} 361 362 // This is actually a vector of Elf_Verdef pointers. 363 SmallVector<const void *, 0> verdefs; 364 365 // If the output file needs Elf_Verneed data structures for this file, this is 366 // a vector of Elf_Vernaux version identifiers that map onto the entries in 367 // Verdefs, otherwise it is empty. 368 SmallVector<uint32_t, 0> vernauxs; 369 370 static unsigned vernauxNum; 371 372 SmallVector<StringRef, 0> dtNeeded; 373 StringRef soName; 374 375 static bool classof(const InputFile *f) { return f->kind() == SharedKind; } 376 377 template <typename ELFT> void parse(); 378 379 // Used for --as-needed 380 bool isNeeded; 381 382 // Non-weak undefined symbols which are not yet resolved when the SO is 383 // parsed. Only filled for `--no-allow-shlib-undefined`. 384 SmallVector<Symbol *, 0> requiredSymbols; 385 386 private: 387 template <typename ELFT> 388 std::vector<uint32_t> parseVerneed(const llvm::object::ELFFile<ELFT> &obj, 389 const typename ELFT::Shdr *sec); 390 }; 391 392 class BinaryFile : public InputFile { 393 public: 394 explicit BinaryFile(MemoryBufferRef m) : InputFile(BinaryKind, m) {} 395 static bool classof(const InputFile *f) { return f->kind() == BinaryKind; } 396 void parse(); 397 }; 398 399 InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName = "", 400 uint64_t offsetInArchive = 0); 401 InputFile *createLazyFile(MemoryBufferRef mb, StringRef archiveName, 402 uint64_t offsetInArchive); 403 404 inline bool isBitcode(MemoryBufferRef mb) { 405 return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode; 406 } 407 408 std::string replaceThinLTOSuffix(StringRef path); 409 410 extern SmallVector<std::unique_ptr<MemoryBuffer>> memoryBuffers; 411 extern SmallVector<ArchiveFile *, 0> archiveFiles; 412 extern SmallVector<BinaryFile *, 0> binaryFiles; 413 extern SmallVector<BitcodeFile *, 0> bitcodeFiles; 414 extern SmallVector<BitcodeFile *, 0> lazyBitcodeFiles; 415 extern SmallVector<ELFFileBase *, 0> objectFiles; 416 extern SmallVector<SharedFile *, 0> sharedFiles; 417 418 } // namespace elf 419 } // namespace lld 420 421 #endif 422