1 //===- InputFiles.cpp -----------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "InputFiles.h" 11 #include "Chunks.h" 12 #include "Config.h" 13 #include "Driver.h" 14 #include "SymbolTable.h" 15 #include "Symbols.h" 16 #include "lld/Common/ErrorHandler.h" 17 #include "lld/Common/Memory.h" 18 #include "llvm-c/lto.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/Triple.h" 21 #include "llvm/ADT/Twine.h" 22 #include "llvm/BinaryFormat/COFF.h" 23 #include "llvm/Object/Binary.h" 24 #include "llvm/Object/COFF.h" 25 #include "llvm/Support/Casting.h" 26 #include "llvm/Support/Endian.h" 27 #include "llvm/Support/Error.h" 28 #include "llvm/Support/ErrorOr.h" 29 #include "llvm/Support/FileSystem.h" 30 #include "llvm/Target/TargetOptions.h" 31 #include <cstring> 32 #include <system_error> 33 #include <utility> 34 35 using namespace llvm; 36 using namespace llvm::COFF; 37 using namespace llvm::object; 38 using namespace llvm::support::endian; 39 40 using llvm::Triple; 41 using llvm::support::ulittle32_t; 42 43 namespace lld { 44 namespace coff { 45 46 std::vector<ObjFile *> ObjFile::Instances; 47 std::vector<ImportFile *> ImportFile::Instances; 48 std::vector<BitcodeFile *> BitcodeFile::Instances; 49 50 /// Checks that Source is compatible with being a weak alias to Target. 51 /// If Source is Undefined and has no weak alias set, makes it a weak 52 /// alias to Target. 53 static void checkAndSetWeakAlias(SymbolTable *Symtab, InputFile *F, 54 Symbol *Source, Symbol *Target) { 55 if (auto *U = dyn_cast<Undefined>(Source)) { 56 if (U->WeakAlias && U->WeakAlias != Target) 57 Symtab->reportDuplicate(Source, F); 58 U->WeakAlias = Target; 59 } 60 } 61 62 ArchiveFile::ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {} 63 64 void ArchiveFile::parse() { 65 // Parse a MemoryBufferRef as an archive file. 66 File = CHECK(Archive::create(MB), this); 67 68 // Read the symbol table to construct Lazy objects. 69 for (const Archive::Symbol &Sym : File->symbols()) 70 Symtab->addLazy(this, Sym); 71 } 72 73 // Returns a buffer pointing to a member file containing a given symbol. 74 void ArchiveFile::addMember(const Archive::Symbol *Sym) { 75 const Archive::Child &C = 76 CHECK(Sym->getMember(), 77 "could not get the member for symbol " + Sym->getName()); 78 79 // Return an empty buffer if we have already returned the same buffer. 80 if (!Seen.insert(C.getChildOffset()).second) 81 return; 82 83 Driver->enqueueArchiveMember(C, Sym->getName(), getName()); 84 } 85 86 std::vector<MemoryBufferRef> getArchiveMembers(Archive *File) { 87 std::vector<MemoryBufferRef> V; 88 Error Err = Error::success(); 89 for (const ErrorOr<Archive::Child> &COrErr : File->children(Err)) { 90 Archive::Child C = 91 CHECK(COrErr, 92 File->getFileName() + ": could not get the child of the archive"); 93 MemoryBufferRef MBRef = 94 CHECK(C.getMemoryBufferRef(), 95 File->getFileName() + 96 ": could not get the buffer for a child of the archive"); 97 V.push_back(MBRef); 98 } 99 if (Err) 100 fatal(File->getFileName() + 101 ": Archive::children failed: " + toString(std::move(Err))); 102 return V; 103 } 104 105 void ObjFile::parse() { 106 // Parse a memory buffer as a COFF file. 107 std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), this); 108 109 if (auto *Obj = dyn_cast<COFFObjectFile>(Bin.get())) { 110 Bin.release(); 111 COFFObj.reset(Obj); 112 } else { 113 fatal(toString(this) + " is not a COFF file"); 114 } 115 116 // Read section and symbol tables. 117 initializeChunks(); 118 initializeSymbols(); 119 } 120 121 // We set SectionChunk pointers in the SparseChunks vector to this value 122 // temporarily to mark comdat sections as having an unknown resolution. As we 123 // walk the object file's symbol table, once we visit either a leader symbol or 124 // an associative section definition together with the parent comdat's leader, 125 // we set the pointer to either nullptr (to mark the section as discarded) or a 126 // valid SectionChunk for that section. 127 static SectionChunk *const PendingComdat = reinterpret_cast<SectionChunk *>(1); 128 129 void ObjFile::initializeChunks() { 130 uint32_t NumSections = COFFObj->getNumberOfSections(); 131 Chunks.reserve(NumSections); 132 SparseChunks.resize(NumSections + 1); 133 for (uint32_t I = 1; I < NumSections + 1; ++I) { 134 const coff_section *Sec; 135 if (auto EC = COFFObj->getSection(I, Sec)) 136 fatal("getSection failed: #" + Twine(I) + ": " + EC.message()); 137 138 if (Sec->Characteristics & IMAGE_SCN_LNK_COMDAT) 139 SparseChunks[I] = PendingComdat; 140 else 141 SparseChunks[I] = readSection(I, nullptr); 142 } 143 } 144 145 SectionChunk *ObjFile::readSection(uint32_t SectionNumber, 146 const coff_aux_section_definition *Def) { 147 const coff_section *Sec; 148 StringRef Name; 149 if (auto EC = COFFObj->getSection(SectionNumber, Sec)) 150 fatal("getSection failed: #" + Twine(SectionNumber) + ": " + EC.message()); 151 if (auto EC = COFFObj->getSectionName(Sec, Name)) 152 fatal("getSectionName failed: #" + Twine(SectionNumber) + ": " + 153 EC.message()); 154 155 if (Name == ".drectve") { 156 ArrayRef<uint8_t> Data; 157 COFFObj->getSectionContents(Sec, Data); 158 Directives = std::string((const char *)Data.data(), Data.size()); 159 return nullptr; 160 } 161 162 // Object files may have DWARF debug info or MS CodeView debug info 163 // (or both). 164 // 165 // DWARF sections don't need any special handling from the perspective 166 // of the linker; they are just a data section containing relocations. 167 // We can just link them to complete debug info. 168 // 169 // CodeView needs a linker support. We need to interpret and debug 170 // info, and then write it to a separate .pdb file. 171 172 // Ignore debug info unless /debug is given. 173 if (!Config->Debug && Name.startswith(".debug")) 174 return nullptr; 175 176 if (Sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE) 177 return nullptr; 178 auto *C = make<SectionChunk>(this, Sec); 179 if (Def) 180 C->Checksum = Def->CheckSum; 181 182 // CodeView sections are stored to a different vector because they are not 183 // linked in the regular manner. 184 if (C->isCodeView()) 185 DebugChunks.push_back(C); 186 else if (Config->GuardCF != GuardCFLevel::Off && Name == ".gfids$y") 187 GuardFidChunks.push_back(C); 188 else if (Config->GuardCF != GuardCFLevel::Off && Name == ".gljmp$y") 189 GuardLJmpChunks.push_back(C); 190 else if (Name == ".sxdata") 191 SXDataChunks.push_back(C); 192 else 193 Chunks.push_back(C); 194 195 return C; 196 } 197 198 void ObjFile::readAssociativeDefinition( 199 COFFSymbolRef Sym, const coff_aux_section_definition *Def) { 200 SectionChunk *Parent = SparseChunks[Def->getNumber(Sym.isBigObj())]; 201 202 // If the parent is pending, it probably means that its section definition 203 // appears after us in the symbol table. Leave the associated section as 204 // pending; we will handle it during the second pass in initializeSymbols(). 205 if (Parent == PendingComdat) 206 return; 207 208 // Check whether the parent is prevailing. If it is, so are we, and we read 209 // the section; otherwise mark it as discarded. 210 int32_t SectionNumber = Sym.getSectionNumber(); 211 if (Parent) { 212 SparseChunks[SectionNumber] = readSection(SectionNumber, Def); 213 if (SparseChunks[SectionNumber]) 214 Parent->addAssociative(SparseChunks[SectionNumber]); 215 } else { 216 SparseChunks[SectionNumber] = nullptr; 217 } 218 } 219 220 Symbol *ObjFile::createRegular(COFFSymbolRef Sym) { 221 SectionChunk *SC = SparseChunks[Sym.getSectionNumber()]; 222 if (Sym.isExternal()) { 223 StringRef Name; 224 COFFObj->getSymbolName(Sym, Name); 225 if (SC) 226 return Symtab->addRegular(this, Name, Sym.getGeneric(), SC); 227 return Symtab->addUndefined(Name, this, false); 228 } 229 if (SC) 230 return make<DefinedRegular>(this, /*Name*/ "", false, 231 /*IsExternal*/ false, Sym.getGeneric(), SC); 232 return nullptr; 233 } 234 235 void ObjFile::initializeSymbols() { 236 uint32_t NumSymbols = COFFObj->getNumberOfSymbols(); 237 Symbols.resize(NumSymbols); 238 239 SmallVector<std::pair<Symbol *, uint32_t>, 8> WeakAliases; 240 std::vector<uint32_t> PendingIndexes; 241 PendingIndexes.reserve(NumSymbols); 242 243 std::vector<const coff_aux_section_definition *> ComdatDefs( 244 COFFObj->getNumberOfSections() + 1); 245 246 for (uint32_t I = 0; I < NumSymbols; ++I) { 247 COFFSymbolRef COFFSym = check(COFFObj->getSymbol(I)); 248 if (COFFSym.isUndefined()) { 249 Symbols[I] = createUndefined(COFFSym); 250 } else if (COFFSym.isWeakExternal()) { 251 Symbols[I] = createUndefined(COFFSym); 252 uint32_t TagIndex = COFFSym.getAux<coff_aux_weak_external>()->TagIndex; 253 WeakAliases.emplace_back(Symbols[I], TagIndex); 254 } else if (Optional<Symbol *> OptSym = createDefined(COFFSym, ComdatDefs)) { 255 Symbols[I] = *OptSym; 256 } else { 257 // createDefined() returns None if a symbol belongs to a section that 258 // was pending at the point when the symbol was read. This can happen in 259 // two cases: 260 // 1) section definition symbol for a comdat leader; 261 // 2) symbol belongs to a comdat section associated with a section whose 262 // section definition symbol appears later in the symbol table. 263 // In both of these cases, we can expect the section to be resolved by 264 // the time we finish visiting the remaining symbols in the symbol 265 // table. So we postpone the handling of this symbol until that time. 266 PendingIndexes.push_back(I); 267 } 268 I += COFFSym.getNumberOfAuxSymbols(); 269 } 270 271 for (uint32_t I : PendingIndexes) { 272 COFFSymbolRef Sym = check(COFFObj->getSymbol(I)); 273 if (auto *Def = Sym.getSectionDefinition()) 274 if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) 275 readAssociativeDefinition(Sym, Def); 276 Symbols[I] = createRegular(Sym); 277 } 278 279 for (auto &KV : WeakAliases) { 280 Symbol *Sym = KV.first; 281 uint32_t Idx = KV.second; 282 checkAndSetWeakAlias(Symtab, this, Sym, Symbols[Idx]); 283 } 284 } 285 286 Symbol *ObjFile::createUndefined(COFFSymbolRef Sym) { 287 StringRef Name; 288 COFFObj->getSymbolName(Sym, Name); 289 return Symtab->addUndefined(Name, this, Sym.isWeakExternal()); 290 } 291 292 Optional<Symbol *> ObjFile::createDefined( 293 COFFSymbolRef Sym, 294 std::vector<const coff_aux_section_definition *> &ComdatDefs) { 295 StringRef Name; 296 if (Sym.isCommon()) { 297 auto *C = make<CommonChunk>(Sym); 298 Chunks.push_back(C); 299 COFFObj->getSymbolName(Sym, Name); 300 Symbol *S = 301 Symtab->addCommon(this, Name, Sym.getValue(), Sym.getGeneric(), C); 302 return S; 303 } 304 if (Sym.isAbsolute()) { 305 COFFObj->getSymbolName(Sym, Name); 306 // Skip special symbols. 307 if (Name == "@comp.id") 308 return nullptr; 309 if (Name == "@feat.00") { 310 Feat00Flags = Sym.getValue(); 311 return nullptr; 312 } 313 if (Sym.isExternal()) 314 return Symtab->addAbsolute(Name, Sym); 315 else 316 return make<DefinedAbsolute>(Name, Sym); 317 } 318 int32_t SectionNumber = Sym.getSectionNumber(); 319 if (SectionNumber == llvm::COFF::IMAGE_SYM_DEBUG) 320 return nullptr; 321 322 // Reserved sections numbers don't have contents. 323 if (llvm::COFF::isReservedSectionNumber(SectionNumber)) 324 fatal("broken object file: " + toString(this)); 325 326 // This symbol references a section which is not present in the section 327 // header. 328 if ((uint32_t)SectionNumber >= SparseChunks.size()) 329 fatal("broken object file: " + toString(this)); 330 331 // Handle comdat leader symbols. 332 if (const coff_aux_section_definition *Def = ComdatDefs[SectionNumber]) { 333 ComdatDefs[SectionNumber] = nullptr; 334 Symbol *Leader; 335 bool Prevailing; 336 if (Sym.isExternal()) { 337 COFFObj->getSymbolName(Sym, Name); 338 std::tie(Leader, Prevailing) = 339 Symtab->addComdat(this, Name, Sym.getGeneric()); 340 } else { 341 Leader = make<DefinedRegular>(this, /*Name*/ "", false, 342 /*IsExternal*/ false, Sym.getGeneric()); 343 Prevailing = true; 344 } 345 if (Prevailing) { 346 SectionChunk *C = readSection(SectionNumber, Def); 347 SparseChunks[SectionNumber] = C; 348 C->Sym = cast<DefinedRegular>(Leader); 349 cast<DefinedRegular>(Leader)->Data = &C->Repl; 350 } else { 351 SparseChunks[SectionNumber] = nullptr; 352 } 353 return Leader; 354 } 355 356 // Read associative section definitions and prepare to handle the comdat 357 // leader symbol by setting the section's ComdatDefs pointer if we encounter a 358 // non-associative comdat. 359 if (SparseChunks[SectionNumber] == PendingComdat) { 360 if (auto *Def = Sym.getSectionDefinition()) { 361 if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) 362 readAssociativeDefinition(Sym, Def); 363 else 364 ComdatDefs[SectionNumber] = Def; 365 } 366 } 367 368 if (SparseChunks[SectionNumber] == PendingComdat) 369 return None; 370 return createRegular(Sym); 371 } 372 373 MachineTypes ObjFile::getMachineType() { 374 if (COFFObj) 375 return static_cast<MachineTypes>(COFFObj->getMachine()); 376 return IMAGE_FILE_MACHINE_UNKNOWN; 377 } 378 379 StringRef ltrim1(StringRef S, const char *Chars) { 380 if (!S.empty() && strchr(Chars, S[0])) 381 return S.substr(1); 382 return S; 383 } 384 385 void ImportFile::parse() { 386 const char *Buf = MB.getBufferStart(); 387 const char *End = MB.getBufferEnd(); 388 const auto *Hdr = reinterpret_cast<const coff_import_header *>(Buf); 389 390 // Check if the total size is valid. 391 if ((size_t)(End - Buf) != (sizeof(*Hdr) + Hdr->SizeOfData)) 392 fatal("broken import library"); 393 394 // Read names and create an __imp_ symbol. 395 StringRef Name = Saver.save(StringRef(Buf + sizeof(*Hdr))); 396 StringRef ImpName = Saver.save("__imp_" + Name); 397 const char *NameStart = Buf + sizeof(coff_import_header) + Name.size() + 1; 398 DLLName = StringRef(NameStart); 399 StringRef ExtName; 400 switch (Hdr->getNameType()) { 401 case IMPORT_ORDINAL: 402 ExtName = ""; 403 break; 404 case IMPORT_NAME: 405 ExtName = Name; 406 break; 407 case IMPORT_NAME_NOPREFIX: 408 ExtName = ltrim1(Name, "?@_"); 409 break; 410 case IMPORT_NAME_UNDECORATE: 411 ExtName = ltrim1(Name, "?@_"); 412 ExtName = ExtName.substr(0, ExtName.find('@')); 413 break; 414 } 415 416 this->Hdr = Hdr; 417 ExternalName = ExtName; 418 419 ImpSym = Symtab->addImportData(ImpName, this); 420 421 if (Hdr->getType() == llvm::COFF::IMPORT_CONST) 422 static_cast<void>(Symtab->addImportData(Name, this)); 423 424 // If type is function, we need to create a thunk which jump to an 425 // address pointed by the __imp_ symbol. (This allows you to call 426 // DLL functions just like regular non-DLL functions.) 427 if (Hdr->getType() == llvm::COFF::IMPORT_CODE) 428 ThunkSym = Symtab->addImportThunk(Name, ImpSym, Hdr->Machine); 429 } 430 431 void BitcodeFile::parse() { 432 Obj = check(lto::InputFile::create(MemoryBufferRef( 433 MB.getBuffer(), Saver.save(ParentName + MB.getBufferIdentifier())))); 434 std::vector<std::pair<Symbol *, bool>> Comdat(Obj->getComdatTable().size()); 435 for (size_t I = 0; I != Obj->getComdatTable().size(); ++I) 436 Comdat[I] = Symtab->addComdat(this, Saver.save(Obj->getComdatTable()[I])); 437 for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) { 438 StringRef SymName = Saver.save(ObjSym.getName()); 439 int ComdatIndex = ObjSym.getComdatIndex(); 440 Symbol *Sym; 441 if (ObjSym.isUndefined()) { 442 Sym = Symtab->addUndefined(SymName, this, false); 443 } else if (ObjSym.isCommon()) { 444 Sym = Symtab->addCommon(this, SymName, ObjSym.getCommonSize()); 445 } else if (ObjSym.isWeak() && ObjSym.isIndirect()) { 446 // Weak external. 447 Sym = Symtab->addUndefined(SymName, this, true); 448 std::string Fallback = ObjSym.getCOFFWeakExternalFallback(); 449 Symbol *Alias = Symtab->addUndefined(Saver.save(Fallback)); 450 checkAndSetWeakAlias(Symtab, this, Sym, Alias); 451 } else if (ComdatIndex != -1) { 452 if (SymName == Obj->getComdatTable()[ComdatIndex]) 453 Sym = Comdat[ComdatIndex].first; 454 else if (Comdat[ComdatIndex].second) 455 Sym = Symtab->addRegular(this, SymName); 456 else 457 Sym = Symtab->addUndefined(SymName, this, false); 458 } else { 459 Sym = Symtab->addRegular(this, SymName); 460 } 461 SymbolBodies.push_back(Sym); 462 } 463 Directives = Obj->getCOFFLinkerOpts(); 464 } 465 466 MachineTypes BitcodeFile::getMachineType() { 467 switch (Triple(Obj->getTargetTriple()).getArch()) { 468 case Triple::x86_64: 469 return AMD64; 470 case Triple::x86: 471 return I386; 472 case Triple::arm: 473 return ARMNT; 474 case Triple::aarch64: 475 return ARM64; 476 default: 477 return IMAGE_FILE_MACHINE_UNKNOWN; 478 } 479 } 480 } // namespace coff 481 } // namespace lld 482 483 // Returns the last element of a path, which is supposed to be a filename. 484 static StringRef getBasename(StringRef Path) { 485 size_t Pos = Path.find_last_of("\\/"); 486 if (Pos == StringRef::npos) 487 return Path; 488 return Path.substr(Pos + 1); 489 } 490 491 // Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)". 492 std::string lld::toString(const coff::InputFile *File) { 493 if (!File) 494 return "<internal>"; 495 if (File->ParentName.empty()) 496 return File->getName(); 497 498 return (getBasename(File->ParentName) + "(" + getBasename(File->getName()) + 499 ")") 500 .str(); 501 } 502