1 //===- InputFiles.cpp -----------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "InputFiles.h" 11 #include "Chunks.h" 12 #include "Config.h" 13 #include "Driver.h" 14 #include "SymbolTable.h" 15 #include "Symbols.h" 16 #include "lld/Common/ErrorHandler.h" 17 #include "lld/Common/Memory.h" 18 #include "llvm-c/lto.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/Triple.h" 21 #include "llvm/ADT/Twine.h" 22 #include "llvm/BinaryFormat/COFF.h" 23 #include "llvm/Object/Binary.h" 24 #include "llvm/Object/COFF.h" 25 #include "llvm/Support/Casting.h" 26 #include "llvm/Support/Endian.h" 27 #include "llvm/Support/Error.h" 28 #include "llvm/Support/ErrorOr.h" 29 #include "llvm/Support/FileSystem.h" 30 #include "llvm/Target/TargetOptions.h" 31 #include <cstring> 32 #include <system_error> 33 #include <utility> 34 35 using namespace llvm; 36 using namespace llvm::COFF; 37 using namespace llvm::object; 38 using namespace llvm::support::endian; 39 40 using llvm::Triple; 41 using llvm::support::ulittle32_t; 42 43 namespace lld { 44 namespace coff { 45 46 std::vector<ObjFile *> ObjFile::Instances; 47 std::vector<ImportFile *> ImportFile::Instances; 48 std::vector<BitcodeFile *> BitcodeFile::Instances; 49 50 /// Checks that Source is compatible with being a weak alias to Target. 51 /// If Source is Undefined and has no weak alias set, makes it a weak 52 /// alias to Target. 53 static void checkAndSetWeakAlias(SymbolTable *Symtab, InputFile *F, 54 Symbol *Source, Symbol *Target) { 55 if (auto *U = dyn_cast<Undefined>(Source)) { 56 if (U->WeakAlias && U->WeakAlias != Target) 57 Symtab->reportDuplicate(Source, F); 58 U->WeakAlias = Target; 59 } 60 } 61 62 ArchiveFile::ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {} 63 64 void ArchiveFile::parse() { 65 // Parse a MemoryBufferRef as an archive file. 66 File = CHECK(Archive::create(MB), this); 67 68 // Read the symbol table to construct Lazy objects. 69 for (const Archive::Symbol &Sym : File->symbols()) 70 Symtab->addLazy(this, Sym); 71 } 72 73 // Returns a buffer pointing to a member file containing a given symbol. 74 void ArchiveFile::addMember(const Archive::Symbol *Sym) { 75 const Archive::Child &C = 76 CHECK(Sym->getMember(), 77 "could not get the member for symbol " + Sym->getName()); 78 79 // Return an empty buffer if we have already returned the same buffer. 80 if (!Seen.insert(C.getChildOffset()).second) 81 return; 82 83 Driver->enqueueArchiveMember(C, Sym->getName(), getName()); 84 } 85 86 std::vector<MemoryBufferRef> getArchiveMembers(Archive *File) { 87 std::vector<MemoryBufferRef> V; 88 Error Err = Error::success(); 89 for (const ErrorOr<Archive::Child> &COrErr : File->children(Err)) { 90 Archive::Child C = 91 CHECK(COrErr, 92 File->getFileName() + ": could not get the child of the archive"); 93 MemoryBufferRef MBRef = 94 CHECK(C.getMemoryBufferRef(), 95 File->getFileName() + 96 ": could not get the buffer for a child of the archive"); 97 V.push_back(MBRef); 98 } 99 if (Err) 100 fatal(File->getFileName() + 101 ": Archive::children failed: " + toString(std::move(Err))); 102 return V; 103 } 104 105 void ObjFile::parse() { 106 // Parse a memory buffer as a COFF file. 107 std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), this); 108 109 if (auto *Obj = dyn_cast<COFFObjectFile>(Bin.get())) { 110 Bin.release(); 111 COFFObj.reset(Obj); 112 } else { 113 fatal(toString(this) + " is not a COFF file"); 114 } 115 116 // Read section and symbol tables. 117 initializeChunks(); 118 initializeSymbols(); 119 } 120 121 // We set SectionChunk pointers in the SparseChunks vector to this value 122 // temporarily to mark comdat sections as having an unknown resolution. As we 123 // walk the object file's symbol table, once we visit either a leader symbol or 124 // an associative section definition together with the parent comdat's leader, 125 // we set the pointer to either nullptr (to mark the section as discarded) or a 126 // valid SectionChunk for that section. 127 static SectionChunk *const PendingComdat = reinterpret_cast<SectionChunk *>(1); 128 129 void ObjFile::initializeChunks() { 130 uint32_t NumSections = COFFObj->getNumberOfSections(); 131 Chunks.reserve(NumSections); 132 SparseChunks.resize(NumSections + 1); 133 for (uint32_t I = 1; I < NumSections + 1; ++I) { 134 const coff_section *Sec; 135 if (auto EC = COFFObj->getSection(I, Sec)) 136 fatal("getSection failed: #" + Twine(I) + ": " + EC.message()); 137 138 if (Sec->Characteristics & IMAGE_SCN_LNK_COMDAT) 139 SparseChunks[I] = PendingComdat; 140 else 141 SparseChunks[I] = readSection(I, nullptr, ""); 142 } 143 } 144 145 SectionChunk *ObjFile::readSection(uint32_t SectionNumber, 146 const coff_aux_section_definition *Def, 147 StringRef LeaderName) { 148 const coff_section *Sec; 149 StringRef Name; 150 if (auto EC = COFFObj->getSection(SectionNumber, Sec)) 151 fatal("getSection failed: #" + Twine(SectionNumber) + ": " + EC.message()); 152 if (auto EC = COFFObj->getSectionName(Sec, Name)) 153 fatal("getSectionName failed: #" + Twine(SectionNumber) + ": " + 154 EC.message()); 155 156 if (Name == ".drectve") { 157 ArrayRef<uint8_t> Data; 158 COFFObj->getSectionContents(Sec, Data); 159 Directives = std::string((const char *)Data.data(), Data.size()); 160 return nullptr; 161 } 162 163 // Object files may have DWARF debug info or MS CodeView debug info 164 // (or both). 165 // 166 // DWARF sections don't need any special handling from the perspective 167 // of the linker; they are just a data section containing relocations. 168 // We can just link them to complete debug info. 169 // 170 // CodeView needs a linker support. We need to interpret and debug 171 // info, and then write it to a separate .pdb file. 172 173 // Ignore debug info unless /debug is given. 174 if (!Config->Debug && Name.startswith(".debug")) 175 return nullptr; 176 177 if (Sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE) 178 return nullptr; 179 auto *C = make<SectionChunk>(this, Sec); 180 if (Def) 181 C->Checksum = Def->CheckSum; 182 183 // CodeView sections are stored to a different vector because they are not 184 // linked in the regular manner. 185 if (C->isCodeView()) 186 DebugChunks.push_back(C); 187 else if (Config->GuardCF != GuardCFLevel::Off && Name == ".gfids$y") 188 GuardFidChunks.push_back(C); 189 else if (Config->GuardCF != GuardCFLevel::Off && Name == ".gljmp$y") 190 GuardLJmpChunks.push_back(C); 191 else if (Name == ".sxdata") 192 SXDataChunks.push_back(C); 193 else if (Config->DoICF && Sec->NumberOfRelocations == 0 && Name == ".rdata" && 194 LeaderName.startswith("??_C@")) 195 // COFF sections that look like string literal sections (i.e. no 196 // relocations, in .rdata, leader symbol name matches the MSVC name mangling 197 // for string literals) are subject to string tail merging. 198 MergeChunk::addSection(C); 199 else 200 Chunks.push_back(C); 201 202 return C; 203 } 204 205 void ObjFile::readAssociativeDefinition( 206 COFFSymbolRef Sym, const coff_aux_section_definition *Def) { 207 SectionChunk *Parent = SparseChunks[Def->getNumber(Sym.isBigObj())]; 208 209 // If the parent is pending, it probably means that its section definition 210 // appears after us in the symbol table. Leave the associated section as 211 // pending; we will handle it during the second pass in initializeSymbols(). 212 if (Parent == PendingComdat) 213 return; 214 215 // Check whether the parent is prevailing. If it is, so are we, and we read 216 // the section; otherwise mark it as discarded. 217 int32_t SectionNumber = Sym.getSectionNumber(); 218 if (Parent) { 219 SparseChunks[SectionNumber] = readSection(SectionNumber, Def, ""); 220 if (SparseChunks[SectionNumber]) 221 Parent->addAssociative(SparseChunks[SectionNumber]); 222 } else { 223 SparseChunks[SectionNumber] = nullptr; 224 } 225 } 226 227 Symbol *ObjFile::createRegular(COFFSymbolRef Sym) { 228 SectionChunk *SC = SparseChunks[Sym.getSectionNumber()]; 229 if (Sym.isExternal()) { 230 StringRef Name; 231 COFFObj->getSymbolName(Sym, Name); 232 if (SC) 233 return Symtab->addRegular(this, Name, Sym.getGeneric(), SC); 234 return Symtab->addUndefined(Name, this, false); 235 } 236 if (SC) 237 return make<DefinedRegular>(this, /*Name*/ "", false, 238 /*IsExternal*/ false, Sym.getGeneric(), SC); 239 return nullptr; 240 } 241 242 void ObjFile::initializeSymbols() { 243 uint32_t NumSymbols = COFFObj->getNumberOfSymbols(); 244 Symbols.resize(NumSymbols); 245 246 SmallVector<std::pair<Symbol *, uint32_t>, 8> WeakAliases; 247 std::vector<uint32_t> PendingIndexes; 248 PendingIndexes.reserve(NumSymbols); 249 250 std::vector<const coff_aux_section_definition *> ComdatDefs( 251 COFFObj->getNumberOfSections() + 1); 252 253 for (uint32_t I = 0; I < NumSymbols; ++I) { 254 COFFSymbolRef COFFSym = check(COFFObj->getSymbol(I)); 255 if (COFFSym.isUndefined()) { 256 Symbols[I] = createUndefined(COFFSym); 257 } else if (COFFSym.isWeakExternal()) { 258 Symbols[I] = createUndefined(COFFSym); 259 uint32_t TagIndex = COFFSym.getAux<coff_aux_weak_external>()->TagIndex; 260 WeakAliases.emplace_back(Symbols[I], TagIndex); 261 } else if (Optional<Symbol *> OptSym = createDefined(COFFSym, ComdatDefs)) { 262 Symbols[I] = *OptSym; 263 } else { 264 // createDefined() returns None if a symbol belongs to a section that 265 // was pending at the point when the symbol was read. This can happen in 266 // two cases: 267 // 1) section definition symbol for a comdat leader; 268 // 2) symbol belongs to a comdat section associated with a section whose 269 // section definition symbol appears later in the symbol table. 270 // In both of these cases, we can expect the section to be resolved by 271 // the time we finish visiting the remaining symbols in the symbol 272 // table. So we postpone the handling of this symbol until that time. 273 PendingIndexes.push_back(I); 274 } 275 I += COFFSym.getNumberOfAuxSymbols(); 276 } 277 278 for (uint32_t I : PendingIndexes) { 279 COFFSymbolRef Sym = check(COFFObj->getSymbol(I)); 280 if (auto *Def = Sym.getSectionDefinition()) 281 if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) 282 readAssociativeDefinition(Sym, Def); 283 Symbols[I] = createRegular(Sym); 284 } 285 286 for (auto &KV : WeakAliases) { 287 Symbol *Sym = KV.first; 288 uint32_t Idx = KV.second; 289 checkAndSetWeakAlias(Symtab, this, Sym, Symbols[Idx]); 290 } 291 } 292 293 Symbol *ObjFile::createUndefined(COFFSymbolRef Sym) { 294 StringRef Name; 295 COFFObj->getSymbolName(Sym, Name); 296 return Symtab->addUndefined(Name, this, Sym.isWeakExternal()); 297 } 298 299 Optional<Symbol *> ObjFile::createDefined( 300 COFFSymbolRef Sym, 301 std::vector<const coff_aux_section_definition *> &ComdatDefs) { 302 StringRef Name; 303 if (Sym.isCommon()) { 304 auto *C = make<CommonChunk>(Sym); 305 Chunks.push_back(C); 306 COFFObj->getSymbolName(Sym, Name); 307 Symbol *S = 308 Symtab->addCommon(this, Name, Sym.getValue(), Sym.getGeneric(), C); 309 return S; 310 } 311 if (Sym.isAbsolute()) { 312 COFFObj->getSymbolName(Sym, Name); 313 // Skip special symbols. 314 if (Name == "@comp.id") 315 return nullptr; 316 if (Name == "@feat.00") { 317 Feat00Flags = Sym.getValue(); 318 return nullptr; 319 } 320 if (Sym.isExternal()) 321 return Symtab->addAbsolute(Name, Sym); 322 else 323 return make<DefinedAbsolute>(Name, Sym); 324 } 325 int32_t SectionNumber = Sym.getSectionNumber(); 326 if (SectionNumber == llvm::COFF::IMAGE_SYM_DEBUG) 327 return nullptr; 328 329 // Reserved sections numbers don't have contents. 330 if (llvm::COFF::isReservedSectionNumber(SectionNumber)) 331 fatal("broken object file: " + toString(this)); 332 333 // This symbol references a section which is not present in the section 334 // header. 335 if ((uint32_t)SectionNumber >= SparseChunks.size()) 336 fatal("broken object file: " + toString(this)); 337 338 // Handle comdat leader symbols. 339 if (const coff_aux_section_definition *Def = ComdatDefs[SectionNumber]) { 340 ComdatDefs[SectionNumber] = nullptr; 341 Symbol *Leader; 342 bool Prevailing; 343 if (Sym.isExternal()) { 344 COFFObj->getSymbolName(Sym, Name); 345 std::tie(Leader, Prevailing) = 346 Symtab->addComdat(this, Name, Sym.getGeneric()); 347 } else { 348 Leader = make<DefinedRegular>(this, /*Name*/ "", false, 349 /*IsExternal*/ false, Sym.getGeneric()); 350 Prevailing = true; 351 } 352 if (Prevailing) { 353 SectionChunk *C = readSection(SectionNumber, Def, Name); 354 SparseChunks[SectionNumber] = C; 355 C->Sym = cast<DefinedRegular>(Leader); 356 cast<DefinedRegular>(Leader)->Data = &C->Repl; 357 } else { 358 SparseChunks[SectionNumber] = nullptr; 359 } 360 return Leader; 361 } 362 363 // Read associative section definitions and prepare to handle the comdat 364 // leader symbol by setting the section's ComdatDefs pointer if we encounter a 365 // non-associative comdat. 366 if (SparseChunks[SectionNumber] == PendingComdat) { 367 if (auto *Def = Sym.getSectionDefinition()) { 368 if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) 369 readAssociativeDefinition(Sym, Def); 370 else 371 ComdatDefs[SectionNumber] = Def; 372 } 373 } 374 375 if (SparseChunks[SectionNumber] == PendingComdat) 376 return None; 377 return createRegular(Sym); 378 } 379 380 MachineTypes ObjFile::getMachineType() { 381 if (COFFObj) 382 return static_cast<MachineTypes>(COFFObj->getMachine()); 383 return IMAGE_FILE_MACHINE_UNKNOWN; 384 } 385 386 StringRef ltrim1(StringRef S, const char *Chars) { 387 if (!S.empty() && strchr(Chars, S[0])) 388 return S.substr(1); 389 return S; 390 } 391 392 void ImportFile::parse() { 393 const char *Buf = MB.getBufferStart(); 394 const char *End = MB.getBufferEnd(); 395 const auto *Hdr = reinterpret_cast<const coff_import_header *>(Buf); 396 397 // Check if the total size is valid. 398 if ((size_t)(End - Buf) != (sizeof(*Hdr) + Hdr->SizeOfData)) 399 fatal("broken import library"); 400 401 // Read names and create an __imp_ symbol. 402 StringRef Name = Saver.save(StringRef(Buf + sizeof(*Hdr))); 403 StringRef ImpName = Saver.save("__imp_" + Name); 404 const char *NameStart = Buf + sizeof(coff_import_header) + Name.size() + 1; 405 DLLName = StringRef(NameStart); 406 StringRef ExtName; 407 switch (Hdr->getNameType()) { 408 case IMPORT_ORDINAL: 409 ExtName = ""; 410 break; 411 case IMPORT_NAME: 412 ExtName = Name; 413 break; 414 case IMPORT_NAME_NOPREFIX: 415 ExtName = ltrim1(Name, "?@_"); 416 break; 417 case IMPORT_NAME_UNDECORATE: 418 ExtName = ltrim1(Name, "?@_"); 419 ExtName = ExtName.substr(0, ExtName.find('@')); 420 break; 421 } 422 423 this->Hdr = Hdr; 424 ExternalName = ExtName; 425 426 ImpSym = Symtab->addImportData(ImpName, this); 427 428 if (Hdr->getType() == llvm::COFF::IMPORT_CONST) 429 static_cast<void>(Symtab->addImportData(Name, this)); 430 431 // If type is function, we need to create a thunk which jump to an 432 // address pointed by the __imp_ symbol. (This allows you to call 433 // DLL functions just like regular non-DLL functions.) 434 if (Hdr->getType() == llvm::COFF::IMPORT_CODE) 435 ThunkSym = Symtab->addImportThunk(Name, ImpSym, Hdr->Machine); 436 } 437 438 void BitcodeFile::parse() { 439 Obj = check(lto::InputFile::create(MemoryBufferRef( 440 MB.getBuffer(), Saver.save(ParentName + MB.getBufferIdentifier())))); 441 std::vector<std::pair<Symbol *, bool>> Comdat(Obj->getComdatTable().size()); 442 for (size_t I = 0; I != Obj->getComdatTable().size(); ++I) 443 Comdat[I] = Symtab->addComdat(this, Saver.save(Obj->getComdatTable()[I])); 444 for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) { 445 StringRef SymName = Saver.save(ObjSym.getName()); 446 int ComdatIndex = ObjSym.getComdatIndex(); 447 Symbol *Sym; 448 if (ObjSym.isUndefined()) { 449 Sym = Symtab->addUndefined(SymName, this, false); 450 } else if (ObjSym.isCommon()) { 451 Sym = Symtab->addCommon(this, SymName, ObjSym.getCommonSize()); 452 } else if (ObjSym.isWeak() && ObjSym.isIndirect()) { 453 // Weak external. 454 Sym = Symtab->addUndefined(SymName, this, true); 455 std::string Fallback = ObjSym.getCOFFWeakExternalFallback(); 456 Symbol *Alias = Symtab->addUndefined(Saver.save(Fallback)); 457 checkAndSetWeakAlias(Symtab, this, Sym, Alias); 458 } else if (ComdatIndex != -1) { 459 if (SymName == Obj->getComdatTable()[ComdatIndex]) 460 Sym = Comdat[ComdatIndex].first; 461 else if (Comdat[ComdatIndex].second) 462 Sym = Symtab->addRegular(this, SymName); 463 else 464 Sym = Symtab->addUndefined(SymName, this, false); 465 } else { 466 Sym = Symtab->addRegular(this, SymName); 467 } 468 Symbols.push_back(Sym); 469 } 470 Directives = Obj->getCOFFLinkerOpts(); 471 } 472 473 MachineTypes BitcodeFile::getMachineType() { 474 switch (Triple(Obj->getTargetTriple()).getArch()) { 475 case Triple::x86_64: 476 return AMD64; 477 case Triple::x86: 478 return I386; 479 case Triple::arm: 480 return ARMNT; 481 case Triple::aarch64: 482 return ARM64; 483 default: 484 return IMAGE_FILE_MACHINE_UNKNOWN; 485 } 486 } 487 } // namespace coff 488 } // namespace lld 489 490 // Returns the last element of a path, which is supposed to be a filename. 491 static StringRef getBasename(StringRef Path) { 492 size_t Pos = Path.find_last_of("\\/"); 493 if (Pos == StringRef::npos) 494 return Path; 495 return Path.substr(Pos + 1); 496 } 497 498 // Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)". 499 std::string lld::toString(const coff::InputFile *File) { 500 if (!File) 501 return "<internal>"; 502 if (File->ParentName.empty()) 503 return File->getName(); 504 505 return (getBasename(File->ParentName) + "(" + getBasename(File->getName()) + 506 ")") 507 .str(); 508 } 509