1 //===- InputFiles.cpp -----------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "InputFiles.h" 11 #include "Chunks.h" 12 #include "Config.h" 13 #include "Driver.h" 14 #include "SymbolTable.h" 15 #include "Symbols.h" 16 #include "lld/Common/ErrorHandler.h" 17 #include "lld/Common/Memory.h" 18 #include "llvm-c/lto.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/Triple.h" 21 #include "llvm/ADT/Twine.h" 22 #include "llvm/BinaryFormat/COFF.h" 23 #include "llvm/Object/Binary.h" 24 #include "llvm/Object/COFF.h" 25 #include "llvm/Support/Casting.h" 26 #include "llvm/Support/Endian.h" 27 #include "llvm/Support/Error.h" 28 #include "llvm/Support/ErrorOr.h" 29 #include "llvm/Support/FileSystem.h" 30 #include "llvm/Support/Path.h" 31 #include "llvm/Target/TargetOptions.h" 32 #include <cstring> 33 #include <system_error> 34 #include <utility> 35 36 using namespace llvm; 37 using namespace llvm::COFF; 38 using namespace llvm::object; 39 using namespace llvm::support::endian; 40 41 using llvm::Triple; 42 using llvm::support::ulittle32_t; 43 44 namespace lld { 45 namespace coff { 46 47 std::vector<ObjFile *> ObjFile::Instances; 48 std::vector<ImportFile *> ImportFile::Instances; 49 std::vector<BitcodeFile *> BitcodeFile::Instances; 50 51 /// Checks that Source is compatible with being a weak alias to Target. 52 /// If Source is Undefined and has no weak alias set, makes it a weak 53 /// alias to Target. 54 static void checkAndSetWeakAlias(SymbolTable *Symtab, InputFile *F, 55 Symbol *Source, Symbol *Target) { 56 if (auto *U = dyn_cast<Undefined>(Source)) { 57 if (U->WeakAlias && U->WeakAlias != Target) { 58 // Weak aliases as produced by GCC are named in the form 59 // .weak.<weaksymbol>.<othersymbol>, where <othersymbol> is the name 60 // of another symbol emitted near the weak symbol. 61 // Just use the definition from the first object file that defined 62 // this weak symbol. 63 if (Config->MinGW) 64 return; 65 Symtab->reportDuplicate(Source, F); 66 } 67 U->WeakAlias = Target; 68 } 69 } 70 71 ArchiveFile::ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {} 72 73 void ArchiveFile::parse() { 74 // Parse a MemoryBufferRef as an archive file. 75 File = CHECK(Archive::create(MB), this); 76 77 // Read the symbol table to construct Lazy objects. 78 for (const Archive::Symbol &Sym : File->symbols()) 79 Symtab->addLazy(this, Sym); 80 } 81 82 // Returns a buffer pointing to a member file containing a given symbol. 83 void ArchiveFile::addMember(const Archive::Symbol *Sym) { 84 const Archive::Child &C = 85 CHECK(Sym->getMember(), 86 "could not get the member for symbol " + Sym->getName()); 87 88 // Return an empty buffer if we have already returned the same buffer. 89 if (!Seen.insert(C.getChildOffset()).second) 90 return; 91 92 Driver->enqueueArchiveMember(C, Sym->getName(), getName()); 93 } 94 95 std::vector<MemoryBufferRef> getArchiveMembers(Archive *File) { 96 std::vector<MemoryBufferRef> V; 97 Error Err = Error::success(); 98 for (const ErrorOr<Archive::Child> &COrErr : File->children(Err)) { 99 Archive::Child C = 100 CHECK(COrErr, 101 File->getFileName() + ": could not get the child of the archive"); 102 MemoryBufferRef MBRef = 103 CHECK(C.getMemoryBufferRef(), 104 File->getFileName() + 105 ": could not get the buffer for a child of the archive"); 106 V.push_back(MBRef); 107 } 108 if (Err) 109 fatal(File->getFileName() + 110 ": Archive::children failed: " + toString(std::move(Err))); 111 return V; 112 } 113 114 void ObjFile::parse() { 115 // Parse a memory buffer as a COFF file. 116 std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), this); 117 118 if (auto *Obj = dyn_cast<COFFObjectFile>(Bin.get())) { 119 Bin.release(); 120 COFFObj.reset(Obj); 121 } else { 122 fatal(toString(this) + " is not a COFF file"); 123 } 124 125 // Read section and symbol tables. 126 initializeChunks(); 127 initializeSymbols(); 128 } 129 130 // We set SectionChunk pointers in the SparseChunks vector to this value 131 // temporarily to mark comdat sections as having an unknown resolution. As we 132 // walk the object file's symbol table, once we visit either a leader symbol or 133 // an associative section definition together with the parent comdat's leader, 134 // we set the pointer to either nullptr (to mark the section as discarded) or a 135 // valid SectionChunk for that section. 136 static SectionChunk *const PendingComdat = reinterpret_cast<SectionChunk *>(1); 137 138 void ObjFile::initializeChunks() { 139 uint32_t NumSections = COFFObj->getNumberOfSections(); 140 Chunks.reserve(NumSections); 141 SparseChunks.resize(NumSections + 1); 142 for (uint32_t I = 1; I < NumSections + 1; ++I) { 143 const coff_section *Sec; 144 if (auto EC = COFFObj->getSection(I, Sec)) 145 fatal("getSection failed: #" + Twine(I) + ": " + EC.message()); 146 147 if (Sec->Characteristics & IMAGE_SCN_LNK_COMDAT) 148 SparseChunks[I] = PendingComdat; 149 else 150 SparseChunks[I] = readSection(I, nullptr, ""); 151 } 152 } 153 154 SectionChunk *ObjFile::readSection(uint32_t SectionNumber, 155 const coff_aux_section_definition *Def, 156 StringRef LeaderName) { 157 const coff_section *Sec; 158 StringRef Name; 159 if (auto EC = COFFObj->getSection(SectionNumber, Sec)) 160 fatal("getSection failed: #" + Twine(SectionNumber) + ": " + EC.message()); 161 if (auto EC = COFFObj->getSectionName(Sec, Name)) 162 fatal("getSectionName failed: #" + Twine(SectionNumber) + ": " + 163 EC.message()); 164 165 if (Name == ".drectve") { 166 ArrayRef<uint8_t> Data; 167 COFFObj->getSectionContents(Sec, Data); 168 Directives = std::string((const char *)Data.data(), Data.size()); 169 return nullptr; 170 } 171 172 if (Name == ".llvm_addrsig") { 173 AddrsigSec = Sec; 174 return nullptr; 175 } 176 177 // Object files may have DWARF debug info or MS CodeView debug info 178 // (or both). 179 // 180 // DWARF sections don't need any special handling from the perspective 181 // of the linker; they are just a data section containing relocations. 182 // We can just link them to complete debug info. 183 // 184 // CodeView needs a linker support. We need to interpret and debug 185 // info, and then write it to a separate .pdb file. 186 187 // Ignore DWARF debug info unless /debug is given. 188 if (!Config->Debug && Name.startswith(".debug_")) 189 return nullptr; 190 191 if (Sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE) 192 return nullptr; 193 auto *C = make<SectionChunk>(this, Sec); 194 if (Def) 195 C->Checksum = Def->CheckSum; 196 197 // CodeView sections are stored to a different vector because they are not 198 // linked in the regular manner. 199 if (C->isCodeView()) 200 DebugChunks.push_back(C); 201 else if (Config->GuardCF != GuardCFLevel::Off && Name == ".gfids$y") 202 GuardFidChunks.push_back(C); 203 else if (Config->GuardCF != GuardCFLevel::Off && Name == ".gljmp$y") 204 GuardLJmpChunks.push_back(C); 205 else if (Name == ".sxdata") 206 SXDataChunks.push_back(C); 207 else if (Config->TailMerge && Sec->NumberOfRelocations == 0 && 208 Name == ".rdata" && LeaderName.startswith("??_C@")) 209 // COFF sections that look like string literal sections (i.e. no 210 // relocations, in .rdata, leader symbol name matches the MSVC name mangling 211 // for string literals) are subject to string tail merging. 212 MergeChunk::addSection(C); 213 else 214 Chunks.push_back(C); 215 216 return C; 217 } 218 219 void ObjFile::readAssociativeDefinition( 220 COFFSymbolRef Sym, const coff_aux_section_definition *Def) { 221 readAssociativeDefinition(Sym, Def, Def->getNumber(Sym.isBigObj())); 222 } 223 224 void ObjFile::readAssociativeDefinition(COFFSymbolRef Sym, 225 const coff_aux_section_definition *Def, 226 uint32_t ParentSection) { 227 SectionChunk *Parent = SparseChunks[ParentSection]; 228 229 // If the parent is pending, it probably means that its section definition 230 // appears after us in the symbol table. Leave the associated section as 231 // pending; we will handle it during the second pass in initializeSymbols(). 232 if (Parent == PendingComdat) 233 return; 234 235 // Check whether the parent is prevailing. If it is, so are we, and we read 236 // the section; otherwise mark it as discarded. 237 int32_t SectionNumber = Sym.getSectionNumber(); 238 if (Parent) { 239 SparseChunks[SectionNumber] = readSection(SectionNumber, Def, ""); 240 if (SparseChunks[SectionNumber]) 241 Parent->addAssociative(SparseChunks[SectionNumber]); 242 } else { 243 SparseChunks[SectionNumber] = nullptr; 244 } 245 } 246 247 void ObjFile::recordPrevailingSymbolForMingw( 248 COFFSymbolRef Sym, DenseMap<StringRef, uint32_t> &PrevailingSectionMap) { 249 // For comdat symbols in executable sections, where this is the copy 250 // of the section chunk we actually include instead of discarding it, 251 // add the symbol to a map to allow using it for implicitly 252 // associating .[px]data$<func> sections to it. 253 int32_t SectionNumber = Sym.getSectionNumber(); 254 SectionChunk *SC = SparseChunks[SectionNumber]; 255 if (SC && SC->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) { 256 StringRef Name; 257 COFFObj->getSymbolName(Sym, Name); 258 PrevailingSectionMap[Name] = SectionNumber; 259 } 260 } 261 262 void ObjFile::maybeAssociateSEHForMingw( 263 COFFSymbolRef Sym, const coff_aux_section_definition *Def, 264 const DenseMap<StringRef, uint32_t> &PrevailingSectionMap) { 265 StringRef Name; 266 COFFObj->getSymbolName(Sym, Name); 267 if (Name.consume_front(".pdata$") || Name.consume_front(".xdata$")) { 268 // For MinGW, treat .[px]data$<func> as implicitly associative to 269 // the symbol <func>. 270 auto ParentSym = PrevailingSectionMap.find(Name); 271 if (ParentSym != PrevailingSectionMap.end()) 272 readAssociativeDefinition(Sym, Def, ParentSym->second); 273 } 274 } 275 276 Symbol *ObjFile::createRegular(COFFSymbolRef Sym) { 277 SectionChunk *SC = SparseChunks[Sym.getSectionNumber()]; 278 if (Sym.isExternal()) { 279 StringRef Name; 280 COFFObj->getSymbolName(Sym, Name); 281 if (SC) 282 return Symtab->addRegular(this, Name, Sym.getGeneric(), SC); 283 // For MinGW symbols named .weak.* that point to a discarded section, 284 // don't create an Undefined symbol. If nothing ever refers to the symbol, 285 // everything should be fine. If something actually refers to the symbol 286 // (e.g. the undefined weak alias), linking will fail due to undefined 287 // references at the end. 288 if (Config->MinGW && Name.startswith(".weak.")) 289 return nullptr; 290 return Symtab->addUndefined(Name, this, false); 291 } 292 if (SC) 293 return make<DefinedRegular>(this, /*Name*/ "", false, 294 /*IsExternal*/ false, Sym.getGeneric(), SC); 295 return nullptr; 296 } 297 298 void ObjFile::initializeSymbols() { 299 uint32_t NumSymbols = COFFObj->getNumberOfSymbols(); 300 Symbols.resize(NumSymbols); 301 302 SmallVector<std::pair<Symbol *, uint32_t>, 8> WeakAliases; 303 std::vector<uint32_t> PendingIndexes; 304 PendingIndexes.reserve(NumSymbols); 305 306 DenseMap<StringRef, uint32_t> PrevailingSectionMap; 307 std::vector<const coff_aux_section_definition *> ComdatDefs( 308 COFFObj->getNumberOfSections() + 1); 309 310 for (uint32_t I = 0; I < NumSymbols; ++I) { 311 COFFSymbolRef COFFSym = check(COFFObj->getSymbol(I)); 312 bool PrevailingComdat; 313 if (COFFSym.isUndefined()) { 314 Symbols[I] = createUndefined(COFFSym); 315 } else if (COFFSym.isWeakExternal()) { 316 Symbols[I] = createUndefined(COFFSym); 317 uint32_t TagIndex = COFFSym.getAux<coff_aux_weak_external>()->TagIndex; 318 WeakAliases.emplace_back(Symbols[I], TagIndex); 319 } else if (Optional<Symbol *> OptSym = 320 createDefined(COFFSym, ComdatDefs, PrevailingComdat)) { 321 Symbols[I] = *OptSym; 322 if (Config->MinGW && PrevailingComdat) 323 recordPrevailingSymbolForMingw(COFFSym, PrevailingSectionMap); 324 } else { 325 // createDefined() returns None if a symbol belongs to a section that 326 // was pending at the point when the symbol was read. This can happen in 327 // two cases: 328 // 1) section definition symbol for a comdat leader; 329 // 2) symbol belongs to a comdat section associated with a section whose 330 // section definition symbol appears later in the symbol table. 331 // In both of these cases, we can expect the section to be resolved by 332 // the time we finish visiting the remaining symbols in the symbol 333 // table. So we postpone the handling of this symbol until that time. 334 PendingIndexes.push_back(I); 335 } 336 I += COFFSym.getNumberOfAuxSymbols(); 337 } 338 339 for (uint32_t I : PendingIndexes) { 340 COFFSymbolRef Sym = check(COFFObj->getSymbol(I)); 341 if (auto *Def = Sym.getSectionDefinition()) { 342 if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) 343 readAssociativeDefinition(Sym, Def); 344 else if (Config->MinGW) 345 maybeAssociateSEHForMingw(Sym, Def, PrevailingSectionMap); 346 } 347 if (SparseChunks[Sym.getSectionNumber()] == PendingComdat) { 348 StringRef Name; 349 COFFObj->getSymbolName(Sym, Name); 350 log("comdat section " + Name + 351 " without leader and unassociated, discarding"); 352 continue; 353 } 354 Symbols[I] = createRegular(Sym); 355 } 356 357 for (auto &KV : WeakAliases) { 358 Symbol *Sym = KV.first; 359 uint32_t Idx = KV.second; 360 checkAndSetWeakAlias(Symtab, this, Sym, Symbols[Idx]); 361 } 362 } 363 364 Symbol *ObjFile::createUndefined(COFFSymbolRef Sym) { 365 StringRef Name; 366 COFFObj->getSymbolName(Sym, Name); 367 return Symtab->addUndefined(Name, this, Sym.isWeakExternal()); 368 } 369 370 Optional<Symbol *> ObjFile::createDefined( 371 COFFSymbolRef Sym, 372 std::vector<const coff_aux_section_definition *> &ComdatDefs, 373 bool &Prevailing) { 374 Prevailing = false; 375 auto GetName = [&]() { 376 StringRef S; 377 COFFObj->getSymbolName(Sym, S); 378 return S; 379 }; 380 381 if (Sym.isCommon()) { 382 auto *C = make<CommonChunk>(Sym); 383 Chunks.push_back(C); 384 return Symtab->addCommon(this, GetName(), Sym.getValue(), Sym.getGeneric(), 385 C); 386 } 387 388 if (Sym.isAbsolute()) { 389 StringRef Name = GetName(); 390 391 // Skip special symbols. 392 if (Name == "@comp.id") 393 return nullptr; 394 if (Name == "@feat.00") { 395 Feat00Flags = Sym.getValue(); 396 return nullptr; 397 } 398 399 if (Sym.isExternal()) 400 return Symtab->addAbsolute(Name, Sym); 401 return make<DefinedAbsolute>(Name, Sym); 402 } 403 404 int32_t SectionNumber = Sym.getSectionNumber(); 405 if (SectionNumber == llvm::COFF::IMAGE_SYM_DEBUG) 406 return nullptr; 407 408 if (llvm::COFF::isReservedSectionNumber(SectionNumber)) 409 fatal(toString(this) + ": " + GetName() + 410 " should not refer to special section " + Twine(SectionNumber)); 411 412 if ((uint32_t)SectionNumber >= SparseChunks.size()) 413 fatal(toString(this) + ": " + GetName() + 414 " should not refer to non-existent section " + Twine(SectionNumber)); 415 416 // Handle comdat leader symbols. 417 if (const coff_aux_section_definition *Def = ComdatDefs[SectionNumber]) { 418 ComdatDefs[SectionNumber] = nullptr; 419 Symbol *Leader; 420 if (Sym.isExternal()) { 421 std::tie(Leader, Prevailing) = 422 Symtab->addComdat(this, GetName(), Sym.getGeneric()); 423 } else { 424 Leader = make<DefinedRegular>(this, /*Name*/ "", false, 425 /*IsExternal*/ false, Sym.getGeneric()); 426 Prevailing = true; 427 } 428 429 if (Prevailing) { 430 SectionChunk *C = readSection(SectionNumber, Def, GetName()); 431 SparseChunks[SectionNumber] = C; 432 C->Sym = cast<DefinedRegular>(Leader); 433 cast<DefinedRegular>(Leader)->Data = &C->Repl; 434 } else { 435 SparseChunks[SectionNumber] = nullptr; 436 } 437 return Leader; 438 } 439 440 // Read associative section definitions and prepare to handle the comdat 441 // leader symbol by setting the section's ComdatDefs pointer if we encounter a 442 // non-associative comdat. 443 if (SparseChunks[SectionNumber] == PendingComdat) { 444 if (auto *Def = Sym.getSectionDefinition()) { 445 if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) 446 readAssociativeDefinition(Sym, Def); 447 else 448 ComdatDefs[SectionNumber] = Def; 449 } 450 } 451 452 if (SparseChunks[SectionNumber] == PendingComdat) 453 return None; 454 return createRegular(Sym); 455 } 456 457 MachineTypes ObjFile::getMachineType() { 458 if (COFFObj) 459 return static_cast<MachineTypes>(COFFObj->getMachine()); 460 return IMAGE_FILE_MACHINE_UNKNOWN; 461 } 462 463 StringRef ltrim1(StringRef S, const char *Chars) { 464 if (!S.empty() && strchr(Chars, S[0])) 465 return S.substr(1); 466 return S; 467 } 468 469 void ImportFile::parse() { 470 const char *Buf = MB.getBufferStart(); 471 const char *End = MB.getBufferEnd(); 472 const auto *Hdr = reinterpret_cast<const coff_import_header *>(Buf); 473 474 // Check if the total size is valid. 475 if ((size_t)(End - Buf) != (sizeof(*Hdr) + Hdr->SizeOfData)) 476 fatal("broken import library"); 477 478 // Read names and create an __imp_ symbol. 479 StringRef Name = Saver.save(StringRef(Buf + sizeof(*Hdr))); 480 StringRef ImpName = Saver.save("__imp_" + Name); 481 const char *NameStart = Buf + sizeof(coff_import_header) + Name.size() + 1; 482 DLLName = StringRef(NameStart); 483 StringRef ExtName; 484 switch (Hdr->getNameType()) { 485 case IMPORT_ORDINAL: 486 ExtName = ""; 487 break; 488 case IMPORT_NAME: 489 ExtName = Name; 490 break; 491 case IMPORT_NAME_NOPREFIX: 492 ExtName = ltrim1(Name, "?@_"); 493 break; 494 case IMPORT_NAME_UNDECORATE: 495 ExtName = ltrim1(Name, "?@_"); 496 ExtName = ExtName.substr(0, ExtName.find('@')); 497 break; 498 } 499 500 this->Hdr = Hdr; 501 ExternalName = ExtName; 502 503 ImpSym = Symtab->addImportData(ImpName, this); 504 // If this was a duplicate, we logged an error but may continue; 505 // in this case, ImpSym is nullptr. 506 if (!ImpSym) 507 return; 508 509 if (Hdr->getType() == llvm::COFF::IMPORT_CONST) 510 static_cast<void>(Symtab->addImportData(Name, this)); 511 512 // If type is function, we need to create a thunk which jump to an 513 // address pointed by the __imp_ symbol. (This allows you to call 514 // DLL functions just like regular non-DLL functions.) 515 if (Hdr->getType() == llvm::COFF::IMPORT_CODE) 516 ThunkSym = Symtab->addImportThunk( 517 Name, cast_or_null<DefinedImportData>(ImpSym), Hdr->Machine); 518 } 519 520 void BitcodeFile::parse() { 521 Obj = check(lto::InputFile::create(MemoryBufferRef( 522 MB.getBuffer(), Saver.save(ParentName + MB.getBufferIdentifier())))); 523 std::vector<std::pair<Symbol *, bool>> Comdat(Obj->getComdatTable().size()); 524 for (size_t I = 0; I != Obj->getComdatTable().size(); ++I) 525 Comdat[I] = Symtab->addComdat(this, Saver.save(Obj->getComdatTable()[I])); 526 for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) { 527 StringRef SymName = Saver.save(ObjSym.getName()); 528 int ComdatIndex = ObjSym.getComdatIndex(); 529 Symbol *Sym; 530 if (ObjSym.isUndefined()) { 531 Sym = Symtab->addUndefined(SymName, this, false); 532 } else if (ObjSym.isCommon()) { 533 Sym = Symtab->addCommon(this, SymName, ObjSym.getCommonSize()); 534 } else if (ObjSym.isWeak() && ObjSym.isIndirect()) { 535 // Weak external. 536 Sym = Symtab->addUndefined(SymName, this, true); 537 std::string Fallback = ObjSym.getCOFFWeakExternalFallback(); 538 Symbol *Alias = Symtab->addUndefined(Saver.save(Fallback)); 539 checkAndSetWeakAlias(Symtab, this, Sym, Alias); 540 } else if (ComdatIndex != -1) { 541 if (SymName == Obj->getComdatTable()[ComdatIndex]) 542 Sym = Comdat[ComdatIndex].first; 543 else if (Comdat[ComdatIndex].second) 544 Sym = Symtab->addRegular(this, SymName); 545 else 546 Sym = Symtab->addUndefined(SymName, this, false); 547 } else { 548 Sym = Symtab->addRegular(this, SymName); 549 } 550 Symbols.push_back(Sym); 551 } 552 Directives = Obj->getCOFFLinkerOpts(); 553 } 554 555 MachineTypes BitcodeFile::getMachineType() { 556 switch (Triple(Obj->getTargetTriple()).getArch()) { 557 case Triple::x86_64: 558 return AMD64; 559 case Triple::x86: 560 return I386; 561 case Triple::arm: 562 return ARMNT; 563 case Triple::aarch64: 564 return ARM64; 565 default: 566 return IMAGE_FILE_MACHINE_UNKNOWN; 567 } 568 } 569 } // namespace coff 570 } // namespace lld 571 572 // Returns the last element of a path, which is supposed to be a filename. 573 static StringRef getBasename(StringRef Path) { 574 return sys::path::filename(Path, sys::path::Style::windows); 575 } 576 577 // Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)". 578 std::string lld::toString(const coff::InputFile *File) { 579 if (!File) 580 return "<internal>"; 581 if (File->ParentName.empty()) 582 return File->getName(); 583 584 return (getBasename(File->ParentName) + "(" + getBasename(File->getName()) + 585 ")") 586 .str(); 587 } 588