1 //===- InputFiles.cpp -----------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "InputFiles.h"
11 #include "Chunks.h"
12 #include "Config.h"
13 #include "Driver.h"
14 #include "SymbolTable.h"
15 #include "Symbols.h"
16 #include "lld/Common/ErrorHandler.h"
17 #include "lld/Common/Memory.h"
18 #include "llvm-c/lto.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/Triple.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/BinaryFormat/COFF.h"
23 #include "llvm/Object/Binary.h"
24 #include "llvm/Object/COFF.h"
25 #include "llvm/Support/Casting.h"
26 #include "llvm/Support/Endian.h"
27 #include "llvm/Support/Error.h"
28 #include "llvm/Support/ErrorOr.h"
29 #include "llvm/Support/FileSystem.h"
30 #include "llvm/Support/Path.h"
31 #include "llvm/Target/TargetOptions.h"
32 #include <cstring>
33 #include <system_error>
34 #include <utility>
35 
36 using namespace llvm;
37 using namespace llvm::COFF;
38 using namespace llvm::object;
39 using namespace llvm::support::endian;
40 
41 using llvm::Triple;
42 using llvm::support::ulittle32_t;
43 
44 namespace lld {
45 namespace coff {
46 
// Out-of-class definitions of the static Instances vectors declared by
// ObjFile, ImportFile and BitcodeFile. They start out empty; nothing in this
// file adds to them.
std::vector<ObjFile *> ObjFile::Instances;
std::vector<ImportFile *> ImportFile::Instances;
std::vector<BitcodeFile *> BitcodeFile::Instances;
50 
51 /// Checks that Source is compatible with being a weak alias to Target.
52 /// If Source is Undefined and has no weak alias set, makes it a weak
53 /// alias to Target.
54 static void checkAndSetWeakAlias(SymbolTable *Symtab, InputFile *F,
55                                  Symbol *Source, Symbol *Target) {
56   if (auto *U = dyn_cast<Undefined>(Source)) {
57     if (U->WeakAlias && U->WeakAlias != Target) {
58       // Weak aliases as produced by GCC are named in the form
59       // .weak.<weaksymbol>.<othersymbol>, where <othersymbol> is the name
60       // of another symbol emitted near the weak symbol.
61       // Just use the definition from the first object file that defined
62       // this weak symbol.
63       if (Config->MinGW)
64         return;
65       Symtab->reportDuplicate(Source, F);
66     }
67     U->WeakAlias = Target;
68   }
69 }
70 
// Wraps buffer M as an input file of kind ArchiveKind. The archive itself is
// only opened later, by parse().
ArchiveFile::ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {}
72 
73 void ArchiveFile::parse() {
74   // Parse a MemoryBufferRef as an archive file.
75   File = CHECK(Archive::create(MB), this);
76 
77   // Read the symbol table to construct Lazy objects.
78   for (const Archive::Symbol &Sym : File->symbols())
79     Symtab->addLazy(this, Sym);
80 }
81 
82 // Returns a buffer pointing to a member file containing a given symbol.
83 void ArchiveFile::addMember(const Archive::Symbol *Sym) {
84   const Archive::Child &C =
85       CHECK(Sym->getMember(),
86             "could not get the member for symbol " + Sym->getName());
87 
88   // Return an empty buffer if we have already returned the same buffer.
89   if (!Seen.insert(C.getChildOffset()).second)
90     return;
91 
92   Driver->enqueueArchiveMember(C, Sym->getName(), getName());
93 }
94 
95 std::vector<MemoryBufferRef> getArchiveMembers(Archive *File) {
96   std::vector<MemoryBufferRef> V;
97   Error Err = Error::success();
98   for (const ErrorOr<Archive::Child> &COrErr : File->children(Err)) {
99     Archive::Child C =
100         CHECK(COrErr,
101               File->getFileName() + ": could not get the child of the archive");
102     MemoryBufferRef MBRef =
103         CHECK(C.getMemoryBufferRef(),
104               File->getFileName() +
105                   ": could not get the buffer for a child of the archive");
106     V.push_back(MBRef);
107   }
108   if (Err)
109     fatal(File->getFileName() +
110           ": Archive::children failed: " + toString(std::move(Err)));
111   return V;
112 }
113 
114 void ObjFile::parse() {
115   // Parse a memory buffer as a COFF file.
116   std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), this);
117 
118   if (auto *Obj = dyn_cast<COFFObjectFile>(Bin.get())) {
119     Bin.release();
120     COFFObj.reset(Obj);
121   } else {
122     fatal(toString(this) + " is not a COFF file");
123   }
124 
125   // Read section and symbol tables.
126   initializeChunks();
127   initializeSymbols();
128 }
129 
// We set SectionChunk pointers in the SparseChunks vector to this value
// temporarily to mark comdat sections as having an unknown resolution. As we
// walk the object file's symbol table, once we visit either a leader symbol or
// an associative section definition together with the parent comdat's leader,
// we set the pointer to either nullptr (to mark the section as discarded) or a
// valid SectionChunk for that section.
//
// The sentinel is the integer 1 cast to a pointer, so it compares unequal to
// nullptr; it must only ever be compared against, never dereferenced.
static SectionChunk *const PendingComdat = reinterpret_cast<SectionChunk *>(1);
137 
138 void ObjFile::initializeChunks() {
139   uint32_t NumSections = COFFObj->getNumberOfSections();
140   Chunks.reserve(NumSections);
141   SparseChunks.resize(NumSections + 1);
142   for (uint32_t I = 1; I < NumSections + 1; ++I) {
143     const coff_section *Sec;
144     if (auto EC = COFFObj->getSection(I, Sec))
145       fatal("getSection failed: #" + Twine(I) + ": " + EC.message());
146 
147     if (Sec->Characteristics & IMAGE_SCN_LNK_COMDAT)
148       SparseChunks[I] = PendingComdat;
149     else
150       SparseChunks[I] = readSection(I, nullptr, "");
151   }
152 }
153 
154 SectionChunk *ObjFile::readSection(uint32_t SectionNumber,
155                                    const coff_aux_section_definition *Def,
156                                    StringRef LeaderName) {
157   const coff_section *Sec;
158   StringRef Name;
159   if (auto EC = COFFObj->getSection(SectionNumber, Sec))
160     fatal("getSection failed: #" + Twine(SectionNumber) + ": " + EC.message());
161   if (auto EC = COFFObj->getSectionName(Sec, Name))
162     fatal("getSectionName failed: #" + Twine(SectionNumber) + ": " +
163           EC.message());
164 
165   if (Name == ".drectve") {
166     ArrayRef<uint8_t> Data;
167     COFFObj->getSectionContents(Sec, Data);
168     Directives = std::string((const char *)Data.data(), Data.size());
169     return nullptr;
170   }
171 
172   if (Name == ".llvm_addrsig") {
173     AddrsigSec = Sec;
174     return nullptr;
175   }
176 
177   // Object files may have DWARF debug info or MS CodeView debug info
178   // (or both).
179   //
180   // DWARF sections don't need any special handling from the perspective
181   // of the linker; they are just a data section containing relocations.
182   // We can just link them to complete debug info.
183   //
184   // CodeView needs a linker support. We need to interpret and debug
185   // info, and then write it to a separate .pdb file.
186 
187   // Ignore DWARF debug info unless /debug is given.
188   if (!Config->Debug && Name.startswith(".debug_"))
189     return nullptr;
190 
191   if (Sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
192     return nullptr;
193   auto *C = make<SectionChunk>(this, Sec);
194   if (Def)
195     C->Checksum = Def->CheckSum;
196 
197   // CodeView sections are stored to a different vector because they are not
198   // linked in the regular manner.
199   if (C->isCodeView())
200     DebugChunks.push_back(C);
201   else if (Config->GuardCF != GuardCFLevel::Off && Name == ".gfids$y")
202     GuardFidChunks.push_back(C);
203   else if (Config->GuardCF != GuardCFLevel::Off && Name == ".gljmp$y")
204     GuardLJmpChunks.push_back(C);
205   else if (Name == ".sxdata")
206     SXDataChunks.push_back(C);
207   else if (Config->TailMerge && Sec->NumberOfRelocations == 0 &&
208            Name == ".rdata" && LeaderName.startswith("??_C@"))
209     // COFF sections that look like string literal sections (i.e. no
210     // relocations, in .rdata, leader symbol name matches the MSVC name mangling
211     // for string literals) are subject to string tail merging.
212     MergeChunk::addSection(C);
213   else
214     Chunks.push_back(C);
215 
216   return C;
217 }
218 
219 void ObjFile::readAssociativeDefinition(
220     COFFSymbolRef Sym, const coff_aux_section_definition *Def) {
221   readAssociativeDefinition(Sym, Def, Def->getNumber(Sym.isBigObj()));
222 }
223 
224 void ObjFile::readAssociativeDefinition(COFFSymbolRef Sym,
225                                         const coff_aux_section_definition *Def,
226                                         uint32_t ParentSection) {
227   SectionChunk *Parent = SparseChunks[ParentSection];
228 
229   // If the parent is pending, it probably means that its section definition
230   // appears after us in the symbol table. Leave the associated section as
231   // pending; we will handle it during the second pass in initializeSymbols().
232   if (Parent == PendingComdat)
233     return;
234 
235   // Check whether the parent is prevailing. If it is, so are we, and we read
236   // the section; otherwise mark it as discarded.
237   int32_t SectionNumber = Sym.getSectionNumber();
238   if (Parent) {
239     SparseChunks[SectionNumber] = readSection(SectionNumber, Def, "");
240     if (SparseChunks[SectionNumber])
241       Parent->addAssociative(SparseChunks[SectionNumber]);
242   } else {
243     SparseChunks[SectionNumber] = nullptr;
244   }
245 }
246 
247 void ObjFile::recordPrevailingSymbolForMingw(
248     COFFSymbolRef Sym, DenseMap<StringRef, uint32_t> &PrevailingSectionMap) {
249   // For comdat symbols in executable sections, where this is the copy
250   // of the section chunk we actually include instead of discarding it,
251   // add the symbol to a map to allow using it for implicitly
252   // associating .[px]data$<func> sections to it.
253   int32_t SectionNumber = Sym.getSectionNumber();
254   SectionChunk *SC = SparseChunks[SectionNumber];
255   if (SC && SC->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) {
256     StringRef Name;
257     COFFObj->getSymbolName(Sym, Name);
258     PrevailingSectionMap[Name] = SectionNumber;
259   }
260 }
261 
262 void ObjFile::maybeAssociateSEHForMingw(
263     COFFSymbolRef Sym, const coff_aux_section_definition *Def,
264     const DenseMap<StringRef, uint32_t> &PrevailingSectionMap) {
265   StringRef Name;
266   COFFObj->getSymbolName(Sym, Name);
267   if (Name.consume_front(".pdata$") || Name.consume_front(".xdata$")) {
268     // For MinGW, treat .[px]data$<func> as implicitly associative to
269     // the symbol <func>.
270     auto ParentSym = PrevailingSectionMap.find(Name);
271     if (ParentSym != PrevailingSectionMap.end())
272       readAssociativeDefinition(Sym, Def, ParentSym->second);
273   }
274 }
275 
276 Symbol *ObjFile::createRegular(COFFSymbolRef Sym) {
277   SectionChunk *SC = SparseChunks[Sym.getSectionNumber()];
278   if (Sym.isExternal()) {
279     StringRef Name;
280     COFFObj->getSymbolName(Sym, Name);
281     if (SC)
282       return Symtab->addRegular(this, Name, Sym.getGeneric(), SC);
283     // For MinGW symbols named .weak.* that point to a discarded section,
284     // don't create an Undefined symbol. If nothing ever refers to the symbol,
285     // everything should be fine. If something actually refers to the symbol
286     // (e.g. the undefined weak alias), linking will fail due to undefined
287     // references at the end.
288     if (Config->MinGW && Name.startswith(".weak."))
289       return nullptr;
290     return Symtab->addUndefined(Name, this, false);
291   }
292   if (SC)
293     return make<DefinedRegular>(this, /*Name*/ "", false,
294                                 /*IsExternal*/ false, Sym.getGeneric(), SC);
295   return nullptr;
296 }
297 
298 void ObjFile::initializeSymbols() {
299   uint32_t NumSymbols = COFFObj->getNumberOfSymbols();
300   Symbols.resize(NumSymbols);
301 
302   SmallVector<std::pair<Symbol *, uint32_t>, 8> WeakAliases;
303   std::vector<uint32_t> PendingIndexes;
304   PendingIndexes.reserve(NumSymbols);
305 
306   DenseMap<StringRef, uint32_t> PrevailingSectionMap;
307   std::vector<const coff_aux_section_definition *> ComdatDefs(
308       COFFObj->getNumberOfSections() + 1);
309 
310   for (uint32_t I = 0; I < NumSymbols; ++I) {
311     COFFSymbolRef COFFSym = check(COFFObj->getSymbol(I));
312     bool PrevailingComdat;
313     if (COFFSym.isUndefined()) {
314       Symbols[I] = createUndefined(COFFSym);
315     } else if (COFFSym.isWeakExternal()) {
316       Symbols[I] = createUndefined(COFFSym);
317       uint32_t TagIndex = COFFSym.getAux<coff_aux_weak_external>()->TagIndex;
318       WeakAliases.emplace_back(Symbols[I], TagIndex);
319     } else if (Optional<Symbol *> OptSym =
320                    createDefined(COFFSym, ComdatDefs, PrevailingComdat)) {
321       Symbols[I] = *OptSym;
322       if (Config->MinGW && PrevailingComdat)
323         recordPrevailingSymbolForMingw(COFFSym, PrevailingSectionMap);
324     } else {
325       // createDefined() returns None if a symbol belongs to a section that
326       // was pending at the point when the symbol was read. This can happen in
327       // two cases:
328       // 1) section definition symbol for a comdat leader;
329       // 2) symbol belongs to a comdat section associated with a section whose
330       //    section definition symbol appears later in the symbol table.
331       // In both of these cases, we can expect the section to be resolved by
332       // the time we finish visiting the remaining symbols in the symbol
333       // table. So we postpone the handling of this symbol until that time.
334       PendingIndexes.push_back(I);
335     }
336     I += COFFSym.getNumberOfAuxSymbols();
337   }
338 
339   for (uint32_t I : PendingIndexes) {
340     COFFSymbolRef Sym = check(COFFObj->getSymbol(I));
341     if (auto *Def = Sym.getSectionDefinition()) {
342       if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
343         readAssociativeDefinition(Sym, Def);
344       else if (Config->MinGW)
345         maybeAssociateSEHForMingw(Sym, Def, PrevailingSectionMap);
346     }
347     if (SparseChunks[Sym.getSectionNumber()] == PendingComdat) {
348       StringRef Name;
349       COFFObj->getSymbolName(Sym, Name);
350       log("comdat section " + Name +
351           " without leader and unassociated, discarding");
352       continue;
353     }
354     Symbols[I] = createRegular(Sym);
355   }
356 
357   for (auto &KV : WeakAliases) {
358     Symbol *Sym = KV.first;
359     uint32_t Idx = KV.second;
360     checkAndSetWeakAlias(Symtab, this, Sym, Symbols[Idx]);
361   }
362 }
363 
364 Symbol *ObjFile::createUndefined(COFFSymbolRef Sym) {
365   StringRef Name;
366   COFFObj->getSymbolName(Sym, Name);
367   return Symtab->addUndefined(Name, this, Sym.isWeakExternal());
368 }
369 
370 Optional<Symbol *> ObjFile::createDefined(
371     COFFSymbolRef Sym,
372     std::vector<const coff_aux_section_definition *> &ComdatDefs,
373     bool &Prevailing) {
374   Prevailing = false;
375   auto GetName = [&]() {
376     StringRef S;
377     COFFObj->getSymbolName(Sym, S);
378     return S;
379   };
380 
381   if (Sym.isCommon()) {
382     auto *C = make<CommonChunk>(Sym);
383     Chunks.push_back(C);
384     return Symtab->addCommon(this, GetName(), Sym.getValue(), Sym.getGeneric(),
385                              C);
386   }
387 
388   if (Sym.isAbsolute()) {
389     StringRef Name = GetName();
390 
391     // Skip special symbols.
392     if (Name == "@comp.id")
393       return nullptr;
394     if (Name == "@feat.00") {
395       Feat00Flags = Sym.getValue();
396       return nullptr;
397     }
398 
399     if (Sym.isExternal())
400       return Symtab->addAbsolute(Name, Sym);
401     return make<DefinedAbsolute>(Name, Sym);
402   }
403 
404   int32_t SectionNumber = Sym.getSectionNumber();
405   if (SectionNumber == llvm::COFF::IMAGE_SYM_DEBUG)
406     return nullptr;
407 
408   if (llvm::COFF::isReservedSectionNumber(SectionNumber))
409     fatal(toString(this) + ": " + GetName() +
410           " should not refer to special section " + Twine(SectionNumber));
411 
412   if ((uint32_t)SectionNumber >= SparseChunks.size())
413     fatal(toString(this) + ": " + GetName() +
414           " should not refer to non-existent section " + Twine(SectionNumber));
415 
416   // Handle comdat leader symbols.
417   if (const coff_aux_section_definition *Def = ComdatDefs[SectionNumber]) {
418     ComdatDefs[SectionNumber] = nullptr;
419     Symbol *Leader;
420     if (Sym.isExternal()) {
421       std::tie(Leader, Prevailing) =
422           Symtab->addComdat(this, GetName(), Sym.getGeneric());
423     } else {
424       Leader = make<DefinedRegular>(this, /*Name*/ "", false,
425                                     /*IsExternal*/ false, Sym.getGeneric());
426       Prevailing = true;
427     }
428 
429     if (Prevailing) {
430       SectionChunk *C = readSection(SectionNumber, Def, GetName());
431       SparseChunks[SectionNumber] = C;
432       C->Sym = cast<DefinedRegular>(Leader);
433       cast<DefinedRegular>(Leader)->Data = &C->Repl;
434     } else {
435       SparseChunks[SectionNumber] = nullptr;
436     }
437     return Leader;
438   }
439 
440   // Read associative section definitions and prepare to handle the comdat
441   // leader symbol by setting the section's ComdatDefs pointer if we encounter a
442   // non-associative comdat.
443   if (SparseChunks[SectionNumber] == PendingComdat) {
444     if (auto *Def = Sym.getSectionDefinition()) {
445       if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
446         readAssociativeDefinition(Sym, Def);
447       else
448         ComdatDefs[SectionNumber] = Def;
449     }
450   }
451 
452   if (SparseChunks[SectionNumber] == PendingComdat)
453     return None;
454   return createRegular(Sym);
455 }
456 
457 MachineTypes ObjFile::getMachineType() {
458   if (COFFObj)
459     return static_cast<MachineTypes>(COFFObj->getMachine());
460   return IMAGE_FILE_MACHINE_UNKNOWN;
461 }
462 
463 StringRef ltrim1(StringRef S, const char *Chars) {
464   if (!S.empty() && strchr(Chars, S[0]))
465     return S.substr(1);
466   return S;
467 }
468 
469 void ImportFile::parse() {
470   const char *Buf = MB.getBufferStart();
471   const char *End = MB.getBufferEnd();
472   const auto *Hdr = reinterpret_cast<const coff_import_header *>(Buf);
473 
474   // Check if the total size is valid.
475   if ((size_t)(End - Buf) != (sizeof(*Hdr) + Hdr->SizeOfData))
476     fatal("broken import library");
477 
478   // Read names and create an __imp_ symbol.
479   StringRef Name = Saver.save(StringRef(Buf + sizeof(*Hdr)));
480   StringRef ImpName = Saver.save("__imp_" + Name);
481   const char *NameStart = Buf + sizeof(coff_import_header) + Name.size() + 1;
482   DLLName = StringRef(NameStart);
483   StringRef ExtName;
484   switch (Hdr->getNameType()) {
485   case IMPORT_ORDINAL:
486     ExtName = "";
487     break;
488   case IMPORT_NAME:
489     ExtName = Name;
490     break;
491   case IMPORT_NAME_NOPREFIX:
492     ExtName = ltrim1(Name, "?@_");
493     break;
494   case IMPORT_NAME_UNDECORATE:
495     ExtName = ltrim1(Name, "?@_");
496     ExtName = ExtName.substr(0, ExtName.find('@'));
497     break;
498   }
499 
500   this->Hdr = Hdr;
501   ExternalName = ExtName;
502 
503   ImpSym = Symtab->addImportData(ImpName, this);
504   // If this was a duplicate, we logged an error but may continue;
505   // in this case, ImpSym is nullptr.
506   if (!ImpSym)
507     return;
508 
509   if (Hdr->getType() == llvm::COFF::IMPORT_CONST)
510     static_cast<void>(Symtab->addImportData(Name, this));
511 
512   // If type is function, we need to create a thunk which jump to an
513   // address pointed by the __imp_ symbol. (This allows you to call
514   // DLL functions just like regular non-DLL functions.)
515   if (Hdr->getType() == llvm::COFF::IMPORT_CODE)
516     ThunkSym = Symtab->addImportThunk(
517         Name, cast_or_null<DefinedImportData>(ImpSym), Hdr->Machine);
518 }
519 
520 void BitcodeFile::parse() {
521   Obj = check(lto::InputFile::create(MemoryBufferRef(
522       MB.getBuffer(), Saver.save(ParentName + MB.getBufferIdentifier()))));
523   std::vector<std::pair<Symbol *, bool>> Comdat(Obj->getComdatTable().size());
524   for (size_t I = 0; I != Obj->getComdatTable().size(); ++I)
525     Comdat[I] = Symtab->addComdat(this, Saver.save(Obj->getComdatTable()[I]));
526   for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) {
527     StringRef SymName = Saver.save(ObjSym.getName());
528     int ComdatIndex = ObjSym.getComdatIndex();
529     Symbol *Sym;
530     if (ObjSym.isUndefined()) {
531       Sym = Symtab->addUndefined(SymName, this, false);
532     } else if (ObjSym.isCommon()) {
533       Sym = Symtab->addCommon(this, SymName, ObjSym.getCommonSize());
534     } else if (ObjSym.isWeak() && ObjSym.isIndirect()) {
535       // Weak external.
536       Sym = Symtab->addUndefined(SymName, this, true);
537       std::string Fallback = ObjSym.getCOFFWeakExternalFallback();
538       Symbol *Alias = Symtab->addUndefined(Saver.save(Fallback));
539       checkAndSetWeakAlias(Symtab, this, Sym, Alias);
540     } else if (ComdatIndex != -1) {
541       if (SymName == Obj->getComdatTable()[ComdatIndex])
542         Sym = Comdat[ComdatIndex].first;
543       else if (Comdat[ComdatIndex].second)
544         Sym = Symtab->addRegular(this, SymName);
545       else
546         Sym = Symtab->addUndefined(SymName, this, false);
547     } else {
548       Sym = Symtab->addRegular(this, SymName);
549     }
550     Symbols.push_back(Sym);
551   }
552   Directives = Obj->getCOFFLinkerOpts();
553 }
554 
555 MachineTypes BitcodeFile::getMachineType() {
556   switch (Triple(Obj->getTargetTriple()).getArch()) {
557   case Triple::x86_64:
558     return AMD64;
559   case Triple::x86:
560     return I386;
561   case Triple::arm:
562     return ARMNT;
563   case Triple::aarch64:
564     return ARM64;
565   default:
566     return IMAGE_FILE_MACHINE_UNKNOWN;
567   }
568 }
569 } // namespace coff
570 } // namespace lld
571 
572 // Returns the last element of a path, which is supposed to be a filename.
573 static StringRef getBasename(StringRef Path) {
574   return sys::path::filename(Path, sys::path::Style::windows);
575 }
576 
577 // Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)".
578 std::string lld::toString(const coff::InputFile *File) {
579   if (!File)
580     return "<internal>";
581   if (File->ParentName.empty())
582     return File->getName();
583 
584   return (getBasename(File->ParentName) + "(" + getBasename(File->getName()) +
585           ")")
586       .str();
587 }
588