1 //===- SymbolTable.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SymbolTable.h"
10 #include "Config.h"
11 #include "Driver.h"
12 #include "LTO.h"
13 #include "PDB.h"
14 #include "Symbols.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "lld/Common/Memory.h"
17 #include "lld/Common/Timer.h"
18 #include "llvm/IR/LLVMContext.h"
19 #include "llvm/Object/WindowsMachineFlag.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Support/raw_ostream.h"
22 #include <utility>
23 
24 using namespace llvm;
25 
26 namespace lld {
27 namespace coff {
28 
// Timer labelled "LTO" and constructed with Timer::root() as its second
// argument (presumably its parent timer). It is started through a ScopedTimer
// in SymbolTable::addCombinedLTOObjects().
static Timer LTOTimer("LTO", Timer::root());

// Global pointer to the symbol table. Nothing in this part of the file assigns
// it; presumably the driver sets it up before linking starts -- TODO confirm.
SymbolTable *Symtab;
32 
33 void SymbolTable::addFile(InputFile *File) {
34   log("Reading " + toString(File));
35   File->parse();
36 
37   MachineTypes MT = File->getMachineType();
38   if (Config->Machine == IMAGE_FILE_MACHINE_UNKNOWN) {
39     Config->Machine = MT;
40   } else if (MT != IMAGE_FILE_MACHINE_UNKNOWN && Config->Machine != MT) {
41     error(toString(File) + ": machine type " + machineToStr(MT) +
42           " conflicts with " + machineToStr(Config->Machine));
43     return;
44   }
45 
46   if (auto *F = dyn_cast<ObjFile>(File)) {
47     ObjFile::Instances.push_back(F);
48   } else if (auto *F = dyn_cast<BitcodeFile>(File)) {
49     BitcodeFile::Instances.push_back(F);
50   } else if (auto *F = dyn_cast<ImportFile>(File)) {
51     ImportFile::Instances.push_back(F);
52   }
53 
54   Driver->parseDirectives(File);
55 }
56 
57 static void errorOrWarn(const Twine &S) {
58   if (Config->ForceUnresolved)
59     warn(S);
60   else
61     error(S);
62 }
63 
64 // Returns the symbol in SC whose value is <= Addr that is closest to Addr.
65 // This is generally the global variable or function whose definition contains
66 // Addr.
67 static Symbol *getSymbol(SectionChunk *SC, uint32_t Addr) {
68   DefinedRegular *Candidate = nullptr;
69 
70   for (Symbol *S : SC->File->getSymbols()) {
71     auto *D = dyn_cast_or_null<DefinedRegular>(S);
72     if (!D || D->getChunk() != SC || D->getValue() > Addr ||
73         (Candidate && D->getValue() < Candidate->getValue()))
74       continue;
75 
76     Candidate = D;
77   }
78 
79   return Candidate;
80 }
81 
82 // Given a file and the index of a symbol in that file, returns a description
83 // of all references to that symbol from that file. If no debug information is
84 // available, returns just the name of the file, else one string per actual
85 // reference as described in the debug info.
86 std::vector<std::string> getSymbolLocations(ObjFile *File, uint32_t SymIndex) {
87   struct Location {
88     Symbol *Sym;
89     std::pair<StringRef, uint32_t> FileLine;
90   };
91   std::vector<Location> Locations;
92 
93   for (Chunk *C : File->getChunks()) {
94     auto *SC = dyn_cast<SectionChunk>(C);
95     if (!SC)
96       continue;
97     for (const coff_relocation &R : SC->getRelocs()) {
98       if (R.SymbolTableIndex != SymIndex)
99         continue;
100       std::pair<StringRef, uint32_t> FileLine =
101           getFileLine(SC, R.VirtualAddress);
102       Symbol *Sym = getSymbol(SC, R.VirtualAddress);
103       if (!FileLine.first.empty() || Sym)
104         Locations.push_back({Sym, FileLine});
105     }
106   }
107 
108   if (Locations.empty())
109     return std::vector<std::string>({"\n>>> referenced by " + toString(File)});
110 
111   std::vector<std::string> SymbolLocations(Locations.size());
112   size_t I = 0;
113   for (Location Loc : Locations) {
114     llvm::raw_string_ostream OS(SymbolLocations[I++]);
115     OS << "\n>>> referenced by ";
116     if (!Loc.FileLine.first.empty())
117       OS << Loc.FileLine.first << ":" << Loc.FileLine.second
118          << "\n>>>               ";
119     OS << toString(File);
120     if (Loc.Sym)
121       OS << ":(" << toString(*Loc.Sym) << ')';
122   }
123   return SymbolLocations;
124 }
125 
// For an undefined symbol, stores all files referencing it and the index of
// the undefined symbol in each file.
//
// Instances are created with brace-initialisation, so the order of the
// members below must not change.
struct UndefinedDiag {
  // The undefined symbol this diagnostic is about.
  Symbol *Sym;
  // One reference site: an object file plus the symbol's index in that file.
  struct File {
    // Object file that refers to the symbol.
    ObjFile *OFile;
    // Position of the symbol in OFile's symbol list; it is handed to
    // getSymbolLocations(), which compares it with relocation symbol indices.
    uint64_t SymIndex;
  };
  // Every object file found to reference Sym.
  std::vector<File> Files;
};
136 
137 static void reportUndefinedSymbol(const UndefinedDiag &UndefDiag) {
138   std::string Out;
139   llvm::raw_string_ostream OS(Out);
140   OS << "undefined symbol: " << toString(*UndefDiag.Sym);
141 
142   const size_t MaxUndefReferences = 10;
143   size_t I = 0, NumRefs = 0;
144   for (const UndefinedDiag::File &Ref : UndefDiag.Files) {
145     std::vector<std::string> SymbolLocations =
146         getSymbolLocations(Ref.OFile, Ref.SymIndex);
147     NumRefs += SymbolLocations.size();
148     for (const std::string &S : SymbolLocations) {
149       if (I >= MaxUndefReferences)
150         break;
151       OS << S;
152       I++;
153     }
154   }
155   if (I < NumRefs)
156     OS << "\n>>> referenced " << NumRefs - I << " more times";
157   errorOrWarn(OS.str());
158 }
159 
160 void SymbolTable::loadMinGWAutomaticImports() {
161   for (auto &I : SymMap) {
162     Symbol *Sym = I.second;
163     auto *Undef = dyn_cast<Undefined>(Sym);
164     if (!Undef)
165       continue;
166     if (!Sym->IsUsedInRegularObj)
167       continue;
168 
169     StringRef Name = Undef->getName();
170 
171     if (Name.startswith("__imp_"))
172       continue;
173     // If we have an undefined symbol, but we have a Lazy representing a
174     // symbol we could load from file, make sure to load that.
175     Lazy *L = dyn_cast_or_null<Lazy>(find(("__imp_" + Name).str()));
176     if (!L || L->PendingArchiveLoad)
177       continue;
178 
179     log("Loading lazy " + L->getName() + " from " + L->File->getName() +
180         " for automatic import");
181     L->PendingArchiveLoad = true;
182     L->File->addMember(&L->Sym);
183   }
184 }
185 
186 bool SymbolTable::handleMinGWAutomaticImport(Symbol *Sym, StringRef Name) {
187   if (Name.startswith("__imp_"))
188     return false;
189   Defined *Imp = dyn_cast_or_null<Defined>(find(("__imp_" + Name).str()));
190   if (!Imp)
191     return false;
192 
193   // Replace the reference directly to a variable with a reference
194   // to the import address table instead. This obviously isn't right,
195   // but we mark the symbol as IsRuntimePseudoReloc, and a later pass
196   // will add runtime pseudo relocations for every relocation against
197   // this Symbol. The runtime pseudo relocation framework expects the
198   // reference itself to point at the IAT entry.
199   size_t ImpSize = 0;
200   if (isa<DefinedImportData>(Imp)) {
201     log("Automatically importing " + Name + " from " +
202         cast<DefinedImportData>(Imp)->getDLLName());
203     ImpSize = sizeof(DefinedImportData);
204   } else if (isa<DefinedRegular>(Imp)) {
205     log("Automatically importing " + Name + " from " +
206         toString(cast<DefinedRegular>(Imp)->File));
207     ImpSize = sizeof(DefinedRegular);
208   } else {
209     warn("unable to automatically import " + Name + " from " + Imp->getName() +
210          " from " + toString(cast<DefinedRegular>(Imp)->File) +
211          "; unexpected symbol type");
212     return false;
213   }
214   Sym->replaceKeepingName(Imp, ImpSize);
215   Sym->IsRuntimePseudoReloc = true;
216 
217   // There may exist symbols named .refptr.<name> which only consist
218   // of a single pointer to <name>. If it turns out <name> is
219   // automatically imported, we don't need to keep the .refptr.<name>
220   // pointer at all, but redirect all accesses to it to the IAT entry
221   // for __imp_<name> instead, and drop the whole .refptr.<name> chunk.
222   DefinedRegular *Refptr =
223       dyn_cast_or_null<DefinedRegular>(find((".refptr." + Name).str()));
224   if (Refptr && Refptr->getChunk()->getSize() == Config->Wordsize) {
225     SectionChunk *SC = dyn_cast_or_null<SectionChunk>(Refptr->getChunk());
226     if (SC && SC->getRelocs().size() == 1 && *SC->symbols().begin() == Sym) {
227       log("Replacing .refptr." + Name + " with " + Imp->getName());
228       Refptr->getChunk()->Live = false;
229       Refptr->replaceKeepingName(Imp, ImpSize);
230     }
231   }
232   return true;
233 }
234 
235 void SymbolTable::reportRemainingUndefines() {
236   SmallPtrSet<Symbol *, 8> Undefs;
237   DenseMap<Symbol *, Symbol *> LocalImports;
238 
239   for (auto &I : SymMap) {
240     Symbol *Sym = I.second;
241     auto *Undef = dyn_cast<Undefined>(Sym);
242     if (!Undef)
243       continue;
244     if (!Sym->IsUsedInRegularObj)
245       continue;
246 
247     StringRef Name = Undef->getName();
248 
249     // A weak alias may have been resolved, so check for that.
250     if (Defined *D = Undef->getWeakAlias()) {
251       // We want to replace Sym with D. However, we can't just blindly
252       // copy sizeof(SymbolUnion) bytes from D to Sym because D may be an
253       // internal symbol, and internal symbols are stored as "unparented"
254       // Symbols. For that reason we need to check which type of symbol we
255       // are dealing with and copy the correct number of bytes.
256       if (isa<DefinedRegular>(D))
257         memcpy(Sym, D, sizeof(DefinedRegular));
258       else if (isa<DefinedAbsolute>(D))
259         memcpy(Sym, D, sizeof(DefinedAbsolute));
260       else
261         memcpy(Sym, D, sizeof(SymbolUnion));
262       continue;
263     }
264 
265     // If we can resolve a symbol by removing __imp_ prefix, do that.
266     // This odd rule is for compatibility with MSVC linker.
267     if (Name.startswith("__imp_")) {
268       Symbol *Imp = find(Name.substr(strlen("__imp_")));
269       if (Imp && isa<Defined>(Imp)) {
270         auto *D = cast<Defined>(Imp);
271         replaceSymbol<DefinedLocalImport>(Sym, Name, D);
272         LocalImportChunks.push_back(cast<DefinedLocalImport>(Sym)->getChunk());
273         LocalImports[Sym] = D;
274         continue;
275       }
276     }
277 
278     // We don't want to report missing Microsoft precompiled headers symbols.
279     // A proper message will be emitted instead in PDBLinker::aquirePrecompObj
280     if (Name.contains("_PchSym_"))
281       continue;
282 
283     if (Config->MinGW && handleMinGWAutomaticImport(Sym, Name))
284       continue;
285 
286     // Remaining undefined symbols are not fatal if /force is specified.
287     // They are replaced with dummy defined symbols.
288     if (Config->ForceUnresolved)
289       replaceSymbol<DefinedAbsolute>(Sym, Name, 0);
290     Undefs.insert(Sym);
291   }
292 
293   if (Undefs.empty() && LocalImports.empty())
294     return;
295 
296   for (Symbol *B : Config->GCRoot) {
297     if (Undefs.count(B))
298       errorOrWarn("<root>: undefined symbol: " + toString(*B));
299     if (Config->WarnLocallyDefinedImported)
300       if (Symbol *Imp = LocalImports.lookup(B))
301         warn("<root>: locally defined symbol imported: " + toString(*Imp) +
302              " (defined in " + toString(Imp->getFile()) + ") [LNK4217]");
303   }
304 
305   std::vector<UndefinedDiag> UndefDiags;
306   DenseMap<Symbol *, int> FirstDiag;
307 
308   for (ObjFile *File : ObjFile::Instances) {
309     size_t SymIndex = (size_t)-1;
310     for (Symbol *Sym : File->getSymbols()) {
311       ++SymIndex;
312       if (!Sym)
313         continue;
314       if (Undefs.count(Sym)) {
315         auto it = FirstDiag.find(Sym);
316         if (it == FirstDiag.end()) {
317           FirstDiag[Sym] = UndefDiags.size();
318           UndefDiags.push_back({Sym, {{File, SymIndex}}});
319         } else {
320           UndefDiags[it->second].Files.push_back({File, SymIndex});
321         }
322       }
323       if (Config->WarnLocallyDefinedImported)
324         if (Symbol *Imp = LocalImports.lookup(Sym))
325           warn(toString(File) +
326                ": locally defined symbol imported: " + toString(*Imp) +
327                " (defined in " + toString(Imp->getFile()) + ") [LNK4217]");
328     }
329   }
330 
331   for (const UndefinedDiag& UndefDiag : UndefDiags)
332     reportUndefinedSymbol(UndefDiag);
333 }
334 
335 std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name) {
336   bool Inserted = false;
337   Symbol *&Sym = SymMap[CachedHashStringRef(Name)];
338   if (!Sym) {
339     Sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
340     Sym->IsUsedInRegularObj = false;
341     Sym->PendingArchiveLoad = false;
342     Inserted = true;
343   }
344   return {Sym, Inserted};
345 }
346 
347 std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name, InputFile *File) {
348   std::pair<Symbol *, bool> Result = insert(Name);
349   if (!File || !isa<BitcodeFile>(File))
350     Result.first->IsUsedInRegularObj = true;
351   return Result;
352 }
353 
354 Symbol *SymbolTable::addUndefined(StringRef Name, InputFile *F,
355                                   bool IsWeakAlias) {
356   Symbol *S;
357   bool WasInserted;
358   std::tie(S, WasInserted) = insert(Name, F);
359   if (WasInserted || (isa<Lazy>(S) && IsWeakAlias)) {
360     replaceSymbol<Undefined>(S, Name);
361     return S;
362   }
363   if (auto *L = dyn_cast<Lazy>(S)) {
364     if (!S->PendingArchiveLoad) {
365       S->PendingArchiveLoad = true;
366       L->File->addMember(&L->Sym);
367     }
368   }
369   return S;
370 }
371 
372 void SymbolTable::addLazy(ArchiveFile *F, const Archive::Symbol Sym) {
373   StringRef Name = Sym.getName();
374   Symbol *S;
375   bool WasInserted;
376   std::tie(S, WasInserted) = insert(Name);
377   if (WasInserted) {
378     replaceSymbol<Lazy>(S, F, Sym);
379     return;
380   }
381   auto *U = dyn_cast<Undefined>(S);
382   if (!U || U->WeakAlias || S->PendingArchiveLoad)
383     return;
384   S->PendingArchiveLoad = true;
385   F->addMember(&Sym);
386 }
387 
388 void SymbolTable::reportDuplicate(Symbol *Existing, InputFile *NewFile) {
389   std::string Msg = "duplicate symbol: " + toString(*Existing) + " in " +
390                     toString(Existing->getFile()) + " and in " +
391                     toString(NewFile);
392 
393   if (Config->ForceMultiple)
394     warn(Msg);
395   else
396     error(Msg);
397 }
398 
399 Symbol *SymbolTable::addAbsolute(StringRef N, COFFSymbolRef Sym) {
400   Symbol *S;
401   bool WasInserted;
402   std::tie(S, WasInserted) = insert(N, nullptr);
403   S->IsUsedInRegularObj = true;
404   if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S))
405     replaceSymbol<DefinedAbsolute>(S, N, Sym);
406   else if (!isa<DefinedCOFF>(S))
407     reportDuplicate(S, nullptr);
408   return S;
409 }
410 
411 Symbol *SymbolTable::addAbsolute(StringRef N, uint64_t VA) {
412   Symbol *S;
413   bool WasInserted;
414   std::tie(S, WasInserted) = insert(N, nullptr);
415   S->IsUsedInRegularObj = true;
416   if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S))
417     replaceSymbol<DefinedAbsolute>(S, N, VA);
418   else if (!isa<DefinedCOFF>(S))
419     reportDuplicate(S, nullptr);
420   return S;
421 }
422 
423 Symbol *SymbolTable::addSynthetic(StringRef N, Chunk *C) {
424   Symbol *S;
425   bool WasInserted;
426   std::tie(S, WasInserted) = insert(N, nullptr);
427   S->IsUsedInRegularObj = true;
428   if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S))
429     replaceSymbol<DefinedSynthetic>(S, N, C);
430   else if (!isa<DefinedCOFF>(S))
431     reportDuplicate(S, nullptr);
432   return S;
433 }
434 
435 Symbol *SymbolTable::addRegular(InputFile *F, StringRef N,
436                                 const coff_symbol_generic *Sym,
437                                 SectionChunk *C) {
438   Symbol *S;
439   bool WasInserted;
440   std::tie(S, WasInserted) = insert(N, F);
441   if (WasInserted || !isa<DefinedRegular>(S))
442     replaceSymbol<DefinedRegular>(S, F, N, /*IsCOMDAT*/ false,
443                                   /*IsExternal*/ true, Sym, C);
444   else
445     reportDuplicate(S, F);
446   return S;
447 }
448 
449 std::pair<DefinedRegular *, bool>
450 SymbolTable::addComdat(InputFile *F, StringRef N,
451                        const coff_symbol_generic *Sym) {
452   Symbol *S;
453   bool WasInserted;
454   std::tie(S, WasInserted) = insert(N, F);
455   if (WasInserted || !isa<DefinedRegular>(S)) {
456     replaceSymbol<DefinedRegular>(S, F, N, /*IsCOMDAT*/ true,
457                                   /*IsExternal*/ true, Sym, nullptr);
458     return {cast<DefinedRegular>(S), true};
459   }
460   auto *ExistingSymbol = cast<DefinedRegular>(S);
461   if (!ExistingSymbol->isCOMDAT())
462     reportDuplicate(S, F);
463   return {ExistingSymbol, false};
464 }
465 
466 Symbol *SymbolTable::addCommon(InputFile *F, StringRef N, uint64_t Size,
467                                const coff_symbol_generic *Sym, CommonChunk *C) {
468   Symbol *S;
469   bool WasInserted;
470   std::tie(S, WasInserted) = insert(N, F);
471   if (WasInserted || !isa<DefinedCOFF>(S))
472     replaceSymbol<DefinedCommon>(S, F, N, Size, Sym, C);
473   else if (auto *DC = dyn_cast<DefinedCommon>(S))
474     if (Size > DC->getSize())
475       replaceSymbol<DefinedCommon>(S, F, N, Size, Sym, C);
476   return S;
477 }
478 
479 Symbol *SymbolTable::addImportData(StringRef N, ImportFile *F) {
480   Symbol *S;
481   bool WasInserted;
482   std::tie(S, WasInserted) = insert(N, nullptr);
483   S->IsUsedInRegularObj = true;
484   if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) {
485     replaceSymbol<DefinedImportData>(S, N, F);
486     return S;
487   }
488 
489   reportDuplicate(S, F);
490   return nullptr;
491 }
492 
493 Symbol *SymbolTable::addImportThunk(StringRef Name, DefinedImportData *ID,
494                                     uint16_t Machine) {
495   Symbol *S;
496   bool WasInserted;
497   std::tie(S, WasInserted) = insert(Name, nullptr);
498   S->IsUsedInRegularObj = true;
499   if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) {
500     replaceSymbol<DefinedImportThunk>(S, Name, ID, Machine);
501     return S;
502   }
503 
504   reportDuplicate(S, ID->File);
505   return nullptr;
506 }
507 
508 std::vector<Chunk *> SymbolTable::getChunks() {
509   std::vector<Chunk *> Res;
510   for (ObjFile *File : ObjFile::Instances) {
511     ArrayRef<Chunk *> V = File->getChunks();
512     Res.insert(Res.end(), V.begin(), V.end());
513   }
514   return Res;
515 }
516 
517 Symbol *SymbolTable::find(StringRef Name) {
518   return SymMap.lookup(CachedHashStringRef(Name));
519 }
520 
521 Symbol *SymbolTable::findUnderscore(StringRef Name) {
522   if (Config->Machine == I386)
523     return find(("_" + Name).str());
524   return find(Name);
525 }
526 
527 // Return all symbols that start with Prefix, possibly ignoring the first
528 // character of Prefix or the first character symbol.
529 std::vector<Symbol *> SymbolTable::getSymsWithPrefix(StringRef Prefix) {
530   std::vector<Symbol *> Syms;
531   for (auto Pair : SymMap) {
532     StringRef Name = Pair.first.val();
533     if (Name.startswith(Prefix) || Name.startswith(Prefix.drop_front()) ||
534         Name.drop_front().startswith(Prefix) ||
535         Name.drop_front().startswith(Prefix.drop_front())) {
536       Syms.push_back(Pair.second);
537     }
538   }
539   return Syms;
540 }
541 
542 Symbol *SymbolTable::findMangle(StringRef Name) {
543   if (Symbol *Sym = find(Name))
544     if (!isa<Undefined>(Sym))
545       return Sym;
546 
547   // Efficient fuzzy string lookup is impossible with a hash table, so iterate
548   // the symbol table once and collect all possibly matching symbols into this
549   // vector. Then compare each possibly matching symbol with each possible
550   // mangling.
551   std::vector<Symbol *> Syms = getSymsWithPrefix(Name);
552   auto FindByPrefix = [&Syms](const Twine &T) -> Symbol * {
553     std::string Prefix = T.str();
554     for (auto *S : Syms)
555       if (S->getName().startswith(Prefix))
556         return S;
557     return nullptr;
558   };
559 
560   // For non-x86, just look for C++ functions.
561   if (Config->Machine != I386)
562     return FindByPrefix("?" + Name + "@@Y");
563 
564   if (!Name.startswith("_"))
565     return nullptr;
566   // Search for x86 stdcall function.
567   if (Symbol *S = FindByPrefix(Name + "@"))
568     return S;
569   // Search for x86 fastcall function.
570   if (Symbol *S = FindByPrefix("@" + Name.substr(1) + "@"))
571     return S;
572   // Search for x86 vectorcall function.
573   if (Symbol *S = FindByPrefix(Name.substr(1) + "@@"))
574     return S;
575   // Search for x86 C++ non-member function.
576   return FindByPrefix("?" + Name.substr(1) + "@@Y");
577 }
578 
579 Symbol *SymbolTable::addUndefined(StringRef Name) {
580   return addUndefined(Name, nullptr, false);
581 }
582 
583 std::vector<StringRef> SymbolTable::compileBitcodeFiles() {
584   LTO.reset(new BitcodeCompiler);
585   for (BitcodeFile *F : BitcodeFile::Instances)
586     LTO->add(*F);
587   return LTO->compile();
588 }
589 
590 void SymbolTable::addCombinedLTOObjects() {
591   if (BitcodeFile::Instances.empty())
592     return;
593 
594   ScopedTimer T(LTOTimer);
595   for (StringRef Object : compileBitcodeFiles()) {
596     auto *Obj = make<ObjFile>(MemoryBufferRef(Object, "lto.tmp"));
597     Obj->parse();
598     ObjFile::Instances.push_back(Obj);
599   }
600 }
601 
602 } // namespace coff
603 } // namespace lld
604