1 //===- SymbolTable.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SymbolTable.h"
10 #include "Config.h"
11 #include "Driver.h"
12 #include "LTO.h"
13 #include "PDB.h"
14 #include "Symbols.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "lld/Common/Memory.h"
17 #include "lld/Common/Timer.h"
18 #include "llvm/IR/LLVMContext.h"
19 #include "llvm/Object/WindowsMachineFlag.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Support/raw_ostream.h"
22 #include <utility>
23 
24 using namespace llvm;
25 
26 namespace lld {
27 namespace coff {
28 
// Timer labelled "LTO", constructed with Timer::root() as its second
// argument. addCombinedLTOObjects() wraps the bitcode compilation in a
// ScopedTimer built on it.
static Timer LTOTimer("LTO", Timer::root());

// Global pointer to a SymbolTable. Nothing in this file assigns it
// (presumably it is set up by the driver -- confirm against the caller).
SymbolTable *Symtab;
32 
33 void SymbolTable::addFile(InputFile *File) {
34   log("Reading " + toString(File));
35   File->parse();
36 
37   MachineTypes MT = File->getMachineType();
38   if (Config->Machine == IMAGE_FILE_MACHINE_UNKNOWN) {
39     Config->Machine = MT;
40   } else if (MT != IMAGE_FILE_MACHINE_UNKNOWN && Config->Machine != MT) {
41     error(toString(File) + ": machine type " + machineToStr(MT) +
42           " conflicts with " + machineToStr(Config->Machine));
43     return;
44   }
45 
46   if (auto *F = dyn_cast<ObjFile>(File)) {
47     ObjFile::Instances.push_back(F);
48   } else if (auto *F = dyn_cast<BitcodeFile>(File)) {
49     BitcodeFile::Instances.push_back(F);
50   } else if (auto *F = dyn_cast<ImportFile>(File)) {
51     ImportFile::Instances.push_back(F);
52   }
53 
54   Driver->parseDirectives(File);
55 }
56 
57 static void errorOrWarn(const Twine &S) {
58   if (Config->ForceUnresolved)
59     warn(S);
60   else
61     error(S);
62 }
63 
64 // Returns the symbol in SC whose value is <= Addr that is closest to Addr.
65 // This is generally the global variable or function whose definition contains
66 // Addr.
67 static Symbol *getSymbol(SectionChunk *SC, uint32_t Addr) {
68   DefinedRegular *Candidate = nullptr;
69 
70   for (Symbol *S : SC->File->getSymbols()) {
71     auto *D = dyn_cast_or_null<DefinedRegular>(S);
72     if (!D || D->getChunk() != SC || D->getValue() > Addr ||
73         (Candidate && D->getValue() < Candidate->getValue()))
74       continue;
75 
76     Candidate = D;
77   }
78 
79   return Candidate;
80 }
81 
82 std::string getSymbolLocations(ObjFile *File, uint32_t SymIndex) {
83   struct Location {
84     Symbol *Sym;
85     std::pair<StringRef, uint32_t> FileLine;
86   };
87   std::vector<Location> Locations;
88 
89   for (Chunk *C : File->getChunks()) {
90     auto *SC = dyn_cast<SectionChunk>(C);
91     if (!SC)
92       continue;
93     for (const coff_relocation &R : SC->getRelocs()) {
94       if (R.SymbolTableIndex != SymIndex)
95         continue;
96       std::pair<StringRef, uint32_t> FileLine =
97           getFileLine(SC, R.VirtualAddress);
98       Symbol *Sym = getSymbol(SC, R.VirtualAddress);
99       if (!FileLine.first.empty() || Sym)
100         Locations.push_back({Sym, FileLine});
101     }
102   }
103 
104   if (Locations.empty())
105     return "\n>>> referenced by " + toString(File);
106 
107   std::string Out;
108   llvm::raw_string_ostream OS(Out);
109   for (Location Loc : Locations) {
110     OS << "\n>>> referenced by ";
111     if (!Loc.FileLine.first.empty())
112       OS << Loc.FileLine.first << ":" << Loc.FileLine.second
113          << "\n>>>               ";
114     OS << toString(File);
115     if (Loc.Sym)
116       OS << ":(" << toString(*Loc.Sym) << ')';
117   }
118   return OS.str();
119 }
120 
121 void SymbolTable::loadMinGWAutomaticImports() {
122   for (auto &I : SymMap) {
123     Symbol *Sym = I.second;
124     auto *Undef = dyn_cast<Undefined>(Sym);
125     if (!Undef)
126       continue;
127     if (!Sym->IsUsedInRegularObj)
128       continue;
129 
130     StringRef Name = Undef->getName();
131 
132     if (Name.startswith("__imp_"))
133       continue;
134     // If we have an undefined symbol, but we have a Lazy representing a
135     // symbol we could load from file, make sure to load that.
136     Lazy *L = dyn_cast_or_null<Lazy>(find(("__imp_" + Name).str()));
137     if (!L || L->PendingArchiveLoad)
138       continue;
139 
140     log("Loading lazy " + L->getName() + " from " + L->File->getName() +
141         " for automatic import");
142     L->PendingArchiveLoad = true;
143     L->File->addMember(&L->Sym);
144   }
145 }
146 
147 bool SymbolTable::handleMinGWAutomaticImport(Symbol *Sym, StringRef Name) {
148   if (Name.startswith("__imp_"))
149     return false;
150   Defined *Imp = dyn_cast_or_null<Defined>(find(("__imp_" + Name).str()));
151   if (!Imp)
152     return false;
153 
154   // Replace the reference directly to a variable with a reference
155   // to the import address table instead. This obviously isn't right,
156   // but we mark the symbol as IsRuntimePseudoReloc, and a later pass
157   // will add runtime pseudo relocations for every relocation against
158   // this Symbol. The runtime pseudo relocation framework expects the
159   // reference itself to point at the IAT entry.
160   size_t ImpSize = 0;
161   if (isa<DefinedImportData>(Imp)) {
162     log("Automatically importing " + Name + " from " +
163         cast<DefinedImportData>(Imp)->getDLLName());
164     ImpSize = sizeof(DefinedImportData);
165   } else if (isa<DefinedRegular>(Imp)) {
166     log("Automatically importing " + Name + " from " +
167         toString(cast<DefinedRegular>(Imp)->File));
168     ImpSize = sizeof(DefinedRegular);
169   } else {
170     warn("unable to automatically import " + Name + " from " + Imp->getName() +
171          " from " + toString(cast<DefinedRegular>(Imp)->File) +
172          "; unexpected symbol type");
173     return false;
174   }
175   Sym->replaceKeepingName(Imp, ImpSize);
176   Sym->IsRuntimePseudoReloc = true;
177 
178   // There may exist symbols named .refptr.<name> which only consist
179   // of a single pointer to <name>. If it turns out <name> is
180   // automatically imported, we don't need to keep the .refptr.<name>
181   // pointer at all, but redirect all accesses to it to the IAT entry
182   // for __imp_<name> instead, and drop the whole .refptr.<name> chunk.
183   DefinedRegular *Refptr =
184       dyn_cast_or_null<DefinedRegular>(find((".refptr." + Name).str()));
185   if (Refptr && Refptr->getChunk()->getSize() == Config->Wordsize) {
186     SectionChunk *SC = dyn_cast_or_null<SectionChunk>(Refptr->getChunk());
187     if (SC && SC->getRelocs().size() == 1 && *SC->symbols().begin() == Sym) {
188       log("Replacing .refptr." + Name + " with " + Imp->getName());
189       Refptr->getChunk()->Live = false;
190       Refptr->replaceKeepingName(Imp, ImpSize);
191     }
192   }
193   return true;
194 }
195 
196 void SymbolTable::reportRemainingUndefines() {
197   SmallPtrSet<Symbol *, 8> Undefs;
198   DenseMap<Symbol *, Symbol *> LocalImports;
199 
200   for (auto &I : SymMap) {
201     Symbol *Sym = I.second;
202     auto *Undef = dyn_cast<Undefined>(Sym);
203     if (!Undef)
204       continue;
205     if (!Sym->IsUsedInRegularObj)
206       continue;
207 
208     StringRef Name = Undef->getName();
209 
210     // A weak alias may have been resolved, so check for that.
211     if (Defined *D = Undef->getWeakAlias()) {
212       // We want to replace Sym with D. However, we can't just blindly
213       // copy sizeof(SymbolUnion) bytes from D to Sym because D may be an
214       // internal symbol, and internal symbols are stored as "unparented"
215       // Symbols. For that reason we need to check which type of symbol we
216       // are dealing with and copy the correct number of bytes.
217       if (isa<DefinedRegular>(D))
218         memcpy(Sym, D, sizeof(DefinedRegular));
219       else if (isa<DefinedAbsolute>(D))
220         memcpy(Sym, D, sizeof(DefinedAbsolute));
221       else
222         memcpy(Sym, D, sizeof(SymbolUnion));
223       continue;
224     }
225 
226     // If we can resolve a symbol by removing __imp_ prefix, do that.
227     // This odd rule is for compatibility with MSVC linker.
228     if (Name.startswith("__imp_")) {
229       Symbol *Imp = find(Name.substr(strlen("__imp_")));
230       if (Imp && isa<Defined>(Imp)) {
231         auto *D = cast<Defined>(Imp);
232         replaceSymbol<DefinedLocalImport>(Sym, Name, D);
233         LocalImportChunks.push_back(cast<DefinedLocalImport>(Sym)->getChunk());
234         LocalImports[Sym] = D;
235         continue;
236       }
237     }
238 
239     // We don't want to report missing Microsoft precompiled headers symbols.
240     // A proper message will be emitted instead in PDBLinker::aquirePrecompObj
241     if (Name.contains("_PchSym_"))
242       continue;
243 
244     if (Config->MinGW && handleMinGWAutomaticImport(Sym, Name))
245       continue;
246 
247     // Remaining undefined symbols are not fatal if /force is specified.
248     // They are replaced with dummy defined symbols.
249     if (Config->ForceUnresolved)
250       replaceSymbol<DefinedAbsolute>(Sym, Name, 0);
251     Undefs.insert(Sym);
252   }
253 
254   if (Undefs.empty() && LocalImports.empty())
255     return;
256 
257   for (Symbol *B : Config->GCRoot) {
258     if (Undefs.count(B))
259       errorOrWarn("<root>: undefined symbol: " + toString(*B));
260     if (Config->WarnLocallyDefinedImported)
261       if (Symbol *Imp = LocalImports.lookup(B))
262         warn("<root>: locally defined symbol imported: " + toString(*Imp) +
263              " (defined in " + toString(Imp->getFile()) + ") [LNK4217]");
264   }
265 
266   for (ObjFile *File : ObjFile::Instances) {
267     size_t SymIndex = (size_t)-1;
268     for (Symbol *Sym : File->getSymbols()) {
269       ++SymIndex;
270       if (!Sym)
271         continue;
272       if (Undefs.count(Sym))
273         errorOrWarn("undefined symbol: " + toString(*Sym) +
274                     getSymbolLocations(File, SymIndex));
275       if (Config->WarnLocallyDefinedImported)
276         if (Symbol *Imp = LocalImports.lookup(Sym))
277           warn(toString(File) +
278                ": locally defined symbol imported: " + toString(*Imp) +
279                " (defined in " + toString(Imp->getFile()) + ") [LNK4217]");
280     }
281   }
282 }
283 
284 std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name) {
285   bool Inserted = false;
286   Symbol *&Sym = SymMap[CachedHashStringRef(Name)];
287   if (!Sym) {
288     Sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
289     Sym->IsUsedInRegularObj = false;
290     Sym->PendingArchiveLoad = false;
291     Inserted = true;
292   }
293   return {Sym, Inserted};
294 }
295 
296 std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name, InputFile *File) {
297   std::pair<Symbol *, bool> Result = insert(Name);
298   if (!File || !isa<BitcodeFile>(File))
299     Result.first->IsUsedInRegularObj = true;
300   return Result;
301 }
302 
303 Symbol *SymbolTable::addUndefined(StringRef Name, InputFile *F,
304                                   bool IsWeakAlias) {
305   Symbol *S;
306   bool WasInserted;
307   std::tie(S, WasInserted) = insert(Name, F);
308   if (WasInserted || (isa<Lazy>(S) && IsWeakAlias)) {
309     replaceSymbol<Undefined>(S, Name);
310     return S;
311   }
312   if (auto *L = dyn_cast<Lazy>(S)) {
313     if (!S->PendingArchiveLoad) {
314       S->PendingArchiveLoad = true;
315       L->File->addMember(&L->Sym);
316     }
317   }
318   return S;
319 }
320 
321 void SymbolTable::addLazy(ArchiveFile *F, const Archive::Symbol Sym) {
322   StringRef Name = Sym.getName();
323   Symbol *S;
324   bool WasInserted;
325   std::tie(S, WasInserted) = insert(Name);
326   if (WasInserted) {
327     replaceSymbol<Lazy>(S, F, Sym);
328     return;
329   }
330   auto *U = dyn_cast<Undefined>(S);
331   if (!U || U->WeakAlias || S->PendingArchiveLoad)
332     return;
333   S->PendingArchiveLoad = true;
334   F->addMember(&Sym);
335 }
336 
337 void SymbolTable::reportDuplicate(Symbol *Existing, InputFile *NewFile) {
338   std::string Msg = "duplicate symbol: " + toString(*Existing) + " in " +
339                     toString(Existing->getFile()) + " and in " +
340                     toString(NewFile);
341 
342   if (Config->ForceMultiple)
343     warn(Msg);
344   else
345     error(Msg);
346 }
347 
348 Symbol *SymbolTable::addAbsolute(StringRef N, COFFSymbolRef Sym) {
349   Symbol *S;
350   bool WasInserted;
351   std::tie(S, WasInserted) = insert(N, nullptr);
352   S->IsUsedInRegularObj = true;
353   if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S))
354     replaceSymbol<DefinedAbsolute>(S, N, Sym);
355   else if (!isa<DefinedCOFF>(S))
356     reportDuplicate(S, nullptr);
357   return S;
358 }
359 
360 Symbol *SymbolTable::addAbsolute(StringRef N, uint64_t VA) {
361   Symbol *S;
362   bool WasInserted;
363   std::tie(S, WasInserted) = insert(N, nullptr);
364   S->IsUsedInRegularObj = true;
365   if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S))
366     replaceSymbol<DefinedAbsolute>(S, N, VA);
367   else if (!isa<DefinedCOFF>(S))
368     reportDuplicate(S, nullptr);
369   return S;
370 }
371 
372 Symbol *SymbolTable::addSynthetic(StringRef N, Chunk *C) {
373   Symbol *S;
374   bool WasInserted;
375   std::tie(S, WasInserted) = insert(N, nullptr);
376   S->IsUsedInRegularObj = true;
377   if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S))
378     replaceSymbol<DefinedSynthetic>(S, N, C);
379   else if (!isa<DefinedCOFF>(S))
380     reportDuplicate(S, nullptr);
381   return S;
382 }
383 
384 Symbol *SymbolTable::addRegular(InputFile *F, StringRef N,
385                                 const coff_symbol_generic *Sym,
386                                 SectionChunk *C) {
387   Symbol *S;
388   bool WasInserted;
389   std::tie(S, WasInserted) = insert(N, F);
390   if (WasInserted || !isa<DefinedRegular>(S))
391     replaceSymbol<DefinedRegular>(S, F, N, /*IsCOMDAT*/ false,
392                                   /*IsExternal*/ true, Sym, C);
393   else
394     reportDuplicate(S, F);
395   return S;
396 }
397 
398 std::pair<DefinedRegular *, bool>
399 SymbolTable::addComdat(InputFile *F, StringRef N,
400                        const coff_symbol_generic *Sym) {
401   Symbol *S;
402   bool WasInserted;
403   std::tie(S, WasInserted) = insert(N, F);
404   if (WasInserted || !isa<DefinedRegular>(S)) {
405     replaceSymbol<DefinedRegular>(S, F, N, /*IsCOMDAT*/ true,
406                                   /*IsExternal*/ true, Sym, nullptr);
407     return {cast<DefinedRegular>(S), true};
408   }
409   auto *ExistingSymbol = cast<DefinedRegular>(S);
410   if (!ExistingSymbol->isCOMDAT())
411     reportDuplicate(S, F);
412   return {ExistingSymbol, false};
413 }
414 
415 Symbol *SymbolTable::addCommon(InputFile *F, StringRef N, uint64_t Size,
416                                const coff_symbol_generic *Sym, CommonChunk *C) {
417   Symbol *S;
418   bool WasInserted;
419   std::tie(S, WasInserted) = insert(N, F);
420   if (WasInserted || !isa<DefinedCOFF>(S))
421     replaceSymbol<DefinedCommon>(S, F, N, Size, Sym, C);
422   else if (auto *DC = dyn_cast<DefinedCommon>(S))
423     if (Size > DC->getSize())
424       replaceSymbol<DefinedCommon>(S, F, N, Size, Sym, C);
425   return S;
426 }
427 
428 Symbol *SymbolTable::addImportData(StringRef N, ImportFile *F) {
429   Symbol *S;
430   bool WasInserted;
431   std::tie(S, WasInserted) = insert(N, nullptr);
432   S->IsUsedInRegularObj = true;
433   if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) {
434     replaceSymbol<DefinedImportData>(S, N, F);
435     return S;
436   }
437 
438   reportDuplicate(S, F);
439   return nullptr;
440 }
441 
442 Symbol *SymbolTable::addImportThunk(StringRef Name, DefinedImportData *ID,
443                                     uint16_t Machine) {
444   Symbol *S;
445   bool WasInserted;
446   std::tie(S, WasInserted) = insert(Name, nullptr);
447   S->IsUsedInRegularObj = true;
448   if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) {
449     replaceSymbol<DefinedImportThunk>(S, Name, ID, Machine);
450     return S;
451   }
452 
453   reportDuplicate(S, ID->File);
454   return nullptr;
455 }
456 
457 std::vector<Chunk *> SymbolTable::getChunks() {
458   std::vector<Chunk *> Res;
459   for (ObjFile *File : ObjFile::Instances) {
460     ArrayRef<Chunk *> V = File->getChunks();
461     Res.insert(Res.end(), V.begin(), V.end());
462   }
463   return Res;
464 }
465 
466 Symbol *SymbolTable::find(StringRef Name) {
467   return SymMap.lookup(CachedHashStringRef(Name));
468 }
469 
470 Symbol *SymbolTable::findUnderscore(StringRef Name) {
471   if (Config->Machine == I386)
472     return find(("_" + Name).str());
473   return find(Name);
474 }
475 
476 // Return all symbols that start with Prefix, possibly ignoring the first
477 // character of Prefix or the first character symbol.
478 std::vector<Symbol *> SymbolTable::getSymsWithPrefix(StringRef Prefix) {
479   std::vector<Symbol *> Syms;
480   for (auto Pair : SymMap) {
481     StringRef Name = Pair.first.val();
482     if (Name.startswith(Prefix) || Name.startswith(Prefix.drop_front()) ||
483         Name.drop_front().startswith(Prefix) ||
484         Name.drop_front().startswith(Prefix.drop_front())) {
485       Syms.push_back(Pair.second);
486     }
487   }
488   return Syms;
489 }
490 
491 Symbol *SymbolTable::findMangle(StringRef Name) {
492   if (Symbol *Sym = find(Name))
493     if (!isa<Undefined>(Sym))
494       return Sym;
495 
496   // Efficient fuzzy string lookup is impossible with a hash table, so iterate
497   // the symbol table once and collect all possibly matching symbols into this
498   // vector. Then compare each possibly matching symbol with each possible
499   // mangling.
500   std::vector<Symbol *> Syms = getSymsWithPrefix(Name);
501   auto FindByPrefix = [&Syms](const Twine &T) -> Symbol * {
502     std::string Prefix = T.str();
503     for (auto *S : Syms)
504       if (S->getName().startswith(Prefix))
505         return S;
506     return nullptr;
507   };
508 
509   // For non-x86, just look for C++ functions.
510   if (Config->Machine != I386)
511     return FindByPrefix("?" + Name + "@@Y");
512 
513   if (!Name.startswith("_"))
514     return nullptr;
515   // Search for x86 stdcall function.
516   if (Symbol *S = FindByPrefix(Name + "@"))
517     return S;
518   // Search for x86 fastcall function.
519   if (Symbol *S = FindByPrefix("@" + Name.substr(1) + "@"))
520     return S;
521   // Search for x86 vectorcall function.
522   if (Symbol *S = FindByPrefix(Name.substr(1) + "@@"))
523     return S;
524   // Search for x86 C++ non-member function.
525   return FindByPrefix("?" + Name.substr(1) + "@@Y");
526 }
527 
528 Symbol *SymbolTable::addUndefined(StringRef Name) {
529   return addUndefined(Name, nullptr, false);
530 }
531 
532 std::vector<StringRef> SymbolTable::compileBitcodeFiles() {
533   LTO.reset(new BitcodeCompiler);
534   for (BitcodeFile *F : BitcodeFile::Instances)
535     LTO->add(*F);
536   return LTO->compile();
537 }
538 
539 void SymbolTable::addCombinedLTOObjects() {
540   if (BitcodeFile::Instances.empty())
541     return;
542 
543   ScopedTimer T(LTOTimer);
544   for (StringRef Object : compileBitcodeFiles()) {
545     auto *Obj = make<ObjFile>(MemoryBufferRef(Object, "lto.tmp"));
546     Obj->parse();
547     ObjFile::Instances.push_back(Obj);
548   }
549 }
550 
551 } // namespace coff
552 } // namespace lld
553