1 //===- SymbolTable.cpp ----------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Symbol table is a bag of all known symbols. We put all symbols of
11 // all input files to the symbol table. The symbol table is basically
12 // a hash table with the logic to resolve symbol name conflicts using
13 // the symbol types.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "SymbolTable.h"
18 #include "Config.h"
19 #include "Error.h"
20 #include "Symbols.h"
21 #include "llvm/Bitcode/ReaderWriter.h"
22 #include "llvm/Support/StringSaver.h"
23 
24 using namespace llvm;
25 using namespace llvm::object;
26 using namespace llvm::ELF;
27 
28 using namespace lld;
29 using namespace lld::elf;
30 
31 // All input object files must be for the same architecture
32 // (e.g. it does not make sense to link x86 object files with
33 // MIPS object files.) This function checks for that error.
34 template <class ELFT> static bool isCompatible(InputFile *FileP) {
35   auto *F = dyn_cast<ELFFileBase<ELFT>>(FileP);
36   if (!F)
37     return true;
38   if (F->getELFKind() == Config->EKind && F->getEMachine() == Config->EMachine)
39     return true;
40   StringRef A = F->getName();
41   StringRef B = Config->Emulation;
42   if (B.empty())
43     B = Config->FirstElf->getName();
44   error(A + " is incompatible with " + B);
45   return false;
46 }
47 
48 // Returns "(internal)", "foo.a(bar.o)" or "baz.o".
49 static std::string getFilename(InputFile *F) {
50   if (!F)
51     return "(internal)";
52   if (!F->ArchiveName.empty())
53     return (F->ArchiveName + "(" + F->getName() + ")").str();
54   return F->getName();
55 }
56 
57 // Add symbols in File to the symbol table.
58 template <class ELFT>
59 void SymbolTable<ELFT>::addFile(std::unique_ptr<InputFile> File) {
60   InputFile *FileP = File.get();
61   if (!isCompatible<ELFT>(FileP))
62     return;
63 
64   // .a file
65   if (auto *F = dyn_cast<ArchiveFile>(FileP)) {
66     ArchiveFiles.emplace_back(cast<ArchiveFile>(File.release()));
67     F->parse();
68     for (Lazy &Sym : F->getLazySymbols())
69       addLazy(&Sym);
70     return;
71   }
72 
73   // Lazy object file
74   if (auto *F = dyn_cast<LazyObjectFile>(FileP)) {
75     LazyObjectFiles.emplace_back(cast<LazyObjectFile>(File.release()));
76     F->parse();
77     for (Lazy &Sym : F->getLazySymbols())
78       addLazy(&Sym);
79     return;
80   }
81 
82   if (Config->Trace)
83     llvm::outs() << getFilename(FileP) << "\n";
84 
85   // .so file
86   if (auto *F = dyn_cast<SharedFile<ELFT>>(FileP)) {
87     // DSOs are uniquified not by filename but by soname.
88     F->parseSoName();
89     if (!SoNames.insert(F->getSoName()).second)
90       return;
91 
92     SharedFiles.emplace_back(cast<SharedFile<ELFT>>(File.release()));
93     F->parseRest();
94     for (SharedSymbol<ELFT> &B : F->getSharedSymbols())
95       resolve(&B);
96     return;
97   }
98 
99   // LLVM bitcode file
100   if (auto *F = dyn_cast<BitcodeFile>(FileP)) {
101     BitcodeFiles.emplace_back(cast<BitcodeFile>(File.release()));
102     F->parse(ComdatGroups);
103     for (SymbolBody *B : F->getSymbols())
104       if (B)
105         resolve(B);
106     return;
107   }
108 
109   // Regular object file
110   auto *F = cast<ObjectFile<ELFT>>(FileP);
111   ObjectFiles.emplace_back(cast<ObjectFile<ELFT>>(File.release()));
112   F->parse(ComdatGroups);
113   for (SymbolBody *B : F->getNonLocalSymbols())
114     resolve(B);
115 }
116 
117 // This function is where all the optimizations of link-time
118 // optimization happens. When LTO is in use, some input files are
119 // not in native object file format but in the LLVM bitcode format.
120 // This function compiles bitcode files into a few big native files
121 // using LLVM functions and replaces bitcode symbols with the results.
122 // Because all bitcode files that consist of a program are passed
123 // to the compiler at once, it can do whole-program optimization.
124 template <class ELFT> void SymbolTable<ELFT>::addCombinedLtoObject() {
125   if (BitcodeFiles.empty())
126     return;
127 
128   // Compile bitcode files.
129   Lto.reset(new BitcodeCompiler);
130   for (const std::unique_ptr<BitcodeFile> &F : BitcodeFiles)
131     Lto->add(*F);
132   std::vector<std::unique_ptr<InputFile>> IFs = Lto->compile();
133 
134   // Replace bitcode symbols.
135   for (auto &IF : IFs) {
136     ObjectFile<ELFT> *Obj = cast<ObjectFile<ELFT>>(IF.release());
137 
138     llvm::DenseSet<StringRef> DummyGroups;
139     Obj->parse(DummyGroups);
140     for (SymbolBody *Body : Obj->getNonLocalSymbols()) {
141       Symbol *Sym = insert(Body);
142       if (!Sym->Body->isUndefined() && Body->isUndefined())
143         continue;
144       Sym->Body = Body;
145     }
146     ObjectFiles.emplace_back(Obj);
147   }
148 }
149 
150 // Add an undefined symbol.
151 template <class ELFT>
152 SymbolBody *SymbolTable<ELFT>::addUndefined(StringRef Name) {
153   auto *Sym = new (Alloc) Undefined(Name, STB_GLOBAL, STV_DEFAULT, /*Type*/ 0,
154                                     /*IsBitcode*/ false);
155   resolve(Sym);
156   return Sym;
157 }
158 
159 template <class ELFT>
160 DefinedRegular<ELFT> *SymbolTable<ELFT>::addAbsolute(StringRef Name,
161                                                      uint8_t Visibility) {
162   // Pass nullptr because absolute symbols have no corresponding input sections.
163   auto *Sym = new (Alloc) DefinedRegular<ELFT>(Name, STB_GLOBAL, Visibility);
164   resolve(Sym);
165   return Sym;
166 }
167 
168 template <class ELFT>
169 SymbolBody *SymbolTable<ELFT>::addSynthetic(StringRef Name,
170                                             OutputSectionBase<ELFT> &Sec,
171                                             uintX_t Val) {
172   auto *Sym = new (Alloc) DefinedSynthetic<ELFT>(Name, Val, Sec);
173   resolve(Sym);
174   return Sym;
175 }
176 
177 // Add Name as an "ignored" symbol. An ignored symbol is a regular
178 // linker-synthesized defined symbol, but is only defined if needed.
179 template <class ELFT>
180 DefinedRegular<ELFT> *SymbolTable<ELFT>::addIgnored(StringRef Name,
181                                                     uint8_t Visibility) {
182   if (!find(Name))
183     return nullptr;
184   return addAbsolute(Name, Visibility);
185 }
186 
187 // Rename SYM as __wrap_SYM. The original symbol is preserved as __real_SYM.
188 // Used to implement --wrap.
189 template <class ELFT> void SymbolTable<ELFT>::wrap(StringRef Name) {
190   SymbolBody *B = find(Name);
191   if (!B)
192     return;
193   StringSaver Saver(Alloc);
194   Symbol *Sym = B->Backref;
195   Symbol *Real = addUndefined(Saver.save("__real_" + Name))->Backref;
196   Symbol *Wrap = addUndefined(Saver.save("__wrap_" + Name))->Backref;
197   Real->Body = Sym->Body;
198   Sym->Body = Wrap->Body;
199 }
200 
201 // Returns a file from which symbol B was created.
202 // If B does not belong to any file, returns a nullptr.
203 // This function is slow, but it's okay as it is used only for error messages.
204 template <class ELFT> InputFile *SymbolTable<ELFT>::findFile(SymbolBody *B) {
205   for (const std::unique_ptr<ObjectFile<ELFT>> &F : ObjectFiles) {
206     ArrayRef<SymbolBody *> Syms = F->getSymbols();
207     if (std::find(Syms.begin(), Syms.end(), B) != Syms.end())
208       return F.get();
209   }
210   for (const std::unique_ptr<BitcodeFile> &F : BitcodeFiles) {
211     ArrayRef<SymbolBody *> Syms = F->getSymbols();
212     if (std::find(Syms.begin(), Syms.end(), B) != Syms.end())
213       return F.get();
214   }
215   return nullptr;
216 }
217 
218 // Construct a string in the form of "Sym in File1 and File2".
219 // Used to construct an error message.
220 template <class ELFT>
221 std::string SymbolTable<ELFT>::conflictMsg(SymbolBody *Old, SymbolBody *New) {
222   InputFile *F1 = findFile(Old);
223   InputFile *F2 = findFile(New);
224   StringRef Sym = Old->getName();
225   return demangle(Sym) + " in " + getFilename(F1) + " and " + getFilename(F2);
226 }
227 
228 // This function resolves conflicts if there's an existing symbol with
229 // the same name. Decisions are made based on symbol type.
230 template <class ELFT> void SymbolTable<ELFT>::resolve(SymbolBody *New) {
231   Symbol *Sym = insert(New);
232   if (Sym->Body == New)
233     return;
234 
235   SymbolBody *Existing = Sym->Body;
236 
237   if (auto *L = dyn_cast<Lazy>(Existing)) {
238     Sym->Binding = New->Binding;
239     if (New->isUndefined()) {
240       addMemberFile(New, L);
241       return;
242     }
243     // Found a definition for something also in an archive.
244     // Ignore the archive definition.
245     Sym->Body = New;
246     return;
247   }
248 
249   if (New->isTls() != Existing->isTls()) {
250     error("TLS attribute mismatch for symbol: " + conflictMsg(Existing, New));
251     return;
252   }
253 
254   // compare() returns -1, 0, or 1 if the lhs symbol is less preferable,
255   // equivalent (conflicting), or more preferable, respectively.
256   int Comp = Existing->compare(New);
257   if (Comp == 0) {
258     std::string S = "duplicate symbol: " + conflictMsg(Existing, New);
259     if (Config->AllowMultipleDefinition)
260       warning(S);
261     else
262       error(S);
263     return;
264   }
265   if (Comp < 0) {
266     Sym->Body = New;
267     if (!New->isShared())
268       Sym->Binding = New->Binding;
269   }
270 }
271 
272 static uint8_t getMinVisibility(uint8_t VA, uint8_t VB) {
273   if (VA == STV_DEFAULT)
274     return VB;
275   if (VB == STV_DEFAULT)
276     return VA;
277   return std::min(VA, VB);
278 }
279 
280 static bool shouldExport(SymbolBody *B) {
281   if (Config->Shared || Config->ExportDynamic) {
282     // Export most symbols except for those that do not need to be exported.
283     return !B->CanOmitFromDynSym;
284   }
285   // Make sure we preempt DSO symbols with default visibility.
286   return B->isShared() && B->getVisibility() == STV_DEFAULT;
287 }
288 
289 // Find an existing symbol or create and insert a new one.
290 template <class ELFT> Symbol *SymbolTable<ELFT>::insert(SymbolBody *New) {
291   StringRef Name = New->getName();
292   unsigned NumSyms = SymVector.size();
293   auto P = Symtab.insert(std::make_pair(Name, NumSyms));
294   Symbol *Sym;
295   if (P.second) {
296     Sym = new (Alloc) Symbol;
297     Sym->Body = New;
298     Sym->Binding = New->isShared() ? STB_GLOBAL : New->Binding;
299     Sym->Visibility = STV_DEFAULT;
300     Sym->IsUsedInRegularObj = false;
301     Sym->ExportDynamic = false;
302     Sym->VersionScriptGlobal = !Config->VersionScript;
303     SymVector.push_back(Sym);
304   } else {
305     Sym = SymVector[P.first->second];
306   }
307   New->Backref = Sym;
308 
309   // Merge in the new symbol's visibility. DSO symbols do not affect visibility
310   // in the output.
311   if (!New->isShared())
312     Sym->Visibility = getMinVisibility(Sym->Visibility, New->getVisibility());
313   Sym->ExportDynamic = Sym->ExportDynamic || shouldExport(New);
314   SymbolBody::Kind K = New->kind();
315   if (K == SymbolBody::DefinedRegularKind ||
316       K == SymbolBody::DefinedCommonKind ||
317       K == SymbolBody::DefinedSyntheticKind ||
318       (K == SymbolBody::UndefinedKind && !New->IsUndefinedBitcode))
319     Sym->IsUsedInRegularObj = true;
320   return Sym;
321 }
322 
323 template <class ELFT> SymbolBody *SymbolTable<ELFT>::find(StringRef Name) {
324   auto It = Symtab.find(Name);
325   if (It == Symtab.end())
326     return nullptr;
327   return SymVector[It->second]->Body;
328 }
329 
330 template <class ELFT> void SymbolTable<ELFT>::addLazy(Lazy *L) {
331   Symbol *Sym = insert(L);
332   SymbolBody *Cur = Sym->Body;
333   if (Cur == L)
334     return;
335   if (Cur->isUndefined()) {
336     Sym->Body = L;
337     addMemberFile(Cur, L);
338   }
339 }
340 
341 template <class ELFT>
342 void SymbolTable<ELFT>::addMemberFile(SymbolBody *Undef, Lazy *L) {
343   // Weak undefined symbols should not fetch members from archives.
344   // If we were to keep old symbol we would not know that an archive member was
345   // available if a strong undefined symbol shows up afterwards in the link.
346   // If a strong undefined symbol never shows up, this lazy symbol will
347   // get to the end of the link and must be treated as the weak undefined one.
348   // We already marked this symbol as used when we added it to the symbol table,
349   // but we also need to preserve its binding and type.
350   if (Undef->isWeak()) {
351     // FIXME: Consider moving these members to Symbol.
352     L->Type = Undef->Type;
353     return;
354   }
355 
356   // Fetch a member file that has the definition for L.
357   // getMember returns nullptr if the member was already read from the library.
358   if (std::unique_ptr<InputFile> File = L->getFile())
359     addFile(std::move(File));
360 }
361 
362 // Process undefined (-u) flags by loading lazy symbols named by those flags.
363 template <class ELFT>
364 void SymbolTable<ELFT>::scanUndefinedFlags() {
365   for (StringRef S : Config->Undefined)
366     if (SymbolBody *Sym = find(S))
367       if (auto *L = dyn_cast<Lazy>(Sym))
368         if (std::unique_ptr<InputFile> File = L->getFile())
369           addFile(std::move(File));
370 }
371 
372 // This function takes care of the case in which shared libraries depend on
373 // the user program (not the other way, which is usual). Shared libraries
374 // may have undefined symbols, expecting that the user program provides
375 // the definitions for them. An example is BSD's __progname symbol.
376 // We need to put such symbols to the main program's .dynsym so that
377 // shared libraries can find them.
378 // Except this, we ignore undefined symbols in DSOs.
379 template <class ELFT> void SymbolTable<ELFT>::scanShlibUndefined() {
380   for (std::unique_ptr<SharedFile<ELFT>> &File : SharedFiles)
381     for (StringRef U : File->getUndefinedSymbols())
382       if (SymbolBody *Sym = find(U))
383         if (Sym->isDefined())
384           Sym->Backref->ExportDynamic = true;
385 }
386 
387 // This function process the dynamic list option by marking all the symbols
388 // to be exported in the dynamic table.
389 template <class ELFT> void SymbolTable<ELFT>::scanDynamicList() {
390   for (StringRef S : Config->DynamicList)
391     if (SymbolBody *B = find(S))
392       B->Backref->ExportDynamic = true;
393 }
394 
395 // This function processes the --version-script option by marking all global
396 // symbols with the VersionScriptGlobal flag, which acts as a filter on the
397 // dynamic symbol table.
398 template <class ELFT> void SymbolTable<ELFT>::scanVersionScript() {
399   for (StringRef S : Config->VersionScriptGlobals)
400     if (SymbolBody *B = find(S))
401       B->Backref->VersionScriptGlobal = true;
402 }
403 
404 template class elf::SymbolTable<ELF32LE>;
405 template class elf::SymbolTable<ELF32BE>;
406 template class elf::SymbolTable<ELF64LE>;
407 template class elf::SymbolTable<ELF64BE>;
408