1 //===- SymbolTable.cpp ----------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// The symbol table is a bag of all known symbols. We put all symbols from
// all input files into the symbol table. The symbol table is basically
// a hash table with the logic to resolve symbol name conflicts using
// the symbol types.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "SymbolTable.h"
18 #include "Config.h"
19 #include "Error.h"
20 #include "Symbols.h"
21 #include "llvm/Bitcode/ReaderWriter.h"
22 #include "llvm/IR/LegacyPassManager.h"
23 #include "llvm/Linker/IRMover.h"
24 #include "llvm/Support/StringSaver.h"
25 #include "llvm/Support/TargetRegistry.h"
26 #include "llvm/Target/TargetMachine.h"
27 
28 using namespace llvm;
29 using namespace llvm::object;
30 using namespace llvm::ELF;
31 
32 using namespace lld;
33 using namespace lld::elf;
34 
35 // All input object files must be for the same architecture
36 // (e.g. it does not make sense to link x86 object files with
37 // MIPS object files.) This function checks for that error.
38 template <class ELFT> static bool isCompatible(InputFile *FileP) {
39   auto *F = dyn_cast<ELFFileBase<ELFT>>(FileP);
40   if (!F)
41     return true;
42   if (F->getELFKind() == Config->EKind && F->getEMachine() == Config->EMachine)
43     return true;
44   StringRef A = F->getName();
45   StringRef B = Config->Emulation;
46   if (B.empty())
47     B = Config->FirstElf->getName();
48   error(A + " is incompatible with " + B);
49   return false;
50 }
51 
52 // Add symbols in File to the symbol table.
53 template <class ELFT>
54 void SymbolTable<ELFT>::addFile(std::unique_ptr<InputFile> File) {
55   InputFile *FileP = File.get();
56   if (!isCompatible<ELFT>(FileP))
57     return;
58 
59   // .a file
60   if (auto *F = dyn_cast<ArchiveFile>(FileP)) {
61     ArchiveFiles.emplace_back(cast<ArchiveFile>(File.release()));
62     F->parse();
63     for (Lazy &Sym : F->getLazySymbols())
64       addLazy(&Sym);
65     return;
66   }
67 
68   // .so file
69   if (auto *F = dyn_cast<SharedFile<ELFT>>(FileP)) {
70     // DSOs are uniquified not by filename but by soname.
71     F->parseSoName();
72     if (!SoNames.insert(F->getSoName()).second)
73       return;
74 
75     SharedFiles.emplace_back(cast<SharedFile<ELFT>>(File.release()));
76     F->parseRest();
77     for (SharedSymbol<ELFT> &B : F->getSharedSymbols())
78       resolve(&B);
79     return;
80   }
81 
82   // LLVM bitcode file.
83   if (auto *F = dyn_cast<BitcodeFile>(FileP)) {
84     BitcodeFiles.emplace_back(cast<BitcodeFile>(File.release()));
85     F->parse(ComdatGroups);
86     for (SymbolBody *B : F->getSymbols())
87       if (B)
88         resolve(B);
89     return;
90   }
91 
92   // .o file
93   auto *F = cast<ObjectFile<ELFT>>(FileP);
94   ObjectFiles.emplace_back(cast<ObjectFile<ELFT>>(File.release()));
95   F->parse(ComdatGroups);
96   for (SymbolBody *B : F->getNonLocalSymbols())
97     resolve(B);
98 }
99 
100 // This is for use when debugging LTO.
101 static void saveLtoObjectFile(StringRef Buffer) {
102   std::error_code EC;
103   raw_fd_ostream OS(Config->OutputFile.str() + ".lto.o", EC,
104                     sys::fs::OpenFlags::F_None);
105   check(EC);
106   OS << Buffer;
107 }
108 
109 // Codegen the module M and returns the resulting InputFile.
110 template <class ELFT>
111 std::unique_ptr<InputFile> SymbolTable<ELFT>::codegen(Module &M) {
112   StringRef TripleStr = M.getTargetTriple();
113   Triple TheTriple(TripleStr);
114 
115   // FIXME: Should we have a default triple? The gold plugin uses
116   // sys::getDefaultTargetTriple(), but that is probably wrong given that this
117   // might be a cross linker.
118 
119   std::string ErrMsg;
120   const Target *TheTarget = TargetRegistry::lookupTarget(TripleStr, ErrMsg);
121   if (!TheTarget)
122     fatal("target not found: " + ErrMsg);
123 
124   TargetOptions Options;
125   Reloc::Model R = Config->Shared ? Reloc::PIC_ : Reloc::Static;
126   std::unique_ptr<TargetMachine> TM(
127       TheTarget->createTargetMachine(TripleStr, "", "", Options, R));
128 
129   raw_svector_ostream OS(OwningLTOData);
130   legacy::PassManager CodeGenPasses;
131   if (TM->addPassesToEmitFile(CodeGenPasses, OS,
132                               TargetMachine::CGFT_ObjectFile))
133     fatal("failed to setup codegen");
134   CodeGenPasses.run(M);
135   LtoBuffer = MemoryBuffer::getMemBuffer(OwningLTOData, "", false);
136   if (Config->SaveTemps)
137     saveLtoObjectFile(LtoBuffer->getBuffer());
138   return createObjectFile(*LtoBuffer);
139 }
140 
141 static void addBitcodeFile(IRMover &Mover, BitcodeFile &F,
142                            LLVMContext &Context) {
143 
144   std::unique_ptr<IRObjectFile> Obj =
145       check(IRObjectFile::create(F.MB, Context));
146   std::vector<GlobalValue *> Keep;
147   unsigned BodyIndex = 0;
148   ArrayRef<SymbolBody *> Bodies = F.getSymbols();
149 
150   for (const BasicSymbolRef &Sym : Obj->symbols()) {
151     GlobalValue *GV = Obj->getSymbolGV(Sym.getRawDataRefImpl());
152     assert(GV);
153     if (GV->hasAppendingLinkage()) {
154       Keep.push_back(GV);
155       continue;
156     }
157     if (BitcodeFile::shouldSkip(Sym))
158       continue;
159     SymbolBody *B = Bodies[BodyIndex++];
160     if (!B || &B->repl() != B)
161       continue;
162     auto *DB = dyn_cast<DefinedBitcode>(B);
163     if (!DB)
164       continue;
165     Keep.push_back(GV);
166   }
167 
168   Mover.move(Obj->takeModule(), Keep,
169              [](GlobalValue &, IRMover::ValueAdder) {});
170 }
171 
172 // This is for use when debugging LTO.
173 static void saveBCFile(Module &M) {
174   std::error_code EC;
175   raw_fd_ostream OS(Config->OutputFile.str() + ".lto.bc", EC,
176                     sys::fs::OpenFlags::F_None);
177   check(EC);
178   WriteBitcodeToFile(&M, OS, /* ShouldPreserveUseListOrder */ true);
179 }
180 
181 // Merge all the bitcode files we have seen, codegen the result and return
182 // the resulting ObjectFile.
183 template <class ELFT>
184 elf::ObjectFile<ELFT> *SymbolTable<ELFT>::createCombinedLtoObject() {
185   LLVMContext Context;
186   Module Combined("ld-temp.o", Context);
187   IRMover Mover(Combined);
188   for (const std::unique_ptr<BitcodeFile> &F : BitcodeFiles)
189     addBitcodeFile(Mover, *F, Context);
190   if (Config->SaveTemps)
191     saveBCFile(Combined);
192   std::unique_ptr<InputFile> F = codegen(Combined);
193   ObjectFiles.emplace_back(cast<ObjectFile<ELFT>>(F.release()));
194   return &*ObjectFiles.back();
195 }
196 
197 template <class ELFT> void SymbolTable<ELFT>::addCombinedLtoObject() {
198   if (BitcodeFiles.empty())
199     return;
200   ObjectFile<ELFT> *Obj = createCombinedLtoObject();
201   llvm::DenseSet<StringRef> DummyGroups;
202   Obj->parse(DummyGroups);
203   for (SymbolBody *Body : Obj->getNonLocalSymbols()) {
204     Symbol *Sym = insert(Body);
205     if (!Sym->Body->isUndefined() && Body->isUndefined())
206       continue;
207     Sym->Body = Body;
208   }
209 }
210 
211 // Add an undefined symbol.
212 template <class ELFT>
213 SymbolBody *SymbolTable<ELFT>::addUndefined(StringRef Name) {
214   auto *Sym = new (Alloc) Undefined(Name, false, STV_DEFAULT, false);
215   resolve(Sym);
216   return Sym;
217 }
218 
219 // Add an undefined symbol. Unlike addUndefined, that symbol
220 // doesn't have to be resolved, thus "opt" (optional).
221 template <class ELFT>
222 SymbolBody *SymbolTable<ELFT>::addUndefinedOpt(StringRef Name) {
223   auto *Sym = new (Alloc) Undefined(Name, false, STV_HIDDEN, true);
224   resolve(Sym);
225   return Sym;
226 }
227 
228 template <class ELFT>
229 SymbolBody *SymbolTable<ELFT>::addAbsolute(StringRef Name, Elf_Sym &ESym) {
230   // Pass nullptr because absolute symbols have no corresponding input sections.
231   auto *Sym = new (Alloc) DefinedRegular<ELFT>(Name, ESym, nullptr);
232   resolve(Sym);
233   return Sym;
234 }
235 
236 template <class ELFT>
237 SymbolBody *SymbolTable<ELFT>::addSynthetic(StringRef Name,
238                                             OutputSectionBase<ELFT> &Sec,
239                                             uintX_t Val, uint8_t Visibility) {
240   auto *Sym = new (Alloc) DefinedSynthetic<ELFT>(Name, Val, Sec, Visibility);
241   resolve(Sym);
242   return Sym;
243 }
244 
245 // Add Name as an "ignored" symbol. An ignored symbol is a regular
246 // linker-synthesized defined symbol, but it is not recorded to the output
247 // file's symbol table. Such symbols are useful for some linker-defined symbols.
248 template <class ELFT>
249 SymbolBody *SymbolTable<ELFT>::addIgnored(StringRef Name) {
250   return addAbsolute(Name, ElfSym<ELFT>::Ignored);
251 }
252 
253 // Rename SYM as __wrap_SYM. The original symbol is preserved as __real_SYM.
254 // Used to implement --wrap.
255 template <class ELFT> void SymbolTable<ELFT>::wrap(StringRef Name) {
256   if (Symtab.count(Name) == 0)
257     return;
258   StringSaver Saver(Alloc);
259   Symbol *Sym = addUndefined(Name)->getSymbol();
260   Symbol *Real = addUndefined(Saver.save("__real_" + Name))->getSymbol();
261   Symbol *Wrap = addUndefined(Saver.save("__wrap_" + Name))->getSymbol();
262   Real->Body = Sym->Body;
263   Sym->Body = Wrap->Body;
264 }
265 
266 // Returns a file from which symbol B was created.
267 // If B does not belong to any file, returns a nullptr.
268 template <class ELFT> InputFile *SymbolTable<ELFT>::findFile(SymbolBody *B) {
269   for (const std::unique_ptr<ObjectFile<ELFT>> &F : ObjectFiles) {
270     ArrayRef<SymbolBody *> Syms = F->getSymbols();
271     if (std::find(Syms.begin(), Syms.end(), B) != Syms.end())
272       return F.get();
273   }
274   for (const std::unique_ptr<BitcodeFile> &F : BitcodeFiles) {
275     ArrayRef<SymbolBody *> Syms = F->getSymbols();
276     if (std::find(Syms.begin(), Syms.end(), B) != Syms.end())
277       return F.get();
278   }
279   return nullptr;
280 }
281 
282 // Returns "(internal)", "foo.a(bar.o)" or "baz.o".
283 static std::string getFilename(InputFile *F) {
284   if (!F)
285     return "(internal)";
286   if (!F->ArchiveName.empty())
287     return (F->ArchiveName + "(" + F->getName() + ")").str();
288   return F->getName();
289 }
290 
291 // Construct a string in the form of "Sym in File1 and File2".
292 // Used to construct an error message.
293 template <class ELFT>
294 std::string SymbolTable<ELFT>::conflictMsg(SymbolBody *Old, SymbolBody *New) {
295   InputFile *F1 = findFile(Old);
296   InputFile *F2 = findFile(New);
297   StringRef Sym = Old->getName();
298   return demangle(Sym) + " in " + getFilename(F1) + " and " + getFilename(F2);
299 }
300 
301 // This function resolves conflicts if there's an existing symbol with
302 // the same name. Decisions are made based on symbol type.
303 template <class ELFT> void SymbolTable<ELFT>::resolve(SymbolBody *New) {
304   Symbol *Sym = insert(New);
305   if (Sym->Body == New)
306     return;
307 
308   SymbolBody *Existing = Sym->Body;
309 
310   if (Lazy *L = dyn_cast<Lazy>(Existing)) {
311     if (auto *Undef = dyn_cast<Undefined>(New)) {
312       addMemberFile(Undef, L);
313       return;
314     }
315     // Found a definition for something also in an archive.
316     // Ignore the archive definition.
317     Sym->Body = New;
318     return;
319   }
320 
321   if (New->IsTls != Existing->IsTls) {
322     error("TLS attribute mismatch for symbol: " + conflictMsg(Existing, New));
323     return;
324   }
325 
326   // compare() returns -1, 0, or 1 if the lhs symbol is less preferable,
327   // equivalent (conflicting), or more preferable, respectively.
328   int Comp = Existing->compare<ELFT>(New);
329   if (Comp == 0) {
330     std::string S = "duplicate symbol: " + conflictMsg(Existing, New);
331     if (Config->AllowMultipleDefinition)
332       warning(S);
333     else
334       error(S);
335     return;
336   }
337   if (Comp < 0)
338     Sym->Body = New;
339 }
340 
341 // Find an existing symbol or create and insert a new one.
342 template <class ELFT> Symbol *SymbolTable<ELFT>::insert(SymbolBody *New) {
343   StringRef Name = New->getName();
344   Symbol *&Sym = Symtab[Name];
345   if (!Sym)
346     Sym = new (Alloc) Symbol{New};
347   New->setBackref(Sym);
348   return Sym;
349 }
350 
351 template <class ELFT> SymbolBody *SymbolTable<ELFT>::find(StringRef Name) {
352   auto It = Symtab.find(Name);
353   if (It == Symtab.end())
354     return nullptr;
355   return It->second->Body;
356 }
357 
358 template <class ELFT> void SymbolTable<ELFT>::addLazy(Lazy *L) {
359   Symbol *Sym = insert(L);
360   if (Sym->Body == L)
361     return;
362   if (auto *Undef = dyn_cast<Undefined>(Sym->Body)) {
363     Sym->Body = L;
364     addMemberFile(Undef, L);
365   }
366 }
367 
368 template <class ELFT>
369 void SymbolTable<ELFT>::addMemberFile(Undefined *Undef, Lazy *L) {
370   // Weak undefined symbols should not fetch members from archives.
371   // If we were to keep old symbol we would not know that an archive member was
372   // available if a strong undefined symbol shows up afterwards in the link.
373   // If a strong undefined symbol never shows up, this lazy symbol will
374   // get to the end of the link and must be treated as the weak undefined one.
375   // We set UsedInRegularObj in a similar way to what is done with shared
376   // symbols and copy information to reduce how many special cases are needed.
377   if (Undef->isWeak()) {
378     L->setUsedInRegularObj();
379     L->setWeak();
380 
381     // FIXME: Do we need to copy more?
382     L->IsTls |= Undef->IsTls;
383     return;
384   }
385 
386   // Fetch a member file that has the definition for L.
387   // getMember returns nullptr if the member was already read from the library.
388   if (std::unique_ptr<InputFile> File = L->getMember())
389     addFile(std::move(File));
390 }
391 
392 // This function takes care of the case in which shared libraries depend on
393 // the user program (not the other way, which is usual). Shared libraries
394 // may have undefined symbols, expecting that the user program provides
395 // the definitions for them. An example is BSD's __progname symbol.
396 // We need to put such symbols to the main program's .dynsym so that
397 // shared libraries can find them.
398 // Except this, we ignore undefined symbols in DSOs.
399 template <class ELFT> void SymbolTable<ELFT>::scanShlibUndefined() {
400   for (std::unique_ptr<SharedFile<ELFT>> &File : SharedFiles)
401     for (StringRef U : File->getUndefinedSymbols())
402       if (SymbolBody *Sym = find(U))
403         if (Sym->isDefined())
404           Sym->MustBeInDynSym = true;
405 }
406 
407 template class elf::SymbolTable<ELF32LE>;
408 template class elf::SymbolTable<ELF32BE>;
409 template class elf::SymbolTable<ELF64LE>;
410 template class elf::SymbolTable<ELF64BE>;
411