1 //===- SymbolTable.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Symbol table is a bag of all known symbols. We put all symbols of
10 // all input files to the symbol table. The symbol table is basically
11 // a hash table with the logic to resolve symbol name conflicts using
12 // the symbol types.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "SymbolTable.h"
17 #include "Config.h"
18 #include "LinkerScript.h"
19 #include "Symbols.h"
20 #include "SyntheticSections.h"
21 #include "lld/Common/ErrorHandler.h"
22 #include "lld/Common/Memory.h"
23 #include "lld/Common/Strings.h"
24 #include "llvm/ADT/STLExtras.h"
25 
26 using namespace llvm;
27 using namespace llvm::object;
28 using namespace llvm::ELF;
29 
30 using namespace lld;
31 using namespace lld::elf;
32 
33 SymbolTable *elf::Symtab;
34 
35 // This function is where all the optimizations of link-time
36 // optimization happens. When LTO is in use, some input files are
37 // not in native object file format but in the LLVM bitcode format.
38 // This function compiles bitcode files into a few big native files
39 // using LLVM functions and replaces bitcode symbols with the results.
40 // Because all bitcode files that the program consists of are passed
41 // to the compiler at once, it can do whole-program optimization.
42 template <class ELFT> void SymbolTable::addCombinedLTOObject() {
43   // Compile bitcode files and replace bitcode symbols.
44   LTO.reset(new BitcodeCompiler);
45   for (BitcodeFile *F : BitcodeFiles)
46     LTO->add(*F);
47 
48   for (InputFile *File : LTO->compile()) {
49     DenseMap<CachedHashStringRef, const InputFile *> DummyGroups;
50     auto *Obj = cast<ObjFile<ELFT>>(File);
51     Obj->parse(DummyGroups);
52     for (Symbol *Sym : Obj->getGlobalSymbols())
53       Sym->parseSymbolVersion();
54     ObjectFiles.push_back(File);
55   }
56 }
57 
58 // Set a flag for --trace-symbol so that we can print out a log message
59 // if a new symbol with the same name is inserted into the symbol table.
60 void SymbolTable::trace(StringRef Name) {
61   SymMap.insert({CachedHashStringRef(Name), -1});
62 }
63 
64 void SymbolTable::wrap(Symbol *Sym, Symbol *Real, Symbol *Wrap) {
65   // Swap symbols as instructed by -wrap.
66   int &Idx1 = SymMap[CachedHashStringRef(Sym->getName())];
67   int &Idx2 = SymMap[CachedHashStringRef(Real->getName())];
68   int &Idx3 = SymMap[CachedHashStringRef(Wrap->getName())];
69 
70   Idx2 = Idx1;
71   Idx1 = Idx3;
72 
73   // Now renaming is complete. No one refers Real symbol. We could leave
74   // Real as-is, but if Real is written to the symbol table, that may
75   // contain irrelevant values. So, we copy all values from Sym to Real.
76   StringRef S = Real->getName();
77   memcpy(Real, Sym, sizeof(SymbolUnion));
78   Real->setName(S);
79 }
80 
81 static uint8_t getMinVisibility(uint8_t VA, uint8_t VB) {
82   if (VA == STV_DEFAULT)
83     return VB;
84   if (VB == STV_DEFAULT)
85     return VA;
86   return std::min(VA, VB);
87 }
88 
89 // Find an existing symbol or create a new one.
90 Symbol *SymbolTable::insert(StringRef Name) {
91   // <name>@@<version> means the symbol is the default version. In that
92   // case <name>@@<version> will be used to resolve references to <name>.
93   //
94   // Since this is a hot path, the following string search code is
95   // optimized for speed. StringRef::find(char) is much faster than
96   // StringRef::find(StringRef).
97   size_t Pos = Name.find('@');
98   if (Pos != StringRef::npos && Pos + 1 < Name.size() && Name[Pos + 1] == '@')
99     Name = Name.take_front(Pos);
100 
101   auto P = SymMap.insert({CachedHashStringRef(Name), (int)SymVector.size()});
102   int &SymIndex = P.first->second;
103   bool IsNew = P.second;
104   bool Traced = false;
105 
106   if (SymIndex == -1) {
107     SymIndex = SymVector.size();
108     IsNew = true;
109     Traced = true;
110   }
111 
112   if (!IsNew)
113     return SymVector[SymIndex];
114 
115   Symbol *Sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
116   SymVector.push_back(Sym);
117 
118   Sym->SymbolKind = Symbol::PlaceholderKind;
119   Sym->VersionId = Config->DefaultSymbolVersion;
120   Sym->Visibility = STV_DEFAULT;
121   Sym->IsUsedInRegularObj = false;
122   Sym->ExportDynamic = false;
123   Sym->CanInline = true;
124   Sym->Traced = Traced;
125   Sym->ScriptDefined = false;
126   return Sym;
127 }
128 
129 Symbol *SymbolTable::addSymbol(const Symbol &New) {
130   Symbol *Old = Symtab->insert(New.getName());
131   resolveSymbol(Old, New);
132   return Old;
133 }
134 
135 static void addUndefined(Symbol *Old, const Undefined &New) {
136   // An undefined symbol with non default visibility must be satisfied
137   // in the same DSO.
138   //
139   // If this is a non-weak defined symbol in a discarded section, override the
140   // existing undefined symbol for better error message later.
141   if ((Old->isShared() && New.Visibility != STV_DEFAULT) ||
142       (Old->isUndefined() && New.Binding != STB_WEAK && New.DiscardedSecIdx)) {
143     Old->replace(New);
144     return;
145   }
146 
147   if (Old->isShared() || Old->isLazy() ||
148       (Old->isUndefined() && New.Binding != STB_WEAK))
149     Old->Binding = New.Binding;
150 
151   if (Old->isLazy()) {
152     // An undefined weak will not fetch archive members. See comment on Lazy in
153     // Symbols.h for the details.
154     if (New.Binding == STB_WEAK) {
155       Old->Type = New.Type;
156       return;
157     }
158 
159     // Do extra check for --warn-backrefs.
160     //
161     // --warn-backrefs is an option to prevent an undefined reference from
162     // fetching an archive member written earlier in the command line. It can be
163     // used to keep compatibility with GNU linkers to some degree.
164     // I'll explain the feature and why you may find it useful in this comment.
165     //
166     // lld's symbol resolution semantics is more relaxed than traditional Unix
167     // linkers. For example,
168     //
169     //   ld.lld foo.a bar.o
170     //
171     // succeeds even if bar.o contains an undefined symbol that has to be
172     // resolved by some object file in foo.a. Traditional Unix linkers don't
173     // allow this kind of backward reference, as they visit each file only once
174     // from left to right in the command line while resolving all undefined
175     // symbols at the moment of visiting.
176     //
177     // In the above case, since there's no undefined symbol when a linker visits
178     // foo.a, no files are pulled out from foo.a, and because the linker forgets
179     // about foo.a after visiting, it can't resolve undefined symbols in bar.o
180     // that could have been resolved otherwise.
181     //
182     // That lld accepts more relaxed form means that (besides it'd make more
183     // sense) you can accidentally write a command line or a build file that
184     // works only with lld, even if you have a plan to distribute it to wider
185     // users who may be using GNU linkers. With --warn-backrefs, you can detect
186     // a library order that doesn't work with other Unix linkers.
187     //
188     // The option is also useful to detect cyclic dependencies between static
189     // archives. Again, lld accepts
190     //
191     //   ld.lld foo.a bar.a
192     //
193     // even if foo.a and bar.a depend on each other. With --warn-backrefs, it is
194     // handled as an error.
195     //
196     // Here is how the option works. We assign a group ID to each file. A file
197     // with a smaller group ID can pull out object files from an archive file
198     // with an equal or greater group ID. Otherwise, it is a reverse dependency
199     // and an error.
200     //
201     // A file outside --{start,end}-group gets a fresh ID when instantiated. All
202     // files within the same --{start,end}-group get the same group ID. E.g.
203     //
204     //   ld.lld A B --start-group C D --end-group E
205     //
206     // A forms group 0. B form group 1. C and D (including their member object
207     // files) form group 2. E forms group 3. I think that you can see how this
208     // group assignment rule simulates the traditional linker's semantics.
209     bool Backref = Config->WarnBackrefs && New.File &&
210                    Old->File->GroupId < New.File->GroupId;
211     Symtab->fetchLazy(Old);
212 
213     // We don't report backward references to weak symbols as they can be
214     // overridden later.
215     if (Backref && !Old->isWeak())
216       warn("backward reference detected: " + New.getName() + " in " +
217            toString(New.File) + " refers to " + toString(Old->File));
218   }
219 }
220 
221 // Using .symver foo,foo@@VER unfortunately creates two symbols: foo and
222 // foo@@VER. We want to effectively ignore foo, so give precedence to
223 // foo@@VER.
224 // FIXME: If users can transition to using
225 // .symver foo,foo@@@VER
226 // we can delete this hack.
227 static int compareVersion(StringRef OldName, StringRef NewName) {
228   bool A = OldName.contains("@@");
229   bool B = NewName.contains("@@");
230   if (!A && B)
231     return 1;
232   if (A && !B)
233     return -1;
234   return 0;
235 }
236 
237 // Compare two symbols. Return 1 if the new symbol should win, -1 if
238 // the new symbol should lose, or 0 if there is a conflict.
239 static int compare(const Symbol *Old, const Symbol *New) {
240   assert(New->isDefined() || New->isCommon());
241 
242   if (!Old->isDefined() && !Old->isCommon())
243     return 1;
244 
245   if (int Cmp = compareVersion(Old->getName(), New->getName()))
246     return Cmp;
247 
248   if (New->isWeak())
249     return -1;
250 
251   if (Old->isWeak())
252     return 1;
253 
254   if (Old->isCommon() && New->isCommon()) {
255     if (Config->WarnCommon)
256       warn("multiple common of " + Old->getName());
257     return 0;
258   }
259 
260   if (Old->isCommon()) {
261     if (Config->WarnCommon)
262       warn("common " + Old->getName() + " is overridden");
263     return 1;
264   }
265 
266   if (New->isCommon()) {
267     if (Config->WarnCommon)
268       warn("common " + Old->getName() + " is overridden");
269     return -1;
270   }
271 
272   auto *OldSym = cast<Defined>(Old);
273   auto *NewSym = cast<Defined>(New);
274 
275   if (New->File && isa<BitcodeFile>(New->File))
276     return 0;
277 
278   if (!OldSym->Section && !NewSym->Section && OldSym->Value == NewSym->Value &&
279       NewSym->Binding == STB_GLOBAL)
280     return -1;
281 
282   return 0;
283 }
284 
285 static void addCommon(Symbol *Old, const CommonSymbol &New) {
286   int Cmp = compare(Old, &New);
287   if (Cmp < 0)
288     return;
289 
290   if (Cmp > 0) {
291     Old->replace(New);
292     return;
293   }
294 
295   CommonSymbol *OldSym = cast<CommonSymbol>(Old);
296 
297   OldSym->Alignment = std::max(OldSym->Alignment, New.Alignment);
298   if (OldSym->Size < New.Size) {
299     OldSym->File = New.File;
300     OldSym->Size = New.Size;
301   }
302 }
303 
304 static void reportDuplicate(Symbol *Sym, InputFile *NewFile,
305                             InputSectionBase *ErrSec, uint64_t ErrOffset) {
306   if (Config->AllowMultipleDefinition)
307     return;
308 
309   Defined *D = cast<Defined>(Sym);
310   if (!D->Section || !ErrSec) {
311     error("duplicate symbol: " + toString(*Sym) + "\n>>> defined in " +
312           toString(Sym->File) + "\n>>> defined in " + toString(NewFile));
313     return;
314   }
315 
316   // Construct and print an error message in the form of:
317   //
318   //   ld.lld: error: duplicate symbol: foo
319   //   >>> defined at bar.c:30
320   //   >>>            bar.o (/home/alice/src/bar.o)
321   //   >>> defined at baz.c:563
322   //   >>>            baz.o in archive libbaz.a
323   auto *Sec1 = cast<InputSectionBase>(D->Section);
324   std::string Src1 = Sec1->getSrcMsg(*Sym, D->Value);
325   std::string Obj1 = Sec1->getObjMsg(D->Value);
326   std::string Src2 = ErrSec->getSrcMsg(*Sym, ErrOffset);
327   std::string Obj2 = ErrSec->getObjMsg(ErrOffset);
328 
329   std::string Msg = "duplicate symbol: " + toString(*Sym) + "\n>>> defined at ";
330   if (!Src1.empty())
331     Msg += Src1 + "\n>>>            ";
332   Msg += Obj1 + "\n>>> defined at ";
333   if (!Src2.empty())
334     Msg += Src2 + "\n>>>            ";
335   Msg += Obj2;
336   error(Msg);
337 }
338 
339 static void addDefined(Symbol *Old, const Defined &New) {
340   int Cmp = compare(Old, &New);
341   if (Cmp > 0)
342     Old->replace(New);
343   else if (Cmp == 0)
344     reportDuplicate(Old, New.File,
345                     dyn_cast_or_null<InputSectionBase>(New.Section), New.Value);
346 }
347 
348 static void addShared(Symbol *Old, const SharedSymbol &New) {
349   if (Old->Visibility == STV_DEFAULT && (Old->isUndefined() || Old->isLazy())) {
350     // An undefined symbol with non default visibility must be satisfied
351     // in the same DSO.
352     uint8_t Binding = Old->Binding;
353     Old->replace(New);
354     Old->Binding = Binding;
355   }
356 }
357 
358 Symbol *SymbolTable::find(StringRef Name) {
359   auto It = SymMap.find(CachedHashStringRef(Name));
360   if (It == SymMap.end())
361     return nullptr;
362   if (It->second == -1)
363     return nullptr;
364   return SymVector[It->second];
365 }
366 
367 template <class LazyT> static void addLazy(Symbol *Old, const LazyT &New) {
368   if (!Old->isUndefined())
369     return;
370 
371   // An undefined weak will not fetch archive members. See comment on Lazy in
372   // Symbols.h for the details.
373   if (Old->isWeak()) {
374     uint8_t Type = Old->Type;
375     Old->replace(New);
376     Old->Type = Type;
377     Old->Binding = STB_WEAK;
378     return;
379   }
380 
381   if (InputFile *F = New.fetch())
382     parseFile(F);
383 }
384 
385 static void addLazyArchive(Symbol *Old, const LazyArchive &New) {
386   addLazy(Old, New);
387 }
388 
389 static void addLazyObject(Symbol *Old, const LazyObject &New) {
390   addLazy(Old, New);
391 }
392 
393 void SymbolTable::fetchLazy(Symbol *Sym) {
394   if (auto *S = dyn_cast<LazyArchive>(Sym)) {
395     if (InputFile *File = S->fetch())
396       parseFile(File);
397     return;
398   }
399 
400   auto *S = cast<LazyObject>(Sym);
401   if (InputFile *File = cast<LazyObjFile>(S->File)->fetch())
402     parseFile(File);
403 }
404 
405 // Initialize DemangledSyms with a map from demangled symbols to symbol
406 // objects. Used to handle "extern C++" directive in version scripts.
407 //
408 // The map will contain all demangled symbols. That can be very large,
409 // and in LLD we generally want to avoid do anything for each symbol.
410 // Then, why are we doing this? Here's why.
411 //
412 // Users can use "extern C++ {}" directive to match against demangled
413 // C++ symbols. For example, you can write a pattern such as
414 // "llvm::*::foo(int, ?)". Obviously, there's no way to handle this
415 // other than trying to match a pattern against all demangled symbols.
416 // So, if "extern C++" feature is used, we need to demangle all known
417 // symbols.
418 StringMap<std::vector<Symbol *>> &SymbolTable::getDemangledSyms() {
419   if (!DemangledSyms) {
420     DemangledSyms.emplace();
421     for (Symbol *Sym : SymVector) {
422       if (!Sym->isDefined() && !Sym->isCommon())
423         continue;
424       if (Optional<std::string> S = demangleItanium(Sym->getName()))
425         (*DemangledSyms)[*S].push_back(Sym);
426       else
427         (*DemangledSyms)[Sym->getName()].push_back(Sym);
428     }
429   }
430   return *DemangledSyms;
431 }
432 
433 std::vector<Symbol *> SymbolTable::findByVersion(SymbolVersion Ver) {
434   if (Ver.IsExternCpp)
435     return getDemangledSyms().lookup(Ver.Name);
436   if (Symbol *B = find(Ver.Name))
437     if (B->isDefined() || B->isCommon())
438       return {B};
439   return {};
440 }
441 
442 std::vector<Symbol *> SymbolTable::findAllByVersion(SymbolVersion Ver) {
443   std::vector<Symbol *> Res;
444   StringMatcher M(Ver.Name);
445 
446   if (Ver.IsExternCpp) {
447     for (auto &P : getDemangledSyms())
448       if (M.match(P.first()))
449         Res.insert(Res.end(), P.second.begin(), P.second.end());
450     return Res;
451   }
452 
453   for (Symbol *Sym : SymVector)
454     if ((Sym->isDefined() || Sym->isCommon()) && M.match(Sym->getName()))
455       Res.push_back(Sym);
456   return Res;
457 }
458 
459 // If there's only one anonymous version definition in a version
460 // script file, the script does not actually define any symbol version,
461 // but just specifies symbols visibilities.
462 void SymbolTable::handleAnonymousVersion() {
463   for (SymbolVersion &Ver : Config->VersionScriptGlobals)
464     assignExactVersion(Ver, VER_NDX_GLOBAL, "global");
465   for (SymbolVersion &Ver : Config->VersionScriptGlobals)
466     assignWildcardVersion(Ver, VER_NDX_GLOBAL);
467   for (SymbolVersion &Ver : Config->VersionScriptLocals)
468     assignExactVersion(Ver, VER_NDX_LOCAL, "local");
469   for (SymbolVersion &Ver : Config->VersionScriptLocals)
470     assignWildcardVersion(Ver, VER_NDX_LOCAL);
471 }
472 
473 // Handles -dynamic-list.
474 void SymbolTable::handleDynamicList() {
475   for (SymbolVersion &Ver : Config->DynamicList) {
476     std::vector<Symbol *> Syms;
477     if (Ver.HasWildcard)
478       Syms = findAllByVersion(Ver);
479     else
480       Syms = findByVersion(Ver);
481 
482     for (Symbol *B : Syms) {
483       if (!Config->Shared)
484         B->ExportDynamic = true;
485       else if (B->includeInDynsym())
486         B->IsPreemptible = true;
487     }
488   }
489 }
490 
491 // Set symbol versions to symbols. This function handles patterns
492 // containing no wildcard characters.
493 void SymbolTable::assignExactVersion(SymbolVersion Ver, uint16_t VersionId,
494                                      StringRef VersionName) {
495   if (Ver.HasWildcard)
496     return;
497 
498   // Get a list of symbols which we need to assign the version to.
499   std::vector<Symbol *> Syms = findByVersion(Ver);
500   if (Syms.empty()) {
501     if (!Config->UndefinedVersion)
502       error("version script assignment of '" + VersionName + "' to symbol '" +
503             Ver.Name + "' failed: symbol not defined");
504     return;
505   }
506 
507   // Assign the version.
508   for (Symbol *Sym : Syms) {
509     // Skip symbols containing version info because symbol versions
510     // specified by symbol names take precedence over version scripts.
511     // See parseSymbolVersion().
512     if (Sym->getName().contains('@'))
513       continue;
514 
515     if (Sym->VersionId != Config->DefaultSymbolVersion &&
516         Sym->VersionId != VersionId)
517       error("duplicate symbol '" + Ver.Name + "' in version script");
518     Sym->VersionId = VersionId;
519   }
520 }
521 
522 void SymbolTable::assignWildcardVersion(SymbolVersion Ver, uint16_t VersionId) {
523   if (!Ver.HasWildcard)
524     return;
525 
526   // Exact matching takes precendence over fuzzy matching,
527   // so we set a version to a symbol only if no version has been assigned
528   // to the symbol. This behavior is compatible with GNU.
529   for (Symbol *B : findAllByVersion(Ver))
530     if (B->VersionId == Config->DefaultSymbolVersion)
531       B->VersionId = VersionId;
532 }
533 
534 // This function processes version scripts by updating VersionId
535 // member of symbols.
536 void SymbolTable::scanVersionScript() {
537   // Handle edge cases first.
538   handleAnonymousVersion();
539   handleDynamicList();
540 
541   // Now we have version definitions, so we need to set version ids to symbols.
542   // Each version definition has a glob pattern, and all symbols that match
543   // with the pattern get that version.
544 
545   // First, we assign versions to exact matching symbols,
546   // i.e. version definitions not containing any glob meta-characters.
547   for (VersionDefinition &V : Config->VersionDefinitions)
548     for (SymbolVersion &Ver : V.Globals)
549       assignExactVersion(Ver, V.Id, V.Name);
550 
551   // Next, we assign versions to fuzzy matching symbols,
552   // i.e. version definitions containing glob meta-characters.
553   // Note that because the last match takes precedence over previous matches,
554   // we iterate over the definitions in the reverse order.
555   for (VersionDefinition &V : llvm::reverse(Config->VersionDefinitions))
556     for (SymbolVersion &Ver : V.Globals)
557       assignWildcardVersion(Ver, V.Id);
558 
559   // Symbol themselves might know their versions because symbols
560   // can contain versions in the form of <name>@<version>.
561   // Let them parse and update their names to exclude version suffix.
562   for (Symbol *Sym : SymVector)
563     Sym->parseSymbolVersion();
564 }
565 
566 // Merge symbol properties.
567 //
568 // When we have many symbols of the same name, we choose one of them,
569 // and that's the result of symbol resolution. However, symbols that
570 // were not chosen still affect some symbol properties.
571 void elf::mergeSymbolProperties(Symbol *Old, const Symbol &New) {
572   // Merge symbol properties.
573   Old->ExportDynamic = Old->ExportDynamic || New.ExportDynamic;
574   Old->IsUsedInRegularObj = Old->IsUsedInRegularObj || New.IsUsedInRegularObj;
575 
576   // DSO symbols do not affect visibility in the output.
577   if (!New.isShared())
578     Old->Visibility = getMinVisibility(Old->Visibility, New.Visibility);
579 }
580 
581 void elf::resolveSymbol(Symbol *Old, const Symbol &New) {
582   mergeSymbolProperties(Old, New);
583 
584   if (Old->isPlaceholder()) {
585     Old->replace(New);
586     return;
587   }
588 
589   switch (New.kind()) {
590   case Symbol::UndefinedKind:
591     addUndefined(Old, cast<Undefined>(New));
592     break;
593   case Symbol::CommonKind:
594     addCommon(Old, cast<CommonSymbol>(New));
595     break;
596   case Symbol::DefinedKind:
597     addDefined(Old, cast<Defined>(New));
598     break;
599   case Symbol::LazyArchiveKind:
600     addLazyArchive(Old, cast<LazyArchive>(New));
601     break;
602   case Symbol::LazyObjectKind:
603     addLazyObject(Old, cast<LazyObject>(New));
604     break;
605   case Symbol::SharedKind:
606     addShared(Old, cast<SharedSymbol>(New));
607     break;
608   case Symbol::PlaceholderKind:
609     llvm_unreachable("bad symbol kind");
610   }
611 }
612 
613 template void SymbolTable::addCombinedLTOObject<ELF32LE>();
614 template void SymbolTable::addCombinedLTOObject<ELF32BE>();
615 template void SymbolTable::addCombinedLTOObject<ELF64LE>();
616 template void SymbolTable::addCombinedLTOObject<ELF64BE>();
617