1 //===- SymbolTable.cpp ----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Symbol table is a bag of all known symbols. We put all symbols of 10 // all input files to the symbol table. The symbol table is basically 11 // a hash table with the logic to resolve symbol name conflicts using 12 // the symbol types. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "SymbolTable.h" 17 #include "Config.h" 18 #include "LinkerScript.h" 19 #include "Symbols.h" 20 #include "SyntheticSections.h" 21 #include "lld/Common/ErrorHandler.h" 22 #include "lld/Common/Memory.h" 23 #include "lld/Common/Strings.h" 24 #include "llvm/ADT/STLExtras.h" 25 26 using namespace llvm; 27 using namespace llvm::object; 28 using namespace llvm::ELF; 29 30 using namespace lld; 31 using namespace lld::elf; 32 33 SymbolTable *elf::Symtab; 34 35 // This function is where all the optimizations of link-time 36 // optimization happens. When LTO is in use, some input files are 37 // not in native object file format but in the LLVM bitcode format. 38 // This function compiles bitcode files into a few big native files 39 // using LLVM functions and replaces bitcode symbols with the results. 40 // Because all bitcode files that the program consists of are passed 41 // to the compiler at once, it can do whole-program optimization. 42 template <class ELFT> void SymbolTable::addCombinedLTOObject() { 43 // Compile bitcode files and replace bitcode symbols. 44 LTO.reset(new BitcodeCompiler); 45 for (BitcodeFile *F : BitcodeFiles) 46 LTO->add(*F); 47 48 for (InputFile *File : LTO->compile()) { 49 DenseMap<CachedHashStringRef, const InputFile *> DummyGroups; 50 auto *Obj = cast<ObjFile<ELFT>>(File); 51 Obj->parse(DummyGroups); 52 for (Symbol *Sym : Obj->getGlobalSymbols()) 53 Sym->parseSymbolVersion(); 54 ObjectFiles.push_back(File); 55 } 56 } 57 58 // Set a flag for --trace-symbol so that we can print out a log message 59 // if a new symbol with the same name is inserted into the symbol table. 60 void SymbolTable::trace(StringRef Name) { 61 SymMap.insert({CachedHashStringRef(Name), -1}); 62 } 63 64 void SymbolTable::wrap(Symbol *Sym, Symbol *Real, Symbol *Wrap) { 65 // Swap symbols as instructed by -wrap. 66 int &Idx1 = SymMap[CachedHashStringRef(Sym->getName())]; 67 int &Idx2 = SymMap[CachedHashStringRef(Real->getName())]; 68 int &Idx3 = SymMap[CachedHashStringRef(Wrap->getName())]; 69 70 Idx2 = Idx1; 71 Idx1 = Idx3; 72 73 // Now renaming is complete. No one refers Real symbol. We could leave 74 // Real as-is, but if Real is written to the symbol table, that may 75 // contain irrelevant values. So, we copy all values from Sym to Real. 76 StringRef S = Real->getName(); 77 memcpy(Real, Sym, sizeof(SymbolUnion)); 78 Real->setName(S); 79 } 80 81 static uint8_t getMinVisibility(uint8_t VA, uint8_t VB) { 82 if (VA == STV_DEFAULT) 83 return VB; 84 if (VB == STV_DEFAULT) 85 return VA; 86 return std::min(VA, VB); 87 } 88 89 // Find an existing symbol or create a new one. 90 Symbol *SymbolTable::insert(StringRef Name) { 91 // <name>@@<version> means the symbol is the default version. In that 92 // case <name>@@<version> will be used to resolve references to <name>. 93 // 94 // Since this is a hot path, the following string search code is 95 // optimized for speed. StringRef::find(char) is much faster than 96 // StringRef::find(StringRef). 97 size_t Pos = Name.find('@'); 98 if (Pos != StringRef::npos && Pos + 1 < Name.size() && Name[Pos + 1] == '@') 99 Name = Name.take_front(Pos); 100 101 auto P = SymMap.insert({CachedHashStringRef(Name), (int)SymVector.size()}); 102 int &SymIndex = P.first->second; 103 bool IsNew = P.second; 104 bool Traced = false; 105 106 if (SymIndex == -1) { 107 SymIndex = SymVector.size(); 108 IsNew = true; 109 Traced = true; 110 } 111 112 if (!IsNew) 113 return SymVector[SymIndex]; 114 115 Symbol *Sym = reinterpret_cast<Symbol *>(make<SymbolUnion>()); 116 SymVector.push_back(Sym); 117 118 Sym->SymbolKind = Symbol::PlaceholderKind; 119 Sym->VersionId = Config->DefaultSymbolVersion; 120 Sym->Visibility = STV_DEFAULT; 121 Sym->IsUsedInRegularObj = false; 122 Sym->ExportDynamic = false; 123 Sym->CanInline = true; 124 Sym->Traced = Traced; 125 Sym->ScriptDefined = false; 126 return Sym; 127 } 128 129 Symbol *SymbolTable::addSymbol(const Symbol &New) { 130 Symbol *Old = Symtab->insert(New.getName()); 131 resolveSymbol(Old, New); 132 return Old; 133 } 134 135 static void addUndefined(Symbol *Old, const Undefined &New) { 136 // An undefined symbol with non default visibility must be satisfied 137 // in the same DSO. 138 // 139 // If this is a non-weak defined symbol in a discarded section, override the 140 // existing undefined symbol for better error message later. 141 if ((Old->isShared() && New.Visibility != STV_DEFAULT) || 142 (Old->isUndefined() && New.Binding != STB_WEAK && New.DiscardedSecIdx)) { 143 Old->replace(New); 144 return; 145 } 146 147 if (Old->isShared() || Old->isLazy() || 148 (Old->isUndefined() && New.Binding != STB_WEAK)) 149 Old->Binding = New.Binding; 150 151 if (Old->isLazy()) { 152 // An undefined weak will not fetch archive members. See comment on Lazy in 153 // Symbols.h for the details. 154 if (New.Binding == STB_WEAK) { 155 Old->Type = New.Type; 156 return; 157 } 158 159 // Do extra check for --warn-backrefs. 160 // 161 // --warn-backrefs is an option to prevent an undefined reference from 162 // fetching an archive member written earlier in the command line. It can be 163 // used to keep compatibility with GNU linkers to some degree. 164 // I'll explain the feature and why you may find it useful in this comment. 165 // 166 // lld's symbol resolution semantics is more relaxed than traditional Unix 167 // linkers. For example, 168 // 169 // ld.lld foo.a bar.o 170 // 171 // succeeds even if bar.o contains an undefined symbol that has to be 172 // resolved by some object file in foo.a. Traditional Unix linkers don't 173 // allow this kind of backward reference, as they visit each file only once 174 // from left to right in the command line while resolving all undefined 175 // symbols at the moment of visiting. 176 // 177 // In the above case, since there's no undefined symbol when a linker visits 178 // foo.a, no files are pulled out from foo.a, and because the linker forgets 179 // about foo.a after visiting, it can't resolve undefined symbols in bar.o 180 // that could have been resolved otherwise. 181 // 182 // That lld accepts more relaxed form means that (besides it'd make more 183 // sense) you can accidentally write a command line or a build file that 184 // works only with lld, even if you have a plan to distribute it to wider 185 // users who may be using GNU linkers. With --warn-backrefs, you can detect 186 // a library order that doesn't work with other Unix linkers. 187 // 188 // The option is also useful to detect cyclic dependencies between static 189 // archives. Again, lld accepts 190 // 191 // ld.lld foo.a bar.a 192 // 193 // even if foo.a and bar.a depend on each other. With --warn-backrefs, it is 194 // handled as an error. 195 // 196 // Here is how the option works. We assign a group ID to each file. A file 197 // with a smaller group ID can pull out object files from an archive file 198 // with an equal or greater group ID. Otherwise, it is a reverse dependency 199 // and an error. 200 // 201 // A file outside --{start,end}-group gets a fresh ID when instantiated. All 202 // files within the same --{start,end}-group get the same group ID. E.g. 203 // 204 // ld.lld A B --start-group C D --end-group E 205 // 206 // A forms group 0. B form group 1. C and D (including their member object 207 // files) form group 2. E forms group 3. I think that you can see how this 208 // group assignment rule simulates the traditional linker's semantics. 209 bool Backref = Config->WarnBackrefs && New.File && 210 Old->File->GroupId < New.File->GroupId; 211 Symtab->fetchLazy(Old); 212 213 // We don't report backward references to weak symbols as they can be 214 // overridden later. 215 if (Backref && !Old->isWeak()) 216 warn("backward reference detected: " + New.getName() + " in " + 217 toString(New.File) + " refers to " + toString(Old->File)); 218 } 219 } 220 221 // Using .symver foo,foo@@VER unfortunately creates two symbols: foo and 222 // foo@@VER. We want to effectively ignore foo, so give precedence to 223 // foo@@VER. 224 // FIXME: If users can transition to using 225 // .symver foo,foo@@@VER 226 // we can delete this hack. 227 static int compareVersion(StringRef OldName, StringRef NewName) { 228 bool A = OldName.contains("@@"); 229 bool B = NewName.contains("@@"); 230 if (!A && B) 231 return 1; 232 if (A && !B) 233 return -1; 234 return 0; 235 } 236 237 // Compare two symbols. Return 1 if the new symbol should win, -1 if 238 // the new symbol should lose, or 0 if there is a conflict. 239 static int compare(const Symbol *Old, const Symbol *New) { 240 assert(New->isDefined() || New->isCommon()); 241 242 if (!Old->isDefined() && !Old->isCommon()) 243 return 1; 244 245 if (int Cmp = compareVersion(Old->getName(), New->getName())) 246 return Cmp; 247 248 if (New->isWeak()) 249 return -1; 250 251 if (Old->isWeak()) 252 return 1; 253 254 if (Old->isCommon() && New->isCommon()) { 255 if (Config->WarnCommon) 256 warn("multiple common of " + Old->getName()); 257 return 0; 258 } 259 260 if (Old->isCommon()) { 261 if (Config->WarnCommon) 262 warn("common " + Old->getName() + " is overridden"); 263 return 1; 264 } 265 266 if (New->isCommon()) { 267 if (Config->WarnCommon) 268 warn("common " + Old->getName() + " is overridden"); 269 return -1; 270 } 271 272 auto *OldSym = cast<Defined>(Old); 273 auto *NewSym = cast<Defined>(New); 274 275 if (New->File && isa<BitcodeFile>(New->File)) 276 return 0; 277 278 if (!OldSym->Section && !NewSym->Section && OldSym->Value == NewSym->Value && 279 NewSym->Binding == STB_GLOBAL) 280 return -1; 281 282 return 0; 283 } 284 285 static void addCommon(Symbol *Old, const CommonSymbol &New) { 286 int Cmp = compare(Old, &New); 287 if (Cmp < 0) 288 return; 289 290 if (Cmp > 0) { 291 Old->replace(New); 292 return; 293 } 294 295 CommonSymbol *OldSym = cast<CommonSymbol>(Old); 296 297 OldSym->Alignment = std::max(OldSym->Alignment, New.Alignment); 298 if (OldSym->Size < New.Size) { 299 OldSym->File = New.File; 300 OldSym->Size = New.Size; 301 } 302 } 303 304 static void reportDuplicate(Symbol *Sym, InputFile *NewFile, 305 InputSectionBase *ErrSec, uint64_t ErrOffset) { 306 if (Config->AllowMultipleDefinition) 307 return; 308 309 Defined *D = cast<Defined>(Sym); 310 if (!D->Section || !ErrSec) { 311 error("duplicate symbol: " + toString(*Sym) + "\n>>> defined in " + 312 toString(Sym->File) + "\n>>> defined in " + toString(NewFile)); 313 return; 314 } 315 316 // Construct and print an error message in the form of: 317 // 318 // ld.lld: error: duplicate symbol: foo 319 // >>> defined at bar.c:30 320 // >>> bar.o (/home/alice/src/bar.o) 321 // >>> defined at baz.c:563 322 // >>> baz.o in archive libbaz.a 323 auto *Sec1 = cast<InputSectionBase>(D->Section); 324 std::string Src1 = Sec1->getSrcMsg(*Sym, D->Value); 325 std::string Obj1 = Sec1->getObjMsg(D->Value); 326 std::string Src2 = ErrSec->getSrcMsg(*Sym, ErrOffset); 327 std::string Obj2 = ErrSec->getObjMsg(ErrOffset); 328 329 std::string Msg = "duplicate symbol: " + toString(*Sym) + "\n>>> defined at "; 330 if (!Src1.empty()) 331 Msg += Src1 + "\n>>> "; 332 Msg += Obj1 + "\n>>> defined at "; 333 if (!Src2.empty()) 334 Msg += Src2 + "\n>>> "; 335 Msg += Obj2; 336 error(Msg); 337 } 338 339 static void addDefined(Symbol *Old, const Defined &New) { 340 int Cmp = compare(Old, &New); 341 if (Cmp > 0) 342 Old->replace(New); 343 else if (Cmp == 0) 344 reportDuplicate(Old, New.File, 345 dyn_cast_or_null<InputSectionBase>(New.Section), New.Value); 346 } 347 348 static void addShared(Symbol *Old, const SharedSymbol &New) { 349 if (Old->Visibility == STV_DEFAULT && (Old->isUndefined() || Old->isLazy())) { 350 // An undefined symbol with non default visibility must be satisfied 351 // in the same DSO. 352 uint8_t Binding = Old->Binding; 353 Old->replace(New); 354 Old->Binding = Binding; 355 } 356 } 357 358 Symbol *SymbolTable::find(StringRef Name) { 359 auto It = SymMap.find(CachedHashStringRef(Name)); 360 if (It == SymMap.end()) 361 return nullptr; 362 if (It->second == -1) 363 return nullptr; 364 return SymVector[It->second]; 365 } 366 367 template <class LazyT> static void addLazy(Symbol *Old, const LazyT &New) { 368 if (!Old->isUndefined()) 369 return; 370 371 // An undefined weak will not fetch archive members. See comment on Lazy in 372 // Symbols.h for the details. 373 if (Old->isWeak()) { 374 uint8_t Type = Old->Type; 375 Old->replace(New); 376 Old->Type = Type; 377 Old->Binding = STB_WEAK; 378 return; 379 } 380 381 if (InputFile *F = New.fetch()) 382 parseFile(F); 383 } 384 385 static void addLazyArchive(Symbol *Old, const LazyArchive &New) { 386 addLazy(Old, New); 387 } 388 389 static void addLazyObject(Symbol *Old, const LazyObject &New) { 390 addLazy(Old, New); 391 } 392 393 void SymbolTable::fetchLazy(Symbol *Sym) { 394 if (auto *S = dyn_cast<LazyArchive>(Sym)) { 395 if (InputFile *File = S->fetch()) 396 parseFile(File); 397 return; 398 } 399 400 auto *S = cast<LazyObject>(Sym); 401 if (InputFile *File = cast<LazyObjFile>(S->File)->fetch()) 402 parseFile(File); 403 } 404 405 // Initialize DemangledSyms with a map from demangled symbols to symbol 406 // objects. Used to handle "extern C++" directive in version scripts. 407 // 408 // The map will contain all demangled symbols. That can be very large, 409 // and in LLD we generally want to avoid do anything for each symbol. 410 // Then, why are we doing this? Here's why. 411 // 412 // Users can use "extern C++ {}" directive to match against demangled 413 // C++ symbols. For example, you can write a pattern such as 414 // "llvm::*::foo(int, ?)". Obviously, there's no way to handle this 415 // other than trying to match a pattern against all demangled symbols. 416 // So, if "extern C++" feature is used, we need to demangle all known 417 // symbols. 418 StringMap<std::vector<Symbol *>> &SymbolTable::getDemangledSyms() { 419 if (!DemangledSyms) { 420 DemangledSyms.emplace(); 421 for (Symbol *Sym : SymVector) { 422 if (!Sym->isDefined() && !Sym->isCommon()) 423 continue; 424 if (Optional<std::string> S = demangleItanium(Sym->getName())) 425 (*DemangledSyms)[*S].push_back(Sym); 426 else 427 (*DemangledSyms)[Sym->getName()].push_back(Sym); 428 } 429 } 430 return *DemangledSyms; 431 } 432 433 std::vector<Symbol *> SymbolTable::findByVersion(SymbolVersion Ver) { 434 if (Ver.IsExternCpp) 435 return getDemangledSyms().lookup(Ver.Name); 436 if (Symbol *B = find(Ver.Name)) 437 if (B->isDefined() || B->isCommon()) 438 return {B}; 439 return {}; 440 } 441 442 std::vector<Symbol *> SymbolTable::findAllByVersion(SymbolVersion Ver) { 443 std::vector<Symbol *> Res; 444 StringMatcher M(Ver.Name); 445 446 if (Ver.IsExternCpp) { 447 for (auto &P : getDemangledSyms()) 448 if (M.match(P.first())) 449 Res.insert(Res.end(), P.second.begin(), P.second.end()); 450 return Res; 451 } 452 453 for (Symbol *Sym : SymVector) 454 if ((Sym->isDefined() || Sym->isCommon()) && M.match(Sym->getName())) 455 Res.push_back(Sym); 456 return Res; 457 } 458 459 // If there's only one anonymous version definition in a version 460 // script file, the script does not actually define any symbol version, 461 // but just specifies symbols visibilities. 462 void SymbolTable::handleAnonymousVersion() { 463 for (SymbolVersion &Ver : Config->VersionScriptGlobals) 464 assignExactVersion(Ver, VER_NDX_GLOBAL, "global"); 465 for (SymbolVersion &Ver : Config->VersionScriptGlobals) 466 assignWildcardVersion(Ver, VER_NDX_GLOBAL); 467 for (SymbolVersion &Ver : Config->VersionScriptLocals) 468 assignExactVersion(Ver, VER_NDX_LOCAL, "local"); 469 for (SymbolVersion &Ver : Config->VersionScriptLocals) 470 assignWildcardVersion(Ver, VER_NDX_LOCAL); 471 } 472 473 // Handles -dynamic-list. 474 void SymbolTable::handleDynamicList() { 475 for (SymbolVersion &Ver : Config->DynamicList) { 476 std::vector<Symbol *> Syms; 477 if (Ver.HasWildcard) 478 Syms = findAllByVersion(Ver); 479 else 480 Syms = findByVersion(Ver); 481 482 for (Symbol *B : Syms) { 483 if (!Config->Shared) 484 B->ExportDynamic = true; 485 else if (B->includeInDynsym()) 486 B->IsPreemptible = true; 487 } 488 } 489 } 490 491 // Set symbol versions to symbols. This function handles patterns 492 // containing no wildcard characters. 493 void SymbolTable::assignExactVersion(SymbolVersion Ver, uint16_t VersionId, 494 StringRef VersionName) { 495 if (Ver.HasWildcard) 496 return; 497 498 // Get a list of symbols which we need to assign the version to. 499 std::vector<Symbol *> Syms = findByVersion(Ver); 500 if (Syms.empty()) { 501 if (!Config->UndefinedVersion) 502 error("version script assignment of '" + VersionName + "' to symbol '" + 503 Ver.Name + "' failed: symbol not defined"); 504 return; 505 } 506 507 // Assign the version. 508 for (Symbol *Sym : Syms) { 509 // Skip symbols containing version info because symbol versions 510 // specified by symbol names take precedence over version scripts. 511 // See parseSymbolVersion(). 512 if (Sym->getName().contains('@')) 513 continue; 514 515 if (Sym->VersionId != Config->DefaultSymbolVersion && 516 Sym->VersionId != VersionId) 517 error("duplicate symbol '" + Ver.Name + "' in version script"); 518 Sym->VersionId = VersionId; 519 } 520 } 521 522 void SymbolTable::assignWildcardVersion(SymbolVersion Ver, uint16_t VersionId) { 523 if (!Ver.HasWildcard) 524 return; 525 526 // Exact matching takes precendence over fuzzy matching, 527 // so we set a version to a symbol only if no version has been assigned 528 // to the symbol. This behavior is compatible with GNU. 529 for (Symbol *B : findAllByVersion(Ver)) 530 if (B->VersionId == Config->DefaultSymbolVersion) 531 B->VersionId = VersionId; 532 } 533 534 // This function processes version scripts by updating VersionId 535 // member of symbols. 536 void SymbolTable::scanVersionScript() { 537 // Handle edge cases first. 538 handleAnonymousVersion(); 539 handleDynamicList(); 540 541 // Now we have version definitions, so we need to set version ids to symbols. 542 // Each version definition has a glob pattern, and all symbols that match 543 // with the pattern get that version. 544 545 // First, we assign versions to exact matching symbols, 546 // i.e. version definitions not containing any glob meta-characters. 547 for (VersionDefinition &V : Config->VersionDefinitions) 548 for (SymbolVersion &Ver : V.Globals) 549 assignExactVersion(Ver, V.Id, V.Name); 550 551 // Next, we assign versions to fuzzy matching symbols, 552 // i.e. version definitions containing glob meta-characters. 553 // Note that because the last match takes precedence over previous matches, 554 // we iterate over the definitions in the reverse order. 555 for (VersionDefinition &V : llvm::reverse(Config->VersionDefinitions)) 556 for (SymbolVersion &Ver : V.Globals) 557 assignWildcardVersion(Ver, V.Id); 558 559 // Symbol themselves might know their versions because symbols 560 // can contain versions in the form of <name>@<version>. 561 // Let them parse and update their names to exclude version suffix. 562 for (Symbol *Sym : SymVector) 563 Sym->parseSymbolVersion(); 564 } 565 566 // Merge symbol properties. 567 // 568 // When we have many symbols of the same name, we choose one of them, 569 // and that's the result of symbol resolution. However, symbols that 570 // were not chosen still affect some symbol properties. 571 void elf::mergeSymbolProperties(Symbol *Old, const Symbol &New) { 572 // Merge symbol properties. 573 Old->ExportDynamic = Old->ExportDynamic || New.ExportDynamic; 574 Old->IsUsedInRegularObj = Old->IsUsedInRegularObj || New.IsUsedInRegularObj; 575 576 // DSO symbols do not affect visibility in the output. 577 if (!New.isShared()) 578 Old->Visibility = getMinVisibility(Old->Visibility, New.Visibility); 579 } 580 581 void elf::resolveSymbol(Symbol *Old, const Symbol &New) { 582 mergeSymbolProperties(Old, New); 583 584 if (Old->isPlaceholder()) { 585 Old->replace(New); 586 return; 587 } 588 589 switch (New.kind()) { 590 case Symbol::UndefinedKind: 591 addUndefined(Old, cast<Undefined>(New)); 592 break; 593 case Symbol::CommonKind: 594 addCommon(Old, cast<CommonSymbol>(New)); 595 break; 596 case Symbol::DefinedKind: 597 addDefined(Old, cast<Defined>(New)); 598 break; 599 case Symbol::LazyArchiveKind: 600 addLazyArchive(Old, cast<LazyArchive>(New)); 601 break; 602 case Symbol::LazyObjectKind: 603 addLazyObject(Old, cast<LazyObject>(New)); 604 break; 605 case Symbol::SharedKind: 606 addShared(Old, cast<SharedSymbol>(New)); 607 break; 608 case Symbol::PlaceholderKind: 609 llvm_unreachable("bad symbol kind"); 610 } 611 } 612 613 template void SymbolTable::addCombinedLTOObject<ELF32LE>(); 614 template void SymbolTable::addCombinedLTOObject<ELF32BE>(); 615 template void SymbolTable::addCombinedLTOObject<ELF64LE>(); 616 template void SymbolTable::addCombinedLTOObject<ELF64BE>(); 617