1 //===- SymbolTable.cpp ----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "SymbolTable.h" 10 #include "Config.h" 11 #include "Driver.h" 12 #include "LTO.h" 13 #include "PDB.h" 14 #include "Symbols.h" 15 #include "lld/Common/ErrorHandler.h" 16 #include "lld/Common/Memory.h" 17 #include "lld/Common/Timer.h" 18 #include "llvm/IR/LLVMContext.h" 19 #include "llvm/Object/WindowsMachineFlag.h" 20 #include "llvm/Support/Debug.h" 21 #include "llvm/Support/raw_ostream.h" 22 #include <utility> 23 24 using namespace llvm; 25 26 namespace lld { 27 namespace coff { 28 29 static Timer LTOTimer("LTO", Timer::root()); 30 31 SymbolTable *Symtab; 32 33 void SymbolTable::addFile(InputFile *File) { 34 log("Reading " + toString(File)); 35 File->parse(); 36 37 MachineTypes MT = File->getMachineType(); 38 if (Config->Machine == IMAGE_FILE_MACHINE_UNKNOWN) { 39 Config->Machine = MT; 40 } else if (MT != IMAGE_FILE_MACHINE_UNKNOWN && Config->Machine != MT) { 41 error(toString(File) + ": machine type " + machineToStr(MT) + 42 " conflicts with " + machineToStr(Config->Machine)); 43 return; 44 } 45 46 if (auto *F = dyn_cast<ObjFile>(File)) { 47 ObjFile::Instances.push_back(F); 48 } else if (auto *F = dyn_cast<BitcodeFile>(File)) { 49 BitcodeFile::Instances.push_back(F); 50 } else if (auto *F = dyn_cast<ImportFile>(File)) { 51 ImportFile::Instances.push_back(F); 52 } 53 54 Driver->parseDirectives(File); 55 } 56 57 static void errorOrWarn(const Twine &S) { 58 if (Config->ForceUnresolved) 59 warn(S); 60 else 61 error(S); 62 } 63 64 // Returns the symbol in SC whose value is <= Addr that is closest to Addr. 65 // This is generally the global variable or function whose definition contains 66 // Addr. 67 static Symbol *getSymbol(SectionChunk *SC, uint32_t Addr) { 68 DefinedRegular *Candidate = nullptr; 69 70 for (Symbol *S : SC->File->getSymbols()) { 71 auto *D = dyn_cast_or_null<DefinedRegular>(S); 72 if (!D || D->getChunk() != SC || D->getValue() > Addr || 73 (Candidate && D->getValue() < Candidate->getValue())) 74 continue; 75 76 Candidate = D; 77 } 78 79 return Candidate; 80 } 81 82 // Given a file and the index of a symbol in that file, returns a description 83 // of all references to that symbol from that file. If no debug information is 84 // available, returns just the name of the file, else one string per actual 85 // reference as described in the debug info. 86 std::vector<std::string> getSymbolLocations(ObjFile *File, uint32_t SymIndex) { 87 struct Location { 88 Symbol *Sym; 89 std::pair<StringRef, uint32_t> FileLine; 90 }; 91 std::vector<Location> Locations; 92 93 for (Chunk *C : File->getChunks()) { 94 auto *SC = dyn_cast<SectionChunk>(C); 95 if (!SC) 96 continue; 97 for (const coff_relocation &R : SC->getRelocs()) { 98 if (R.SymbolTableIndex != SymIndex) 99 continue; 100 std::pair<StringRef, uint32_t> FileLine = 101 getFileLine(SC, R.VirtualAddress); 102 Symbol *Sym = getSymbol(SC, R.VirtualAddress); 103 if (!FileLine.first.empty() || Sym) 104 Locations.push_back({Sym, FileLine}); 105 } 106 } 107 108 if (Locations.empty()) 109 return std::vector<std::string>({"\n>>> referenced by " + toString(File)}); 110 111 std::vector<std::string> SymbolLocations(Locations.size()); 112 size_t I = 0; 113 for (Location Loc : Locations) { 114 llvm::raw_string_ostream OS(SymbolLocations[I++]); 115 OS << "\n>>> referenced by "; 116 if (!Loc.FileLine.first.empty()) 117 OS << Loc.FileLine.first << ":" << Loc.FileLine.second 118 << "\n>>> "; 119 OS << toString(File); 120 if (Loc.Sym) 121 OS << ":(" << toString(*Loc.Sym) << ')'; 122 } 123 return SymbolLocations; 124 } 125 126 // For an undefined symbol, stores all files referencing it and the index of 127 // the undefined symbol in each file. 128 struct UndefinedDiag { 129 Symbol *Sym; 130 struct File { 131 ObjFile *OFile; 132 uint64_t SymIndex; 133 }; 134 std::vector<File> Files; 135 }; 136 137 static void reportUndefinedSymbol(const UndefinedDiag &UndefDiag) { 138 std::string Out; 139 llvm::raw_string_ostream OS(Out); 140 OS << "undefined symbol: " << toString(*UndefDiag.Sym); 141 142 const size_t MaxUndefReferences = 10; 143 size_t I = 0, NumRefs = 0; 144 for (const UndefinedDiag::File &Ref : UndefDiag.Files) { 145 std::vector<std::string> SymbolLocations = 146 getSymbolLocations(Ref.OFile, Ref.SymIndex); 147 NumRefs += SymbolLocations.size(); 148 for (const std::string &S : SymbolLocations) { 149 if (I >= MaxUndefReferences) 150 break; 151 OS << S; 152 I++; 153 } 154 } 155 if (I < NumRefs) 156 OS << "\n>>> referenced " << NumRefs - I << " more times"; 157 errorOrWarn(OS.str()); 158 } 159 160 void SymbolTable::loadMinGWAutomaticImports() { 161 for (auto &I : SymMap) { 162 Symbol *Sym = I.second; 163 auto *Undef = dyn_cast<Undefined>(Sym); 164 if (!Undef) 165 continue; 166 if (!Sym->IsUsedInRegularObj) 167 continue; 168 169 StringRef Name = Undef->getName(); 170 171 if (Name.startswith("__imp_")) 172 continue; 173 // If we have an undefined symbol, but we have a Lazy representing a 174 // symbol we could load from file, make sure to load that. 175 Lazy *L = dyn_cast_or_null<Lazy>(find(("__imp_" + Name).str())); 176 if (!L || L->PendingArchiveLoad) 177 continue; 178 179 log("Loading lazy " + L->getName() + " from " + L->File->getName() + 180 " for automatic import"); 181 L->PendingArchiveLoad = true; 182 L->File->addMember(&L->Sym); 183 } 184 } 185 186 bool SymbolTable::handleMinGWAutomaticImport(Symbol *Sym, StringRef Name) { 187 if (Name.startswith("__imp_")) 188 return false; 189 Defined *Imp = dyn_cast_or_null<Defined>(find(("__imp_" + Name).str())); 190 if (!Imp) 191 return false; 192 193 // Replace the reference directly to a variable with a reference 194 // to the import address table instead. This obviously isn't right, 195 // but we mark the symbol as IsRuntimePseudoReloc, and a later pass 196 // will add runtime pseudo relocations for every relocation against 197 // this Symbol. The runtime pseudo relocation framework expects the 198 // reference itself to point at the IAT entry. 199 size_t ImpSize = 0; 200 if (isa<DefinedImportData>(Imp)) { 201 log("Automatically importing " + Name + " from " + 202 cast<DefinedImportData>(Imp)->getDLLName()); 203 ImpSize = sizeof(DefinedImportData); 204 } else if (isa<DefinedRegular>(Imp)) { 205 log("Automatically importing " + Name + " from " + 206 toString(cast<DefinedRegular>(Imp)->File)); 207 ImpSize = sizeof(DefinedRegular); 208 } else { 209 warn("unable to automatically import " + Name + " from " + Imp->getName() + 210 " from " + toString(cast<DefinedRegular>(Imp)->File) + 211 "; unexpected symbol type"); 212 return false; 213 } 214 Sym->replaceKeepingName(Imp, ImpSize); 215 Sym->IsRuntimePseudoReloc = true; 216 217 // There may exist symbols named .refptr.<name> which only consist 218 // of a single pointer to <name>. If it turns out <name> is 219 // automatically imported, we don't need to keep the .refptr.<name> 220 // pointer at all, but redirect all accesses to it to the IAT entry 221 // for __imp_<name> instead, and drop the whole .refptr.<name> chunk. 222 DefinedRegular *Refptr = 223 dyn_cast_or_null<DefinedRegular>(find((".refptr." + Name).str())); 224 if (Refptr && Refptr->getChunk()->getSize() == Config->Wordsize) { 225 SectionChunk *SC = dyn_cast_or_null<SectionChunk>(Refptr->getChunk()); 226 if (SC && SC->getRelocs().size() == 1 && *SC->symbols().begin() == Sym) { 227 log("Replacing .refptr." + Name + " with " + Imp->getName()); 228 Refptr->getChunk()->Live = false; 229 Refptr->replaceKeepingName(Imp, ImpSize); 230 } 231 } 232 return true; 233 } 234 235 void SymbolTable::reportRemainingUndefines() { 236 SmallPtrSet<Symbol *, 8> Undefs; 237 DenseMap<Symbol *, Symbol *> LocalImports; 238 239 for (auto &I : SymMap) { 240 Symbol *Sym = I.second; 241 auto *Undef = dyn_cast<Undefined>(Sym); 242 if (!Undef) 243 continue; 244 if (!Sym->IsUsedInRegularObj) 245 continue; 246 247 StringRef Name = Undef->getName(); 248 249 // A weak alias may have been resolved, so check for that. 250 if (Defined *D = Undef->getWeakAlias()) { 251 // We want to replace Sym with D. However, we can't just blindly 252 // copy sizeof(SymbolUnion) bytes from D to Sym because D may be an 253 // internal symbol, and internal symbols are stored as "unparented" 254 // Symbols. For that reason we need to check which type of symbol we 255 // are dealing with and copy the correct number of bytes. 256 if (isa<DefinedRegular>(D)) 257 memcpy(Sym, D, sizeof(DefinedRegular)); 258 else if (isa<DefinedAbsolute>(D)) 259 memcpy(Sym, D, sizeof(DefinedAbsolute)); 260 else 261 memcpy(Sym, D, sizeof(SymbolUnion)); 262 continue; 263 } 264 265 // If we can resolve a symbol by removing __imp_ prefix, do that. 266 // This odd rule is for compatibility with MSVC linker. 267 if (Name.startswith("__imp_")) { 268 Symbol *Imp = find(Name.substr(strlen("__imp_"))); 269 if (Imp && isa<Defined>(Imp)) { 270 auto *D = cast<Defined>(Imp); 271 replaceSymbol<DefinedLocalImport>(Sym, Name, D); 272 LocalImportChunks.push_back(cast<DefinedLocalImport>(Sym)->getChunk()); 273 LocalImports[Sym] = D; 274 continue; 275 } 276 } 277 278 // We don't want to report missing Microsoft precompiled headers symbols. 279 // A proper message will be emitted instead in PDBLinker::aquirePrecompObj 280 if (Name.contains("_PchSym_")) 281 continue; 282 283 if (Config->MinGW && handleMinGWAutomaticImport(Sym, Name)) 284 continue; 285 286 // Remaining undefined symbols are not fatal if /force is specified. 287 // They are replaced with dummy defined symbols. 288 if (Config->ForceUnresolved) 289 replaceSymbol<DefinedAbsolute>(Sym, Name, 0); 290 Undefs.insert(Sym); 291 } 292 293 if (Undefs.empty() && LocalImports.empty()) 294 return; 295 296 for (Symbol *B : Config->GCRoot) { 297 if (Undefs.count(B)) 298 errorOrWarn("<root>: undefined symbol: " + toString(*B)); 299 if (Config->WarnLocallyDefinedImported) 300 if (Symbol *Imp = LocalImports.lookup(B)) 301 warn("<root>: locally defined symbol imported: " + toString(*Imp) + 302 " (defined in " + toString(Imp->getFile()) + ") [LNK4217]"); 303 } 304 305 std::vector<UndefinedDiag> UndefDiags; 306 DenseMap<Symbol *, int> FirstDiag; 307 308 for (ObjFile *File : ObjFile::Instances) { 309 size_t SymIndex = (size_t)-1; 310 for (Symbol *Sym : File->getSymbols()) { 311 ++SymIndex; 312 if (!Sym) 313 continue; 314 if (Undefs.count(Sym)) { 315 auto it = FirstDiag.find(Sym); 316 if (it == FirstDiag.end()) { 317 FirstDiag[Sym] = UndefDiags.size(); 318 UndefDiags.push_back({Sym, {{File, SymIndex}}}); 319 } else { 320 UndefDiags[it->second].Files.push_back({File, SymIndex}); 321 } 322 } 323 if (Config->WarnLocallyDefinedImported) 324 if (Symbol *Imp = LocalImports.lookup(Sym)) 325 warn(toString(File) + 326 ": locally defined symbol imported: " + toString(*Imp) + 327 " (defined in " + toString(Imp->getFile()) + ") [LNK4217]"); 328 } 329 } 330 331 for (const UndefinedDiag& UndefDiag : UndefDiags) 332 reportUndefinedSymbol(UndefDiag); 333 } 334 335 std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name) { 336 bool Inserted = false; 337 Symbol *&Sym = SymMap[CachedHashStringRef(Name)]; 338 if (!Sym) { 339 Sym = reinterpret_cast<Symbol *>(make<SymbolUnion>()); 340 Sym->IsUsedInRegularObj = false; 341 Sym->PendingArchiveLoad = false; 342 Inserted = true; 343 } 344 return {Sym, Inserted}; 345 } 346 347 std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name, InputFile *File) { 348 std::pair<Symbol *, bool> Result = insert(Name); 349 if (!File || !isa<BitcodeFile>(File)) 350 Result.first->IsUsedInRegularObj = true; 351 return Result; 352 } 353 354 Symbol *SymbolTable::addUndefined(StringRef Name, InputFile *F, 355 bool IsWeakAlias) { 356 Symbol *S; 357 bool WasInserted; 358 std::tie(S, WasInserted) = insert(Name, F); 359 if (WasInserted || (isa<Lazy>(S) && IsWeakAlias)) { 360 replaceSymbol<Undefined>(S, Name); 361 return S; 362 } 363 if (auto *L = dyn_cast<Lazy>(S)) { 364 if (!S->PendingArchiveLoad) { 365 S->PendingArchiveLoad = true; 366 L->File->addMember(&L->Sym); 367 } 368 } 369 return S; 370 } 371 372 void SymbolTable::addLazy(ArchiveFile *F, const Archive::Symbol Sym) { 373 StringRef Name = Sym.getName(); 374 Symbol *S; 375 bool WasInserted; 376 std::tie(S, WasInserted) = insert(Name); 377 if (WasInserted) { 378 replaceSymbol<Lazy>(S, F, Sym); 379 return; 380 } 381 auto *U = dyn_cast<Undefined>(S); 382 if (!U || U->WeakAlias || S->PendingArchiveLoad) 383 return; 384 S->PendingArchiveLoad = true; 385 F->addMember(&Sym); 386 } 387 388 void SymbolTable::reportDuplicate(Symbol *Existing, InputFile *NewFile) { 389 std::string Msg = "duplicate symbol: " + toString(*Existing) + " in " + 390 toString(Existing->getFile()) + " and in " + 391 toString(NewFile); 392 393 if (Config->ForceMultiple) 394 warn(Msg); 395 else 396 error(Msg); 397 } 398 399 Symbol *SymbolTable::addAbsolute(StringRef N, COFFSymbolRef Sym) { 400 Symbol *S; 401 bool WasInserted; 402 std::tie(S, WasInserted) = insert(N, nullptr); 403 S->IsUsedInRegularObj = true; 404 if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) 405 replaceSymbol<DefinedAbsolute>(S, N, Sym); 406 else if (!isa<DefinedCOFF>(S)) 407 reportDuplicate(S, nullptr); 408 return S; 409 } 410 411 Symbol *SymbolTable::addAbsolute(StringRef N, uint64_t VA) { 412 Symbol *S; 413 bool WasInserted; 414 std::tie(S, WasInserted) = insert(N, nullptr); 415 S->IsUsedInRegularObj = true; 416 if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) 417 replaceSymbol<DefinedAbsolute>(S, N, VA); 418 else if (!isa<DefinedCOFF>(S)) 419 reportDuplicate(S, nullptr); 420 return S; 421 } 422 423 Symbol *SymbolTable::addSynthetic(StringRef N, Chunk *C) { 424 Symbol *S; 425 bool WasInserted; 426 std::tie(S, WasInserted) = insert(N, nullptr); 427 S->IsUsedInRegularObj = true; 428 if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) 429 replaceSymbol<DefinedSynthetic>(S, N, C); 430 else if (!isa<DefinedCOFF>(S)) 431 reportDuplicate(S, nullptr); 432 return S; 433 } 434 435 Symbol *SymbolTable::addRegular(InputFile *F, StringRef N, 436 const coff_symbol_generic *Sym, 437 SectionChunk *C) { 438 Symbol *S; 439 bool WasInserted; 440 std::tie(S, WasInserted) = insert(N, F); 441 if (WasInserted || !isa<DefinedRegular>(S)) 442 replaceSymbol<DefinedRegular>(S, F, N, /*IsCOMDAT*/ false, 443 /*IsExternal*/ true, Sym, C); 444 else 445 reportDuplicate(S, F); 446 return S; 447 } 448 449 std::pair<DefinedRegular *, bool> 450 SymbolTable::addComdat(InputFile *F, StringRef N, 451 const coff_symbol_generic *Sym) { 452 Symbol *S; 453 bool WasInserted; 454 std::tie(S, WasInserted) = insert(N, F); 455 if (WasInserted || !isa<DefinedRegular>(S)) { 456 replaceSymbol<DefinedRegular>(S, F, N, /*IsCOMDAT*/ true, 457 /*IsExternal*/ true, Sym, nullptr); 458 return {cast<DefinedRegular>(S), true}; 459 } 460 auto *ExistingSymbol = cast<DefinedRegular>(S); 461 if (!ExistingSymbol->isCOMDAT()) 462 reportDuplicate(S, F); 463 return {ExistingSymbol, false}; 464 } 465 466 Symbol *SymbolTable::addCommon(InputFile *F, StringRef N, uint64_t Size, 467 const coff_symbol_generic *Sym, CommonChunk *C) { 468 Symbol *S; 469 bool WasInserted; 470 std::tie(S, WasInserted) = insert(N, F); 471 if (WasInserted || !isa<DefinedCOFF>(S)) 472 replaceSymbol<DefinedCommon>(S, F, N, Size, Sym, C); 473 else if (auto *DC = dyn_cast<DefinedCommon>(S)) 474 if (Size > DC->getSize()) 475 replaceSymbol<DefinedCommon>(S, F, N, Size, Sym, C); 476 return S; 477 } 478 479 Symbol *SymbolTable::addImportData(StringRef N, ImportFile *F) { 480 Symbol *S; 481 bool WasInserted; 482 std::tie(S, WasInserted) = insert(N, nullptr); 483 S->IsUsedInRegularObj = true; 484 if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) { 485 replaceSymbol<DefinedImportData>(S, N, F); 486 return S; 487 } 488 489 reportDuplicate(S, F); 490 return nullptr; 491 } 492 493 Symbol *SymbolTable::addImportThunk(StringRef Name, DefinedImportData *ID, 494 uint16_t Machine) { 495 Symbol *S; 496 bool WasInserted; 497 std::tie(S, WasInserted) = insert(Name, nullptr); 498 S->IsUsedInRegularObj = true; 499 if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) { 500 replaceSymbol<DefinedImportThunk>(S, Name, ID, Machine); 501 return S; 502 } 503 504 reportDuplicate(S, ID->File); 505 return nullptr; 506 } 507 508 std::vector<Chunk *> SymbolTable::getChunks() { 509 std::vector<Chunk *> Res; 510 for (ObjFile *File : ObjFile::Instances) { 511 ArrayRef<Chunk *> V = File->getChunks(); 512 Res.insert(Res.end(), V.begin(), V.end()); 513 } 514 return Res; 515 } 516 517 Symbol *SymbolTable::find(StringRef Name) { 518 return SymMap.lookup(CachedHashStringRef(Name)); 519 } 520 521 Symbol *SymbolTable::findUnderscore(StringRef Name) { 522 if (Config->Machine == I386) 523 return find(("_" + Name).str()); 524 return find(Name); 525 } 526 527 // Return all symbols that start with Prefix, possibly ignoring the first 528 // character of Prefix or the first character symbol. 529 std::vector<Symbol *> SymbolTable::getSymsWithPrefix(StringRef Prefix) { 530 std::vector<Symbol *> Syms; 531 for (auto Pair : SymMap) { 532 StringRef Name = Pair.first.val(); 533 if (Name.startswith(Prefix) || Name.startswith(Prefix.drop_front()) || 534 Name.drop_front().startswith(Prefix) || 535 Name.drop_front().startswith(Prefix.drop_front())) { 536 Syms.push_back(Pair.second); 537 } 538 } 539 return Syms; 540 } 541 542 Symbol *SymbolTable::findMangle(StringRef Name) { 543 if (Symbol *Sym = find(Name)) 544 if (!isa<Undefined>(Sym)) 545 return Sym; 546 547 // Efficient fuzzy string lookup is impossible with a hash table, so iterate 548 // the symbol table once and collect all possibly matching symbols into this 549 // vector. Then compare each possibly matching symbol with each possible 550 // mangling. 551 std::vector<Symbol *> Syms = getSymsWithPrefix(Name); 552 auto FindByPrefix = [&Syms](const Twine &T) -> Symbol * { 553 std::string Prefix = T.str(); 554 for (auto *S : Syms) 555 if (S->getName().startswith(Prefix)) 556 return S; 557 return nullptr; 558 }; 559 560 // For non-x86, just look for C++ functions. 561 if (Config->Machine != I386) 562 return FindByPrefix("?" + Name + "@@Y"); 563 564 if (!Name.startswith("_")) 565 return nullptr; 566 // Search for x86 stdcall function. 567 if (Symbol *S = FindByPrefix(Name + "@")) 568 return S; 569 // Search for x86 fastcall function. 570 if (Symbol *S = FindByPrefix("@" + Name.substr(1) + "@")) 571 return S; 572 // Search for x86 vectorcall function. 573 if (Symbol *S = FindByPrefix(Name.substr(1) + "@@")) 574 return S; 575 // Search for x86 C++ non-member function. 576 return FindByPrefix("?" + Name.substr(1) + "@@Y"); 577 } 578 579 Symbol *SymbolTable::addUndefined(StringRef Name) { 580 return addUndefined(Name, nullptr, false); 581 } 582 583 std::vector<StringRef> SymbolTable::compileBitcodeFiles() { 584 LTO.reset(new BitcodeCompiler); 585 for (BitcodeFile *F : BitcodeFile::Instances) 586 LTO->add(*F); 587 return LTO->compile(); 588 } 589 590 void SymbolTable::addCombinedLTOObjects() { 591 if (BitcodeFile::Instances.empty()) 592 return; 593 594 ScopedTimer T(LTOTimer); 595 for (StringRef Object : compileBitcodeFiles()) { 596 auto *Obj = make<ObjFile>(MemoryBufferRef(Object, "lto.tmp")); 597 Obj->parse(); 598 ObjFile::Instances.push_back(Obj); 599 } 600 } 601 602 } // namespace coff 603 } // namespace lld 604