1 //===- SymbolTable.cpp ----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "SymbolTable.h" 10 #include "Config.h" 11 #include "Driver.h" 12 #include "LTO.h" 13 #include "PDB.h" 14 #include "Symbols.h" 15 #include "lld/Common/ErrorHandler.h" 16 #include "lld/Common/Memory.h" 17 #include "lld/Common/Timer.h" 18 #include "llvm/IR/LLVMContext.h" 19 #include "llvm/Object/WindowsMachineFlag.h" 20 #include "llvm/Support/Debug.h" 21 #include "llvm/Support/raw_ostream.h" 22 #include <utility> 23 24 using namespace llvm; 25 26 namespace lld { 27 namespace coff { 28 29 static Timer LTOTimer("LTO", Timer::root()); 30 31 SymbolTable *Symtab; 32 33 void SymbolTable::addFile(InputFile *File) { 34 log("Reading " + toString(File)); 35 File->parse(); 36 37 MachineTypes MT = File->getMachineType(); 38 if (Config->Machine == IMAGE_FILE_MACHINE_UNKNOWN) { 39 Config->Machine = MT; 40 } else if (MT != IMAGE_FILE_MACHINE_UNKNOWN && Config->Machine != MT) { 41 error(toString(File) + ": machine type " + machineToStr(MT) + 42 " conflicts with " + machineToStr(Config->Machine)); 43 return; 44 } 45 46 if (auto *F = dyn_cast<ObjFile>(File)) { 47 ObjFile::Instances.push_back(F); 48 } else if (auto *F = dyn_cast<BitcodeFile>(File)) { 49 BitcodeFile::Instances.push_back(F); 50 } else if (auto *F = dyn_cast<ImportFile>(File)) { 51 ImportFile::Instances.push_back(F); 52 } 53 54 Driver->parseDirectives(File); 55 } 56 57 static void errorOrWarn(const Twine &S) { 58 if (Config->ForceUnresolved) 59 warn(S); 60 else 61 error(S); 62 } 63 64 // Returns the symbol in SC whose value is <= Addr that is closest to Addr. 65 // This is generally the global variable or function whose definition contains 66 // Addr. 67 static Symbol *getSymbol(SectionChunk *SC, uint32_t Addr) { 68 DefinedRegular *Candidate = nullptr; 69 70 for (Symbol *S : SC->File->getSymbols()) { 71 auto *D = dyn_cast_or_null<DefinedRegular>(S); 72 if (!D || D->getChunk() != SC || D->getValue() > Addr || 73 (Candidate && D->getValue() < Candidate->getValue())) 74 continue; 75 76 Candidate = D; 77 } 78 79 return Candidate; 80 } 81 82 std::string getSymbolLocations(ObjFile *File, uint32_t SymIndex) { 83 struct Location { 84 Symbol *Sym; 85 std::pair<StringRef, uint32_t> FileLine; 86 }; 87 std::vector<Location> Locations; 88 89 for (Chunk *C : File->getChunks()) { 90 auto *SC = dyn_cast<SectionChunk>(C); 91 if (!SC) 92 continue; 93 for (const coff_relocation &R : SC->getRelocs()) { 94 if (R.SymbolTableIndex != SymIndex) 95 continue; 96 std::pair<StringRef, uint32_t> FileLine = 97 getFileLine(SC, R.VirtualAddress); 98 Symbol *Sym = getSymbol(SC, R.VirtualAddress); 99 if (!FileLine.first.empty() || Sym) 100 Locations.push_back({Sym, FileLine}); 101 } 102 } 103 104 if (Locations.empty()) 105 return "\n>>> referenced by " + toString(File); 106 107 std::string Out; 108 llvm::raw_string_ostream OS(Out); 109 for (Location Loc : Locations) { 110 OS << "\n>>> referenced by "; 111 if (!Loc.FileLine.first.empty()) 112 OS << Loc.FileLine.first << ":" << Loc.FileLine.second 113 << "\n>>> "; 114 OS << toString(File); 115 if (Loc.Sym) 116 OS << ":(" << toString(*Loc.Sym) << ')'; 117 } 118 return OS.str(); 119 } 120 121 void SymbolTable::loadMinGWAutomaticImports() { 122 for (auto &I : SymMap) { 123 Symbol *Sym = I.second; 124 auto *Undef = dyn_cast<Undefined>(Sym); 125 if (!Undef) 126 continue; 127 if (!Sym->IsUsedInRegularObj) 128 continue; 129 130 StringRef Name = Undef->getName(); 131 132 if (Name.startswith("__imp_")) 133 continue; 134 // If we have an undefined symbol, but we have a Lazy representing a 135 // symbol we could load from file, make sure to load that. 136 Lazy *L = dyn_cast_or_null<Lazy>(find(("__imp_" + Name).str())); 137 if (!L || L->PendingArchiveLoad) 138 continue; 139 140 log("Loading lazy " + L->getName() + " from " + L->File->getName() + 141 " for automatic import"); 142 L->PendingArchiveLoad = true; 143 L->File->addMember(&L->Sym); 144 } 145 } 146 147 bool SymbolTable::handleMinGWAutomaticImport(Symbol *Sym, StringRef Name) { 148 if (Name.startswith("__imp_")) 149 return false; 150 Defined *Imp = dyn_cast_or_null<Defined>(find(("__imp_" + Name).str())); 151 if (!Imp) 152 return false; 153 154 // Replace the reference directly to a variable with a reference 155 // to the import address table instead. This obviously isn't right, 156 // but we mark the symbol as IsRuntimePseudoReloc, and a later pass 157 // will add runtime pseudo relocations for every relocation against 158 // this Symbol. The runtime pseudo relocation framework expects the 159 // reference itself to point at the IAT entry. 160 size_t ImpSize = 0; 161 if (isa<DefinedImportData>(Imp)) { 162 log("Automatically importing " + Name + " from " + 163 cast<DefinedImportData>(Imp)->getDLLName()); 164 ImpSize = sizeof(DefinedImportData); 165 } else if (isa<DefinedRegular>(Imp)) { 166 log("Automatically importing " + Name + " from " + 167 toString(cast<DefinedRegular>(Imp)->File)); 168 ImpSize = sizeof(DefinedRegular); 169 } else { 170 warn("unable to automatically import " + Name + " from " + Imp->getName() + 171 " from " + toString(cast<DefinedRegular>(Imp)->File) + 172 "; unexpected symbol type"); 173 return false; 174 } 175 Sym->replaceKeepingName(Imp, ImpSize); 176 Sym->IsRuntimePseudoReloc = true; 177 178 // There may exist symbols named .refptr.<name> which only consist 179 // of a single pointer to <name>. If it turns out <name> is 180 // automatically imported, we don't need to keep the .refptr.<name> 181 // pointer at all, but redirect all accesses to it to the IAT entry 182 // for __imp_<name> instead, and drop the whole .refptr.<name> chunk. 183 DefinedRegular *Refptr = 184 dyn_cast_or_null<DefinedRegular>(find((".refptr." + Name).str())); 185 if (Refptr && Refptr->getChunk()->getSize() == Config->Wordsize) { 186 SectionChunk *SC = dyn_cast_or_null<SectionChunk>(Refptr->getChunk()); 187 if (SC && SC->getRelocs().size() == 1 && *SC->symbols().begin() == Sym) { 188 log("Replacing .refptr." + Name + " with " + Imp->getName()); 189 Refptr->getChunk()->Live = false; 190 Refptr->replaceKeepingName(Imp, ImpSize); 191 } 192 } 193 return true; 194 } 195 196 void SymbolTable::reportRemainingUndefines() { 197 SmallPtrSet<Symbol *, 8> Undefs; 198 DenseMap<Symbol *, Symbol *> LocalImports; 199 200 for (auto &I : SymMap) { 201 Symbol *Sym = I.second; 202 auto *Undef = dyn_cast<Undefined>(Sym); 203 if (!Undef) 204 continue; 205 if (!Sym->IsUsedInRegularObj) 206 continue; 207 208 StringRef Name = Undef->getName(); 209 210 // A weak alias may have been resolved, so check for that. 211 if (Defined *D = Undef->getWeakAlias()) { 212 // We want to replace Sym with D. However, we can't just blindly 213 // copy sizeof(SymbolUnion) bytes from D to Sym because D may be an 214 // internal symbol, and internal symbols are stored as "unparented" 215 // Symbols. For that reason we need to check which type of symbol we 216 // are dealing with and copy the correct number of bytes. 217 if (isa<DefinedRegular>(D)) 218 memcpy(Sym, D, sizeof(DefinedRegular)); 219 else if (isa<DefinedAbsolute>(D)) 220 memcpy(Sym, D, sizeof(DefinedAbsolute)); 221 else 222 memcpy(Sym, D, sizeof(SymbolUnion)); 223 continue; 224 } 225 226 // If we can resolve a symbol by removing __imp_ prefix, do that. 227 // This odd rule is for compatibility with MSVC linker. 228 if (Name.startswith("__imp_")) { 229 Symbol *Imp = find(Name.substr(strlen("__imp_"))); 230 if (Imp && isa<Defined>(Imp)) { 231 auto *D = cast<Defined>(Imp); 232 replaceSymbol<DefinedLocalImport>(Sym, Name, D); 233 LocalImportChunks.push_back(cast<DefinedLocalImport>(Sym)->getChunk()); 234 LocalImports[Sym] = D; 235 continue; 236 } 237 } 238 239 // We don't want to report missing Microsoft precompiled headers symbols. 240 // A proper message will be emitted instead in PDBLinker::aquirePrecompObj 241 if (Name.contains("_PchSym_")) 242 continue; 243 244 if (Config->MinGW && handleMinGWAutomaticImport(Sym, Name)) 245 continue; 246 247 // Remaining undefined symbols are not fatal if /force is specified. 248 // They are replaced with dummy defined symbols. 249 if (Config->ForceUnresolved) 250 replaceSymbol<DefinedAbsolute>(Sym, Name, 0); 251 Undefs.insert(Sym); 252 } 253 254 if (Undefs.empty() && LocalImports.empty()) 255 return; 256 257 for (Symbol *B : Config->GCRoot) { 258 if (Undefs.count(B)) 259 errorOrWarn("<root>: undefined symbol: " + toString(*B)); 260 if (Config->WarnLocallyDefinedImported) 261 if (Symbol *Imp = LocalImports.lookup(B)) 262 warn("<root>: locally defined symbol imported: " + toString(*Imp) + 263 " (defined in " + toString(Imp->getFile()) + ") [LNK4217]"); 264 } 265 266 for (ObjFile *File : ObjFile::Instances) { 267 size_t SymIndex = (size_t)-1; 268 for (Symbol *Sym : File->getSymbols()) { 269 ++SymIndex; 270 if (!Sym) 271 continue; 272 if (Undefs.count(Sym)) 273 errorOrWarn("undefined symbol: " + toString(*Sym) + 274 getSymbolLocations(File, SymIndex)); 275 if (Config->WarnLocallyDefinedImported) 276 if (Symbol *Imp = LocalImports.lookup(Sym)) 277 warn(toString(File) + 278 ": locally defined symbol imported: " + toString(*Imp) + 279 " (defined in " + toString(Imp->getFile()) + ") [LNK4217]"); 280 } 281 } 282 } 283 284 std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name) { 285 bool Inserted = false; 286 Symbol *&Sym = SymMap[CachedHashStringRef(Name)]; 287 if (!Sym) { 288 Sym = reinterpret_cast<Symbol *>(make<SymbolUnion>()); 289 Sym->IsUsedInRegularObj = false; 290 Sym->PendingArchiveLoad = false; 291 Inserted = true; 292 } 293 return {Sym, Inserted}; 294 } 295 296 std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name, InputFile *File) { 297 std::pair<Symbol *, bool> Result = insert(Name); 298 if (!File || !isa<BitcodeFile>(File)) 299 Result.first->IsUsedInRegularObj = true; 300 return Result; 301 } 302 303 Symbol *SymbolTable::addUndefined(StringRef Name, InputFile *F, 304 bool IsWeakAlias) { 305 Symbol *S; 306 bool WasInserted; 307 std::tie(S, WasInserted) = insert(Name, F); 308 if (WasInserted || (isa<Lazy>(S) && IsWeakAlias)) { 309 replaceSymbol<Undefined>(S, Name); 310 return S; 311 } 312 if (auto *L = dyn_cast<Lazy>(S)) { 313 if (!S->PendingArchiveLoad) { 314 S->PendingArchiveLoad = true; 315 L->File->addMember(&L->Sym); 316 } 317 } 318 return S; 319 } 320 321 void SymbolTable::addLazy(ArchiveFile *F, const Archive::Symbol Sym) { 322 StringRef Name = Sym.getName(); 323 Symbol *S; 324 bool WasInserted; 325 std::tie(S, WasInserted) = insert(Name); 326 if (WasInserted) { 327 replaceSymbol<Lazy>(S, F, Sym); 328 return; 329 } 330 auto *U = dyn_cast<Undefined>(S); 331 if (!U || U->WeakAlias || S->PendingArchiveLoad) 332 return; 333 S->PendingArchiveLoad = true; 334 F->addMember(&Sym); 335 } 336 337 void SymbolTable::reportDuplicate(Symbol *Existing, InputFile *NewFile) { 338 std::string Msg = "duplicate symbol: " + toString(*Existing) + " in " + 339 toString(Existing->getFile()) + " and in " + 340 toString(NewFile); 341 342 if (Config->ForceMultiple) 343 warn(Msg); 344 else 345 error(Msg); 346 } 347 348 Symbol *SymbolTable::addAbsolute(StringRef N, COFFSymbolRef Sym) { 349 Symbol *S; 350 bool WasInserted; 351 std::tie(S, WasInserted) = insert(N, nullptr); 352 S->IsUsedInRegularObj = true; 353 if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) 354 replaceSymbol<DefinedAbsolute>(S, N, Sym); 355 else if (!isa<DefinedCOFF>(S)) 356 reportDuplicate(S, nullptr); 357 return S; 358 } 359 360 Symbol *SymbolTable::addAbsolute(StringRef N, uint64_t VA) { 361 Symbol *S; 362 bool WasInserted; 363 std::tie(S, WasInserted) = insert(N, nullptr); 364 S->IsUsedInRegularObj = true; 365 if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) 366 replaceSymbol<DefinedAbsolute>(S, N, VA); 367 else if (!isa<DefinedCOFF>(S)) 368 reportDuplicate(S, nullptr); 369 return S; 370 } 371 372 Symbol *SymbolTable::addSynthetic(StringRef N, Chunk *C) { 373 Symbol *S; 374 bool WasInserted; 375 std::tie(S, WasInserted) = insert(N, nullptr); 376 S->IsUsedInRegularObj = true; 377 if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) 378 replaceSymbol<DefinedSynthetic>(S, N, C); 379 else if (!isa<DefinedCOFF>(S)) 380 reportDuplicate(S, nullptr); 381 return S; 382 } 383 384 Symbol *SymbolTable::addRegular(InputFile *F, StringRef N, 385 const coff_symbol_generic *Sym, 386 SectionChunk *C) { 387 Symbol *S; 388 bool WasInserted; 389 std::tie(S, WasInserted) = insert(N, F); 390 if (WasInserted || !isa<DefinedRegular>(S)) 391 replaceSymbol<DefinedRegular>(S, F, N, /*IsCOMDAT*/ false, 392 /*IsExternal*/ true, Sym, C); 393 else 394 reportDuplicate(S, F); 395 return S; 396 } 397 398 std::pair<DefinedRegular *, bool> 399 SymbolTable::addComdat(InputFile *F, StringRef N, 400 const coff_symbol_generic *Sym) { 401 Symbol *S; 402 bool WasInserted; 403 std::tie(S, WasInserted) = insert(N, F); 404 if (WasInserted || !isa<DefinedRegular>(S)) { 405 replaceSymbol<DefinedRegular>(S, F, N, /*IsCOMDAT*/ true, 406 /*IsExternal*/ true, Sym, nullptr); 407 return {cast<DefinedRegular>(S), true}; 408 } 409 auto *ExistingSymbol = cast<DefinedRegular>(S); 410 if (!ExistingSymbol->isCOMDAT()) 411 reportDuplicate(S, F); 412 return {ExistingSymbol, false}; 413 } 414 415 Symbol *SymbolTable::addCommon(InputFile *F, StringRef N, uint64_t Size, 416 const coff_symbol_generic *Sym, CommonChunk *C) { 417 Symbol *S; 418 bool WasInserted; 419 std::tie(S, WasInserted) = insert(N, F); 420 if (WasInserted || !isa<DefinedCOFF>(S)) 421 replaceSymbol<DefinedCommon>(S, F, N, Size, Sym, C); 422 else if (auto *DC = dyn_cast<DefinedCommon>(S)) 423 if (Size > DC->getSize()) 424 replaceSymbol<DefinedCommon>(S, F, N, Size, Sym, C); 425 return S; 426 } 427 428 Symbol *SymbolTable::addImportData(StringRef N, ImportFile *F) { 429 Symbol *S; 430 bool WasInserted; 431 std::tie(S, WasInserted) = insert(N, nullptr); 432 S->IsUsedInRegularObj = true; 433 if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) { 434 replaceSymbol<DefinedImportData>(S, N, F); 435 return S; 436 } 437 438 reportDuplicate(S, F); 439 return nullptr; 440 } 441 442 Symbol *SymbolTable::addImportThunk(StringRef Name, DefinedImportData *ID, 443 uint16_t Machine) { 444 Symbol *S; 445 bool WasInserted; 446 std::tie(S, WasInserted) = insert(Name, nullptr); 447 S->IsUsedInRegularObj = true; 448 if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) { 449 replaceSymbol<DefinedImportThunk>(S, Name, ID, Machine); 450 return S; 451 } 452 453 reportDuplicate(S, ID->File); 454 return nullptr; 455 } 456 457 std::vector<Chunk *> SymbolTable::getChunks() { 458 std::vector<Chunk *> Res; 459 for (ObjFile *File : ObjFile::Instances) { 460 ArrayRef<Chunk *> V = File->getChunks(); 461 Res.insert(Res.end(), V.begin(), V.end()); 462 } 463 return Res; 464 } 465 466 Symbol *SymbolTable::find(StringRef Name) { 467 return SymMap.lookup(CachedHashStringRef(Name)); 468 } 469 470 Symbol *SymbolTable::findUnderscore(StringRef Name) { 471 if (Config->Machine == I386) 472 return find(("_" + Name).str()); 473 return find(Name); 474 } 475 476 // Return all symbols that start with Prefix, possibly ignoring the first 477 // character of Prefix or the first character symbol. 478 std::vector<Symbol *> SymbolTable::getSymsWithPrefix(StringRef Prefix) { 479 std::vector<Symbol *> Syms; 480 for (auto Pair : SymMap) { 481 StringRef Name = Pair.first.val(); 482 if (Name.startswith(Prefix) || Name.startswith(Prefix.drop_front()) || 483 Name.drop_front().startswith(Prefix) || 484 Name.drop_front().startswith(Prefix.drop_front())) { 485 Syms.push_back(Pair.second); 486 } 487 } 488 return Syms; 489 } 490 491 Symbol *SymbolTable::findMangle(StringRef Name) { 492 if (Symbol *Sym = find(Name)) 493 if (!isa<Undefined>(Sym)) 494 return Sym; 495 496 // Efficient fuzzy string lookup is impossible with a hash table, so iterate 497 // the symbol table once and collect all possibly matching symbols into this 498 // vector. Then compare each possibly matching symbol with each possible 499 // mangling. 500 std::vector<Symbol *> Syms = getSymsWithPrefix(Name); 501 auto FindByPrefix = [&Syms](const Twine &T) -> Symbol * { 502 std::string Prefix = T.str(); 503 for (auto *S : Syms) 504 if (S->getName().startswith(Prefix)) 505 return S; 506 return nullptr; 507 }; 508 509 // For non-x86, just look for C++ functions. 510 if (Config->Machine != I386) 511 return FindByPrefix("?" + Name + "@@Y"); 512 513 if (!Name.startswith("_")) 514 return nullptr; 515 // Search for x86 stdcall function. 516 if (Symbol *S = FindByPrefix(Name + "@")) 517 return S; 518 // Search for x86 fastcall function. 519 if (Symbol *S = FindByPrefix("@" + Name.substr(1) + "@")) 520 return S; 521 // Search for x86 vectorcall function. 522 if (Symbol *S = FindByPrefix(Name.substr(1) + "@@")) 523 return S; 524 // Search for x86 C++ non-member function. 525 return FindByPrefix("?" + Name.substr(1) + "@@Y"); 526 } 527 528 Symbol *SymbolTable::addUndefined(StringRef Name) { 529 return addUndefined(Name, nullptr, false); 530 } 531 532 std::vector<StringRef> SymbolTable::compileBitcodeFiles() { 533 LTO.reset(new BitcodeCompiler); 534 for (BitcodeFile *F : BitcodeFile::Instances) 535 LTO->add(*F); 536 return LTO->compile(); 537 } 538 539 void SymbolTable::addCombinedLTOObjects() { 540 if (BitcodeFile::Instances.empty()) 541 return; 542 543 ScopedTimer T(LTOTimer); 544 for (StringRef Object : compileBitcodeFiles()) { 545 auto *Obj = make<ObjFile>(MemoryBufferRef(Object, "lto.tmp")); 546 Obj->parse(); 547 ObjFile::Instances.push_back(Obj); 548 } 549 } 550 551 } // namespace coff 552 } // namespace lld 553