1 //===- SymbolTable.cpp ----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "SymbolTable.h" 10 #include "Config.h" 11 #include "InputChunks.h" 12 #include "InputEvent.h" 13 #include "InputGlobal.h" 14 #include "WriterUtils.h" 15 #include "lld/Common/ErrorHandler.h" 16 #include "lld/Common/Memory.h" 17 #include "llvm/ADT/SetVector.h" 18 19 #define DEBUG_TYPE "lld" 20 21 using namespace llvm; 22 using namespace llvm::wasm; 23 using namespace llvm::object; 24 using namespace lld; 25 using namespace lld::wasm; 26 27 SymbolTable *lld::wasm::Symtab; 28 29 void SymbolTable::addFile(InputFile *File) { 30 log("Processing: " + toString(File)); 31 if (Config->Trace) 32 message(toString(File)); 33 File->parse(); 34 35 // LLVM bitcode file 36 if (auto *F = dyn_cast<BitcodeFile>(File)) 37 BitcodeFiles.push_back(F); 38 else if (auto *F = dyn_cast<ObjFile>(File)) 39 ObjectFiles.push_back(F); 40 } 41 42 // This function is where all the optimizations of link-time 43 // optimization happens. When LTO is in use, some input files are 44 // not in native object file format but in the LLVM bitcode format. 45 // This function compiles bitcode files into a few big native files 46 // using LLVM functions and replaces bitcode symbols with the results. 47 // Because all bitcode files that the program consists of are passed 48 // to the compiler at once, it can do whole-program optimization. 49 void SymbolTable::addCombinedLTOObject() { 50 if (BitcodeFiles.empty()) 51 return; 52 53 // Compile bitcode files and replace bitcode symbols. 54 LTO.reset(new BitcodeCompiler); 55 for (BitcodeFile *F : BitcodeFiles) 56 LTO->add(*F); 57 58 for (StringRef Filename : LTO->compile()) { 59 auto *Obj = make<ObjFile>(MemoryBufferRef(Filename, "lto.tmp")); 60 Obj->parse(); 61 ObjectFiles.push_back(Obj); 62 } 63 } 64 65 void SymbolTable::reportRemainingUndefines() { 66 for (Symbol *Sym : SymVector) { 67 if (!Sym->isUndefined() || Sym->isWeak()) 68 continue; 69 if (Config->AllowUndefinedSymbols.count(Sym->getName()) != 0) 70 continue; 71 if (!Sym->IsUsedInRegularObj) 72 continue; 73 error(toString(Sym->getFile()) + ": undefined symbol: " + toString(*Sym)); 74 } 75 } 76 77 Symbol *SymbolTable::find(StringRef Name) { 78 auto It = SymMap.find(CachedHashStringRef(Name)); 79 if (It == SymMap.end() || It->second == -1) 80 return nullptr; 81 return SymVector[It->second]; 82 } 83 84 void SymbolTable::replace(StringRef Name, Symbol* Sym) { 85 auto It = SymMap.find(CachedHashStringRef(Name)); 86 SymVector[It->second] = Sym; 87 } 88 89 std::pair<Symbol *, bool> SymbolTable::insertName(StringRef Name) { 90 bool Trace = false; 91 auto P = SymMap.insert({CachedHashStringRef(Name), (int)SymVector.size()}); 92 int &SymIndex = P.first->second; 93 bool IsNew = P.second; 94 if (SymIndex == -1) { 95 SymIndex = SymVector.size(); 96 Trace = true; 97 IsNew = true; 98 } 99 100 if (!IsNew) 101 return {SymVector[SymIndex], false}; 102 103 Symbol *Sym = reinterpret_cast<Symbol *>(make<SymbolUnion>()); 104 Sym->IsUsedInRegularObj = false; 105 Sym->Traced = Trace; 106 SymVector.emplace_back(Sym); 107 return {Sym, true}; 108 } 109 110 std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name, 111 const InputFile *File) { 112 Symbol *S; 113 bool WasInserted; 114 std::tie(S, WasInserted) = insertName(Name); 115 116 if (!File || File->kind() == InputFile::ObjectKind) 117 S->IsUsedInRegularObj = true; 118 119 return {S, WasInserted}; 120 } 121 122 static void reportTypeError(const Symbol *Existing, const InputFile *File, 123 llvm::wasm::WasmSymbolType Type) { 124 error("symbol type mismatch: " + toString(*Existing) + "\n>>> defined as " + 125 toString(Existing->getWasmType()) + " in " + 126 toString(Existing->getFile()) + "\n>>> defined as " + toString(Type) + 127 " in " + toString(File)); 128 } 129 130 // Check the type of new symbol matches that of the symbol is replacing. 131 // Returns true if the function types match, false is there is a singature 132 // mismatch. 133 bool signatureMatches(FunctionSymbol *Existing, const WasmSignature *NewSig) { 134 if (!NewSig) 135 return true; 136 137 const WasmSignature *OldSig = Existing->Signature; 138 if (!OldSig) { 139 Existing->Signature = NewSig; 140 return true; 141 } 142 143 return *NewSig == *OldSig; 144 } 145 146 static void checkGlobalType(const Symbol *Existing, const InputFile *File, 147 const WasmGlobalType *NewType) { 148 if (!isa<GlobalSymbol>(Existing)) { 149 reportTypeError(Existing, File, WASM_SYMBOL_TYPE_GLOBAL); 150 return; 151 } 152 153 const WasmGlobalType *OldType = cast<GlobalSymbol>(Existing)->getGlobalType(); 154 if (*NewType != *OldType) { 155 error("Global type mismatch: " + Existing->getName() + "\n>>> defined as " + 156 toString(*OldType) + " in " + toString(Existing->getFile()) + 157 "\n>>> defined as " + toString(*NewType) + " in " + toString(File)); 158 } 159 } 160 161 static void checkEventType(const Symbol *Existing, const InputFile *File, 162 const WasmEventType *NewType, 163 const WasmSignature *NewSig) { 164 auto ExistingEvent = dyn_cast<EventSymbol>(Existing); 165 if (!isa<EventSymbol>(Existing)) { 166 reportTypeError(Existing, File, WASM_SYMBOL_TYPE_EVENT); 167 return; 168 } 169 170 const WasmEventType *OldType = cast<EventSymbol>(Existing)->getEventType(); 171 const WasmSignature *OldSig = ExistingEvent->Signature; 172 if (NewType->Attribute != OldType->Attribute) 173 error("Event type mismatch: " + Existing->getName() + "\n>>> defined as " + 174 toString(*OldType) + " in " + toString(Existing->getFile()) + 175 "\n>>> defined as " + toString(*NewType) + " in " + toString(File)); 176 if (*NewSig != *OldSig) 177 warn("Event signature mismatch: " + Existing->getName() + 178 "\n>>> defined as " + toString(*OldSig) + " in " + 179 toString(Existing->getFile()) + "\n>>> defined as " + 180 toString(*NewSig) + " in " + toString(File)); 181 } 182 183 static void checkDataType(const Symbol *Existing, const InputFile *File) { 184 if (!isa<DataSymbol>(Existing)) 185 reportTypeError(Existing, File, WASM_SYMBOL_TYPE_DATA); 186 } 187 188 DefinedFunction *SymbolTable::addSyntheticFunction(StringRef Name, 189 uint32_t Flags, 190 InputFunction *Function) { 191 LLVM_DEBUG(dbgs() << "addSyntheticFunction: " << Name << "\n"); 192 assert(!find(Name)); 193 SyntheticFunctions.emplace_back(Function); 194 return replaceSymbol<DefinedFunction>(insertName(Name).first, Name, 195 Flags, nullptr, Function); 196 } 197 198 DefinedData *SymbolTable::addSyntheticDataSymbol(StringRef Name, 199 uint32_t Flags) { 200 LLVM_DEBUG(dbgs() << "addSyntheticDataSymbol: " << Name << "\n"); 201 assert(!find(Name)); 202 return replaceSymbol<DefinedData>(insertName(Name).first, Name, Flags); 203 } 204 205 DefinedGlobal *SymbolTable::addSyntheticGlobal(StringRef Name, uint32_t Flags, 206 InputGlobal *Global) { 207 LLVM_DEBUG(dbgs() << "addSyntheticGlobal: " << Name << " -> " << Global 208 << "\n"); 209 assert(!find(Name)); 210 SyntheticGlobals.emplace_back(Global); 211 return replaceSymbol<DefinedGlobal>(insertName(Name).first, Name, Flags, 212 nullptr, Global); 213 } 214 215 static bool shouldReplace(const Symbol *Existing, InputFile *NewFile, 216 uint32_t NewFlags) { 217 // If existing symbol is undefined, replace it. 218 if (!Existing->isDefined()) { 219 LLVM_DEBUG(dbgs() << "resolving existing undefined symbol: " 220 << Existing->getName() << "\n"); 221 return true; 222 } 223 224 // Now we have two defined symbols. If the new one is weak, we can ignore it. 225 if ((NewFlags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) { 226 LLVM_DEBUG(dbgs() << "existing symbol takes precedence\n"); 227 return false; 228 } 229 230 // If the existing symbol is weak, we should replace it. 231 if (Existing->isWeak()) { 232 LLVM_DEBUG(dbgs() << "replacing existing weak symbol\n"); 233 return true; 234 } 235 236 // Neither symbol is week. They conflict. 237 error("duplicate symbol: " + toString(*Existing) + "\n>>> defined in " + 238 toString(Existing->getFile()) + "\n>>> defined in " + 239 toString(NewFile)); 240 return true; 241 } 242 243 Symbol *SymbolTable::addDefinedFunction(StringRef Name, uint32_t Flags, 244 InputFile *File, 245 InputFunction *Function) { 246 LLVM_DEBUG(dbgs() << "addDefinedFunction: " << Name << " [" 247 << (Function ? toString(Function->Signature) : "none") 248 << "]\n"); 249 Symbol *S; 250 bool WasInserted; 251 std::tie(S, WasInserted) = insert(Name, File); 252 253 auto Replace = [&](Symbol* Sym) { 254 // If the new defined function doesn't have signture (i.e. bitcode 255 // functions) but the old symbol does, then preserve the old signature 256 const WasmSignature *OldSig = S->getSignature(); 257 auto* NewSym = replaceSymbol<DefinedFunction>(Sym, Name, Flags, File, Function); 258 if (!NewSym->Signature) 259 NewSym->Signature = OldSig; 260 }; 261 262 if (WasInserted || S->isLazy()) { 263 Replace(S); 264 return S; 265 } 266 267 auto ExistingFunction = dyn_cast<FunctionSymbol>(S); 268 if (!ExistingFunction) { 269 reportTypeError(S, File, WASM_SYMBOL_TYPE_FUNCTION); 270 return S; 271 } 272 273 if (Function && !signatureMatches(ExistingFunction, &Function->Signature)) { 274 Symbol* Variant; 275 if (getFunctionVariant(S, &Function->Signature, File, &Variant)) 276 // New variant, always replace 277 Replace(Variant); 278 else if (shouldReplace(S, File, Flags)) 279 // Variant already exists, replace it after checking shouldReplace 280 Replace(Variant); 281 282 // This variant we found take the place in the symbol table as the primary 283 // variant. 284 replace(Name, Variant); 285 return Variant; 286 } 287 288 // Existing function with matching signature. 289 if (shouldReplace(S, File, Flags)) 290 Replace(S); 291 292 return S; 293 } 294 295 Symbol *SymbolTable::addDefinedData(StringRef Name, uint32_t Flags, 296 InputFile *File, InputSegment *Segment, 297 uint32_t Address, uint32_t Size) { 298 LLVM_DEBUG(dbgs() << "addDefinedData:" << Name << " addr:" << Address 299 << "\n"); 300 Symbol *S; 301 bool WasInserted; 302 std::tie(S, WasInserted) = insert(Name, File); 303 304 auto Replace = [&]() { 305 replaceSymbol<DefinedData>(S, Name, Flags, File, Segment, Address, Size); 306 }; 307 308 if (WasInserted || S->isLazy()) { 309 Replace(); 310 return S; 311 } 312 313 checkDataType(S, File); 314 315 if (shouldReplace(S, File, Flags)) 316 Replace(); 317 return S; 318 } 319 320 Symbol *SymbolTable::addDefinedGlobal(StringRef Name, uint32_t Flags, 321 InputFile *File, InputGlobal *Global) { 322 LLVM_DEBUG(dbgs() << "addDefinedGlobal:" << Name << "\n"); 323 324 Symbol *S; 325 bool WasInserted; 326 std::tie(S, WasInserted) = insert(Name, File); 327 328 auto Replace = [&]() { 329 replaceSymbol<DefinedGlobal>(S, Name, Flags, File, Global); 330 }; 331 332 if (WasInserted || S->isLazy()) { 333 Replace(); 334 return S; 335 } 336 337 checkGlobalType(S, File, &Global->getType()); 338 339 if (shouldReplace(S, File, Flags)) 340 Replace(); 341 return S; 342 } 343 344 Symbol *SymbolTable::addDefinedEvent(StringRef Name, uint32_t Flags, 345 InputFile *File, InputEvent *Event) { 346 LLVM_DEBUG(dbgs() << "addDefinedEvent:" << Name << "\n"); 347 348 Symbol *S; 349 bool WasInserted; 350 std::tie(S, WasInserted) = insert(Name, File); 351 352 auto Replace = [&]() { 353 replaceSymbol<DefinedEvent>(S, Name, Flags, File, Event); 354 }; 355 356 if (WasInserted || S->isLazy()) { 357 Replace(); 358 return S; 359 } 360 361 checkEventType(S, File, &Event->getType(), &Event->Signature); 362 363 if (shouldReplace(S, File, Flags)) 364 Replace(); 365 return S; 366 } 367 368 Symbol *SymbolTable::addUndefinedFunction(StringRef Name, StringRef ImportName, 369 StringRef ImportModule, 370 uint32_t Flags, InputFile *File, 371 const WasmSignature *Sig) { 372 LLVM_DEBUG(dbgs() << "addUndefinedFunction: " << Name << 373 " [" << (Sig ? toString(*Sig) : "none") << "]\n"); 374 375 Symbol *S; 376 bool WasInserted; 377 std::tie(S, WasInserted) = insert(Name, File); 378 379 auto Replace = [&]() { 380 replaceSymbol<UndefinedFunction>(S, Name, ImportName, ImportModule, Flags, 381 File, Sig); 382 }; 383 384 if (WasInserted) 385 Replace(); 386 else if (auto *Lazy = dyn_cast<LazySymbol>(S)) 387 Lazy->fetch(); 388 else { 389 auto ExistingFunction = dyn_cast<FunctionSymbol>(S); 390 if (!ExistingFunction) { 391 reportTypeError(S, File, WASM_SYMBOL_TYPE_FUNCTION); 392 return S; 393 } 394 if (!signatureMatches(ExistingFunction, Sig)) 395 if (getFunctionVariant(S, Sig, File, &S)) 396 Replace(); 397 } 398 399 return S; 400 } 401 402 Symbol *SymbolTable::addUndefinedData(StringRef Name, uint32_t Flags, 403 InputFile *File) { 404 LLVM_DEBUG(dbgs() << "addUndefinedData: " << Name << "\n"); 405 406 Symbol *S; 407 bool WasInserted; 408 std::tie(S, WasInserted) = insert(Name, File); 409 410 if (WasInserted) 411 replaceSymbol<UndefinedData>(S, Name, Flags, File); 412 else if (auto *Lazy = dyn_cast<LazySymbol>(S)) 413 Lazy->fetch(); 414 else if (S->isDefined()) 415 checkDataType(S, File); 416 return S; 417 } 418 419 Symbol *SymbolTable::addUndefinedGlobal(StringRef Name, StringRef ImportName, 420 StringRef ImportModule, uint32_t Flags, 421 InputFile *File, 422 const WasmGlobalType *Type) { 423 LLVM_DEBUG(dbgs() << "addUndefinedGlobal: " << Name << "\n"); 424 425 Symbol *S; 426 bool WasInserted; 427 std::tie(S, WasInserted) = insert(Name, File); 428 429 if (WasInserted) 430 replaceSymbol<UndefinedGlobal>(S, Name, ImportName, ImportModule, Flags, 431 File, Type); 432 else if (auto *Lazy = dyn_cast<LazySymbol>(S)) 433 Lazy->fetch(); 434 else if (S->isDefined()) 435 checkGlobalType(S, File, Type); 436 return S; 437 } 438 439 void SymbolTable::addLazy(ArchiveFile *File, const Archive::Symbol *Sym) { 440 LLVM_DEBUG(dbgs() << "addLazy: " << Sym->getName() << "\n"); 441 StringRef Name = Sym->getName(); 442 443 Symbol *S; 444 bool WasInserted; 445 std::tie(S, WasInserted) = insertName(Name); 446 447 if (WasInserted) { 448 replaceSymbol<LazySymbol>(S, Name, 0, File, *Sym); 449 return; 450 } 451 452 if (!S->isUndefined()) 453 return; 454 455 // The existing symbol is undefined, load a new one from the archive, 456 // unless the the existing symbol is weak in which case replace the undefined 457 // symbols with a LazySymbol. 458 if (S->isWeak()) { 459 const WasmSignature *OldSig = nullptr; 460 // In the case of an UndefinedFunction we need to preserve the expected 461 // signature. 462 if (auto *F = dyn_cast<UndefinedFunction>(S)) 463 OldSig = F->Signature; 464 LLVM_DEBUG(dbgs() << "replacing existing weak undefined symbol\n"); 465 auto NewSym = replaceSymbol<LazySymbol>(S, Name, WASM_SYMBOL_BINDING_WEAK, 466 File, *Sym); 467 NewSym->Signature = OldSig; 468 return; 469 } 470 471 LLVM_DEBUG(dbgs() << "replacing existing undefined\n"); 472 File->addMember(Sym); 473 } 474 475 bool SymbolTable::addComdat(StringRef Name) { 476 return Comdats.insert(CachedHashStringRef(Name)).second; 477 } 478 479 // The new signature doesn't match. Create a variant to the symbol with the 480 // signature encoded in the name and return that instead. These symbols are 481 // then unified later in handleSymbolVariants. 482 bool SymbolTable::getFunctionVariant(Symbol* Sym, const WasmSignature *Sig, 483 const InputFile *File, Symbol **Out) { 484 LLVM_DEBUG(dbgs() << "getFunctionVariant: " << Sym->getName() << " -> " 485 << " " << toString(*Sig) << "\n"); 486 Symbol *Variant = nullptr; 487 488 // Linear search through symbol variants. Should never be more than two 489 // or three entries here. 490 auto &Variants = SymVariants[CachedHashStringRef(Sym->getName())]; 491 if (Variants.size() == 0) 492 Variants.push_back(Sym); 493 494 for (Symbol* V : Variants) { 495 if (*V->getSignature() == *Sig) { 496 Variant = V; 497 break; 498 } 499 } 500 501 bool WasAdded = !Variant; 502 if (WasAdded) { 503 // Create a new variant; 504 LLVM_DEBUG(dbgs() << "added new variant\n"); 505 Variant = reinterpret_cast<Symbol *>(make<SymbolUnion>()); 506 Variants.push_back(Variant); 507 } else { 508 LLVM_DEBUG(dbgs() << "variant already exists: " << toString(*Variant) << "\n"); 509 assert(*Variant->getSignature() == *Sig); 510 } 511 512 *Out = Variant; 513 return WasAdded; 514 } 515 516 // Set a flag for --trace-symbol so that we can print out a log message 517 // if a new symbol with the same name is inserted into the symbol table. 518 void SymbolTable::trace(StringRef Name) { 519 SymMap.insert({CachedHashStringRef(Name), -1}); 520 } 521 522 static const uint8_t UnreachableFn[] = { 523 0x03 /* ULEB length */, 0x00 /* ULEB num locals */, 524 0x00 /* opcode unreachable */, 0x0b /* opcode end */ 525 }; 526 527 // Replace the given symbol body with an unreachable function. 528 // This is used by handleWeakUndefines in order to generate a callable 529 // equivalent of an undefined function and also handleSymbolVariants for 530 // undefined functions that don't match the signature of the definition. 531 InputFunction *SymbolTable::replaceWithUnreachable(Symbol *Sym, 532 const WasmSignature &Sig, 533 StringRef DebugName) { 534 auto *Func = make<SyntheticFunction>(Sig, Sym->getName(), DebugName); 535 Func->setBody(UnreachableFn); 536 SyntheticFunctions.emplace_back(Func); 537 replaceSymbol<DefinedFunction>(Sym, Sym->getName(), Sym->getFlags(), nullptr, 538 Func); 539 return Func; 540 } 541 542 // For weak undefined functions, there may be "call" instructions that reference 543 // the symbol. In this case, we need to synthesise a dummy/stub function that 544 // will abort at runtime, so that relocations can still provided an operand to 545 // the call instruction that passes Wasm validation. 546 void SymbolTable::handleWeakUndefines() { 547 for (Symbol *Sym : getSymbols()) { 548 if (!Sym->isUndefWeak()) 549 continue; 550 551 const WasmSignature *Sig = Sym->getSignature(); 552 if (!Sig) { 553 // It is possible for undefined functions not to have a signature (eg. if 554 // added via "--undefined"), but weak undefined ones do have a signature. 555 // Lazy symbols may not be functions and therefore Sig can still be null 556 // in some circumstantce. 557 assert(!isa<FunctionSymbol>(Sym)); 558 continue; 559 } 560 561 // Add a synthetic dummy for weak undefined functions. These dummies will 562 // be GC'd if not used as the target of any "call" instructions. 563 StringRef DebugName = Saver.save("undefined:" + toString(*Sym)); 564 InputFunction* Func = replaceWithUnreachable(Sym, *Sig, DebugName); 565 // Ensure it compares equal to the null pointer, and so that table relocs 566 // don't pull in the stub body (only call-operand relocs should do that). 567 Func->setTableIndex(0); 568 // Hide our dummy to prevent export. 569 Sym->setHidden(true); 570 } 571 } 572 573 static void reportFunctionSignatureMismatch(StringRef SymName, 574 FunctionSymbol *A, 575 FunctionSymbol *B, bool Error) { 576 std::string msg = ("function signature mismatch: " + SymName + 577 "\n>>> defined as " + toString(*A->Signature) + " in " + 578 toString(A->getFile()) + "\n>>> defined as " + 579 toString(*B->Signature) + " in " + toString(B->getFile())) 580 .str(); 581 if (Error) 582 error(msg); 583 else 584 warn(msg); 585 } 586 587 // Remove any variant symbols that were created due to function signature 588 // mismatches. 589 void SymbolTable::handleSymbolVariants() { 590 for (auto Pair : SymVariants) { 591 // Push the initial symbol onto the list of variants. 592 StringRef SymName = Pair.first.val(); 593 std::vector<Symbol *> &Variants = Pair.second; 594 595 #ifndef NDEBUG 596 LLVM_DEBUG(dbgs() << "symbol with (" << Variants.size() 597 << ") variants: " << SymName << "\n"); 598 for (auto *S: Variants) { 599 auto *F = cast<FunctionSymbol>(S); 600 LLVM_DEBUG(dbgs() << " variant: " + F->getName() << " " 601 << toString(*F->Signature) << "\n"); 602 } 603 #endif 604 605 // Find the one definition. 606 DefinedFunction *Defined = nullptr; 607 for (auto *Symbol : Variants) { 608 if (auto F = dyn_cast<DefinedFunction>(Symbol)) { 609 Defined = F; 610 break; 611 } 612 } 613 614 // If there are no definitions, and the undefined symbols disagree on 615 // the signature, there is not we can do since we don't know which one 616 // to use as the signature on the import. 617 if (!Defined) { 618 reportFunctionSignatureMismatch(SymName, 619 cast<FunctionSymbol>(Variants[0]), 620 cast<FunctionSymbol>(Variants[1]), true); 621 return; 622 } 623 624 for (auto *Symbol : Variants) { 625 if (Symbol != Defined) { 626 auto *F = cast<FunctionSymbol>(Symbol); 627 reportFunctionSignatureMismatch(SymName, F, Defined, false); 628 StringRef DebugName = Saver.save("unreachable:" + toString(*F)); 629 replaceWithUnreachable(F, *F->Signature, DebugName); 630 } 631 } 632 } 633 } 634