1 //===- SymbolTable.cpp ----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "SymbolTable.h" 10 #include "Config.h" 11 #include "InputChunks.h" 12 #include "InputEvent.h" 13 #include "InputGlobal.h" 14 #include "WriterUtils.h" 15 #include "lld/Common/ErrorHandler.h" 16 #include "lld/Common/Memory.h" 17 #include "llvm/ADT/SetVector.h" 18 19 #define DEBUG_TYPE "lld" 20 21 using namespace llvm; 22 using namespace llvm::wasm; 23 using namespace llvm::object; 24 using namespace lld; 25 using namespace lld::wasm; 26 27 SymbolTable *lld::wasm::Symtab; 28 29 void SymbolTable::addFile(InputFile *File) { 30 log("Processing: " + toString(File)); 31 if (Config->Trace) 32 message(toString(File)); 33 File->parse(); 34 35 // LLVM bitcode file 36 if (auto *F = dyn_cast<BitcodeFile>(File)) 37 BitcodeFiles.push_back(F); 38 else if (auto *F = dyn_cast<ObjFile>(File)) 39 ObjectFiles.push_back(F); 40 else if (auto *F = dyn_cast<SharedFile>(File)) 41 SharedFiles.push_back(F); 42 } 43 44 // This function is where all the optimizations of link-time 45 // optimization happens. When LTO is in use, some input files are 46 // not in native object file format but in the LLVM bitcode format. 47 // This function compiles bitcode files into a few big native files 48 // using LLVM functions and replaces bitcode symbols with the results. 49 // Because all bitcode files that the program consists of are passed 50 // to the compiler at once, it can do whole-program optimization. 51 void SymbolTable::addCombinedLTOObject() { 52 if (BitcodeFiles.empty()) 53 return; 54 55 // Compile bitcode files and replace bitcode symbols. 56 LTO.reset(new BitcodeCompiler); 57 for (BitcodeFile *F : BitcodeFiles) 58 LTO->add(*F); 59 60 for (StringRef Filename : LTO->compile()) { 61 auto *Obj = make<ObjFile>(MemoryBufferRef(Filename, "lto.tmp"), ""); 62 Obj->parse(); 63 ObjectFiles.push_back(Obj); 64 } 65 } 66 67 void SymbolTable::reportRemainingUndefines() { 68 for (Symbol *Sym : SymVector) { 69 if (!Sym->isUndefined() || Sym->isWeak()) 70 continue; 71 if (Config->AllowUndefinedSymbols.count(Sym->getName()) != 0) 72 continue; 73 if (!Sym->IsUsedInRegularObj) 74 continue; 75 error(toString(Sym->getFile()) + ": undefined symbol: " + toString(*Sym)); 76 } 77 } 78 79 Symbol *SymbolTable::find(StringRef Name) { 80 auto It = SymMap.find(CachedHashStringRef(Name)); 81 if (It == SymMap.end() || It->second == -1) 82 return nullptr; 83 return SymVector[It->second]; 84 } 85 86 void SymbolTable::replace(StringRef Name, Symbol* Sym) { 87 auto It = SymMap.find(CachedHashStringRef(Name)); 88 SymVector[It->second] = Sym; 89 } 90 91 std::pair<Symbol *, bool> SymbolTable::insertName(StringRef Name) { 92 bool Trace = false; 93 auto P = SymMap.insert({CachedHashStringRef(Name), (int)SymVector.size()}); 94 int &SymIndex = P.first->second; 95 bool IsNew = P.second; 96 if (SymIndex == -1) { 97 SymIndex = SymVector.size(); 98 Trace = true; 99 IsNew = true; 100 } 101 102 if (!IsNew) 103 return {SymVector[SymIndex], false}; 104 105 Symbol *Sym = reinterpret_cast<Symbol *>(make<SymbolUnion>()); 106 Sym->IsUsedInRegularObj = false; 107 Sym->Traced = Trace; 108 SymVector.emplace_back(Sym); 109 return {Sym, true}; 110 } 111 112 std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name, 113 const InputFile *File) { 114 Symbol *S; 115 bool WasInserted; 116 std::tie(S, WasInserted) = insertName(Name); 117 118 if (!File || File->kind() == InputFile::ObjectKind) 119 S->IsUsedInRegularObj = true; 120 121 return {S, WasInserted}; 122 } 123 124 static void reportTypeError(const Symbol *Existing, const InputFile *File, 125 llvm::wasm::WasmSymbolType Type) { 126 error("symbol type mismatch: " + toString(*Existing) + "\n>>> defined as " + 127 toString(Existing->getWasmType()) + " in " + 128 toString(Existing->getFile()) + "\n>>> defined as " + toString(Type) + 129 " in " + toString(File)); 130 } 131 132 // Check the type of new symbol matches that of the symbol is replacing. 133 // Returns true if the function types match, false is there is a singature 134 // mismatch. 135 static bool signatureMatches(FunctionSymbol *Existing, 136 const WasmSignature *NewSig) { 137 if (!NewSig) 138 return true; 139 140 const WasmSignature *OldSig = Existing->Signature; 141 if (!OldSig) { 142 Existing->Signature = NewSig; 143 return true; 144 } 145 146 return *NewSig == *OldSig; 147 } 148 149 static void checkGlobalType(const Symbol *Existing, const InputFile *File, 150 const WasmGlobalType *NewType) { 151 if (!isa<GlobalSymbol>(Existing)) { 152 reportTypeError(Existing, File, WASM_SYMBOL_TYPE_GLOBAL); 153 return; 154 } 155 156 const WasmGlobalType *OldType = cast<GlobalSymbol>(Existing)->getGlobalType(); 157 if (*NewType != *OldType) { 158 error("Global type mismatch: " + Existing->getName() + "\n>>> defined as " + 159 toString(*OldType) + " in " + toString(Existing->getFile()) + 160 "\n>>> defined as " + toString(*NewType) + " in " + toString(File)); 161 } 162 } 163 164 static void checkEventType(const Symbol *Existing, const InputFile *File, 165 const WasmEventType *NewType, 166 const WasmSignature *NewSig) { 167 auto ExistingEvent = dyn_cast<EventSymbol>(Existing); 168 if (!isa<EventSymbol>(Existing)) { 169 reportTypeError(Existing, File, WASM_SYMBOL_TYPE_EVENT); 170 return; 171 } 172 173 const WasmEventType *OldType = cast<EventSymbol>(Existing)->getEventType(); 174 const WasmSignature *OldSig = ExistingEvent->Signature; 175 if (NewType->Attribute != OldType->Attribute) 176 error("Event type mismatch: " + Existing->getName() + "\n>>> defined as " + 177 toString(*OldType) + " in " + toString(Existing->getFile()) + 178 "\n>>> defined as " + toString(*NewType) + " in " + toString(File)); 179 if (*NewSig != *OldSig) 180 warn("Event signature mismatch: " + Existing->getName() + 181 "\n>>> defined as " + toString(*OldSig) + " in " + 182 toString(Existing->getFile()) + "\n>>> defined as " + 183 toString(*NewSig) + " in " + toString(File)); 184 } 185 186 static void checkDataType(const Symbol *Existing, const InputFile *File) { 187 if (!isa<DataSymbol>(Existing)) 188 reportTypeError(Existing, File, WASM_SYMBOL_TYPE_DATA); 189 } 190 191 DefinedFunction *SymbolTable::addSyntheticFunction(StringRef Name, 192 uint32_t Flags, 193 InputFunction *Function) { 194 LLVM_DEBUG(dbgs() << "addSyntheticFunction: " << Name << "\n"); 195 assert(!find(Name)); 196 SyntheticFunctions.emplace_back(Function); 197 return replaceSymbol<DefinedFunction>(insertName(Name).first, Name, 198 Flags, nullptr, Function); 199 } 200 201 DefinedData *SymbolTable::addSyntheticDataSymbol(StringRef Name, 202 uint32_t Flags) { 203 LLVM_DEBUG(dbgs() << "addSyntheticDataSymbol: " << Name << "\n"); 204 assert(!find(Name)); 205 return replaceSymbol<DefinedData>(insertName(Name).first, Name, Flags); 206 } 207 208 DefinedGlobal *SymbolTable::addSyntheticGlobal(StringRef Name, uint32_t Flags, 209 InputGlobal *Global) { 210 LLVM_DEBUG(dbgs() << "addSyntheticGlobal: " << Name << " -> " << Global 211 << "\n"); 212 assert(!find(Name)); 213 SyntheticGlobals.emplace_back(Global); 214 return replaceSymbol<DefinedGlobal>(insertName(Name).first, Name, Flags, 215 nullptr, Global); 216 } 217 218 static bool shouldReplace(const Symbol *Existing, InputFile *NewFile, 219 uint32_t NewFlags) { 220 // If existing symbol is undefined, replace it. 221 if (!Existing->isDefined()) { 222 LLVM_DEBUG(dbgs() << "resolving existing undefined symbol: " 223 << Existing->getName() << "\n"); 224 return true; 225 } 226 227 // Now we have two defined symbols. If the new one is weak, we can ignore it. 228 if ((NewFlags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) { 229 LLVM_DEBUG(dbgs() << "existing symbol takes precedence\n"); 230 return false; 231 } 232 233 // If the existing symbol is weak, we should replace it. 234 if (Existing->isWeak()) { 235 LLVM_DEBUG(dbgs() << "replacing existing weak symbol\n"); 236 return true; 237 } 238 239 // Neither symbol is week. They conflict. 240 error("duplicate symbol: " + toString(*Existing) + "\n>>> defined in " + 241 toString(Existing->getFile()) + "\n>>> defined in " + 242 toString(NewFile)); 243 return true; 244 } 245 246 Symbol *SymbolTable::addDefinedFunction(StringRef Name, uint32_t Flags, 247 InputFile *File, 248 InputFunction *Function) { 249 LLVM_DEBUG(dbgs() << "addDefinedFunction: " << Name << " [" 250 << (Function ? toString(Function->Signature) : "none") 251 << "]\n"); 252 Symbol *S; 253 bool WasInserted; 254 std::tie(S, WasInserted) = insert(Name, File); 255 256 auto Replace = [&](Symbol* Sym) { 257 // If the new defined function doesn't have signture (i.e. bitcode 258 // functions) but the old symbol does, then preserve the old signature 259 const WasmSignature *OldSig = S->getSignature(); 260 auto* NewSym = replaceSymbol<DefinedFunction>(Sym, Name, Flags, File, Function); 261 if (!NewSym->Signature) 262 NewSym->Signature = OldSig; 263 }; 264 265 if (WasInserted || S->isLazy()) { 266 Replace(S); 267 return S; 268 } 269 270 auto ExistingFunction = dyn_cast<FunctionSymbol>(S); 271 if (!ExistingFunction) { 272 reportTypeError(S, File, WASM_SYMBOL_TYPE_FUNCTION); 273 return S; 274 } 275 276 if (Function && !signatureMatches(ExistingFunction, &Function->Signature)) { 277 Symbol* Variant; 278 if (getFunctionVariant(S, &Function->Signature, File, &Variant)) 279 // New variant, always replace 280 Replace(Variant); 281 else if (shouldReplace(S, File, Flags)) 282 // Variant already exists, replace it after checking shouldReplace 283 Replace(Variant); 284 285 // This variant we found take the place in the symbol table as the primary 286 // variant. 287 replace(Name, Variant); 288 return Variant; 289 } 290 291 // Existing function with matching signature. 292 if (shouldReplace(S, File, Flags)) 293 Replace(S); 294 295 return S; 296 } 297 298 Symbol *SymbolTable::addDefinedData(StringRef Name, uint32_t Flags, 299 InputFile *File, InputSegment *Segment, 300 uint32_t Address, uint32_t Size) { 301 LLVM_DEBUG(dbgs() << "addDefinedData:" << Name << " addr:" << Address 302 << "\n"); 303 Symbol *S; 304 bool WasInserted; 305 std::tie(S, WasInserted) = insert(Name, File); 306 307 auto Replace = [&]() { 308 replaceSymbol<DefinedData>(S, Name, Flags, File, Segment, Address, Size); 309 }; 310 311 if (WasInserted || S->isLazy()) { 312 Replace(); 313 return S; 314 } 315 316 checkDataType(S, File); 317 318 if (shouldReplace(S, File, Flags)) 319 Replace(); 320 return S; 321 } 322 323 Symbol *SymbolTable::addDefinedGlobal(StringRef Name, uint32_t Flags, 324 InputFile *File, InputGlobal *Global) { 325 LLVM_DEBUG(dbgs() << "addDefinedGlobal:" << Name << "\n"); 326 327 Symbol *S; 328 bool WasInserted; 329 std::tie(S, WasInserted) = insert(Name, File); 330 331 auto Replace = [&]() { 332 replaceSymbol<DefinedGlobal>(S, Name, Flags, File, Global); 333 }; 334 335 if (WasInserted || S->isLazy()) { 336 Replace(); 337 return S; 338 } 339 340 checkGlobalType(S, File, &Global->getType()); 341 342 if (shouldReplace(S, File, Flags)) 343 Replace(); 344 return S; 345 } 346 347 Symbol *SymbolTable::addDefinedEvent(StringRef Name, uint32_t Flags, 348 InputFile *File, InputEvent *Event) { 349 LLVM_DEBUG(dbgs() << "addDefinedEvent:" << Name << "\n"); 350 351 Symbol *S; 352 bool WasInserted; 353 std::tie(S, WasInserted) = insert(Name, File); 354 355 auto Replace = [&]() { 356 replaceSymbol<DefinedEvent>(S, Name, Flags, File, Event); 357 }; 358 359 if (WasInserted || S->isLazy()) { 360 Replace(); 361 return S; 362 } 363 364 checkEventType(S, File, &Event->getType(), &Event->Signature); 365 366 if (shouldReplace(S, File, Flags)) 367 Replace(); 368 return S; 369 } 370 371 Symbol *SymbolTable::addUndefinedFunction(StringRef Name, StringRef ImportName, 372 StringRef ImportModule, 373 uint32_t Flags, InputFile *File, 374 const WasmSignature *Sig) { 375 LLVM_DEBUG(dbgs() << "addUndefinedFunction: " << Name << 376 " [" << (Sig ? toString(*Sig) : "none") << "]\n"); 377 378 Symbol *S; 379 bool WasInserted; 380 std::tie(S, WasInserted) = insert(Name, File); 381 382 auto Replace = [&]() { 383 replaceSymbol<UndefinedFunction>(S, Name, ImportName, ImportModule, Flags, 384 File, Sig); 385 }; 386 387 if (WasInserted) 388 Replace(); 389 else if (auto *Lazy = dyn_cast<LazySymbol>(S)) 390 Lazy->fetch(); 391 else { 392 auto ExistingFunction = dyn_cast<FunctionSymbol>(S); 393 if (!ExistingFunction) { 394 reportTypeError(S, File, WASM_SYMBOL_TYPE_FUNCTION); 395 return S; 396 } 397 if (!signatureMatches(ExistingFunction, Sig)) 398 if (getFunctionVariant(S, Sig, File, &S)) 399 Replace(); 400 } 401 402 return S; 403 } 404 405 Symbol *SymbolTable::addUndefinedData(StringRef Name, uint32_t Flags, 406 InputFile *File) { 407 LLVM_DEBUG(dbgs() << "addUndefinedData: " << Name << "\n"); 408 409 Symbol *S; 410 bool WasInserted; 411 std::tie(S, WasInserted) = insert(Name, File); 412 413 if (WasInserted) 414 replaceSymbol<UndefinedData>(S, Name, Flags, File); 415 else if (auto *Lazy = dyn_cast<LazySymbol>(S)) 416 Lazy->fetch(); 417 else if (S->isDefined()) 418 checkDataType(S, File); 419 return S; 420 } 421 422 Symbol *SymbolTable::addUndefinedGlobal(StringRef Name, StringRef ImportName, 423 StringRef ImportModule, uint32_t Flags, 424 InputFile *File, 425 const WasmGlobalType *Type) { 426 LLVM_DEBUG(dbgs() << "addUndefinedGlobal: " << Name << "\n"); 427 428 Symbol *S; 429 bool WasInserted; 430 std::tie(S, WasInserted) = insert(Name, File); 431 432 if (WasInserted) 433 replaceSymbol<UndefinedGlobal>(S, Name, ImportName, ImportModule, Flags, 434 File, Type); 435 else if (auto *Lazy = dyn_cast<LazySymbol>(S)) 436 Lazy->fetch(); 437 else if (S->isDefined()) 438 checkGlobalType(S, File, Type); 439 return S; 440 } 441 442 void SymbolTable::addLazy(ArchiveFile *File, const Archive::Symbol *Sym) { 443 LLVM_DEBUG(dbgs() << "addLazy: " << Sym->getName() << "\n"); 444 StringRef Name = Sym->getName(); 445 446 Symbol *S; 447 bool WasInserted; 448 std::tie(S, WasInserted) = insertName(Name); 449 450 if (WasInserted) { 451 replaceSymbol<LazySymbol>(S, Name, 0, File, *Sym); 452 return; 453 } 454 455 if (!S->isUndefined()) 456 return; 457 458 // The existing symbol is undefined, load a new one from the archive, 459 // unless the the existing symbol is weak in which case replace the undefined 460 // symbols with a LazySymbol. 461 if (S->isWeak()) { 462 const WasmSignature *OldSig = nullptr; 463 // In the case of an UndefinedFunction we need to preserve the expected 464 // signature. 465 if (auto *F = dyn_cast<UndefinedFunction>(S)) 466 OldSig = F->Signature; 467 LLVM_DEBUG(dbgs() << "replacing existing weak undefined symbol\n"); 468 auto NewSym = replaceSymbol<LazySymbol>(S, Name, WASM_SYMBOL_BINDING_WEAK, 469 File, *Sym); 470 NewSym->Signature = OldSig; 471 return; 472 } 473 474 LLVM_DEBUG(dbgs() << "replacing existing undefined\n"); 475 File->addMember(Sym); 476 } 477 478 bool SymbolTable::addComdat(StringRef Name) { 479 return Comdats.insert(CachedHashStringRef(Name)).second; 480 } 481 482 // The new signature doesn't match. Create a variant to the symbol with the 483 // signature encoded in the name and return that instead. These symbols are 484 // then unified later in handleSymbolVariants. 485 bool SymbolTable::getFunctionVariant(Symbol* Sym, const WasmSignature *Sig, 486 const InputFile *File, Symbol **Out) { 487 LLVM_DEBUG(dbgs() << "getFunctionVariant: " << Sym->getName() << " -> " 488 << " " << toString(*Sig) << "\n"); 489 Symbol *Variant = nullptr; 490 491 // Linear search through symbol variants. Should never be more than two 492 // or three entries here. 493 auto &Variants = SymVariants[CachedHashStringRef(Sym->getName())]; 494 if (Variants.size() == 0) 495 Variants.push_back(Sym); 496 497 for (Symbol* V : Variants) { 498 if (*V->getSignature() == *Sig) { 499 Variant = V; 500 break; 501 } 502 } 503 504 bool WasAdded = !Variant; 505 if (WasAdded) { 506 // Create a new variant; 507 LLVM_DEBUG(dbgs() << "added new variant\n"); 508 Variant = reinterpret_cast<Symbol *>(make<SymbolUnion>()); 509 Variants.push_back(Variant); 510 } else { 511 LLVM_DEBUG(dbgs() << "variant already exists: " << toString(*Variant) << "\n"); 512 assert(*Variant->getSignature() == *Sig); 513 } 514 515 *Out = Variant; 516 return WasAdded; 517 } 518 519 // Set a flag for --trace-symbol so that we can print out a log message 520 // if a new symbol with the same name is inserted into the symbol table. 521 void SymbolTable::trace(StringRef Name) { 522 SymMap.insert({CachedHashStringRef(Name), -1}); 523 } 524 525 static const uint8_t UnreachableFn[] = { 526 0x03 /* ULEB length */, 0x00 /* ULEB num locals */, 527 0x00 /* opcode unreachable */, 0x0b /* opcode end */ 528 }; 529 530 // Replace the given symbol body with an unreachable function. 531 // This is used by handleWeakUndefines in order to generate a callable 532 // equivalent of an undefined function and also handleSymbolVariants for 533 // undefined functions that don't match the signature of the definition. 534 InputFunction *SymbolTable::replaceWithUnreachable(Symbol *Sym, 535 const WasmSignature &Sig, 536 StringRef DebugName) { 537 auto *Func = make<SyntheticFunction>(Sig, Sym->getName(), DebugName); 538 Func->setBody(UnreachableFn); 539 SyntheticFunctions.emplace_back(Func); 540 replaceSymbol<DefinedFunction>(Sym, Sym->getName(), Sym->getFlags(), nullptr, 541 Func); 542 return Func; 543 } 544 545 // For weak undefined functions, there may be "call" instructions that reference 546 // the symbol. In this case, we need to synthesise a dummy/stub function that 547 // will abort at runtime, so that relocations can still provided an operand to 548 // the call instruction that passes Wasm validation. 549 void SymbolTable::handleWeakUndefines() { 550 for (Symbol *Sym : getSymbols()) { 551 if (!Sym->isUndefWeak()) 552 continue; 553 554 const WasmSignature *Sig = Sym->getSignature(); 555 if (!Sig) { 556 // It is possible for undefined functions not to have a signature (eg. if 557 // added via "--undefined"), but weak undefined ones do have a signature. 558 // Lazy symbols may not be functions and therefore Sig can still be null 559 // in some circumstantce. 560 assert(!isa<FunctionSymbol>(Sym)); 561 continue; 562 } 563 564 // Add a synthetic dummy for weak undefined functions. These dummies will 565 // be GC'd if not used as the target of any "call" instructions. 566 StringRef DebugName = Saver.save("undefined:" + toString(*Sym)); 567 InputFunction* Func = replaceWithUnreachable(Sym, *Sig, DebugName); 568 // Ensure it compares equal to the null pointer, and so that table relocs 569 // don't pull in the stub body (only call-operand relocs should do that). 570 Func->setTableIndex(0); 571 // Hide our dummy to prevent export. 572 Sym->setHidden(true); 573 } 574 } 575 576 static void reportFunctionSignatureMismatch(StringRef SymName, 577 FunctionSymbol *A, 578 FunctionSymbol *B, bool Error) { 579 std::string msg = ("function signature mismatch: " + SymName + 580 "\n>>> defined as " + toString(*A->Signature) + " in " + 581 toString(A->getFile()) + "\n>>> defined as " + 582 toString(*B->Signature) + " in " + toString(B->getFile())) 583 .str(); 584 if (Error) 585 error(msg); 586 else 587 warn(msg); 588 } 589 590 // Remove any variant symbols that were created due to function signature 591 // mismatches. 592 void SymbolTable::handleSymbolVariants() { 593 for (auto Pair : SymVariants) { 594 // Push the initial symbol onto the list of variants. 595 StringRef SymName = Pair.first.val(); 596 std::vector<Symbol *> &Variants = Pair.second; 597 598 #ifndef NDEBUG 599 LLVM_DEBUG(dbgs() << "symbol with (" << Variants.size() 600 << ") variants: " << SymName << "\n"); 601 for (auto *S: Variants) { 602 auto *F = cast<FunctionSymbol>(S); 603 LLVM_DEBUG(dbgs() << " variant: " + F->getName() << " " 604 << toString(*F->Signature) << "\n"); 605 } 606 #endif 607 608 // Find the one definition. 609 DefinedFunction *Defined = nullptr; 610 for (auto *Symbol : Variants) { 611 if (auto F = dyn_cast<DefinedFunction>(Symbol)) { 612 Defined = F; 613 break; 614 } 615 } 616 617 // If there are no definitions, and the undefined symbols disagree on 618 // the signature, there is not we can do since we don't know which one 619 // to use as the signature on the import. 620 if (!Defined) { 621 reportFunctionSignatureMismatch(SymName, 622 cast<FunctionSymbol>(Variants[0]), 623 cast<FunctionSymbol>(Variants[1]), true); 624 return; 625 } 626 627 for (auto *Symbol : Variants) { 628 if (Symbol != Defined) { 629 auto *F = cast<FunctionSymbol>(Symbol); 630 reportFunctionSignatureMismatch(SymName, F, Defined, false); 631 StringRef DebugName = Saver.save("unreachable:" + toString(*F)); 632 replaceWithUnreachable(F, *F->Signature, DebugName); 633 } 634 } 635 } 636 } 637