1 //===- InputFiles.cpp -----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "InputFiles.h" 10 #include "Config.h" 11 #include "InputChunks.h" 12 #include "InputEvent.h" 13 #include "InputGlobal.h" 14 #include "SymbolTable.h" 15 #include "lld/Common/ErrorHandler.h" 16 #include "lld/Common/Memory.h" 17 #include "lld/Common/Reproduce.h" 18 #include "llvm/Object/Binary.h" 19 #include "llvm/Object/Wasm.h" 20 #include "llvm/Support/TarWriter.h" 21 #include "llvm/Support/raw_ostream.h" 22 23 #define DEBUG_TYPE "lld" 24 25 using namespace lld; 26 using namespace lld::wasm; 27 28 using namespace llvm; 29 using namespace llvm::object; 30 using namespace llvm::wasm; 31 32 std::unique_ptr<llvm::TarWriter> lld::wasm::Tar; 33 34 Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) { 35 log("Loading: " + Path); 36 37 auto MBOrErr = MemoryBuffer::getFile(Path); 38 if (auto EC = MBOrErr.getError()) { 39 error("cannot open " + Path + ": " + EC.message()); 40 return None; 41 } 42 std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; 43 MemoryBufferRef MBRef = MB->getMemBufferRef(); 44 make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership 45 46 if (Tar) 47 Tar->append(relativeToRoot(Path), MBRef.getBuffer()); 48 return MBRef; 49 } 50 51 InputFile *lld::wasm::createObjectFile(MemoryBufferRef MB, 52 StringRef ArchiveName) { 53 file_magic Magic = identify_magic(MB.getBuffer()); 54 if (Magic == file_magic::wasm_object) { 55 std::unique_ptr<Binary> Bin = check(createBinary(MB)); 56 auto *Obj = cast<WasmObjectFile>(Bin.get()); 57 if (Obj->isSharedObject()) 58 return make<SharedFile>(MB); 59 return make<ObjFile>(MB, ArchiveName); 60 } 61 62 if (Magic == file_magic::bitcode) 63 return make<BitcodeFile>(MB, ArchiveName); 64 65 fatal("unknown file type: " + MB.getBufferIdentifier()); 66 } 67 68 void ObjFile::dumpInfo() const { 69 log("info for: " + toString(this) + 70 "\n Symbols : " + Twine(Symbols.size()) + 71 "\n Function Imports : " + Twine(WasmObj->getNumImportedFunctions()) + 72 "\n Global Imports : " + Twine(WasmObj->getNumImportedGlobals()) + 73 "\n Event Imports : " + Twine(WasmObj->getNumImportedEvents())); 74 } 75 76 // Relocations contain either symbol or type indices. This function takes a 77 // relocation and returns relocated index (i.e. translates from the input 78 // symbol/type space to the output symbol/type space). 79 uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const { 80 if (Reloc.Type == R_WASM_TYPE_INDEX_LEB) { 81 assert(TypeIsUsed[Reloc.Index]); 82 return TypeMap[Reloc.Index]; 83 } 84 const Symbol *Sym = Symbols[Reloc.Index]; 85 if (auto *SS = dyn_cast<SectionSymbol>(Sym)) 86 Sym = SS->getOutputSectionSymbol(); 87 return Sym->getOutputSymbolIndex(); 88 } 89 90 // Relocations can contain addend for combined sections. This function takes a 91 // relocation and returns updated addend by offset in the output section. 92 uint32_t ObjFile::calcNewAddend(const WasmRelocation &Reloc) const { 93 switch (Reloc.Type) { 94 case R_WASM_MEMORY_ADDR_LEB: 95 case R_WASM_MEMORY_ADDR_SLEB: 96 case R_WASM_MEMORY_ADDR_I32: 97 case R_WASM_FUNCTION_OFFSET_I32: 98 return Reloc.Addend; 99 case R_WASM_SECTION_OFFSET_I32: 100 return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend; 101 default: 102 llvm_unreachable("unexpected relocation type"); 103 } 104 } 105 106 // Calculate the value we expect to find at the relocation location. 107 // This is used as a sanity check before applying a relocation to a given 108 // location. It is useful for catching bugs in the compiler and linker. 109 uint32_t ObjFile::calcExpectedValue(const WasmRelocation &Reloc) const { 110 switch (Reloc.Type) { 111 case R_WASM_TABLE_INDEX_I32: 112 case R_WASM_TABLE_INDEX_SLEB: 113 case R_WASM_TABLE_INDEX_REL_SLEB: { 114 const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index]; 115 return TableEntries[Sym.Info.ElementIndex]; 116 } 117 case R_WASM_MEMORY_ADDR_SLEB: 118 case R_WASM_MEMORY_ADDR_I32: 119 case R_WASM_MEMORY_ADDR_LEB: 120 case R_WASM_MEMORY_ADDR_REL_SLEB: { 121 const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index]; 122 if (Sym.isUndefined()) 123 return 0; 124 const WasmSegment &Segment = 125 WasmObj->dataSegments()[Sym.Info.DataRef.Segment]; 126 return Segment.Data.Offset.Value.Int32 + Sym.Info.DataRef.Offset + 127 Reloc.Addend; 128 } 129 case R_WASM_FUNCTION_OFFSET_I32: { 130 const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index]; 131 InputFunction *F = 132 Functions[Sym.Info.ElementIndex - WasmObj->getNumImportedFunctions()]; 133 return F->getFunctionInputOffset() + F->getFunctionCodeOffset() + 134 Reloc.Addend; 135 } 136 case R_WASM_SECTION_OFFSET_I32: 137 return Reloc.Addend; 138 case R_WASM_TYPE_INDEX_LEB: 139 return Reloc.Index; 140 case R_WASM_FUNCTION_INDEX_LEB: 141 case R_WASM_GLOBAL_INDEX_LEB: 142 case R_WASM_EVENT_INDEX_LEB: { 143 const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index]; 144 return Sym.Info.ElementIndex; 145 } 146 default: 147 llvm_unreachable("unknown relocation type"); 148 } 149 } 150 151 // Translate from the relocation's index into the final linked output value. 152 uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const { 153 const Symbol* Sym = nullptr; 154 if (Reloc.Type != R_WASM_TYPE_INDEX_LEB) { 155 Sym = Symbols[Reloc.Index]; 156 157 // We can end up with relocations against non-live symbols. For example 158 // in debug sections. 159 if ((isa<FunctionSymbol>(Sym) || isa<DataSymbol>(Sym)) && !Sym->isLive()) 160 return 0; 161 162 // Special handling for undefined data symbols. Most relocations against 163 // such symbols cannot be resolved. 164 if (isa<DataSymbol>(Sym) && Sym->isUndefined()) { 165 if (Sym->isWeak() || Config->Relocatable) 166 return 0; 167 // R_WASM_MEMORY_ADDR_I32 relocations in PIC code are turned into runtime 168 // fixups in __wasm_apply_relocs 169 if (Config->Pic && Reloc.Type == R_WASM_MEMORY_ADDR_I32) 170 return 0; 171 if (Reloc.Type != R_WASM_GLOBAL_INDEX_LEB) { 172 llvm_unreachable( 173 ("invalid relocation against undefined data symbol: " + toString(*Sym)) 174 .c_str()); 175 } 176 } 177 } 178 179 switch (Reloc.Type) { 180 case R_WASM_TABLE_INDEX_I32: 181 case R_WASM_TABLE_INDEX_SLEB: 182 case R_WASM_TABLE_INDEX_REL_SLEB: 183 if (Config->Pic && !getFunctionSymbol(Reloc.Index)->hasTableIndex()) 184 return 0; 185 return getFunctionSymbol(Reloc.Index)->getTableIndex(); 186 case R_WASM_MEMORY_ADDR_SLEB: 187 case R_WASM_MEMORY_ADDR_I32: 188 case R_WASM_MEMORY_ADDR_LEB: 189 case R_WASM_MEMORY_ADDR_REL_SLEB: 190 return cast<DefinedData>(Sym)->getVirtualAddress() + Reloc.Addend; 191 case R_WASM_TYPE_INDEX_LEB: 192 return TypeMap[Reloc.Index]; 193 case R_WASM_FUNCTION_INDEX_LEB: 194 return getFunctionSymbol(Reloc.Index)->getFunctionIndex(); 195 case R_WASM_GLOBAL_INDEX_LEB: 196 if (auto GS = dyn_cast<GlobalSymbol>(Sym)) 197 return GS->getGlobalIndex(); 198 return Sym->getGOTIndex(); 199 case R_WASM_EVENT_INDEX_LEB: 200 return getEventSymbol(Reloc.Index)->getEventIndex(); 201 case R_WASM_FUNCTION_OFFSET_I32: { 202 auto *F = cast<DefinedFunction>(Sym); 203 return F->Function->OutputOffset + F->Function->getFunctionCodeOffset() + 204 Reloc.Addend; 205 } 206 case R_WASM_SECTION_OFFSET_I32: 207 return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend; 208 default: 209 llvm_unreachable("unknown relocation type"); 210 } 211 } 212 213 template <class T> 214 static void setRelocs(const std::vector<T *> &Chunks, 215 const WasmSection *Section) { 216 if (!Section) 217 return; 218 219 ArrayRef<WasmRelocation> Relocs = Section->Relocations; 220 assert(std::is_sorted(Relocs.begin(), Relocs.end(), 221 [](const WasmRelocation &R1, const WasmRelocation &R2) { 222 return R1.Offset < R2.Offset; 223 })); 224 assert(std::is_sorted( 225 Chunks.begin(), Chunks.end(), [](InputChunk *C1, InputChunk *C2) { 226 return C1->getInputSectionOffset() < C2->getInputSectionOffset(); 227 })); 228 229 auto RelocsNext = Relocs.begin(); 230 auto RelocsEnd = Relocs.end(); 231 auto RelocLess = [](const WasmRelocation &R, uint32_t Val) { 232 return R.Offset < Val; 233 }; 234 for (InputChunk *C : Chunks) { 235 auto RelocsStart = std::lower_bound(RelocsNext, RelocsEnd, 236 C->getInputSectionOffset(), RelocLess); 237 RelocsNext = std::lower_bound( 238 RelocsStart, RelocsEnd, C->getInputSectionOffset() + C->getInputSize(), 239 RelocLess); 240 C->setRelocations(ArrayRef<WasmRelocation>(RelocsStart, RelocsNext)); 241 } 242 } 243 244 void ObjFile::parse(bool IgnoreComdats) { 245 // Parse a memory buffer as a wasm file. 246 LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n"); 247 std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this)); 248 249 auto *Obj = dyn_cast<WasmObjectFile>(Bin.get()); 250 if (!Obj) 251 fatal(toString(this) + ": not a wasm file"); 252 if (!Obj->isRelocatableObject()) 253 fatal(toString(this) + ": not a relocatable wasm file"); 254 255 Bin.release(); 256 WasmObj.reset(Obj); 257 258 // Build up a map of function indices to table indices for use when 259 // verifying the existing table index relocations 260 uint32_t TotalFunctions = 261 WasmObj->getNumImportedFunctions() + WasmObj->functions().size(); 262 TableEntries.resize(TotalFunctions); 263 for (const WasmElemSegment &Seg : WasmObj->elements()) { 264 if (Seg.Offset.Opcode != WASM_OPCODE_I32_CONST) 265 fatal(toString(this) + ": invalid table elements"); 266 uint32_t Offset = Seg.Offset.Value.Int32; 267 for (uint32_t Index = 0; Index < Seg.Functions.size(); Index++) { 268 269 uint32_t FunctionIndex = Seg.Functions[Index]; 270 TableEntries[FunctionIndex] = Offset + Index; 271 } 272 } 273 274 // Find the code and data sections. Wasm objects can have at most one code 275 // and one data section. 276 uint32_t SectionIndex = 0; 277 for (const SectionRef &Sec : WasmObj->sections()) { 278 const WasmSection &Section = WasmObj->getWasmSection(Sec); 279 if (Section.Type == WASM_SEC_CODE) { 280 CodeSection = &Section; 281 } else if (Section.Type == WASM_SEC_DATA) { 282 DataSection = &Section; 283 } else if (Section.Type == WASM_SEC_CUSTOM) { 284 CustomSections.emplace_back(make<InputSection>(Section, this)); 285 CustomSections.back()->setRelocations(Section.Relocations); 286 CustomSectionsByIndex[SectionIndex] = CustomSections.back(); 287 } 288 SectionIndex++; 289 } 290 291 TypeMap.resize(getWasmObj()->types().size()); 292 TypeIsUsed.resize(getWasmObj()->types().size(), false); 293 294 ArrayRef<StringRef> Comdats = WasmObj->linkingData().Comdats; 295 for (unsigned I = 0; I < Comdats.size(); ++I) 296 if (IgnoreComdats) 297 KeptComdats.push_back(true); 298 else 299 KeptComdats.push_back(Symtab->addComdat(Comdats[I])); 300 301 // Populate `Segments`. 302 for (const WasmSegment &S : WasmObj->dataSegments()) 303 Segments.emplace_back(make<InputSegment>(S, this)); 304 setRelocs(Segments, DataSection); 305 306 // Populate `Functions`. 307 ArrayRef<WasmFunction> Funcs = WasmObj->functions(); 308 ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes(); 309 ArrayRef<WasmSignature> Types = WasmObj->types(); 310 Functions.reserve(Funcs.size()); 311 312 for (size_t I = 0, E = Funcs.size(); I != E; ++I) 313 Functions.emplace_back( 314 make<InputFunction>(Types[FuncTypes[I]], &Funcs[I], this)); 315 setRelocs(Functions, CodeSection); 316 317 // Populate `Globals`. 318 for (const WasmGlobal &G : WasmObj->globals()) 319 Globals.emplace_back(make<InputGlobal>(G, this)); 320 321 // Populate `Events`. 322 for (const WasmEvent &E : WasmObj->events()) 323 Events.emplace_back(make<InputEvent>(Types[E.Type.SigIndex], E, this)); 324 325 // Populate `Symbols` based on the WasmSymbols in the object. 326 Symbols.reserve(WasmObj->getNumberOfSymbols()); 327 for (const SymbolRef &Sym : WasmObj->symbols()) { 328 const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl()); 329 if (Symbol *Sym = createDefined(WasmSym)) 330 Symbols.push_back(Sym); 331 else 332 Symbols.push_back(createUndefined(WasmSym)); 333 } 334 } 335 336 bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const { 337 uint32_t C = Chunk->getComdat(); 338 if (C == UINT32_MAX) 339 return false; 340 return !KeptComdats[C]; 341 } 342 343 FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t Index) const { 344 return cast<FunctionSymbol>(Symbols[Index]); 345 } 346 347 GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t Index) const { 348 return cast<GlobalSymbol>(Symbols[Index]); 349 } 350 351 EventSymbol *ObjFile::getEventSymbol(uint32_t Index) const { 352 return cast<EventSymbol>(Symbols[Index]); 353 } 354 355 SectionSymbol *ObjFile::getSectionSymbol(uint32_t Index) const { 356 return cast<SectionSymbol>(Symbols[Index]); 357 } 358 359 DataSymbol *ObjFile::getDataSymbol(uint32_t Index) const { 360 return cast<DataSymbol>(Symbols[Index]); 361 } 362 363 Symbol *ObjFile::createDefined(const WasmSymbol &Sym) { 364 if (!Sym.isDefined()) 365 return nullptr; 366 367 StringRef Name = Sym.Info.Name; 368 uint32_t Flags = Sym.Info.Flags; 369 370 switch (Sym.Info.Kind) { 371 case WASM_SYMBOL_TYPE_FUNCTION: { 372 InputFunction *Func = 373 Functions[Sym.Info.ElementIndex - WasmObj->getNumImportedFunctions()]; 374 if (isExcludedByComdat(Func)) { 375 Func->Live = false; 376 return nullptr; 377 } 378 379 if (Sym.isBindingLocal()) 380 return make<DefinedFunction>(Name, Flags, this, Func); 381 return Symtab->addDefinedFunction(Name, Flags, this, Func); 382 } 383 case WASM_SYMBOL_TYPE_DATA: { 384 InputSegment *Seg = Segments[Sym.Info.DataRef.Segment]; 385 if (isExcludedByComdat(Seg)) { 386 Seg->Live = false; 387 return nullptr; 388 } 389 390 uint32_t Offset = Sym.Info.DataRef.Offset; 391 uint32_t Size = Sym.Info.DataRef.Size; 392 393 if (Sym.isBindingLocal()) 394 return make<DefinedData>(Name, Flags, this, Seg, Offset, Size); 395 return Symtab->addDefinedData(Name, Flags, this, Seg, Offset, Size); 396 } 397 case WASM_SYMBOL_TYPE_GLOBAL: { 398 InputGlobal *Global = 399 Globals[Sym.Info.ElementIndex - WasmObj->getNumImportedGlobals()]; 400 if (Sym.isBindingLocal()) 401 return make<DefinedGlobal>(Name, Flags, this, Global); 402 return Symtab->addDefinedGlobal(Name, Flags, this, Global); 403 } 404 case WASM_SYMBOL_TYPE_SECTION: { 405 InputSection *Section = CustomSectionsByIndex[Sym.Info.ElementIndex]; 406 assert(Sym.isBindingLocal()); 407 return make<SectionSymbol>(Flags, Section, this); 408 } 409 case WASM_SYMBOL_TYPE_EVENT: { 410 InputEvent *Event = 411 Events[Sym.Info.ElementIndex - WasmObj->getNumImportedEvents()]; 412 if (Sym.isBindingLocal()) 413 return make<DefinedEvent>(Name, Flags, this, Event); 414 return Symtab->addDefinedEvent(Name, Flags, this, Event); 415 } 416 } 417 llvm_unreachable("unknown symbol kind"); 418 } 419 420 Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) { 421 StringRef Name = Sym.Info.Name; 422 uint32_t Flags = Sym.Info.Flags; 423 424 switch (Sym.Info.Kind) { 425 case WASM_SYMBOL_TYPE_FUNCTION: 426 return Symtab->addUndefinedFunction(Name, Sym.Info.ImportName, 427 Sym.Info.ImportModule, Flags, this, 428 Sym.Signature); 429 case WASM_SYMBOL_TYPE_DATA: 430 return Symtab->addUndefinedData(Name, Flags, this); 431 case WASM_SYMBOL_TYPE_GLOBAL: 432 return Symtab->addUndefinedGlobal(Name, Sym.Info.ImportName, 433 Sym.Info.ImportModule, Flags, this, 434 Sym.GlobalType); 435 case WASM_SYMBOL_TYPE_SECTION: 436 llvm_unreachable("section symbols cannot be undefined"); 437 } 438 llvm_unreachable("unknown symbol kind"); 439 } 440 441 void ArchiveFile::parse(bool IgnoreComdats) { 442 // Parse a MemoryBufferRef as an archive file. 443 LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n"); 444 File = CHECK(Archive::create(MB), toString(this)); 445 446 // Read the symbol table to construct Lazy symbols. 447 int Count = 0; 448 for (const Archive::Symbol &Sym : File->symbols()) { 449 Symtab->addLazy(this, &Sym); 450 ++Count; 451 } 452 LLVM_DEBUG(dbgs() << "Read " << Count << " symbols\n"); 453 } 454 455 void ArchiveFile::addMember(const Archive::Symbol *Sym) { 456 const Archive::Child &C = 457 CHECK(Sym->getMember(), 458 "could not get the member for symbol " + Sym->getName()); 459 460 // Don't try to load the same member twice (this can happen when members 461 // mutually reference each other). 462 if (!Seen.insert(C.getChildOffset()).second) 463 return; 464 465 LLVM_DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n"); 466 LLVM_DEBUG(dbgs() << "from archive: " << toString(this) << "\n"); 467 468 MemoryBufferRef MB = 469 CHECK(C.getMemoryBufferRef(), 470 "could not get the buffer for the member defining symbol " + 471 Sym->getName()); 472 473 InputFile *Obj = createObjectFile(MB, getName()); 474 Symtab->addFile(Obj); 475 } 476 477 static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) { 478 switch (GvVisibility) { 479 case GlobalValue::DefaultVisibility: 480 return WASM_SYMBOL_VISIBILITY_DEFAULT; 481 case GlobalValue::HiddenVisibility: 482 case GlobalValue::ProtectedVisibility: 483 return WASM_SYMBOL_VISIBILITY_HIDDEN; 484 } 485 llvm_unreachable("unknown visibility"); 486 } 487 488 static Symbol *createBitcodeSymbol(const std::vector<bool> &KeptComdats, 489 const lto::InputFile::Symbol &ObjSym, 490 BitcodeFile &F) { 491 StringRef Name = Saver.save(ObjSym.getName()); 492 493 uint32_t Flags = ObjSym.isWeak() ? WASM_SYMBOL_BINDING_WEAK : 0; 494 Flags |= mapVisibility(ObjSym.getVisibility()); 495 496 int C = ObjSym.getComdatIndex(); 497 bool ExcludedByComdat = C != -1 && !KeptComdats[C]; 498 499 if (ObjSym.isUndefined() || ExcludedByComdat) { 500 if (ObjSym.isExecutable()) 501 return Symtab->addUndefinedFunction(Name, Name, DefaultModule, Flags, &F, 502 nullptr); 503 return Symtab->addUndefinedData(Name, Flags, &F); 504 } 505 506 if (ObjSym.isExecutable()) 507 return Symtab->addDefinedFunction(Name, Flags, &F, nullptr); 508 return Symtab->addDefinedData(Name, Flags, &F, nullptr, 0, 0); 509 } 510 511 void BitcodeFile::parse(bool IgnoreComdats) { 512 Obj = check(lto::InputFile::create(MemoryBufferRef( 513 MB.getBuffer(), Saver.save(ArchiveName + MB.getBufferIdentifier())))); 514 Triple T(Obj->getTargetTriple()); 515 if (T.getArch() != Triple::wasm32) { 516 error(toString(MB.getBufferIdentifier()) + ": machine type must be wasm32"); 517 return; 518 } 519 std::vector<bool> KeptComdats; 520 for (StringRef S : Obj->getComdatTable()) 521 if (IgnoreComdats) 522 KeptComdats.push_back(true); 523 else 524 KeptComdats.push_back(Symtab->addComdat(S)); 525 526 for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) 527 Symbols.push_back(createBitcodeSymbol(KeptComdats, ObjSym, *this)); 528 } 529 530 // Returns a string in the format of "foo.o" or "foo.a(bar.o)". 531 std::string lld::toString(const wasm::InputFile *File) { 532 if (!File) 533 return "<internal>"; 534 535 if (File->ArchiveName.empty()) 536 return File->getName(); 537 538 return (File->ArchiveName + "(" + File->getName() + ")").str(); 539 } 540