1 //===- InputFiles.cpp -----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "InputFiles.h" 10 #include "Config.h" 11 #include "InputChunks.h" 12 #include "InputEvent.h" 13 #include "InputGlobal.h" 14 #include "SymbolTable.h" 15 #include "lld/Common/ErrorHandler.h" 16 #include "lld/Common/Memory.h" 17 #include "llvm/Object/Binary.h" 18 #include "llvm/Object/Wasm.h" 19 #include "llvm/Support/raw_ostream.h" 20 21 #define DEBUG_TYPE "lld" 22 23 using namespace lld; 24 using namespace lld::wasm; 25 26 using namespace llvm; 27 using namespace llvm::object; 28 using namespace llvm::wasm; 29 30 Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) { 31 log("Loading: " + Path); 32 33 auto MBOrErr = MemoryBuffer::getFile(Path); 34 if (auto EC = MBOrErr.getError()) { 35 error("cannot open " + Path + ": " + EC.message()); 36 return None; 37 } 38 std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; 39 MemoryBufferRef MBRef = MB->getMemBufferRef(); 40 make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership 41 42 return MBRef; 43 } 44 45 InputFile *lld::wasm::createObjectFile(MemoryBufferRef MB, 46 StringRef ArchiveName) { 47 file_magic Magic = identify_magic(MB.getBuffer()); 48 if (Magic == file_magic::wasm_object) { 49 std::unique_ptr<Binary> Bin = check(createBinary(MB)); 50 auto *Obj = cast<WasmObjectFile>(Bin.get()); 51 if (Obj->isSharedObject()) 52 return make<SharedFile>(MB); 53 return make<ObjFile>(MB, ArchiveName); 54 } 55 56 if (Magic == file_magic::bitcode) 57 return make<BitcodeFile>(MB, ArchiveName); 58 59 fatal("unknown file type: " + MB.getBufferIdentifier()); 60 } 61 62 void ObjFile::dumpInfo() const { 63 log("info for: " + toString(this) + 64 "\n Symbols : " + Twine(Symbols.size()) + 65 "\n Function Imports : " + Twine(WasmObj->getNumImportedFunctions()) + 66 "\n Global Imports : " + Twine(WasmObj->getNumImportedGlobals()) + 67 "\n Event Imports : " + Twine(WasmObj->getNumImportedEvents())); 68 } 69 70 // Relocations contain either symbol or type indices. This function takes a 71 // relocation and returns relocated index (i.e. translates from the input 72 // symbol/type space to the output symbol/type space). 73 uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const { 74 if (Reloc.Type == R_WASM_TYPE_INDEX_LEB) { 75 assert(TypeIsUsed[Reloc.Index]); 76 return TypeMap[Reloc.Index]; 77 } 78 return Symbols[Reloc.Index]->getOutputSymbolIndex(); 79 } 80 81 // Relocations can contain addend for combined sections. This function takes a 82 // relocation and returns updated addend by offset in the output section. 83 uint32_t ObjFile::calcNewAddend(const WasmRelocation &Reloc) const { 84 switch (Reloc.Type) { 85 case R_WASM_MEMORY_ADDR_LEB: 86 case R_WASM_MEMORY_ADDR_SLEB: 87 case R_WASM_MEMORY_ADDR_I32: 88 case R_WASM_FUNCTION_OFFSET_I32: 89 return Reloc.Addend; 90 case R_WASM_SECTION_OFFSET_I32: 91 return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend; 92 default: 93 llvm_unreachable("unexpected relocation type"); 94 } 95 } 96 97 // Calculate the value we expect to find at the relocation location. 98 // This is used as a sanity check before applying a relocation to a given 99 // location. It is useful for catching bugs in the compiler and linker. 100 uint32_t ObjFile::calcExpectedValue(const WasmRelocation &Reloc) const { 101 switch (Reloc.Type) { 102 case R_WASM_TABLE_INDEX_I32: 103 case R_WASM_TABLE_INDEX_SLEB: 104 case R_WASM_TABLE_INDEX_REL_SLEB: { 105 const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index]; 106 return TableEntries[Sym.Info.ElementIndex]; 107 } 108 case R_WASM_MEMORY_ADDR_SLEB: 109 case R_WASM_MEMORY_ADDR_I32: 110 case R_WASM_MEMORY_ADDR_LEB: 111 case R_WASM_MEMORY_ADDR_REL_SLEB: { 112 const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index]; 113 if (Sym.isUndefined()) 114 return 0; 115 const WasmSegment &Segment = 116 WasmObj->dataSegments()[Sym.Info.DataRef.Segment]; 117 return Segment.Data.Offset.Value.Int32 + Sym.Info.DataRef.Offset + 118 Reloc.Addend; 119 } 120 case R_WASM_FUNCTION_OFFSET_I32: { 121 const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index]; 122 InputFunction *F = 123 Functions[Sym.Info.ElementIndex - WasmObj->getNumImportedFunctions()]; 124 return F->getFunctionInputOffset() + F->getFunctionCodeOffset() + 125 Reloc.Addend; 126 } 127 case R_WASM_SECTION_OFFSET_I32: 128 return Reloc.Addend; 129 case R_WASM_TYPE_INDEX_LEB: 130 return Reloc.Index; 131 case R_WASM_FUNCTION_INDEX_LEB: 132 case R_WASM_GLOBAL_INDEX_LEB: 133 case R_WASM_EVENT_INDEX_LEB: { 134 const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index]; 135 return Sym.Info.ElementIndex; 136 } 137 default: 138 llvm_unreachable("unknown relocation type"); 139 } 140 } 141 142 // Translate from the relocation's index into the final linked output value. 143 uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const { 144 const Symbol* Sym = nullptr; 145 if (Reloc.Type != R_WASM_TYPE_INDEX_LEB) { 146 Sym = Symbols[Reloc.Index]; 147 148 // We can end up with relocations against non-live symbols. For example 149 // in debug sections. 150 if ((isa<FunctionSymbol>(Sym) || isa<DataSymbol>(Sym)) && !Sym->isLive()) 151 return 0; 152 153 // Special handling for undefined data symbols. Most relocations against 154 // such symbols cannot be resolved. 155 if (isa<DataSymbol>(Sym) && Sym->isUndefined()) { 156 if (Sym->isWeak() || Config->Relocatable) 157 return 0; 158 // R_WASM_MEMORY_ADDR_I32 relocations in PIC code are turned into runtime 159 // fixups in __wasm_apply_relocs 160 if (Config->Pic && Reloc.Type == R_WASM_MEMORY_ADDR_I32) 161 return 0; 162 if (Reloc.Type != R_WASM_GLOBAL_INDEX_LEB) { 163 llvm_unreachable( 164 ("invalid relocation against undefined data symbol: " + toString(*Sym)) 165 .c_str()); 166 } 167 } 168 } 169 170 switch (Reloc.Type) { 171 case R_WASM_TABLE_INDEX_I32: 172 case R_WASM_TABLE_INDEX_SLEB: 173 case R_WASM_TABLE_INDEX_REL_SLEB: 174 if (Config->Pic && !getFunctionSymbol(Reloc.Index)->hasTableIndex()) 175 return 0; 176 return getFunctionSymbol(Reloc.Index)->getTableIndex(); 177 case R_WASM_MEMORY_ADDR_SLEB: 178 case R_WASM_MEMORY_ADDR_I32: 179 case R_WASM_MEMORY_ADDR_LEB: 180 case R_WASM_MEMORY_ADDR_REL_SLEB: 181 return cast<DefinedData>(Sym)->getVirtualAddress() + Reloc.Addend; 182 case R_WASM_TYPE_INDEX_LEB: 183 return TypeMap[Reloc.Index]; 184 case R_WASM_FUNCTION_INDEX_LEB: 185 return getFunctionSymbol(Reloc.Index)->getFunctionIndex(); 186 case R_WASM_GLOBAL_INDEX_LEB: 187 if (auto GS = dyn_cast<GlobalSymbol>(Sym)) 188 return GS->getGlobalIndex(); 189 return Sym->getGOTIndex(); 190 case R_WASM_EVENT_INDEX_LEB: 191 return getEventSymbol(Reloc.Index)->getEventIndex(); 192 case R_WASM_FUNCTION_OFFSET_I32: { 193 auto *F = cast<DefinedFunction>(Sym); 194 return F->Function->OutputOffset + F->Function->getFunctionCodeOffset() + 195 Reloc.Addend; 196 } 197 case R_WASM_SECTION_OFFSET_I32: 198 return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend; 199 default: 200 llvm_unreachable("unknown relocation type"); 201 } 202 } 203 204 template <class T> 205 static void setRelocs(const std::vector<T *> &Chunks, 206 const WasmSection *Section) { 207 if (!Section) 208 return; 209 210 ArrayRef<WasmRelocation> Relocs = Section->Relocations; 211 assert(std::is_sorted(Relocs.begin(), Relocs.end(), 212 [](const WasmRelocation &R1, const WasmRelocation &R2) { 213 return R1.Offset < R2.Offset; 214 })); 215 assert(std::is_sorted( 216 Chunks.begin(), Chunks.end(), [](InputChunk *C1, InputChunk *C2) { 217 return C1->getInputSectionOffset() < C2->getInputSectionOffset(); 218 })); 219 220 auto RelocsNext = Relocs.begin(); 221 auto RelocsEnd = Relocs.end(); 222 auto RelocLess = [](const WasmRelocation &R, uint32_t Val) { 223 return R.Offset < Val; 224 }; 225 for (InputChunk *C : Chunks) { 226 auto RelocsStart = std::lower_bound(RelocsNext, RelocsEnd, 227 C->getInputSectionOffset(), RelocLess); 228 RelocsNext = std::lower_bound( 229 RelocsStart, RelocsEnd, C->getInputSectionOffset() + C->getInputSize(), 230 RelocLess); 231 C->setRelocations(ArrayRef<WasmRelocation>(RelocsStart, RelocsNext)); 232 } 233 } 234 235 void ObjFile::parse(bool IgnoreComdats) { 236 // Parse a memory buffer as a wasm file. 237 LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n"); 238 std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this)); 239 240 auto *Obj = dyn_cast<WasmObjectFile>(Bin.get()); 241 if (!Obj) 242 fatal(toString(this) + ": not a wasm file"); 243 if (!Obj->isRelocatableObject()) 244 fatal(toString(this) + ": not a relocatable wasm file"); 245 246 Bin.release(); 247 WasmObj.reset(Obj); 248 249 // Build up a map of function indices to table indices for use when 250 // verifying the existing table index relocations 251 uint32_t TotalFunctions = 252 WasmObj->getNumImportedFunctions() + WasmObj->functions().size(); 253 TableEntries.resize(TotalFunctions); 254 for (const WasmElemSegment &Seg : WasmObj->elements()) { 255 if (Seg.Offset.Opcode != WASM_OPCODE_I32_CONST) 256 fatal(toString(this) + ": invalid table elements"); 257 uint32_t Offset = Seg.Offset.Value.Int32; 258 for (uint32_t Index = 0; Index < Seg.Functions.size(); Index++) { 259 260 uint32_t FunctionIndex = Seg.Functions[Index]; 261 TableEntries[FunctionIndex] = Offset + Index; 262 } 263 } 264 265 // Find the code and data sections. Wasm objects can have at most one code 266 // and one data section. 267 uint32_t SectionIndex = 0; 268 for (const SectionRef &Sec : WasmObj->sections()) { 269 const WasmSection &Section = WasmObj->getWasmSection(Sec); 270 if (Section.Type == WASM_SEC_CODE) { 271 CodeSection = &Section; 272 } else if (Section.Type == WASM_SEC_DATA) { 273 DataSection = &Section; 274 } else if (Section.Type == WASM_SEC_CUSTOM) { 275 CustomSections.emplace_back(make<InputSection>(Section, this)); 276 CustomSections.back()->setRelocations(Section.Relocations); 277 CustomSectionsByIndex[SectionIndex] = CustomSections.back(); 278 } 279 SectionIndex++; 280 } 281 282 TypeMap.resize(getWasmObj()->types().size()); 283 TypeIsUsed.resize(getWasmObj()->types().size(), false); 284 285 ArrayRef<StringRef> Comdats = WasmObj->linkingData().Comdats; 286 for (unsigned I = 0; I < Comdats.size(); ++I) 287 if (IgnoreComdats) 288 KeptComdats.push_back(true); 289 else 290 KeptComdats.push_back(Symtab->addComdat(Comdats[I])); 291 292 // Populate `Segments`. 293 for (const WasmSegment &S : WasmObj->dataSegments()) 294 Segments.emplace_back(make<InputSegment>(S, this)); 295 setRelocs(Segments, DataSection); 296 297 // Populate `Functions`. 298 ArrayRef<WasmFunction> Funcs = WasmObj->functions(); 299 ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes(); 300 ArrayRef<WasmSignature> Types = WasmObj->types(); 301 Functions.reserve(Funcs.size()); 302 303 for (size_t I = 0, E = Funcs.size(); I != E; ++I) 304 Functions.emplace_back( 305 make<InputFunction>(Types[FuncTypes[I]], &Funcs[I], this)); 306 setRelocs(Functions, CodeSection); 307 308 // Populate `Globals`. 309 for (const WasmGlobal &G : WasmObj->globals()) 310 Globals.emplace_back(make<InputGlobal>(G, this)); 311 312 // Populate `Events`. 313 for (const WasmEvent &E : WasmObj->events()) 314 Events.emplace_back(make<InputEvent>(Types[E.Type.SigIndex], E, this)); 315 316 // Populate `Symbols` based on the WasmSymbols in the object. 317 Symbols.reserve(WasmObj->getNumberOfSymbols()); 318 for (const SymbolRef &Sym : WasmObj->symbols()) { 319 const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl()); 320 if (Symbol *Sym = createDefined(WasmSym)) 321 Symbols.push_back(Sym); 322 else 323 Symbols.push_back(createUndefined(WasmSym)); 324 } 325 } 326 327 bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const { 328 uint32_t C = Chunk->getComdat(); 329 if (C == UINT32_MAX) 330 return false; 331 return !KeptComdats[C]; 332 } 333 334 FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t Index) const { 335 return cast<FunctionSymbol>(Symbols[Index]); 336 } 337 338 GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t Index) const { 339 return cast<GlobalSymbol>(Symbols[Index]); 340 } 341 342 EventSymbol *ObjFile::getEventSymbol(uint32_t Index) const { 343 return cast<EventSymbol>(Symbols[Index]); 344 } 345 346 SectionSymbol *ObjFile::getSectionSymbol(uint32_t Index) const { 347 return cast<SectionSymbol>(Symbols[Index]); 348 } 349 350 DataSymbol *ObjFile::getDataSymbol(uint32_t Index) const { 351 return cast<DataSymbol>(Symbols[Index]); 352 } 353 354 Symbol *ObjFile::createDefined(const WasmSymbol &Sym) { 355 if (!Sym.isDefined()) 356 return nullptr; 357 358 StringRef Name = Sym.Info.Name; 359 uint32_t Flags = Sym.Info.Flags; 360 361 switch (Sym.Info.Kind) { 362 case WASM_SYMBOL_TYPE_FUNCTION: { 363 InputFunction *Func = 364 Functions[Sym.Info.ElementIndex - WasmObj->getNumImportedFunctions()]; 365 if (isExcludedByComdat(Func)) { 366 Func->Live = false; 367 return nullptr; 368 } 369 370 if (Sym.isBindingLocal()) 371 return make<DefinedFunction>(Name, Flags, this, Func); 372 return Symtab->addDefinedFunction(Name, Flags, this, Func); 373 } 374 case WASM_SYMBOL_TYPE_DATA: { 375 InputSegment *Seg = Segments[Sym.Info.DataRef.Segment]; 376 if (isExcludedByComdat(Seg)) { 377 Seg->Live = false; 378 return nullptr; 379 } 380 381 uint32_t Offset = Sym.Info.DataRef.Offset; 382 uint32_t Size = Sym.Info.DataRef.Size; 383 384 if (Sym.isBindingLocal()) 385 return make<DefinedData>(Name, Flags, this, Seg, Offset, Size); 386 return Symtab->addDefinedData(Name, Flags, this, Seg, Offset, Size); 387 } 388 case WASM_SYMBOL_TYPE_GLOBAL: { 389 InputGlobal *Global = 390 Globals[Sym.Info.ElementIndex - WasmObj->getNumImportedGlobals()]; 391 if (Sym.isBindingLocal()) 392 return make<DefinedGlobal>(Name, Flags, this, Global); 393 return Symtab->addDefinedGlobal(Name, Flags, this, Global); 394 } 395 case WASM_SYMBOL_TYPE_SECTION: { 396 InputSection *Section = CustomSectionsByIndex[Sym.Info.ElementIndex]; 397 assert(Sym.isBindingLocal()); 398 return make<SectionSymbol>(Name, Flags, Section, this); 399 } 400 case WASM_SYMBOL_TYPE_EVENT: { 401 InputEvent *Event = 402 Events[Sym.Info.ElementIndex - WasmObj->getNumImportedEvents()]; 403 if (Sym.isBindingLocal()) 404 return make<DefinedEvent>(Name, Flags, this, Event); 405 return Symtab->addDefinedEvent(Name, Flags, this, Event); 406 } 407 } 408 llvm_unreachable("unknown symbol kind"); 409 } 410 411 Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) { 412 StringRef Name = Sym.Info.Name; 413 uint32_t Flags = Sym.Info.Flags; 414 415 switch (Sym.Info.Kind) { 416 case WASM_SYMBOL_TYPE_FUNCTION: 417 return Symtab->addUndefinedFunction(Name, Sym.Info.ImportName, 418 Sym.Info.ImportModule, Flags, this, 419 Sym.Signature); 420 case WASM_SYMBOL_TYPE_DATA: 421 return Symtab->addUndefinedData(Name, Flags, this); 422 case WASM_SYMBOL_TYPE_GLOBAL: 423 return Symtab->addUndefinedGlobal(Name, Sym.Info.ImportName, 424 Sym.Info.ImportModule, Flags, this, 425 Sym.GlobalType); 426 case WASM_SYMBOL_TYPE_SECTION: 427 llvm_unreachable("section symbols cannot be undefined"); 428 } 429 llvm_unreachable("unknown symbol kind"); 430 } 431 432 void ArchiveFile::parse(bool IgnoreComdats) { 433 // Parse a MemoryBufferRef as an archive file. 434 LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n"); 435 File = CHECK(Archive::create(MB), toString(this)); 436 437 // Read the symbol table to construct Lazy symbols. 438 int Count = 0; 439 for (const Archive::Symbol &Sym : File->symbols()) { 440 Symtab->addLazy(this, &Sym); 441 ++Count; 442 } 443 LLVM_DEBUG(dbgs() << "Read " << Count << " symbols\n"); 444 } 445 446 void ArchiveFile::addMember(const Archive::Symbol *Sym) { 447 const Archive::Child &C = 448 CHECK(Sym->getMember(), 449 "could not get the member for symbol " + Sym->getName()); 450 451 // Don't try to load the same member twice (this can happen when members 452 // mutually reference each other). 453 if (!Seen.insert(C.getChildOffset()).second) 454 return; 455 456 LLVM_DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n"); 457 LLVM_DEBUG(dbgs() << "from archive: " << toString(this) << "\n"); 458 459 MemoryBufferRef MB = 460 CHECK(C.getMemoryBufferRef(), 461 "could not get the buffer for the member defining symbol " + 462 Sym->getName()); 463 464 InputFile *Obj = createObjectFile(MB, getName()); 465 Symtab->addFile(Obj); 466 } 467 468 static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) { 469 switch (GvVisibility) { 470 case GlobalValue::DefaultVisibility: 471 return WASM_SYMBOL_VISIBILITY_DEFAULT; 472 case GlobalValue::HiddenVisibility: 473 case GlobalValue::ProtectedVisibility: 474 return WASM_SYMBOL_VISIBILITY_HIDDEN; 475 } 476 llvm_unreachable("unknown visibility"); 477 } 478 479 static Symbol *createBitcodeSymbol(const std::vector<bool> &KeptComdats, 480 const lto::InputFile::Symbol &ObjSym, 481 BitcodeFile &F) { 482 StringRef Name = Saver.save(ObjSym.getName()); 483 484 uint32_t Flags = ObjSym.isWeak() ? WASM_SYMBOL_BINDING_WEAK : 0; 485 Flags |= mapVisibility(ObjSym.getVisibility()); 486 487 int C = ObjSym.getComdatIndex(); 488 bool ExcludedByComdat = C != -1 && !KeptComdats[C]; 489 490 if (ObjSym.isUndefined() || ExcludedByComdat) { 491 if (ObjSym.isExecutable()) 492 return Symtab->addUndefinedFunction(Name, Name, DefaultModule, Flags, &F, 493 nullptr); 494 return Symtab->addUndefinedData(Name, Flags, &F); 495 } 496 497 if (ObjSym.isExecutable()) 498 return Symtab->addDefinedFunction(Name, Flags, &F, nullptr); 499 return Symtab->addDefinedData(Name, Flags, &F, nullptr, 0, 0); 500 } 501 502 void BitcodeFile::parse(bool IgnoreComdats) { 503 Obj = check(lto::InputFile::create(MemoryBufferRef( 504 MB.getBuffer(), Saver.save(ArchiveName + MB.getBufferIdentifier())))); 505 Triple T(Obj->getTargetTriple()); 506 if (T.getArch() != Triple::wasm32) { 507 error(toString(MB.getBufferIdentifier()) + ": machine type must be wasm32"); 508 return; 509 } 510 std::vector<bool> KeptComdats; 511 for (StringRef S : Obj->getComdatTable()) 512 if (IgnoreComdats) 513 KeptComdats.push_back(true); 514 else 515 KeptComdats.push_back(Symtab->addComdat(S)); 516 517 for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) 518 Symbols.push_back(createBitcodeSymbol(KeptComdats, ObjSym, *this)); 519 } 520 521 // Returns a string in the format of "foo.o" or "foo.a(bar.o)". 522 std::string lld::toString(const wasm::InputFile *File) { 523 if (!File) 524 return "<internal>"; 525 526 if (File->ArchiveName.empty()) 527 return File->getName(); 528 529 return (File->ArchiveName + "(" + File->getName() + ")").str(); 530 } 531