1 //===- InputFiles.cpp -----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "InputFiles.h" 10 #include "Config.h" 11 #include "InputChunks.h" 12 #include "InputEvent.h" 13 #include "InputGlobal.h" 14 #include "SymbolTable.h" 15 #include "lld/Common/ErrorHandler.h" 16 #include "lld/Common/Memory.h" 17 #include "llvm/Object/Binary.h" 18 #include "llvm/Object/Wasm.h" 19 #include "llvm/Support/raw_ostream.h" 20 21 #define DEBUG_TYPE "lld" 22 23 using namespace lld; 24 using namespace lld::wasm; 25 26 using namespace llvm; 27 using namespace llvm::object; 28 using namespace llvm::wasm; 29 30 Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) { 31 log("Loading: " + Path); 32 33 auto MBOrErr = MemoryBuffer::getFile(Path); 34 if (auto EC = MBOrErr.getError()) { 35 error("cannot open " + Path + ": " + EC.message()); 36 return None; 37 } 38 std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; 39 MemoryBufferRef MBRef = MB->getMemBufferRef(); 40 make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership 41 42 return MBRef; 43 } 44 45 InputFile *lld::wasm::createObjectFile(MemoryBufferRef MB) { 46 file_magic Magic = identify_magic(MB.getBuffer()); 47 if (Magic == file_magic::wasm_object) { 48 std::unique_ptr<Binary> Bin = check(createBinary(MB)); 49 auto *Obj = cast<WasmObjectFile>(Bin.get()); 50 if (Obj->isSharedObject()) 51 return make<SharedFile>(MB); 52 return make<ObjFile>(MB); 53 } 54 55 if (Magic == file_magic::bitcode) 56 return make<BitcodeFile>(MB); 57 58 fatal("unknown file type: " + MB.getBufferIdentifier()); 59 } 60 61 void ObjFile::dumpInfo() const { 62 log("info for: " + getName() + 63 "\n Symbols : " + Twine(Symbols.size()) + 64 "\n Function Imports : " + Twine(WasmObj->getNumImportedFunctions()) + 65 "\n Global Imports : " + Twine(WasmObj->getNumImportedGlobals()) + 66 "\n Event Imports : " + Twine(WasmObj->getNumImportedEvents())); 67 } 68 69 // Relocations contain either symbol or type indices. This function takes a 70 // relocation and returns relocated index (i.e. translates from the input 71 // symbol/type space to the output symbol/type space). 72 uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const { 73 if (Reloc.Type == R_WASM_TYPE_INDEX_LEB) { 74 assert(TypeIsUsed[Reloc.Index]); 75 return TypeMap[Reloc.Index]; 76 } 77 return Symbols[Reloc.Index]->getOutputSymbolIndex(); 78 } 79 80 // Relocations can contain addend for combined sections. This function takes a 81 // relocation and returns updated addend by offset in the output section. 82 uint32_t ObjFile::calcNewAddend(const WasmRelocation &Reloc) const { 83 switch (Reloc.Type) { 84 case R_WASM_MEMORY_ADDR_LEB: 85 case R_WASM_MEMORY_ADDR_SLEB: 86 case R_WASM_MEMORY_ADDR_I32: 87 case R_WASM_FUNCTION_OFFSET_I32: 88 return Reloc.Addend; 89 case R_WASM_SECTION_OFFSET_I32: 90 return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend; 91 default: 92 llvm_unreachable("unexpected relocation type"); 93 } 94 } 95 96 // Calculate the value we expect to find at the relocation location. 97 // This is used as a sanity check before applying a relocation to a given 98 // location. It is useful for catching bugs in the compiler and linker. 99 uint32_t ObjFile::calcExpectedValue(const WasmRelocation &Reloc) const { 100 switch (Reloc.Type) { 101 case R_WASM_TABLE_INDEX_I32: 102 case R_WASM_TABLE_INDEX_SLEB: { 103 const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index]; 104 return TableEntries[Sym.Info.ElementIndex]; 105 } 106 case R_WASM_MEMORY_ADDR_SLEB: 107 case R_WASM_MEMORY_ADDR_I32: 108 case R_WASM_MEMORY_ADDR_LEB: { 109 const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index]; 110 if (Sym.isUndefined()) 111 return 0; 112 const WasmSegment &Segment = 113 WasmObj->dataSegments()[Sym.Info.DataRef.Segment]; 114 return Segment.Data.Offset.Value.Int32 + Sym.Info.DataRef.Offset + 115 Reloc.Addend; 116 } 117 case R_WASM_FUNCTION_OFFSET_I32: 118 if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) { 119 return Sym->Function->getFunctionInputOffset() + 120 Sym->Function->getFunctionCodeOffset() + Reloc.Addend; 121 } 122 return 0; 123 case R_WASM_SECTION_OFFSET_I32: 124 return Reloc.Addend; 125 case R_WASM_TYPE_INDEX_LEB: 126 return Reloc.Index; 127 case R_WASM_FUNCTION_INDEX_LEB: 128 case R_WASM_GLOBAL_INDEX_LEB: 129 case R_WASM_EVENT_INDEX_LEB: { 130 const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index]; 131 return Sym.Info.ElementIndex; 132 } 133 default: 134 llvm_unreachable("unknown relocation type"); 135 } 136 } 137 138 // Translate from the relocation's index into the final linked output value. 139 uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const { 140 switch (Reloc.Type) { 141 case R_WASM_TABLE_INDEX_I32: 142 case R_WASM_TABLE_INDEX_SLEB: 143 return getFunctionSymbol(Reloc.Index)->getTableIndex(); 144 case R_WASM_MEMORY_ADDR_SLEB: 145 case R_WASM_MEMORY_ADDR_I32: 146 case R_WASM_MEMORY_ADDR_LEB: 147 if (auto *Sym = dyn_cast<DefinedData>(getDataSymbol(Reloc.Index))) 148 if (Sym->isLive()) 149 return Sym->getVirtualAddress() + Reloc.Addend; 150 return 0; 151 case R_WASM_TYPE_INDEX_LEB: 152 return TypeMap[Reloc.Index]; 153 case R_WASM_FUNCTION_INDEX_LEB: 154 return getFunctionSymbol(Reloc.Index)->getFunctionIndex(); 155 case R_WASM_GLOBAL_INDEX_LEB: { 156 const Symbol* Sym = Symbols[Reloc.Index]; 157 if (auto GS = dyn_cast<GlobalSymbol>(Sym)) 158 return GS->getGlobalIndex(); 159 return Sym->getGOTIndex(); 160 } case R_WASM_EVENT_INDEX_LEB: 161 return getEventSymbol(Reloc.Index)->getEventIndex(); 162 case R_WASM_FUNCTION_OFFSET_I32: 163 if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) { 164 if (Sym->isLive()) 165 return Sym->Function->OutputOffset + 166 Sym->Function->getFunctionCodeOffset() + Reloc.Addend; 167 } 168 return 0; 169 case R_WASM_SECTION_OFFSET_I32: 170 return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend; 171 default: 172 llvm_unreachable("unknown relocation type"); 173 } 174 } 175 176 template <class T> 177 static void setRelocs(const std::vector<T *> &Chunks, 178 const WasmSection *Section) { 179 if (!Section) 180 return; 181 182 ArrayRef<WasmRelocation> Relocs = Section->Relocations; 183 assert(std::is_sorted(Relocs.begin(), Relocs.end(), 184 [](const WasmRelocation &R1, const WasmRelocation &R2) { 185 return R1.Offset < R2.Offset; 186 })); 187 assert(std::is_sorted( 188 Chunks.begin(), Chunks.end(), [](InputChunk *C1, InputChunk *C2) { 189 return C1->getInputSectionOffset() < C2->getInputSectionOffset(); 190 })); 191 192 auto RelocsNext = Relocs.begin(); 193 auto RelocsEnd = Relocs.end(); 194 auto RelocLess = [](const WasmRelocation &R, uint32_t Val) { 195 return R.Offset < Val; 196 }; 197 for (InputChunk *C : Chunks) { 198 auto RelocsStart = std::lower_bound(RelocsNext, RelocsEnd, 199 C->getInputSectionOffset(), RelocLess); 200 RelocsNext = std::lower_bound( 201 RelocsStart, RelocsEnd, C->getInputSectionOffset() + C->getInputSize(), 202 RelocLess); 203 C->setRelocations(ArrayRef<WasmRelocation>(RelocsStart, RelocsNext)); 204 } 205 } 206 207 void ObjFile::parse() { 208 // Parse a memory buffer as a wasm file. 209 LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n"); 210 std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this)); 211 212 auto *Obj = dyn_cast<WasmObjectFile>(Bin.get()); 213 if (!Obj) 214 fatal(toString(this) + ": not a wasm file"); 215 if (!Obj->isRelocatableObject()) 216 fatal(toString(this) + ": not a relocatable wasm file"); 217 218 Bin.release(); 219 WasmObj.reset(Obj); 220 221 // Build up a map of function indices to table indices for use when 222 // verifying the existing table index relocations 223 uint32_t TotalFunctions = 224 WasmObj->getNumImportedFunctions() + WasmObj->functions().size(); 225 TableEntries.resize(TotalFunctions); 226 for (const WasmElemSegment &Seg : WasmObj->elements()) { 227 if (Seg.Offset.Opcode != WASM_OPCODE_I32_CONST) 228 fatal(toString(this) + ": invalid table elements"); 229 uint32_t Offset = Seg.Offset.Value.Int32; 230 for (uint32_t Index = 0; Index < Seg.Functions.size(); Index++) { 231 232 uint32_t FunctionIndex = Seg.Functions[Index]; 233 TableEntries[FunctionIndex] = Offset + Index; 234 } 235 } 236 237 // Find the code and data sections. Wasm objects can have at most one code 238 // and one data section. 239 uint32_t SectionIndex = 0; 240 for (const SectionRef &Sec : WasmObj->sections()) { 241 const WasmSection &Section = WasmObj->getWasmSection(Sec); 242 if (Section.Type == WASM_SEC_CODE) { 243 CodeSection = &Section; 244 } else if (Section.Type == WASM_SEC_DATA) { 245 DataSection = &Section; 246 } else if (Section.Type == WASM_SEC_CUSTOM) { 247 CustomSections.emplace_back(make<InputSection>(Section, this)); 248 CustomSections.back()->setRelocations(Section.Relocations); 249 CustomSectionsByIndex[SectionIndex] = CustomSections.back(); 250 } 251 SectionIndex++; 252 } 253 254 TypeMap.resize(getWasmObj()->types().size()); 255 TypeIsUsed.resize(getWasmObj()->types().size(), false); 256 257 ArrayRef<StringRef> Comdats = WasmObj->linkingData().Comdats; 258 UsedComdats.resize(Comdats.size()); 259 for (unsigned I = 0; I < Comdats.size(); ++I) 260 UsedComdats[I] = Symtab->addComdat(Comdats[I]); 261 262 // Populate `Segments`. 263 for (const WasmSegment &S : WasmObj->dataSegments()) 264 Segments.emplace_back(make<InputSegment>(S, this)); 265 setRelocs(Segments, DataSection); 266 267 // Populate `Functions`. 268 ArrayRef<WasmFunction> Funcs = WasmObj->functions(); 269 ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes(); 270 ArrayRef<WasmSignature> Types = WasmObj->types(); 271 Functions.reserve(Funcs.size()); 272 273 for (size_t I = 0, E = Funcs.size(); I != E; ++I) 274 Functions.emplace_back( 275 make<InputFunction>(Types[FuncTypes[I]], &Funcs[I], this)); 276 setRelocs(Functions, CodeSection); 277 278 // Populate `Globals`. 279 for (const WasmGlobal &G : WasmObj->globals()) 280 Globals.emplace_back(make<InputGlobal>(G, this)); 281 282 // Populate `Events`. 283 for (const WasmEvent &E : WasmObj->events()) 284 Events.emplace_back(make<InputEvent>(Types[E.Type.SigIndex], E, this)); 285 286 // Populate `Symbols` based on the WasmSymbols in the object. 287 Symbols.reserve(WasmObj->getNumberOfSymbols()); 288 for (const SymbolRef &Sym : WasmObj->symbols()) { 289 const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl()); 290 if (Symbol *Sym = createDefined(WasmSym)) 291 Symbols.push_back(Sym); 292 else 293 Symbols.push_back(createUndefined(WasmSym)); 294 } 295 } 296 297 bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const { 298 uint32_t C = Chunk->getComdat(); 299 if (C == UINT32_MAX) 300 return false; 301 return !UsedComdats[C]; 302 } 303 304 FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t Index) const { 305 return cast<FunctionSymbol>(Symbols[Index]); 306 } 307 308 GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t Index) const { 309 return cast<GlobalSymbol>(Symbols[Index]); 310 } 311 312 EventSymbol *ObjFile::getEventSymbol(uint32_t Index) const { 313 return cast<EventSymbol>(Symbols[Index]); 314 } 315 316 SectionSymbol *ObjFile::getSectionSymbol(uint32_t Index) const { 317 return cast<SectionSymbol>(Symbols[Index]); 318 } 319 320 DataSymbol *ObjFile::getDataSymbol(uint32_t Index) const { 321 return cast<DataSymbol>(Symbols[Index]); 322 } 323 324 Symbol *ObjFile::createDefined(const WasmSymbol &Sym) { 325 if (!Sym.isDefined()) 326 return nullptr; 327 328 StringRef Name = Sym.Info.Name; 329 uint32_t Flags = Sym.Info.Flags; 330 331 switch (Sym.Info.Kind) { 332 case WASM_SYMBOL_TYPE_FUNCTION: { 333 InputFunction *Func = 334 Functions[Sym.Info.ElementIndex - WasmObj->getNumImportedFunctions()]; 335 if (isExcludedByComdat(Func)) { 336 Func->Live = false; 337 return nullptr; 338 } 339 340 if (Sym.isBindingLocal()) 341 return make<DefinedFunction>(Name, Flags, this, Func); 342 return Symtab->addDefinedFunction(Name, Flags, this, Func); 343 } 344 case WASM_SYMBOL_TYPE_DATA: { 345 InputSegment *Seg = Segments[Sym.Info.DataRef.Segment]; 346 if (isExcludedByComdat(Seg)) { 347 Seg->Live = false; 348 return nullptr; 349 } 350 351 uint32_t Offset = Sym.Info.DataRef.Offset; 352 uint32_t Size = Sym.Info.DataRef.Size; 353 354 if (Sym.isBindingLocal()) 355 return make<DefinedData>(Name, Flags, this, Seg, Offset, Size); 356 return Symtab->addDefinedData(Name, Flags, this, Seg, Offset, Size); 357 } 358 case WASM_SYMBOL_TYPE_GLOBAL: { 359 InputGlobal *Global = 360 Globals[Sym.Info.ElementIndex - WasmObj->getNumImportedGlobals()]; 361 if (Sym.isBindingLocal()) 362 return make<DefinedGlobal>(Name, Flags, this, Global); 363 return Symtab->addDefinedGlobal(Name, Flags, this, Global); 364 } 365 case WASM_SYMBOL_TYPE_SECTION: { 366 InputSection *Section = CustomSectionsByIndex[Sym.Info.ElementIndex]; 367 assert(Sym.isBindingLocal()); 368 return make<SectionSymbol>(Name, Flags, Section, this); 369 } 370 case WASM_SYMBOL_TYPE_EVENT: { 371 InputEvent *Event = 372 Events[Sym.Info.ElementIndex - WasmObj->getNumImportedEvents()]; 373 if (Sym.isBindingLocal()) 374 return make<DefinedEvent>(Name, Flags, this, Event); 375 return Symtab->addDefinedEvent(Name, Flags, this, Event); 376 } 377 } 378 llvm_unreachable("unknown symbol kind"); 379 } 380 381 Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) { 382 StringRef Name = Sym.Info.Name; 383 uint32_t Flags = Sym.Info.Flags; 384 385 switch (Sym.Info.Kind) { 386 case WASM_SYMBOL_TYPE_FUNCTION: 387 return Symtab->addUndefinedFunction(Name, Sym.Info.ImportName, 388 Sym.Info.ImportModule, Flags, this, 389 Sym.Signature); 390 case WASM_SYMBOL_TYPE_DATA: 391 return Symtab->addUndefinedData(Name, Flags, this); 392 case WASM_SYMBOL_TYPE_GLOBAL: 393 return Symtab->addUndefinedGlobal(Name, Sym.Info.ImportName, 394 Sym.Info.ImportModule, Flags, this, 395 Sym.GlobalType); 396 case WASM_SYMBOL_TYPE_SECTION: 397 llvm_unreachable("section symbols cannot be undefined"); 398 } 399 llvm_unreachable("unknown symbol kind"); 400 } 401 402 void ArchiveFile::parse() { 403 // Parse a MemoryBufferRef as an archive file. 404 LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n"); 405 File = CHECK(Archive::create(MB), toString(this)); 406 407 // Read the symbol table to construct Lazy symbols. 408 int Count = 0; 409 for (const Archive::Symbol &Sym : File->symbols()) { 410 Symtab->addLazy(this, &Sym); 411 ++Count; 412 } 413 LLVM_DEBUG(dbgs() << "Read " << Count << " symbols\n"); 414 } 415 416 void ArchiveFile::addMember(const Archive::Symbol *Sym) { 417 const Archive::Child &C = 418 CHECK(Sym->getMember(), 419 "could not get the member for symbol " + Sym->getName()); 420 421 // Don't try to load the same member twice (this can happen when members 422 // mutually reference each other). 423 if (!Seen.insert(C.getChildOffset()).second) 424 return; 425 426 LLVM_DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n"); 427 LLVM_DEBUG(dbgs() << "from archive: " << toString(this) << "\n"); 428 429 MemoryBufferRef MB = 430 CHECK(C.getMemoryBufferRef(), 431 "could not get the buffer for the member defining symbol " + 432 Sym->getName()); 433 434 InputFile *Obj = createObjectFile(MB); 435 Obj->ArchiveName = getName(); 436 Symtab->addFile(Obj); 437 } 438 439 static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) { 440 switch (GvVisibility) { 441 case GlobalValue::DefaultVisibility: 442 return WASM_SYMBOL_VISIBILITY_DEFAULT; 443 case GlobalValue::HiddenVisibility: 444 case GlobalValue::ProtectedVisibility: 445 return WASM_SYMBOL_VISIBILITY_HIDDEN; 446 } 447 llvm_unreachable("unknown visibility"); 448 } 449 450 static Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &ObjSym, 451 BitcodeFile &F) { 452 StringRef Name = Saver.save(ObjSym.getName()); 453 454 uint32_t Flags = ObjSym.isWeak() ? WASM_SYMBOL_BINDING_WEAK : 0; 455 Flags |= mapVisibility(ObjSym.getVisibility()); 456 457 if (ObjSym.isUndefined()) { 458 if (ObjSym.isExecutable()) 459 return Symtab->addUndefinedFunction(Name, Name, DefaultModule, Flags, &F, 460 nullptr); 461 return Symtab->addUndefinedData(Name, Flags, &F); 462 } 463 464 if (ObjSym.isExecutable()) 465 return Symtab->addDefinedFunction(Name, Flags, &F, nullptr); 466 return Symtab->addDefinedData(Name, Flags, &F, nullptr, 0, 0); 467 } 468 469 void BitcodeFile::parse() { 470 Obj = check(lto::InputFile::create(MemoryBufferRef( 471 MB.getBuffer(), Saver.save(ArchiveName + MB.getBufferIdentifier())))); 472 Triple T(Obj->getTargetTriple()); 473 if (T.getArch() != Triple::wasm32) { 474 error(toString(MB.getBufferIdentifier()) + ": machine type must be wasm32"); 475 return; 476 } 477 478 for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) 479 Symbols.push_back(createBitcodeSymbol(ObjSym, *this)); 480 } 481 482 // Returns a string in the format of "foo.o" or "foo.a(bar.o)". 483 std::string lld::toString(const wasm::InputFile *File) { 484 if (!File) 485 return "<internal>"; 486 487 if (File->ArchiveName.empty()) 488 return File->getName(); 489 490 return (File->ArchiveName + "(" + File->getName() + ")").str(); 491 } 492