1 //===- InputFiles.cpp -----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "InputFiles.h" 10 #include "Config.h" 11 #include "InputChunks.h" 12 #include "InputEvent.h" 13 #include "InputGlobal.h" 14 #include "SymbolTable.h" 15 #include "lld/Common/ErrorHandler.h" 16 #include "lld/Common/Memory.h" 17 #include "llvm/Object/Binary.h" 18 #include "llvm/Object/Wasm.h" 19 #include "llvm/Support/raw_ostream.h" 20 21 #define DEBUG_TYPE "lld" 22 23 using namespace lld; 24 using namespace lld::wasm; 25 26 using namespace llvm; 27 using namespace llvm::object; 28 using namespace llvm::wasm; 29 30 Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) { 31 log("Loading: " + Path); 32 33 auto MBOrErr = MemoryBuffer::getFile(Path); 34 if (auto EC = MBOrErr.getError()) { 35 error("cannot open " + Path + ": " + EC.message()); 36 return None; 37 } 38 std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; 39 MemoryBufferRef MBRef = MB->getMemBufferRef(); 40 make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership 41 42 return MBRef; 43 } 44 45 InputFile *lld::wasm::createObjectFile(MemoryBufferRef MB) { 46 file_magic Magic = identify_magic(MB.getBuffer()); 47 if (Magic == file_magic::wasm_object) 48 return make<ObjFile>(MB); 49 50 if (Magic == file_magic::bitcode) 51 return make<BitcodeFile>(MB); 52 53 fatal("unknown file type: " + MB.getBufferIdentifier()); 54 } 55 56 void ObjFile::dumpInfo() const { 57 log("info for: " + getName() + 58 "\n Symbols : " + Twine(Symbols.size()) + 59 "\n Function Imports : " + Twine(WasmObj->getNumImportedFunctions()) + 60 "\n Global Imports : " + Twine(WasmObj->getNumImportedGlobals()) + 61 "\n Event Imports : " + Twine(WasmObj->getNumImportedEvents())); 62 } 63 64 // Relocations contain either symbol or type indices. This function takes a 65 // relocation and returns relocated index (i.e. translates from the input 66 // symbol/type space to the output symbol/type space). 67 uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const { 68 if (Reloc.Type == R_WASM_TYPE_INDEX_LEB) { 69 assert(TypeIsUsed[Reloc.Index]); 70 return TypeMap[Reloc.Index]; 71 } 72 return Symbols[Reloc.Index]->getOutputSymbolIndex(); 73 } 74 75 // Relocations can contain addend for combined sections. This function takes a 76 // relocation and returns updated addend by offset in the output section. 77 uint32_t ObjFile::calcNewAddend(const WasmRelocation &Reloc) const { 78 switch (Reloc.Type) { 79 case R_WASM_MEMORY_ADDR_LEB: 80 case R_WASM_MEMORY_ADDR_SLEB: 81 case R_WASM_MEMORY_ADDR_I32: 82 case R_WASM_FUNCTION_OFFSET_I32: 83 return Reloc.Addend; 84 case R_WASM_SECTION_OFFSET_I32: 85 return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend; 86 default: 87 llvm_unreachable("unexpected relocation type"); 88 } 89 } 90 91 // Calculate the value we expect to find at the relocation location. 92 // This is used as a sanity check before applying a relocation to a given 93 // location. It is useful for catching bugs in the compiler and linker. 94 uint32_t ObjFile::calcExpectedValue(const WasmRelocation &Reloc) const { 95 switch (Reloc.Type) { 96 case R_WASM_TABLE_INDEX_I32: 97 case R_WASM_TABLE_INDEX_SLEB: { 98 const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index]; 99 return TableEntries[Sym.Info.ElementIndex]; 100 } 101 case R_WASM_MEMORY_ADDR_SLEB: 102 case R_WASM_MEMORY_ADDR_I32: 103 case R_WASM_MEMORY_ADDR_LEB: { 104 const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index]; 105 if (Sym.isUndefined()) 106 return 0; 107 const WasmSegment &Segment = 108 WasmObj->dataSegments()[Sym.Info.DataRef.Segment]; 109 return Segment.Data.Offset.Value.Int32 + Sym.Info.DataRef.Offset + 110 Reloc.Addend; 111 } 112 case R_WASM_FUNCTION_OFFSET_I32: 113 if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) { 114 return Sym->Function->getFunctionInputOffset() + 115 Sym->Function->getFunctionCodeOffset() + Reloc.Addend; 116 } 117 return 0; 118 case R_WASM_SECTION_OFFSET_I32: 119 return Reloc.Addend; 120 case R_WASM_TYPE_INDEX_LEB: 121 return Reloc.Index; 122 case R_WASM_FUNCTION_INDEX_LEB: 123 case R_WASM_GLOBAL_INDEX_LEB: 124 case R_WASM_EVENT_INDEX_LEB: { 125 const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index]; 126 return Sym.Info.ElementIndex; 127 } 128 default: 129 llvm_unreachable("unknown relocation type"); 130 } 131 } 132 133 // Translate from the relocation's index into the final linked output value. 134 uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const { 135 switch (Reloc.Type) { 136 case R_WASM_TABLE_INDEX_I32: 137 case R_WASM_TABLE_INDEX_SLEB: 138 return getFunctionSymbol(Reloc.Index)->getTableIndex(); 139 case R_WASM_MEMORY_ADDR_SLEB: 140 case R_WASM_MEMORY_ADDR_I32: 141 case R_WASM_MEMORY_ADDR_LEB: 142 if (auto *Sym = dyn_cast<DefinedData>(getDataSymbol(Reloc.Index))) 143 if (Sym->isLive()) 144 return Sym->getVirtualAddress() + Reloc.Addend; 145 return 0; 146 case R_WASM_TYPE_INDEX_LEB: 147 return TypeMap[Reloc.Index]; 148 case R_WASM_FUNCTION_INDEX_LEB: 149 return getFunctionSymbol(Reloc.Index)->getFunctionIndex(); 150 case R_WASM_GLOBAL_INDEX_LEB: 151 return getGlobalSymbol(Reloc.Index)->getGlobalIndex(); 152 case R_WASM_EVENT_INDEX_LEB: 153 return getEventSymbol(Reloc.Index)->getEventIndex(); 154 case R_WASM_FUNCTION_OFFSET_I32: 155 if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) { 156 if (Sym->isLive()) 157 return Sym->Function->OutputOffset + 158 Sym->Function->getFunctionCodeOffset() + Reloc.Addend; 159 } 160 return 0; 161 case R_WASM_SECTION_OFFSET_I32: 162 return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend; 163 default: 164 llvm_unreachable("unknown relocation type"); 165 } 166 } 167 168 template <class T> 169 static void setRelocs(const std::vector<T *> &Chunks, 170 const WasmSection *Section) { 171 if (!Section) 172 return; 173 174 ArrayRef<WasmRelocation> Relocs = Section->Relocations; 175 assert(std::is_sorted(Relocs.begin(), Relocs.end(), 176 [](const WasmRelocation &R1, const WasmRelocation &R2) { 177 return R1.Offset < R2.Offset; 178 })); 179 assert(std::is_sorted( 180 Chunks.begin(), Chunks.end(), [](InputChunk *C1, InputChunk *C2) { 181 return C1->getInputSectionOffset() < C2->getInputSectionOffset(); 182 })); 183 184 auto RelocsNext = Relocs.begin(); 185 auto RelocsEnd = Relocs.end(); 186 auto RelocLess = [](const WasmRelocation &R, uint32_t Val) { 187 return R.Offset < Val; 188 }; 189 for (InputChunk *C : Chunks) { 190 auto RelocsStart = std::lower_bound(RelocsNext, RelocsEnd, 191 C->getInputSectionOffset(), RelocLess); 192 RelocsNext = std::lower_bound( 193 RelocsStart, RelocsEnd, C->getInputSectionOffset() + C->getInputSize(), 194 RelocLess); 195 C->setRelocations(ArrayRef<WasmRelocation>(RelocsStart, RelocsNext)); 196 } 197 } 198 199 void ObjFile::parse() { 200 // Parse a memory buffer as a wasm file. 201 LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n"); 202 std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this)); 203 204 auto *Obj = dyn_cast<WasmObjectFile>(Bin.get()); 205 if (!Obj) 206 fatal(toString(this) + ": not a wasm file"); 207 if (!Obj->isRelocatableObject()) 208 fatal(toString(this) + ": not a relocatable wasm file"); 209 210 Bin.release(); 211 WasmObj.reset(Obj); 212 213 // Build up a map of function indices to table indices for use when 214 // verifying the existing table index relocations 215 uint32_t TotalFunctions = 216 WasmObj->getNumImportedFunctions() + WasmObj->functions().size(); 217 TableEntries.resize(TotalFunctions); 218 for (const WasmElemSegment &Seg : WasmObj->elements()) { 219 if (Seg.Offset.Opcode != WASM_OPCODE_I32_CONST) 220 fatal(toString(this) + ": invalid table elements"); 221 uint32_t Offset = Seg.Offset.Value.Int32; 222 for (uint32_t Index = 0; Index < Seg.Functions.size(); Index++) { 223 224 uint32_t FunctionIndex = Seg.Functions[Index]; 225 TableEntries[FunctionIndex] = Offset + Index; 226 } 227 } 228 229 // Find the code and data sections. Wasm objects can have at most one code 230 // and one data section. 231 uint32_t SectionIndex = 0; 232 for (const SectionRef &Sec : WasmObj->sections()) { 233 const WasmSection &Section = WasmObj->getWasmSection(Sec); 234 if (Section.Type == WASM_SEC_CODE) { 235 CodeSection = &Section; 236 } else if (Section.Type == WASM_SEC_DATA) { 237 DataSection = &Section; 238 } else if (Section.Type == WASM_SEC_CUSTOM) { 239 CustomSections.emplace_back(make<InputSection>(Section, this)); 240 CustomSections.back()->setRelocations(Section.Relocations); 241 CustomSectionsByIndex[SectionIndex] = CustomSections.back(); 242 if (Section.Name == "producers") 243 ProducersSection = &Section; 244 } 245 SectionIndex++; 246 } 247 248 TypeMap.resize(getWasmObj()->types().size()); 249 TypeIsUsed.resize(getWasmObj()->types().size(), false); 250 251 ArrayRef<StringRef> Comdats = WasmObj->linkingData().Comdats; 252 UsedComdats.resize(Comdats.size()); 253 for (unsigned I = 0; I < Comdats.size(); ++I) 254 UsedComdats[I] = Symtab->addComdat(Comdats[I]); 255 256 // Populate `Segments`. 257 for (const WasmSegment &S : WasmObj->dataSegments()) 258 Segments.emplace_back(make<InputSegment>(S, this)); 259 setRelocs(Segments, DataSection); 260 261 // Populate `Functions`. 262 ArrayRef<WasmFunction> Funcs = WasmObj->functions(); 263 ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes(); 264 ArrayRef<WasmSignature> Types = WasmObj->types(); 265 Functions.reserve(Funcs.size()); 266 267 for (size_t I = 0, E = Funcs.size(); I != E; ++I) 268 Functions.emplace_back( 269 make<InputFunction>(Types[FuncTypes[I]], &Funcs[I], this)); 270 setRelocs(Functions, CodeSection); 271 272 // Populate `Globals`. 273 for (const WasmGlobal &G : WasmObj->globals()) 274 Globals.emplace_back(make<InputGlobal>(G, this)); 275 276 // Populate `Events`. 277 for (const WasmEvent &E : WasmObj->events()) 278 Events.emplace_back(make<InputEvent>(Types[E.Type.SigIndex], E, this)); 279 280 // Populate `Symbols` based on the WasmSymbols in the object. 281 Symbols.reserve(WasmObj->getNumberOfSymbols()); 282 for (const SymbolRef &Sym : WasmObj->symbols()) { 283 const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl()); 284 if (Symbol *Sym = createDefined(WasmSym)) 285 Symbols.push_back(Sym); 286 else 287 Symbols.push_back(createUndefined(WasmSym)); 288 } 289 } 290 291 bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const { 292 uint32_t C = Chunk->getComdat(); 293 if (C == UINT32_MAX) 294 return false; 295 return !UsedComdats[C]; 296 } 297 298 FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t Index) const { 299 return cast<FunctionSymbol>(Symbols[Index]); 300 } 301 302 GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t Index) const { 303 return cast<GlobalSymbol>(Symbols[Index]); 304 } 305 306 EventSymbol *ObjFile::getEventSymbol(uint32_t Index) const { 307 return cast<EventSymbol>(Symbols[Index]); 308 } 309 310 SectionSymbol *ObjFile::getSectionSymbol(uint32_t Index) const { 311 return cast<SectionSymbol>(Symbols[Index]); 312 } 313 314 DataSymbol *ObjFile::getDataSymbol(uint32_t Index) const { 315 return cast<DataSymbol>(Symbols[Index]); 316 } 317 318 Symbol *ObjFile::createDefined(const WasmSymbol &Sym) { 319 if (!Sym.isDefined()) 320 return nullptr; 321 322 StringRef Name = Sym.Info.Name; 323 uint32_t Flags = Sym.Info.Flags; 324 325 switch (Sym.Info.Kind) { 326 case WASM_SYMBOL_TYPE_FUNCTION: { 327 InputFunction *Func = 328 Functions[Sym.Info.ElementIndex - WasmObj->getNumImportedFunctions()]; 329 if (isExcludedByComdat(Func)) { 330 Func->Live = false; 331 return nullptr; 332 } 333 334 if (Sym.isBindingLocal()) 335 return make<DefinedFunction>(Name, Flags, this, Func); 336 return Symtab->addDefinedFunction(Name, Flags, this, Func); 337 } 338 case WASM_SYMBOL_TYPE_DATA: { 339 InputSegment *Seg = Segments[Sym.Info.DataRef.Segment]; 340 if (isExcludedByComdat(Seg)) { 341 Seg->Live = false; 342 return nullptr; 343 } 344 345 uint32_t Offset = Sym.Info.DataRef.Offset; 346 uint32_t Size = Sym.Info.DataRef.Size; 347 348 if (Sym.isBindingLocal()) 349 return make<DefinedData>(Name, Flags, this, Seg, Offset, Size); 350 return Symtab->addDefinedData(Name, Flags, this, Seg, Offset, Size); 351 } 352 case WASM_SYMBOL_TYPE_GLOBAL: { 353 InputGlobal *Global = 354 Globals[Sym.Info.ElementIndex - WasmObj->getNumImportedGlobals()]; 355 if (Sym.isBindingLocal()) 356 return make<DefinedGlobal>(Name, Flags, this, Global); 357 return Symtab->addDefinedGlobal(Name, Flags, this, Global); 358 } 359 case WASM_SYMBOL_TYPE_SECTION: { 360 InputSection *Section = CustomSectionsByIndex[Sym.Info.ElementIndex]; 361 assert(Sym.isBindingLocal()); 362 return make<SectionSymbol>(Name, Flags, Section, this); 363 } 364 case WASM_SYMBOL_TYPE_EVENT: { 365 InputEvent *Event = 366 Events[Sym.Info.ElementIndex - WasmObj->getNumImportedEvents()]; 367 if (Sym.isBindingLocal()) 368 return make<DefinedEvent>(Name, Flags, this, Event); 369 return Symtab->addDefinedEvent(Name, Flags, this, Event); 370 } 371 } 372 llvm_unreachable("unknown symbol kind"); 373 } 374 375 Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) { 376 StringRef Name = Sym.Info.Name; 377 uint32_t Flags = Sym.Info.Flags; 378 379 switch (Sym.Info.Kind) { 380 case WASM_SYMBOL_TYPE_FUNCTION: 381 return Symtab->addUndefinedFunction(Name, Sym.Info.ImportName, 382 Sym.Info.ImportModule, Flags, this, 383 Sym.Signature); 384 case WASM_SYMBOL_TYPE_DATA: 385 return Symtab->addUndefinedData(Name, Flags, this); 386 case WASM_SYMBOL_TYPE_GLOBAL: 387 return Symtab->addUndefinedGlobal(Name, Sym.Info.ImportName, 388 Sym.Info.ImportModule, Flags, this, 389 Sym.GlobalType); 390 case WASM_SYMBOL_TYPE_SECTION: 391 llvm_unreachable("section symbols cannot be undefined"); 392 } 393 llvm_unreachable("unknown symbol kind"); 394 } 395 396 void ArchiveFile::parse() { 397 // Parse a MemoryBufferRef as an archive file. 398 LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n"); 399 File = CHECK(Archive::create(MB), toString(this)); 400 401 // Read the symbol table to construct Lazy symbols. 402 int Count = 0; 403 for (const Archive::Symbol &Sym : File->symbols()) { 404 Symtab->addLazy(this, &Sym); 405 ++Count; 406 } 407 LLVM_DEBUG(dbgs() << "Read " << Count << " symbols\n"); 408 } 409 410 void ArchiveFile::addMember(const Archive::Symbol *Sym) { 411 const Archive::Child &C = 412 CHECK(Sym->getMember(), 413 "could not get the member for symbol " + Sym->getName()); 414 415 // Don't try to load the same member twice (this can happen when members 416 // mutually reference each other). 417 if (!Seen.insert(C.getChildOffset()).second) 418 return; 419 420 LLVM_DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n"); 421 LLVM_DEBUG(dbgs() << "from archive: " << toString(this) << "\n"); 422 423 MemoryBufferRef MB = 424 CHECK(C.getMemoryBufferRef(), 425 "could not get the buffer for the member defining symbol " + 426 Sym->getName()); 427 428 InputFile *Obj = createObjectFile(MB); 429 Obj->ArchiveName = getName(); 430 Symtab->addFile(Obj); 431 } 432 433 static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) { 434 switch (GvVisibility) { 435 case GlobalValue::DefaultVisibility: 436 return WASM_SYMBOL_VISIBILITY_DEFAULT; 437 case GlobalValue::HiddenVisibility: 438 case GlobalValue::ProtectedVisibility: 439 return WASM_SYMBOL_VISIBILITY_HIDDEN; 440 } 441 llvm_unreachable("unknown visibility"); 442 } 443 444 static Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &ObjSym, 445 BitcodeFile &F) { 446 StringRef Name = Saver.save(ObjSym.getName()); 447 448 uint32_t Flags = ObjSym.isWeak() ? WASM_SYMBOL_BINDING_WEAK : 0; 449 Flags |= mapVisibility(ObjSym.getVisibility()); 450 451 if (ObjSym.isUndefined()) { 452 if (ObjSym.isExecutable()) 453 return Symtab->addUndefinedFunction(Name, Name, DefaultModule, Flags, &F, 454 nullptr); 455 return Symtab->addUndefinedData(Name, Flags, &F); 456 } 457 458 if (ObjSym.isExecutable()) 459 return Symtab->addDefinedFunction(Name, Flags, &F, nullptr); 460 return Symtab->addDefinedData(Name, Flags, &F, nullptr, 0, 0); 461 } 462 463 void BitcodeFile::parse() { 464 Obj = check(lto::InputFile::create(MemoryBufferRef( 465 MB.getBuffer(), Saver.save(ArchiveName + MB.getBufferIdentifier())))); 466 Triple T(Obj->getTargetTriple()); 467 if (T.getArch() != Triple::wasm32) { 468 error(toString(MB.getBufferIdentifier()) + ": machine type must be wasm32"); 469 return; 470 } 471 472 for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) 473 Symbols.push_back(createBitcodeSymbol(ObjSym, *this)); 474 } 475 476 // Returns a string in the format of "foo.o" or "foo.a(bar.o)". 477 std::string lld::toString(const wasm::InputFile *File) { 478 if (!File) 479 return "<internal>"; 480 481 if (File->ArchiveName.empty()) 482 return File->getName(); 483 484 return (File->ArchiveName + "(" + File->getName() + ")").str(); 485 } 486