1 //===- InputFiles.cpp -----------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "InputFiles.h" 11 #include "Config.h" 12 #include "InputChunks.h" 13 #include "InputEvent.h" 14 #include "InputGlobal.h" 15 #include "SymbolTable.h" 16 #include "lld/Common/ErrorHandler.h" 17 #include "lld/Common/Memory.h" 18 #include "llvm/Object/Binary.h" 19 #include "llvm/Object/Wasm.h" 20 #include "llvm/Support/raw_ostream.h" 21 22 #define DEBUG_TYPE "lld" 23 24 using namespace lld; 25 using namespace lld::wasm; 26 27 using namespace llvm; 28 using namespace llvm::object; 29 using namespace llvm::wasm; 30 31 Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) { 32 log("Loading: " + Path); 33 34 auto MBOrErr = MemoryBuffer::getFile(Path); 35 if (auto EC = MBOrErr.getError()) { 36 error("cannot open " + Path + ": " + EC.message()); 37 return None; 38 } 39 std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; 40 MemoryBufferRef MBRef = MB->getMemBufferRef(); 41 make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership 42 43 return MBRef; 44 } 45 46 InputFile *lld::wasm::createObjectFile(MemoryBufferRef MB) { 47 file_magic Magic = identify_magic(MB.getBuffer()); 48 if (Magic == file_magic::wasm_object) 49 return make<ObjFile>(MB); 50 51 if (Magic == file_magic::bitcode) 52 return make<BitcodeFile>(MB); 53 54 fatal("unknown file type: " + MB.getBufferIdentifier()); 55 } 56 57 void ObjFile::dumpInfo() const { 58 log("info for: " + getName() + 59 "\n Symbols : " + Twine(Symbols.size()) + 60 "\n Function Imports : " + Twine(WasmObj->getNumImportedFunctions()) + 61 "\n Global Imports : " + Twine(WasmObj->getNumImportedGlobals()) + 62 "\n Event Imports : " + Twine(WasmObj->getNumImportedEvents())); 63 } 64 65 // Relocations contain either symbol or type indices. This function takes a 66 // relocation and returns relocated index (i.e. translates from the input 67 // symbol/type space to the output symbol/type space). 68 uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const { 69 if (Reloc.Type == R_WEBASSEMBLY_TYPE_INDEX_LEB) { 70 assert(TypeIsUsed[Reloc.Index]); 71 return TypeMap[Reloc.Index]; 72 } 73 return Symbols[Reloc.Index]->getOutputSymbolIndex(); 74 } 75 76 // Relocations can contain addend for combined sections. This function takes a 77 // relocation and returns updated addend by offset in the output section. 78 uint32_t ObjFile::calcNewAddend(const WasmRelocation &Reloc) const { 79 switch (Reloc.Type) { 80 case R_WEBASSEMBLY_MEMORY_ADDR_LEB: 81 case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: 82 case R_WEBASSEMBLY_MEMORY_ADDR_I32: 83 case R_WEBASSEMBLY_FUNCTION_OFFSET_I32: 84 return Reloc.Addend; 85 case R_WEBASSEMBLY_SECTION_OFFSET_I32: 86 return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend; 87 default: 88 llvm_unreachable("unexpected relocation type"); 89 } 90 } 91 92 // Calculate the value we expect to find at the relocation location. 93 // This is used as a sanity check before applying a relocation to a given 94 // location. It is useful for catching bugs in the compiler and linker. 95 uint32_t ObjFile::calcExpectedValue(const WasmRelocation &Reloc) const { 96 switch (Reloc.Type) { 97 case R_WEBASSEMBLY_TABLE_INDEX_I32: 98 case R_WEBASSEMBLY_TABLE_INDEX_SLEB: { 99 const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index]; 100 return TableEntries[Sym.Info.ElementIndex]; 101 } 102 case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: 103 case R_WEBASSEMBLY_MEMORY_ADDR_I32: 104 case R_WEBASSEMBLY_MEMORY_ADDR_LEB: { 105 const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index]; 106 if (Sym.isUndefined()) 107 return 0; 108 const WasmSegment &Segment = 109 WasmObj->dataSegments()[Sym.Info.DataRef.Segment]; 110 return Segment.Data.Offset.Value.Int32 + Sym.Info.DataRef.Offset + 111 Reloc.Addend; 112 } 113 case R_WEBASSEMBLY_FUNCTION_OFFSET_I32: 114 if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) { 115 return Sym->Function->getFunctionInputOffset() + 116 Sym->Function->getFunctionCodeOffset() + Reloc.Addend; 117 } 118 return 0; 119 case R_WEBASSEMBLY_SECTION_OFFSET_I32: 120 return Reloc.Addend; 121 case R_WEBASSEMBLY_TYPE_INDEX_LEB: 122 return Reloc.Index; 123 case R_WEBASSEMBLY_FUNCTION_INDEX_LEB: 124 case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: 125 case R_WEBASSEMBLY_EVENT_INDEX_LEB: { 126 const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index]; 127 return Sym.Info.ElementIndex; 128 } 129 default: 130 llvm_unreachable("unknown relocation type"); 131 } 132 } 133 134 // Translate from the relocation's index into the final linked output value. 135 uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const { 136 switch (Reloc.Type) { 137 case R_WEBASSEMBLY_TABLE_INDEX_I32: 138 case R_WEBASSEMBLY_TABLE_INDEX_SLEB: 139 return getFunctionSymbol(Reloc.Index)->getTableIndex(); 140 case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: 141 case R_WEBASSEMBLY_MEMORY_ADDR_I32: 142 case R_WEBASSEMBLY_MEMORY_ADDR_LEB: 143 if (auto *Sym = dyn_cast<DefinedData>(getDataSymbol(Reloc.Index))) 144 if (Sym->isLive()) 145 return Sym->getVirtualAddress() + Reloc.Addend; 146 return 0; 147 case R_WEBASSEMBLY_TYPE_INDEX_LEB: 148 return TypeMap[Reloc.Index]; 149 case R_WEBASSEMBLY_FUNCTION_INDEX_LEB: 150 return getFunctionSymbol(Reloc.Index)->getFunctionIndex(); 151 case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: 152 return getGlobalSymbol(Reloc.Index)->getGlobalIndex(); 153 case R_WEBASSEMBLY_EVENT_INDEX_LEB: 154 return getEventSymbol(Reloc.Index)->getEventIndex(); 155 case R_WEBASSEMBLY_FUNCTION_OFFSET_I32: 156 if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) { 157 if (Sym->isLive()) 158 return Sym->Function->OutputOffset + 159 Sym->Function->getFunctionCodeOffset() + Reloc.Addend; 160 } 161 return 0; 162 case R_WEBASSEMBLY_SECTION_OFFSET_I32: 163 return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend; 164 default: 165 llvm_unreachable("unknown relocation type"); 166 } 167 } 168 169 template <class T> 170 static void setRelocs(const std::vector<T *> &Chunks, 171 const WasmSection *Section) { 172 if (!Section) 173 return; 174 175 ArrayRef<WasmRelocation> Relocs = Section->Relocations; 176 assert(std::is_sorted(Relocs.begin(), Relocs.end(), 177 [](const WasmRelocation &R1, const WasmRelocation &R2) { 178 return R1.Offset < R2.Offset; 179 })); 180 assert(std::is_sorted( 181 Chunks.begin(), Chunks.end(), [](InputChunk *C1, InputChunk *C2) { 182 return C1->getInputSectionOffset() < C2->getInputSectionOffset(); 183 })); 184 185 auto RelocsNext = Relocs.begin(); 186 auto RelocsEnd = Relocs.end(); 187 auto RelocLess = [](const WasmRelocation &R, uint32_t Val) { 188 return R.Offset < Val; 189 }; 190 for (InputChunk *C : Chunks) { 191 auto RelocsStart = std::lower_bound(RelocsNext, RelocsEnd, 192 C->getInputSectionOffset(), RelocLess); 193 RelocsNext = std::lower_bound( 194 RelocsStart, RelocsEnd, C->getInputSectionOffset() + C->getInputSize(), 195 RelocLess); 196 C->setRelocations(ArrayRef<WasmRelocation>(RelocsStart, RelocsNext)); 197 } 198 } 199 200 void ObjFile::parse() { 201 // Parse a memory buffer as a wasm file. 202 LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n"); 203 std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this)); 204 205 auto *Obj = dyn_cast<WasmObjectFile>(Bin.get()); 206 if (!Obj) 207 fatal(toString(this) + ": not a wasm file"); 208 if (!Obj->isRelocatableObject()) 209 fatal(toString(this) + ": not a relocatable wasm file"); 210 211 Bin.release(); 212 WasmObj.reset(Obj); 213 214 // Build up a map of function indices to table indices for use when 215 // verifying the existing table index relocations 216 uint32_t TotalFunctions = 217 WasmObj->getNumImportedFunctions() + WasmObj->functions().size(); 218 TableEntries.resize(TotalFunctions); 219 for (const WasmElemSegment &Seg : WasmObj->elements()) { 220 if (Seg.Offset.Opcode != WASM_OPCODE_I32_CONST) 221 fatal(toString(this) + ": invalid table elements"); 222 uint32_t Offset = Seg.Offset.Value.Int32; 223 for (uint32_t Index = 0; Index < Seg.Functions.size(); Index++) { 224 225 uint32_t FunctionIndex = Seg.Functions[Index]; 226 TableEntries[FunctionIndex] = Offset + Index; 227 } 228 } 229 230 // Find the code and data sections. Wasm objects can have at most one code 231 // and one data section. 232 uint32_t SectionIndex = 0; 233 for (const SectionRef &Sec : WasmObj->sections()) { 234 const WasmSection &Section = WasmObj->getWasmSection(Sec); 235 if (Section.Type == WASM_SEC_CODE) { 236 CodeSection = &Section; 237 } else if (Section.Type == WASM_SEC_DATA) { 238 DataSection = &Section; 239 } else if (Section.Type == WASM_SEC_CUSTOM) { 240 CustomSections.emplace_back(make<InputSection>(Section, this)); 241 CustomSections.back()->setRelocations(Section.Relocations); 242 CustomSectionsByIndex[SectionIndex] = CustomSections.back(); 243 } 244 SectionIndex++; 245 } 246 247 TypeMap.resize(getWasmObj()->types().size()); 248 TypeIsUsed.resize(getWasmObj()->types().size(), false); 249 250 ArrayRef<StringRef> Comdats = WasmObj->linkingData().Comdats; 251 UsedComdats.resize(Comdats.size()); 252 for (unsigned I = 0; I < Comdats.size(); ++I) 253 UsedComdats[I] = Symtab->addComdat(Comdats[I]); 254 255 // Populate `Segments`. 256 for (const WasmSegment &S : WasmObj->dataSegments()) 257 Segments.emplace_back(make<InputSegment>(S, this)); 258 setRelocs(Segments, DataSection); 259 260 // Populate `Functions`. 261 ArrayRef<WasmFunction> Funcs = WasmObj->functions(); 262 ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes(); 263 ArrayRef<WasmSignature> Types = WasmObj->types(); 264 Functions.reserve(Funcs.size()); 265 266 for (size_t I = 0, E = Funcs.size(); I != E; ++I) 267 Functions.emplace_back( 268 make<InputFunction>(Types[FuncTypes[I]], &Funcs[I], this)); 269 setRelocs(Functions, CodeSection); 270 271 // Populate `Globals`. 272 for (const WasmGlobal &G : WasmObj->globals()) 273 Globals.emplace_back(make<InputGlobal>(G, this)); 274 275 // Populate `Events`. 276 for (const WasmEvent &E : WasmObj->events()) 277 Events.emplace_back(make<InputEvent>(Types[E.Type.SigIndex], E, this)); 278 279 // Populate `Symbols` based on the WasmSymbols in the object. 280 Symbols.reserve(WasmObj->getNumberOfSymbols()); 281 for (const SymbolRef &Sym : WasmObj->symbols()) { 282 const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl()); 283 if (Symbol *Sym = createDefined(WasmSym)) 284 Symbols.push_back(Sym); 285 else 286 Symbols.push_back(createUndefined(WasmSym)); 287 } 288 } 289 290 bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const { 291 uint32_t C = Chunk->getComdat(); 292 if (C == UINT32_MAX) 293 return false; 294 return !UsedComdats[C]; 295 } 296 297 FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t Index) const { 298 return cast<FunctionSymbol>(Symbols[Index]); 299 } 300 301 GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t Index) const { 302 return cast<GlobalSymbol>(Symbols[Index]); 303 } 304 305 EventSymbol *ObjFile::getEventSymbol(uint32_t Index) const { 306 return cast<EventSymbol>(Symbols[Index]); 307 } 308 309 SectionSymbol *ObjFile::getSectionSymbol(uint32_t Index) const { 310 return cast<SectionSymbol>(Symbols[Index]); 311 } 312 313 DataSymbol *ObjFile::getDataSymbol(uint32_t Index) const { 314 return cast<DataSymbol>(Symbols[Index]); 315 } 316 317 Symbol *ObjFile::createDefined(const WasmSymbol &Sym) { 318 if (!Sym.isDefined()) 319 return nullptr; 320 321 StringRef Name = Sym.Info.Name; 322 uint32_t Flags = Sym.Info.Flags; 323 324 switch (Sym.Info.Kind) { 325 case WASM_SYMBOL_TYPE_FUNCTION: { 326 InputFunction *Func = 327 Functions[Sym.Info.ElementIndex - WasmObj->getNumImportedFunctions()]; 328 if (isExcludedByComdat(Func)) { 329 Func->Live = false; 330 return nullptr; 331 } 332 333 if (Sym.isBindingLocal()) 334 return make<DefinedFunction>(Name, Flags, this, Func); 335 return Symtab->addDefinedFunction(Name, Flags, this, Func); 336 } 337 case WASM_SYMBOL_TYPE_DATA: { 338 InputSegment *Seg = Segments[Sym.Info.DataRef.Segment]; 339 if (isExcludedByComdat(Seg)) { 340 Seg->Live = false; 341 return nullptr; 342 } 343 344 uint32_t Offset = Sym.Info.DataRef.Offset; 345 uint32_t Size = Sym.Info.DataRef.Size; 346 347 if (Sym.isBindingLocal()) 348 return make<DefinedData>(Name, Flags, this, Seg, Offset, Size); 349 return Symtab->addDefinedData(Name, Flags, this, Seg, Offset, Size); 350 } 351 case WASM_SYMBOL_TYPE_GLOBAL: { 352 InputGlobal *Global = 353 Globals[Sym.Info.ElementIndex - WasmObj->getNumImportedGlobals()]; 354 if (Sym.isBindingLocal()) 355 return make<DefinedGlobal>(Name, Flags, this, Global); 356 return Symtab->addDefinedGlobal(Name, Flags, this, Global); 357 } 358 case WASM_SYMBOL_TYPE_SECTION: { 359 InputSection *Section = CustomSectionsByIndex[Sym.Info.ElementIndex]; 360 assert(Sym.isBindingLocal()); 361 return make<SectionSymbol>(Name, Flags, Section, this); 362 } 363 case WASM_SYMBOL_TYPE_EVENT: { 364 InputEvent *Event = 365 Events[Sym.Info.ElementIndex - WasmObj->getNumImportedEvents()]; 366 if (Sym.isBindingLocal()) 367 return make<DefinedEvent>(Name, Flags, this, Event); 368 return Symtab->addDefinedEvent(Name, Flags, this, Event); 369 } 370 } 371 llvm_unreachable("unknown symbol kind"); 372 } 373 374 Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) { 375 StringRef Name = Sym.Info.Name; 376 uint32_t Flags = Sym.Info.Flags; 377 378 switch (Sym.Info.Kind) { 379 case WASM_SYMBOL_TYPE_FUNCTION: 380 return Symtab->addUndefinedFunction(Name, Flags, this, Sym.Signature); 381 case WASM_SYMBOL_TYPE_DATA: 382 return Symtab->addUndefinedData(Name, Flags, this); 383 case WASM_SYMBOL_TYPE_GLOBAL: 384 return Symtab->addUndefinedGlobal(Name, Flags, this, Sym.GlobalType); 385 case WASM_SYMBOL_TYPE_SECTION: 386 llvm_unreachable("section symbols cannot be undefined"); 387 } 388 llvm_unreachable("unknown symbol kind"); 389 } 390 391 void ArchiveFile::parse() { 392 // Parse a MemoryBufferRef as an archive file. 393 LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n"); 394 File = CHECK(Archive::create(MB), toString(this)); 395 396 // Read the symbol table to construct Lazy symbols. 397 int Count = 0; 398 for (const Archive::Symbol &Sym : File->symbols()) { 399 Symtab->addLazy(this, &Sym); 400 ++Count; 401 } 402 LLVM_DEBUG(dbgs() << "Read " << Count << " symbols\n"); 403 } 404 405 void ArchiveFile::addMember(const Archive::Symbol *Sym) { 406 const Archive::Child &C = 407 CHECK(Sym->getMember(), 408 "could not get the member for symbol " + Sym->getName()); 409 410 // Don't try to load the same member twice (this can happen when members 411 // mutually reference each other). 412 if (!Seen.insert(C.getChildOffset()).second) 413 return; 414 415 LLVM_DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n"); 416 LLVM_DEBUG(dbgs() << "from archive: " << toString(this) << "\n"); 417 418 MemoryBufferRef MB = 419 CHECK(C.getMemoryBufferRef(), 420 "could not get the buffer for the member defining symbol " + 421 Sym->getName()); 422 423 InputFile *Obj = createObjectFile(MB); 424 Obj->ArchiveName = getName(); 425 Symtab->addFile(Obj); 426 } 427 428 static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) { 429 switch (GvVisibility) { 430 case GlobalValue::DefaultVisibility: 431 return WASM_SYMBOL_VISIBILITY_DEFAULT; 432 case GlobalValue::HiddenVisibility: 433 case GlobalValue::ProtectedVisibility: 434 return WASM_SYMBOL_VISIBILITY_HIDDEN; 435 } 436 llvm_unreachable("unknown visibility"); 437 } 438 439 static Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &ObjSym, 440 BitcodeFile &F) { 441 StringRef Name = Saver.save(ObjSym.getName()); 442 443 uint32_t Flags = ObjSym.isWeak() ? WASM_SYMBOL_BINDING_WEAK : 0; 444 Flags |= mapVisibility(ObjSym.getVisibility()); 445 446 if (ObjSym.isUndefined()) { 447 if (ObjSym.isExecutable()) 448 return Symtab->addUndefinedFunction(Name, Flags, &F, nullptr); 449 return Symtab->addUndefinedData(Name, Flags, &F); 450 } 451 452 if (ObjSym.isExecutable()) 453 return Symtab->addDefinedFunction(Name, Flags, &F, nullptr); 454 return Symtab->addDefinedData(Name, Flags, &F, nullptr, 0, 0); 455 } 456 457 void BitcodeFile::parse() { 458 Obj = check(lto::InputFile::create(MemoryBufferRef( 459 MB.getBuffer(), Saver.save(ArchiveName + MB.getBufferIdentifier())))); 460 Triple T(Obj->getTargetTriple()); 461 if (T.getArch() != Triple::wasm32) { 462 error(toString(MB.getBufferIdentifier()) + ": machine type must be wasm32"); 463 return; 464 } 465 466 for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) 467 Symbols.push_back(createBitcodeSymbol(ObjSym, *this)); 468 } 469 470 // Returns a string in the format of "foo.o" or "foo.a(bar.o)". 471 std::string lld::toString(const wasm::InputFile *File) { 472 if (!File) 473 return "<internal>"; 474 475 if (File->ArchiveName.empty()) 476 return File->getName(); 477 478 return (File->ArchiveName + "(" + File->getName() + ")").str(); 479 } 480