1 //===- InputFiles.cpp -----------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "InputFiles.h" 11 #include "Config.h" 12 #include "InputChunks.h" 13 #include "InputGlobal.h" 14 #include "SymbolTable.h" 15 #include "lld/Common/ErrorHandler.h" 16 #include "lld/Common/Memory.h" 17 #include "llvm/Object/Binary.h" 18 #include "llvm/Object/Wasm.h" 19 #include "llvm/Support/raw_ostream.h" 20 21 #define DEBUG_TYPE "lld" 22 23 using namespace lld; 24 using namespace lld::wasm; 25 26 using namespace llvm; 27 using namespace llvm::object; 28 using namespace llvm::wasm; 29 30 Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) { 31 log("Loading: " + Path); 32 33 auto MBOrErr = MemoryBuffer::getFile(Path); 34 if (auto EC = MBOrErr.getError()) { 35 error("cannot open " + Path + ": " + EC.message()); 36 return None; 37 } 38 std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; 39 MemoryBufferRef MBRef = MB->getMemBufferRef(); 40 make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership 41 42 return MBRef; 43 } 44 45 InputFile *lld::wasm::createObjectFile(MemoryBufferRef MB) { 46 file_magic Magic = identify_magic(MB.getBuffer()); 47 if (Magic == file_magic::wasm_object) 48 return make<ObjFile>(MB); 49 50 if (Magic == file_magic::bitcode) 51 return make<BitcodeFile>(MB); 52 53 fatal("unknown file type: " + MB.getBufferIdentifier()); 54 } 55 56 void ObjFile::dumpInfo() const { 57 log("info for: " + getName() + 58 "\n Symbols : " + Twine(Symbols.size()) + 59 "\n Function Imports : " + Twine(WasmObj->getNumImportedFunctions()) + 60 "\n Global Imports : " + Twine(WasmObj->getNumImportedGlobals())); 61 } 62 63 // Relocations contain either symbol or type indices. This function takes a 64 // relocation and returns relocated index (i.e. translates from the input 65 // sybmol/type space to the output symbol/type space). 66 uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const { 67 if (Reloc.Type == R_WEBASSEMBLY_TYPE_INDEX_LEB) { 68 assert(TypeIsUsed[Reloc.Index]); 69 return TypeMap[Reloc.Index]; 70 } 71 return Symbols[Reloc.Index]->getOutputSymbolIndex(); 72 } 73 74 // Relocations can contain addend for combined sections. This function takes a 75 // relocation and returns updated addend by offset in the output section. 76 uint32_t ObjFile::calcNewAddend(const WasmRelocation &Reloc) const { 77 switch (Reloc.Type) { 78 case R_WEBASSEMBLY_MEMORY_ADDR_LEB: 79 case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: 80 case R_WEBASSEMBLY_MEMORY_ADDR_I32: 81 case R_WEBASSEMBLY_FUNCTION_OFFSET_I32: 82 return Reloc.Addend; 83 case R_WEBASSEMBLY_SECTION_OFFSET_I32: 84 return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend; 85 default: 86 llvm_unreachable("unexpected relocation type"); 87 } 88 } 89 90 // Calculate the value we expect to find at the relocation location. 91 // This is used as a sanity check before applying a relocation to a given 92 // location. It is useful for catching bugs in the compiler and linker. 93 uint32_t ObjFile::calcExpectedValue(const WasmRelocation &Reloc) const { 94 switch (Reloc.Type) { 95 case R_WEBASSEMBLY_TABLE_INDEX_I32: 96 case R_WEBASSEMBLY_TABLE_INDEX_SLEB: { 97 const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index]; 98 return TableEntries[Sym.Info.ElementIndex]; 99 } 100 case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: 101 case R_WEBASSEMBLY_MEMORY_ADDR_I32: 102 case R_WEBASSEMBLY_MEMORY_ADDR_LEB: { 103 const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index]; 104 if (Sym.isUndefined()) 105 return 0; 106 const WasmSegment &Segment = 107 WasmObj->dataSegments()[Sym.Info.DataRef.Segment]; 108 return Segment.Data.Offset.Value.Int32 + Sym.Info.DataRef.Offset + 109 Reloc.Addend; 110 } 111 case R_WEBASSEMBLY_FUNCTION_OFFSET_I32: 112 if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) { 113 return Sym->Function->getFunctionInputOffset() + 114 Sym->Function->getFunctionCodeOffset() + Reloc.Addend; 115 } 116 return 0; 117 case R_WEBASSEMBLY_SECTION_OFFSET_I32: 118 return Reloc.Addend; 119 case R_WEBASSEMBLY_TYPE_INDEX_LEB: 120 return Reloc.Index; 121 case R_WEBASSEMBLY_FUNCTION_INDEX_LEB: 122 case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: { 123 const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index]; 124 return Sym.Info.ElementIndex; 125 } 126 default: 127 llvm_unreachable("unknown relocation type"); 128 } 129 } 130 131 // Translate from the relocation's index into the final linked output value. 132 uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const { 133 switch (Reloc.Type) { 134 case R_WEBASSEMBLY_TABLE_INDEX_I32: 135 case R_WEBASSEMBLY_TABLE_INDEX_SLEB: 136 return getFunctionSymbol(Reloc.Index)->getTableIndex(); 137 case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: 138 case R_WEBASSEMBLY_MEMORY_ADDR_I32: 139 case R_WEBASSEMBLY_MEMORY_ADDR_LEB: 140 if (auto *Sym = dyn_cast<DefinedData>(getDataSymbol(Reloc.Index))) 141 if (Sym->isLive()) 142 return Sym->getVirtualAddress() + Reloc.Addend; 143 return 0; 144 case R_WEBASSEMBLY_TYPE_INDEX_LEB: 145 return TypeMap[Reloc.Index]; 146 case R_WEBASSEMBLY_FUNCTION_INDEX_LEB: 147 return getFunctionSymbol(Reloc.Index)->getFunctionIndex(); 148 case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: 149 return getGlobalSymbol(Reloc.Index)->getGlobalIndex(); 150 case R_WEBASSEMBLY_FUNCTION_OFFSET_I32: 151 if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) { 152 return Sym->Function->OutputOffset + 153 Sym->Function->getFunctionCodeOffset() + Reloc.Addend; 154 } 155 return 0; 156 case R_WEBASSEMBLY_SECTION_OFFSET_I32: 157 return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend; 158 default: 159 llvm_unreachable("unknown relocation type"); 160 } 161 } 162 163 template <class T> 164 static void setRelocs(const std::vector<T *> &Chunks, 165 const WasmSection *Section) { 166 if (!Section) 167 return; 168 169 ArrayRef<WasmRelocation> Relocs = Section->Relocations; 170 assert(std::is_sorted(Relocs.begin(), Relocs.end(), 171 [](const WasmRelocation &R1, const WasmRelocation &R2) { 172 return R1.Offset < R2.Offset; 173 })); 174 assert(std::is_sorted( 175 Chunks.begin(), Chunks.end(), [](InputChunk *C1, InputChunk *C2) { 176 return C1->getInputSectionOffset() < C2->getInputSectionOffset(); 177 })); 178 179 auto RelocsNext = Relocs.begin(); 180 auto RelocsEnd = Relocs.end(); 181 auto RelocLess = [](const WasmRelocation &R, uint32_t Val) { 182 return R.Offset < Val; 183 }; 184 for (InputChunk *C : Chunks) { 185 auto RelocsStart = std::lower_bound(RelocsNext, RelocsEnd, 186 C->getInputSectionOffset(), RelocLess); 187 RelocsNext = std::lower_bound( 188 RelocsStart, RelocsEnd, C->getInputSectionOffset() + C->getInputSize(), 189 RelocLess); 190 C->setRelocations(ArrayRef<WasmRelocation>(RelocsStart, RelocsNext)); 191 } 192 } 193 194 void ObjFile::parse() { 195 // Parse a memory buffer as a wasm file. 196 LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n"); 197 std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this)); 198 199 auto *Obj = dyn_cast<WasmObjectFile>(Bin.get()); 200 if (!Obj) 201 fatal(toString(this) + ": not a wasm file"); 202 if (!Obj->isRelocatableObject()) 203 fatal(toString(this) + ": not a relocatable wasm file"); 204 205 Bin.release(); 206 WasmObj.reset(Obj); 207 208 // Build up a map of function indices to table indices for use when 209 // verifying the existing table index relocations 210 uint32_t TotalFunctions = 211 WasmObj->getNumImportedFunctions() + WasmObj->functions().size(); 212 TableEntries.resize(TotalFunctions); 213 for (const WasmElemSegment &Seg : WasmObj->elements()) { 214 if (Seg.Offset.Opcode != WASM_OPCODE_I32_CONST) 215 fatal(toString(this) + ": invalid table elements"); 216 uint32_t Offset = Seg.Offset.Value.Int32; 217 for (uint32_t Index = 0; Index < Seg.Functions.size(); Index++) { 218 219 uint32_t FunctionIndex = Seg.Functions[Index]; 220 TableEntries[FunctionIndex] = Offset + Index; 221 } 222 } 223 224 // Find the code and data sections. Wasm objects can have at most one code 225 // and one data section. 226 uint32_t SectionIndex = 0; 227 for (const SectionRef &Sec : WasmObj->sections()) { 228 const WasmSection &Section = WasmObj->getWasmSection(Sec); 229 if (Section.Type == WASM_SEC_CODE) { 230 CodeSection = &Section; 231 } else if (Section.Type == WASM_SEC_DATA) { 232 DataSection = &Section; 233 } else if (Section.Type == WASM_SEC_CUSTOM) { 234 CustomSections.emplace_back(make<InputSection>(Section, this)); 235 CustomSections.back()->setRelocations(Section.Relocations); 236 CustomSectionsByIndex[SectionIndex] = CustomSections.back(); 237 } 238 SectionIndex++; 239 } 240 241 TypeMap.resize(getWasmObj()->types().size()); 242 TypeIsUsed.resize(getWasmObj()->types().size(), false); 243 244 ArrayRef<StringRef> Comdats = WasmObj->linkingData().Comdats; 245 UsedComdats.resize(Comdats.size()); 246 for (unsigned I = 0; I < Comdats.size(); ++I) 247 UsedComdats[I] = Symtab->addComdat(Comdats[I]); 248 249 // Populate `Segments`. 250 for (const WasmSegment &S : WasmObj->dataSegments()) 251 Segments.emplace_back(make<InputSegment>(S, this)); 252 setRelocs(Segments, DataSection); 253 254 // Populate `Functions`. 255 ArrayRef<WasmFunction> Funcs = WasmObj->functions(); 256 ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes(); 257 ArrayRef<WasmSignature> Types = WasmObj->types(); 258 Functions.reserve(Funcs.size()); 259 260 for (size_t I = 0, E = Funcs.size(); I != E; ++I) 261 Functions.emplace_back( 262 make<InputFunction>(Types[FuncTypes[I]], &Funcs[I], this)); 263 setRelocs(Functions, CodeSection); 264 265 // Populate `Globals`. 266 for (const WasmGlobal &G : WasmObj->globals()) 267 Globals.emplace_back(make<InputGlobal>(G, this)); 268 269 // Populate `Symbols` based on the WasmSymbols in the object. 270 Symbols.reserve(WasmObj->getNumberOfSymbols()); 271 for (const SymbolRef &Sym : WasmObj->symbols()) { 272 const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl()); 273 if (Symbol *Sym = createDefined(WasmSym)) 274 Symbols.push_back(Sym); 275 else 276 Symbols.push_back(createUndefined(WasmSym)); 277 } 278 } 279 280 bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const { 281 uint32_t C = Chunk->getComdat(); 282 if (C == UINT32_MAX) 283 return false; 284 return !UsedComdats[C]; 285 } 286 287 FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t Index) const { 288 return cast<FunctionSymbol>(Symbols[Index]); 289 } 290 291 GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t Index) const { 292 return cast<GlobalSymbol>(Symbols[Index]); 293 } 294 295 SectionSymbol *ObjFile::getSectionSymbol(uint32_t Index) const { 296 return cast<SectionSymbol>(Symbols[Index]); 297 } 298 299 DataSymbol *ObjFile::getDataSymbol(uint32_t Index) const { 300 return cast<DataSymbol>(Symbols[Index]); 301 } 302 303 Symbol *ObjFile::createDefined(const WasmSymbol &Sym) { 304 if (!Sym.isDefined()) 305 return nullptr; 306 307 StringRef Name = Sym.Info.Name; 308 uint32_t Flags = Sym.Info.Flags; 309 310 switch (Sym.Info.Kind) { 311 case WASM_SYMBOL_TYPE_FUNCTION: { 312 InputFunction *Func = 313 Functions[Sym.Info.ElementIndex - WasmObj->getNumImportedFunctions()]; 314 if (isExcludedByComdat(Func)) { 315 Func->Live = false; 316 return nullptr; 317 } 318 319 if (Sym.isBindingLocal()) 320 return make<DefinedFunction>(Name, Flags, this, Func); 321 return Symtab->addDefinedFunction(Name, Flags, this, Func); 322 } 323 case WASM_SYMBOL_TYPE_DATA: { 324 InputSegment *Seg = Segments[Sym.Info.DataRef.Segment]; 325 if (isExcludedByComdat(Seg)) { 326 Seg->Live = false; 327 return nullptr; 328 } 329 330 uint32_t Offset = Sym.Info.DataRef.Offset; 331 uint32_t Size = Sym.Info.DataRef.Size; 332 333 if (Sym.isBindingLocal()) 334 return make<DefinedData>(Name, Flags, this, Seg, Offset, Size); 335 return Symtab->addDefinedData(Name, Flags, this, Seg, Offset, Size); 336 } 337 case WASM_SYMBOL_TYPE_GLOBAL: { 338 InputGlobal *Global = 339 Globals[Sym.Info.ElementIndex - WasmObj->getNumImportedGlobals()]; 340 if (Sym.isBindingLocal()) 341 return make<DefinedGlobal>(Name, Flags, this, Global); 342 return Symtab->addDefinedGlobal(Name, Flags, this, Global); 343 } 344 case WASM_SYMBOL_TYPE_SECTION: { 345 InputSection *Section = CustomSectionsByIndex[Sym.Info.ElementIndex]; 346 assert(Sym.isBindingLocal()); 347 return make<SectionSymbol>(Name, Flags, Section, this); 348 } 349 } 350 llvm_unreachable("unknown symbol kind"); 351 } 352 353 Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) { 354 StringRef Name = Sym.Info.Name; 355 uint32_t Flags = Sym.Info.Flags; 356 357 switch (Sym.Info.Kind) { 358 case WASM_SYMBOL_TYPE_FUNCTION: 359 return Symtab->addUndefinedFunction(Name, Flags, this, Sym.FunctionType); 360 case WASM_SYMBOL_TYPE_DATA: 361 return Symtab->addUndefinedData(Name, Flags, this); 362 case WASM_SYMBOL_TYPE_GLOBAL: 363 return Symtab->addUndefinedGlobal(Name, Flags, this, Sym.GlobalType); 364 case WASM_SYMBOL_TYPE_SECTION: 365 llvm_unreachable("section symbols cannot be undefined"); 366 } 367 llvm_unreachable("unknown symbol kind"); 368 } 369 370 void ArchiveFile::parse() { 371 // Parse a MemoryBufferRef as an archive file. 372 LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n"); 373 File = CHECK(Archive::create(MB), toString(this)); 374 375 // Read the symbol table to construct Lazy symbols. 376 int Count = 0; 377 for (const Archive::Symbol &Sym : File->symbols()) { 378 Symtab->addLazy(this, &Sym); 379 ++Count; 380 } 381 LLVM_DEBUG(dbgs() << "Read " << Count << " symbols\n"); 382 } 383 384 void ArchiveFile::addMember(const Archive::Symbol *Sym) { 385 const Archive::Child &C = 386 CHECK(Sym->getMember(), 387 "could not get the member for symbol " + Sym->getName()); 388 389 // Don't try to load the same member twice (this can happen when members 390 // mutually reference each other). 391 if (!Seen.insert(C.getChildOffset()).second) 392 return; 393 394 LLVM_DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n"); 395 LLVM_DEBUG(dbgs() << "from archive: " << toString(this) << "\n"); 396 397 MemoryBufferRef MB = 398 CHECK(C.getMemoryBufferRef(), 399 "could not get the buffer for the member defining symbol " + 400 Sym->getName()); 401 402 InputFile *Obj = createObjectFile(MB); 403 Obj->ArchiveName = getName(); 404 Symtab->addFile(Obj); 405 } 406 407 static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) { 408 switch (GvVisibility) { 409 case GlobalValue::DefaultVisibility: 410 return WASM_SYMBOL_VISIBILITY_DEFAULT; 411 case GlobalValue::HiddenVisibility: 412 case GlobalValue::ProtectedVisibility: 413 return WASM_SYMBOL_VISIBILITY_HIDDEN; 414 } 415 llvm_unreachable("unknown visibility"); 416 } 417 418 static Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &ObjSym, 419 BitcodeFile &F) { 420 StringRef Name = Saver.save(ObjSym.getName()); 421 422 uint32_t Flags = ObjSym.isWeak() ? WASM_SYMBOL_BINDING_WEAK : 0; 423 Flags |= mapVisibility(ObjSym.getVisibility()); 424 425 if (ObjSym.isUndefined()) { 426 if (ObjSym.isExecutable()) 427 return Symtab->addUndefinedFunction(Name, Flags, &F, nullptr); 428 return Symtab->addUndefinedData(Name, Flags, &F); 429 } 430 431 if (ObjSym.isExecutable()) 432 return Symtab->addDefinedFunction(Name, Flags, &F, nullptr); 433 return Symtab->addDefinedData(Name, Flags, &F, nullptr, 0, 0); 434 } 435 436 void BitcodeFile::parse() { 437 Obj = check(lto::InputFile::create(MemoryBufferRef( 438 MB.getBuffer(), Saver.save(ArchiveName + MB.getBufferIdentifier())))); 439 Triple T(Obj->getTargetTriple()); 440 if (T.getArch() != Triple::wasm32) { 441 error(toString(MB.getBufferIdentifier()) + ": machine type must be wasm32"); 442 return; 443 } 444 445 for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) 446 Symbols.push_back(createBitcodeSymbol(ObjSym, *this)); 447 } 448 449 // Returns a string in the format of "foo.o" or "foo.a(bar.o)". 450 std::string lld::toString(const wasm::InputFile *File) { 451 if (!File) 452 return "<internal>"; 453 454 if (File->ArchiveName.empty()) 455 return File->getName(); 456 457 return (File->ArchiveName + "(" + File->getName() + ")").str(); 458 } 459