1 //===- InputFiles.cpp -----------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "InputFiles.h" 11 #include "Config.h" 12 #include "InputChunks.h" 13 #include "InputGlobal.h" 14 #include "SymbolTable.h" 15 #include "lld/Common/ErrorHandler.h" 16 #include "lld/Common/Memory.h" 17 #include "llvm/Object/Binary.h" 18 #include "llvm/Object/Wasm.h" 19 #include "llvm/Support/LEB128.h" 20 #include "llvm/Support/raw_ostream.h" 21 22 #define DEBUG_TYPE "lld" 23 24 using namespace lld; 25 using namespace lld::wasm; 26 27 using namespace llvm; 28 using namespace llvm::object; 29 using namespace llvm::wasm; 30 31 Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) { 32 log("Loading: " + Path); 33 34 auto MBOrErr = MemoryBuffer::getFile(Path); 35 if (auto EC = MBOrErr.getError()) { 36 error("cannot open " + Path + ": " + EC.message()); 37 return None; 38 } 39 std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; 40 MemoryBufferRef MBRef = MB->getMemBufferRef(); 41 make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership 42 43 return MBRef; 44 } 45 46 static size_t getFunctionCodeOffset(ArrayRef<uint8_t> FunctionBody) { 47 unsigned Count; 48 llvm::decodeULEB128(FunctionBody.data(), &Count); 49 return Count; 50 } 51 52 void ObjFile::dumpInfo() const { 53 log("info for: " + getName() + 54 "\n Symbols : " + Twine(Symbols.size()) + 55 "\n Function Imports : " + Twine(WasmObj->getNumImportedFunctions()) + 56 "\n Global Imports : " + Twine(WasmObj->getNumImportedGlobals())); 57 } 58 59 // Relocations contain either symbol or type indices. This function takes a 60 // relocation and returns relocated index (i.e. translates from the input 61 // sybmol/type space to the output symbol/type space). 62 uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const { 63 if (Reloc.Type == R_WEBASSEMBLY_TYPE_INDEX_LEB) { 64 assert(TypeIsUsed[Reloc.Index]); 65 return TypeMap[Reloc.Index]; 66 } 67 return Symbols[Reloc.Index]->getOutputSymbolIndex(); 68 } 69 70 // Relocations can contain addend for combined sections. This function takes a 71 // relocation and returns updated addend by offset in the output section. 72 uint32_t ObjFile::calcNewAddend(const WasmRelocation &Reloc) const { 73 switch (Reloc.Type) { 74 case R_WEBASSEMBLY_MEMORY_ADDR_LEB: 75 case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: 76 case R_WEBASSEMBLY_MEMORY_ADDR_I32: 77 case R_WEBASSEMBLY_FUNCTION_OFFSET_I32: 78 return Reloc.Addend; 79 case R_WEBASSEMBLY_SECTION_OFFSET_I32: 80 return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend; 81 default: 82 llvm_unreachable("unexpected relocation type"); 83 } 84 } 85 86 // Calculate the value we expect to find at the relocation location. 87 // This is used as a sanity check before applying a relocation to a given 88 // location. It is useful for catching bugs in the compiler and linker. 89 uint32_t ObjFile::calcExpectedValue(const WasmRelocation &Reloc) const { 90 switch (Reloc.Type) { 91 case R_WEBASSEMBLY_TABLE_INDEX_I32: 92 case R_WEBASSEMBLY_TABLE_INDEX_SLEB: { 93 const WasmSymbol& Sym = WasmObj->syms()[Reloc.Index]; 94 return TableEntries[Sym.Info.ElementIndex]; 95 } 96 case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: 97 case R_WEBASSEMBLY_MEMORY_ADDR_I32: 98 case R_WEBASSEMBLY_MEMORY_ADDR_LEB: { 99 const WasmSymbol& Sym = WasmObj->syms()[Reloc.Index]; 100 if (Sym.isUndefined()) 101 return 0; 102 const WasmSegment& Segment = WasmObj->dataSegments()[Sym.Info.DataRef.Segment]; 103 return Segment.Data.Offset.Value.Int32 + Sym.Info.DataRef.Offset + 104 Reloc.Addend; 105 } 106 case R_WEBASSEMBLY_FUNCTION_OFFSET_I32: 107 if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) { 108 size_t FunctionCodeOffset = 109 getFunctionCodeOffset(Sym->Function->getFunctionBody()); 110 return Sym->Function->getFunctionInputOffset() + FunctionCodeOffset + 111 Reloc.Addend; 112 } 113 return 0; 114 case R_WEBASSEMBLY_SECTION_OFFSET_I32: 115 return Reloc.Addend; 116 case R_WEBASSEMBLY_TYPE_INDEX_LEB: 117 return Reloc.Index; 118 case R_WEBASSEMBLY_FUNCTION_INDEX_LEB: 119 case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: { 120 const WasmSymbol& Sym = WasmObj->syms()[Reloc.Index]; 121 return Sym.Info.ElementIndex; 122 } 123 default: 124 llvm_unreachable("unknown relocation type"); 125 } 126 } 127 128 // Translate from the relocation's index into the final linked output value. 129 uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const { 130 switch (Reloc.Type) { 131 case R_WEBASSEMBLY_TABLE_INDEX_I32: 132 case R_WEBASSEMBLY_TABLE_INDEX_SLEB: 133 return getFunctionSymbol(Reloc.Index)->getTableIndex(); 134 case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: 135 case R_WEBASSEMBLY_MEMORY_ADDR_I32: 136 case R_WEBASSEMBLY_MEMORY_ADDR_LEB: 137 if (auto *Sym = dyn_cast<DefinedData>(getDataSymbol(Reloc.Index))) 138 return Sym->getVirtualAddress() + Reloc.Addend; 139 return 0; 140 case R_WEBASSEMBLY_TYPE_INDEX_LEB: 141 return TypeMap[Reloc.Index]; 142 case R_WEBASSEMBLY_FUNCTION_INDEX_LEB: 143 return getFunctionSymbol(Reloc.Index)->getFunctionIndex(); 144 case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: 145 return getGlobalSymbol(Reloc.Index)->getGlobalIndex(); 146 case R_WEBASSEMBLY_FUNCTION_OFFSET_I32: 147 if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) { 148 size_t FunctionCodeOffset = 149 getFunctionCodeOffset(Sym->Function->getFunctionBody()); 150 return Sym->Function->OutputOffset + FunctionCodeOffset + Reloc.Addend; 151 } 152 return 0; 153 case R_WEBASSEMBLY_SECTION_OFFSET_I32: 154 return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend; 155 default: 156 llvm_unreachable("unknown relocation type"); 157 } 158 } 159 160 void ObjFile::parse() { 161 // Parse a memory buffer as a wasm file. 162 DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n"); 163 std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this)); 164 165 auto *Obj = dyn_cast<WasmObjectFile>(Bin.get()); 166 if (!Obj) 167 fatal(toString(this) + ": not a wasm file"); 168 if (!Obj->isRelocatableObject()) 169 fatal(toString(this) + ": not a relocatable wasm file"); 170 171 Bin.release(); 172 WasmObj.reset(Obj); 173 174 // Build up a map of function indices to table indices for use when 175 // verifying the existing table index relocations 176 uint32_t TotalFunctions = 177 WasmObj->getNumImportedFunctions() + WasmObj->functions().size(); 178 TableEntries.resize(TotalFunctions); 179 for (const WasmElemSegment &Seg : WasmObj->elements()) { 180 if (Seg.Offset.Opcode != WASM_OPCODE_I32_CONST) 181 fatal(toString(this) + ": invalid table elements"); 182 uint32_t Offset = Seg.Offset.Value.Int32; 183 for (uint32_t Index = 0; Index < Seg.Functions.size(); Index++) { 184 185 uint32_t FunctionIndex = Seg.Functions[Index]; 186 TableEntries[FunctionIndex] = Offset + Index; 187 } 188 } 189 190 // Find the code and data sections. Wasm objects can have at most one code 191 // and one data section. 192 uint32_t SectionIndex = 0; 193 for (const SectionRef &Sec : WasmObj->sections()) { 194 const WasmSection &Section = WasmObj->getWasmSection(Sec); 195 if (Section.Type == WASM_SEC_CODE) { 196 CodeSection = &Section; 197 } else if (Section.Type == WASM_SEC_DATA) { 198 DataSection = &Section; 199 } else if (Section.Type == WASM_SEC_CUSTOM) { 200 CustomSections.emplace_back(make<InputSection>(Section, this)); 201 CustomSections.back()->copyRelocations(Section); 202 CustomSectionsByIndex[SectionIndex] = CustomSections.back(); 203 } 204 SectionIndex++; 205 } 206 207 TypeMap.resize(getWasmObj()->types().size()); 208 TypeIsUsed.resize(getWasmObj()->types().size(), false); 209 210 ArrayRef<StringRef> Comdats = WasmObj->linkingData().Comdats; 211 UsedComdats.resize(Comdats.size()); 212 for (unsigned I = 0; I < Comdats.size(); ++I) 213 UsedComdats[I] = Symtab->addComdat(Comdats[I]); 214 215 // Populate `Segments`. 216 for (const WasmSegment &S : WasmObj->dataSegments()) { 217 InputSegment *Seg = make<InputSegment>(S, this); 218 Seg->copyRelocations(*DataSection); 219 Segments.emplace_back(Seg); 220 } 221 222 // Populate `Functions`. 223 ArrayRef<WasmFunction> Funcs = WasmObj->functions(); 224 ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes(); 225 ArrayRef<WasmSignature> Types = WasmObj->types(); 226 Functions.reserve(Funcs.size()); 227 228 for (size_t I = 0, E = Funcs.size(); I != E; ++I) { 229 InputFunction *F = 230 make<InputFunction>(Types[FuncTypes[I]], &Funcs[I], this); 231 F->copyRelocations(*CodeSection); 232 Functions.emplace_back(F); 233 } 234 235 // Populate `Globals`. 236 for (const WasmGlobal &G : WasmObj->globals()) 237 Globals.emplace_back(make<InputGlobal>(G, this)); 238 239 // Populate `Symbols` based on the WasmSymbols in the object. 240 Symbols.reserve(WasmObj->getNumberOfSymbols()); 241 for (const SymbolRef &Sym : WasmObj->symbols()) { 242 const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl()); 243 if (Symbol *Sym = createDefined(WasmSym)) 244 Symbols.push_back(Sym); 245 else 246 Symbols.push_back(createUndefined(WasmSym)); 247 } 248 } 249 250 bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const { 251 uint32_t C = Chunk->getComdat(); 252 if (C == UINT32_MAX) 253 return false; 254 return !UsedComdats[C]; 255 } 256 257 FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t Index) const { 258 return cast<FunctionSymbol>(Symbols[Index]); 259 } 260 261 GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t Index) const { 262 return cast<GlobalSymbol>(Symbols[Index]); 263 } 264 265 SectionSymbol *ObjFile::getSectionSymbol(uint32_t Index) const { 266 return cast<SectionSymbol>(Symbols[Index]); 267 } 268 269 DataSymbol *ObjFile::getDataSymbol(uint32_t Index) const { 270 return cast<DataSymbol>(Symbols[Index]); 271 } 272 273 Symbol *ObjFile::createDefined(const WasmSymbol &Sym) { 274 if (!Sym.isDefined()) 275 return nullptr; 276 277 StringRef Name = Sym.Info.Name; 278 uint32_t Flags = Sym.Info.Flags; 279 280 switch (Sym.Info.Kind) { 281 case WASM_SYMBOL_TYPE_FUNCTION: { 282 InputFunction *Func = 283 Functions[Sym.Info.ElementIndex - WasmObj->getNumImportedFunctions()]; 284 if (isExcludedByComdat(Func)) { 285 Func->Live = false; 286 return nullptr; 287 } 288 289 if (Sym.isBindingLocal()) 290 return make<DefinedFunction>(Name, Flags, this, Func); 291 return Symtab->addDefinedFunction(Name, Flags, this, Func); 292 } 293 case WASM_SYMBOL_TYPE_DATA: { 294 InputSegment *Seg = Segments[Sym.Info.DataRef.Segment]; 295 if (isExcludedByComdat(Seg)) { 296 Seg->Live = false; 297 return nullptr; 298 } 299 300 uint32_t Offset = Sym.Info.DataRef.Offset; 301 uint32_t Size = Sym.Info.DataRef.Size; 302 303 if (Sym.isBindingLocal()) 304 return make<DefinedData>(Name, Flags, this, Seg, Offset, Size); 305 return Symtab->addDefinedData(Name, Flags, this, Seg, Offset, Size); 306 } 307 case WASM_SYMBOL_TYPE_GLOBAL: { 308 InputGlobal *Global = 309 Globals[Sym.Info.ElementIndex - WasmObj->getNumImportedGlobals()]; 310 if (Sym.isBindingLocal()) 311 return make<DefinedGlobal>(Name, Flags, this, Global); 312 return Symtab->addDefinedGlobal(Name, Flags, this, Global); 313 } 314 case WASM_SYMBOL_TYPE_SECTION: { 315 InputSection *Section = CustomSectionsByIndex[Sym.Info.ElementIndex]; 316 assert(Sym.isBindingLocal()); 317 return make<SectionSymbol>(Name, Flags, Section, this); 318 } 319 } 320 llvm_unreachable("unknown symbol kind"); 321 } 322 323 Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) { 324 StringRef Name = Sym.Info.Name; 325 uint32_t Flags = Sym.Info.Flags; 326 327 switch (Sym.Info.Kind) { 328 case WASM_SYMBOL_TYPE_FUNCTION: 329 return Symtab->addUndefinedFunction(Name, Flags, this, Sym.FunctionType); 330 case WASM_SYMBOL_TYPE_DATA: 331 return Symtab->addUndefinedData(Name, Flags, this); 332 case WASM_SYMBOL_TYPE_GLOBAL: 333 return Symtab->addUndefinedGlobal(Name, Flags, this, Sym.GlobalType); 334 case WASM_SYMBOL_TYPE_SECTION: 335 llvm_unreachable("section symbols cannot be undefined"); 336 } 337 llvm_unreachable("unknown symbol kind"); 338 } 339 340 void ArchiveFile::parse() { 341 // Parse a MemoryBufferRef as an archive file. 342 DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n"); 343 File = CHECK(Archive::create(MB), toString(this)); 344 345 // Read the symbol table to construct Lazy symbols. 346 int Count = 0; 347 for (const Archive::Symbol &Sym : File->symbols()) { 348 Symtab->addLazy(this, &Sym); 349 ++Count; 350 } 351 DEBUG(dbgs() << "Read " << Count << " symbols\n"); 352 } 353 354 void ArchiveFile::addMember(const Archive::Symbol *Sym) { 355 const Archive::Child &C = 356 CHECK(Sym->getMember(), 357 "could not get the member for symbol " + Sym->getName()); 358 359 // Don't try to load the same member twice (this can happen when members 360 // mutually reference each other). 361 if (!Seen.insert(C.getChildOffset()).second) 362 return; 363 364 DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n"); 365 DEBUG(dbgs() << "from archive: " << toString(this) << "\n"); 366 367 MemoryBufferRef MB = 368 CHECK(C.getMemoryBufferRef(), 369 "could not get the buffer for the member defining symbol " + 370 Sym->getName()); 371 372 if (identify_magic(MB.getBuffer()) != file_magic::wasm_object) { 373 error("unknown file type: " + MB.getBufferIdentifier()); 374 return; 375 } 376 377 InputFile *Obj = make<ObjFile>(MB); 378 Obj->ParentName = ParentName; 379 Symtab->addFile(Obj); 380 } 381 382 // Returns a string in the format of "foo.o" or "foo.a(bar.o)". 383 std::string lld::toString(const wasm::InputFile *File) { 384 if (!File) 385 return "<internal>"; 386 387 if (File->ParentName.empty()) 388 return File->getName(); 389 390 return (File->ParentName + "(" + File->getName() + ")").str(); 391 } 392