1 //===- InputFiles.cpp -----------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "InputFiles.h" 11 #include "Config.h" 12 #include "InputChunks.h" 13 #include "InputGlobal.h" 14 #include "SymbolTable.h" 15 #include "lld/Common/ErrorHandler.h" 16 #include "lld/Common/Memory.h" 17 #include "llvm/Object/Binary.h" 18 #include "llvm/Object/Wasm.h" 19 #include "llvm/Support/raw_ostream.h" 20 21 #define DEBUG_TYPE "lld" 22 23 using namespace lld; 24 using namespace lld::wasm; 25 26 using namespace llvm; 27 using namespace llvm::object; 28 using namespace llvm::wasm; 29 30 Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) { 31 log("Loading: " + Path); 32 33 auto MBOrErr = MemoryBuffer::getFile(Path); 34 if (auto EC = MBOrErr.getError()) { 35 error("cannot open " + Path + ": " + EC.message()); 36 return None; 37 } 38 std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; 39 MemoryBufferRef MBRef = MB->getMemBufferRef(); 40 make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership 41 42 return MBRef; 43 } 44 45 void ObjFile::dumpInfo() const { 46 log("info for: " + getName() + 47 "\n Symbols : " + Twine(Symbols.size()) + 48 "\n Function Imports : " + Twine(WasmObj->getNumImportedFunctions()) + 49 "\n Global Imports : " + Twine(WasmObj->getNumImportedGlobals())); 50 } 51 52 // Relocations contain either symbol or type indices. This function takes a 53 // relocation and returns relocated index (i.e. translates from the input 54 // sybmol/type space to the output symbol/type space). 55 uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const { 56 if (Reloc.Type == R_WEBASSEMBLY_TYPE_INDEX_LEB) { 57 assert(TypeIsUsed[Reloc.Index]); 58 return TypeMap[Reloc.Index]; 59 } 60 return Symbols[Reloc.Index]->getOutputSymbolIndex(); 61 } 62 63 // Relocations can contain addend for combined sections. This function takes a 64 // relocation and returns updated addend by offset in the output section. 65 uint32_t ObjFile::calcNewAddend(const WasmRelocation &Reloc) const { 66 switch (Reloc.Type) { 67 case R_WEBASSEMBLY_MEMORY_ADDR_LEB: 68 case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: 69 case R_WEBASSEMBLY_MEMORY_ADDR_I32: 70 case R_WEBASSEMBLY_FUNCTION_OFFSET_I32: 71 return Reloc.Addend; 72 case R_WEBASSEMBLY_SECTION_OFFSET_I32: 73 return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend; 74 default: 75 llvm_unreachable("unexpected relocation type"); 76 } 77 } 78 79 // Calculate the value we expect to find at the relocation location. 80 // This is used as a sanity check before applying a relocation to a given 81 // location. It is useful for catching bugs in the compiler and linker. 82 uint32_t ObjFile::calcExpectedValue(const WasmRelocation &Reloc) const { 83 switch (Reloc.Type) { 84 case R_WEBASSEMBLY_TABLE_INDEX_I32: 85 case R_WEBASSEMBLY_TABLE_INDEX_SLEB: { 86 const WasmSymbol& Sym = WasmObj->syms()[Reloc.Index]; 87 return TableEntries[Sym.Info.ElementIndex]; 88 } 89 case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: 90 case R_WEBASSEMBLY_MEMORY_ADDR_I32: 91 case R_WEBASSEMBLY_MEMORY_ADDR_LEB: { 92 const WasmSymbol& Sym = WasmObj->syms()[Reloc.Index]; 93 if (Sym.isUndefined()) 94 return 0; 95 const WasmSegment& Segment = WasmObj->dataSegments()[Sym.Info.DataRef.Segment]; 96 return Segment.Data.Offset.Value.Int32 + Sym.Info.DataRef.Offset + 97 Reloc.Addend; 98 } 99 case R_WEBASSEMBLY_FUNCTION_OFFSET_I32: 100 if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) { 101 return Sym->Function->getFunctionInputOffset() + 102 Sym->Function->getFunctionCodeOffset() + Reloc.Addend; 103 } 104 return 0; 105 case R_WEBASSEMBLY_SECTION_OFFSET_I32: 106 return Reloc.Addend; 107 case R_WEBASSEMBLY_TYPE_INDEX_LEB: 108 return Reloc.Index; 109 case R_WEBASSEMBLY_FUNCTION_INDEX_LEB: 110 case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: { 111 const WasmSymbol& Sym = WasmObj->syms()[Reloc.Index]; 112 return Sym.Info.ElementIndex; 113 } 114 default: 115 llvm_unreachable("unknown relocation type"); 116 } 117 } 118 119 // Translate from the relocation's index into the final linked output value. 120 uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const { 121 switch (Reloc.Type) { 122 case R_WEBASSEMBLY_TABLE_INDEX_I32: 123 case R_WEBASSEMBLY_TABLE_INDEX_SLEB: 124 return getFunctionSymbol(Reloc.Index)->getTableIndex(); 125 case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: 126 case R_WEBASSEMBLY_MEMORY_ADDR_I32: 127 case R_WEBASSEMBLY_MEMORY_ADDR_LEB: 128 if (auto *Sym = dyn_cast<DefinedData>(getDataSymbol(Reloc.Index))) 129 return Sym->getVirtualAddress() + Reloc.Addend; 130 return 0; 131 case R_WEBASSEMBLY_TYPE_INDEX_LEB: 132 return TypeMap[Reloc.Index]; 133 case R_WEBASSEMBLY_FUNCTION_INDEX_LEB: 134 return getFunctionSymbol(Reloc.Index)->getFunctionIndex(); 135 case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: 136 return getGlobalSymbol(Reloc.Index)->getGlobalIndex(); 137 case R_WEBASSEMBLY_FUNCTION_OFFSET_I32: 138 if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) { 139 return Sym->Function->OutputOffset + 140 Sym->Function->getFunctionCodeOffset() + Reloc.Addend; 141 } 142 return 0; 143 case R_WEBASSEMBLY_SECTION_OFFSET_I32: 144 return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend; 145 default: 146 llvm_unreachable("unknown relocation type"); 147 } 148 } 149 150 void ObjFile::parse() { 151 // Parse a memory buffer as a wasm file. 152 LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n"); 153 std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this)); 154 155 auto *Obj = dyn_cast<WasmObjectFile>(Bin.get()); 156 if (!Obj) 157 fatal(toString(this) + ": not a wasm file"); 158 if (!Obj->isRelocatableObject()) 159 fatal(toString(this) + ": not a relocatable wasm file"); 160 161 Bin.release(); 162 WasmObj.reset(Obj); 163 164 // Build up a map of function indices to table indices for use when 165 // verifying the existing table index relocations 166 uint32_t TotalFunctions = 167 WasmObj->getNumImportedFunctions() + WasmObj->functions().size(); 168 TableEntries.resize(TotalFunctions); 169 for (const WasmElemSegment &Seg : WasmObj->elements()) { 170 if (Seg.Offset.Opcode != WASM_OPCODE_I32_CONST) 171 fatal(toString(this) + ": invalid table elements"); 172 uint32_t Offset = Seg.Offset.Value.Int32; 173 for (uint32_t Index = 0; Index < Seg.Functions.size(); Index++) { 174 175 uint32_t FunctionIndex = Seg.Functions[Index]; 176 TableEntries[FunctionIndex] = Offset + Index; 177 } 178 } 179 180 // Find the code and data sections. Wasm objects can have at most one code 181 // and one data section. 182 uint32_t SectionIndex = 0; 183 for (const SectionRef &Sec : WasmObj->sections()) { 184 const WasmSection &Section = WasmObj->getWasmSection(Sec); 185 if (Section.Type == WASM_SEC_CODE) { 186 CodeSection = &Section; 187 } else if (Section.Type == WASM_SEC_DATA) { 188 DataSection = &Section; 189 } else if (Section.Type == WASM_SEC_CUSTOM) { 190 CustomSections.emplace_back(make<InputSection>(Section, this)); 191 CustomSections.back()->copyRelocations(Section); 192 CustomSectionsByIndex[SectionIndex] = CustomSections.back(); 193 } 194 SectionIndex++; 195 } 196 197 TypeMap.resize(getWasmObj()->types().size()); 198 TypeIsUsed.resize(getWasmObj()->types().size(), false); 199 200 ArrayRef<StringRef> Comdats = WasmObj->linkingData().Comdats; 201 UsedComdats.resize(Comdats.size()); 202 for (unsigned I = 0; I < Comdats.size(); ++I) 203 UsedComdats[I] = Symtab->addComdat(Comdats[I]); 204 205 // Populate `Segments`. 206 for (const WasmSegment &S : WasmObj->dataSegments()) { 207 InputSegment *Seg = make<InputSegment>(S, this); 208 Seg->copyRelocations(*DataSection); 209 Segments.emplace_back(Seg); 210 } 211 212 // Populate `Functions`. 213 ArrayRef<WasmFunction> Funcs = WasmObj->functions(); 214 ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes(); 215 ArrayRef<WasmSignature> Types = WasmObj->types(); 216 Functions.reserve(Funcs.size()); 217 218 for (size_t I = 0, E = Funcs.size(); I != E; ++I) { 219 InputFunction *F = 220 make<InputFunction>(Types[FuncTypes[I]], &Funcs[I], this); 221 F->copyRelocations(*CodeSection); 222 Functions.emplace_back(F); 223 } 224 225 // Populate `Globals`. 226 for (const WasmGlobal &G : WasmObj->globals()) 227 Globals.emplace_back(make<InputGlobal>(G, this)); 228 229 // Populate `Symbols` based on the WasmSymbols in the object. 230 Symbols.reserve(WasmObj->getNumberOfSymbols()); 231 for (const SymbolRef &Sym : WasmObj->symbols()) { 232 const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl()); 233 if (Symbol *Sym = createDefined(WasmSym)) 234 Symbols.push_back(Sym); 235 else 236 Symbols.push_back(createUndefined(WasmSym)); 237 } 238 } 239 240 bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const { 241 uint32_t C = Chunk->getComdat(); 242 if (C == UINT32_MAX) 243 return false; 244 return !UsedComdats[C]; 245 } 246 247 FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t Index) const { 248 return cast<FunctionSymbol>(Symbols[Index]); 249 } 250 251 GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t Index) const { 252 return cast<GlobalSymbol>(Symbols[Index]); 253 } 254 255 SectionSymbol *ObjFile::getSectionSymbol(uint32_t Index) const { 256 return cast<SectionSymbol>(Symbols[Index]); 257 } 258 259 DataSymbol *ObjFile::getDataSymbol(uint32_t Index) const { 260 return cast<DataSymbol>(Symbols[Index]); 261 } 262 263 Symbol *ObjFile::createDefined(const WasmSymbol &Sym) { 264 if (!Sym.isDefined()) 265 return nullptr; 266 267 StringRef Name = Sym.Info.Name; 268 uint32_t Flags = Sym.Info.Flags; 269 270 switch (Sym.Info.Kind) { 271 case WASM_SYMBOL_TYPE_FUNCTION: { 272 InputFunction *Func = 273 Functions[Sym.Info.ElementIndex - WasmObj->getNumImportedFunctions()]; 274 if (isExcludedByComdat(Func)) { 275 Func->Live = false; 276 return nullptr; 277 } 278 279 if (Sym.isBindingLocal()) 280 return make<DefinedFunction>(Name, Flags, this, Func); 281 return Symtab->addDefinedFunction(Name, Flags, this, Func); 282 } 283 case WASM_SYMBOL_TYPE_DATA: { 284 InputSegment *Seg = Segments[Sym.Info.DataRef.Segment]; 285 if (isExcludedByComdat(Seg)) { 286 Seg->Live = false; 287 return nullptr; 288 } 289 290 uint32_t Offset = Sym.Info.DataRef.Offset; 291 uint32_t Size = Sym.Info.DataRef.Size; 292 293 if (Sym.isBindingLocal()) 294 return make<DefinedData>(Name, Flags, this, Seg, Offset, Size); 295 return Symtab->addDefinedData(Name, Flags, this, Seg, Offset, Size); 296 } 297 case WASM_SYMBOL_TYPE_GLOBAL: { 298 InputGlobal *Global = 299 Globals[Sym.Info.ElementIndex - WasmObj->getNumImportedGlobals()]; 300 if (Sym.isBindingLocal()) 301 return make<DefinedGlobal>(Name, Flags, this, Global); 302 return Symtab->addDefinedGlobal(Name, Flags, this, Global); 303 } 304 case WASM_SYMBOL_TYPE_SECTION: { 305 InputSection *Section = CustomSectionsByIndex[Sym.Info.ElementIndex]; 306 assert(Sym.isBindingLocal()); 307 return make<SectionSymbol>(Name, Flags, Section, this); 308 } 309 } 310 llvm_unreachable("unknown symbol kind"); 311 } 312 313 Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) { 314 StringRef Name = Sym.Info.Name; 315 uint32_t Flags = Sym.Info.Flags; 316 317 switch (Sym.Info.Kind) { 318 case WASM_SYMBOL_TYPE_FUNCTION: 319 return Symtab->addUndefinedFunction(Name, Flags, this, Sym.FunctionType); 320 case WASM_SYMBOL_TYPE_DATA: 321 return Symtab->addUndefinedData(Name, Flags, this); 322 case WASM_SYMBOL_TYPE_GLOBAL: 323 return Symtab->addUndefinedGlobal(Name, Flags, this, Sym.GlobalType); 324 case WASM_SYMBOL_TYPE_SECTION: 325 llvm_unreachable("section symbols cannot be undefined"); 326 } 327 llvm_unreachable("unknown symbol kind"); 328 } 329 330 void ArchiveFile::parse() { 331 // Parse a MemoryBufferRef as an archive file. 332 LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n"); 333 File = CHECK(Archive::create(MB), toString(this)); 334 335 // Read the symbol table to construct Lazy symbols. 336 int Count = 0; 337 for (const Archive::Symbol &Sym : File->symbols()) { 338 Symtab->addLazy(this, &Sym); 339 ++Count; 340 } 341 LLVM_DEBUG(dbgs() << "Read " << Count << " symbols\n"); 342 } 343 344 void ArchiveFile::addMember(const Archive::Symbol *Sym) { 345 const Archive::Child &C = 346 CHECK(Sym->getMember(), 347 "could not get the member for symbol " + Sym->getName()); 348 349 // Don't try to load the same member twice (this can happen when members 350 // mutually reference each other). 351 if (!Seen.insert(C.getChildOffset()).second) 352 return; 353 354 LLVM_DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n"); 355 LLVM_DEBUG(dbgs() << "from archive: " << toString(this) << "\n"); 356 357 MemoryBufferRef MB = 358 CHECK(C.getMemoryBufferRef(), 359 "could not get the buffer for the member defining symbol " + 360 Sym->getName()); 361 362 if (identify_magic(MB.getBuffer()) != file_magic::wasm_object) { 363 error("unknown file type: " + MB.getBufferIdentifier()); 364 return; 365 } 366 367 InputFile *Obj = make<ObjFile>(MB); 368 Obj->ParentName = ParentName; 369 Symtab->addFile(Obj); 370 } 371 372 // Returns a string in the format of "foo.o" or "foo.a(bar.o)". 373 std::string lld::toString(const wasm::InputFile *File) { 374 if (!File) 375 return "<internal>"; 376 377 if (File->ParentName.empty()) 378 return File->getName(); 379 380 return (File->ParentName + "(" + File->getName() + ")").str(); 381 } 382