1 //===- InputFiles.cpp -----------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "InputFiles.h" 11 12 #include "Config.h" 13 #include "InputChunks.h" 14 #include "SymbolTable.h" 15 #include "lld/Common/ErrorHandler.h" 16 #include "lld/Common/Memory.h" 17 #include "llvm/Object/Binary.h" 18 #include "llvm/Object/Wasm.h" 19 #include "llvm/Support/raw_ostream.h" 20 21 #define DEBUG_TYPE "lld" 22 23 using namespace lld; 24 using namespace lld::wasm; 25 26 using namespace llvm; 27 using namespace llvm::object; 28 using namespace llvm::wasm; 29 30 Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) { 31 log("Loading: " + Path); 32 33 auto MBOrErr = MemoryBuffer::getFile(Path); 34 if (auto EC = MBOrErr.getError()) { 35 error("cannot open " + Path + ": " + EC.message()); 36 return None; 37 } 38 std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; 39 MemoryBufferRef MBRef = MB->getMemBufferRef(); 40 make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership 41 42 return MBRef; 43 } 44 45 void ObjFile::dumpInfo() const { 46 log("info for: " + getName() + "\n" + 47 " Total Functions : " + Twine(FunctionSymbols.size()) + "\n" + 48 " Total Globals : " + Twine(GlobalSymbols.size()) + "\n" + 49 " Function Imports : " + Twine(NumFunctionImports) + "\n" + 50 " Global Imports : " + Twine(NumGlobalImports) + "\n"); 51 } 52 53 uint32_t ObjFile::relocateVirtualAddress(uint32_t GlobalIndex) const { 54 return GlobalSymbols[GlobalIndex]->getVirtualAddress(); 55 } 56 57 uint32_t ObjFile::relocateFunctionIndex(uint32_t Original) const { 58 Symbol *Sym = FunctionSymbols[Original]; 59 uint32_t Index = Sym->getOutputIndex(); 60 DEBUG(dbgs() << "relocateFunctionIndex: " << toString(*Sym) << ": " 61 << Original << " -> " << Index << "\n"); 62 return Index; 63 } 64 65 uint32_t ObjFile::relocateTypeIndex(uint32_t Original) const { 66 return TypeMap[Original]; 67 } 68 69 uint32_t ObjFile::relocateTableIndex(uint32_t Original) const { 70 Symbol *Sym = FunctionSymbols[Original]; 71 uint32_t Index = Sym->hasTableIndex() ? Sym->getTableIndex() : 0; 72 DEBUG(dbgs() << "relocateTableIndex: " << toString(*Sym) << ": " << Original 73 << " -> " << Index << "\n"); 74 return Index; 75 } 76 77 uint32_t ObjFile::relocateGlobalIndex(uint32_t Original) const { 78 Symbol *Sym = GlobalSymbols[Original]; 79 uint32_t Index = Sym->hasOutputIndex() ? Sym->getOutputIndex() : 0; 80 DEBUG(dbgs() << "relocateGlobalIndex: " << toString(*Sym) << ": " << Original 81 << " -> " << Index << "\n"); 82 return Index; 83 } 84 85 // Relocations contain an index into the function, global or table index 86 // space of the input file. This function takes a relocation and returns the 87 // relocated index (i.e. translates from the input index space to the output 88 // index space). 89 uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const { 90 switch (Reloc.Type) { 91 case R_WEBASSEMBLY_TYPE_INDEX_LEB: 92 return relocateTypeIndex(Reloc.Index); 93 case R_WEBASSEMBLY_FUNCTION_INDEX_LEB: 94 case R_WEBASSEMBLY_TABLE_INDEX_I32: 95 case R_WEBASSEMBLY_TABLE_INDEX_SLEB: 96 return relocateFunctionIndex(Reloc.Index); 97 case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: 98 case R_WEBASSEMBLY_MEMORY_ADDR_LEB: 99 case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: 100 case R_WEBASSEMBLY_MEMORY_ADDR_I32: 101 return relocateGlobalIndex(Reloc.Index); 102 default: 103 llvm_unreachable("unknown relocation type"); 104 } 105 } 106 107 // Translate from the relocation's index into the final linked output value. 108 uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const { 109 switch (Reloc.Type) { 110 case R_WEBASSEMBLY_TABLE_INDEX_I32: 111 case R_WEBASSEMBLY_TABLE_INDEX_SLEB: 112 return relocateTableIndex(Reloc.Index); 113 case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: 114 case R_WEBASSEMBLY_MEMORY_ADDR_I32: 115 case R_WEBASSEMBLY_MEMORY_ADDR_LEB: 116 return relocateVirtualAddress(Reloc.Index) + Reloc.Addend; 117 case R_WEBASSEMBLY_TYPE_INDEX_LEB: 118 return relocateTypeIndex(Reloc.Index); 119 case R_WEBASSEMBLY_FUNCTION_INDEX_LEB: 120 return relocateFunctionIndex(Reloc.Index); 121 case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: 122 return relocateGlobalIndex(Reloc.Index); 123 default: 124 llvm_unreachable("unknown relocation type"); 125 } 126 } 127 128 void ObjFile::parse() { 129 // Parse a memory buffer as a wasm file. 130 DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n"); 131 std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this)); 132 133 auto *Obj = dyn_cast<WasmObjectFile>(Bin.get()); 134 if (!Obj) 135 fatal(toString(this) + ": not a wasm file"); 136 if (!Obj->isRelocatableObject()) 137 fatal(toString(this) + ": not a relocatable wasm file"); 138 139 Bin.release(); 140 WasmObj.reset(Obj); 141 142 // Find the code and data sections. Wasm objects can have at most one code 143 // and one data section. 144 for (const SectionRef &Sec : WasmObj->sections()) { 145 const WasmSection &Section = WasmObj->getWasmSection(Sec); 146 if (Section.Type == WASM_SEC_CODE) 147 CodeSection = &Section; 148 else if (Section.Type == WASM_SEC_DATA) 149 DataSection = &Section; 150 } 151 152 initializeSymbols(); 153 } 154 155 // Return the InputSegment in which a given symbol is defined. 156 InputSegment *ObjFile::getSegment(const WasmSymbol &WasmSym) const { 157 uint32_t Address = WasmObj->getWasmSymbolValue(WasmSym); 158 for (InputSegment *Segment : Segments) { 159 if (Address >= Segment->startVA() && Address < Segment->endVA()) { 160 DEBUG(dbgs() << "Found symbol in segment: " << WasmSym.Name << " -> " 161 << Segment->getName() << "\n"); 162 163 return Segment; 164 } 165 } 166 error("symbol not found in any segment: " + WasmSym.Name); 167 return nullptr; 168 } 169 170 // Get the value stored in the wasm global represented by this symbol. 171 // This represents the virtual address of the symbol in the input file. 172 uint32_t ObjFile::getGlobalValue(const WasmSymbol &Sym) const { 173 const WasmGlobal &Global = 174 getWasmObj()->globals()[Sym.ElementIndex - NumGlobalImports]; 175 assert(Global.Type == llvm::wasm::WASM_TYPE_I32); 176 return Global.InitExpr.Value.Int32; 177 } 178 179 // Get the signature for a given function symbol, either by looking 180 // it up in function sections (for defined functions), of the imports section 181 // (for imported functions). 182 const WasmSignature *ObjFile::getFunctionSig(const WasmSymbol &Sym) const { 183 DEBUG(dbgs() << "getFunctionSig: " << Sym.Name << "\n"); 184 return &WasmObj->types()[Sym.FunctionType]; 185 } 186 187 InputFunction *ObjFile::getFunction(const WasmSymbol &Sym) const { 188 uint32_t FunctionIndex = Sym.ElementIndex - NumFunctionImports; 189 return Functions[FunctionIndex]; 190 } 191 192 bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const { 193 StringRef Comdat = Chunk->getComdat(); 194 return !Comdat.empty() && Symtab->findComdat(Comdat) != this; 195 } 196 197 void ObjFile::initializeSymbols() { 198 Symbols.reserve(WasmObj->getNumberOfSymbols()); 199 200 for (const WasmImport &Import : WasmObj->imports()) { 201 switch (Import.Kind) { 202 case WASM_EXTERNAL_FUNCTION: 203 ++NumFunctionImports; 204 break; 205 case WASM_EXTERNAL_GLOBAL: 206 ++NumGlobalImports; 207 break; 208 } 209 } 210 211 FunctionSymbols.resize(NumFunctionImports + WasmObj->functions().size()); 212 GlobalSymbols.resize(NumGlobalImports + WasmObj->globals().size()); 213 214 ArrayRef<WasmFunction> Funcs = WasmObj->functions(); 215 ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes(); 216 ArrayRef<WasmSignature> Types = WasmObj->types(); 217 ArrayRef<WasmGlobal> Globals = WasmObj->globals(); 218 219 for (const auto &C : WasmObj->comdats()) 220 Symtab->addComdat(C, this); 221 222 FunctionSymbols.resize(NumFunctionImports + Funcs.size()); 223 GlobalSymbols.resize(NumGlobalImports + Globals.size()); 224 225 for (const WasmSegment &S : WasmObj->dataSegments()) { 226 InputSegment *Seg = make<InputSegment>(S, this); 227 Seg->copyRelocations(*DataSection); 228 Segments.emplace_back(Seg); 229 } 230 231 for (size_t I = 0; I < Funcs.size(); ++I) { 232 const WasmFunction &Func = Funcs[I]; 233 const WasmSignature &Sig = Types[FuncTypes[I]]; 234 InputFunction *F = make<InputFunction>(Sig, &Func, this); 235 F->copyRelocations(*CodeSection); 236 Functions.emplace_back(F); 237 } 238 239 // Populate `FunctionSymbols` and `GlobalSymbols` based on the WasmSymbols 240 // in the object 241 for (const SymbolRef &Sym : WasmObj->symbols()) { 242 const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl()); 243 Symbol *S; 244 switch (WasmSym.Type) { 245 case WasmSymbol::SymbolType::FUNCTION_EXPORT: { 246 InputFunction *Function = getFunction(WasmSym); 247 if (!isExcludedByComdat(Function)) { 248 S = createDefined(WasmSym, Symbol::Kind::DefinedFunctionKind, Function); 249 break; 250 } else { 251 Function->Discarded = true; 252 LLVM_FALLTHROUGH; // Exclude function, and add the symbol as undefined 253 } 254 } 255 case WasmSymbol::SymbolType::FUNCTION_IMPORT: 256 S = createUndefined(WasmSym, Symbol::Kind::UndefinedFunctionKind, 257 getFunctionSig(WasmSym)); 258 break; 259 case WasmSymbol::SymbolType::GLOBAL_EXPORT: { 260 InputSegment *Segment = getSegment(WasmSym); 261 if (!isExcludedByComdat(Segment)) { 262 S = createDefined(WasmSym, Symbol::Kind::DefinedGlobalKind, Segment, 263 getGlobalValue(WasmSym)); 264 break; 265 } else { 266 Segment->Discarded = true; 267 LLVM_FALLTHROUGH; // Exclude global, and add the symbol as undefined 268 } 269 } 270 case WasmSymbol::SymbolType::GLOBAL_IMPORT: 271 S = createUndefined(WasmSym, Symbol::Kind::UndefinedGlobalKind); 272 break; 273 } 274 275 Symbols.push_back(S); 276 if (WasmSym.isFunction()) { 277 FunctionSymbols[WasmSym.ElementIndex] = S; 278 if (WasmSym.HasAltIndex) 279 FunctionSymbols[WasmSym.AltIndex] = S; 280 } else { 281 GlobalSymbols[WasmSym.ElementIndex] = S; 282 if (WasmSym.HasAltIndex) 283 GlobalSymbols[WasmSym.AltIndex] = S; 284 } 285 } 286 287 DEBUG(for (size_t I = 0; I < FunctionSymbols.size(); ++I) 288 assert(FunctionSymbols[I] != nullptr); 289 for (size_t I = 0; I < GlobalSymbols.size(); ++I) 290 assert(GlobalSymbols[I] != nullptr);); 291 292 DEBUG(dbgs() << "Functions : " << FunctionSymbols.size() << "\n"); 293 DEBUG(dbgs() << "Globals : " << GlobalSymbols.size() << "\n"); 294 } 295 296 Symbol *ObjFile::createUndefined(const WasmSymbol &Sym, Symbol::Kind Kind, 297 const WasmSignature *Signature) { 298 return Symtab->addUndefined(Sym.Name, Kind, Sym.Flags, this, Signature); 299 } 300 301 Symbol *ObjFile::createDefined(const WasmSymbol &Sym, Symbol::Kind Kind, 302 InputChunk *Chunk, uint32_t Address) { 303 Symbol *S; 304 if (Sym.isLocal()) { 305 S = make<Symbol>(Sym.Name, true); 306 S->update(Kind, this, Sym.Flags, Chunk, Address); 307 return S; 308 } 309 return Symtab->addDefined(Sym.Name, Kind, Sym.Flags, this, Chunk, Address); 310 } 311 312 void ArchiveFile::parse() { 313 // Parse a MemoryBufferRef as an archive file. 314 DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n"); 315 File = CHECK(Archive::create(MB), toString(this)); 316 317 // Read the symbol table to construct Lazy symbols. 318 int Count = 0; 319 for (const Archive::Symbol &Sym : File->symbols()) { 320 Symtab->addLazy(this, &Sym); 321 ++Count; 322 } 323 DEBUG(dbgs() << "Read " << Count << " symbols\n"); 324 } 325 326 void ArchiveFile::addMember(const Archive::Symbol *Sym) { 327 const Archive::Child &C = 328 CHECK(Sym->getMember(), 329 "could not get the member for symbol " + Sym->getName()); 330 331 // Don't try to load the same member twice (this can happen when members 332 // mutually reference each other). 333 if (!Seen.insert(C.getChildOffset()).second) 334 return; 335 336 DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n"); 337 DEBUG(dbgs() << "from archive: " << toString(this) << "\n"); 338 339 MemoryBufferRef MB = 340 CHECK(C.getMemoryBufferRef(), 341 "could not get the buffer for the member defining symbol " + 342 Sym->getName()); 343 344 if (identify_magic(MB.getBuffer()) != file_magic::wasm_object) { 345 error("unknown file type: " + MB.getBufferIdentifier()); 346 return; 347 } 348 349 InputFile *Obj = make<ObjFile>(MB); 350 Obj->ParentName = ParentName; 351 Symtab->addFile(Obj); 352 } 353 354 // Returns a string in the format of "foo.o" or "foo.a(bar.o)". 355 std::string lld::toString(const wasm::InputFile *File) { 356 if (!File) 357 return "<internal>"; 358 359 if (File->ParentName.empty()) 360 return File->getName(); 361 362 return (File->ParentName + "(" + File->getName() + ")").str(); 363 } 364