1 //===- InputFiles.cpp -----------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "InputFiles.h"
11 #include "Config.h"
12 #include "InputChunks.h"
13 #include "InputGlobal.h"
14 #include "SymbolTable.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "lld/Common/Memory.h"
17 #include "llvm/Object/Binary.h"
18 #include "llvm/Object/Wasm.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "lld"
22 
23 using namespace lld;
24 using namespace lld::wasm;
25 
26 using namespace llvm;
27 using namespace llvm::object;
28 using namespace llvm::wasm;
29 
30 Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) {
31   log("Loading: " + Path);
32 
33   auto MBOrErr = MemoryBuffer::getFile(Path);
34   if (auto EC = MBOrErr.getError()) {
35     error("cannot open " + Path + ": " + EC.message());
36     return None;
37   }
38   std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
39   MemoryBufferRef MBRef = MB->getMemBufferRef();
40   make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership
41 
42   return MBRef;
43 }
44 
45 void ObjFile::dumpInfo() const {
46   log("info for: " + getName() +
47       "\n              Symbols : " + Twine(Symbols.size()) +
48       "\n     Function Imports : " + Twine(WasmObj->getNumImportedFunctions()) +
49       "\n       Global Imports : " + Twine(WasmObj->getNumImportedGlobals()));
50 }
51 
52 // Relocations contain either symbol or type indices.  This function takes a
53 // relocation and returns relocated index (i.e. translates from the input
54 // sybmol/type space to the output symbol/type space).
55 uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const {
56   if (Reloc.Type == R_WEBASSEMBLY_TYPE_INDEX_LEB) {
57     assert(TypeIsUsed[Reloc.Index]);
58     return TypeMap[Reloc.Index];
59   }
60   return Symbols[Reloc.Index]->getOutputSymbolIndex();
61 }
62 
63 // Calculate the value we expect to find at the relocation location.
64 // This is used as a sanity check before applying a relocation to a given
65 // location.  It is useful for catching bugs in the compiler and linker.
66 uint32_t ObjFile::calcExpectedValue(const WasmRelocation &Reloc) const {
67   switch (Reloc.Type) {
68   case R_WEBASSEMBLY_TABLE_INDEX_I32:
69   case R_WEBASSEMBLY_TABLE_INDEX_SLEB: {
70     const WasmSymbol& Sym = WasmObj->syms()[Reloc.Index];
71     return TableEntries[Sym.Info.ElementIndex];
72   }
73   case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
74   case R_WEBASSEMBLY_MEMORY_ADDR_I32:
75   case R_WEBASSEMBLY_MEMORY_ADDR_LEB: {
76     const WasmSymbol& Sym = WasmObj->syms()[Reloc.Index];
77     if (Sym.isUndefined())
78       return 0;
79     const WasmSegment& Segment = WasmObj->dataSegments()[Sym.Info.DataRef.Segment];
80     return Segment.Data.Offset.Value.Int32 + Sym.Info.DataRef.Offset +
81            Reloc.Addend;
82   }
83   case R_WEBASSEMBLY_TYPE_INDEX_LEB:
84     return Reloc.Index;
85   case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
86   case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: {
87     const WasmSymbol& Sym = WasmObj->syms()[Reloc.Index];
88     return Sym.Info.ElementIndex;
89   }
90   default:
91     llvm_unreachable("unknown relocation type");
92   }
93 }
94 
95 // Translate from the relocation's index into the final linked output value.
96 uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const {
97   switch (Reloc.Type) {
98   case R_WEBASSEMBLY_TABLE_INDEX_I32:
99   case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
100     return getFunctionSymbol(Reloc.Index)->getTableIndex();
101   case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
102   case R_WEBASSEMBLY_MEMORY_ADDR_I32:
103   case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
104     if (auto *Sym = dyn_cast<DefinedData>(getDataSymbol(Reloc.Index)))
105       return Sym->getVirtualAddress() + Reloc.Addend;
106     return 0;
107   case R_WEBASSEMBLY_TYPE_INDEX_LEB:
108     return TypeMap[Reloc.Index];
109   case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
110     return getFunctionSymbol(Reloc.Index)->getFunctionIndex();
111   case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
112     return getGlobalSymbol(Reloc.Index)->getGlobalIndex();
113   default:
114     llvm_unreachable("unknown relocation type");
115   }
116 }
117 
118 void ObjFile::parse() {
119   // Parse a memory buffer as a wasm file.
120   DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n");
121   std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this));
122 
123   auto *Obj = dyn_cast<WasmObjectFile>(Bin.get());
124   if (!Obj)
125     fatal(toString(this) + ": not a wasm file");
126   if (!Obj->isRelocatableObject())
127     fatal(toString(this) + ": not a relocatable wasm file");
128 
129   Bin.release();
130   WasmObj.reset(Obj);
131 
132   // Build up a map of function indices to table indices for use when
133   // verifying the existing table index relocations
134   uint32_t TotalFunctions =
135       WasmObj->getNumImportedFunctions() + WasmObj->functions().size();
136   TableEntries.resize(TotalFunctions);
137   for (const WasmElemSegment &Seg : WasmObj->elements()) {
138     if (Seg.Offset.Opcode != WASM_OPCODE_I32_CONST)
139       fatal(toString(this) + ": invalid table elements");
140     uint32_t Offset = Seg.Offset.Value.Int32;
141     for (uint32_t Index = 0; Index < Seg.Functions.size(); Index++) {
142 
143       uint32_t FunctionIndex = Seg.Functions[Index];
144       TableEntries[FunctionIndex] = Offset + Index;
145     }
146   }
147 
148   // Find the code and data sections.  Wasm objects can have at most one code
149   // and one data section.
150   for (const SectionRef &Sec : WasmObj->sections()) {
151     const WasmSection &Section = WasmObj->getWasmSection(Sec);
152     if (Section.Type == WASM_SEC_CODE)
153       CodeSection = &Section;
154     else if (Section.Type == WASM_SEC_DATA)
155       DataSection = &Section;
156   }
157 
158   TypeMap.resize(getWasmObj()->types().size());
159   TypeIsUsed.resize(getWasmObj()->types().size(), false);
160 
161   ArrayRef<StringRef> Comdats = WasmObj->linkingData().Comdats;
162   UsedComdats.resize(Comdats.size());
163   for (unsigned I = 0; I < Comdats.size(); ++I)
164     UsedComdats[I] = Symtab->addComdat(Comdats[I]);
165 
166   // Populate `Segments`.
167   for (const WasmSegment &S : WasmObj->dataSegments()) {
168     InputSegment *Seg = make<InputSegment>(S, this);
169     Seg->copyRelocations(*DataSection);
170     Segments.emplace_back(Seg);
171   }
172 
173   // Populate `Functions`.
174   ArrayRef<WasmFunction> Funcs = WasmObj->functions();
175   ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes();
176   ArrayRef<WasmSignature> Types = WasmObj->types();
177   Functions.reserve(Funcs.size());
178 
179   for (size_t I = 0, E = Funcs.size(); I != E; ++I) {
180     InputFunction *F =
181         make<InputFunction>(Types[FuncTypes[I]], &Funcs[I], this);
182     F->copyRelocations(*CodeSection);
183     Functions.emplace_back(F);
184   }
185 
186   // Populate `Globals`.
187   for (const WasmGlobal &G : WasmObj->globals())
188     Globals.emplace_back(make<InputGlobal>(G));
189 
190   // Populate `Symbols` based on the WasmSymbols in the object.
191   Symbols.reserve(WasmObj->getNumberOfSymbols());
192   for (const SymbolRef &Sym : WasmObj->symbols()) {
193     const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl());
194     if (Symbol *Sym = createDefined(WasmSym))
195       Symbols.push_back(Sym);
196     else
197       Symbols.push_back(createUndefined(WasmSym));
198   }
199 }
200 
201 bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const {
202   uint32_t C = Chunk->getComdat();
203   if (C == UINT32_MAX)
204     return false;
205   return !UsedComdats[C];
206 }
207 
208 FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t Index) const {
209   return cast<FunctionSymbol>(Symbols[Index]);
210 }
211 
212 GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t Index) const {
213   return cast<GlobalSymbol>(Symbols[Index]);
214 }
215 
216 DataSymbol *ObjFile::getDataSymbol(uint32_t Index) const {
217   return cast<DataSymbol>(Symbols[Index]);
218 }
219 
220 Symbol *ObjFile::createDefined(const WasmSymbol &Sym) {
221   if (!Sym.isDefined())
222     return nullptr;
223 
224   StringRef Name = Sym.Info.Name;
225   uint32_t Flags = Sym.Info.Flags;
226 
227   switch (Sym.Info.Kind) {
228   case WASM_SYMBOL_TYPE_FUNCTION: {
229     InputFunction *Func =
230         Functions[Sym.Info.ElementIndex - WasmObj->getNumImportedFunctions()];
231     if (isExcludedByComdat(Func)) {
232       Func->Live = false;
233       return nullptr;
234     }
235 
236     if (Sym.isBindingLocal())
237       return make<DefinedFunction>(Name, Flags, this, Func);
238     return Symtab->addDefinedFunction(Name, Flags, this, Func);
239   }
240   case WASM_SYMBOL_TYPE_DATA: {
241     InputSegment *Seg = Segments[Sym.Info.DataRef.Segment];
242     if (isExcludedByComdat(Seg)) {
243       Seg->Live = false;
244       return nullptr;
245     }
246 
247     uint32_t Offset = Sym.Info.DataRef.Offset;
248     uint32_t Size = Sym.Info.DataRef.Size;
249 
250     if (Sym.isBindingLocal())
251       return make<DefinedData>(Name, Flags, this, Seg, Offset, Size);
252     return Symtab->addDefinedData(Name, Flags, this, Seg, Offset, Size);
253   }
254   case WASM_SYMBOL_TYPE_GLOBAL:
255     InputGlobal *Global =
256         Globals[Sym.Info.ElementIndex - WasmObj->getNumImportedGlobals()];
257     if (Sym.isBindingLocal())
258       return make<DefinedGlobal>(Name, Flags, this, Global);
259     return Symtab->addDefinedGlobal(Name, Flags, this, Global);
260   }
261   llvm_unreachable("unkown symbol kind");
262 }
263 
264 Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) {
265   StringRef Name = Sym.Info.Name;
266   uint32_t Flags = Sym.Info.Flags;
267 
268   switch (Sym.Info.Kind) {
269   case WASM_SYMBOL_TYPE_FUNCTION:
270     return Symtab->addUndefinedFunction(Name, Flags, this, Sym.FunctionType);
271   case WASM_SYMBOL_TYPE_DATA:
272     return Symtab->addUndefinedData(Name, Flags, this);
273   case WASM_SYMBOL_TYPE_GLOBAL:
274     return Symtab->addUndefinedGlobal(Name, Flags, this, Sym.GlobalType);
275   }
276   llvm_unreachable("unkown symbol kind");
277 }
278 
279 void ArchiveFile::parse() {
280   // Parse a MemoryBufferRef as an archive file.
281   DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
282   File = CHECK(Archive::create(MB), toString(this));
283 
284   // Read the symbol table to construct Lazy symbols.
285   int Count = 0;
286   for (const Archive::Symbol &Sym : File->symbols()) {
287     Symtab->addLazy(this, &Sym);
288     ++Count;
289   }
290   DEBUG(dbgs() << "Read " << Count << " symbols\n");
291 }
292 
293 void ArchiveFile::addMember(const Archive::Symbol *Sym) {
294   const Archive::Child &C =
295       CHECK(Sym->getMember(),
296             "could not get the member for symbol " + Sym->getName());
297 
298   // Don't try to load the same member twice (this can happen when members
299   // mutually reference each other).
300   if (!Seen.insert(C.getChildOffset()).second)
301     return;
302 
303   DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n");
304   DEBUG(dbgs() << "from archive: " << toString(this) << "\n");
305 
306   MemoryBufferRef MB =
307       CHECK(C.getMemoryBufferRef(),
308             "could not get the buffer for the member defining symbol " +
309                 Sym->getName());
310 
311   if (identify_magic(MB.getBuffer()) != file_magic::wasm_object) {
312     error("unknown file type: " + MB.getBufferIdentifier());
313     return;
314   }
315 
316   InputFile *Obj = make<ObjFile>(MB);
317   Obj->ParentName = ParentName;
318   Symtab->addFile(Obj);
319 }
320 
321 // Returns a string in the format of "foo.o" or "foo.a(bar.o)".
322 std::string lld::toString(const wasm::InputFile *File) {
323   if (!File)
324     return "<internal>";
325 
326   if (File->ParentName.empty())
327     return File->getName();
328 
329   return (File->ParentName + "(" + File->getName() + ")").str();
330 }
331