1 //===- InputFiles.cpp -----------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "InputFiles.h"
11 #include "Config.h"
12 #include "InputChunks.h"
13 #include "InputGlobal.h"
14 #include "SymbolTable.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "lld/Common/Memory.h"
17 #include "llvm/Object/Binary.h"
18 #include "llvm/Object/Wasm.h"
19 #include "llvm/Support/LEB128.h"
20 #include "llvm/Support/raw_ostream.h"
21 
22 #define DEBUG_TYPE "lld"
23 
24 using namespace lld;
25 using namespace lld::wasm;
26 
27 using namespace llvm;
28 using namespace llvm::object;
29 using namespace llvm::wasm;
30 
31 Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) {
32   log("Loading: " + Path);
33 
34   auto MBOrErr = MemoryBuffer::getFile(Path);
35   if (auto EC = MBOrErr.getError()) {
36     error("cannot open " + Path + ": " + EC.message());
37     return None;
38   }
39   std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
40   MemoryBufferRef MBRef = MB->getMemBufferRef();
41   make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership
42 
43   return MBRef;
44 }
45 
46 static size_t getFunctionCodeOffset(ArrayRef<uint8_t> FunctionBody) {
47   unsigned Count;
48   llvm::decodeULEB128(FunctionBody.data(), &Count);
49   return Count;
50 }
51 
52 void ObjFile::dumpInfo() const {
53   log("info for: " + getName() +
54       "\n              Symbols : " + Twine(Symbols.size()) +
55       "\n     Function Imports : " + Twine(WasmObj->getNumImportedFunctions()) +
56       "\n       Global Imports : " + Twine(WasmObj->getNumImportedGlobals()));
57 }
58 
59 // Relocations contain either symbol or type indices.  This function takes a
60 // relocation and returns relocated index (i.e. translates from the input
61 // sybmol/type space to the output symbol/type space).
62 uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const {
63   if (Reloc.Type == R_WEBASSEMBLY_TYPE_INDEX_LEB) {
64     assert(TypeIsUsed[Reloc.Index]);
65     return TypeMap[Reloc.Index];
66   }
67   return Symbols[Reloc.Index]->getOutputSymbolIndex();
68 }
69 
70 // Relocations can contain addend for combined sections. This function takes a
71 // relocation and returns updated addend by offset in the output section.
72 uint32_t ObjFile::calcNewAddend(const WasmRelocation &Reloc) const {
73   switch (Reloc.Type) {
74   case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
75   case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
76   case R_WEBASSEMBLY_MEMORY_ADDR_I32:
77   case R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
78     return Reloc.Addend;
79   case R_WEBASSEMBLY_SECTION_OFFSET_I32:
80     return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend;
81   default:
82     llvm_unreachable("unexpected relocation type");
83   }
84 }
85 
86 // Calculate the value we expect to find at the relocation location.
87 // This is used as a sanity check before applying a relocation to a given
88 // location.  It is useful for catching bugs in the compiler and linker.
89 uint32_t ObjFile::calcExpectedValue(const WasmRelocation &Reloc) const {
90   switch (Reloc.Type) {
91   case R_WEBASSEMBLY_TABLE_INDEX_I32:
92   case R_WEBASSEMBLY_TABLE_INDEX_SLEB: {
93     const WasmSymbol& Sym = WasmObj->syms()[Reloc.Index];
94     return TableEntries[Sym.Info.ElementIndex];
95   }
96   case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
97   case R_WEBASSEMBLY_MEMORY_ADDR_I32:
98   case R_WEBASSEMBLY_MEMORY_ADDR_LEB: {
99     const WasmSymbol& Sym = WasmObj->syms()[Reloc.Index];
100     if (Sym.isUndefined())
101       return 0;
102     const WasmSegment& Segment = WasmObj->dataSegments()[Sym.Info.DataRef.Segment];
103     return Segment.Data.Offset.Value.Int32 + Sym.Info.DataRef.Offset +
104            Reloc.Addend;
105   }
106   case R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
107     if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) {
108       size_t FunctionCodeOffset =
109           getFunctionCodeOffset(Sym->Function->getFunctionBody());
110       return Sym->Function->getFunctionInputOffset() + FunctionCodeOffset +
111              Reloc.Addend;
112     }
113     return 0;
114   case R_WEBASSEMBLY_SECTION_OFFSET_I32:
115     return Reloc.Addend;
116   case R_WEBASSEMBLY_TYPE_INDEX_LEB:
117     return Reloc.Index;
118   case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
119   case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: {
120     const WasmSymbol& Sym = WasmObj->syms()[Reloc.Index];
121     return Sym.Info.ElementIndex;
122   }
123   default:
124     llvm_unreachable("unknown relocation type");
125   }
126 }
127 
128 // Translate from the relocation's index into the final linked output value.
129 uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const {
130   switch (Reloc.Type) {
131   case R_WEBASSEMBLY_TABLE_INDEX_I32:
132   case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
133     return getFunctionSymbol(Reloc.Index)->getTableIndex();
134   case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
135   case R_WEBASSEMBLY_MEMORY_ADDR_I32:
136   case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
137     if (auto *Sym = dyn_cast<DefinedData>(getDataSymbol(Reloc.Index)))
138       return Sym->getVirtualAddress() + Reloc.Addend;
139     return 0;
140   case R_WEBASSEMBLY_TYPE_INDEX_LEB:
141     return TypeMap[Reloc.Index];
142   case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
143     return getFunctionSymbol(Reloc.Index)->getFunctionIndex();
144   case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
145     return getGlobalSymbol(Reloc.Index)->getGlobalIndex();
146   case R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
147     if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) {
148       size_t FunctionCodeOffset =
149           getFunctionCodeOffset(Sym->Function->getFunctionBody());
150       return Sym->Function->OutputOffset + FunctionCodeOffset + Reloc.Addend;
151     }
152     return 0;
153   case R_WEBASSEMBLY_SECTION_OFFSET_I32:
154     return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend;
155   default:
156     llvm_unreachable("unknown relocation type");
157   }
158 }
159 
160 void ObjFile::parse() {
161   // Parse a memory buffer as a wasm file.
162   DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n");
163   std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this));
164 
165   auto *Obj = dyn_cast<WasmObjectFile>(Bin.get());
166   if (!Obj)
167     fatal(toString(this) + ": not a wasm file");
168   if (!Obj->isRelocatableObject())
169     fatal(toString(this) + ": not a relocatable wasm file");
170 
171   Bin.release();
172   WasmObj.reset(Obj);
173 
174   // Build up a map of function indices to table indices for use when
175   // verifying the existing table index relocations
176   uint32_t TotalFunctions =
177       WasmObj->getNumImportedFunctions() + WasmObj->functions().size();
178   TableEntries.resize(TotalFunctions);
179   for (const WasmElemSegment &Seg : WasmObj->elements()) {
180     if (Seg.Offset.Opcode != WASM_OPCODE_I32_CONST)
181       fatal(toString(this) + ": invalid table elements");
182     uint32_t Offset = Seg.Offset.Value.Int32;
183     for (uint32_t Index = 0; Index < Seg.Functions.size(); Index++) {
184 
185       uint32_t FunctionIndex = Seg.Functions[Index];
186       TableEntries[FunctionIndex] = Offset + Index;
187     }
188   }
189 
190   // Find the code and data sections.  Wasm objects can have at most one code
191   // and one data section.
192   uint32_t SectionIndex = 0;
193   for (const SectionRef &Sec : WasmObj->sections()) {
194     const WasmSection &Section = WasmObj->getWasmSection(Sec);
195     if (Section.Type == WASM_SEC_CODE) {
196       CodeSection = &Section;
197     } else if (Section.Type == WASM_SEC_DATA) {
198       DataSection = &Section;
199     } else if (Section.Type == WASM_SEC_CUSTOM) {
200       CustomSections.emplace_back(make<InputSection>(Section, this));
201       CustomSections.back()->copyRelocations(Section);
202       CustomSectionsByIndex[SectionIndex] = CustomSections.back();
203     }
204     SectionIndex++;
205   }
206 
207   TypeMap.resize(getWasmObj()->types().size());
208   TypeIsUsed.resize(getWasmObj()->types().size(), false);
209 
210   ArrayRef<StringRef> Comdats = WasmObj->linkingData().Comdats;
211   UsedComdats.resize(Comdats.size());
212   for (unsigned I = 0; I < Comdats.size(); ++I)
213     UsedComdats[I] = Symtab->addComdat(Comdats[I]);
214 
215   // Populate `Segments`.
216   for (const WasmSegment &S : WasmObj->dataSegments()) {
217     InputSegment *Seg = make<InputSegment>(S, this);
218     Seg->copyRelocations(*DataSection);
219     Segments.emplace_back(Seg);
220   }
221 
222   // Populate `Functions`.
223   ArrayRef<WasmFunction> Funcs = WasmObj->functions();
224   ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes();
225   ArrayRef<WasmSignature> Types = WasmObj->types();
226   Functions.reserve(Funcs.size());
227 
228   for (size_t I = 0, E = Funcs.size(); I != E; ++I) {
229     InputFunction *F =
230         make<InputFunction>(Types[FuncTypes[I]], &Funcs[I], this);
231     F->copyRelocations(*CodeSection);
232     Functions.emplace_back(F);
233   }
234 
235   // Populate `Globals`.
236   for (const WasmGlobal &G : WasmObj->globals())
237     Globals.emplace_back(make<InputGlobal>(G, this));
238 
239   // Populate `Symbols` based on the WasmSymbols in the object.
240   Symbols.reserve(WasmObj->getNumberOfSymbols());
241   for (const SymbolRef &Sym : WasmObj->symbols()) {
242     const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl());
243     if (Symbol *Sym = createDefined(WasmSym))
244       Symbols.push_back(Sym);
245     else
246       Symbols.push_back(createUndefined(WasmSym));
247   }
248 }
249 
250 bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const {
251   uint32_t C = Chunk->getComdat();
252   if (C == UINT32_MAX)
253     return false;
254   return !UsedComdats[C];
255 }
256 
257 FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t Index) const {
258   return cast<FunctionSymbol>(Symbols[Index]);
259 }
260 
261 GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t Index) const {
262   return cast<GlobalSymbol>(Symbols[Index]);
263 }
264 
265 SectionSymbol *ObjFile::getSectionSymbol(uint32_t Index) const {
266   return cast<SectionSymbol>(Symbols[Index]);
267 }
268 
269 DataSymbol *ObjFile::getDataSymbol(uint32_t Index) const {
270   return cast<DataSymbol>(Symbols[Index]);
271 }
272 
273 Symbol *ObjFile::createDefined(const WasmSymbol &Sym) {
274   if (!Sym.isDefined())
275     return nullptr;
276 
277   StringRef Name = Sym.Info.Name;
278   uint32_t Flags = Sym.Info.Flags;
279 
280   switch (Sym.Info.Kind) {
281   case WASM_SYMBOL_TYPE_FUNCTION: {
282     InputFunction *Func =
283         Functions[Sym.Info.ElementIndex - WasmObj->getNumImportedFunctions()];
284     if (isExcludedByComdat(Func)) {
285       Func->Live = false;
286       return nullptr;
287     }
288 
289     if (Sym.isBindingLocal())
290       return make<DefinedFunction>(Name, Flags, this, Func);
291     return Symtab->addDefinedFunction(Name, Flags, this, Func);
292   }
293   case WASM_SYMBOL_TYPE_DATA: {
294     InputSegment *Seg = Segments[Sym.Info.DataRef.Segment];
295     if (isExcludedByComdat(Seg)) {
296       Seg->Live = false;
297       return nullptr;
298     }
299 
300     uint32_t Offset = Sym.Info.DataRef.Offset;
301     uint32_t Size = Sym.Info.DataRef.Size;
302 
303     if (Sym.isBindingLocal())
304       return make<DefinedData>(Name, Flags, this, Seg, Offset, Size);
305     return Symtab->addDefinedData(Name, Flags, this, Seg, Offset, Size);
306   }
307   case WASM_SYMBOL_TYPE_GLOBAL: {
308     InputGlobal *Global =
309         Globals[Sym.Info.ElementIndex - WasmObj->getNumImportedGlobals()];
310     if (Sym.isBindingLocal())
311       return make<DefinedGlobal>(Name, Flags, this, Global);
312     return Symtab->addDefinedGlobal(Name, Flags, this, Global);
313   }
314   case WASM_SYMBOL_TYPE_SECTION: {
315     InputSection *Section = CustomSectionsByIndex[Sym.Info.ElementIndex];
316     assert(Sym.isBindingLocal());
317     return make<SectionSymbol>(Name, Flags, Section, this);
318   }
319   }
320   llvm_unreachable("unknown symbol kind");
321 }
322 
323 Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) {
324   StringRef Name = Sym.Info.Name;
325   uint32_t Flags = Sym.Info.Flags;
326 
327   switch (Sym.Info.Kind) {
328   case WASM_SYMBOL_TYPE_FUNCTION:
329     return Symtab->addUndefinedFunction(Name, Flags, this, Sym.FunctionType);
330   case WASM_SYMBOL_TYPE_DATA:
331     return Symtab->addUndefinedData(Name, Flags, this);
332   case WASM_SYMBOL_TYPE_GLOBAL:
333     return Symtab->addUndefinedGlobal(Name, Flags, this, Sym.GlobalType);
334   case WASM_SYMBOL_TYPE_SECTION:
335     llvm_unreachable("section symbols cannot be undefined");
336   }
337   llvm_unreachable("unknown symbol kind");
338 }
339 
340 void ArchiveFile::parse() {
341   // Parse a MemoryBufferRef as an archive file.
342   DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
343   File = CHECK(Archive::create(MB), toString(this));
344 
345   // Read the symbol table to construct Lazy symbols.
346   int Count = 0;
347   for (const Archive::Symbol &Sym : File->symbols()) {
348     Symtab->addLazy(this, &Sym);
349     ++Count;
350   }
351   DEBUG(dbgs() << "Read " << Count << " symbols\n");
352 }
353 
354 void ArchiveFile::addMember(const Archive::Symbol *Sym) {
355   const Archive::Child &C =
356       CHECK(Sym->getMember(),
357             "could not get the member for symbol " + Sym->getName());
358 
359   // Don't try to load the same member twice (this can happen when members
360   // mutually reference each other).
361   if (!Seen.insert(C.getChildOffset()).second)
362     return;
363 
364   DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n");
365   DEBUG(dbgs() << "from archive: " << toString(this) << "\n");
366 
367   MemoryBufferRef MB =
368       CHECK(C.getMemoryBufferRef(),
369             "could not get the buffer for the member defining symbol " +
370                 Sym->getName());
371 
372   if (identify_magic(MB.getBuffer()) != file_magic::wasm_object) {
373     error("unknown file type: " + MB.getBufferIdentifier());
374     return;
375   }
376 
377   InputFile *Obj = make<ObjFile>(MB);
378   Obj->ParentName = ParentName;
379   Symtab->addFile(Obj);
380 }
381 
382 // Returns a string in the format of "foo.o" or "foo.a(bar.o)".
383 std::string lld::toString(const wasm::InputFile *File) {
384   if (!File)
385     return "<internal>";
386 
387   if (File->ParentName.empty())
388     return File->getName();
389 
390   return (File->ParentName + "(" + File->getName() + ")").str();
391 }
392