1 //===- InputFiles.cpp -----------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "InputFiles.h"
11 #include "Config.h"
12 #include "InputChunks.h"
13 #include "InputGlobal.h"
14 #include "SymbolTable.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "lld/Common/Memory.h"
17 #include "llvm/Object/Binary.h"
18 #include "llvm/Object/Wasm.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "lld"
22 
23 using namespace lld;
24 using namespace lld::wasm;
25 
26 using namespace llvm;
27 using namespace llvm::object;
28 using namespace llvm::wasm;
29 
30 Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) {
31   log("Loading: " + Path);
32 
33   auto MBOrErr = MemoryBuffer::getFile(Path);
34   if (auto EC = MBOrErr.getError()) {
35     error("cannot open " + Path + ": " + EC.message());
36     return None;
37   }
38   std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
39   MemoryBufferRef MBRef = MB->getMemBufferRef();
40   make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership
41 
42   return MBRef;
43 }
44 
45 void ObjFile::dumpInfo() const {
46   log("info for: " + getName() +
47       "\n              Symbols : " + Twine(Symbols.size()) +
48       "\n     Function Imports : " + Twine(WasmObj->getNumImportedFunctions()) +
49       "\n       Global Imports : " + Twine(WasmObj->getNumImportedGlobals()));
50 }
51 
52 // Relocations contain either symbol or type indices.  This function takes a
53 // relocation and returns relocated index (i.e. translates from the input
54 // sybmol/type space to the output symbol/type space).
55 uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const {
56   if (Reloc.Type == R_WEBASSEMBLY_TYPE_INDEX_LEB) {
57     assert(TypeIsUsed[Reloc.Index]);
58     return TypeMap[Reloc.Index];
59   }
60   return Symbols[Reloc.Index]->getOutputSymbolIndex();
61 }
62 
63 // Relocations can contain addend for combined sections. This function takes a
64 // relocation and returns updated addend by offset in the output section.
65 uint32_t ObjFile::calcNewAddend(const WasmRelocation &Reloc) const {
66   switch (Reloc.Type) {
67   case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
68   case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
69   case R_WEBASSEMBLY_MEMORY_ADDR_I32:
70   case R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
71     return Reloc.Addend;
72   case R_WEBASSEMBLY_SECTION_OFFSET_I32:
73     return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend;
74   default:
75     llvm_unreachable("unexpected relocation type");
76   }
77 }
78 
79 // Calculate the value we expect to find at the relocation location.
80 // This is used as a sanity check before applying a relocation to a given
81 // location.  It is useful for catching bugs in the compiler and linker.
82 uint32_t ObjFile::calcExpectedValue(const WasmRelocation &Reloc) const {
83   switch (Reloc.Type) {
84   case R_WEBASSEMBLY_TABLE_INDEX_I32:
85   case R_WEBASSEMBLY_TABLE_INDEX_SLEB: {
86     const WasmSymbol& Sym = WasmObj->syms()[Reloc.Index];
87     return TableEntries[Sym.Info.ElementIndex];
88   }
89   case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
90   case R_WEBASSEMBLY_MEMORY_ADDR_I32:
91   case R_WEBASSEMBLY_MEMORY_ADDR_LEB: {
92     const WasmSymbol& Sym = WasmObj->syms()[Reloc.Index];
93     if (Sym.isUndefined())
94       return 0;
95     const WasmSegment& Segment = WasmObj->dataSegments()[Sym.Info.DataRef.Segment];
96     return Segment.Data.Offset.Value.Int32 + Sym.Info.DataRef.Offset +
97            Reloc.Addend;
98   }
99   case R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
100     if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) {
101       return Sym->Function->getFunctionInputOffset() +
102              Sym->Function->getFunctionCodeOffset() + Reloc.Addend;
103     }
104     return 0;
105   case R_WEBASSEMBLY_SECTION_OFFSET_I32:
106     return Reloc.Addend;
107   case R_WEBASSEMBLY_TYPE_INDEX_LEB:
108     return Reloc.Index;
109   case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
110   case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: {
111     const WasmSymbol& Sym = WasmObj->syms()[Reloc.Index];
112     return Sym.Info.ElementIndex;
113   }
114   default:
115     llvm_unreachable("unknown relocation type");
116   }
117 }
118 
119 // Translate from the relocation's index into the final linked output value.
120 uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const {
121   switch (Reloc.Type) {
122   case R_WEBASSEMBLY_TABLE_INDEX_I32:
123   case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
124     return getFunctionSymbol(Reloc.Index)->getTableIndex();
125   case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
126   case R_WEBASSEMBLY_MEMORY_ADDR_I32:
127   case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
128     if (auto *Sym = dyn_cast<DefinedData>(getDataSymbol(Reloc.Index)))
129       if (Sym->isLive())
130         return Sym->getVirtualAddress() + Reloc.Addend;
131     return 0;
132   case R_WEBASSEMBLY_TYPE_INDEX_LEB:
133     return TypeMap[Reloc.Index];
134   case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
135     return getFunctionSymbol(Reloc.Index)->getFunctionIndex();
136   case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
137     return getGlobalSymbol(Reloc.Index)->getGlobalIndex();
138   case R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
139     if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) {
140       return Sym->Function->OutputOffset +
141              Sym->Function->getFunctionCodeOffset() + Reloc.Addend;
142     }
143     return 0;
144   case R_WEBASSEMBLY_SECTION_OFFSET_I32:
145     return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend;
146   default:
147     llvm_unreachable("unknown relocation type");
148   }
149 }
150 
151 void ObjFile::parse() {
152   // Parse a memory buffer as a wasm file.
153   LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n");
154   std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this));
155 
156   auto *Obj = dyn_cast<WasmObjectFile>(Bin.get());
157   if (!Obj)
158     fatal(toString(this) + ": not a wasm file");
159   if (!Obj->isRelocatableObject())
160     fatal(toString(this) + ": not a relocatable wasm file");
161 
162   Bin.release();
163   WasmObj.reset(Obj);
164 
165   // Build up a map of function indices to table indices for use when
166   // verifying the existing table index relocations
167   uint32_t TotalFunctions =
168       WasmObj->getNumImportedFunctions() + WasmObj->functions().size();
169   TableEntries.resize(TotalFunctions);
170   for (const WasmElemSegment &Seg : WasmObj->elements()) {
171     if (Seg.Offset.Opcode != WASM_OPCODE_I32_CONST)
172       fatal(toString(this) + ": invalid table elements");
173     uint32_t Offset = Seg.Offset.Value.Int32;
174     for (uint32_t Index = 0; Index < Seg.Functions.size(); Index++) {
175 
176       uint32_t FunctionIndex = Seg.Functions[Index];
177       TableEntries[FunctionIndex] = Offset + Index;
178     }
179   }
180 
181   // Find the code and data sections.  Wasm objects can have at most one code
182   // and one data section.
183   uint32_t SectionIndex = 0;
184   for (const SectionRef &Sec : WasmObj->sections()) {
185     const WasmSection &Section = WasmObj->getWasmSection(Sec);
186     if (Section.Type == WASM_SEC_CODE) {
187       CodeSection = &Section;
188     } else if (Section.Type == WASM_SEC_DATA) {
189       DataSection = &Section;
190     } else if (Section.Type == WASM_SEC_CUSTOM) {
191       CustomSections.emplace_back(make<InputSection>(Section, this));
192       CustomSections.back()->copyRelocations(Section);
193       CustomSectionsByIndex[SectionIndex] = CustomSections.back();
194     }
195     SectionIndex++;
196   }
197 
198   TypeMap.resize(getWasmObj()->types().size());
199   TypeIsUsed.resize(getWasmObj()->types().size(), false);
200 
201   ArrayRef<StringRef> Comdats = WasmObj->linkingData().Comdats;
202   UsedComdats.resize(Comdats.size());
203   for (unsigned I = 0; I < Comdats.size(); ++I)
204     UsedComdats[I] = Symtab->addComdat(Comdats[I]);
205 
206   // Populate `Segments`.
207   for (const WasmSegment &S : WasmObj->dataSegments()) {
208     InputSegment *Seg = make<InputSegment>(S, this);
209     Seg->copyRelocations(*DataSection);
210     Segments.emplace_back(Seg);
211   }
212 
213   // Populate `Functions`.
214   ArrayRef<WasmFunction> Funcs = WasmObj->functions();
215   ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes();
216   ArrayRef<WasmSignature> Types = WasmObj->types();
217   Functions.reserve(Funcs.size());
218 
219   for (size_t I = 0, E = Funcs.size(); I != E; ++I) {
220     InputFunction *F =
221         make<InputFunction>(Types[FuncTypes[I]], &Funcs[I], this);
222     F->copyRelocations(*CodeSection);
223     Functions.emplace_back(F);
224   }
225 
226   // Populate `Globals`.
227   for (const WasmGlobal &G : WasmObj->globals())
228     Globals.emplace_back(make<InputGlobal>(G, this));
229 
230   // Populate `Symbols` based on the WasmSymbols in the object.
231   Symbols.reserve(WasmObj->getNumberOfSymbols());
232   for (const SymbolRef &Sym : WasmObj->symbols()) {
233     const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl());
234     if (Symbol *Sym = createDefined(WasmSym))
235       Symbols.push_back(Sym);
236     else
237       Symbols.push_back(createUndefined(WasmSym));
238   }
239 }
240 
241 bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const {
242   uint32_t C = Chunk->getComdat();
243   if (C == UINT32_MAX)
244     return false;
245   return !UsedComdats[C];
246 }
247 
248 FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t Index) const {
249   return cast<FunctionSymbol>(Symbols[Index]);
250 }
251 
252 GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t Index) const {
253   return cast<GlobalSymbol>(Symbols[Index]);
254 }
255 
256 SectionSymbol *ObjFile::getSectionSymbol(uint32_t Index) const {
257   return cast<SectionSymbol>(Symbols[Index]);
258 }
259 
260 DataSymbol *ObjFile::getDataSymbol(uint32_t Index) const {
261   return cast<DataSymbol>(Symbols[Index]);
262 }
263 
264 Symbol *ObjFile::createDefined(const WasmSymbol &Sym) {
265   if (!Sym.isDefined())
266     return nullptr;
267 
268   StringRef Name = Sym.Info.Name;
269   uint32_t Flags = Sym.Info.Flags;
270 
271   switch (Sym.Info.Kind) {
272   case WASM_SYMBOL_TYPE_FUNCTION: {
273     InputFunction *Func =
274         Functions[Sym.Info.ElementIndex - WasmObj->getNumImportedFunctions()];
275     if (isExcludedByComdat(Func)) {
276       Func->Live = false;
277       return nullptr;
278     }
279 
280     if (Sym.isBindingLocal())
281       return make<DefinedFunction>(Name, Flags, this, Func);
282     return Symtab->addDefinedFunction(Name, Flags, this, Func);
283   }
284   case WASM_SYMBOL_TYPE_DATA: {
285     InputSegment *Seg = Segments[Sym.Info.DataRef.Segment];
286     if (isExcludedByComdat(Seg)) {
287       Seg->Live = false;
288       return nullptr;
289     }
290 
291     uint32_t Offset = Sym.Info.DataRef.Offset;
292     uint32_t Size = Sym.Info.DataRef.Size;
293 
294     if (Sym.isBindingLocal())
295       return make<DefinedData>(Name, Flags, this, Seg, Offset, Size);
296     return Symtab->addDefinedData(Name, Flags, this, Seg, Offset, Size);
297   }
298   case WASM_SYMBOL_TYPE_GLOBAL: {
299     InputGlobal *Global =
300         Globals[Sym.Info.ElementIndex - WasmObj->getNumImportedGlobals()];
301     if (Sym.isBindingLocal())
302       return make<DefinedGlobal>(Name, Flags, this, Global);
303     return Symtab->addDefinedGlobal(Name, Flags, this, Global);
304   }
305   case WASM_SYMBOL_TYPE_SECTION: {
306     InputSection *Section = CustomSectionsByIndex[Sym.Info.ElementIndex];
307     assert(Sym.isBindingLocal());
308     return make<SectionSymbol>(Name, Flags, Section, this);
309   }
310   }
311   llvm_unreachable("unknown symbol kind");
312 }
313 
314 Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) {
315   StringRef Name = Sym.Info.Name;
316   uint32_t Flags = Sym.Info.Flags;
317 
318   switch (Sym.Info.Kind) {
319   case WASM_SYMBOL_TYPE_FUNCTION:
320     return Symtab->addUndefinedFunction(Name, Flags, this, Sym.FunctionType);
321   case WASM_SYMBOL_TYPE_DATA:
322     return Symtab->addUndefinedData(Name, Flags, this);
323   case WASM_SYMBOL_TYPE_GLOBAL:
324     return Symtab->addUndefinedGlobal(Name, Flags, this, Sym.GlobalType);
325   case WASM_SYMBOL_TYPE_SECTION:
326     llvm_unreachable("section symbols cannot be undefined");
327   }
328   llvm_unreachable("unknown symbol kind");
329 }
330 
331 void ArchiveFile::parse() {
332   // Parse a MemoryBufferRef as an archive file.
333   LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
334   File = CHECK(Archive::create(MB), toString(this));
335 
336   // Read the symbol table to construct Lazy symbols.
337   int Count = 0;
338   for (const Archive::Symbol &Sym : File->symbols()) {
339     Symtab->addLazy(this, &Sym);
340     ++Count;
341   }
342   LLVM_DEBUG(dbgs() << "Read " << Count << " symbols\n");
343 }
344 
345 void ArchiveFile::addMember(const Archive::Symbol *Sym) {
346   const Archive::Child &C =
347       CHECK(Sym->getMember(),
348             "could not get the member for symbol " + Sym->getName());
349 
350   // Don't try to load the same member twice (this can happen when members
351   // mutually reference each other).
352   if (!Seen.insert(C.getChildOffset()).second)
353     return;
354 
355   LLVM_DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n");
356   LLVM_DEBUG(dbgs() << "from archive: " << toString(this) << "\n");
357 
358   MemoryBufferRef MB =
359       CHECK(C.getMemoryBufferRef(),
360             "could not get the buffer for the member defining symbol " +
361                 Sym->getName());
362 
363   if (identify_magic(MB.getBuffer()) != file_magic::wasm_object) {
364     error("unknown file type: " + MB.getBufferIdentifier());
365     return;
366   }
367 
368   InputFile *Obj = make<ObjFile>(MB);
369   Obj->ParentName = ParentName;
370   Symtab->addFile(Obj);
371 }
372 
373 // Returns a string in the format of "foo.o" or "foo.a(bar.o)".
374 std::string lld::toString(const wasm::InputFile *File) {
375   if (!File)
376     return "<internal>";
377 
378   if (File->ParentName.empty())
379     return File->getName();
380 
381   return (File->ParentName + "(" + File->getName() + ")").str();
382 }
383