1 //===- InputFiles.cpp -----------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "InputFiles.h"
11 #include "Config.h"
12 #include "InputChunks.h"
13 #include "InputGlobal.h"
14 #include "SymbolTable.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "lld/Common/Memory.h"
17 #include "llvm/Object/Binary.h"
18 #include "llvm/Object/Wasm.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "lld"
22 
23 using namespace lld;
24 using namespace lld::wasm;
25 
26 using namespace llvm;
27 using namespace llvm::object;
28 using namespace llvm::wasm;
29 
30 Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) {
31   log("Loading: " + Path);
32 
33   auto MBOrErr = MemoryBuffer::getFile(Path);
34   if (auto EC = MBOrErr.getError()) {
35     error("cannot open " + Path + ": " + EC.message());
36     return None;
37   }
38   std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
39   MemoryBufferRef MBRef = MB->getMemBufferRef();
40   make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership
41 
42   return MBRef;
43 }
44 
45 InputFile *lld::wasm::createObjectFile(MemoryBufferRef MB) {
46   file_magic Magic = identify_magic(MB.getBuffer());
47   if (Magic == file_magic::wasm_object)
48     return make<ObjFile>(MB);
49 
50   if (Magic == file_magic::bitcode)
51     return make<BitcodeFile>(MB);
52 
53   fatal("unknown file type: " + MB.getBufferIdentifier());
54 }
55 
56 void ObjFile::dumpInfo() const {
57   log("info for: " + getName() +
58       "\n              Symbols : " + Twine(Symbols.size()) +
59       "\n     Function Imports : " + Twine(WasmObj->getNumImportedFunctions()) +
60       "\n       Global Imports : " + Twine(WasmObj->getNumImportedGlobals()));
61 }
62 
63 // Relocations contain either symbol or type indices.  This function takes a
64 // relocation and returns relocated index (i.e. translates from the input
65 // sybmol/type space to the output symbol/type space).
66 uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const {
67   if (Reloc.Type == R_WEBASSEMBLY_TYPE_INDEX_LEB) {
68     assert(TypeIsUsed[Reloc.Index]);
69     return TypeMap[Reloc.Index];
70   }
71   return Symbols[Reloc.Index]->getOutputSymbolIndex();
72 }
73 
74 // Relocations can contain addend for combined sections. This function takes a
75 // relocation and returns updated addend by offset in the output section.
76 uint32_t ObjFile::calcNewAddend(const WasmRelocation &Reloc) const {
77   switch (Reloc.Type) {
78   case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
79   case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
80   case R_WEBASSEMBLY_MEMORY_ADDR_I32:
81   case R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
82     return Reloc.Addend;
83   case R_WEBASSEMBLY_SECTION_OFFSET_I32:
84     return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend;
85   default:
86     llvm_unreachable("unexpected relocation type");
87   }
88 }
89 
90 // Calculate the value we expect to find at the relocation location.
91 // This is used as a sanity check before applying a relocation to a given
92 // location.  It is useful for catching bugs in the compiler and linker.
93 uint32_t ObjFile::calcExpectedValue(const WasmRelocation &Reloc) const {
94   switch (Reloc.Type) {
95   case R_WEBASSEMBLY_TABLE_INDEX_I32:
96   case R_WEBASSEMBLY_TABLE_INDEX_SLEB: {
97     const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index];
98     return TableEntries[Sym.Info.ElementIndex];
99   }
100   case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
101   case R_WEBASSEMBLY_MEMORY_ADDR_I32:
102   case R_WEBASSEMBLY_MEMORY_ADDR_LEB: {
103     const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index];
104     if (Sym.isUndefined())
105       return 0;
106     const WasmSegment &Segment =
107         WasmObj->dataSegments()[Sym.Info.DataRef.Segment];
108     return Segment.Data.Offset.Value.Int32 + Sym.Info.DataRef.Offset +
109            Reloc.Addend;
110   }
111   case R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
112     if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) {
113       return Sym->Function->getFunctionInputOffset() +
114              Sym->Function->getFunctionCodeOffset() + Reloc.Addend;
115     }
116     return 0;
117   case R_WEBASSEMBLY_SECTION_OFFSET_I32:
118     return Reloc.Addend;
119   case R_WEBASSEMBLY_TYPE_INDEX_LEB:
120     return Reloc.Index;
121   case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
122   case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: {
123     const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index];
124     return Sym.Info.ElementIndex;
125   }
126   default:
127     llvm_unreachable("unknown relocation type");
128   }
129 }
130 
131 // Translate from the relocation's index into the final linked output value.
132 uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const {
133   switch (Reloc.Type) {
134   case R_WEBASSEMBLY_TABLE_INDEX_I32:
135   case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
136     return getFunctionSymbol(Reloc.Index)->getTableIndex();
137   case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
138   case R_WEBASSEMBLY_MEMORY_ADDR_I32:
139   case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
140     if (auto *Sym = dyn_cast<DefinedData>(getDataSymbol(Reloc.Index)))
141       if (Sym->isLive())
142         return Sym->getVirtualAddress() + Reloc.Addend;
143     return 0;
144   case R_WEBASSEMBLY_TYPE_INDEX_LEB:
145     return TypeMap[Reloc.Index];
146   case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
147     return getFunctionSymbol(Reloc.Index)->getFunctionIndex();
148   case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
149     return getGlobalSymbol(Reloc.Index)->getGlobalIndex();
150   case R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
151     if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) {
152       if (Sym->isLive())
153         return Sym->Function->OutputOffset +
154                Sym->Function->getFunctionCodeOffset() + Reloc.Addend;
155     }
156     return 0;
157   case R_WEBASSEMBLY_SECTION_OFFSET_I32:
158     return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend;
159   default:
160     llvm_unreachable("unknown relocation type");
161   }
162 }
163 
164 template <class T>
165 static void setRelocs(const std::vector<T *> &Chunks,
166                       const WasmSection *Section) {
167   if (!Section)
168     return;
169 
170   ArrayRef<WasmRelocation> Relocs = Section->Relocations;
171   assert(std::is_sorted(Relocs.begin(), Relocs.end(),
172                         [](const WasmRelocation &R1, const WasmRelocation &R2) {
173                           return R1.Offset < R2.Offset;
174                         }));
175   assert(std::is_sorted(
176       Chunks.begin(), Chunks.end(), [](InputChunk *C1, InputChunk *C2) {
177         return C1->getInputSectionOffset() < C2->getInputSectionOffset();
178       }));
179 
180   auto RelocsNext = Relocs.begin();
181   auto RelocsEnd = Relocs.end();
182   auto RelocLess = [](const WasmRelocation &R, uint32_t Val) {
183     return R.Offset < Val;
184   };
185   for (InputChunk *C : Chunks) {
186     auto RelocsStart = std::lower_bound(RelocsNext, RelocsEnd,
187                                         C->getInputSectionOffset(), RelocLess);
188     RelocsNext = std::lower_bound(
189         RelocsStart, RelocsEnd, C->getInputSectionOffset() + C->getInputSize(),
190         RelocLess);
191     C->setRelocations(ArrayRef<WasmRelocation>(RelocsStart, RelocsNext));
192   }
193 }
194 
195 void ObjFile::parse() {
196   // Parse a memory buffer as a wasm file.
197   LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n");
198   std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this));
199 
200   auto *Obj = dyn_cast<WasmObjectFile>(Bin.get());
201   if (!Obj)
202     fatal(toString(this) + ": not a wasm file");
203   if (!Obj->isRelocatableObject())
204     fatal(toString(this) + ": not a relocatable wasm file");
205 
206   Bin.release();
207   WasmObj.reset(Obj);
208 
209   // Build up a map of function indices to table indices for use when
210   // verifying the existing table index relocations
211   uint32_t TotalFunctions =
212       WasmObj->getNumImportedFunctions() + WasmObj->functions().size();
213   TableEntries.resize(TotalFunctions);
214   for (const WasmElemSegment &Seg : WasmObj->elements()) {
215     if (Seg.Offset.Opcode != WASM_OPCODE_I32_CONST)
216       fatal(toString(this) + ": invalid table elements");
217     uint32_t Offset = Seg.Offset.Value.Int32;
218     for (uint32_t Index = 0; Index < Seg.Functions.size(); Index++) {
219 
220       uint32_t FunctionIndex = Seg.Functions[Index];
221       TableEntries[FunctionIndex] = Offset + Index;
222     }
223   }
224 
225   // Find the code and data sections.  Wasm objects can have at most one code
226   // and one data section.
227   uint32_t SectionIndex = 0;
228   for (const SectionRef &Sec : WasmObj->sections()) {
229     const WasmSection &Section = WasmObj->getWasmSection(Sec);
230     if (Section.Type == WASM_SEC_CODE) {
231       CodeSection = &Section;
232     } else if (Section.Type == WASM_SEC_DATA) {
233       DataSection = &Section;
234     } else if (Section.Type == WASM_SEC_CUSTOM) {
235       CustomSections.emplace_back(make<InputSection>(Section, this));
236       CustomSections.back()->setRelocations(Section.Relocations);
237       CustomSectionsByIndex[SectionIndex] = CustomSections.back();
238     }
239     SectionIndex++;
240   }
241 
242   TypeMap.resize(getWasmObj()->types().size());
243   TypeIsUsed.resize(getWasmObj()->types().size(), false);
244 
245   ArrayRef<StringRef> Comdats = WasmObj->linkingData().Comdats;
246   UsedComdats.resize(Comdats.size());
247   for (unsigned I = 0; I < Comdats.size(); ++I)
248     UsedComdats[I] = Symtab->addComdat(Comdats[I]);
249 
250   // Populate `Segments`.
251   for (const WasmSegment &S : WasmObj->dataSegments())
252     Segments.emplace_back(make<InputSegment>(S, this));
253   setRelocs(Segments, DataSection);
254 
255   // Populate `Functions`.
256   ArrayRef<WasmFunction> Funcs = WasmObj->functions();
257   ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes();
258   ArrayRef<WasmSignature> Types = WasmObj->types();
259   Functions.reserve(Funcs.size());
260 
261   for (size_t I = 0, E = Funcs.size(); I != E; ++I)
262     Functions.emplace_back(
263         make<InputFunction>(Types[FuncTypes[I]], &Funcs[I], this));
264   setRelocs(Functions, CodeSection);
265 
266   // Populate `Globals`.
267   for (const WasmGlobal &G : WasmObj->globals())
268     Globals.emplace_back(make<InputGlobal>(G, this));
269 
270   // Populate `Symbols` based on the WasmSymbols in the object.
271   Symbols.reserve(WasmObj->getNumberOfSymbols());
272   for (const SymbolRef &Sym : WasmObj->symbols()) {
273     const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl());
274     if (Symbol *Sym = createDefined(WasmSym))
275       Symbols.push_back(Sym);
276     else
277       Symbols.push_back(createUndefined(WasmSym));
278   }
279 }
280 
281 bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const {
282   uint32_t C = Chunk->getComdat();
283   if (C == UINT32_MAX)
284     return false;
285   return !UsedComdats[C];
286 }
287 
288 FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t Index) const {
289   return cast<FunctionSymbol>(Symbols[Index]);
290 }
291 
292 GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t Index) const {
293   return cast<GlobalSymbol>(Symbols[Index]);
294 }
295 
296 SectionSymbol *ObjFile::getSectionSymbol(uint32_t Index) const {
297   return cast<SectionSymbol>(Symbols[Index]);
298 }
299 
300 DataSymbol *ObjFile::getDataSymbol(uint32_t Index) const {
301   return cast<DataSymbol>(Symbols[Index]);
302 }
303 
304 Symbol *ObjFile::createDefined(const WasmSymbol &Sym) {
305   if (!Sym.isDefined())
306     return nullptr;
307 
308   StringRef Name = Sym.Info.Name;
309   uint32_t Flags = Sym.Info.Flags;
310 
311   switch (Sym.Info.Kind) {
312   case WASM_SYMBOL_TYPE_FUNCTION: {
313     InputFunction *Func =
314         Functions[Sym.Info.ElementIndex - WasmObj->getNumImportedFunctions()];
315     if (isExcludedByComdat(Func)) {
316       Func->Live = false;
317       return nullptr;
318     }
319 
320     if (Sym.isBindingLocal())
321       return make<DefinedFunction>(Name, Flags, this, Func);
322     return Symtab->addDefinedFunction(Name, Flags, this, Func);
323   }
324   case WASM_SYMBOL_TYPE_DATA: {
325     InputSegment *Seg = Segments[Sym.Info.DataRef.Segment];
326     if (isExcludedByComdat(Seg)) {
327       Seg->Live = false;
328       return nullptr;
329     }
330 
331     uint32_t Offset = Sym.Info.DataRef.Offset;
332     uint32_t Size = Sym.Info.DataRef.Size;
333 
334     if (Sym.isBindingLocal())
335       return make<DefinedData>(Name, Flags, this, Seg, Offset, Size);
336     return Symtab->addDefinedData(Name, Flags, this, Seg, Offset, Size);
337   }
338   case WASM_SYMBOL_TYPE_GLOBAL: {
339     InputGlobal *Global =
340         Globals[Sym.Info.ElementIndex - WasmObj->getNumImportedGlobals()];
341     if (Sym.isBindingLocal())
342       return make<DefinedGlobal>(Name, Flags, this, Global);
343     return Symtab->addDefinedGlobal(Name, Flags, this, Global);
344   }
345   case WASM_SYMBOL_TYPE_SECTION: {
346     InputSection *Section = CustomSectionsByIndex[Sym.Info.ElementIndex];
347     assert(Sym.isBindingLocal());
348     return make<SectionSymbol>(Name, Flags, Section, this);
349   }
350   }
351   llvm_unreachable("unknown symbol kind");
352 }
353 
354 Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) {
355   StringRef Name = Sym.Info.Name;
356   uint32_t Flags = Sym.Info.Flags;
357 
358   switch (Sym.Info.Kind) {
359   case WASM_SYMBOL_TYPE_FUNCTION:
360     return Symtab->addUndefinedFunction(Name, Flags, this, Sym.FunctionType);
361   case WASM_SYMBOL_TYPE_DATA:
362     return Symtab->addUndefinedData(Name, Flags, this);
363   case WASM_SYMBOL_TYPE_GLOBAL:
364     return Symtab->addUndefinedGlobal(Name, Flags, this, Sym.GlobalType);
365   case WASM_SYMBOL_TYPE_SECTION:
366     llvm_unreachable("section symbols cannot be undefined");
367   }
368   llvm_unreachable("unknown symbol kind");
369 }
370 
371 void ArchiveFile::parse() {
372   // Parse a MemoryBufferRef as an archive file.
373   LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
374   File = CHECK(Archive::create(MB), toString(this));
375 
376   // Read the symbol table to construct Lazy symbols.
377   int Count = 0;
378   for (const Archive::Symbol &Sym : File->symbols()) {
379     Symtab->addLazy(this, &Sym);
380     ++Count;
381   }
382   LLVM_DEBUG(dbgs() << "Read " << Count << " symbols\n");
383 }
384 
385 void ArchiveFile::addMember(const Archive::Symbol *Sym) {
386   const Archive::Child &C =
387       CHECK(Sym->getMember(),
388             "could not get the member for symbol " + Sym->getName());
389 
390   // Don't try to load the same member twice (this can happen when members
391   // mutually reference each other).
392   if (!Seen.insert(C.getChildOffset()).second)
393     return;
394 
395   LLVM_DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n");
396   LLVM_DEBUG(dbgs() << "from archive: " << toString(this) << "\n");
397 
398   MemoryBufferRef MB =
399       CHECK(C.getMemoryBufferRef(),
400             "could not get the buffer for the member defining symbol " +
401                 Sym->getName());
402 
403   InputFile *Obj = createObjectFile(MB);
404   Obj->ArchiveName = getName();
405   Symtab->addFile(Obj);
406 }
407 
408 static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) {
409   switch (GvVisibility) {
410   case GlobalValue::DefaultVisibility:
411     return WASM_SYMBOL_VISIBILITY_DEFAULT;
412   case GlobalValue::HiddenVisibility:
413   case GlobalValue::ProtectedVisibility:
414     return WASM_SYMBOL_VISIBILITY_HIDDEN;
415   }
416   llvm_unreachable("unknown visibility");
417 }
418 
419 static Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &ObjSym,
420                                    BitcodeFile &F) {
421   StringRef Name = Saver.save(ObjSym.getName());
422 
423   uint32_t Flags = ObjSym.isWeak() ? WASM_SYMBOL_BINDING_WEAK : 0;
424   Flags |= mapVisibility(ObjSym.getVisibility());
425 
426   if (ObjSym.isUndefined()) {
427     if (ObjSym.isExecutable())
428       return Symtab->addUndefinedFunction(Name, Flags, &F, nullptr);
429     return Symtab->addUndefinedData(Name, Flags, &F);
430   }
431 
432   if (ObjSym.isExecutable())
433     return Symtab->addDefinedFunction(Name, Flags, &F, nullptr);
434   return Symtab->addDefinedData(Name, Flags, &F, nullptr, 0, 0);
435 }
436 
437 void BitcodeFile::parse() {
438   Obj = check(lto::InputFile::create(MemoryBufferRef(
439       MB.getBuffer(), Saver.save(ArchiveName + MB.getBufferIdentifier()))));
440   Triple T(Obj->getTargetTriple());
441   if (T.getArch() != Triple::wasm32) {
442     error(toString(MB.getBufferIdentifier()) + ": machine type must be wasm32");
443     return;
444   }
445 
446   for (const lto::InputFile::Symbol &ObjSym : Obj->symbols())
447     Symbols.push_back(createBitcodeSymbol(ObjSym, *this));
448 }
449 
450 // Returns a string in the format of "foo.o" or "foo.a(bar.o)".
451 std::string lld::toString(const wasm::InputFile *File) {
452   if (!File)
453     return "<internal>";
454 
455   if (File->ArchiveName.empty())
456     return File->getName();
457 
458   return (File->ArchiveName + "(" + File->getName() + ")").str();
459 }
460