1 //===- InputFiles.cpp -----------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "InputFiles.h"
11 #include "Config.h"
12 #include "InputChunks.h"
13 #include "InputEvent.h"
14 #include "InputGlobal.h"
15 #include "SymbolTable.h"
16 #include "lld/Common/ErrorHandler.h"
17 #include "lld/Common/Memory.h"
18 #include "llvm/Object/Binary.h"
19 #include "llvm/Object/Wasm.h"
20 #include "llvm/Support/raw_ostream.h"
21 
22 #define DEBUG_TYPE "lld"
23 
24 using namespace lld;
25 using namespace lld::wasm;
26 
27 using namespace llvm;
28 using namespace llvm::object;
29 using namespace llvm::wasm;
30 
31 Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) {
32   log("Loading: " + Path);
33 
34   auto MBOrErr = MemoryBuffer::getFile(Path);
35   if (auto EC = MBOrErr.getError()) {
36     error("cannot open " + Path + ": " + EC.message());
37     return None;
38   }
39   std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
40   MemoryBufferRef MBRef = MB->getMemBufferRef();
41   make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership
42 
43   return MBRef;
44 }
45 
46 InputFile *lld::wasm::createObjectFile(MemoryBufferRef MB) {
47   file_magic Magic = identify_magic(MB.getBuffer());
48   if (Magic == file_magic::wasm_object)
49     return make<ObjFile>(MB);
50 
51   if (Magic == file_magic::bitcode)
52     return make<BitcodeFile>(MB);
53 
54   fatal("unknown file type: " + MB.getBufferIdentifier());
55 }
56 
57 void ObjFile::dumpInfo() const {
58   log("info for: " + getName() +
59       "\n              Symbols : " + Twine(Symbols.size()) +
60       "\n     Function Imports : " + Twine(WasmObj->getNumImportedFunctions()) +
61       "\n       Global Imports : " + Twine(WasmObj->getNumImportedGlobals()) +
62       "\n        Event Imports : " + Twine(WasmObj->getNumImportedEvents()));
63 }
64 
65 // Relocations contain either symbol or type indices.  This function takes a
66 // relocation and returns relocated index (i.e. translates from the input
67 // symbol/type space to the output symbol/type space).
68 uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const {
69   if (Reloc.Type == R_WEBASSEMBLY_TYPE_INDEX_LEB) {
70     assert(TypeIsUsed[Reloc.Index]);
71     return TypeMap[Reloc.Index];
72   }
73   return Symbols[Reloc.Index]->getOutputSymbolIndex();
74 }
75 
76 // Relocations can contain addend for combined sections. This function takes a
77 // relocation and returns updated addend by offset in the output section.
78 uint32_t ObjFile::calcNewAddend(const WasmRelocation &Reloc) const {
79   switch (Reloc.Type) {
80   case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
81   case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
82   case R_WEBASSEMBLY_MEMORY_ADDR_I32:
83   case R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
84     return Reloc.Addend;
85   case R_WEBASSEMBLY_SECTION_OFFSET_I32:
86     return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend;
87   default:
88     llvm_unreachable("unexpected relocation type");
89   }
90 }
91 
92 // Calculate the value we expect to find at the relocation location.
93 // This is used as a sanity check before applying a relocation to a given
94 // location.  It is useful for catching bugs in the compiler and linker.
95 uint32_t ObjFile::calcExpectedValue(const WasmRelocation &Reloc) const {
96   switch (Reloc.Type) {
97   case R_WEBASSEMBLY_TABLE_INDEX_I32:
98   case R_WEBASSEMBLY_TABLE_INDEX_SLEB: {
99     const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index];
100     return TableEntries[Sym.Info.ElementIndex];
101   }
102   case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
103   case R_WEBASSEMBLY_MEMORY_ADDR_I32:
104   case R_WEBASSEMBLY_MEMORY_ADDR_LEB: {
105     const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index];
106     if (Sym.isUndefined())
107       return 0;
108     const WasmSegment &Segment =
109         WasmObj->dataSegments()[Sym.Info.DataRef.Segment];
110     return Segment.Data.Offset.Value.Int32 + Sym.Info.DataRef.Offset +
111            Reloc.Addend;
112   }
113   case R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
114     if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) {
115       return Sym->Function->getFunctionInputOffset() +
116              Sym->Function->getFunctionCodeOffset() + Reloc.Addend;
117     }
118     return 0;
119   case R_WEBASSEMBLY_SECTION_OFFSET_I32:
120     return Reloc.Addend;
121   case R_WEBASSEMBLY_TYPE_INDEX_LEB:
122     return Reloc.Index;
123   case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
124   case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
125   case R_WEBASSEMBLY_EVENT_INDEX_LEB: {
126     const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index];
127     return Sym.Info.ElementIndex;
128   }
129   default:
130     llvm_unreachable("unknown relocation type");
131   }
132 }
133 
134 // Translate from the relocation's index into the final linked output value.
135 uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const {
136   switch (Reloc.Type) {
137   case R_WEBASSEMBLY_TABLE_INDEX_I32:
138   case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
139     return getFunctionSymbol(Reloc.Index)->getTableIndex();
140   case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
141   case R_WEBASSEMBLY_MEMORY_ADDR_I32:
142   case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
143     if (auto *Sym = dyn_cast<DefinedData>(getDataSymbol(Reloc.Index)))
144       if (Sym->isLive())
145         return Sym->getVirtualAddress() + Reloc.Addend;
146     return 0;
147   case R_WEBASSEMBLY_TYPE_INDEX_LEB:
148     return TypeMap[Reloc.Index];
149   case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
150     return getFunctionSymbol(Reloc.Index)->getFunctionIndex();
151   case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
152     return getGlobalSymbol(Reloc.Index)->getGlobalIndex();
153   case R_WEBASSEMBLY_EVENT_INDEX_LEB:
154     return getEventSymbol(Reloc.Index)->getEventIndex();
155   case R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
156     if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) {
157       if (Sym->isLive())
158         return Sym->Function->OutputOffset +
159                Sym->Function->getFunctionCodeOffset() + Reloc.Addend;
160     }
161     return 0;
162   case R_WEBASSEMBLY_SECTION_OFFSET_I32:
163     return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend;
164   default:
165     llvm_unreachable("unknown relocation type");
166   }
167 }
168 
169 template <class T>
170 static void setRelocs(const std::vector<T *> &Chunks,
171                       const WasmSection *Section) {
172   if (!Section)
173     return;
174 
175   ArrayRef<WasmRelocation> Relocs = Section->Relocations;
176   assert(std::is_sorted(Relocs.begin(), Relocs.end(),
177                         [](const WasmRelocation &R1, const WasmRelocation &R2) {
178                           return R1.Offset < R2.Offset;
179                         }));
180   assert(std::is_sorted(
181       Chunks.begin(), Chunks.end(), [](InputChunk *C1, InputChunk *C2) {
182         return C1->getInputSectionOffset() < C2->getInputSectionOffset();
183       }));
184 
185   auto RelocsNext = Relocs.begin();
186   auto RelocsEnd = Relocs.end();
187   auto RelocLess = [](const WasmRelocation &R, uint32_t Val) {
188     return R.Offset < Val;
189   };
190   for (InputChunk *C : Chunks) {
191     auto RelocsStart = std::lower_bound(RelocsNext, RelocsEnd,
192                                         C->getInputSectionOffset(), RelocLess);
193     RelocsNext = std::lower_bound(
194         RelocsStart, RelocsEnd, C->getInputSectionOffset() + C->getInputSize(),
195         RelocLess);
196     C->setRelocations(ArrayRef<WasmRelocation>(RelocsStart, RelocsNext));
197   }
198 }
199 
200 void ObjFile::parse() {
201   // Parse a memory buffer as a wasm file.
202   LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n");
203   std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this));
204 
205   auto *Obj = dyn_cast<WasmObjectFile>(Bin.get());
206   if (!Obj)
207     fatal(toString(this) + ": not a wasm file");
208   if (!Obj->isRelocatableObject())
209     fatal(toString(this) + ": not a relocatable wasm file");
210 
211   Bin.release();
212   WasmObj.reset(Obj);
213 
214   // Build up a map of function indices to table indices for use when
215   // verifying the existing table index relocations
216   uint32_t TotalFunctions =
217       WasmObj->getNumImportedFunctions() + WasmObj->functions().size();
218   TableEntries.resize(TotalFunctions);
219   for (const WasmElemSegment &Seg : WasmObj->elements()) {
220     if (Seg.Offset.Opcode != WASM_OPCODE_I32_CONST)
221       fatal(toString(this) + ": invalid table elements");
222     uint32_t Offset = Seg.Offset.Value.Int32;
223     for (uint32_t Index = 0; Index < Seg.Functions.size(); Index++) {
224 
225       uint32_t FunctionIndex = Seg.Functions[Index];
226       TableEntries[FunctionIndex] = Offset + Index;
227     }
228   }
229 
230   // Find the code and data sections.  Wasm objects can have at most one code
231   // and one data section.
232   uint32_t SectionIndex = 0;
233   for (const SectionRef &Sec : WasmObj->sections()) {
234     const WasmSection &Section = WasmObj->getWasmSection(Sec);
235     if (Section.Type == WASM_SEC_CODE) {
236       CodeSection = &Section;
237     } else if (Section.Type == WASM_SEC_DATA) {
238       DataSection = &Section;
239     } else if (Section.Type == WASM_SEC_CUSTOM) {
240       CustomSections.emplace_back(make<InputSection>(Section, this));
241       CustomSections.back()->setRelocations(Section.Relocations);
242       CustomSectionsByIndex[SectionIndex] = CustomSections.back();
243     }
244     SectionIndex++;
245   }
246 
247   TypeMap.resize(getWasmObj()->types().size());
248   TypeIsUsed.resize(getWasmObj()->types().size(), false);
249 
250   ArrayRef<StringRef> Comdats = WasmObj->linkingData().Comdats;
251   UsedComdats.resize(Comdats.size());
252   for (unsigned I = 0; I < Comdats.size(); ++I)
253     UsedComdats[I] = Symtab->addComdat(Comdats[I]);
254 
255   // Populate `Segments`.
256   for (const WasmSegment &S : WasmObj->dataSegments())
257     Segments.emplace_back(make<InputSegment>(S, this));
258   setRelocs(Segments, DataSection);
259 
260   // Populate `Functions`.
261   ArrayRef<WasmFunction> Funcs = WasmObj->functions();
262   ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes();
263   ArrayRef<WasmSignature> Types = WasmObj->types();
264   Functions.reserve(Funcs.size());
265 
266   for (size_t I = 0, E = Funcs.size(); I != E; ++I)
267     Functions.emplace_back(
268         make<InputFunction>(Types[FuncTypes[I]], &Funcs[I], this));
269   setRelocs(Functions, CodeSection);
270 
271   // Populate `Globals`.
272   for (const WasmGlobal &G : WasmObj->globals())
273     Globals.emplace_back(make<InputGlobal>(G, this));
274 
275   // Populate `Events`.
276   for (const WasmEvent &E : WasmObj->events())
277     Events.emplace_back(make<InputEvent>(Types[E.Type.SigIndex], E, this));
278 
279   // Populate `Symbols` based on the WasmSymbols in the object.
280   Symbols.reserve(WasmObj->getNumberOfSymbols());
281   for (const SymbolRef &Sym : WasmObj->symbols()) {
282     const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl());
283     if (Symbol *Sym = createDefined(WasmSym))
284       Symbols.push_back(Sym);
285     else
286       Symbols.push_back(createUndefined(WasmSym));
287   }
288 }
289 
290 bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const {
291   uint32_t C = Chunk->getComdat();
292   if (C == UINT32_MAX)
293     return false;
294   return !UsedComdats[C];
295 }
296 
297 FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t Index) const {
298   return cast<FunctionSymbol>(Symbols[Index]);
299 }
300 
301 GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t Index) const {
302   return cast<GlobalSymbol>(Symbols[Index]);
303 }
304 
305 EventSymbol *ObjFile::getEventSymbol(uint32_t Index) const {
306   return cast<EventSymbol>(Symbols[Index]);
307 }
308 
309 SectionSymbol *ObjFile::getSectionSymbol(uint32_t Index) const {
310   return cast<SectionSymbol>(Symbols[Index]);
311 }
312 
313 DataSymbol *ObjFile::getDataSymbol(uint32_t Index) const {
314   return cast<DataSymbol>(Symbols[Index]);
315 }
316 
317 Symbol *ObjFile::createDefined(const WasmSymbol &Sym) {
318   if (!Sym.isDefined())
319     return nullptr;
320 
321   StringRef Name = Sym.Info.Name;
322   uint32_t Flags = Sym.Info.Flags;
323 
324   switch (Sym.Info.Kind) {
325   case WASM_SYMBOL_TYPE_FUNCTION: {
326     InputFunction *Func =
327         Functions[Sym.Info.ElementIndex - WasmObj->getNumImportedFunctions()];
328     if (isExcludedByComdat(Func)) {
329       Func->Live = false;
330       return nullptr;
331     }
332 
333     if (Sym.isBindingLocal())
334       return make<DefinedFunction>(Name, Flags, this, Func);
335     return Symtab->addDefinedFunction(Name, Flags, this, Func);
336   }
337   case WASM_SYMBOL_TYPE_DATA: {
338     InputSegment *Seg = Segments[Sym.Info.DataRef.Segment];
339     if (isExcludedByComdat(Seg)) {
340       Seg->Live = false;
341       return nullptr;
342     }
343 
344     uint32_t Offset = Sym.Info.DataRef.Offset;
345     uint32_t Size = Sym.Info.DataRef.Size;
346 
347     if (Sym.isBindingLocal())
348       return make<DefinedData>(Name, Flags, this, Seg, Offset, Size);
349     return Symtab->addDefinedData(Name, Flags, this, Seg, Offset, Size);
350   }
351   case WASM_SYMBOL_TYPE_GLOBAL: {
352     InputGlobal *Global =
353         Globals[Sym.Info.ElementIndex - WasmObj->getNumImportedGlobals()];
354     if (Sym.isBindingLocal())
355       return make<DefinedGlobal>(Name, Flags, this, Global);
356     return Symtab->addDefinedGlobal(Name, Flags, this, Global);
357   }
358   case WASM_SYMBOL_TYPE_SECTION: {
359     InputSection *Section = CustomSectionsByIndex[Sym.Info.ElementIndex];
360     assert(Sym.isBindingLocal());
361     return make<SectionSymbol>(Name, Flags, Section, this);
362   }
363   case WASM_SYMBOL_TYPE_EVENT: {
364     InputEvent *Event =
365         Events[Sym.Info.ElementIndex - WasmObj->getNumImportedEvents()];
366     if (Sym.isBindingLocal())
367       return make<DefinedEvent>(Name, Flags, this, Event);
368     return Symtab->addDefinedEvent(Name, Flags, this, Event);
369   }
370   }
371   llvm_unreachable("unknown symbol kind");
372 }
373 
374 Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) {
375   StringRef Name = Sym.Info.Name;
376   uint32_t Flags = Sym.Info.Flags;
377 
378   switch (Sym.Info.Kind) {
379   case WASM_SYMBOL_TYPE_FUNCTION:
380     return Symtab->addUndefinedFunction(Name, Flags, this, Sym.Signature);
381   case WASM_SYMBOL_TYPE_DATA:
382     return Symtab->addUndefinedData(Name, Flags, this);
383   case WASM_SYMBOL_TYPE_GLOBAL:
384     return Symtab->addUndefinedGlobal(Name, Flags, this, Sym.GlobalType);
385   case WASM_SYMBOL_TYPE_SECTION:
386     llvm_unreachable("section symbols cannot be undefined");
387   }
388   llvm_unreachable("unknown symbol kind");
389 }
390 
391 void ArchiveFile::parse() {
392   // Parse a MemoryBufferRef as an archive file.
393   LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
394   File = CHECK(Archive::create(MB), toString(this));
395 
396   // Read the symbol table to construct Lazy symbols.
397   int Count = 0;
398   for (const Archive::Symbol &Sym : File->symbols()) {
399     Symtab->addLazy(this, &Sym);
400     ++Count;
401   }
402   LLVM_DEBUG(dbgs() << "Read " << Count << " symbols\n");
403 }
404 
405 void ArchiveFile::addMember(const Archive::Symbol *Sym) {
406   const Archive::Child &C =
407       CHECK(Sym->getMember(),
408             "could not get the member for symbol " + Sym->getName());
409 
410   // Don't try to load the same member twice (this can happen when members
411   // mutually reference each other).
412   if (!Seen.insert(C.getChildOffset()).second)
413     return;
414 
415   LLVM_DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n");
416   LLVM_DEBUG(dbgs() << "from archive: " << toString(this) << "\n");
417 
418   MemoryBufferRef MB =
419       CHECK(C.getMemoryBufferRef(),
420             "could not get the buffer for the member defining symbol " +
421                 Sym->getName());
422 
423   InputFile *Obj = createObjectFile(MB);
424   Obj->ArchiveName = getName();
425   Symtab->addFile(Obj);
426 }
427 
428 static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) {
429   switch (GvVisibility) {
430   case GlobalValue::DefaultVisibility:
431     return WASM_SYMBOL_VISIBILITY_DEFAULT;
432   case GlobalValue::HiddenVisibility:
433   case GlobalValue::ProtectedVisibility:
434     return WASM_SYMBOL_VISIBILITY_HIDDEN;
435   }
436   llvm_unreachable("unknown visibility");
437 }
438 
439 static Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &ObjSym,
440                                    BitcodeFile &F) {
441   StringRef Name = Saver.save(ObjSym.getName());
442 
443   uint32_t Flags = ObjSym.isWeak() ? WASM_SYMBOL_BINDING_WEAK : 0;
444   Flags |= mapVisibility(ObjSym.getVisibility());
445 
446   if (ObjSym.isUndefined()) {
447     if (ObjSym.isExecutable())
448       return Symtab->addUndefinedFunction(Name, Flags, &F, nullptr);
449     return Symtab->addUndefinedData(Name, Flags, &F);
450   }
451 
452   if (ObjSym.isExecutable())
453     return Symtab->addDefinedFunction(Name, Flags, &F, nullptr);
454   return Symtab->addDefinedData(Name, Flags, &F, nullptr, 0, 0);
455 }
456 
457 void BitcodeFile::parse() {
458   Obj = check(lto::InputFile::create(MemoryBufferRef(
459       MB.getBuffer(), Saver.save(ArchiveName + MB.getBufferIdentifier()))));
460   Triple T(Obj->getTargetTriple());
461   if (T.getArch() != Triple::wasm32) {
462     error(toString(MB.getBufferIdentifier()) + ": machine type must be wasm32");
463     return;
464   }
465 
466   for (const lto::InputFile::Symbol &ObjSym : Obj->symbols())
467     Symbols.push_back(createBitcodeSymbol(ObjSym, *this));
468 }
469 
470 // Returns a string in the format of "foo.o" or "foo.a(bar.o)".
471 std::string lld::toString(const wasm::InputFile *File) {
472   if (!File)
473     return "<internal>";
474 
475   if (File->ArchiveName.empty())
476     return File->getName();
477 
478   return (File->ArchiveName + "(" + File->getName() + ")").str();
479 }
480