1 //===- InputFiles.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "InputFiles.h"
10 #include "Config.h"
11 #include "InputChunks.h"
12 #include "InputEvent.h"
13 #include "InputGlobal.h"
14 #include "SymbolTable.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "lld/Common/Memory.h"
17 #include "llvm/Object/Binary.h"
18 #include "llvm/Object/Wasm.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "lld"
22 
23 using namespace lld;
24 using namespace lld::wasm;
25 
26 using namespace llvm;
27 using namespace llvm::object;
28 using namespace llvm::wasm;
29 
30 Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) {
31   log("Loading: " + Path);
32 
33   auto MBOrErr = MemoryBuffer::getFile(Path);
34   if (auto EC = MBOrErr.getError()) {
35     error("cannot open " + Path + ": " + EC.message());
36     return None;
37   }
38   std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
39   MemoryBufferRef MBRef = MB->getMemBufferRef();
40   make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership
41 
42   return MBRef;
43 }
44 
45 InputFile *lld::wasm::createObjectFile(MemoryBufferRef MB) {
46   file_magic Magic = identify_magic(MB.getBuffer());
47   if (Magic == file_magic::wasm_object)
48     return make<ObjFile>(MB);
49 
50   if (Magic == file_magic::bitcode)
51     return make<BitcodeFile>(MB);
52 
53   fatal("unknown file type: " + MB.getBufferIdentifier());
54 }
55 
56 void ObjFile::dumpInfo() const {
57   log("info for: " + getName() +
58       "\n              Symbols : " + Twine(Symbols.size()) +
59       "\n     Function Imports : " + Twine(WasmObj->getNumImportedFunctions()) +
60       "\n       Global Imports : " + Twine(WasmObj->getNumImportedGlobals()) +
61       "\n        Event Imports : " + Twine(WasmObj->getNumImportedEvents()));
62 }
63 
64 // Relocations contain either symbol or type indices.  This function takes a
65 // relocation and returns relocated index (i.e. translates from the input
66 // symbol/type space to the output symbol/type space).
67 uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const {
68   if (Reloc.Type == R_WASM_TYPE_INDEX_LEB) {
69     assert(TypeIsUsed[Reloc.Index]);
70     return TypeMap[Reloc.Index];
71   }
72   return Symbols[Reloc.Index]->getOutputSymbolIndex();
73 }
74 
75 // Relocations can contain addend for combined sections. This function takes a
76 // relocation and returns updated addend by offset in the output section.
77 uint32_t ObjFile::calcNewAddend(const WasmRelocation &Reloc) const {
78   switch (Reloc.Type) {
79   case R_WASM_MEMORY_ADDR_LEB:
80   case R_WASM_MEMORY_ADDR_SLEB:
81   case R_WASM_MEMORY_ADDR_I32:
82   case R_WASM_FUNCTION_OFFSET_I32:
83     return Reloc.Addend;
84   case R_WASM_SECTION_OFFSET_I32:
85     return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend;
86   default:
87     llvm_unreachable("unexpected relocation type");
88   }
89 }
90 
91 // Calculate the value we expect to find at the relocation location.
92 // This is used as a sanity check before applying a relocation to a given
93 // location.  It is useful for catching bugs in the compiler and linker.
94 uint32_t ObjFile::calcExpectedValue(const WasmRelocation &Reloc) const {
95   switch (Reloc.Type) {
96   case R_WASM_TABLE_INDEX_I32:
97   case R_WASM_TABLE_INDEX_SLEB: {
98     const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index];
99     return TableEntries[Sym.Info.ElementIndex];
100   }
101   case R_WASM_MEMORY_ADDR_SLEB:
102   case R_WASM_MEMORY_ADDR_I32:
103   case R_WASM_MEMORY_ADDR_LEB: {
104     const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index];
105     if (Sym.isUndefined())
106       return 0;
107     const WasmSegment &Segment =
108         WasmObj->dataSegments()[Sym.Info.DataRef.Segment];
109     return Segment.Data.Offset.Value.Int32 + Sym.Info.DataRef.Offset +
110            Reloc.Addend;
111   }
112   case R_WASM_FUNCTION_OFFSET_I32:
113     if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) {
114       return Sym->Function->getFunctionInputOffset() +
115              Sym->Function->getFunctionCodeOffset() + Reloc.Addend;
116     }
117     return 0;
118   case R_WASM_SECTION_OFFSET_I32:
119     return Reloc.Addend;
120   case R_WASM_TYPE_INDEX_LEB:
121     return Reloc.Index;
122   case R_WASM_FUNCTION_INDEX_LEB:
123   case R_WASM_GLOBAL_INDEX_LEB:
124   case R_WASM_EVENT_INDEX_LEB: {
125     const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index];
126     return Sym.Info.ElementIndex;
127   }
128   default:
129     llvm_unreachable("unknown relocation type");
130   }
131 }
132 
133 // Translate from the relocation's index into the final linked output value.
134 uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const {
135   switch (Reloc.Type) {
136   case R_WASM_TABLE_INDEX_I32:
137   case R_WASM_TABLE_INDEX_SLEB:
138     return getFunctionSymbol(Reloc.Index)->getTableIndex();
139   case R_WASM_MEMORY_ADDR_SLEB:
140   case R_WASM_MEMORY_ADDR_I32:
141   case R_WASM_MEMORY_ADDR_LEB:
142     if (auto *Sym = dyn_cast<DefinedData>(getDataSymbol(Reloc.Index)))
143       if (Sym->isLive())
144         return Sym->getVirtualAddress() + Reloc.Addend;
145     return 0;
146   case R_WASM_TYPE_INDEX_LEB:
147     return TypeMap[Reloc.Index];
148   case R_WASM_FUNCTION_INDEX_LEB:
149     return getFunctionSymbol(Reloc.Index)->getFunctionIndex();
150   case R_WASM_GLOBAL_INDEX_LEB:
151     return getGlobalSymbol(Reloc.Index)->getGlobalIndex();
152   case R_WASM_EVENT_INDEX_LEB:
153     return getEventSymbol(Reloc.Index)->getEventIndex();
154   case R_WASM_FUNCTION_OFFSET_I32:
155     if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) {
156       if (Sym->isLive())
157         return Sym->Function->OutputOffset +
158                Sym->Function->getFunctionCodeOffset() + Reloc.Addend;
159     }
160     return 0;
161   case R_WASM_SECTION_OFFSET_I32:
162     return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend;
163   default:
164     llvm_unreachable("unknown relocation type");
165   }
166 }
167 
168 template <class T>
169 static void setRelocs(const std::vector<T *> &Chunks,
170                       const WasmSection *Section) {
171   if (!Section)
172     return;
173 
174   ArrayRef<WasmRelocation> Relocs = Section->Relocations;
175   assert(std::is_sorted(Relocs.begin(), Relocs.end(),
176                         [](const WasmRelocation &R1, const WasmRelocation &R2) {
177                           return R1.Offset < R2.Offset;
178                         }));
179   assert(std::is_sorted(
180       Chunks.begin(), Chunks.end(), [](InputChunk *C1, InputChunk *C2) {
181         return C1->getInputSectionOffset() < C2->getInputSectionOffset();
182       }));
183 
184   auto RelocsNext = Relocs.begin();
185   auto RelocsEnd = Relocs.end();
186   auto RelocLess = [](const WasmRelocation &R, uint32_t Val) {
187     return R.Offset < Val;
188   };
189   for (InputChunk *C : Chunks) {
190     auto RelocsStart = std::lower_bound(RelocsNext, RelocsEnd,
191                                         C->getInputSectionOffset(), RelocLess);
192     RelocsNext = std::lower_bound(
193         RelocsStart, RelocsEnd, C->getInputSectionOffset() + C->getInputSize(),
194         RelocLess);
195     C->setRelocations(ArrayRef<WasmRelocation>(RelocsStart, RelocsNext));
196   }
197 }
198 
199 void ObjFile::parse() {
200   // Parse a memory buffer as a wasm file.
201   LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n");
202   std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this));
203 
204   auto *Obj = dyn_cast<WasmObjectFile>(Bin.get());
205   if (!Obj)
206     fatal(toString(this) + ": not a wasm file");
207   if (!Obj->isRelocatableObject())
208     fatal(toString(this) + ": not a relocatable wasm file");
209 
210   Bin.release();
211   WasmObj.reset(Obj);
212 
213   // Build up a map of function indices to table indices for use when
214   // verifying the existing table index relocations
215   uint32_t TotalFunctions =
216       WasmObj->getNumImportedFunctions() + WasmObj->functions().size();
217   TableEntries.resize(TotalFunctions);
218   for (const WasmElemSegment &Seg : WasmObj->elements()) {
219     if (Seg.Offset.Opcode != WASM_OPCODE_I32_CONST)
220       fatal(toString(this) + ": invalid table elements");
221     uint32_t Offset = Seg.Offset.Value.Int32;
222     for (uint32_t Index = 0; Index < Seg.Functions.size(); Index++) {
223 
224       uint32_t FunctionIndex = Seg.Functions[Index];
225       TableEntries[FunctionIndex] = Offset + Index;
226     }
227   }
228 
229   // Find the code and data sections.  Wasm objects can have at most one code
230   // and one data section.
231   uint32_t SectionIndex = 0;
232   for (const SectionRef &Sec : WasmObj->sections()) {
233     const WasmSection &Section = WasmObj->getWasmSection(Sec);
234     if (Section.Type == WASM_SEC_CODE) {
235       CodeSection = &Section;
236     } else if (Section.Type == WASM_SEC_DATA) {
237       DataSection = &Section;
238     } else if (Section.Type == WASM_SEC_CUSTOM) {
239       CustomSections.emplace_back(make<InputSection>(Section, this));
240       CustomSections.back()->setRelocations(Section.Relocations);
241       CustomSectionsByIndex[SectionIndex] = CustomSections.back();
242       if (Section.Name == "producers")
243         ProducersSection = &Section;
244     }
245     SectionIndex++;
246   }
247 
248   TypeMap.resize(getWasmObj()->types().size());
249   TypeIsUsed.resize(getWasmObj()->types().size(), false);
250 
251   ArrayRef<StringRef> Comdats = WasmObj->linkingData().Comdats;
252   UsedComdats.resize(Comdats.size());
253   for (unsigned I = 0; I < Comdats.size(); ++I)
254     UsedComdats[I] = Symtab->addComdat(Comdats[I]);
255 
256   // Populate `Segments`.
257   for (const WasmSegment &S : WasmObj->dataSegments())
258     Segments.emplace_back(make<InputSegment>(S, this));
259   setRelocs(Segments, DataSection);
260 
261   // Populate `Functions`.
262   ArrayRef<WasmFunction> Funcs = WasmObj->functions();
263   ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes();
264   ArrayRef<WasmSignature> Types = WasmObj->types();
265   Functions.reserve(Funcs.size());
266 
267   for (size_t I = 0, E = Funcs.size(); I != E; ++I)
268     Functions.emplace_back(
269         make<InputFunction>(Types[FuncTypes[I]], &Funcs[I], this));
270   setRelocs(Functions, CodeSection);
271 
272   // Populate `Globals`.
273   for (const WasmGlobal &G : WasmObj->globals())
274     Globals.emplace_back(make<InputGlobal>(G, this));
275 
276   // Populate `Events`.
277   for (const WasmEvent &E : WasmObj->events())
278     Events.emplace_back(make<InputEvent>(Types[E.Type.SigIndex], E, this));
279 
280   // Populate `Symbols` based on the WasmSymbols in the object.
281   Symbols.reserve(WasmObj->getNumberOfSymbols());
282   for (const SymbolRef &Sym : WasmObj->symbols()) {
283     const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl());
284     if (Symbol *Sym = createDefined(WasmSym))
285       Symbols.push_back(Sym);
286     else
287       Symbols.push_back(createUndefined(WasmSym));
288   }
289 }
290 
291 bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const {
292   uint32_t C = Chunk->getComdat();
293   if (C == UINT32_MAX)
294     return false;
295   return !UsedComdats[C];
296 }
297 
298 FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t Index) const {
299   return cast<FunctionSymbol>(Symbols[Index]);
300 }
301 
302 GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t Index) const {
303   return cast<GlobalSymbol>(Symbols[Index]);
304 }
305 
306 EventSymbol *ObjFile::getEventSymbol(uint32_t Index) const {
307   return cast<EventSymbol>(Symbols[Index]);
308 }
309 
310 SectionSymbol *ObjFile::getSectionSymbol(uint32_t Index) const {
311   return cast<SectionSymbol>(Symbols[Index]);
312 }
313 
314 DataSymbol *ObjFile::getDataSymbol(uint32_t Index) const {
315   return cast<DataSymbol>(Symbols[Index]);
316 }
317 
318 Symbol *ObjFile::createDefined(const WasmSymbol &Sym) {
319   if (!Sym.isDefined())
320     return nullptr;
321 
322   StringRef Name = Sym.Info.Name;
323   uint32_t Flags = Sym.Info.Flags;
324 
325   switch (Sym.Info.Kind) {
326   case WASM_SYMBOL_TYPE_FUNCTION: {
327     InputFunction *Func =
328         Functions[Sym.Info.ElementIndex - WasmObj->getNumImportedFunctions()];
329     if (isExcludedByComdat(Func)) {
330       Func->Live = false;
331       return nullptr;
332     }
333 
334     if (Sym.isBindingLocal())
335       return make<DefinedFunction>(Name, Flags, this, Func);
336     return Symtab->addDefinedFunction(Name, Flags, this, Func);
337   }
338   case WASM_SYMBOL_TYPE_DATA: {
339     InputSegment *Seg = Segments[Sym.Info.DataRef.Segment];
340     if (isExcludedByComdat(Seg)) {
341       Seg->Live = false;
342       return nullptr;
343     }
344 
345     uint32_t Offset = Sym.Info.DataRef.Offset;
346     uint32_t Size = Sym.Info.DataRef.Size;
347 
348     if (Sym.isBindingLocal())
349       return make<DefinedData>(Name, Flags, this, Seg, Offset, Size);
350     return Symtab->addDefinedData(Name, Flags, this, Seg, Offset, Size);
351   }
352   case WASM_SYMBOL_TYPE_GLOBAL: {
353     InputGlobal *Global =
354         Globals[Sym.Info.ElementIndex - WasmObj->getNumImportedGlobals()];
355     if (Sym.isBindingLocal())
356       return make<DefinedGlobal>(Name, Flags, this, Global);
357     return Symtab->addDefinedGlobal(Name, Flags, this, Global);
358   }
359   case WASM_SYMBOL_TYPE_SECTION: {
360     InputSection *Section = CustomSectionsByIndex[Sym.Info.ElementIndex];
361     assert(Sym.isBindingLocal());
362     return make<SectionSymbol>(Name, Flags, Section, this);
363   }
364   case WASM_SYMBOL_TYPE_EVENT: {
365     InputEvent *Event =
366         Events[Sym.Info.ElementIndex - WasmObj->getNumImportedEvents()];
367     if (Sym.isBindingLocal())
368       return make<DefinedEvent>(Name, Flags, this, Event);
369     return Symtab->addDefinedEvent(Name, Flags, this, Event);
370   }
371   }
372   llvm_unreachable("unknown symbol kind");
373 }
374 
375 Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) {
376   StringRef Name = Sym.Info.Name;
377   uint32_t Flags = Sym.Info.Flags;
378 
379   switch (Sym.Info.Kind) {
380   case WASM_SYMBOL_TYPE_FUNCTION:
381     return Symtab->addUndefinedFunction(Name, Sym.Info.ImportName,
382                                         Sym.Info.ImportModule, Flags, this,
383                                         Sym.Signature);
384   case WASM_SYMBOL_TYPE_DATA:
385     return Symtab->addUndefinedData(Name, Flags, this);
386   case WASM_SYMBOL_TYPE_GLOBAL:
387     return Symtab->addUndefinedGlobal(Name, Sym.Info.ImportName,
388                                       Sym.Info.ImportModule, Flags, this,
389                                       Sym.GlobalType);
390   case WASM_SYMBOL_TYPE_SECTION:
391     llvm_unreachable("section symbols cannot be undefined");
392   }
393   llvm_unreachable("unknown symbol kind");
394 }
395 
396 void ArchiveFile::parse() {
397   // Parse a MemoryBufferRef as an archive file.
398   LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
399   File = CHECK(Archive::create(MB), toString(this));
400 
401   // Read the symbol table to construct Lazy symbols.
402   int Count = 0;
403   for (const Archive::Symbol &Sym : File->symbols()) {
404     Symtab->addLazy(this, &Sym);
405     ++Count;
406   }
407   LLVM_DEBUG(dbgs() << "Read " << Count << " symbols\n");
408 }
409 
410 void ArchiveFile::addMember(const Archive::Symbol *Sym) {
411   const Archive::Child &C =
412       CHECK(Sym->getMember(),
413             "could not get the member for symbol " + Sym->getName());
414 
415   // Don't try to load the same member twice (this can happen when members
416   // mutually reference each other).
417   if (!Seen.insert(C.getChildOffset()).second)
418     return;
419 
420   LLVM_DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n");
421   LLVM_DEBUG(dbgs() << "from archive: " << toString(this) << "\n");
422 
423   MemoryBufferRef MB =
424       CHECK(C.getMemoryBufferRef(),
425             "could not get the buffer for the member defining symbol " +
426                 Sym->getName());
427 
428   InputFile *Obj = createObjectFile(MB);
429   Obj->ArchiveName = getName();
430   Symtab->addFile(Obj);
431 }
432 
433 static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) {
434   switch (GvVisibility) {
435   case GlobalValue::DefaultVisibility:
436     return WASM_SYMBOL_VISIBILITY_DEFAULT;
437   case GlobalValue::HiddenVisibility:
438   case GlobalValue::ProtectedVisibility:
439     return WASM_SYMBOL_VISIBILITY_HIDDEN;
440   }
441   llvm_unreachable("unknown visibility");
442 }
443 
444 static Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &ObjSym,
445                                    BitcodeFile &F) {
446   StringRef Name = Saver.save(ObjSym.getName());
447 
448   uint32_t Flags = ObjSym.isWeak() ? WASM_SYMBOL_BINDING_WEAK : 0;
449   Flags |= mapVisibility(ObjSym.getVisibility());
450 
451   if (ObjSym.isUndefined()) {
452     if (ObjSym.isExecutable())
453       return Symtab->addUndefinedFunction(Name, Name, DefaultModule, Flags, &F,
454                                           nullptr);
455     return Symtab->addUndefinedData(Name, Flags, &F);
456   }
457 
458   if (ObjSym.isExecutable())
459     return Symtab->addDefinedFunction(Name, Flags, &F, nullptr);
460   return Symtab->addDefinedData(Name, Flags, &F, nullptr, 0, 0);
461 }
462 
463 void BitcodeFile::parse() {
464   Obj = check(lto::InputFile::create(MemoryBufferRef(
465       MB.getBuffer(), Saver.save(ArchiveName + MB.getBufferIdentifier()))));
466   Triple T(Obj->getTargetTriple());
467   if (T.getArch() != Triple::wasm32) {
468     error(toString(MB.getBufferIdentifier()) + ": machine type must be wasm32");
469     return;
470   }
471 
472   for (const lto::InputFile::Symbol &ObjSym : Obj->symbols())
473     Symbols.push_back(createBitcodeSymbol(ObjSym, *this));
474 }
475 
476 // Returns a string in the format of "foo.o" or "foo.a(bar.o)".
477 std::string lld::toString(const wasm::InputFile *File) {
478   if (!File)
479     return "<internal>";
480 
481   if (File->ArchiveName.empty())
482     return File->getName();
483 
484   return (File->ArchiveName + "(" + File->getName() + ")").str();
485 }
486