1 //===- InputFiles.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "InputFiles.h"
10 #include "Config.h"
11 #include "InputChunks.h"
12 #include "InputEvent.h"
13 #include "InputGlobal.h"
14 #include "SymbolTable.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "lld/Common/Memory.h"
17 #include "llvm/Object/Binary.h"
18 #include "llvm/Object/Wasm.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "lld"
22 
23 using namespace lld;
24 using namespace lld::wasm;
25 
26 using namespace llvm;
27 using namespace llvm::object;
28 using namespace llvm::wasm;
29 
30 Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) {
31   log("Loading: " + Path);
32 
33   auto MBOrErr = MemoryBuffer::getFile(Path);
34   if (auto EC = MBOrErr.getError()) {
35     error("cannot open " + Path + ": " + EC.message());
36     return None;
37   }
38   std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
39   MemoryBufferRef MBRef = MB->getMemBufferRef();
40   make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership
41 
42   return MBRef;
43 }
44 
45 InputFile *lld::wasm::createObjectFile(MemoryBufferRef MB) {
46   file_magic Magic = identify_magic(MB.getBuffer());
47   if (Magic == file_magic::wasm_object) {
48     std::unique_ptr<Binary> Bin = check(createBinary(MB));
49     auto *Obj = cast<WasmObjectFile>(Bin.get());
50     if (Obj->isSharedObject())
51       return make<SharedFile>(MB);
52     return make<ObjFile>(MB);
53   }
54 
55   if (Magic == file_magic::bitcode)
56     return make<BitcodeFile>(MB);
57 
58   fatal("unknown file type: " + MB.getBufferIdentifier());
59 }
60 
61 void ObjFile::dumpInfo() const {
62   log("info for: " + getName() +
63       "\n              Symbols : " + Twine(Symbols.size()) +
64       "\n     Function Imports : " + Twine(WasmObj->getNumImportedFunctions()) +
65       "\n       Global Imports : " + Twine(WasmObj->getNumImportedGlobals()) +
66       "\n        Event Imports : " + Twine(WasmObj->getNumImportedEvents()));
67 }
68 
69 // Relocations contain either symbol or type indices.  This function takes a
70 // relocation and returns relocated index (i.e. translates from the input
71 // symbol/type space to the output symbol/type space).
72 uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const {
73   if (Reloc.Type == R_WASM_TYPE_INDEX_LEB) {
74     assert(TypeIsUsed[Reloc.Index]);
75     return TypeMap[Reloc.Index];
76   }
77   return Symbols[Reloc.Index]->getOutputSymbolIndex();
78 }
79 
80 // Relocations can contain addend for combined sections. This function takes a
81 // relocation and returns updated addend by offset in the output section.
82 uint32_t ObjFile::calcNewAddend(const WasmRelocation &Reloc) const {
83   switch (Reloc.Type) {
84   case R_WASM_MEMORY_ADDR_LEB:
85   case R_WASM_MEMORY_ADDR_SLEB:
86   case R_WASM_MEMORY_ADDR_I32:
87   case R_WASM_FUNCTION_OFFSET_I32:
88     return Reloc.Addend;
89   case R_WASM_SECTION_OFFSET_I32:
90     return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend;
91   default:
92     llvm_unreachable("unexpected relocation type");
93   }
94 }
95 
96 // Calculate the value we expect to find at the relocation location.
97 // This is used as a sanity check before applying a relocation to a given
98 // location.  It is useful for catching bugs in the compiler and linker.
99 uint32_t ObjFile::calcExpectedValue(const WasmRelocation &Reloc) const {
100   switch (Reloc.Type) {
101   case R_WASM_TABLE_INDEX_I32:
102   case R_WASM_TABLE_INDEX_SLEB: {
103     const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index];
104     return TableEntries[Sym.Info.ElementIndex];
105   }
106   case R_WASM_MEMORY_ADDR_SLEB:
107   case R_WASM_MEMORY_ADDR_I32:
108   case R_WASM_MEMORY_ADDR_LEB: {
109     const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index];
110     if (Sym.isUndefined())
111       return 0;
112     const WasmSegment &Segment =
113         WasmObj->dataSegments()[Sym.Info.DataRef.Segment];
114     return Segment.Data.Offset.Value.Int32 + Sym.Info.DataRef.Offset +
115            Reloc.Addend;
116   }
117   case R_WASM_FUNCTION_OFFSET_I32:
118     if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) {
119       return Sym->Function->getFunctionInputOffset() +
120              Sym->Function->getFunctionCodeOffset() + Reloc.Addend;
121     }
122     return 0;
123   case R_WASM_SECTION_OFFSET_I32:
124     return Reloc.Addend;
125   case R_WASM_TYPE_INDEX_LEB:
126     return Reloc.Index;
127   case R_WASM_FUNCTION_INDEX_LEB:
128   case R_WASM_GLOBAL_INDEX_LEB:
129   case R_WASM_EVENT_INDEX_LEB: {
130     const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index];
131     return Sym.Info.ElementIndex;
132   }
133   default:
134     llvm_unreachable("unknown relocation type");
135   }
136 }
137 
138 // Translate from the relocation's index into the final linked output value.
139 uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const {
140   switch (Reloc.Type) {
141   case R_WASM_TABLE_INDEX_I32:
142   case R_WASM_TABLE_INDEX_SLEB:
143     return getFunctionSymbol(Reloc.Index)->getTableIndex();
144   case R_WASM_MEMORY_ADDR_SLEB:
145   case R_WASM_MEMORY_ADDR_I32:
146   case R_WASM_MEMORY_ADDR_LEB:
147     if (auto *Sym = dyn_cast<DefinedData>(getDataSymbol(Reloc.Index)))
148       if (Sym->isLive())
149         return Sym->getVirtualAddress() + Reloc.Addend;
150     return 0;
151   case R_WASM_TYPE_INDEX_LEB:
152     return TypeMap[Reloc.Index];
153   case R_WASM_FUNCTION_INDEX_LEB:
154     return getFunctionSymbol(Reloc.Index)->getFunctionIndex();
155   case R_WASM_GLOBAL_INDEX_LEB: {
156     const Symbol* Sym = Symbols[Reloc.Index];
157     if (auto GS = dyn_cast<GlobalSymbol>(Sym))
158       return GS->getGlobalIndex();
159     return Sym->getGOTIndex();
160   } case R_WASM_EVENT_INDEX_LEB:
161     return getEventSymbol(Reloc.Index)->getEventIndex();
162   case R_WASM_FUNCTION_OFFSET_I32:
163     if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) {
164       if (Sym->isLive())
165         return Sym->Function->OutputOffset +
166                Sym->Function->getFunctionCodeOffset() + Reloc.Addend;
167     }
168     return 0;
169   case R_WASM_SECTION_OFFSET_I32:
170     return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend;
171   default:
172     llvm_unreachable("unknown relocation type");
173   }
174 }
175 
176 template <class T>
177 static void setRelocs(const std::vector<T *> &Chunks,
178                       const WasmSection *Section) {
179   if (!Section)
180     return;
181 
182   ArrayRef<WasmRelocation> Relocs = Section->Relocations;
183   assert(std::is_sorted(Relocs.begin(), Relocs.end(),
184                         [](const WasmRelocation &R1, const WasmRelocation &R2) {
185                           return R1.Offset < R2.Offset;
186                         }));
187   assert(std::is_sorted(
188       Chunks.begin(), Chunks.end(), [](InputChunk *C1, InputChunk *C2) {
189         return C1->getInputSectionOffset() < C2->getInputSectionOffset();
190       }));
191 
192   auto RelocsNext = Relocs.begin();
193   auto RelocsEnd = Relocs.end();
194   auto RelocLess = [](const WasmRelocation &R, uint32_t Val) {
195     return R.Offset < Val;
196   };
197   for (InputChunk *C : Chunks) {
198     auto RelocsStart = std::lower_bound(RelocsNext, RelocsEnd,
199                                         C->getInputSectionOffset(), RelocLess);
200     RelocsNext = std::lower_bound(
201         RelocsStart, RelocsEnd, C->getInputSectionOffset() + C->getInputSize(),
202         RelocLess);
203     C->setRelocations(ArrayRef<WasmRelocation>(RelocsStart, RelocsNext));
204   }
205 }
206 
207 void ObjFile::parse() {
208   // Parse a memory buffer as a wasm file.
209   LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n");
210   std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this));
211 
212   auto *Obj = dyn_cast<WasmObjectFile>(Bin.get());
213   if (!Obj)
214     fatal(toString(this) + ": not a wasm file");
215   if (!Obj->isRelocatableObject())
216     fatal(toString(this) + ": not a relocatable wasm file");
217 
218   Bin.release();
219   WasmObj.reset(Obj);
220 
221   // Build up a map of function indices to table indices for use when
222   // verifying the existing table index relocations
223   uint32_t TotalFunctions =
224       WasmObj->getNumImportedFunctions() + WasmObj->functions().size();
225   TableEntries.resize(TotalFunctions);
226   for (const WasmElemSegment &Seg : WasmObj->elements()) {
227     if (Seg.Offset.Opcode != WASM_OPCODE_I32_CONST)
228       fatal(toString(this) + ": invalid table elements");
229     uint32_t Offset = Seg.Offset.Value.Int32;
230     for (uint32_t Index = 0; Index < Seg.Functions.size(); Index++) {
231 
232       uint32_t FunctionIndex = Seg.Functions[Index];
233       TableEntries[FunctionIndex] = Offset + Index;
234     }
235   }
236 
237   // Find the code and data sections.  Wasm objects can have at most one code
238   // and one data section.
239   uint32_t SectionIndex = 0;
240   for (const SectionRef &Sec : WasmObj->sections()) {
241     const WasmSection &Section = WasmObj->getWasmSection(Sec);
242     if (Section.Type == WASM_SEC_CODE) {
243       CodeSection = &Section;
244     } else if (Section.Type == WASM_SEC_DATA) {
245       DataSection = &Section;
246     } else if (Section.Type == WASM_SEC_CUSTOM) {
247       CustomSections.emplace_back(make<InputSection>(Section, this));
248       CustomSections.back()->setRelocations(Section.Relocations);
249       CustomSectionsByIndex[SectionIndex] = CustomSections.back();
250     }
251     SectionIndex++;
252   }
253 
254   TypeMap.resize(getWasmObj()->types().size());
255   TypeIsUsed.resize(getWasmObj()->types().size(), false);
256 
257   ArrayRef<StringRef> Comdats = WasmObj->linkingData().Comdats;
258   UsedComdats.resize(Comdats.size());
259   for (unsigned I = 0; I < Comdats.size(); ++I)
260     UsedComdats[I] = Symtab->addComdat(Comdats[I]);
261 
262   // Populate `Segments`.
263   for (const WasmSegment &S : WasmObj->dataSegments())
264     Segments.emplace_back(make<InputSegment>(S, this));
265   setRelocs(Segments, DataSection);
266 
267   // Populate `Functions`.
268   ArrayRef<WasmFunction> Funcs = WasmObj->functions();
269   ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes();
270   ArrayRef<WasmSignature> Types = WasmObj->types();
271   Functions.reserve(Funcs.size());
272 
273   for (size_t I = 0, E = Funcs.size(); I != E; ++I)
274     Functions.emplace_back(
275         make<InputFunction>(Types[FuncTypes[I]], &Funcs[I], this));
276   setRelocs(Functions, CodeSection);
277 
278   // Populate `Globals`.
279   for (const WasmGlobal &G : WasmObj->globals())
280     Globals.emplace_back(make<InputGlobal>(G, this));
281 
282   // Populate `Events`.
283   for (const WasmEvent &E : WasmObj->events())
284     Events.emplace_back(make<InputEvent>(Types[E.Type.SigIndex], E, this));
285 
286   // Populate `Symbols` based on the WasmSymbols in the object.
287   Symbols.reserve(WasmObj->getNumberOfSymbols());
288   for (const SymbolRef &Sym : WasmObj->symbols()) {
289     const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl());
290     if (Symbol *Sym = createDefined(WasmSym))
291       Symbols.push_back(Sym);
292     else
293       Symbols.push_back(createUndefined(WasmSym));
294   }
295 }
296 
297 bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const {
298   uint32_t C = Chunk->getComdat();
299   if (C == UINT32_MAX)
300     return false;
301   return !UsedComdats[C];
302 }
303 
304 FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t Index) const {
305   return cast<FunctionSymbol>(Symbols[Index]);
306 }
307 
308 GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t Index) const {
309   return cast<GlobalSymbol>(Symbols[Index]);
310 }
311 
312 EventSymbol *ObjFile::getEventSymbol(uint32_t Index) const {
313   return cast<EventSymbol>(Symbols[Index]);
314 }
315 
316 SectionSymbol *ObjFile::getSectionSymbol(uint32_t Index) const {
317   return cast<SectionSymbol>(Symbols[Index]);
318 }
319 
320 DataSymbol *ObjFile::getDataSymbol(uint32_t Index) const {
321   return cast<DataSymbol>(Symbols[Index]);
322 }
323 
324 Symbol *ObjFile::createDefined(const WasmSymbol &Sym) {
325   if (!Sym.isDefined())
326     return nullptr;
327 
328   StringRef Name = Sym.Info.Name;
329   uint32_t Flags = Sym.Info.Flags;
330 
331   switch (Sym.Info.Kind) {
332   case WASM_SYMBOL_TYPE_FUNCTION: {
333     InputFunction *Func =
334         Functions[Sym.Info.ElementIndex - WasmObj->getNumImportedFunctions()];
335     if (isExcludedByComdat(Func)) {
336       Func->Live = false;
337       return nullptr;
338     }
339 
340     if (Sym.isBindingLocal())
341       return make<DefinedFunction>(Name, Flags, this, Func);
342     return Symtab->addDefinedFunction(Name, Flags, this, Func);
343   }
344   case WASM_SYMBOL_TYPE_DATA: {
345     InputSegment *Seg = Segments[Sym.Info.DataRef.Segment];
346     if (isExcludedByComdat(Seg)) {
347       Seg->Live = false;
348       return nullptr;
349     }
350 
351     uint32_t Offset = Sym.Info.DataRef.Offset;
352     uint32_t Size = Sym.Info.DataRef.Size;
353 
354     if (Sym.isBindingLocal())
355       return make<DefinedData>(Name, Flags, this, Seg, Offset, Size);
356     return Symtab->addDefinedData(Name, Flags, this, Seg, Offset, Size);
357   }
358   case WASM_SYMBOL_TYPE_GLOBAL: {
359     InputGlobal *Global =
360         Globals[Sym.Info.ElementIndex - WasmObj->getNumImportedGlobals()];
361     if (Sym.isBindingLocal())
362       return make<DefinedGlobal>(Name, Flags, this, Global);
363     return Symtab->addDefinedGlobal(Name, Flags, this, Global);
364   }
365   case WASM_SYMBOL_TYPE_SECTION: {
366     InputSection *Section = CustomSectionsByIndex[Sym.Info.ElementIndex];
367     assert(Sym.isBindingLocal());
368     return make<SectionSymbol>(Name, Flags, Section, this);
369   }
370   case WASM_SYMBOL_TYPE_EVENT: {
371     InputEvent *Event =
372         Events[Sym.Info.ElementIndex - WasmObj->getNumImportedEvents()];
373     if (Sym.isBindingLocal())
374       return make<DefinedEvent>(Name, Flags, this, Event);
375     return Symtab->addDefinedEvent(Name, Flags, this, Event);
376   }
377   }
378   llvm_unreachable("unknown symbol kind");
379 }
380 
381 Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) {
382   StringRef Name = Sym.Info.Name;
383   uint32_t Flags = Sym.Info.Flags;
384 
385   switch (Sym.Info.Kind) {
386   case WASM_SYMBOL_TYPE_FUNCTION:
387     return Symtab->addUndefinedFunction(Name, Sym.Info.ImportName,
388                                         Sym.Info.ImportModule, Flags, this,
389                                         Sym.Signature);
390   case WASM_SYMBOL_TYPE_DATA:
391     return Symtab->addUndefinedData(Name, Flags, this);
392   case WASM_SYMBOL_TYPE_GLOBAL:
393     return Symtab->addUndefinedGlobal(Name, Sym.Info.ImportName,
394                                       Sym.Info.ImportModule, Flags, this,
395                                       Sym.GlobalType);
396   case WASM_SYMBOL_TYPE_SECTION:
397     llvm_unreachable("section symbols cannot be undefined");
398   }
399   llvm_unreachable("unknown symbol kind");
400 }
401 
402 void ArchiveFile::parse() {
403   // Parse a MemoryBufferRef as an archive file.
404   LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
405   File = CHECK(Archive::create(MB), toString(this));
406 
407   // Read the symbol table to construct Lazy symbols.
408   int Count = 0;
409   for (const Archive::Symbol &Sym : File->symbols()) {
410     Symtab->addLazy(this, &Sym);
411     ++Count;
412   }
413   LLVM_DEBUG(dbgs() << "Read " << Count << " symbols\n");
414 }
415 
416 void ArchiveFile::addMember(const Archive::Symbol *Sym) {
417   const Archive::Child &C =
418       CHECK(Sym->getMember(),
419             "could not get the member for symbol " + Sym->getName());
420 
421   // Don't try to load the same member twice (this can happen when members
422   // mutually reference each other).
423   if (!Seen.insert(C.getChildOffset()).second)
424     return;
425 
426   LLVM_DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n");
427   LLVM_DEBUG(dbgs() << "from archive: " << toString(this) << "\n");
428 
429   MemoryBufferRef MB =
430       CHECK(C.getMemoryBufferRef(),
431             "could not get the buffer for the member defining symbol " +
432                 Sym->getName());
433 
434   InputFile *Obj = createObjectFile(MB);
435   Obj->ArchiveName = getName();
436   Symtab->addFile(Obj);
437 }
438 
439 static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) {
440   switch (GvVisibility) {
441   case GlobalValue::DefaultVisibility:
442     return WASM_SYMBOL_VISIBILITY_DEFAULT;
443   case GlobalValue::HiddenVisibility:
444   case GlobalValue::ProtectedVisibility:
445     return WASM_SYMBOL_VISIBILITY_HIDDEN;
446   }
447   llvm_unreachable("unknown visibility");
448 }
449 
450 static Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &ObjSym,
451                                    BitcodeFile &F) {
452   StringRef Name = Saver.save(ObjSym.getName());
453 
454   uint32_t Flags = ObjSym.isWeak() ? WASM_SYMBOL_BINDING_WEAK : 0;
455   Flags |= mapVisibility(ObjSym.getVisibility());
456 
457   if (ObjSym.isUndefined()) {
458     if (ObjSym.isExecutable())
459       return Symtab->addUndefinedFunction(Name, Name, DefaultModule, Flags, &F,
460                                           nullptr);
461     return Symtab->addUndefinedData(Name, Flags, &F);
462   }
463 
464   if (ObjSym.isExecutable())
465     return Symtab->addDefinedFunction(Name, Flags, &F, nullptr);
466   return Symtab->addDefinedData(Name, Flags, &F, nullptr, 0, 0);
467 }
468 
469 void BitcodeFile::parse() {
470   Obj = check(lto::InputFile::create(MemoryBufferRef(
471       MB.getBuffer(), Saver.save(ArchiveName + MB.getBufferIdentifier()))));
472   Triple T(Obj->getTargetTriple());
473   if (T.getArch() != Triple::wasm32) {
474     error(toString(MB.getBufferIdentifier()) + ": machine type must be wasm32");
475     return;
476   }
477 
478   for (const lto::InputFile::Symbol &ObjSym : Obj->symbols())
479     Symbols.push_back(createBitcodeSymbol(ObjSym, *this));
480 }
481 
482 // Returns a string in the format of "foo.o" or "foo.a(bar.o)".
483 std::string lld::toString(const wasm::InputFile *File) {
484   if (!File)
485     return "<internal>";
486 
487   if (File->ArchiveName.empty())
488     return File->getName();
489 
490   return (File->ArchiveName + "(" + File->getName() + ")").str();
491 }
492