1 //===- InputFiles.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "InputFiles.h"
10 #include "Config.h"
11 #include "InputChunks.h"
12 #include "InputEvent.h"
13 #include "InputGlobal.h"
14 #include "SymbolTable.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "lld/Common/Memory.h"
17 #include "llvm/Object/Binary.h"
18 #include "llvm/Object/Wasm.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "lld"
22 
23 using namespace lld;
24 using namespace lld::wasm;
25 
26 using namespace llvm;
27 using namespace llvm::object;
28 using namespace llvm::wasm;
29 
30 Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) {
31   log("Loading: " + Path);
32 
33   auto MBOrErr = MemoryBuffer::getFile(Path);
34   if (auto EC = MBOrErr.getError()) {
35     error("cannot open " + Path + ": " + EC.message());
36     return None;
37   }
38   std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
39   MemoryBufferRef MBRef = MB->getMemBufferRef();
40   make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership
41 
42   return MBRef;
43 }
44 
45 InputFile *lld::wasm::createObjectFile(MemoryBufferRef MB,
46                                        StringRef ArchiveName) {
47   file_magic Magic = identify_magic(MB.getBuffer());
48   if (Magic == file_magic::wasm_object) {
49     std::unique_ptr<Binary> Bin = check(createBinary(MB));
50     auto *Obj = cast<WasmObjectFile>(Bin.get());
51     if (Obj->isSharedObject())
52       return make<SharedFile>(MB);
53     return make<ObjFile>(MB, ArchiveName);
54   }
55 
56   if (Magic == file_magic::bitcode)
57     return make<BitcodeFile>(MB, ArchiveName);
58 
59   fatal("unknown file type: " + MB.getBufferIdentifier());
60 }
61 
62 void ObjFile::dumpInfo() const {
63   log("info for: " + toString(this) +
64       "\n              Symbols : " + Twine(Symbols.size()) +
65       "\n     Function Imports : " + Twine(WasmObj->getNumImportedFunctions()) +
66       "\n       Global Imports : " + Twine(WasmObj->getNumImportedGlobals()) +
67       "\n        Event Imports : " + Twine(WasmObj->getNumImportedEvents()));
68 }
69 
70 // Relocations contain either symbol or type indices.  This function takes a
71 // relocation and returns relocated index (i.e. translates from the input
72 // symbol/type space to the output symbol/type space).
73 uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const {
74   if (Reloc.Type == R_WASM_TYPE_INDEX_LEB) {
75     assert(TypeIsUsed[Reloc.Index]);
76     return TypeMap[Reloc.Index];
77   }
78   return Symbols[Reloc.Index]->getOutputSymbolIndex();
79 }
80 
81 // Relocations can contain addend for combined sections. This function takes a
82 // relocation and returns updated addend by offset in the output section.
83 uint32_t ObjFile::calcNewAddend(const WasmRelocation &Reloc) const {
84   switch (Reloc.Type) {
85   case R_WASM_MEMORY_ADDR_LEB:
86   case R_WASM_MEMORY_ADDR_SLEB:
87   case R_WASM_MEMORY_ADDR_I32:
88   case R_WASM_FUNCTION_OFFSET_I32:
89     return Reloc.Addend;
90   case R_WASM_SECTION_OFFSET_I32:
91     return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend;
92   default:
93     llvm_unreachable("unexpected relocation type");
94   }
95 }
96 
97 // Calculate the value we expect to find at the relocation location.
98 // This is used as a sanity check before applying a relocation to a given
99 // location.  It is useful for catching bugs in the compiler and linker.
100 uint32_t ObjFile::calcExpectedValue(const WasmRelocation &Reloc) const {
101   switch (Reloc.Type) {
102   case R_WASM_TABLE_INDEX_I32:
103   case R_WASM_TABLE_INDEX_SLEB:
104   case R_WASM_TABLE_INDEX_REL_SLEB: {
105     const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index];
106     return TableEntries[Sym.Info.ElementIndex];
107   }
108   case R_WASM_MEMORY_ADDR_SLEB:
109   case R_WASM_MEMORY_ADDR_I32:
110   case R_WASM_MEMORY_ADDR_LEB:
111   case R_WASM_MEMORY_ADDR_REL_SLEB: {
112     const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index];
113     if (Sym.isUndefined())
114       return 0;
115     const WasmSegment &Segment =
116         WasmObj->dataSegments()[Sym.Info.DataRef.Segment];
117     return Segment.Data.Offset.Value.Int32 + Sym.Info.DataRef.Offset +
118            Reloc.Addend;
119   }
120   case R_WASM_FUNCTION_OFFSET_I32: {
121     const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index];
122     InputFunction *F =
123         Functions[Sym.Info.ElementIndex - WasmObj->getNumImportedFunctions()];
124     return F->getFunctionInputOffset() + F->getFunctionCodeOffset() +
125            Reloc.Addend;
126   }
127   case R_WASM_SECTION_OFFSET_I32:
128     return Reloc.Addend;
129   case R_WASM_TYPE_INDEX_LEB:
130     return Reloc.Index;
131   case R_WASM_FUNCTION_INDEX_LEB:
132   case R_WASM_GLOBAL_INDEX_LEB:
133   case R_WASM_EVENT_INDEX_LEB: {
134     const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index];
135     return Sym.Info.ElementIndex;
136   }
137   default:
138     llvm_unreachable("unknown relocation type");
139   }
140 }
141 
142 // Translate from the relocation's index into the final linked output value.
143 uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const {
144   const Symbol* Sym = nullptr;
145   if (Reloc.Type != R_WASM_TYPE_INDEX_LEB) {
146     Sym = Symbols[Reloc.Index];
147 
148     // We can end up with relocations against non-live symbols.  For example
149     // in debug sections.
150     if ((isa<FunctionSymbol>(Sym) || isa<DataSymbol>(Sym)) && !Sym->isLive())
151       return 0;
152 
153     // Special handling for undefined data symbols.  Most relocations against
154     // such symbols cannot be resolved.
155     if (isa<DataSymbol>(Sym) && Sym->isUndefined()) {
156       if (Sym->isWeak() || Config->Relocatable)
157         return 0;
158       // R_WASM_MEMORY_ADDR_I32 relocations in PIC code are turned into runtime
159       // fixups in __wasm_apply_relocs
160       if (Config->Pic && Reloc.Type == R_WASM_MEMORY_ADDR_I32)
161         return 0;
162       if (Reloc.Type != R_WASM_GLOBAL_INDEX_LEB) {
163         llvm_unreachable(
164           ("invalid relocation against undefined data symbol: " + toString(*Sym))
165               .c_str());
166       }
167     }
168   }
169 
170   switch (Reloc.Type) {
171   case R_WASM_TABLE_INDEX_I32:
172   case R_WASM_TABLE_INDEX_SLEB:
173   case R_WASM_TABLE_INDEX_REL_SLEB:
174     if (Config->Pic && !getFunctionSymbol(Reloc.Index)->hasTableIndex())
175       return 0;
176     return getFunctionSymbol(Reloc.Index)->getTableIndex();
177   case R_WASM_MEMORY_ADDR_SLEB:
178   case R_WASM_MEMORY_ADDR_I32:
179   case R_WASM_MEMORY_ADDR_LEB:
180   case R_WASM_MEMORY_ADDR_REL_SLEB:
181     return cast<DefinedData>(Sym)->getVirtualAddress() + Reloc.Addend;
182   case R_WASM_TYPE_INDEX_LEB:
183     return TypeMap[Reloc.Index];
184   case R_WASM_FUNCTION_INDEX_LEB:
185     return getFunctionSymbol(Reloc.Index)->getFunctionIndex();
186   case R_WASM_GLOBAL_INDEX_LEB:
187     if (auto GS = dyn_cast<GlobalSymbol>(Sym))
188       return GS->getGlobalIndex();
189     return Sym->getGOTIndex();
190   case R_WASM_EVENT_INDEX_LEB:
191     return getEventSymbol(Reloc.Index)->getEventIndex();
192   case R_WASM_FUNCTION_OFFSET_I32: {
193     auto *F = cast<DefinedFunction>(Sym);
194     return F->Function->OutputOffset + F->Function->getFunctionCodeOffset() +
195            Reloc.Addend;
196   }
197   case R_WASM_SECTION_OFFSET_I32:
198     return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend;
199   default:
200     llvm_unreachable("unknown relocation type");
201   }
202 }
203 
204 template <class T>
205 static void setRelocs(const std::vector<T *> &Chunks,
206                       const WasmSection *Section) {
207   if (!Section)
208     return;
209 
210   ArrayRef<WasmRelocation> Relocs = Section->Relocations;
211   assert(std::is_sorted(Relocs.begin(), Relocs.end(),
212                         [](const WasmRelocation &R1, const WasmRelocation &R2) {
213                           return R1.Offset < R2.Offset;
214                         }));
215   assert(std::is_sorted(
216       Chunks.begin(), Chunks.end(), [](InputChunk *C1, InputChunk *C2) {
217         return C1->getInputSectionOffset() < C2->getInputSectionOffset();
218       }));
219 
220   auto RelocsNext = Relocs.begin();
221   auto RelocsEnd = Relocs.end();
222   auto RelocLess = [](const WasmRelocation &R, uint32_t Val) {
223     return R.Offset < Val;
224   };
225   for (InputChunk *C : Chunks) {
226     auto RelocsStart = std::lower_bound(RelocsNext, RelocsEnd,
227                                         C->getInputSectionOffset(), RelocLess);
228     RelocsNext = std::lower_bound(
229         RelocsStart, RelocsEnd, C->getInputSectionOffset() + C->getInputSize(),
230         RelocLess);
231     C->setRelocations(ArrayRef<WasmRelocation>(RelocsStart, RelocsNext));
232   }
233 }
234 
235 void ObjFile::parse(bool IgnoreComdats) {
236   // Parse a memory buffer as a wasm file.
237   LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n");
238   std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this));
239 
240   auto *Obj = dyn_cast<WasmObjectFile>(Bin.get());
241   if (!Obj)
242     fatal(toString(this) + ": not a wasm file");
243   if (!Obj->isRelocatableObject())
244     fatal(toString(this) + ": not a relocatable wasm file");
245 
246   Bin.release();
247   WasmObj.reset(Obj);
248 
249   // Build up a map of function indices to table indices for use when
250   // verifying the existing table index relocations
251   uint32_t TotalFunctions =
252       WasmObj->getNumImportedFunctions() + WasmObj->functions().size();
253   TableEntries.resize(TotalFunctions);
254   for (const WasmElemSegment &Seg : WasmObj->elements()) {
255     if (Seg.Offset.Opcode != WASM_OPCODE_I32_CONST)
256       fatal(toString(this) + ": invalid table elements");
257     uint32_t Offset = Seg.Offset.Value.Int32;
258     for (uint32_t Index = 0; Index < Seg.Functions.size(); Index++) {
259 
260       uint32_t FunctionIndex = Seg.Functions[Index];
261       TableEntries[FunctionIndex] = Offset + Index;
262     }
263   }
264 
265   // Find the code and data sections.  Wasm objects can have at most one code
266   // and one data section.
267   uint32_t SectionIndex = 0;
268   for (const SectionRef &Sec : WasmObj->sections()) {
269     const WasmSection &Section = WasmObj->getWasmSection(Sec);
270     if (Section.Type == WASM_SEC_CODE) {
271       CodeSection = &Section;
272     } else if (Section.Type == WASM_SEC_DATA) {
273       DataSection = &Section;
274     } else if (Section.Type == WASM_SEC_CUSTOM) {
275       CustomSections.emplace_back(make<InputSection>(Section, this));
276       CustomSections.back()->setRelocations(Section.Relocations);
277       CustomSectionsByIndex[SectionIndex] = CustomSections.back();
278     }
279     SectionIndex++;
280   }
281 
282   TypeMap.resize(getWasmObj()->types().size());
283   TypeIsUsed.resize(getWasmObj()->types().size(), false);
284 
285   ArrayRef<StringRef> Comdats = WasmObj->linkingData().Comdats;
286   for (unsigned I = 0; I < Comdats.size(); ++I)
287     if (IgnoreComdats)
288       KeptComdats.push_back(true);
289     else
290       KeptComdats.push_back(Symtab->addComdat(Comdats[I]));
291 
292   // Populate `Segments`.
293   for (const WasmSegment &S : WasmObj->dataSegments())
294     Segments.emplace_back(make<InputSegment>(S, this));
295   setRelocs(Segments, DataSection);
296 
297   // Populate `Functions`.
298   ArrayRef<WasmFunction> Funcs = WasmObj->functions();
299   ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes();
300   ArrayRef<WasmSignature> Types = WasmObj->types();
301   Functions.reserve(Funcs.size());
302 
303   for (size_t I = 0, E = Funcs.size(); I != E; ++I)
304     Functions.emplace_back(
305         make<InputFunction>(Types[FuncTypes[I]], &Funcs[I], this));
306   setRelocs(Functions, CodeSection);
307 
308   // Populate `Globals`.
309   for (const WasmGlobal &G : WasmObj->globals())
310     Globals.emplace_back(make<InputGlobal>(G, this));
311 
312   // Populate `Events`.
313   for (const WasmEvent &E : WasmObj->events())
314     Events.emplace_back(make<InputEvent>(Types[E.Type.SigIndex], E, this));
315 
316   // Populate `Symbols` based on the WasmSymbols in the object.
317   Symbols.reserve(WasmObj->getNumberOfSymbols());
318   for (const SymbolRef &Sym : WasmObj->symbols()) {
319     const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl());
320     if (Symbol *Sym = createDefined(WasmSym))
321       Symbols.push_back(Sym);
322     else
323       Symbols.push_back(createUndefined(WasmSym));
324   }
325 }
326 
327 bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const {
328   uint32_t C = Chunk->getComdat();
329   if (C == UINT32_MAX)
330     return false;
331   return !KeptComdats[C];
332 }
333 
334 FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t Index) const {
335   return cast<FunctionSymbol>(Symbols[Index]);
336 }
337 
338 GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t Index) const {
339   return cast<GlobalSymbol>(Symbols[Index]);
340 }
341 
342 EventSymbol *ObjFile::getEventSymbol(uint32_t Index) const {
343   return cast<EventSymbol>(Symbols[Index]);
344 }
345 
346 SectionSymbol *ObjFile::getSectionSymbol(uint32_t Index) const {
347   return cast<SectionSymbol>(Symbols[Index]);
348 }
349 
350 DataSymbol *ObjFile::getDataSymbol(uint32_t Index) const {
351   return cast<DataSymbol>(Symbols[Index]);
352 }
353 
354 Symbol *ObjFile::createDefined(const WasmSymbol &Sym) {
355   if (!Sym.isDefined())
356     return nullptr;
357 
358   StringRef Name = Sym.Info.Name;
359   uint32_t Flags = Sym.Info.Flags;
360 
361   switch (Sym.Info.Kind) {
362   case WASM_SYMBOL_TYPE_FUNCTION: {
363     InputFunction *Func =
364         Functions[Sym.Info.ElementIndex - WasmObj->getNumImportedFunctions()];
365     if (isExcludedByComdat(Func)) {
366       Func->Live = false;
367       return nullptr;
368     }
369 
370     if (Sym.isBindingLocal())
371       return make<DefinedFunction>(Name, Flags, this, Func);
372     return Symtab->addDefinedFunction(Name, Flags, this, Func);
373   }
374   case WASM_SYMBOL_TYPE_DATA: {
375     InputSegment *Seg = Segments[Sym.Info.DataRef.Segment];
376     if (isExcludedByComdat(Seg)) {
377       Seg->Live = false;
378       return nullptr;
379     }
380 
381     uint32_t Offset = Sym.Info.DataRef.Offset;
382     uint32_t Size = Sym.Info.DataRef.Size;
383 
384     if (Sym.isBindingLocal())
385       return make<DefinedData>(Name, Flags, this, Seg, Offset, Size);
386     return Symtab->addDefinedData(Name, Flags, this, Seg, Offset, Size);
387   }
388   case WASM_SYMBOL_TYPE_GLOBAL: {
389     InputGlobal *Global =
390         Globals[Sym.Info.ElementIndex - WasmObj->getNumImportedGlobals()];
391     if (Sym.isBindingLocal())
392       return make<DefinedGlobal>(Name, Flags, this, Global);
393     return Symtab->addDefinedGlobal(Name, Flags, this, Global);
394   }
395   case WASM_SYMBOL_TYPE_SECTION: {
396     InputSection *Section = CustomSectionsByIndex[Sym.Info.ElementIndex];
397     assert(Sym.isBindingLocal());
398     return make<SectionSymbol>(Name, Flags, Section, this);
399   }
400   case WASM_SYMBOL_TYPE_EVENT: {
401     InputEvent *Event =
402         Events[Sym.Info.ElementIndex - WasmObj->getNumImportedEvents()];
403     if (Sym.isBindingLocal())
404       return make<DefinedEvent>(Name, Flags, this, Event);
405     return Symtab->addDefinedEvent(Name, Flags, this, Event);
406   }
407   }
408   llvm_unreachable("unknown symbol kind");
409 }
410 
411 Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) {
412   StringRef Name = Sym.Info.Name;
413   uint32_t Flags = Sym.Info.Flags;
414 
415   switch (Sym.Info.Kind) {
416   case WASM_SYMBOL_TYPE_FUNCTION:
417     return Symtab->addUndefinedFunction(Name, Sym.Info.ImportName,
418                                         Sym.Info.ImportModule, Flags, this,
419                                         Sym.Signature);
420   case WASM_SYMBOL_TYPE_DATA:
421     return Symtab->addUndefinedData(Name, Flags, this);
422   case WASM_SYMBOL_TYPE_GLOBAL:
423     return Symtab->addUndefinedGlobal(Name, Sym.Info.ImportName,
424                                       Sym.Info.ImportModule, Flags, this,
425                                       Sym.GlobalType);
426   case WASM_SYMBOL_TYPE_SECTION:
427     llvm_unreachable("section symbols cannot be undefined");
428   }
429   llvm_unreachable("unknown symbol kind");
430 }
431 
432 void ArchiveFile::parse(bool IgnoreComdats) {
433   // Parse a MemoryBufferRef as an archive file.
434   LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
435   File = CHECK(Archive::create(MB), toString(this));
436 
437   // Read the symbol table to construct Lazy symbols.
438   int Count = 0;
439   for (const Archive::Symbol &Sym : File->symbols()) {
440     Symtab->addLazy(this, &Sym);
441     ++Count;
442   }
443   LLVM_DEBUG(dbgs() << "Read " << Count << " symbols\n");
444 }
445 
446 void ArchiveFile::addMember(const Archive::Symbol *Sym) {
447   const Archive::Child &C =
448       CHECK(Sym->getMember(),
449             "could not get the member for symbol " + Sym->getName());
450 
451   // Don't try to load the same member twice (this can happen when members
452   // mutually reference each other).
453   if (!Seen.insert(C.getChildOffset()).second)
454     return;
455 
456   LLVM_DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n");
457   LLVM_DEBUG(dbgs() << "from archive: " << toString(this) << "\n");
458 
459   MemoryBufferRef MB =
460       CHECK(C.getMemoryBufferRef(),
461             "could not get the buffer for the member defining symbol " +
462                 Sym->getName());
463 
464   InputFile *Obj = createObjectFile(MB, getName());
465   Symtab->addFile(Obj);
466 }
467 
468 static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) {
469   switch (GvVisibility) {
470   case GlobalValue::DefaultVisibility:
471     return WASM_SYMBOL_VISIBILITY_DEFAULT;
472   case GlobalValue::HiddenVisibility:
473   case GlobalValue::ProtectedVisibility:
474     return WASM_SYMBOL_VISIBILITY_HIDDEN;
475   }
476   llvm_unreachable("unknown visibility");
477 }
478 
479 static Symbol *createBitcodeSymbol(const std::vector<bool> &KeptComdats,
480                                    const lto::InputFile::Symbol &ObjSym,
481                                    BitcodeFile &F) {
482   StringRef Name = Saver.save(ObjSym.getName());
483 
484   uint32_t Flags = ObjSym.isWeak() ? WASM_SYMBOL_BINDING_WEAK : 0;
485   Flags |= mapVisibility(ObjSym.getVisibility());
486 
487   int C = ObjSym.getComdatIndex();
488   bool ExcludedByComdat = C != -1 && !KeptComdats[C];
489 
490   if (ObjSym.isUndefined() || ExcludedByComdat) {
491     if (ObjSym.isExecutable())
492       return Symtab->addUndefinedFunction(Name, Name, DefaultModule, Flags, &F,
493                                           nullptr);
494     return Symtab->addUndefinedData(Name, Flags, &F);
495   }
496 
497   if (ObjSym.isExecutable())
498     return Symtab->addDefinedFunction(Name, Flags, &F, nullptr);
499   return Symtab->addDefinedData(Name, Flags, &F, nullptr, 0, 0);
500 }
501 
502 void BitcodeFile::parse(bool IgnoreComdats) {
503   Obj = check(lto::InputFile::create(MemoryBufferRef(
504       MB.getBuffer(), Saver.save(ArchiveName + MB.getBufferIdentifier()))));
505   Triple T(Obj->getTargetTriple());
506   if (T.getArch() != Triple::wasm32) {
507     error(toString(MB.getBufferIdentifier()) + ": machine type must be wasm32");
508     return;
509   }
510   std::vector<bool> KeptComdats;
511   for (StringRef S : Obj->getComdatTable())
512     if (IgnoreComdats)
513       KeptComdats.push_back(true);
514     else
515       KeptComdats.push_back(Symtab->addComdat(S));
516 
517   for (const lto::InputFile::Symbol &ObjSym : Obj->symbols())
518     Symbols.push_back(createBitcodeSymbol(KeptComdats, ObjSym, *this));
519 }
520 
521 // Returns a string in the format of "foo.o" or "foo.a(bar.o)".
522 std::string lld::toString(const wasm::InputFile *File) {
523   if (!File)
524     return "<internal>";
525 
526   if (File->ArchiveName.empty())
527     return File->getName();
528 
529   return (File->ArchiveName + "(" + File->getName() + ")").str();
530 }
531