1 //===- InputFiles.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "InputFiles.h"
10 #include "Config.h"
11 #include "InputChunks.h"
12 #include "InputEvent.h"
13 #include "InputGlobal.h"
14 #include "SymbolTable.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "lld/Common/Memory.h"
17 #include "lld/Common/Reproduce.h"
18 #include "llvm/Object/Binary.h"
19 #include "llvm/Object/Wasm.h"
20 #include "llvm/Support/TarWriter.h"
21 #include "llvm/Support/raw_ostream.h"
22 
23 #define DEBUG_TYPE "lld"
24 
25 using namespace lld;
26 using namespace lld::wasm;
27 
28 using namespace llvm;
29 using namespace llvm::object;
30 using namespace llvm::wasm;
31 
32 std::unique_ptr<llvm::TarWriter> lld::wasm::Tar;
33 
34 Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) {
35   log("Loading: " + Path);
36 
37   auto MBOrErr = MemoryBuffer::getFile(Path);
38   if (auto EC = MBOrErr.getError()) {
39     error("cannot open " + Path + ": " + EC.message());
40     return None;
41   }
42   std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
43   MemoryBufferRef MBRef = MB->getMemBufferRef();
44   make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership
45 
46   if (Tar)
47     Tar->append(relativeToRoot(Path), MBRef.getBuffer());
48   return MBRef;
49 }
50 
51 InputFile *lld::wasm::createObjectFile(MemoryBufferRef MB,
52                                        StringRef ArchiveName) {
53   file_magic Magic = identify_magic(MB.getBuffer());
54   if (Magic == file_magic::wasm_object) {
55     std::unique_ptr<Binary> Bin = check(createBinary(MB));
56     auto *Obj = cast<WasmObjectFile>(Bin.get());
57     if (Obj->isSharedObject())
58       return make<SharedFile>(MB);
59     return make<ObjFile>(MB, ArchiveName);
60   }
61 
62   if (Magic == file_magic::bitcode)
63     return make<BitcodeFile>(MB, ArchiveName);
64 
65   fatal("unknown file type: " + MB.getBufferIdentifier());
66 }
67 
68 void ObjFile::dumpInfo() const {
69   log("info for: " + toString(this) +
70       "\n              Symbols : " + Twine(Symbols.size()) +
71       "\n     Function Imports : " + Twine(WasmObj->getNumImportedFunctions()) +
72       "\n       Global Imports : " + Twine(WasmObj->getNumImportedGlobals()) +
73       "\n        Event Imports : " + Twine(WasmObj->getNumImportedEvents()));
74 }
75 
76 // Relocations contain either symbol or type indices.  This function takes a
77 // relocation and returns relocated index (i.e. translates from the input
78 // symbol/type space to the output symbol/type space).
79 uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const {
80   if (Reloc.Type == R_WASM_TYPE_INDEX_LEB) {
81     assert(TypeIsUsed[Reloc.Index]);
82     return TypeMap[Reloc.Index];
83   }
84   const Symbol *Sym = Symbols[Reloc.Index];
85   if (auto *SS = dyn_cast<SectionSymbol>(Sym))
86     Sym = SS->getOutputSectionSymbol();
87   return Sym->getOutputSymbolIndex();
88 }
89 
90 // Relocations can contain addend for combined sections. This function takes a
91 // relocation and returns updated addend by offset in the output section.
92 uint32_t ObjFile::calcNewAddend(const WasmRelocation &Reloc) const {
93   switch (Reloc.Type) {
94   case R_WASM_MEMORY_ADDR_LEB:
95   case R_WASM_MEMORY_ADDR_SLEB:
96   case R_WASM_MEMORY_ADDR_I32:
97   case R_WASM_FUNCTION_OFFSET_I32:
98     return Reloc.Addend;
99   case R_WASM_SECTION_OFFSET_I32:
100     return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend;
101   default:
102     llvm_unreachable("unexpected relocation type");
103   }
104 }
105 
106 // Calculate the value we expect to find at the relocation location.
107 // This is used as a sanity check before applying a relocation to a given
108 // location.  It is useful for catching bugs in the compiler and linker.
109 uint32_t ObjFile::calcExpectedValue(const WasmRelocation &Reloc) const {
110   switch (Reloc.Type) {
111   case R_WASM_TABLE_INDEX_I32:
112   case R_WASM_TABLE_INDEX_SLEB:
113   case R_WASM_TABLE_INDEX_REL_SLEB: {
114     const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index];
115     return TableEntries[Sym.Info.ElementIndex];
116   }
117   case R_WASM_MEMORY_ADDR_SLEB:
118   case R_WASM_MEMORY_ADDR_I32:
119   case R_WASM_MEMORY_ADDR_LEB:
120   case R_WASM_MEMORY_ADDR_REL_SLEB: {
121     const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index];
122     if (Sym.isUndefined())
123       return 0;
124     const WasmSegment &Segment =
125         WasmObj->dataSegments()[Sym.Info.DataRef.Segment];
126     return Segment.Data.Offset.Value.Int32 + Sym.Info.DataRef.Offset +
127            Reloc.Addend;
128   }
129   case R_WASM_FUNCTION_OFFSET_I32: {
130     const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index];
131     InputFunction *F =
132         Functions[Sym.Info.ElementIndex - WasmObj->getNumImportedFunctions()];
133     return F->getFunctionInputOffset() + F->getFunctionCodeOffset() +
134            Reloc.Addend;
135   }
136   case R_WASM_SECTION_OFFSET_I32:
137     return Reloc.Addend;
138   case R_WASM_TYPE_INDEX_LEB:
139     return Reloc.Index;
140   case R_WASM_FUNCTION_INDEX_LEB:
141   case R_WASM_GLOBAL_INDEX_LEB:
142   case R_WASM_EVENT_INDEX_LEB: {
143     const WasmSymbol &Sym = WasmObj->syms()[Reloc.Index];
144     return Sym.Info.ElementIndex;
145   }
146   default:
147     llvm_unreachable("unknown relocation type");
148   }
149 }
150 
151 // Translate from the relocation's index into the final linked output value.
152 uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const {
153   const Symbol* Sym = nullptr;
154   if (Reloc.Type != R_WASM_TYPE_INDEX_LEB) {
155     Sym = Symbols[Reloc.Index];
156 
157     // We can end up with relocations against non-live symbols.  For example
158     // in debug sections.
159     if ((isa<FunctionSymbol>(Sym) || isa<DataSymbol>(Sym)) && !Sym->isLive())
160       return 0;
161 
162     // Special handling for undefined data symbols.  Most relocations against
163     // such symbols cannot be resolved.
164     if (isa<DataSymbol>(Sym) && Sym->isUndefined()) {
165       if (Sym->isWeak() || Config->Relocatable)
166         return 0;
167       // R_WASM_MEMORY_ADDR_I32 relocations in PIC code are turned into runtime
168       // fixups in __wasm_apply_relocs
169       if (Config->Pic && Reloc.Type == R_WASM_MEMORY_ADDR_I32)
170         return 0;
171       if (Reloc.Type != R_WASM_GLOBAL_INDEX_LEB) {
172         llvm_unreachable(
173           ("invalid relocation against undefined data symbol: " + toString(*Sym))
174               .c_str());
175       }
176     }
177   }
178 
179   switch (Reloc.Type) {
180   case R_WASM_TABLE_INDEX_I32:
181   case R_WASM_TABLE_INDEX_SLEB:
182   case R_WASM_TABLE_INDEX_REL_SLEB:
183     if (Config->Pic && !getFunctionSymbol(Reloc.Index)->hasTableIndex())
184       return 0;
185     return getFunctionSymbol(Reloc.Index)->getTableIndex();
186   case R_WASM_MEMORY_ADDR_SLEB:
187   case R_WASM_MEMORY_ADDR_I32:
188   case R_WASM_MEMORY_ADDR_LEB:
189   case R_WASM_MEMORY_ADDR_REL_SLEB:
190     return cast<DefinedData>(Sym)->getVirtualAddress() + Reloc.Addend;
191   case R_WASM_TYPE_INDEX_LEB:
192     return TypeMap[Reloc.Index];
193   case R_WASM_FUNCTION_INDEX_LEB:
194     return getFunctionSymbol(Reloc.Index)->getFunctionIndex();
195   case R_WASM_GLOBAL_INDEX_LEB:
196     if (auto GS = dyn_cast<GlobalSymbol>(Sym))
197       return GS->getGlobalIndex();
198     return Sym->getGOTIndex();
199   case R_WASM_EVENT_INDEX_LEB:
200     return getEventSymbol(Reloc.Index)->getEventIndex();
201   case R_WASM_FUNCTION_OFFSET_I32: {
202     auto *F = cast<DefinedFunction>(Sym);
203     return F->Function->OutputOffset + F->Function->getFunctionCodeOffset() +
204            Reloc.Addend;
205   }
206   case R_WASM_SECTION_OFFSET_I32:
207     return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend;
208   default:
209     llvm_unreachable("unknown relocation type");
210   }
211 }
212 
213 template <class T>
214 static void setRelocs(const std::vector<T *> &Chunks,
215                       const WasmSection *Section) {
216   if (!Section)
217     return;
218 
219   ArrayRef<WasmRelocation> Relocs = Section->Relocations;
220   assert(std::is_sorted(Relocs.begin(), Relocs.end(),
221                         [](const WasmRelocation &R1, const WasmRelocation &R2) {
222                           return R1.Offset < R2.Offset;
223                         }));
224   assert(std::is_sorted(
225       Chunks.begin(), Chunks.end(), [](InputChunk *C1, InputChunk *C2) {
226         return C1->getInputSectionOffset() < C2->getInputSectionOffset();
227       }));
228 
229   auto RelocsNext = Relocs.begin();
230   auto RelocsEnd = Relocs.end();
231   auto RelocLess = [](const WasmRelocation &R, uint32_t Val) {
232     return R.Offset < Val;
233   };
234   for (InputChunk *C : Chunks) {
235     auto RelocsStart = std::lower_bound(RelocsNext, RelocsEnd,
236                                         C->getInputSectionOffset(), RelocLess);
237     RelocsNext = std::lower_bound(
238         RelocsStart, RelocsEnd, C->getInputSectionOffset() + C->getInputSize(),
239         RelocLess);
240     C->setRelocations(ArrayRef<WasmRelocation>(RelocsStart, RelocsNext));
241   }
242 }
243 
244 void ObjFile::parse(bool IgnoreComdats) {
245   // Parse a memory buffer as a wasm file.
246   LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n");
247   std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this));
248 
249   auto *Obj = dyn_cast<WasmObjectFile>(Bin.get());
250   if (!Obj)
251     fatal(toString(this) + ": not a wasm file");
252   if (!Obj->isRelocatableObject())
253     fatal(toString(this) + ": not a relocatable wasm file");
254 
255   Bin.release();
256   WasmObj.reset(Obj);
257 
258   // Build up a map of function indices to table indices for use when
259   // verifying the existing table index relocations
260   uint32_t TotalFunctions =
261       WasmObj->getNumImportedFunctions() + WasmObj->functions().size();
262   TableEntries.resize(TotalFunctions);
263   for (const WasmElemSegment &Seg : WasmObj->elements()) {
264     if (Seg.Offset.Opcode != WASM_OPCODE_I32_CONST)
265       fatal(toString(this) + ": invalid table elements");
266     uint32_t Offset = Seg.Offset.Value.Int32;
267     for (uint32_t Index = 0; Index < Seg.Functions.size(); Index++) {
268 
269       uint32_t FunctionIndex = Seg.Functions[Index];
270       TableEntries[FunctionIndex] = Offset + Index;
271     }
272   }
273 
274   // Find the code and data sections.  Wasm objects can have at most one code
275   // and one data section.
276   uint32_t SectionIndex = 0;
277   for (const SectionRef &Sec : WasmObj->sections()) {
278     const WasmSection &Section = WasmObj->getWasmSection(Sec);
279     if (Section.Type == WASM_SEC_CODE) {
280       CodeSection = &Section;
281     } else if (Section.Type == WASM_SEC_DATA) {
282       DataSection = &Section;
283     } else if (Section.Type == WASM_SEC_CUSTOM) {
284       CustomSections.emplace_back(make<InputSection>(Section, this));
285       CustomSections.back()->setRelocations(Section.Relocations);
286       CustomSectionsByIndex[SectionIndex] = CustomSections.back();
287     }
288     SectionIndex++;
289   }
290 
291   TypeMap.resize(getWasmObj()->types().size());
292   TypeIsUsed.resize(getWasmObj()->types().size(), false);
293 
294   ArrayRef<StringRef> Comdats = WasmObj->linkingData().Comdats;
295   for (unsigned I = 0; I < Comdats.size(); ++I)
296     if (IgnoreComdats)
297       KeptComdats.push_back(true);
298     else
299       KeptComdats.push_back(Symtab->addComdat(Comdats[I]));
300 
301   // Populate `Segments`.
302   for (const WasmSegment &S : WasmObj->dataSegments())
303     Segments.emplace_back(make<InputSegment>(S, this));
304   setRelocs(Segments, DataSection);
305 
306   // Populate `Functions`.
307   ArrayRef<WasmFunction> Funcs = WasmObj->functions();
308   ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes();
309   ArrayRef<WasmSignature> Types = WasmObj->types();
310   Functions.reserve(Funcs.size());
311 
312   for (size_t I = 0, E = Funcs.size(); I != E; ++I)
313     Functions.emplace_back(
314         make<InputFunction>(Types[FuncTypes[I]], &Funcs[I], this));
315   setRelocs(Functions, CodeSection);
316 
317   // Populate `Globals`.
318   for (const WasmGlobal &G : WasmObj->globals())
319     Globals.emplace_back(make<InputGlobal>(G, this));
320 
321   // Populate `Events`.
322   for (const WasmEvent &E : WasmObj->events())
323     Events.emplace_back(make<InputEvent>(Types[E.Type.SigIndex], E, this));
324 
325   // Populate `Symbols` based on the WasmSymbols in the object.
326   Symbols.reserve(WasmObj->getNumberOfSymbols());
327   for (const SymbolRef &Sym : WasmObj->symbols()) {
328     const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl());
329     if (Symbol *Sym = createDefined(WasmSym))
330       Symbols.push_back(Sym);
331     else
332       Symbols.push_back(createUndefined(WasmSym));
333   }
334 }
335 
336 bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const {
337   uint32_t C = Chunk->getComdat();
338   if (C == UINT32_MAX)
339     return false;
340   return !KeptComdats[C];
341 }
342 
343 FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t Index) const {
344   return cast<FunctionSymbol>(Symbols[Index]);
345 }
346 
347 GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t Index) const {
348   return cast<GlobalSymbol>(Symbols[Index]);
349 }
350 
351 EventSymbol *ObjFile::getEventSymbol(uint32_t Index) const {
352   return cast<EventSymbol>(Symbols[Index]);
353 }
354 
355 SectionSymbol *ObjFile::getSectionSymbol(uint32_t Index) const {
356   return cast<SectionSymbol>(Symbols[Index]);
357 }
358 
359 DataSymbol *ObjFile::getDataSymbol(uint32_t Index) const {
360   return cast<DataSymbol>(Symbols[Index]);
361 }
362 
363 Symbol *ObjFile::createDefined(const WasmSymbol &Sym) {
364   if (!Sym.isDefined())
365     return nullptr;
366 
367   StringRef Name = Sym.Info.Name;
368   uint32_t Flags = Sym.Info.Flags;
369 
370   switch (Sym.Info.Kind) {
371   case WASM_SYMBOL_TYPE_FUNCTION: {
372     InputFunction *Func =
373         Functions[Sym.Info.ElementIndex - WasmObj->getNumImportedFunctions()];
374     if (isExcludedByComdat(Func)) {
375       Func->Live = false;
376       return nullptr;
377     }
378 
379     if (Sym.isBindingLocal())
380       return make<DefinedFunction>(Name, Flags, this, Func);
381     return Symtab->addDefinedFunction(Name, Flags, this, Func);
382   }
383   case WASM_SYMBOL_TYPE_DATA: {
384     InputSegment *Seg = Segments[Sym.Info.DataRef.Segment];
385     if (isExcludedByComdat(Seg)) {
386       Seg->Live = false;
387       return nullptr;
388     }
389 
390     uint32_t Offset = Sym.Info.DataRef.Offset;
391     uint32_t Size = Sym.Info.DataRef.Size;
392 
393     if (Sym.isBindingLocal())
394       return make<DefinedData>(Name, Flags, this, Seg, Offset, Size);
395     return Symtab->addDefinedData(Name, Flags, this, Seg, Offset, Size);
396   }
397   case WASM_SYMBOL_TYPE_GLOBAL: {
398     InputGlobal *Global =
399         Globals[Sym.Info.ElementIndex - WasmObj->getNumImportedGlobals()];
400     if (Sym.isBindingLocal())
401       return make<DefinedGlobal>(Name, Flags, this, Global);
402     return Symtab->addDefinedGlobal(Name, Flags, this, Global);
403   }
404   case WASM_SYMBOL_TYPE_SECTION: {
405     InputSection *Section = CustomSectionsByIndex[Sym.Info.ElementIndex];
406     assert(Sym.isBindingLocal());
407     return make<SectionSymbol>(Flags, Section, this);
408   }
409   case WASM_SYMBOL_TYPE_EVENT: {
410     InputEvent *Event =
411         Events[Sym.Info.ElementIndex - WasmObj->getNumImportedEvents()];
412     if (Sym.isBindingLocal())
413       return make<DefinedEvent>(Name, Flags, this, Event);
414     return Symtab->addDefinedEvent(Name, Flags, this, Event);
415   }
416   }
417   llvm_unreachable("unknown symbol kind");
418 }
419 
420 Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) {
421   StringRef Name = Sym.Info.Name;
422   uint32_t Flags = Sym.Info.Flags;
423 
424   switch (Sym.Info.Kind) {
425   case WASM_SYMBOL_TYPE_FUNCTION:
426     return Symtab->addUndefinedFunction(Name, Sym.Info.ImportName,
427                                         Sym.Info.ImportModule, Flags, this,
428                                         Sym.Signature);
429   case WASM_SYMBOL_TYPE_DATA:
430     return Symtab->addUndefinedData(Name, Flags, this);
431   case WASM_SYMBOL_TYPE_GLOBAL:
432     return Symtab->addUndefinedGlobal(Name, Sym.Info.ImportName,
433                                       Sym.Info.ImportModule, Flags, this,
434                                       Sym.GlobalType);
435   case WASM_SYMBOL_TYPE_SECTION:
436     llvm_unreachable("section symbols cannot be undefined");
437   }
438   llvm_unreachable("unknown symbol kind");
439 }
440 
441 void ArchiveFile::parse(bool IgnoreComdats) {
442   // Parse a MemoryBufferRef as an archive file.
443   LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
444   File = CHECK(Archive::create(MB), toString(this));
445 
446   // Read the symbol table to construct Lazy symbols.
447   int Count = 0;
448   for (const Archive::Symbol &Sym : File->symbols()) {
449     Symtab->addLazy(this, &Sym);
450     ++Count;
451   }
452   LLVM_DEBUG(dbgs() << "Read " << Count << " symbols\n");
453 }
454 
455 void ArchiveFile::addMember(const Archive::Symbol *Sym) {
456   const Archive::Child &C =
457       CHECK(Sym->getMember(),
458             "could not get the member for symbol " + Sym->getName());
459 
460   // Don't try to load the same member twice (this can happen when members
461   // mutually reference each other).
462   if (!Seen.insert(C.getChildOffset()).second)
463     return;
464 
465   LLVM_DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n");
466   LLVM_DEBUG(dbgs() << "from archive: " << toString(this) << "\n");
467 
468   MemoryBufferRef MB =
469       CHECK(C.getMemoryBufferRef(),
470             "could not get the buffer for the member defining symbol " +
471                 Sym->getName());
472 
473   InputFile *Obj = createObjectFile(MB, getName());
474   Symtab->addFile(Obj);
475 }
476 
477 static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) {
478   switch (GvVisibility) {
479   case GlobalValue::DefaultVisibility:
480     return WASM_SYMBOL_VISIBILITY_DEFAULT;
481   case GlobalValue::HiddenVisibility:
482   case GlobalValue::ProtectedVisibility:
483     return WASM_SYMBOL_VISIBILITY_HIDDEN;
484   }
485   llvm_unreachable("unknown visibility");
486 }
487 
488 static Symbol *createBitcodeSymbol(const std::vector<bool> &KeptComdats,
489                                    const lto::InputFile::Symbol &ObjSym,
490                                    BitcodeFile &F) {
491   StringRef Name = Saver.save(ObjSym.getName());
492 
493   uint32_t Flags = ObjSym.isWeak() ? WASM_SYMBOL_BINDING_WEAK : 0;
494   Flags |= mapVisibility(ObjSym.getVisibility());
495 
496   int C = ObjSym.getComdatIndex();
497   bool ExcludedByComdat = C != -1 && !KeptComdats[C];
498 
499   if (ObjSym.isUndefined() || ExcludedByComdat) {
500     if (ObjSym.isExecutable())
501       return Symtab->addUndefinedFunction(Name, Name, DefaultModule, Flags, &F,
502                                           nullptr);
503     return Symtab->addUndefinedData(Name, Flags, &F);
504   }
505 
506   if (ObjSym.isExecutable())
507     return Symtab->addDefinedFunction(Name, Flags, &F, nullptr);
508   return Symtab->addDefinedData(Name, Flags, &F, nullptr, 0, 0);
509 }
510 
511 void BitcodeFile::parse(bool IgnoreComdats) {
512   Obj = check(lto::InputFile::create(MemoryBufferRef(
513       MB.getBuffer(), Saver.save(ArchiveName + MB.getBufferIdentifier()))));
514   Triple T(Obj->getTargetTriple());
515   if (T.getArch() != Triple::wasm32) {
516     error(toString(MB.getBufferIdentifier()) + ": machine type must be wasm32");
517     return;
518   }
519   std::vector<bool> KeptComdats;
520   for (StringRef S : Obj->getComdatTable())
521     if (IgnoreComdats)
522       KeptComdats.push_back(true);
523     else
524       KeptComdats.push_back(Symtab->addComdat(S));
525 
526   for (const lto::InputFile::Symbol &ObjSym : Obj->symbols())
527     Symbols.push_back(createBitcodeSymbol(KeptComdats, ObjSym, *this));
528 }
529 
530 // Returns a string in the format of "foo.o" or "foo.a(bar.o)".
531 std::string lld::toString(const wasm::InputFile *File) {
532   if (!File)
533     return "<internal>";
534 
535   if (File->ArchiveName.empty())
536     return File->getName();
537 
538   return (File->ArchiveName + "(" + File->getName() + ")").str();
539 }
540