1 //===- InputFiles.cpp -----------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "InputFiles.h"
11 
12 #include "Config.h"
13 #include "InputChunks.h"
14 #include "SymbolTable.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "lld/Common/Memory.h"
17 #include "llvm/Object/Binary.h"
18 #include "llvm/Object/Wasm.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "lld"
22 
23 using namespace lld;
24 using namespace lld::wasm;
25 
26 using namespace llvm;
27 using namespace llvm::object;
28 using namespace llvm::wasm;
29 
30 Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) {
31   log("Loading: " + Path);
32 
33   auto MBOrErr = MemoryBuffer::getFile(Path);
34   if (auto EC = MBOrErr.getError()) {
35     error("cannot open " + Path + ": " + EC.message());
36     return None;
37   }
38   std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
39   MemoryBufferRef MBRef = MB->getMemBufferRef();
40   make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership
41 
42   return MBRef;
43 }
44 
45 void ObjFile::dumpInfo() const {
46   log("info for: " + getName() + "\n" +
47       "      Total Functions : " + Twine(FunctionSymbols.size()) + "\n" +
48       "        Total Globals : " + Twine(GlobalSymbols.size()) + "\n" +
49       "     Function Imports : " + Twine(NumFunctionImports) + "\n" +
50       "       Global Imports : " + Twine(NumGlobalImports) + "\n");
51 }
52 
53 uint32_t ObjFile::relocateVirtualAddress(uint32_t GlobalIndex) const {
54   if (auto *DG = dyn_cast<DefinedGlobal>(getGlobalSymbol(GlobalIndex)))
55     return DG->getVirtualAddress();
56   else
57     return 0;
58 }
59 
60 uint32_t ObjFile::relocateFunctionIndex(uint32_t Original) const {
61   const FunctionSymbol *Sym = getFunctionSymbol(Original);
62   uint32_t Index = Sym->getOutputIndex();
63   DEBUG(dbgs() << "relocateFunctionIndex: " << toString(*Sym) << ": "
64                << Original << " -> " << Index << "\n");
65   return Index;
66 }
67 
68 uint32_t ObjFile::relocateTypeIndex(uint32_t Original) const {
69   assert(TypeIsUsed[Original]);
70   return TypeMap[Original];
71 }
72 
73 uint32_t ObjFile::relocateTableIndex(uint32_t Original) const {
74   const FunctionSymbol *Sym = getFunctionSymbol(Original);
75   uint32_t Index = Sym->hasTableIndex() ? Sym->getTableIndex() : 0;
76   DEBUG(dbgs() << "relocateTableIndex: " << toString(*Sym) << ": " << Original
77                << " -> " << Index << "\n");
78   return Index;
79 }
80 
81 uint32_t ObjFile::relocateGlobalIndex(uint32_t Original) const {
82   const Symbol *Sym = getGlobalSymbol(Original);
83   uint32_t Index = Sym->getOutputIndex();
84   DEBUG(dbgs() << "relocateGlobalIndex: " << toString(*Sym) << ": " << Original
85                << " -> " << Index << "\n");
86   return Index;
87 }
88 
89 // Relocations contain an index into the function, global or table index
90 // space of the input file.  This function takes a relocation and returns the
91 // relocated index (i.e. translates from the input index space to the output
92 // index space).
93 uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const {
94   switch (Reloc.Type) {
95   case R_WEBASSEMBLY_TYPE_INDEX_LEB:
96     return relocateTypeIndex(Reloc.Index);
97   case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
98   case R_WEBASSEMBLY_TABLE_INDEX_I32:
99   case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
100     return relocateFunctionIndex(Reloc.Index);
101   case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
102   case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
103   case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
104   case R_WEBASSEMBLY_MEMORY_ADDR_I32:
105     return relocateGlobalIndex(Reloc.Index);
106   default:
107     llvm_unreachable("unknown relocation type");
108   }
109 }
110 
111 // Translate from the relocation's index into the final linked output value.
112 uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const {
113   switch (Reloc.Type) {
114   case R_WEBASSEMBLY_TABLE_INDEX_I32:
115   case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
116     return relocateTableIndex(Reloc.Index);
117   case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
118   case R_WEBASSEMBLY_MEMORY_ADDR_I32:
119   case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
120     return relocateVirtualAddress(Reloc.Index) + Reloc.Addend;
121   case R_WEBASSEMBLY_TYPE_INDEX_LEB:
122     return relocateTypeIndex(Reloc.Index);
123   case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
124     return relocateFunctionIndex(Reloc.Index);
125   case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
126     return relocateGlobalIndex(Reloc.Index);
127   default:
128     llvm_unreachable("unknown relocation type");
129   }
130 }
131 
132 void ObjFile::parse() {
133   // Parse a memory buffer as a wasm file.
134   DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n");
135   std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this));
136 
137   auto *Obj = dyn_cast<WasmObjectFile>(Bin.get());
138   if (!Obj)
139     fatal(toString(this) + ": not a wasm file");
140   if (!Obj->isRelocatableObject())
141     fatal(toString(this) + ": not a relocatable wasm file");
142 
143   Bin.release();
144   WasmObj.reset(Obj);
145 
146   // Find the code and data sections.  Wasm objects can have at most one code
147   // and one data section.
148   for (const SectionRef &Sec : WasmObj->sections()) {
149     const WasmSection &Section = WasmObj->getWasmSection(Sec);
150     if (Section.Type == WASM_SEC_CODE)
151       CodeSection = &Section;
152     else if (Section.Type == WASM_SEC_DATA)
153       DataSection = &Section;
154   }
155 
156   TypeMap.resize(getWasmObj()->types().size());
157   TypeIsUsed.resize(getWasmObj()->types().size(), false);
158 
159   initializeSymbols();
160 }
161 
162 // Return the InputSegment in which a given symbol is defined.
163 InputSegment *ObjFile::getSegment(const WasmSymbol &WasmSym) const {
164   uint32_t Address = WasmObj->getWasmSymbolValue(WasmSym);
165   for (InputSegment *Segment : Segments) {
166     if (Address >= Segment->startVA() && Address < Segment->endVA()) {
167       DEBUG(dbgs() << "Found symbol in segment: " << WasmSym.Name << " -> "
168                    << Segment->getName() << "\n");
169 
170       return Segment;
171     }
172   }
173   error("symbol not found in any segment: " + WasmSym.Name);
174   return nullptr;
175 }
176 
177 // Get the value stored in the wasm global represented by this symbol.
178 // This represents the virtual address of the symbol in the input file.
179 uint32_t ObjFile::getGlobalValue(const WasmSymbol &Sym) const {
180   const WasmGlobal &Global =
181       getWasmObj()->globals()[Sym.ElementIndex - NumGlobalImports];
182   assert(Global.Type.Type == llvm::wasm::WASM_TYPE_I32);
183   return Global.InitExpr.Value.Int32;
184 }
185 
186 // Get the signature for a given function symbol, either by looking
187 // it up in function sections (for defined functions), of the imports section
188 // (for imported functions).
189 const WasmSignature *ObjFile::getFunctionSig(const WasmSymbol &Sym) const {
190   DEBUG(dbgs() << "getFunctionSig: " << Sym.Name << "\n");
191   return &WasmObj->types()[Sym.FunctionType];
192 }
193 
194 InputFunction *ObjFile::getFunction(const WasmSymbol &Sym) const {
195   uint32_t FunctionIndex = Sym.ElementIndex - NumFunctionImports;
196   return Functions[FunctionIndex];
197 }
198 
199 bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const {
200   StringRef Comdat = Chunk->getComdat();
201   return !Comdat.empty() && Symtab->findComdat(Comdat) != this;
202 }
203 
204 void ObjFile::initializeSymbols() {
205   Symbols.reserve(WasmObj->getNumberOfSymbols());
206 
207   for (const WasmImport &Import : WasmObj->imports()) {
208     switch (Import.Kind) {
209     case WASM_EXTERNAL_FUNCTION:
210       ++NumFunctionImports;
211       break;
212     case WASM_EXTERNAL_GLOBAL:
213       ++NumGlobalImports;
214       break;
215     }
216   }
217 
218   FunctionSymbols.resize(NumFunctionImports + WasmObj->functions().size());
219   GlobalSymbols.resize(NumGlobalImports + WasmObj->globals().size());
220 
221   ArrayRef<WasmFunction> Funcs = WasmObj->functions();
222   ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes();
223   ArrayRef<WasmSignature> Types = WasmObj->types();
224   ArrayRef<WasmGlobal> Globals = WasmObj->globals();
225 
226   for (const auto &C : WasmObj->comdats())
227     Symtab->addComdat(C, this);
228 
229   FunctionSymbols.resize(NumFunctionImports + Funcs.size());
230   GlobalSymbols.resize(NumGlobalImports + Globals.size());
231 
232   for (const WasmSegment &S : WasmObj->dataSegments()) {
233     InputSegment *Seg = make<InputSegment>(S, this);
234     Seg->copyRelocations(*DataSection);
235     Segments.emplace_back(Seg);
236   }
237 
238   for (size_t I = 0; I < Funcs.size(); ++I) {
239     const WasmFunction &Func = Funcs[I];
240     const WasmSignature &Sig = Types[FuncTypes[I]];
241     InputFunction *F = make<InputFunction>(Sig, &Func, this);
242     F->copyRelocations(*CodeSection);
243     Functions.emplace_back(F);
244   }
245 
246   // Populate `FunctionSymbols` and `GlobalSymbols` based on the WasmSymbols
247   // in the object
248   for (const SymbolRef &Sym : WasmObj->symbols()) {
249     const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl());
250     Symbol *S;
251     switch (WasmSym.Type) {
252     case WasmSymbol::SymbolType::FUNCTION_EXPORT: {
253       InputFunction *Function = getFunction(WasmSym);
254       if (!isExcludedByComdat(Function)) {
255         S = createDefinedFunction(WasmSym, Function);
256         break;
257       } else {
258         Function->Live = false;
259         LLVM_FALLTHROUGH; // Exclude function, and add the symbol as undefined
260       }
261     }
262     case WasmSymbol::SymbolType::FUNCTION_IMPORT:
263       S = createUndefined(WasmSym, Symbol::Kind::UndefinedFunctionKind,
264                           getFunctionSig(WasmSym));
265       break;
266     case WasmSymbol::SymbolType::GLOBAL_EXPORT: {
267       InputSegment *Segment = getSegment(WasmSym);
268       if (!isExcludedByComdat(Segment)) {
269         S = createDefinedGlobal(WasmSym, Segment, getGlobalValue(WasmSym));
270         break;
271       } else {
272         Segment->Live = false;
273         LLVM_FALLTHROUGH; // Exclude global, and add the symbol as undefined
274       }
275     }
276     case WasmSymbol::SymbolType::GLOBAL_IMPORT:
277       S = createUndefined(WasmSym, Symbol::Kind::UndefinedGlobalKind);
278       break;
279     }
280 
281     Symbols.push_back(S);
282     if (WasmSym.isTypeFunction()) {
283       FunctionSymbols[WasmSym.ElementIndex] = S;
284       if (WasmSym.HasAltIndex)
285         FunctionSymbols[WasmSym.AltIndex] = S;
286     } else {
287       GlobalSymbols[WasmSym.ElementIndex] = S;
288       if (WasmSym.HasAltIndex)
289         GlobalSymbols[WasmSym.AltIndex] = S;
290     }
291   }
292 
293   DEBUG(for (size_t I = 0; I < FunctionSymbols.size(); ++I)
294             assert(FunctionSymbols[I] != nullptr);
295         for (size_t I = 0; I < GlobalSymbols.size(); ++I)
296             assert(GlobalSymbols[I] != nullptr););
297 
298   DEBUG(dbgs() << "Functions   : " << FunctionSymbols.size() << "\n");
299   DEBUG(dbgs() << "Globals     : " << GlobalSymbols.size() << "\n");
300 }
301 
302 Symbol *ObjFile::createUndefined(const WasmSymbol &Sym, Symbol::Kind Kind,
303                                  const WasmSignature *Signature) {
304   return Symtab->addUndefined(Sym.Name, Kind, Sym.Flags, this, Signature);
305 }
306 
307 Symbol *ObjFile::createDefinedFunction(const WasmSymbol &Sym,
308                                        InputChunk *Chunk) {
309   if (Sym.isBindingLocal())
310     return make<DefinedFunction>(Sym.Name, Sym.Flags, this, Chunk);
311   return Symtab->addDefined(true, Sym.Name, Sym.Flags, this, Chunk);
312 }
313 
314 Symbol *ObjFile::createDefinedGlobal(const WasmSymbol &Sym, InputChunk *Chunk,
315                                      uint32_t Address) {
316   if (Sym.isBindingLocal())
317     return make<DefinedGlobal>(Sym.Name, Sym.Flags, this, Chunk, Address);
318   return Symtab->addDefined(false, Sym.Name, Sym.Flags, this, Chunk, Address);
319 }
320 
321 void ArchiveFile::parse() {
322   // Parse a MemoryBufferRef as an archive file.
323   DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
324   File = CHECK(Archive::create(MB), toString(this));
325 
326   // Read the symbol table to construct Lazy symbols.
327   int Count = 0;
328   for (const Archive::Symbol &Sym : File->symbols()) {
329     Symtab->addLazy(this, &Sym);
330     ++Count;
331   }
332   DEBUG(dbgs() << "Read " << Count << " symbols\n");
333 }
334 
335 void ArchiveFile::addMember(const Archive::Symbol *Sym) {
336   const Archive::Child &C =
337       CHECK(Sym->getMember(),
338             "could not get the member for symbol " + Sym->getName());
339 
340   // Don't try to load the same member twice (this can happen when members
341   // mutually reference each other).
342   if (!Seen.insert(C.getChildOffset()).second)
343     return;
344 
345   DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n");
346   DEBUG(dbgs() << "from archive: " << toString(this) << "\n");
347 
348   MemoryBufferRef MB =
349       CHECK(C.getMemoryBufferRef(),
350             "could not get the buffer for the member defining symbol " +
351                 Sym->getName());
352 
353   if (identify_magic(MB.getBuffer()) != file_magic::wasm_object) {
354     error("unknown file type: " + MB.getBufferIdentifier());
355     return;
356   }
357 
358   InputFile *Obj = make<ObjFile>(MB);
359   Obj->ParentName = ParentName;
360   Symtab->addFile(Obj);
361 }
362 
363 // Returns a string in the format of "foo.o" or "foo.a(bar.o)".
364 std::string lld::toString(const wasm::InputFile *File) {
365   if (!File)
366     return "<internal>";
367 
368   if (File->ParentName.empty())
369     return File->getName();
370 
371   return (File->ParentName + "(" + File->getName() + ")").str();
372 }
373