1 //===- InputFiles.cpp -----------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "InputFiles.h"
11 
12 #include "Config.h"
13 #include "InputChunks.h"
14 #include "SymbolTable.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "lld/Common/Memory.h"
17 #include "llvm/Object/Binary.h"
18 #include "llvm/Object/Wasm.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "lld"
22 
23 using namespace lld;
24 using namespace lld::wasm;
25 
26 using namespace llvm;
27 using namespace llvm::object;
28 using namespace llvm::wasm;
29 
30 Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) {
31   log("Loading: " + Path);
32 
33   auto MBOrErr = MemoryBuffer::getFile(Path);
34   if (auto EC = MBOrErr.getError()) {
35     error("cannot open " + Path + ": " + EC.message());
36     return None;
37   }
38   std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
39   MemoryBufferRef MBRef = MB->getMemBufferRef();
40   make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership
41 
42   return MBRef;
43 }
44 
45 void ObjFile::dumpInfo() const {
46   log("info for: " + getName() + "\n" +
47       "      Total Functions : " + Twine(FunctionSymbols.size()) + "\n" +
48       "        Total Globals : " + Twine(GlobalSymbols.size()) + "\n" +
49       "     Function Imports : " + Twine(NumFunctionImports) + "\n" +
50       "       Global Imports : " + Twine(NumGlobalImports) + "\n");
51 }
52 
53 uint32_t ObjFile::relocateVirtualAddress(uint32_t GlobalIndex) const {
54   return getGlobalSymbol(GlobalIndex)->getVirtualAddress();
55 }
56 
57 uint32_t ObjFile::relocateFunctionIndex(uint32_t Original) const {
58   const Symbol *Sym = getFunctionSymbol(Original);
59   uint32_t Index = Sym->getOutputIndex();
60   DEBUG(dbgs() << "relocateFunctionIndex: " << toString(*Sym) << ": "
61                << Original << " -> " << Index << "\n");
62   return Index;
63 }
64 
65 uint32_t ObjFile::relocateTypeIndex(uint32_t Original) const {
66   assert(TypeIsUsed[Original]);
67   return TypeMap[Original];
68 }
69 
70 uint32_t ObjFile::relocateTableIndex(uint32_t Original) const {
71   const Symbol *Sym = getFunctionSymbol(Original);
72   uint32_t Index = Sym->hasTableIndex() ? Sym->getTableIndex() : 0;
73   DEBUG(dbgs() << "relocateTableIndex: " << toString(*Sym) << ": " << Original
74                << " -> " << Index << "\n");
75   return Index;
76 }
77 
78 uint32_t ObjFile::relocateGlobalIndex(uint32_t Original) const {
79   const Symbol *Sym = getGlobalSymbol(Original);
80   uint32_t Index = Sym->hasOutputIndex() ? Sym->getOutputIndex() : 0;
81   DEBUG(dbgs() << "relocateGlobalIndex: " << toString(*Sym) << ": " << Original
82                << " -> " << Index << "\n");
83   return Index;
84 }
85 
86 // Relocations contain an index into the function, global or table index
87 // space of the input file.  This function takes a relocation and returns the
88 // relocated index (i.e. translates from the input index space to the output
89 // index space).
90 uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const {
91   switch (Reloc.Type) {
92   case R_WEBASSEMBLY_TYPE_INDEX_LEB:
93     return relocateTypeIndex(Reloc.Index);
94   case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
95   case R_WEBASSEMBLY_TABLE_INDEX_I32:
96   case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
97     return relocateFunctionIndex(Reloc.Index);
98   case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
99   case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
100   case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
101   case R_WEBASSEMBLY_MEMORY_ADDR_I32:
102     return relocateGlobalIndex(Reloc.Index);
103   default:
104     llvm_unreachable("unknown relocation type");
105   }
106 }
107 
108 // Translate from the relocation's index into the final linked output value.
109 uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const {
110   switch (Reloc.Type) {
111   case R_WEBASSEMBLY_TABLE_INDEX_I32:
112   case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
113     return relocateTableIndex(Reloc.Index);
114   case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
115   case R_WEBASSEMBLY_MEMORY_ADDR_I32:
116   case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
117     return relocateVirtualAddress(Reloc.Index) + Reloc.Addend;
118   case R_WEBASSEMBLY_TYPE_INDEX_LEB:
119     return relocateTypeIndex(Reloc.Index);
120   case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
121     return relocateFunctionIndex(Reloc.Index);
122   case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
123     return relocateGlobalIndex(Reloc.Index);
124   default:
125     llvm_unreachable("unknown relocation type");
126   }
127 }
128 
129 void ObjFile::parse() {
130   // Parse a memory buffer as a wasm file.
131   DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n");
132   std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this));
133 
134   auto *Obj = dyn_cast<WasmObjectFile>(Bin.get());
135   if (!Obj)
136     fatal(toString(this) + ": not a wasm file");
137   if (!Obj->isRelocatableObject())
138     fatal(toString(this) + ": not a relocatable wasm file");
139 
140   Bin.release();
141   WasmObj.reset(Obj);
142 
143   // Find the code and data sections.  Wasm objects can have at most one code
144   // and one data section.
145   for (const SectionRef &Sec : WasmObj->sections()) {
146     const WasmSection &Section = WasmObj->getWasmSection(Sec);
147     if (Section.Type == WASM_SEC_CODE)
148       CodeSection = &Section;
149     else if (Section.Type == WASM_SEC_DATA)
150       DataSection = &Section;
151   }
152 
153   TypeMap.resize(getWasmObj()->types().size());
154   TypeIsUsed.resize(getWasmObj()->types().size(), false);
155 
156   initializeSymbols();
157 }
158 
159 // Return the InputSegment in which a given symbol is defined.
160 InputSegment *ObjFile::getSegment(const WasmSymbol &WasmSym) const {
161   uint32_t Address = WasmObj->getWasmSymbolValue(WasmSym);
162   for (InputSegment *Segment : Segments) {
163     if (Address >= Segment->startVA() && Address < Segment->endVA()) {
164       DEBUG(dbgs() << "Found symbol in segment: " << WasmSym.Name << " -> "
165                    << Segment->getName() << "\n");
166 
167       return Segment;
168     }
169   }
170   error("symbol not found in any segment: " + WasmSym.Name);
171   return nullptr;
172 }
173 
174 // Get the value stored in the wasm global represented by this symbol.
175 // This represents the virtual address of the symbol in the input file.
176 uint32_t ObjFile::getGlobalValue(const WasmSymbol &Sym) const {
177   const WasmGlobal &Global =
178       getWasmObj()->globals()[Sym.ElementIndex - NumGlobalImports];
179   assert(Global.Type.Type == llvm::wasm::WASM_TYPE_I32);
180   return Global.InitExpr.Value.Int32;
181 }
182 
183 // Get the signature for a given function symbol, either by looking
184 // it up in function sections (for defined functions), of the imports section
185 // (for imported functions).
186 const WasmSignature *ObjFile::getFunctionSig(const WasmSymbol &Sym) const {
187   DEBUG(dbgs() << "getFunctionSig: " << Sym.Name << "\n");
188   return &WasmObj->types()[Sym.FunctionType];
189 }
190 
191 InputFunction *ObjFile::getFunction(const WasmSymbol &Sym) const {
192   uint32_t FunctionIndex = Sym.ElementIndex - NumFunctionImports;
193   return Functions[FunctionIndex];
194 }
195 
196 bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const {
197   StringRef Comdat = Chunk->getComdat();
198   return !Comdat.empty() && Symtab->findComdat(Comdat) != this;
199 }
200 
201 void ObjFile::initializeSymbols() {
202   Symbols.reserve(WasmObj->getNumberOfSymbols());
203 
204   for (const WasmImport &Import : WasmObj->imports()) {
205     switch (Import.Kind) {
206     case WASM_EXTERNAL_FUNCTION:
207       ++NumFunctionImports;
208       break;
209     case WASM_EXTERNAL_GLOBAL:
210       ++NumGlobalImports;
211       break;
212     }
213   }
214 
215   FunctionSymbols.resize(NumFunctionImports + WasmObj->functions().size());
216   GlobalSymbols.resize(NumGlobalImports + WasmObj->globals().size());
217 
218   ArrayRef<WasmFunction> Funcs = WasmObj->functions();
219   ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes();
220   ArrayRef<WasmSignature> Types = WasmObj->types();
221   ArrayRef<WasmGlobal> Globals = WasmObj->globals();
222 
223   for (const auto &C : WasmObj->comdats())
224     Symtab->addComdat(C, this);
225 
226   FunctionSymbols.resize(NumFunctionImports + Funcs.size());
227   GlobalSymbols.resize(NumGlobalImports + Globals.size());
228 
229   for (const WasmSegment &S : WasmObj->dataSegments()) {
230     InputSegment *Seg = make<InputSegment>(S, this);
231     Seg->copyRelocations(*DataSection);
232     Segments.emplace_back(Seg);
233   }
234 
235   for (size_t I = 0; I < Funcs.size(); ++I) {
236     const WasmFunction &Func = Funcs[I];
237     const WasmSignature &Sig = Types[FuncTypes[I]];
238     InputFunction *F = make<InputFunction>(Sig, &Func, this);
239     F->copyRelocations(*CodeSection);
240     Functions.emplace_back(F);
241   }
242 
243   // Populate `FunctionSymbols` and `GlobalSymbols` based on the WasmSymbols
244   // in the object
245   for (const SymbolRef &Sym : WasmObj->symbols()) {
246     const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl());
247     Symbol *S;
248     switch (WasmSym.Type) {
249     case WasmSymbol::SymbolType::FUNCTION_EXPORT: {
250       InputFunction *Function = getFunction(WasmSym);
251       if (!isExcludedByComdat(Function)) {
252         S = createDefined(WasmSym, Symbol::Kind::DefinedFunctionKind, Function);
253         break;
254       } else {
255         Function->Discarded = true;
256         LLVM_FALLTHROUGH; // Exclude function, and add the symbol as undefined
257       }
258     }
259     case WasmSymbol::SymbolType::FUNCTION_IMPORT:
260       S = createUndefined(WasmSym, Symbol::Kind::UndefinedFunctionKind,
261                           getFunctionSig(WasmSym));
262       break;
263     case WasmSymbol::SymbolType::GLOBAL_EXPORT: {
264       InputSegment *Segment = getSegment(WasmSym);
265       if (!isExcludedByComdat(Segment)) {
266         S = createDefined(WasmSym, Symbol::Kind::DefinedGlobalKind, Segment,
267                           getGlobalValue(WasmSym));
268         break;
269       } else {
270         Segment->Discarded = true;
271         LLVM_FALLTHROUGH; // Exclude global, and add the symbol as undefined
272       }
273     }
274     case WasmSymbol::SymbolType::GLOBAL_IMPORT:
275       S = createUndefined(WasmSym, Symbol::Kind::UndefinedGlobalKind);
276       break;
277     }
278 
279     Symbols.push_back(S);
280     if (WasmSym.isTypeFunction()) {
281       FunctionSymbols[WasmSym.ElementIndex] = S;
282       if (WasmSym.HasAltIndex)
283         FunctionSymbols[WasmSym.AltIndex] = S;
284     } else {
285       GlobalSymbols[WasmSym.ElementIndex] = S;
286       if (WasmSym.HasAltIndex)
287         GlobalSymbols[WasmSym.AltIndex] = S;
288     }
289   }
290 
291   DEBUG(for (size_t I = 0; I < FunctionSymbols.size(); ++I)
292             assert(FunctionSymbols[I] != nullptr);
293         for (size_t I = 0; I < GlobalSymbols.size(); ++I)
294             assert(GlobalSymbols[I] != nullptr););
295 
296   DEBUG(dbgs() << "Functions   : " << FunctionSymbols.size() << "\n");
297   DEBUG(dbgs() << "Globals     : " << GlobalSymbols.size() << "\n");
298 }
299 
300 Symbol *ObjFile::createUndefined(const WasmSymbol &Sym, Symbol::Kind Kind,
301                                  const WasmSignature *Signature) {
302   return Symtab->addUndefined(Sym.Name, Kind, Sym.Flags, this, Signature);
303 }
304 
305 Symbol *ObjFile::createDefined(const WasmSymbol &Sym, Symbol::Kind Kind,
306                                InputChunk *Chunk, uint32_t Address) {
307   Symbol *S;
308   if (Sym.isBindingLocal()) {
309     S = make<Symbol>(Sym.Name, true);
310     S->update(Kind, this, Sym.Flags, Chunk, Address);
311     return S;
312   }
313   return Symtab->addDefined(Sym.Name, Kind, Sym.Flags, this, Chunk, Address);
314 }
315 
316 void ArchiveFile::parse() {
317   // Parse a MemoryBufferRef as an archive file.
318   DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
319   File = CHECK(Archive::create(MB), toString(this));
320 
321   // Read the symbol table to construct Lazy symbols.
322   int Count = 0;
323   for (const Archive::Symbol &Sym : File->symbols()) {
324     Symtab->addLazy(this, &Sym);
325     ++Count;
326   }
327   DEBUG(dbgs() << "Read " << Count << " symbols\n");
328 }
329 
330 void ArchiveFile::addMember(const Archive::Symbol *Sym) {
331   const Archive::Child &C =
332       CHECK(Sym->getMember(),
333             "could not get the member for symbol " + Sym->getName());
334 
335   // Don't try to load the same member twice (this can happen when members
336   // mutually reference each other).
337   if (!Seen.insert(C.getChildOffset()).second)
338     return;
339 
340   DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n");
341   DEBUG(dbgs() << "from archive: " << toString(this) << "\n");
342 
343   MemoryBufferRef MB =
344       CHECK(C.getMemoryBufferRef(),
345             "could not get the buffer for the member defining symbol " +
346                 Sym->getName());
347 
348   if (identify_magic(MB.getBuffer()) != file_magic::wasm_object) {
349     error("unknown file type: " + MB.getBufferIdentifier());
350     return;
351   }
352 
353   InputFile *Obj = make<ObjFile>(MB);
354   Obj->ParentName = ParentName;
355   Symtab->addFile(Obj);
356 }
357 
358 // Returns a string in the format of "foo.o" or "foo.a(bar.o)".
359 std::string lld::toString(const wasm::InputFile *File) {
360   if (!File)
361     return "<internal>";
362 
363   if (File->ParentName.empty())
364     return File->getName();
365 
366   return (File->ParentName + "(" + File->getName() + ")").str();
367 }
368