1 //===- InputFiles.cpp -----------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "InputFiles.h"
11 
12 #include "Config.h"
13 #include "InputChunks.h"
14 #include "SymbolTable.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "lld/Common/Memory.h"
17 #include "llvm/Object/Binary.h"
18 #include "llvm/Object/Wasm.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "lld"
22 
23 using namespace lld;
24 using namespace lld::wasm;
25 
26 using namespace llvm;
27 using namespace llvm::object;
28 using namespace llvm::wasm;
29 
30 Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) {
31   log("Loading: " + Path);
32 
33   auto MBOrErr = MemoryBuffer::getFile(Path);
34   if (auto EC = MBOrErr.getError()) {
35     error("cannot open " + Path + ": " + EC.message());
36     return None;
37   }
38   std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
39   MemoryBufferRef MBRef = MB->getMemBufferRef();
40   make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership
41 
42   return MBRef;
43 }
44 
45 void ObjFile::dumpInfo() const {
46   log("info for: " + getName() + "\n" +
47       "      Total Functions : " + Twine(FunctionSymbols.size()) + "\n" +
48       "        Total Globals : " + Twine(GlobalSymbols.size()) + "\n" +
49       "     Function Imports : " + Twine(NumFunctionImports) + "\n" +
50       "       Global Imports : " + Twine(NumGlobalImports) + "\n");
51 }
52 
53 uint32_t ObjFile::relocateVirtualAddress(uint32_t GlobalIndex) const {
54   return GlobalSymbols[GlobalIndex]->getVirtualAddress();
55 }
56 
57 uint32_t ObjFile::relocateFunctionIndex(uint32_t Original) const {
58   Symbol *Sym = FunctionSymbols[Original];
59   uint32_t Index = Sym->getOutputIndex();
60   DEBUG(dbgs() << "relocateFunctionIndex: " << toString(*Sym) << ": "
61                << Original << " -> " << Index << "\n");
62   return Index;
63 }
64 
65 uint32_t ObjFile::relocateTypeIndex(uint32_t Original) const {
66   return TypeMap[Original];
67 }
68 
69 uint32_t ObjFile::relocateTableIndex(uint32_t Original) const {
70   Symbol *Sym = FunctionSymbols[Original];
71   uint32_t Index = Sym->hasTableIndex() ? Sym->getTableIndex() : 0;
72   DEBUG(dbgs() << "relocateTableIndex: " << toString(*Sym) << ": " << Original
73                << " -> " << Index << "\n");
74   return Index;
75 }
76 
77 uint32_t ObjFile::relocateGlobalIndex(uint32_t Original) const {
78   Symbol *Sym = GlobalSymbols[Original];
79   uint32_t Index = Sym->hasOutputIndex() ? Sym->getOutputIndex() : 0;
80   DEBUG(dbgs() << "relocateGlobalIndex: " << toString(*Sym) << ": " << Original
81                << " -> " << Index << "\n");
82   return Index;
83 }
84 
85 // Relocations contain an index into the function, global or table index
86 // space of the input file.  This function takes a relocation and returns the
87 // relocated index (i.e. translates from the input index space to the output
88 // index space).
89 uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const {
90   switch (Reloc.Type) {
91   case R_WEBASSEMBLY_TYPE_INDEX_LEB:
92     return relocateTypeIndex(Reloc.Index);
93   case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
94   case R_WEBASSEMBLY_TABLE_INDEX_I32:
95   case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
96     return relocateFunctionIndex(Reloc.Index);
97   case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
98   case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
99   case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
100   case R_WEBASSEMBLY_MEMORY_ADDR_I32:
101     return relocateGlobalIndex(Reloc.Index);
102   default:
103     llvm_unreachable("unknown relocation type");
104   }
105 }
106 
107 // Translate from the relocation's index into the final linked output value.
108 uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const {
109   switch (Reloc.Type) {
110   case R_WEBASSEMBLY_TABLE_INDEX_I32:
111   case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
112     return relocateTableIndex(Reloc.Index);
113   case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
114   case R_WEBASSEMBLY_MEMORY_ADDR_I32:
115   case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
116     return relocateVirtualAddress(Reloc.Index) + Reloc.Addend;
117   case R_WEBASSEMBLY_TYPE_INDEX_LEB:
118     return relocateTypeIndex(Reloc.Index);
119   case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
120     return relocateFunctionIndex(Reloc.Index);
121   case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
122     return relocateGlobalIndex(Reloc.Index);
123   default:
124     llvm_unreachable("unknown relocation type");
125   }
126 }
127 
128 void ObjFile::parse() {
129   // Parse a memory buffer as a wasm file.
130   DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n");
131   std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this));
132 
133   auto *Obj = dyn_cast<WasmObjectFile>(Bin.get());
134   if (!Obj)
135     fatal(toString(this) + ": not a wasm file");
136   if (!Obj->isRelocatableObject())
137     fatal(toString(this) + ": not a relocatable wasm file");
138 
139   Bin.release();
140   WasmObj.reset(Obj);
141 
142   // Find the code and data sections.  Wasm objects can have at most one code
143   // and one data section.
144   for (const SectionRef &Sec : WasmObj->sections()) {
145     const WasmSection &Section = WasmObj->getWasmSection(Sec);
146     if (Section.Type == WASM_SEC_CODE)
147       CodeSection = &Section;
148     else if (Section.Type == WASM_SEC_DATA)
149       DataSection = &Section;
150   }
151 
152   initializeSymbols();
153 }
154 
155 // Return the InputSegment in which a given symbol is defined.
156 InputSegment *ObjFile::getSegment(const WasmSymbol &WasmSym) const {
157   uint32_t Address = WasmObj->getWasmSymbolValue(WasmSym);
158   for (InputSegment *Segment : Segments) {
159     if (Address >= Segment->startVA() && Address < Segment->endVA()) {
160       DEBUG(dbgs() << "Found symbol in segment: " << WasmSym.Name << " -> "
161                    << Segment->getName() << "\n");
162 
163       return Segment;
164     }
165   }
166   error("symbol not found in any segment: " + WasmSym.Name);
167   return nullptr;
168 }
169 
170 // Get the value stored in the wasm global represented by this symbol.
171 // This represents the virtual address of the symbol in the input file.
172 uint32_t ObjFile::getGlobalValue(const WasmSymbol &Sym) const {
173   const WasmGlobal &Global =
174       getWasmObj()->globals()[Sym.ElementIndex - NumGlobalImports];
175   assert(Global.Type == llvm::wasm::WASM_TYPE_I32);
176   return Global.InitExpr.Value.Int32;
177 }
178 
179 // Get the signature for a given function symbol, either by looking
180 // it up in function sections (for defined functions), of the imports section
181 // (for imported functions).
182 const WasmSignature *ObjFile::getFunctionSig(const WasmSymbol &Sym) const {
183   DEBUG(dbgs() << "getFunctionSig: " << Sym.Name << "\n");
184   return &WasmObj->types()[Sym.FunctionType];
185 }
186 
187 InputFunction *ObjFile::getFunction(const WasmSymbol &Sym) const {
188   uint32_t FunctionIndex = Sym.ElementIndex - NumFunctionImports;
189   return Functions[FunctionIndex];
190 }
191 
192 bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const {
193   StringRef Comdat = Chunk->getComdat();
194   return !Comdat.empty() && Symtab->findComdat(Comdat) != this;
195 }
196 
197 void ObjFile::initializeSymbols() {
198   Symbols.reserve(WasmObj->getNumberOfSymbols());
199 
200   for (const WasmImport &Import : WasmObj->imports()) {
201     switch (Import.Kind) {
202     case WASM_EXTERNAL_FUNCTION:
203       ++NumFunctionImports;
204       break;
205     case WASM_EXTERNAL_GLOBAL:
206       ++NumGlobalImports;
207       break;
208     }
209   }
210 
211   FunctionSymbols.resize(NumFunctionImports + WasmObj->functions().size());
212   GlobalSymbols.resize(NumGlobalImports + WasmObj->globals().size());
213 
214   ArrayRef<WasmFunction> Funcs = WasmObj->functions();
215   ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes();
216   ArrayRef<WasmSignature> Types = WasmObj->types();
217   ArrayRef<WasmGlobal> Globals = WasmObj->globals();
218 
219   for (const auto &C : WasmObj->comdats())
220     Symtab->addComdat(C, this);
221 
222   FunctionSymbols.resize(NumFunctionImports + Funcs.size());
223   GlobalSymbols.resize(NumGlobalImports + Globals.size());
224 
225   for (const WasmSegment &S : WasmObj->dataSegments()) {
226     InputSegment *Seg = make<InputSegment>(S, this);
227     Seg->copyRelocations(*DataSection);
228     Segments.emplace_back(Seg);
229   }
230 
231   for (size_t I = 0; I < Funcs.size(); ++I) {
232     const WasmFunction &Func = Funcs[I];
233     const WasmSignature &Sig = Types[FuncTypes[I]];
234     InputFunction *F = make<InputFunction>(Sig, &Func, this);
235     F->copyRelocations(*CodeSection);
236     Functions.emplace_back(F);
237   }
238 
239   // Populate `FunctionSymbols` and `GlobalSymbols` based on the WasmSymbols
240   // in the object
241   for (const SymbolRef &Sym : WasmObj->symbols()) {
242     const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl());
243     Symbol *S;
244     switch (WasmSym.Type) {
245     case WasmSymbol::SymbolType::FUNCTION_EXPORT: {
246       InputFunction *Function = getFunction(WasmSym);
247       if (!isExcludedByComdat(Function)) {
248         S = createDefined(WasmSym, Symbol::Kind::DefinedFunctionKind, Function);
249         break;
250       } else {
251         Function->Discarded = true;
252         LLVM_FALLTHROUGH; // Exclude function, and add the symbol as undefined
253       }
254     }
255     case WasmSymbol::SymbolType::FUNCTION_IMPORT:
256       S = createUndefined(WasmSym, Symbol::Kind::UndefinedFunctionKind,
257                           getFunctionSig(WasmSym));
258       break;
259     case WasmSymbol::SymbolType::GLOBAL_EXPORT: {
260       InputSegment *Segment = getSegment(WasmSym);
261       if (!isExcludedByComdat(Segment)) {
262         S = createDefined(WasmSym, Symbol::Kind::DefinedGlobalKind, Segment,
263                           getGlobalValue(WasmSym));
264         break;
265       } else {
266         Segment->Discarded = true;
267         LLVM_FALLTHROUGH; // Exclude global, and add the symbol as undefined
268       }
269     }
270     case WasmSymbol::SymbolType::GLOBAL_IMPORT:
271       S = createUndefined(WasmSym, Symbol::Kind::UndefinedGlobalKind);
272       break;
273     }
274 
275     Symbols.push_back(S);
276     if (WasmSym.isFunction()) {
277       FunctionSymbols[WasmSym.ElementIndex] = S;
278       if (WasmSym.HasAltIndex)
279         FunctionSymbols[WasmSym.AltIndex] = S;
280     } else {
281       GlobalSymbols[WasmSym.ElementIndex] = S;
282       if (WasmSym.HasAltIndex)
283         GlobalSymbols[WasmSym.AltIndex] = S;
284     }
285   }
286 
287   DEBUG(for (size_t I = 0; I < FunctionSymbols.size(); ++I)
288             assert(FunctionSymbols[I] != nullptr);
289         for (size_t I = 0; I < GlobalSymbols.size(); ++I)
290             assert(GlobalSymbols[I] != nullptr););
291 
292   DEBUG(dbgs() << "Functions   : " << FunctionSymbols.size() << "\n");
293   DEBUG(dbgs() << "Globals     : " << GlobalSymbols.size() << "\n");
294 }
295 
296 Symbol *ObjFile::createUndefined(const WasmSymbol &Sym, Symbol::Kind Kind,
297                                  const WasmSignature *Signature) {
298   return Symtab->addUndefined(Sym.Name, Kind, Sym.Flags, this, Signature);
299 }
300 
301 Symbol *ObjFile::createDefined(const WasmSymbol &Sym, Symbol::Kind Kind,
302                                InputChunk *Chunk, uint32_t Address) {
303   Symbol *S;
304   if (Sym.isLocal()) {
305     S = make<Symbol>(Sym.Name, true);
306     S->update(Kind, this, Sym.Flags, Chunk, Address);
307     return S;
308   }
309   return Symtab->addDefined(Sym.Name, Kind, Sym.Flags, this, Chunk, Address);
310 }
311 
312 void ArchiveFile::parse() {
313   // Parse a MemoryBufferRef as an archive file.
314   DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
315   File = CHECK(Archive::create(MB), toString(this));
316 
317   // Read the symbol table to construct Lazy symbols.
318   int Count = 0;
319   for (const Archive::Symbol &Sym : File->symbols()) {
320     Symtab->addLazy(this, &Sym);
321     ++Count;
322   }
323   DEBUG(dbgs() << "Read " << Count << " symbols\n");
324 }
325 
326 void ArchiveFile::addMember(const Archive::Symbol *Sym) {
327   const Archive::Child &C =
328       CHECK(Sym->getMember(),
329             "could not get the member for symbol " + Sym->getName());
330 
331   // Don't try to load the same member twice (this can happen when members
332   // mutually reference each other).
333   if (!Seen.insert(C.getChildOffset()).second)
334     return;
335 
336   DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n");
337   DEBUG(dbgs() << "from archive: " << toString(this) << "\n");
338 
339   MemoryBufferRef MB =
340       CHECK(C.getMemoryBufferRef(),
341             "could not get the buffer for the member defining symbol " +
342                 Sym->getName());
343 
344   if (identify_magic(MB.getBuffer()) != file_magic::wasm_object) {
345     error("unknown file type: " + MB.getBufferIdentifier());
346     return;
347   }
348 
349   InputFile *Obj = make<ObjFile>(MB);
350   Obj->ParentName = ParentName;
351   Symtab->addFile(Obj);
352 }
353 
354 // Returns a string in the format of "foo.o" or "foo.a(bar.o)".
355 std::string lld::toString(const wasm::InputFile *File) {
356   if (!File)
357     return "<internal>";
358 
359   if (File->ParentName.empty())
360     return File->getName();
361 
362   return (File->ParentName + "(" + File->getName() + ")").str();
363 }
364