1 //===- InputFiles.cpp -----------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "InputFiles.h"
11 
12 #include "Config.h"
13 #include "InputChunks.h"
14 #include "SymbolTable.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "lld/Common/Memory.h"
17 #include "llvm/Object/Binary.h"
18 #include "llvm/Object/Wasm.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "lld"
22 
23 using namespace lld;
24 using namespace lld::wasm;
25 
26 using namespace llvm;
27 using namespace llvm::object;
28 using namespace llvm::wasm;
29 
30 Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) {
31   log("Loading: " + Path);
32 
33   auto MBOrErr = MemoryBuffer::getFile(Path);
34   if (auto EC = MBOrErr.getError()) {
35     error("cannot open " + Path + ": " + EC.message());
36     return None;
37   }
38   std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
39   MemoryBufferRef MBRef = MB->getMemBufferRef();
40   make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership
41 
42   return MBRef;
43 }
44 
45 void ObjFile::dumpInfo() const {
46   log("info for: " + getName() + "\n" +
47       "      Total Functions : " + Twine(FunctionSymbols.size()) + "\n" +
48       "        Total Globals : " + Twine(GlobalSymbols.size()) + "\n" +
49       "     Function Imports : " + Twine(NumFunctionImports) + "\n" +
50       "       Global Imports : " + Twine(NumGlobalImports) + "\n" +
51       "        Table Entries : " + Twine(TableSymbols.size()) + "\n");
52 }
53 
54 uint32_t ObjFile::getRelocatedAddress(uint32_t GlobalIndex) const {
55   return GlobalSymbols[GlobalIndex]->getVirtualAddress();
56 }
57 
58 uint32_t ObjFile::relocateFunctionIndex(uint32_t Original) const {
59   Symbol *Sym = FunctionSymbols[Original];
60   uint32_t Index = Sym->getOutputIndex();
61   DEBUG(dbgs() << "relocateFunctionIndex: " << toString(*Sym) << ": "
62                << Original << " -> " << Index << "\n");
63   return Index;
64 }
65 
66 uint32_t ObjFile::relocateTypeIndex(uint32_t Original) const {
67   return TypeMap[Original];
68 }
69 
70 uint32_t ObjFile::relocateTableIndex(uint32_t Original) const {
71   Symbol *Sym = TableSymbols[Original];
72   uint32_t Index = Sym->hasTableIndex() ? Sym->getTableIndex() : 0;
73   DEBUG(dbgs() << "relocateTableIndex: " << toString(*Sym) << ": " << Original
74                << " -> " << Index << "\n");
75   return Index;
76 }
77 
78 uint32_t ObjFile::relocateGlobalIndex(uint32_t Original) const {
79   Symbol *Sym = GlobalSymbols[Original];
80   uint32_t Index = Sym->hasOutputIndex() ? Sym->getOutputIndex() : 0;
81   DEBUG(dbgs() << "relocateGlobalIndex: " << toString(*Sym) << ": " << Original
82                << " -> " << Index << "\n");
83   return Index;
84 }
85 
86 // Relocations contain an index into the function, global or table index
87 // space of the input file.  This function takes a relocation and returns the
88 // relocated index (i.e. translates from the input index space to the output
89 // index space).
90 uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const {
91   switch (Reloc.Type) {
92   case R_WEBASSEMBLY_TYPE_INDEX_LEB:
93     return relocateTypeIndex(Reloc.Index);
94   case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
95     return relocateFunctionIndex(Reloc.Index);
96   case R_WEBASSEMBLY_TABLE_INDEX_I32:
97   case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
98     return relocateTableIndex(Reloc.Index);
99   case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
100   case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
101   case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
102   case R_WEBASSEMBLY_MEMORY_ADDR_I32:
103     return relocateGlobalIndex(Reloc.Index);
104   default:
105     llvm_unreachable("unknown relocation type");
106   }
107 }
108 
109 void ObjFile::parse() {
110   // Parse a memory buffer as a wasm file.
111   DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n");
112   std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this));
113 
114   auto *Obj = dyn_cast<WasmObjectFile>(Bin.get());
115   if (!Obj)
116     fatal(toString(this) + ": not a wasm file");
117   if (!Obj->isRelocatableObject())
118     fatal(toString(this) + ": not a relocatable wasm file");
119 
120   Bin.release();
121   WasmObj.reset(Obj);
122 
123   // Find the code and data sections.  Wasm objects can have at most one code
124   // and one data section.
125   for (const SectionRef &Sec : WasmObj->sections()) {
126     const WasmSection &Section = WasmObj->getWasmSection(Sec);
127     if (Section.Type == WASM_SEC_CODE)
128       CodeSection = &Section;
129     else if (Section.Type == WASM_SEC_DATA)
130       DataSection = &Section;
131   }
132 
133   initializeSymbols();
134 }
135 
136 // Return the InputSegment in which a given symbol is defined.
137 InputSegment *ObjFile::getSegment(const WasmSymbol &WasmSym) const {
138   uint32_t Address = WasmObj->getWasmSymbolValue(WasmSym);
139   for (InputSegment *Segment : Segments) {
140     if (Address >= Segment->startVA() && Address < Segment->endVA()) {
141       DEBUG(dbgs() << "Found symbol in segment: " << WasmSym.Name << " -> "
142                    << Segment->getName() << "\n");
143 
144       return Segment;
145     }
146   }
147   error("symbol not found in any segment: " + WasmSym.Name);
148   return nullptr;
149 }
150 
151 // Get the value stored in the wasm global represented by this symbol.
152 // This represents the virtual address of the symbol in the input file.
153 uint32_t ObjFile::getGlobalValue(const WasmSymbol &Sym) const {
154   const WasmGlobal &Global =
155       getWasmObj()->globals()[Sym.ElementIndex - NumGlobalImports];
156   assert(Global.Type == llvm::wasm::WASM_TYPE_I32);
157   return Global.InitExpr.Value.Int32;
158 }
159 
160 // Get the signature for a given function symbol, either by looking
161 // it up in function sections (for defined functions), of the imports section
162 // (for imported functions).
163 const WasmSignature *ObjFile::getFunctionSig(const WasmSymbol &Sym) const {
164   DEBUG(dbgs() << "getFunctionSig: " << Sym.Name << "\n");
165   return &WasmObj->types()[Sym.FunctionType];
166 }
167 
168 InputFunction *ObjFile::getFunction(const WasmSymbol &Sym) const {
169   uint32_t FunctionIndex = Sym.ElementIndex - NumFunctionImports;
170   return Functions[FunctionIndex];
171 }
172 
173 bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const {
174   StringRef Comdat = Chunk->getComdat();
175   return !Comdat.empty() && Symtab->findComdat(Comdat) != this;
176 }
177 
178 void ObjFile::initializeSymbols() {
179   Symbols.reserve(WasmObj->getNumberOfSymbols());
180 
181   for (const WasmImport &Import : WasmObj->imports()) {
182     switch (Import.Kind) {
183     case WASM_EXTERNAL_FUNCTION:
184       ++NumFunctionImports;
185       break;
186     case WASM_EXTERNAL_GLOBAL:
187       ++NumGlobalImports;
188       break;
189     }
190   }
191 
192   FunctionSymbols.resize(NumFunctionImports + WasmObj->functions().size());
193   GlobalSymbols.resize(NumGlobalImports + WasmObj->globals().size());
194 
195   ArrayRef<WasmFunction> Funcs = WasmObj->functions();
196   ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes();
197   ArrayRef<WasmSignature> Types = WasmObj->types();
198   ArrayRef<WasmGlobal> Globals = WasmObj->globals();
199 
200   for (const auto &C : WasmObj->comdats())
201     Symtab->addComdat(C, this);
202 
203   FunctionSymbols.resize(NumFunctionImports + Funcs.size());
204   GlobalSymbols.resize(NumGlobalImports + Globals.size());
205 
206   for (const WasmSegment &S : WasmObj->dataSegments()) {
207     InputSegment *Seg = make<InputSegment>(S, this);
208     Seg->copyRelocations(*DataSection);
209     Segments.emplace_back(Seg);
210   }
211 
212   for (size_t I = 0; I < Funcs.size(); ++I) {
213     const WasmFunction &Func = Funcs[I];
214     const WasmSignature &Sig = Types[FuncTypes[I]];
215     InputFunction *F = make<InputFunction>(Sig, &Func, this);
216     F->copyRelocations(*CodeSection);
217     Functions.emplace_back(F);
218   }
219 
220   // Populate `FunctionSymbols` and `GlobalSymbols` based on the WasmSymbols
221   // in the object
222   for (const SymbolRef &Sym : WasmObj->symbols()) {
223     const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl());
224     Symbol *S;
225     switch (WasmSym.Type) {
226     case WasmSymbol::SymbolType::FUNCTION_EXPORT: {
227       InputFunction *Function = getFunction(WasmSym);
228       if (!isExcludedByComdat(Function)) {
229         S = createDefined(WasmSym, Symbol::Kind::DefinedFunctionKind, nullptr,
230                           Function);
231         break;
232       } else {
233         Function->Discarded = true;
234         LLVM_FALLTHROUGH; // Exclude function, and add the symbol as undefined
235       }
236     }
237     case WasmSymbol::SymbolType::FUNCTION_IMPORT:
238       S = createUndefined(WasmSym, Symbol::Kind::UndefinedFunctionKind,
239                           getFunctionSig(WasmSym));
240       break;
241     case WasmSymbol::SymbolType::GLOBAL_EXPORT: {
242       InputSegment *Segment = getSegment(WasmSym);
243       if (!isExcludedByComdat(Segment)) {
244         S = createDefined(WasmSym, Symbol::Kind::DefinedGlobalKind,
245                           Segment, nullptr, getGlobalValue(WasmSym));
246         break;
247       } else {
248         Segment->Discarded = true;
249         LLVM_FALLTHROUGH; // Exclude global, and add the symbol as undefined
250       }
251     }
252     case WasmSymbol::SymbolType::GLOBAL_IMPORT:
253       S = createUndefined(WasmSym, Symbol::Kind::UndefinedGlobalKind);
254       break;
255     }
256 
257     Symbols.push_back(S);
258     if (WasmSym.isFunction()) {
259       FunctionSymbols[WasmSym.ElementIndex] = S;
260       if (WasmSym.HasAltIndex)
261         FunctionSymbols[WasmSym.AltIndex] = S;
262     } else {
263       GlobalSymbols[WasmSym.ElementIndex] = S;
264       if (WasmSym.HasAltIndex)
265         GlobalSymbols[WasmSym.AltIndex] = S;
266     }
267   }
268 
269   DEBUG(for (size_t I = 0; I < FunctionSymbols.size(); ++I)
270             assert(FunctionSymbols[I] != nullptr);
271         for (size_t I = 0; I < GlobalSymbols.size(); ++I)
272             assert(GlobalSymbols[I] != nullptr););
273 
274   // Populate `TableSymbols` with all symbols that are called indirectly
275   uint32_t SegmentCount = WasmObj->elements().size();
276   if (SegmentCount) {
277     if (SegmentCount > 1)
278       fatal(getName() + ": contains more than one element segment");
279     const WasmElemSegment &Segment = WasmObj->elements()[0];
280     if (Segment.Offset.Opcode != WASM_OPCODE_I32_CONST)
281       fatal(getName() + ": unsupported element segment");
282     if (Segment.TableIndex != 0)
283       fatal(getName() + ": unsupported table index in elem segment");
284     uint32_t Offset = Segment.Offset.Value.Int32;
285     TableSymbols.resize(Offset);
286     TableSymbols.reserve(Offset + Segment.Functions.size());
287     for (uint64_t FunctionIndex : Segment.Functions)
288       TableSymbols.push_back(FunctionSymbols[FunctionIndex]);
289   }
290 
291   DEBUG(dbgs() << "TableSymbols: " << TableSymbols.size() << "\n");
292   DEBUG(dbgs() << "Functions   : " << FunctionSymbols.size() << "\n");
293   DEBUG(dbgs() << "Globals     : " << GlobalSymbols.size() << "\n");
294 }
295 
296 Symbol *ObjFile::createUndefined(const WasmSymbol &Sym, Symbol::Kind Kind,
297                                  const WasmSignature *Signature) {
298   return Symtab->addUndefined(Sym.Name, Kind, Sym.Flags, this, Signature);
299 }
300 
301 Symbol *ObjFile::createDefined(const WasmSymbol &Sym, Symbol::Kind Kind,
302                                const InputSegment *Segment,
303                                InputFunction *Function, uint32_t Address) {
304   Symbol *S;
305   if (Sym.isLocal()) {
306     S = make<Symbol>(Sym.Name, true);
307     S->update(Kind, this, Sym.Flags, Segment, Function, Address);
308     return S;
309   }
310   return Symtab->addDefined(Sym.Name, Kind, Sym.Flags, this, Segment, Function,
311                             Address);
312 }
313 
314 void ArchiveFile::parse() {
315   // Parse a MemoryBufferRef as an archive file.
316   DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
317   File = CHECK(Archive::create(MB), toString(this));
318 
319   // Read the symbol table to construct Lazy symbols.
320   int Count = 0;
321   for (const Archive::Symbol &Sym : File->symbols()) {
322     Symtab->addLazy(this, &Sym);
323     ++Count;
324   }
325   DEBUG(dbgs() << "Read " << Count << " symbols\n");
326 }
327 
328 void ArchiveFile::addMember(const Archive::Symbol *Sym) {
329   const Archive::Child &C =
330       CHECK(Sym->getMember(),
331             "could not get the member for symbol " + Sym->getName());
332 
333   // Don't try to load the same member twice (this can happen when members
334   // mutually reference each other).
335   if (!Seen.insert(C.getChildOffset()).second)
336     return;
337 
338   DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n");
339   DEBUG(dbgs() << "from archive: " << toString(this) << "\n");
340 
341   MemoryBufferRef MB =
342       CHECK(C.getMemoryBufferRef(),
343             "could not get the buffer for the member defining symbol " +
344                 Sym->getName());
345 
346   if (identify_magic(MB.getBuffer()) != file_magic::wasm_object) {
347     error("unknown file type: " + MB.getBufferIdentifier());
348     return;
349   }
350 
351   InputFile *Obj = make<ObjFile>(MB);
352   Obj->ParentName = ParentName;
353   Symtab->addFile(Obj);
354 }
355 
356 // Returns a string in the format of "foo.o" or "foo.a(bar.o)".
357 std::string lld::toString(const wasm::InputFile *File) {
358   if (!File)
359     return "<internal>";
360 
361   if (File->ParentName.empty())
362     return File->getName();
363 
364   return (File->ParentName + "(" + File->getName() + ")").str();
365 }
366