1 //===- SymbolTable.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SymbolTable.h"
10 #include "Config.h"
11 #include "InputChunks.h"
12 #include "InputElement.h"
13 #include "WriterUtils.h"
14 #include "lld/Common/CommonLinkerContext.h"
15 #include "llvm/ADT/SetVector.h"
16 
17 #define DEBUG_TYPE "lld"
18 
19 using namespace llvm;
20 using namespace llvm::wasm;
21 using namespace llvm::object;
22 
23 namespace lld {
24 namespace wasm {
25 SymbolTable *symtab;
26 
addFile(InputFile * file)27 void SymbolTable::addFile(InputFile *file) {
28   log("Processing: " + toString(file));
29 
30   // .a file
31   if (auto *f = dyn_cast<ArchiveFile>(file)) {
32     f->parse();
33     return;
34   }
35 
36   // .so file
37   if (auto *f = dyn_cast<SharedFile>(file)) {
38     sharedFiles.push_back(f);
39     return;
40   }
41 
42   if (config->trace)
43     message(toString(file));
44 
45   // LLVM bitcode file
46   if (auto *f = dyn_cast<BitcodeFile>(file)) {
47     f->parse();
48     bitcodeFiles.push_back(f);
49     return;
50   }
51 
52   // Regular object file
53   auto *f = cast<ObjFile>(file);
54   f->parse(false);
55   objectFiles.push_back(f);
56 }
57 
// This function is where all the optimizations of link-time
// optimization happen. When LTO is in use, some input files are
// not in native object file format but in the LLVM bitcode format.
// This function compiles bitcode files into a few big native files
// using LLVM functions and replaces bitcode symbols with the results.
// Because all bitcode files that the program consists of are passed
// to the compiler at once, it can do whole-program optimization.
compileBitcodeFiles()65 void SymbolTable::compileBitcodeFiles() {
66   // Prevent further LTO objects being included
67   BitcodeFile::doneLTO = true;
68 
69   if (bitcodeFiles.empty())
70     return;
71 
72   // Compile bitcode files and replace bitcode symbols.
73   lto.reset(new BitcodeCompiler);
74   for (BitcodeFile *f : bitcodeFiles)
75     lto->add(*f);
76 
77   for (StringRef filename : lto->compile()) {
78     auto *obj = make<ObjFile>(MemoryBufferRef(filename, "lto.tmp"), "");
79     obj->parse(true);
80     objectFiles.push_back(obj);
81   }
82 }
83 
find(StringRef name)84 Symbol *SymbolTable::find(StringRef name) {
85   auto it = symMap.find(CachedHashStringRef(name));
86   if (it == symMap.end() || it->second == -1)
87     return nullptr;
88   return symVector[it->second];
89 }
90 
replace(StringRef name,Symbol * sym)91 void SymbolTable::replace(StringRef name, Symbol* sym) {
92   auto it = symMap.find(CachedHashStringRef(name));
93   symVector[it->second] = sym;
94 }
95 
insertName(StringRef name)96 std::pair<Symbol *, bool> SymbolTable::insertName(StringRef name) {
97   bool trace = false;
98   auto p = symMap.insert({CachedHashStringRef(name), (int)symVector.size()});
99   int &symIndex = p.first->second;
100   bool isNew = p.second;
101   if (symIndex == -1) {
102     symIndex = symVector.size();
103     trace = true;
104     isNew = true;
105   }
106 
107   if (!isNew)
108     return {symVector[symIndex], false};
109 
110   Symbol *sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
111   sym->isUsedInRegularObj = false;
112   sym->canInline = true;
113   sym->traced = trace;
114   sym->forceExport = false;
115   symVector.emplace_back(sym);
116   return {sym, true};
117 }
118 
insert(StringRef name,const InputFile * file)119 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name,
120                                               const InputFile *file) {
121   Symbol *s;
122   bool wasInserted;
123   std::tie(s, wasInserted) = insertName(name);
124 
125   if (!file || file->kind() == InputFile::ObjectKind)
126     s->isUsedInRegularObj = true;
127 
128   return {s, wasInserted};
129 }
130 
reportTypeError(const Symbol * existing,const InputFile * file,llvm::wasm::WasmSymbolType type)131 static void reportTypeError(const Symbol *existing, const InputFile *file,
132                             llvm::wasm::WasmSymbolType type) {
133   error("symbol type mismatch: " + toString(*existing) + "\n>>> defined as " +
134         toString(existing->getWasmType()) + " in " +
135         toString(existing->getFile()) + "\n>>> defined as " + toString(type) +
136         " in " + toString(file));
137 }
138 
// Check that the type of the new symbol matches that of the symbol it is
// replacing.  Returns true if the function types match, false if there is a
// signature mismatch.
signatureMatches(FunctionSymbol * existing,const WasmSignature * newSig)142 static bool signatureMatches(FunctionSymbol *existing,
143                              const WasmSignature *newSig) {
144   const WasmSignature *oldSig = existing->signature;
145 
146   // If either function is missing a signature (this happens for bitcode
147   // symbols) then assume they match.  Any mismatch will be reported later
148   // when the LTO objects are added.
149   if (!newSig || !oldSig)
150     return true;
151 
152   return *newSig == *oldSig;
153 }
154 
checkGlobalType(const Symbol * existing,const InputFile * file,const WasmGlobalType * newType)155 static void checkGlobalType(const Symbol *existing, const InputFile *file,
156                             const WasmGlobalType *newType) {
157   if (!isa<GlobalSymbol>(existing)) {
158     reportTypeError(existing, file, WASM_SYMBOL_TYPE_GLOBAL);
159     return;
160   }
161 
162   const WasmGlobalType *oldType = cast<GlobalSymbol>(existing)->getGlobalType();
163   if (*newType != *oldType) {
164     error("Global type mismatch: " + existing->getName() + "\n>>> defined as " +
165           toString(*oldType) + " in " + toString(existing->getFile()) +
166           "\n>>> defined as " + toString(*newType) + " in " + toString(file));
167   }
168 }
169 
checkTagType(const Symbol * existing,const InputFile * file,const WasmSignature * newSig)170 static void checkTagType(const Symbol *existing, const InputFile *file,
171                          const WasmSignature *newSig) {
172   const auto *existingTag = dyn_cast<TagSymbol>(existing);
173   if (!isa<TagSymbol>(existing)) {
174     reportTypeError(existing, file, WASM_SYMBOL_TYPE_TAG);
175     return;
176   }
177 
178   const WasmSignature *oldSig = existingTag->signature;
179   if (*newSig != *oldSig)
180     warn("Tag signature mismatch: " + existing->getName() +
181          "\n>>> defined as " + toString(*oldSig) + " in " +
182          toString(existing->getFile()) + "\n>>> defined as " +
183          toString(*newSig) + " in " + toString(file));
184 }
185 
checkTableType(const Symbol * existing,const InputFile * file,const WasmTableType * newType)186 static void checkTableType(const Symbol *existing, const InputFile *file,
187                            const WasmTableType *newType) {
188   if (!isa<TableSymbol>(existing)) {
189     reportTypeError(existing, file, WASM_SYMBOL_TYPE_TABLE);
190     return;
191   }
192 
193   const WasmTableType *oldType = cast<TableSymbol>(existing)->getTableType();
194   if (newType->ElemType != oldType->ElemType) {
195     error("Table type mismatch: " + existing->getName() + "\n>>> defined as " +
196           toString(*oldType) + " in " + toString(existing->getFile()) +
197           "\n>>> defined as " + toString(*newType) + " in " + toString(file));
198   }
199   // FIXME: No assertions currently on the limits.
200 }
201 
checkDataType(const Symbol * existing,const InputFile * file)202 static void checkDataType(const Symbol *existing, const InputFile *file) {
203   if (!isa<DataSymbol>(existing))
204     reportTypeError(existing, file, WASM_SYMBOL_TYPE_DATA);
205 }
206 
addSyntheticFunction(StringRef name,uint32_t flags,InputFunction * function)207 DefinedFunction *SymbolTable::addSyntheticFunction(StringRef name,
208                                                    uint32_t flags,
209                                                    InputFunction *function) {
210   LLVM_DEBUG(dbgs() << "addSyntheticFunction: " << name << "\n");
211   assert(!find(name));
212   syntheticFunctions.emplace_back(function);
213   return replaceSymbol<DefinedFunction>(insertName(name).first, name,
214                                         flags, nullptr, function);
215 }
216 
// Adds an optional, linker-generated data symbol.  The symbol will only be
// added if there is an undefined reference to it, or if it is explicitly
// exported via the --export flag.  Otherwise we don't add the symbol and return
// nullptr.
addOptionalDataSymbol(StringRef name,uint64_t value)221 DefinedData *SymbolTable::addOptionalDataSymbol(StringRef name,
222                                                 uint64_t value) {
223   Symbol *s = find(name);
224   if (!s && (config->exportAll || config->exportedSymbols.count(name) != 0))
225     s = insertName(name).first;
226   else if (!s || s->isDefined())
227     return nullptr;
228   LLVM_DEBUG(dbgs() << "addOptionalDataSymbol: " << name << "\n");
229   auto *rtn = replaceSymbol<DefinedData>(s, name, WASM_SYMBOL_VISIBILITY_HIDDEN);
230   rtn->setVA(value);
231   rtn->referenced = true;
232   return rtn;
233 }
234 
addSyntheticDataSymbol(StringRef name,uint32_t flags)235 DefinedData *SymbolTable::addSyntheticDataSymbol(StringRef name,
236                                                  uint32_t flags) {
237   LLVM_DEBUG(dbgs() << "addSyntheticDataSymbol: " << name << "\n");
238   assert(!find(name));
239   return replaceSymbol<DefinedData>(insertName(name).first, name, flags);
240 }
241 
addSyntheticGlobal(StringRef name,uint32_t flags,InputGlobal * global)242 DefinedGlobal *SymbolTable::addSyntheticGlobal(StringRef name, uint32_t flags,
243                                                InputGlobal *global) {
244   LLVM_DEBUG(dbgs() << "addSyntheticGlobal: " << name << " -> " << global
245                     << "\n");
246   assert(!find(name));
247   syntheticGlobals.emplace_back(global);
248   return replaceSymbol<DefinedGlobal>(insertName(name).first, name, flags,
249                                       nullptr, global);
250 }
251 
addOptionalGlobalSymbol(StringRef name,InputGlobal * global)252 DefinedGlobal *SymbolTable::addOptionalGlobalSymbol(StringRef name,
253                                                     InputGlobal *global) {
254   Symbol *s = find(name);
255   if (!s || s->isDefined())
256     return nullptr;
257   LLVM_DEBUG(dbgs() << "addOptionalGlobalSymbol: " << name << " -> " << global
258                     << "\n");
259   syntheticGlobals.emplace_back(global);
260   return replaceSymbol<DefinedGlobal>(s, name, WASM_SYMBOL_VISIBILITY_HIDDEN,
261                                       nullptr, global);
262 }
263 
addSyntheticTable(StringRef name,uint32_t flags,InputTable * table)264 DefinedTable *SymbolTable::addSyntheticTable(StringRef name, uint32_t flags,
265                                              InputTable *table) {
266   LLVM_DEBUG(dbgs() << "addSyntheticTable: " << name << " -> " << table
267                     << "\n");
268   Symbol *s = find(name);
269   assert(!s || s->isUndefined());
270   if (!s)
271     s = insertName(name).first;
272   syntheticTables.emplace_back(table);
273   return replaceSymbol<DefinedTable>(s, name, flags, nullptr, table);
274 }
275 
shouldReplace(const Symbol * existing,InputFile * newFile,uint32_t newFlags)276 static bool shouldReplace(const Symbol *existing, InputFile *newFile,
277                           uint32_t newFlags) {
278   // If existing symbol is undefined, replace it.
279   if (!existing->isDefined()) {
280     LLVM_DEBUG(dbgs() << "resolving existing undefined symbol: "
281                       << existing->getName() << "\n");
282     return true;
283   }
284 
285   // Now we have two defined symbols. If the new one is weak, we can ignore it.
286   if ((newFlags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) {
287     LLVM_DEBUG(dbgs() << "existing symbol takes precedence\n");
288     return false;
289   }
290 
291   // If the existing symbol is weak, we should replace it.
292   if (existing->isWeak()) {
293     LLVM_DEBUG(dbgs() << "replacing existing weak symbol\n");
294     return true;
295   }
296 
297   // Neither symbol is week. They conflict.
298   error("duplicate symbol: " + toString(*existing) + "\n>>> defined in " +
299         toString(existing->getFile()) + "\n>>> defined in " +
300         toString(newFile));
301   return true;
302 }
303 
// Adds a function definition called `name`, provided by `file` and backed by
// `function` (which may be null), and resolves it against any symbol already
// registered under that name.
//
// Returns the symbol that ends up representing this definition: either the
// symbol stored under `name`, or a signature-specific variant of it when the
// new signature conflicts with the existing one.
Symbol *SymbolTable::addDefinedFunction(StringRef name, uint32_t flags,
                                        InputFile *file,
                                        InputFunction *function) {
  LLVM_DEBUG(dbgs() << "addDefinedFunction: " << name << " ["
                    << (function ? toString(function->signature) : "none")
                    << "]\n");
  Symbol *s;
  bool wasInserted;
  std::tie(s, wasInserted) = insert(name, file);

  // Turns `sym` into a DefinedFunction for this definition.  Note that the
  // old signature is always read from `s` (captured by reference), even when
  // `sym` is a variant rather than `s` itself.
  auto replaceSym = [&](Symbol *sym) {
    // If the new defined function doesn't have signature (i.e. bitcode
    // functions) but the old symbol does, then preserve the old signature
    const WasmSignature *oldSig = s->getSignature();
    auto* newSym = replaceSymbol<DefinedFunction>(sym, name, flags, file, function);
    if (!newSym->signature)
      newSym->signature = oldSig;
  };

  // A brand-new or lazy symbol is simply taken over by this definition.
  if (wasInserted || s->isLazy()) {
    replaceSym(s);
    return s;
  }

  // The name is already in use; it has to refer to a function.
  auto existingFunction = dyn_cast<FunctionSymbol>(s);
  if (!existingFunction) {
    reportTypeError(s, file, WASM_SYMBOL_TYPE_FUNCTION);
    return s;
  }

  // For an existing undefined function, the signature is only checked if
  // that function is called directly.
  bool checkSig = true;
  if (auto ud = dyn_cast<UndefinedFunction>(existingFunction))
    checkSig = ud->isCalledDirectly;

  if (checkSig && function && !signatureMatches(existingFunction, &function->signature)) {
    // Signature conflict: resolve through a per-signature variant symbol.
    Symbol* variant;
    if (getFunctionVariant(s, &function->signature, file, &variant))
      // New variant, always replace
      replaceSym(variant);
    else if (shouldReplace(s, file, flags))
      // Variant already exists, replace it after checking shouldReplace
      replaceSym(variant);

    // The variant we found takes the place in the symbol table as the primary
    // variant.
    replace(name, variant);
    return variant;
  }

  // Existing function with matching signature.
  if (shouldReplace(s, file, flags))
    replaceSym(s);

  return s;
}
359 
addDefinedData(StringRef name,uint32_t flags,InputFile * file,InputChunk * segment,uint64_t address,uint64_t size)360 Symbol *SymbolTable::addDefinedData(StringRef name, uint32_t flags,
361                                     InputFile *file, InputChunk *segment,
362                                     uint64_t address, uint64_t size) {
363   LLVM_DEBUG(dbgs() << "addDefinedData:" << name << " addr:" << address
364                     << "\n");
365   Symbol *s;
366   bool wasInserted;
367   std::tie(s, wasInserted) = insert(name, file);
368 
369   auto replaceSym = [&]() {
370     replaceSymbol<DefinedData>(s, name, flags, file, segment, address, size);
371   };
372 
373   if (wasInserted || s->isLazy()) {
374     replaceSym();
375     return s;
376   }
377 
378   checkDataType(s, file);
379 
380   if (shouldReplace(s, file, flags))
381     replaceSym();
382   return s;
383 }
384 
addDefinedGlobal(StringRef name,uint32_t flags,InputFile * file,InputGlobal * global)385 Symbol *SymbolTable::addDefinedGlobal(StringRef name, uint32_t flags,
386                                       InputFile *file, InputGlobal *global) {
387   LLVM_DEBUG(dbgs() << "addDefinedGlobal:" << name << "\n");
388 
389   Symbol *s;
390   bool wasInserted;
391   std::tie(s, wasInserted) = insert(name, file);
392 
393   auto replaceSym = [&]() {
394     replaceSymbol<DefinedGlobal>(s, name, flags, file, global);
395   };
396 
397   if (wasInserted || s->isLazy()) {
398     replaceSym();
399     return s;
400   }
401 
402   checkGlobalType(s, file, &global->getType());
403 
404   if (shouldReplace(s, file, flags))
405     replaceSym();
406   return s;
407 }
408 
addDefinedTag(StringRef name,uint32_t flags,InputFile * file,InputTag * tag)409 Symbol *SymbolTable::addDefinedTag(StringRef name, uint32_t flags,
410                                    InputFile *file, InputTag *tag) {
411   LLVM_DEBUG(dbgs() << "addDefinedTag:" << name << "\n");
412 
413   Symbol *s;
414   bool wasInserted;
415   std::tie(s, wasInserted) = insert(name, file);
416 
417   auto replaceSym = [&]() {
418     replaceSymbol<DefinedTag>(s, name, flags, file, tag);
419   };
420 
421   if (wasInserted || s->isLazy()) {
422     replaceSym();
423     return s;
424   }
425 
426   checkTagType(s, file, &tag->signature);
427 
428   if (shouldReplace(s, file, flags))
429     replaceSym();
430   return s;
431 }
432 
addDefinedTable(StringRef name,uint32_t flags,InputFile * file,InputTable * table)433 Symbol *SymbolTable::addDefinedTable(StringRef name, uint32_t flags,
434                                      InputFile *file, InputTable *table) {
435   LLVM_DEBUG(dbgs() << "addDefinedTable:" << name << "\n");
436 
437   Symbol *s;
438   bool wasInserted;
439   std::tie(s, wasInserted) = insert(name, file);
440 
441   auto replaceSym = [&]() {
442     replaceSymbol<DefinedTable>(s, name, flags, file, table);
443   };
444 
445   if (wasInserted || s->isLazy()) {
446     replaceSym();
447     return s;
448   }
449 
450   checkTableType(s, file, &table->getType());
451 
452   if (shouldReplace(s, file, flags))
453     replaceSym();
454   return s;
455 }
456 
// This function gets called when an undefined symbol is added and there is
// already an existing one in the symbol table.  In this case we check that the
// custom 'import-module' and 'import-field' symbol attributes agree.
// With LTO these attributes are not available when the bitcode is read and only
// become available when the LTO object is read.  In this case we silently
// replace the empty attributes with the valid ones.
463 template <typename T>
setImportAttributes(T * existing,Optional<StringRef> importName,Optional<StringRef> importModule,uint32_t flags,InputFile * file)464 static void setImportAttributes(T *existing, Optional<StringRef> importName,
465                                 Optional<StringRef> importModule,
466                                 uint32_t flags, InputFile *file) {
467   if (importName) {
468     if (!existing->importName)
469       existing->importName = importName;
470     if (existing->importName != importName)
471       error("import name mismatch for symbol: " + toString(*existing) +
472             "\n>>> defined as " + *existing->importName + " in " +
473             toString(existing->getFile()) + "\n>>> defined as " + *importName +
474             " in " + toString(file));
475   }
476 
477   if (importModule) {
478     if (!existing->importModule)
479       existing->importModule = importModule;
480     if (existing->importModule != importModule)
481       error("import module mismatch for symbol: " + toString(*existing) +
482             "\n>>> defined as " + *existing->importModule + " in " +
483             toString(existing->getFile()) + "\n>>> defined as " +
484             *importModule + " in " + toString(file));
485   }
486 
487   // Update symbol binding, if the existing symbol is weak
488   uint32_t binding = flags & WASM_SYMBOL_BINDING_MASK;
489   if (existing->isWeak() && binding != WASM_SYMBOL_BINDING_WEAK) {
490     existing->flags = (existing->flags & ~WASM_SYMBOL_BINDING_MASK) | binding;
491   }
492 }
493 
// Adds an undefined reference to the function `name` coming from `file` and
// resolves it against any symbol already registered under that name.
//
// `flags` must have WASM_SYMBOL_UNDEFINED set (asserted).  `sig` may be null.
// `isCalledDirectly` controls whether a signature mismatch with an existing
// function is acted upon.
//
// Returns the symbol representing this reference; this can be a
// signature-specific variant rather than the symbol stored under `name`.
Symbol *SymbolTable::addUndefinedFunction(StringRef name,
                                          Optional<StringRef> importName,
                                          Optional<StringRef> importModule,
                                          uint32_t flags, InputFile *file,
                                          const WasmSignature *sig,
                                          bool isCalledDirectly) {
  LLVM_DEBUG(dbgs() << "addUndefinedFunction: " << name << " ["
                    << (sig ? toString(*sig) : "none")
                    << "] IsCalledDirectly:" << isCalledDirectly << " flags=0x"
                    << utohexstr(flags) << "\n");
  assert(flags & WASM_SYMBOL_UNDEFINED);

  Symbol *s;
  bool wasInserted;
  std::tie(s, wasInserted) = insert(name, file);
  if (s->traced)
    printTraceSymbolUndefined(name, file);

  // Turns whatever `s` points at *when called* into an UndefinedFunction; `s`
  // is captured by reference, so a later reassignment of `s` is honored.
  auto replaceSym = [&]() {
    replaceSymbol<UndefinedFunction>(s, name, importName, importModule, flags,
                                     file, sig, isCalledDirectly);
  };

  if (wasInserted) {
    // First time this name is seen.
    replaceSym();
  } else if (auto *lazy = dyn_cast<LazySymbol>(s)) {
    // A weak reference only marks the lazy symbol as weak and records the
    // expected signature; any other reference fetches it.
    if ((flags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) {
      lazy->setWeak();
      lazy->signature = sig;
    } else {
      lazy->fetch();
    }
  } else {
    // The name is already in use; it has to refer to a function.
    auto existingFunction = dyn_cast<FunctionSymbol>(s);
    if (!existingFunction) {
      reportTypeError(s, file, WASM_SYMBOL_TYPE_FUNCTION);
      return s;
    }
    // Fill in a missing signature on the existing symbol.
    if (!existingFunction->signature && sig)
      existingFunction->signature = sig;
    auto *existingUndefined = dyn_cast<UndefinedFunction>(existingFunction);
    if (isCalledDirectly && !signatureMatches(existingFunction, sig)) {
      // If the existing undefined functions is not called directly then let
      // this one take precedence.  Otherwise the existing function is either
      // directly called or defined, in which case we need a function variant.
      if (existingUndefined && !existingUndefined->isCalledDirectly)
        replaceSym();
      // getFunctionVariant() rebinds `s` to the variant through its out
      // parameter, so replaceSym() below acts on the variant.
      else if (getFunctionVariant(s, sig, file, &s))
        replaceSym();
    }
    if (existingUndefined) {
      // Reconcile import attributes and binding with the existing undefined
      // function, and remember that it is now called directly if applicable.
      setImportAttributes(existingUndefined, importName, importModule, flags,
                          file);
      if (isCalledDirectly)
        existingUndefined->isCalledDirectly = true;
    }
  }

  return s;
}
554 
addUndefinedData(StringRef name,uint32_t flags,InputFile * file)555 Symbol *SymbolTable::addUndefinedData(StringRef name, uint32_t flags,
556                                       InputFile *file) {
557   LLVM_DEBUG(dbgs() << "addUndefinedData: " << name << "\n");
558   assert(flags & WASM_SYMBOL_UNDEFINED);
559 
560   Symbol *s;
561   bool wasInserted;
562   std::tie(s, wasInserted) = insert(name, file);
563   if (s->traced)
564     printTraceSymbolUndefined(name, file);
565 
566   if (wasInserted) {
567     replaceSymbol<UndefinedData>(s, name, flags, file);
568   } else if (auto *lazy = dyn_cast<LazySymbol>(s)) {
569     if ((flags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK)
570       lazy->setWeak();
571     else
572       lazy->fetch();
573   } else if (s->isDefined()) {
574     checkDataType(s, file);
575   }
576   return s;
577 }
578 
addUndefinedGlobal(StringRef name,Optional<StringRef> importName,Optional<StringRef> importModule,uint32_t flags,InputFile * file,const WasmGlobalType * type)579 Symbol *SymbolTable::addUndefinedGlobal(StringRef name,
580                                         Optional<StringRef> importName,
581                                         Optional<StringRef> importModule,
582                                         uint32_t flags, InputFile *file,
583                                         const WasmGlobalType *type) {
584   LLVM_DEBUG(dbgs() << "addUndefinedGlobal: " << name << "\n");
585   assert(flags & WASM_SYMBOL_UNDEFINED);
586 
587   Symbol *s;
588   bool wasInserted;
589   std::tie(s, wasInserted) = insert(name, file);
590   if (s->traced)
591     printTraceSymbolUndefined(name, file);
592 
593   if (wasInserted)
594     replaceSymbol<UndefinedGlobal>(s, name, importName, importModule, flags,
595                                    file, type);
596   else if (auto *lazy = dyn_cast<LazySymbol>(s))
597     lazy->fetch();
598   else if (s->isDefined())
599     checkGlobalType(s, file, type);
600   return s;
601 }
602 
addUndefinedTable(StringRef name,Optional<StringRef> importName,Optional<StringRef> importModule,uint32_t flags,InputFile * file,const WasmTableType * type)603 Symbol *SymbolTable::addUndefinedTable(StringRef name,
604                                        Optional<StringRef> importName,
605                                        Optional<StringRef> importModule,
606                                        uint32_t flags, InputFile *file,
607                                        const WasmTableType *type) {
608   LLVM_DEBUG(dbgs() << "addUndefinedTable: " << name << "\n");
609   assert(flags & WASM_SYMBOL_UNDEFINED);
610 
611   Symbol *s;
612   bool wasInserted;
613   std::tie(s, wasInserted) = insert(name, file);
614   if (s->traced)
615     printTraceSymbolUndefined(name, file);
616 
617   if (wasInserted)
618     replaceSymbol<UndefinedTable>(s, name, importName, importModule, flags,
619                                   file, type);
620   else if (auto *lazy = dyn_cast<LazySymbol>(s))
621     lazy->fetch();
622   else if (s->isDefined())
623     checkTableType(s, file, type);
624   return s;
625 }
626 
addUndefinedTag(StringRef name,Optional<StringRef> importName,Optional<StringRef> importModule,uint32_t flags,InputFile * file,const WasmSignature * sig)627 Symbol *SymbolTable::addUndefinedTag(StringRef name,
628                                      Optional<StringRef> importName,
629                                      Optional<StringRef> importModule,
630                                      uint32_t flags, InputFile *file,
631                                      const WasmSignature *sig) {
632   LLVM_DEBUG(dbgs() << "addUndefinedTag: " << name << "\n");
633   assert(flags & WASM_SYMBOL_UNDEFINED);
634 
635   Symbol *s;
636   bool wasInserted;
637   std::tie(s, wasInserted) = insert(name, file);
638   if (s->traced)
639     printTraceSymbolUndefined(name, file);
640 
641   if (wasInserted)
642     replaceSymbol<UndefinedTag>(s, name, importName, importModule, flags, file,
643                                 sig);
644   else if (auto *lazy = dyn_cast<LazySymbol>(s))
645     lazy->fetch();
646   else if (s->isDefined())
647     checkTagType(s, file, sig);
648   return s;
649 }
650 
createUndefinedIndirectFunctionTable(StringRef name)651 TableSymbol *SymbolTable::createUndefinedIndirectFunctionTable(StringRef name) {
652   WasmLimits limits{0, 0, 0}; // Set by the writer.
653   WasmTableType *type = make<WasmTableType>();
654   type->ElemType = uint8_t(ValType::FUNCREF);
655   type->Limits = limits;
656   StringRef module(defaultModule);
657   uint32_t flags = config->exportTable ? 0 : WASM_SYMBOL_VISIBILITY_HIDDEN;
658   flags |= WASM_SYMBOL_UNDEFINED;
659   Symbol *sym = addUndefinedTable(name, name, module, flags, nullptr, type);
660   sym->markLive();
661   sym->forceExport = config->exportTable;
662   return cast<TableSymbol>(sym);
663 }
664 
createDefinedIndirectFunctionTable(StringRef name)665 TableSymbol *SymbolTable::createDefinedIndirectFunctionTable(StringRef name) {
666   const uint32_t invalidIndex = -1;
667   WasmLimits limits{0, 0, 0}; // Set by the writer.
668   WasmTableType type{uint8_t(ValType::FUNCREF), limits};
669   WasmTable desc{invalidIndex, type, name};
670   InputTable *table = make<InputTable>(desc, nullptr);
671   uint32_t flags = config->exportTable ? 0 : WASM_SYMBOL_VISIBILITY_HIDDEN;
672   TableSymbol *sym = addSyntheticTable(name, flags, table);
673   sym->markLive();
674   sym->forceExport = config->exportTable;
675   return sym;
676 }
677 
// Whether or not we need an indirect function table is usually a function of
// whether an input declares a need for it.  However sometimes it's possible for
// no input to need the indirect function table, but then a late
// addInternalGOTEntry causes a function to be allocated an address.  In that
// case we synthesize a definition at the last minute.
resolveIndirectFunctionTable(bool required)683 TableSymbol *SymbolTable::resolveIndirectFunctionTable(bool required) {
684   Symbol *existing = find(functionTableName);
685   if (existing) {
686     if (!isa<TableSymbol>(existing)) {
687       error(Twine("reserved symbol must be of type table: `") +
688             functionTableName + "`");
689       return nullptr;
690     }
691     if (existing->isDefined()) {
692       error(Twine("reserved symbol must not be defined in input files: `") +
693             functionTableName + "`");
694       return nullptr;
695     }
696   }
697 
698   if (config->importTable) {
699     if (existing)
700       return cast<TableSymbol>(existing);
701     if (required)
702       return createUndefinedIndirectFunctionTable(functionTableName);
703   } else if ((existing && existing->isLive()) || config->exportTable ||
704              required) {
705     // A defined table is required.  Either because the user request an exported
706     // table or because the table symbol is already live.  The existing table is
707     // guaranteed to be undefined due to the check above.
708     return createDefinedIndirectFunctionTable(functionTableName);
709   }
710 
711   // An indirect function table will only be present in the symbol table if
712   // needed by a reloc; if we get here, we don't need one.
713   return nullptr;
714 }
715 
addLazy(ArchiveFile * file,const Archive::Symbol * sym)716 void SymbolTable::addLazy(ArchiveFile *file, const Archive::Symbol *sym) {
717   LLVM_DEBUG(dbgs() << "addLazy: " << sym->getName() << "\n");
718   StringRef name = sym->getName();
719 
720   Symbol *s;
721   bool wasInserted;
722   std::tie(s, wasInserted) = insertName(name);
723 
724   if (wasInserted) {
725     replaceSymbol<LazySymbol>(s, name, 0, file, *sym);
726     return;
727   }
728 
729   if (!s->isUndefined())
730     return;
731 
732   // The existing symbol is undefined, load a new one from the archive,
733   // unless the existing symbol is weak in which case replace the undefined
734   // symbols with a LazySymbol.
735   if (s->isWeak()) {
736     const WasmSignature *oldSig = nullptr;
737     // In the case of an UndefinedFunction we need to preserve the expected
738     // signature.
739     if (auto *f = dyn_cast<UndefinedFunction>(s))
740       oldSig = f->signature;
741     LLVM_DEBUG(dbgs() << "replacing existing weak undefined symbol\n");
742     auto newSym = replaceSymbol<LazySymbol>(s, name, WASM_SYMBOL_BINDING_WEAK,
743                                             file, *sym);
744     newSym->signature = oldSig;
745     return;
746   }
747 
748   LLVM_DEBUG(dbgs() << "replacing existing undefined\n");
749   file->addMember(sym);
750 }
751 
addComdat(StringRef name)752 bool SymbolTable::addComdat(StringRef name) {
753   return comdatGroups.insert(CachedHashStringRef(name)).second;
754 }
755 
756 // The new signature doesn't match.  Create a variant to the symbol with the
757 // signature encoded in the name and return that instead.  These symbols are
758 // then unified later in handleSymbolVariants.
getFunctionVariant(Symbol * sym,const WasmSignature * sig,const InputFile * file,Symbol ** out)759 bool SymbolTable::getFunctionVariant(Symbol* sym, const WasmSignature *sig,
760                                      const InputFile *file, Symbol **out) {
761   LLVM_DEBUG(dbgs() << "getFunctionVariant: " << sym->getName() << " -> "
762                     << " " << toString(*sig) << "\n");
763   Symbol *variant = nullptr;
764 
765   // Linear search through symbol variants.  Should never be more than two
766   // or three entries here.
767   auto &variants = symVariants[CachedHashStringRef(sym->getName())];
768   if (variants.empty())
769     variants.push_back(sym);
770 
771   for (Symbol* v : variants) {
772     if (*v->getSignature() == *sig) {
773       variant = v;
774       break;
775     }
776   }
777 
778   bool wasAdded = !variant;
779   if (wasAdded) {
780     // Create a new variant;
781     LLVM_DEBUG(dbgs() << "added new variant\n");
782     variant = reinterpret_cast<Symbol *>(make<SymbolUnion>());
783     variant->isUsedInRegularObj =
784         !file || file->kind() == InputFile::ObjectKind;
785     variant->canInline = true;
786     variant->traced = false;
787     variant->forceExport = false;
788     variants.push_back(variant);
789   } else {
790     LLVM_DEBUG(dbgs() << "variant already exists: " << toString(*variant) << "\n");
791     assert(*variant->getSignature() == *sig);
792   }
793 
794   *out = variant;
795   return wasAdded;
796 }
797 
798 // Set a flag for --trace-symbol so that we can print out a log message
799 // if a new symbol with the same name is inserted into the symbol table.
trace(StringRef name)800 void SymbolTable::trace(StringRef name) {
801   symMap.insert({CachedHashStringRef(name), -1});
802 }
803 
wrap(Symbol * sym,Symbol * real,Symbol * wrap)804 void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) {
805   // Swap symbols as instructed by -wrap.
806   int &origIdx = symMap[CachedHashStringRef(sym->getName())];
807   int &realIdx= symMap[CachedHashStringRef(real->getName())];
808   int &wrapIdx = symMap[CachedHashStringRef(wrap->getName())];
809   LLVM_DEBUG(dbgs() << "wrap: " << sym->getName() << "\n");
810 
811   // Anyone looking up __real symbols should get the original
812   realIdx = origIdx;
813   // Anyone looking up the original should get the __wrap symbol
814   origIdx = wrapIdx;
815 }
816 
// Encoded body of a function that consists of a single `unreachable`
// instruction.  It is installed as the body of synthetic stub functions.
static const uint8_t unreachableFn[] = {
    0x03, // ULEB length
    0x00, // ULEB num locals
    0x00, // opcode unreachable
    0x0b, // opcode end
};
821 
822 // Replace the given symbol body with an unreachable function.
823 // This is used by handleWeakUndefines in order to generate a callable
824 // equivalent of an undefined function and also handleSymbolVariants for
825 // undefined functions that don't match the signature of the definition.
replaceWithUnreachable(Symbol * sym,const WasmSignature & sig,StringRef debugName)826 InputFunction *SymbolTable::replaceWithUnreachable(Symbol *sym,
827                                                    const WasmSignature &sig,
828                                                    StringRef debugName) {
829   auto *func = make<SyntheticFunction>(sig, sym->getName(), debugName);
830   func->setBody(unreachableFn);
831   syntheticFunctions.emplace_back(func);
832   // Mark new symbols as local. For relocatable output we don't want them
833   // to be exported outside the object file.
834   replaceSymbol<DefinedFunction>(sym, debugName, WASM_SYMBOL_BINDING_LOCAL,
835                                  nullptr, func);
836   // Ensure the stub function doesn't get a table entry.  Its address
837   // should always compare equal to the null pointer.
838   sym->isStub = true;
839   return func;
840 }
841 
replaceWithUndefined(Symbol * sym)842 void SymbolTable::replaceWithUndefined(Symbol *sym) {
843   // Add a synthetic dummy for weak undefined functions.  These dummies will
844   // be GC'd if not used as the target of any "call" instructions.
845   StringRef debugName = saver().save("undefined_weak:" + toString(*sym));
846   replaceWithUnreachable(sym, *sym->getSignature(), debugName);
847   // Hide our dummy to prevent export.
848   sym->setHidden(true);
849 }
850 
851 // For weak undefined functions, there may be "call" instructions that reference
852 // the symbol. In this case, we need to synthesise a dummy/stub function that
853 // will abort at runtime, so that relocations can still provided an operand to
854 // the call instruction that passes Wasm validation.
handleWeakUndefines()855 void SymbolTable::handleWeakUndefines() {
856   for (Symbol *sym : getSymbols()) {
857     if (sym->isUndefWeak() && sym->isUsedInRegularObj) {
858       if (sym->getSignature()) {
859         replaceWithUndefined(sym);
860       } else {
861         // It is possible for undefined functions not to have a signature (eg.
862         // if added via "--undefined"), but weak undefined ones do have a
863         // signature.  Lazy symbols may not be functions and therefore Sig can
864         // still be null in some circumstance.
865         assert(!isa<FunctionSymbol>(sym));
866       }
867     }
868   }
869 }
870 
createUndefinedStub(const WasmSignature & sig)871 DefinedFunction *SymbolTable::createUndefinedStub(const WasmSignature &sig) {
872   if (stubFunctions.count(sig))
873     return stubFunctions[sig];
874   LLVM_DEBUG(dbgs() << "createUndefinedStub: " << toString(sig) << "\n");
875   auto *sym = reinterpret_cast<DefinedFunction *>(make<SymbolUnion>());
876   sym->isUsedInRegularObj = true;
877   sym->canInline = true;
878   sym->traced = false;
879   sym->forceExport = false;
880   sym->signature = &sig;
881   replaceSymbol<DefinedFunction>(
882       sym, "undefined_stub", WASM_SYMBOL_VISIBILITY_HIDDEN, nullptr, nullptr);
883   replaceWithUnreachable(sym, sig, "undefined_stub");
884   stubFunctions[sig] = sym;
885   return sym;
886 }
887 
reportFunctionSignatureMismatch(StringRef symName,FunctionSymbol * a,FunctionSymbol * b,bool isError)888 static void reportFunctionSignatureMismatch(StringRef symName,
889                                             FunctionSymbol *a,
890                                             FunctionSymbol *b, bool isError) {
891   std::string msg = ("function signature mismatch: " + symName +
892                      "\n>>> defined as " + toString(*a->signature) + " in " +
893                      toString(a->getFile()) + "\n>>> defined as " +
894                      toString(*b->signature) + " in " + toString(b->getFile()))
895                         .str();
896   if (isError)
897     error(msg);
898   else
899     warn(msg);
900 }
901 
902 // Remove any variant symbols that were created due to function signature
903 // mismatches.
handleSymbolVariants()904 void SymbolTable::handleSymbolVariants() {
905   for (auto pair : symVariants) {
906     // Push the initial symbol onto the list of variants.
907     StringRef symName = pair.first.val();
908     std::vector<Symbol *> &variants = pair.second;
909 
910 #ifndef NDEBUG
911     LLVM_DEBUG(dbgs() << "symbol with (" << variants.size()
912                       << ") variants: " << symName << "\n");
913     for (auto *s: variants) {
914       auto *f = cast<FunctionSymbol>(s);
915       LLVM_DEBUG(dbgs() << " variant: " + f->getName() << " "
916                         << toString(*f->signature) << "\n");
917     }
918 #endif
919 
920     // Find the one definition.
921     DefinedFunction *defined = nullptr;
922     for (auto *symbol : variants) {
923       if (auto f = dyn_cast<DefinedFunction>(symbol)) {
924         defined = f;
925         break;
926       }
927     }
928 
929     // If there are no definitions, and the undefined symbols disagree on
930     // the signature, there is not we can do since we don't know which one
931     // to use as the signature on the import.
932     if (!defined) {
933       reportFunctionSignatureMismatch(symName,
934                                       cast<FunctionSymbol>(variants[0]),
935                                       cast<FunctionSymbol>(variants[1]), true);
936       return;
937     }
938 
939     for (auto *symbol : variants) {
940       if (symbol != defined) {
941         auto *f = cast<FunctionSymbol>(symbol);
942         reportFunctionSignatureMismatch(symName, f, defined, false);
943         StringRef debugName =
944             saver().save("signature_mismatch:" + toString(*f));
945         replaceWithUnreachable(f, *f->signature, debugName);
946       }
947     }
948   }
949 }
950 
951 } // namespace wasm
952 } // namespace lld
953