1 //===- lib/Linker/LinkModules.cpp - Module Linker Implementation ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the LLVM module linker.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "LinkDiagnosticInfo.h"
14 #include "llvm-c/Linker.h"
15 #include "llvm/ADT/SetVector.h"
16 #include "llvm/IR/Comdat.h"
17 #include "llvm/IR/DiagnosticPrinter.h"
18 #include "llvm/IR/GlobalValue.h"
19 #include "llvm/IR/LLVMContext.h"
20 #include "llvm/IR/Module.h"
21 #include "llvm/Linker/Linker.h"
22 #include "llvm/Support/Error.h"
23 using namespace llvm;
24 
25 namespace {
26 
/// Identifies which of the two modules being linked a comdat is taken from.
enum class LinkFrom {
  Dst, ///< Keep what the destination module already has.
  Src  ///< Take it from the source module.
};
28 
29 /// This is an implementation class for the LinkModules function, which is the
30 /// entrypoint for this file.
31 class ModuleLinker {
32   IRMover &Mover;
33   std::unique_ptr<Module> SrcM;
34 
35   SetVector<GlobalValue *> ValuesToLink;
36 
37   /// For symbol clashes, prefer those from Src.
38   unsigned Flags;
39 
40   /// List of global value names that should be internalized.
41   StringSet<> Internalize;
42 
43   /// Function that will perform the actual internalization. The reason for a
44   /// callback is that the linker cannot call internalizeModule without
45   /// creating a circular dependency between IPO and the linker.
46   std::function<void(Module &, const StringSet<> &)> InternalizeCallback;
47 
48   /// Used as the callback for lazy linking.
49   /// The mover has just hit GV and we have to decide if it, and other members
50   /// of the same comdat, should be linked. Every member to be linked is passed
51   /// to Add.
52   void addLazyFor(GlobalValue &GV, const IRMover::ValueAdder &Add);
53 
54   bool shouldOverrideFromSrc() { return Flags & Linker::OverrideFromSrc; }
55   bool shouldLinkOnlyNeeded() { return Flags & Linker::LinkOnlyNeeded; }
56 
57   bool shouldLinkFromSource(bool &LinkFromSrc, const GlobalValue &Dest,
58                             const GlobalValue &Src);
59 
60   /// Should we have mover and linker error diag info?
61   bool emitError(const Twine &Message) {
62     SrcM->getContext().diagnose(LinkDiagnosticInfo(DS_Error, Message));
63     return true;
64   }
65 
66   bool getComdatLeader(Module &M, StringRef ComdatName,
67                        const GlobalVariable *&GVar);
68   bool computeResultingSelectionKind(StringRef ComdatName,
69                                      Comdat::SelectionKind Src,
70                                      Comdat::SelectionKind Dst,
71                                      Comdat::SelectionKind &Result,
72                                      LinkFrom &From);
73   DenseMap<const Comdat *, std::pair<Comdat::SelectionKind, LinkFrom>>
74       ComdatsChosen;
75   bool getComdatResult(const Comdat *SrcC, Comdat::SelectionKind &SK,
76                        LinkFrom &From);
77   // Keep track of the lazy linked global members of each comdat in source.
78   DenseMap<const Comdat *, std::vector<GlobalValue *>> LazyComdatMembers;
79 
80   /// Given a global in the source module, return the global in the
81   /// destination module that is being linked to, if any.
82   GlobalValue *getLinkedToGlobal(const GlobalValue *SrcGV) {
83     Module &DstM = Mover.getModule();
84     // If the source has no name it can't link.  If it has local linkage,
85     // there is no name match-up going on.
86     if (!SrcGV->hasName() || GlobalValue::isLocalLinkage(SrcGV->getLinkage()))
87       return nullptr;
88 
89     // Otherwise see if we have a match in the destination module's symtab.
90     GlobalValue *DGV = DstM.getNamedValue(SrcGV->getName());
91     if (!DGV)
92       return nullptr;
93 
94     // If we found a global with the same name in the dest module, but it has
95     // internal linkage, we are really not doing any linkage here.
96     if (DGV->hasLocalLinkage())
97       return nullptr;
98 
99     // Otherwise, we do in fact link to the destination global.
100     return DGV;
101   }
102 
103   /// Drop GV if it is a member of a comdat that we are dropping.
104   /// This can happen with COFF's largest selection kind.
105   void dropReplacedComdat(GlobalValue &GV,
106                           const DenseSet<const Comdat *> &ReplacedDstComdats);
107 
108   bool linkIfNeeded(GlobalValue &GV);
109 
110 public:
111   ModuleLinker(IRMover &Mover, std::unique_ptr<Module> SrcM, unsigned Flags,
112                std::function<void(Module &, const StringSet<> &)>
113                    InternalizeCallback = {})
114       : Mover(Mover), SrcM(std::move(SrcM)), Flags(Flags),
115         InternalizeCallback(std::move(InternalizeCallback)) {}
116 
117   bool run();
118 };
119 } // namespace
120 
121 static GlobalValue::VisibilityTypes
122 getMinVisibility(GlobalValue::VisibilityTypes A,
123                  GlobalValue::VisibilityTypes B) {
124   if (A == GlobalValue::HiddenVisibility || B == GlobalValue::HiddenVisibility)
125     return GlobalValue::HiddenVisibility;
126   if (A == GlobalValue::ProtectedVisibility ||
127       B == GlobalValue::ProtectedVisibility)
128     return GlobalValue::ProtectedVisibility;
129   return GlobalValue::DefaultVisibility;
130 }
131 
132 bool ModuleLinker::getComdatLeader(Module &M, StringRef ComdatName,
133                                    const GlobalVariable *&GVar) {
134   const GlobalValue *GVal = M.getNamedValue(ComdatName);
135   if (const auto *GA = dyn_cast_or_null<GlobalAlias>(GVal)) {
136     GVal = GA->getBaseObject();
137     if (!GVal)
138       // We cannot resolve the size of the aliasee yet.
139       return emitError("Linking COMDATs named '" + ComdatName +
140                        "': COMDAT key involves incomputable alias size.");
141   }
142 
143   GVar = dyn_cast_or_null<GlobalVariable>(GVal);
144   if (!GVar)
145     return emitError(
146         "Linking COMDATs named '" + ComdatName +
147         "': GlobalVariable required for data dependent selection!");
148 
149   return false;
150 }
151 
152 bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName,
153                                                  Comdat::SelectionKind Src,
154                                                  Comdat::SelectionKind Dst,
155                                                  Comdat::SelectionKind &Result,
156                                                  LinkFrom &From) {
157   Module &DstM = Mover.getModule();
158   // The ability to mix Comdat::SelectionKind::Any with
159   // Comdat::SelectionKind::Largest is a behavior that comes from COFF.
160   bool DstAnyOrLargest = Dst == Comdat::SelectionKind::Any ||
161                          Dst == Comdat::SelectionKind::Largest;
162   bool SrcAnyOrLargest = Src == Comdat::SelectionKind::Any ||
163                          Src == Comdat::SelectionKind::Largest;
164   if (DstAnyOrLargest && SrcAnyOrLargest) {
165     if (Dst == Comdat::SelectionKind::Largest ||
166         Src == Comdat::SelectionKind::Largest)
167       Result = Comdat::SelectionKind::Largest;
168     else
169       Result = Comdat::SelectionKind::Any;
170   } else if (Src == Dst) {
171     Result = Dst;
172   } else {
173     return emitError("Linking COMDATs named '" + ComdatName +
174                      "': invalid selection kinds!");
175   }
176 
177   switch (Result) {
178   case Comdat::SelectionKind::Any:
179     // Go with Dst.
180     From = LinkFrom::Dst;
181     break;
182   case Comdat::SelectionKind::NoDeduplicate:
183     return emitError("Linking COMDATs named '" + ComdatName +
184                      "': nodeduplicate has been violated!");
185   case Comdat::SelectionKind::ExactMatch:
186   case Comdat::SelectionKind::Largest:
187   case Comdat::SelectionKind::SameSize: {
188     const GlobalVariable *DstGV;
189     const GlobalVariable *SrcGV;
190     if (getComdatLeader(DstM, ComdatName, DstGV) ||
191         getComdatLeader(*SrcM, ComdatName, SrcGV))
192       return true;
193 
194     const DataLayout &DstDL = DstM.getDataLayout();
195     const DataLayout &SrcDL = SrcM->getDataLayout();
196     uint64_t DstSize = DstDL.getTypeAllocSize(DstGV->getValueType());
197     uint64_t SrcSize = SrcDL.getTypeAllocSize(SrcGV->getValueType());
198     if (Result == Comdat::SelectionKind::ExactMatch) {
199       if (SrcGV->getInitializer() != DstGV->getInitializer())
200         return emitError("Linking COMDATs named '" + ComdatName +
201                          "': ExactMatch violated!");
202       From = LinkFrom::Dst;
203     } else if (Result == Comdat::SelectionKind::Largest) {
204       From = SrcSize > DstSize ? LinkFrom::Src : LinkFrom::Dst;
205     } else if (Result == Comdat::SelectionKind::SameSize) {
206       if (SrcSize != DstSize)
207         return emitError("Linking COMDATs named '" + ComdatName +
208                          "': SameSize violated!");
209       From = LinkFrom::Dst;
210     } else {
211       llvm_unreachable("unknown selection kind");
212     }
213     break;
214   }
215   }
216 
217   return false;
218 }
219 
220 bool ModuleLinker::getComdatResult(const Comdat *SrcC,
221                                    Comdat::SelectionKind &Result,
222                                    LinkFrom &From) {
223   Module &DstM = Mover.getModule();
224   Comdat::SelectionKind SSK = SrcC->getSelectionKind();
225   StringRef ComdatName = SrcC->getName();
226   Module::ComdatSymTabType &ComdatSymTab = DstM.getComdatSymbolTable();
227   Module::ComdatSymTabType::iterator DstCI = ComdatSymTab.find(ComdatName);
228 
229   if (DstCI == ComdatSymTab.end()) {
230     // Use the comdat if it is only available in one of the modules.
231     From = LinkFrom::Src;
232     Result = SSK;
233     return false;
234   }
235 
236   const Comdat *DstC = &DstCI->second;
237   Comdat::SelectionKind DSK = DstC->getSelectionKind();
238   return computeResultingSelectionKind(ComdatName, SSK, DSK, Result, From);
239 }
240 
241 bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc,
242                                         const GlobalValue &Dest,
243                                         const GlobalValue &Src) {
244 
245   // Should we unconditionally use the Src?
246   if (shouldOverrideFromSrc()) {
247     LinkFromSrc = true;
248     return false;
249   }
250 
251   // We always have to add Src if it has appending linkage.
252   if (Src.hasAppendingLinkage() || Dest.hasAppendingLinkage()) {
253     LinkFromSrc = true;
254     return false;
255   }
256 
257   bool SrcIsDeclaration = Src.isDeclarationForLinker();
258   bool DestIsDeclaration = Dest.isDeclarationForLinker();
259 
260   if (SrcIsDeclaration) {
261     // If Src is external or if both Src & Dest are external..  Just link the
262     // external globals, we aren't adding anything.
263     if (Src.hasDLLImportStorageClass()) {
264       // If one of GVs is marked as DLLImport, result should be dllimport'ed.
265       LinkFromSrc = DestIsDeclaration;
266       return false;
267     }
268     // If the Dest is weak, use the source linkage.
269     if (Dest.hasExternalWeakLinkage()) {
270       LinkFromSrc = true;
271       return false;
272     }
273     // Link an available_externally over a declaration.
274     LinkFromSrc = !Src.isDeclaration() && Dest.isDeclaration();
275     return false;
276   }
277 
278   if (DestIsDeclaration) {
279     // If Dest is external but Src is not:
280     LinkFromSrc = true;
281     return false;
282   }
283 
284   if (Src.hasCommonLinkage()) {
285     if (Dest.hasLinkOnceLinkage() || Dest.hasWeakLinkage()) {
286       LinkFromSrc = true;
287       return false;
288     }
289 
290     if (!Dest.hasCommonLinkage()) {
291       LinkFromSrc = false;
292       return false;
293     }
294 
295     const DataLayout &DL = Dest.getParent()->getDataLayout();
296     uint64_t DestSize = DL.getTypeAllocSize(Dest.getValueType());
297     uint64_t SrcSize = DL.getTypeAllocSize(Src.getValueType());
298     LinkFromSrc = SrcSize > DestSize;
299     return false;
300   }
301 
302   if (Src.isWeakForLinker()) {
303     assert(!Dest.hasExternalWeakLinkage());
304     assert(!Dest.hasAvailableExternallyLinkage());
305 
306     if (Dest.hasLinkOnceLinkage() && Src.hasWeakLinkage()) {
307       LinkFromSrc = true;
308       return false;
309     }
310 
311     LinkFromSrc = false;
312     return false;
313   }
314 
315   if (Dest.isWeakForLinker()) {
316     assert(Src.hasExternalLinkage());
317     LinkFromSrc = true;
318     return false;
319   }
320 
321   assert(!Src.hasExternalWeakLinkage());
322   assert(!Dest.hasExternalWeakLinkage());
323   assert(Dest.hasExternalLinkage() && Src.hasExternalLinkage() &&
324          "Unexpected linkage type!");
325   return emitError("Linking globals named '" + Src.getName() +
326                    "': symbol multiply defined!");
327 }
328 
329 bool ModuleLinker::linkIfNeeded(GlobalValue &GV) {
330   GlobalValue *DGV = getLinkedToGlobal(&GV);
331 
332   if (shouldLinkOnlyNeeded()) {
333     // Always import variables with appending linkage.
334     if (!GV.hasAppendingLinkage()) {
335       // Don't import globals unless they are referenced by the destination
336       // module.
337       if (!DGV)
338         return false;
339       // Don't import globals that are already defined in the destination module
340       if (!DGV->isDeclaration())
341         return false;
342     }
343   }
344 
345   if (DGV && !GV.hasLocalLinkage() && !GV.hasAppendingLinkage()) {
346     auto *DGVar = dyn_cast<GlobalVariable>(DGV);
347     auto *SGVar = dyn_cast<GlobalVariable>(&GV);
348     if (DGVar && SGVar) {
349       if (DGVar->isDeclaration() && SGVar->isDeclaration() &&
350           (!DGVar->isConstant() || !SGVar->isConstant())) {
351         DGVar->setConstant(false);
352         SGVar->setConstant(false);
353       }
354       if (DGVar->hasCommonLinkage() && SGVar->hasCommonLinkage()) {
355         MaybeAlign Align(
356             std::max(DGVar->getAlignment(), SGVar->getAlignment()));
357         SGVar->setAlignment(Align);
358         DGVar->setAlignment(Align);
359       }
360     }
361 
362     GlobalValue::VisibilityTypes Visibility =
363         getMinVisibility(DGV->getVisibility(), GV.getVisibility());
364     DGV->setVisibility(Visibility);
365     GV.setVisibility(Visibility);
366 
367     GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::getMinUnnamedAddr(
368         DGV->getUnnamedAddr(), GV.getUnnamedAddr());
369     DGV->setUnnamedAddr(UnnamedAddr);
370     GV.setUnnamedAddr(UnnamedAddr);
371   }
372 
373   if (!DGV && !shouldOverrideFromSrc() &&
374       (GV.hasLocalLinkage() || GV.hasLinkOnceLinkage() ||
375        GV.hasAvailableExternallyLinkage()))
376     return false;
377 
378   if (GV.isDeclaration())
379     return false;
380 
381   LinkFrom ComdatFrom = LinkFrom::Dst;
382   if (const Comdat *SC = GV.getComdat()) {
383     std::tie(std::ignore, ComdatFrom) = ComdatsChosen[SC];
384     if (ComdatFrom == LinkFrom::Dst)
385       return false;
386   }
387 
388   bool LinkFromSrc = true;
389   if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, GV))
390     return true;
391   if (LinkFromSrc)
392     ValuesToLink.insert(&GV);
393   return false;
394 }
395 
396 void ModuleLinker::addLazyFor(GlobalValue &GV, const IRMover::ValueAdder &Add) {
397   // Add these to the internalize list
398   if (!GV.hasLinkOnceLinkage() && !GV.hasAvailableExternallyLinkage() &&
399       !shouldLinkOnlyNeeded())
400     return;
401 
402   if (InternalizeCallback)
403     Internalize.insert(GV.getName());
404   Add(GV);
405 
406   const Comdat *SC = GV.getComdat();
407   if (!SC)
408     return;
409   for (GlobalValue *GV2 : LazyComdatMembers[SC]) {
410     GlobalValue *DGV = getLinkedToGlobal(GV2);
411     bool LinkFromSrc = true;
412     if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, *GV2))
413       return;
414     if (!LinkFromSrc)
415       continue;
416     if (InternalizeCallback)
417       Internalize.insert(GV2->getName());
418     Add(*GV2);
419   }
420 }
421 
422 void ModuleLinker::dropReplacedComdat(
423     GlobalValue &GV, const DenseSet<const Comdat *> &ReplacedDstComdats) {
424   Comdat *C = GV.getComdat();
425   if (!C)
426     return;
427   if (!ReplacedDstComdats.count(C))
428     return;
429   if (GV.use_empty()) {
430     GV.eraseFromParent();
431     return;
432   }
433 
434   if (auto *F = dyn_cast<Function>(&GV)) {
435     F->deleteBody();
436   } else if (auto *Var = dyn_cast<GlobalVariable>(&GV)) {
437     Var->setInitializer(nullptr);
438   } else {
439     auto &Alias = cast<GlobalAlias>(GV);
440     Module &M = *Alias.getParent();
441     GlobalValue *Declaration;
442     if (auto *FTy = dyn_cast<FunctionType>(Alias.getValueType())) {
443       Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage, "", &M);
444     } else {
445       Declaration =
446           new GlobalVariable(M, Alias.getValueType(), /*isConstant*/ false,
447                              GlobalValue::ExternalLinkage,
448                              /*Initializer*/ nullptr);
449     }
450     Declaration->takeName(&Alias);
451     Alias.replaceAllUsesWith(Declaration);
452     Alias.eraseFromParent();
453   }
454 }
455 
456 bool ModuleLinker::run() {
457   Module &DstM = Mover.getModule();
458   DenseSet<const Comdat *> ReplacedDstComdats;
459 
460   for (const auto &SMEC : SrcM->getComdatSymbolTable()) {
461     const Comdat &C = SMEC.getValue();
462     if (ComdatsChosen.count(&C))
463       continue;
464     Comdat::SelectionKind SK;
465     LinkFrom From;
466     if (getComdatResult(&C, SK, From))
467       return true;
468     ComdatsChosen[&C] = std::make_pair(SK, From);
469 
470     if (From != LinkFrom::Src)
471       continue;
472 
473     Module::ComdatSymTabType &ComdatSymTab = DstM.getComdatSymbolTable();
474     Module::ComdatSymTabType::iterator DstCI = ComdatSymTab.find(C.getName());
475     if (DstCI == ComdatSymTab.end())
476       continue;
477 
478     // The source comdat is replacing the dest one.
479     const Comdat *DstC = &DstCI->second;
480     ReplacedDstComdats.insert(DstC);
481   }
482 
483   // Alias have to go first, since we are not able to find their comdats
484   // otherwise.
485   for (auto I = DstM.alias_begin(), E = DstM.alias_end(); I != E;) {
486     GlobalAlias &GV = *I++;
487     dropReplacedComdat(GV, ReplacedDstComdats);
488   }
489 
490   for (auto I = DstM.global_begin(), E = DstM.global_end(); I != E;) {
491     GlobalVariable &GV = *I++;
492     dropReplacedComdat(GV, ReplacedDstComdats);
493   }
494 
495   for (auto I = DstM.begin(), E = DstM.end(); I != E;) {
496     Function &GV = *I++;
497     dropReplacedComdat(GV, ReplacedDstComdats);
498   }
499 
500   for (GlobalVariable &GV : SrcM->globals())
501     if (GV.hasLinkOnceLinkage())
502       if (const Comdat *SC = GV.getComdat())
503         LazyComdatMembers[SC].push_back(&GV);
504 
505   for (Function &SF : *SrcM)
506     if (SF.hasLinkOnceLinkage())
507       if (const Comdat *SC = SF.getComdat())
508         LazyComdatMembers[SC].push_back(&SF);
509 
510   for (GlobalAlias &GA : SrcM->aliases())
511     if (GA.hasLinkOnceLinkage())
512       if (const Comdat *SC = GA.getComdat())
513         LazyComdatMembers[SC].push_back(&GA);
514 
515   // Insert all of the globals in src into the DstM module... without linking
516   // initializers (which could refer to functions not yet mapped over).
517   for (GlobalVariable &GV : SrcM->globals())
518     if (linkIfNeeded(GV))
519       return true;
520 
521   for (Function &SF : *SrcM)
522     if (linkIfNeeded(SF))
523       return true;
524 
525   for (GlobalAlias &GA : SrcM->aliases())
526     if (linkIfNeeded(GA))
527       return true;
528 
529   for (GlobalIFunc &GI : SrcM->ifuncs())
530     if (linkIfNeeded(GI))
531       return true;
532 
533   for (unsigned I = 0; I < ValuesToLink.size(); ++I) {
534     GlobalValue *GV = ValuesToLink[I];
535     const Comdat *SC = GV->getComdat();
536     if (!SC)
537       continue;
538     for (GlobalValue *GV2 : LazyComdatMembers[SC]) {
539       GlobalValue *DGV = getLinkedToGlobal(GV2);
540       bool LinkFromSrc = true;
541       if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, *GV2))
542         return true;
543       if (LinkFromSrc)
544         ValuesToLink.insert(GV2);
545     }
546   }
547 
548   if (InternalizeCallback) {
549     for (GlobalValue *GV : ValuesToLink)
550       Internalize.insert(GV->getName());
551   }
552 
553   // FIXME: Propagate Errors through to the caller instead of emitting
554   // diagnostics.
555   bool HasErrors = false;
556   if (Error E = Mover.move(std::move(SrcM), ValuesToLink.getArrayRef(),
557                            [this](GlobalValue &GV, IRMover::ValueAdder Add) {
558                              addLazyFor(GV, Add);
559                            },
560                            /* IsPerformingImport */ false)) {
561     handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
562       DstM.getContext().diagnose(LinkDiagnosticInfo(DS_Error, EIB.message()));
563       HasErrors = true;
564     });
565   }
566   if (HasErrors)
567     return true;
568 
569   if (InternalizeCallback)
570     InternalizeCallback(DstM, Internalize);
571 
572   return false;
573 }
574 
/// Create a linker for M: M is handed to the Mover member, which
/// linkInModule() later passes on to the ModuleLinker doing the work.
Linker::Linker(Module &M) : Mover(M) {}
576 
577 bool Linker::linkInModule(
578     std::unique_ptr<Module> Src, unsigned Flags,
579     std::function<void(Module &, const StringSet<> &)> InternalizeCallback) {
580   ModuleLinker ModLinker(Mover, std::move(Src), Flags,
581                          std::move(InternalizeCallback));
582   return ModLinker.run();
583 }
584 
585 //===----------------------------------------------------------------------===//
586 // LinkModules entrypoint.
587 //===----------------------------------------------------------------------===//
588 
589 /// This function links two modules together, with the resulting Dest module
590 /// modified to be the composite of the two input modules. If an error occurs,
591 /// true is returned and ErrorMsg (if not null) is set to indicate the problem.
592 /// Upon failure, the Dest module could be in a modified state, and shouldn't be
593 /// relied on to be consistent.
594 bool Linker::linkModules(
595     Module &Dest, std::unique_ptr<Module> Src, unsigned Flags,
596     std::function<void(Module &, const StringSet<> &)> InternalizeCallback) {
597   Linker L(Dest);
598   return L.linkInModule(std::move(Src), Flags, std::move(InternalizeCallback));
599 }
600 
601 //===----------------------------------------------------------------------===//
602 // C API.
603 //===----------------------------------------------------------------------===//
604 
605 LLVMBool LLVMLinkModules2(LLVMModuleRef Dest, LLVMModuleRef Src) {
606   Module *D = unwrap(Dest);
607   std::unique_ptr<Module> M(unwrap(Src));
608   return Linker::linkModules(*D, std::move(M));
609 }
610