1 //===- lib/Linker/LinkModules.cpp - Module Linker Implementation ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the LLVM module linker.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "LinkDiagnosticInfo.h"
14 #include "llvm-c/Linker.h"
15 #include "llvm/ADT/SetVector.h"
16 #include "llvm/IR/Comdat.h"
17 #include "llvm/IR/DiagnosticPrinter.h"
18 #include "llvm/IR/GlobalValue.h"
19 #include "llvm/IR/LLVMContext.h"
20 #include "llvm/IR/Module.h"
21 #include "llvm/Linker/Linker.h"
22 #include "llvm/Support/Error.h"
23 using namespace llvm;
24 
25 namespace {
26 
27 /// This is an implementation class for the LinkModules function, which is the
28 /// entrypoint for this file.
29 class ModuleLinker {
30   IRMover &Mover;
31   std::unique_ptr<Module> SrcM;
32 
33   SetVector<GlobalValue *> ValuesToLink;
34 
35   /// For symbol clashes, prefer those from Src.
36   unsigned Flags;
37 
38   /// List of global value names that should be internalized.
39   StringSet<> Internalize;
40 
41   /// Function that will perform the actual internalization. The reason for a
42   /// callback is that the linker cannot call internalizeModule without
43   /// creating a circular dependency between IPO and the linker.
44   std::function<void(Module &, const StringSet<> &)> InternalizeCallback;
45 
46   /// Used as the callback for lazy linking.
47   /// The mover has just hit GV and we have to decide if it, and other members
48   /// of the same comdat, should be linked. Every member to be linked is passed
49   /// to Add.
50   void addLazyFor(GlobalValue &GV, const IRMover::ValueAdder &Add);
51 
52   bool shouldOverrideFromSrc() { return Flags & Linker::OverrideFromSrc; }
53   bool shouldLinkOnlyNeeded() { return Flags & Linker::LinkOnlyNeeded; }
54 
55   bool shouldLinkFromSource(bool &LinkFromSrc, const GlobalValue &Dest,
56                             const GlobalValue &Src);
57 
58   /// Should we have mover and linker error diag info?
59   bool emitError(const Twine &Message) {
60     SrcM->getContext().diagnose(LinkDiagnosticInfo(DS_Error, Message));
61     return true;
62   }
63 
64   bool getComdatLeader(Module &M, StringRef ComdatName,
65                        const GlobalVariable *&GVar);
66   bool computeResultingSelectionKind(StringRef ComdatName,
67                                      Comdat::SelectionKind Src,
68                                      Comdat::SelectionKind Dst,
69                                      Comdat::SelectionKind &Result,
70                                      bool &LinkFromSrc);
71   std::map<const Comdat *, std::pair<Comdat::SelectionKind, bool>>
72       ComdatsChosen;
73   bool getComdatResult(const Comdat *SrcC, Comdat::SelectionKind &SK,
74                        bool &LinkFromSrc);
75   // Keep track of the lazy linked global members of each comdat in source.
76   DenseMap<const Comdat *, std::vector<GlobalValue *>> LazyComdatMembers;
77 
78   /// Given a global in the source module, return the global in the
79   /// destination module that is being linked to, if any.
80   GlobalValue *getLinkedToGlobal(const GlobalValue *SrcGV) {
81     Module &DstM = Mover.getModule();
82     // If the source has no name it can't link.  If it has local linkage,
83     // there is no name match-up going on.
84     if (!SrcGV->hasName() || GlobalValue::isLocalLinkage(SrcGV->getLinkage()))
85       return nullptr;
86 
87     // Otherwise see if we have a match in the destination module's symtab.
88     GlobalValue *DGV = DstM.getNamedValue(SrcGV->getName());
89     if (!DGV)
90       return nullptr;
91 
92     // If we found a global with the same name in the dest module, but it has
93     // internal linkage, we are really not doing any linkage here.
94     if (DGV->hasLocalLinkage())
95       return nullptr;
96 
97     // Otherwise, we do in fact link to the destination global.
98     return DGV;
99   }
100 
101   /// Drop GV if it is a member of a comdat that we are dropping.
102   /// This can happen with COFF's largest selection kind.
103   void dropReplacedComdat(GlobalValue &GV,
104                           const DenseSet<const Comdat *> &ReplacedDstComdats);
105 
106   bool linkIfNeeded(GlobalValue &GV);
107 
108 public:
109   ModuleLinker(IRMover &Mover, std::unique_ptr<Module> SrcM, unsigned Flags,
110                std::function<void(Module &, const StringSet<> &)>
111                    InternalizeCallback = {})
112       : Mover(Mover), SrcM(std::move(SrcM)), Flags(Flags),
113         InternalizeCallback(std::move(InternalizeCallback)) {}
114 
115   bool run();
116 };
117 }
118 
119 static GlobalValue::VisibilityTypes
120 getMinVisibility(GlobalValue::VisibilityTypes A,
121                  GlobalValue::VisibilityTypes B) {
122   if (A == GlobalValue::HiddenVisibility || B == GlobalValue::HiddenVisibility)
123     return GlobalValue::HiddenVisibility;
124   if (A == GlobalValue::ProtectedVisibility ||
125       B == GlobalValue::ProtectedVisibility)
126     return GlobalValue::ProtectedVisibility;
127   return GlobalValue::DefaultVisibility;
128 }
129 
130 bool ModuleLinker::getComdatLeader(Module &M, StringRef ComdatName,
131                                    const GlobalVariable *&GVar) {
132   const GlobalValue *GVal = M.getNamedValue(ComdatName);
133   if (const auto *GA = dyn_cast_or_null<GlobalAlias>(GVal)) {
134     GVal = GA->getBaseObject();
135     if (!GVal)
136       // We cannot resolve the size of the aliasee yet.
137       return emitError("Linking COMDATs named '" + ComdatName +
138                        "': COMDAT key involves incomputable alias size.");
139   }
140 
141   GVar = dyn_cast_or_null<GlobalVariable>(GVal);
142   if (!GVar)
143     return emitError(
144         "Linking COMDATs named '" + ComdatName +
145         "': GlobalVariable required for data dependent selection!");
146 
147   return false;
148 }
149 
150 bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName,
151                                                  Comdat::SelectionKind Src,
152                                                  Comdat::SelectionKind Dst,
153                                                  Comdat::SelectionKind &Result,
154                                                  bool &LinkFromSrc) {
155   Module &DstM = Mover.getModule();
156   // The ability to mix Comdat::SelectionKind::Any with
157   // Comdat::SelectionKind::Largest is a behavior that comes from COFF.
158   bool DstAnyOrLargest = Dst == Comdat::SelectionKind::Any ||
159                          Dst == Comdat::SelectionKind::Largest;
160   bool SrcAnyOrLargest = Src == Comdat::SelectionKind::Any ||
161                          Src == Comdat::SelectionKind::Largest;
162   if (DstAnyOrLargest && SrcAnyOrLargest) {
163     if (Dst == Comdat::SelectionKind::Largest ||
164         Src == Comdat::SelectionKind::Largest)
165       Result = Comdat::SelectionKind::Largest;
166     else
167       Result = Comdat::SelectionKind::Any;
168   } else if (Src == Dst) {
169     Result = Dst;
170   } else {
171     return emitError("Linking COMDATs named '" + ComdatName +
172                      "': invalid selection kinds!");
173   }
174 
175   switch (Result) {
176   case Comdat::SelectionKind::Any:
177     // Go with Dst.
178     LinkFromSrc = false;
179     break;
180   case Comdat::SelectionKind::NoDeduplicate:
181     return emitError("Linking COMDATs named '" + ComdatName +
182                      "': nodeduplicate has been violated!");
183   case Comdat::SelectionKind::ExactMatch:
184   case Comdat::SelectionKind::Largest:
185   case Comdat::SelectionKind::SameSize: {
186     const GlobalVariable *DstGV;
187     const GlobalVariable *SrcGV;
188     if (getComdatLeader(DstM, ComdatName, DstGV) ||
189         getComdatLeader(*SrcM, ComdatName, SrcGV))
190       return true;
191 
192     const DataLayout &DstDL = DstM.getDataLayout();
193     const DataLayout &SrcDL = SrcM->getDataLayout();
194     uint64_t DstSize = DstDL.getTypeAllocSize(DstGV->getValueType());
195     uint64_t SrcSize = SrcDL.getTypeAllocSize(SrcGV->getValueType());
196     if (Result == Comdat::SelectionKind::ExactMatch) {
197       if (SrcGV->getInitializer() != DstGV->getInitializer())
198         return emitError("Linking COMDATs named '" + ComdatName +
199                          "': ExactMatch violated!");
200       LinkFromSrc = false;
201     } else if (Result == Comdat::SelectionKind::Largest) {
202       LinkFromSrc = SrcSize > DstSize;
203     } else if (Result == Comdat::SelectionKind::SameSize) {
204       if (SrcSize != DstSize)
205         return emitError("Linking COMDATs named '" + ComdatName +
206                          "': SameSize violated!");
207       LinkFromSrc = false;
208     } else {
209       llvm_unreachable("unknown selection kind");
210     }
211     break;
212   }
213   }
214 
215   return false;
216 }
217 
218 bool ModuleLinker::getComdatResult(const Comdat *SrcC,
219                                    Comdat::SelectionKind &Result,
220                                    bool &LinkFromSrc) {
221   Module &DstM = Mover.getModule();
222   Comdat::SelectionKind SSK = SrcC->getSelectionKind();
223   StringRef ComdatName = SrcC->getName();
224   Module::ComdatSymTabType &ComdatSymTab = DstM.getComdatSymbolTable();
225   Module::ComdatSymTabType::iterator DstCI = ComdatSymTab.find(ComdatName);
226 
227   if (DstCI == ComdatSymTab.end()) {
228     // Use the comdat if it is only available in one of the modules.
229     LinkFromSrc = true;
230     Result = SSK;
231     return false;
232   }
233 
234   const Comdat *DstC = &DstCI->second;
235   Comdat::SelectionKind DSK = DstC->getSelectionKind();
236   return computeResultingSelectionKind(ComdatName, SSK, DSK, Result,
237                                        LinkFromSrc);
238 }
239 
240 bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc,
241                                         const GlobalValue &Dest,
242                                         const GlobalValue &Src) {
243 
244   // Should we unconditionally use the Src?
245   if (shouldOverrideFromSrc()) {
246     LinkFromSrc = true;
247     return false;
248   }
249 
250   // We always have to add Src if it has appending linkage.
251   if (Src.hasAppendingLinkage() || Dest.hasAppendingLinkage()) {
252     LinkFromSrc = true;
253     return false;
254   }
255 
256   bool SrcIsDeclaration = Src.isDeclarationForLinker();
257   bool DestIsDeclaration = Dest.isDeclarationForLinker();
258 
259   if (SrcIsDeclaration) {
260     // If Src is external or if both Src & Dest are external..  Just link the
261     // external globals, we aren't adding anything.
262     if (Src.hasDLLImportStorageClass()) {
263       // If one of GVs is marked as DLLImport, result should be dllimport'ed.
264       LinkFromSrc = DestIsDeclaration;
265       return false;
266     }
267     // If the Dest is weak, use the source linkage.
268     if (Dest.hasExternalWeakLinkage()) {
269       LinkFromSrc = true;
270       return false;
271     }
272     // Link an available_externally over a declaration.
273     LinkFromSrc = !Src.isDeclaration() && Dest.isDeclaration();
274     return false;
275   }
276 
277   if (DestIsDeclaration) {
278     // If Dest is external but Src is not:
279     LinkFromSrc = true;
280     return false;
281   }
282 
283   if (Src.hasCommonLinkage()) {
284     if (Dest.hasLinkOnceLinkage() || Dest.hasWeakLinkage()) {
285       LinkFromSrc = true;
286       return false;
287     }
288 
289     if (!Dest.hasCommonLinkage()) {
290       LinkFromSrc = false;
291       return false;
292     }
293 
294     const DataLayout &DL = Dest.getParent()->getDataLayout();
295     uint64_t DestSize = DL.getTypeAllocSize(Dest.getValueType());
296     uint64_t SrcSize = DL.getTypeAllocSize(Src.getValueType());
297     LinkFromSrc = SrcSize > DestSize;
298     return false;
299   }
300 
301   if (Src.isWeakForLinker()) {
302     assert(!Dest.hasExternalWeakLinkage());
303     assert(!Dest.hasAvailableExternallyLinkage());
304 
305     if (Dest.hasLinkOnceLinkage() && Src.hasWeakLinkage()) {
306       LinkFromSrc = true;
307       return false;
308     }
309 
310     LinkFromSrc = false;
311     return false;
312   }
313 
314   if (Dest.isWeakForLinker()) {
315     assert(Src.hasExternalLinkage());
316     LinkFromSrc = true;
317     return false;
318   }
319 
320   assert(!Src.hasExternalWeakLinkage());
321   assert(!Dest.hasExternalWeakLinkage());
322   assert(Dest.hasExternalLinkage() && Src.hasExternalLinkage() &&
323          "Unexpected linkage type!");
324   return emitError("Linking globals named '" + Src.getName() +
325                    "': symbol multiply defined!");
326 }
327 
328 bool ModuleLinker::linkIfNeeded(GlobalValue &GV) {
329   GlobalValue *DGV = getLinkedToGlobal(&GV);
330 
331   if (shouldLinkOnlyNeeded()) {
332     // Always import variables with appending linkage.
333     if (!GV.hasAppendingLinkage()) {
334       // Don't import globals unless they are referenced by the destination
335       // module.
336       if (!DGV)
337         return false;
338       // Don't import globals that are already defined in the destination module
339       if (!DGV->isDeclaration())
340         return false;
341     }
342   }
343 
344   if (DGV && !GV.hasLocalLinkage() && !GV.hasAppendingLinkage()) {
345     auto *DGVar = dyn_cast<GlobalVariable>(DGV);
346     auto *SGVar = dyn_cast<GlobalVariable>(&GV);
347     if (DGVar && SGVar) {
348       if (DGVar->isDeclaration() && SGVar->isDeclaration() &&
349           (!DGVar->isConstant() || !SGVar->isConstant())) {
350         DGVar->setConstant(false);
351         SGVar->setConstant(false);
352       }
353       if (DGVar->hasCommonLinkage() && SGVar->hasCommonLinkage()) {
354         MaybeAlign Align(
355             std::max(DGVar->getAlignment(), SGVar->getAlignment()));
356         SGVar->setAlignment(Align);
357         DGVar->setAlignment(Align);
358       }
359     }
360 
361     GlobalValue::VisibilityTypes Visibility =
362         getMinVisibility(DGV->getVisibility(), GV.getVisibility());
363     DGV->setVisibility(Visibility);
364     GV.setVisibility(Visibility);
365 
366     GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::getMinUnnamedAddr(
367         DGV->getUnnamedAddr(), GV.getUnnamedAddr());
368     DGV->setUnnamedAddr(UnnamedAddr);
369     GV.setUnnamedAddr(UnnamedAddr);
370   }
371 
372   if (!DGV && !shouldOverrideFromSrc() &&
373       (GV.hasLocalLinkage() || GV.hasLinkOnceLinkage() ||
374        GV.hasAvailableExternallyLinkage()))
375     return false;
376 
377   if (GV.isDeclaration())
378     return false;
379 
380   if (const Comdat *SC = GV.getComdat()) {
381     bool LinkFromSrc;
382     Comdat::SelectionKind SK;
383     std::tie(SK, LinkFromSrc) = ComdatsChosen[SC];
384     if (!LinkFromSrc)
385       return false;
386   }
387 
388   bool LinkFromSrc = true;
389   if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, GV))
390     return true;
391   if (LinkFromSrc)
392     ValuesToLink.insert(&GV);
393   return false;
394 }
395 
396 void ModuleLinker::addLazyFor(GlobalValue &GV, const IRMover::ValueAdder &Add) {
397   // Add these to the internalize list
398   if (!GV.hasLinkOnceLinkage() && !GV.hasAvailableExternallyLinkage() &&
399       !shouldLinkOnlyNeeded())
400     return;
401 
402   if (InternalizeCallback)
403     Internalize.insert(GV.getName());
404   Add(GV);
405 
406   const Comdat *SC = GV.getComdat();
407   if (!SC)
408     return;
409   for (GlobalValue *GV2 : LazyComdatMembers[SC]) {
410     GlobalValue *DGV = getLinkedToGlobal(GV2);
411     bool LinkFromSrc = true;
412     if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, *GV2))
413       return;
414     if (!LinkFromSrc)
415       continue;
416     if (InternalizeCallback)
417       Internalize.insert(GV2->getName());
418     Add(*GV2);
419   }
420 }
421 
422 void ModuleLinker::dropReplacedComdat(
423     GlobalValue &GV, const DenseSet<const Comdat *> &ReplacedDstComdats) {
424   Comdat *C = GV.getComdat();
425   if (!C)
426     return;
427   if (!ReplacedDstComdats.count(C))
428     return;
429   if (GV.use_empty()) {
430     GV.eraseFromParent();
431     return;
432   }
433 
434   if (auto *F = dyn_cast<Function>(&GV)) {
435     F->deleteBody();
436   } else if (auto *Var = dyn_cast<GlobalVariable>(&GV)) {
437     Var->setInitializer(nullptr);
438   } else {
439     auto &Alias = cast<GlobalAlias>(GV);
440     Module &M = *Alias.getParent();
441     GlobalValue *Declaration;
442     if (auto *FTy = dyn_cast<FunctionType>(Alias.getValueType())) {
443       Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage, "", &M);
444     } else {
445       Declaration =
446           new GlobalVariable(M, Alias.getValueType(), /*isConstant*/ false,
447                              GlobalValue::ExternalLinkage,
448                              /*Initializer*/ nullptr);
449     }
450     Declaration->takeName(&Alias);
451     Alias.replaceAllUsesWith(Declaration);
452     Alias.eraseFromParent();
453   }
454 }
455 
456 bool ModuleLinker::run() {
457   Module &DstM = Mover.getModule();
458   DenseSet<const Comdat *> ReplacedDstComdats;
459 
460   for (const auto &SMEC : SrcM->getComdatSymbolTable()) {
461     const Comdat &C = SMEC.getValue();
462     if (ComdatsChosen.count(&C))
463       continue;
464     Comdat::SelectionKind SK;
465     bool LinkFromSrc;
466     if (getComdatResult(&C, SK, LinkFromSrc))
467       return true;
468     ComdatsChosen[&C] = std::make_pair(SK, LinkFromSrc);
469 
470     if (!LinkFromSrc)
471       continue;
472 
473     Module::ComdatSymTabType &ComdatSymTab = DstM.getComdatSymbolTable();
474     Module::ComdatSymTabType::iterator DstCI = ComdatSymTab.find(C.getName());
475     if (DstCI == ComdatSymTab.end())
476       continue;
477 
478     // The source comdat is replacing the dest one.
479     const Comdat *DstC = &DstCI->second;
480     ReplacedDstComdats.insert(DstC);
481   }
482 
483   // Alias have to go first, since we are not able to find their comdats
484   // otherwise.
485   for (auto I = DstM.alias_begin(), E = DstM.alias_end(); I != E;) {
486     GlobalAlias &GV = *I++;
487     dropReplacedComdat(GV, ReplacedDstComdats);
488   }
489 
490   for (auto I = DstM.global_begin(), E = DstM.global_end(); I != E;) {
491     GlobalVariable &GV = *I++;
492     dropReplacedComdat(GV, ReplacedDstComdats);
493   }
494 
495   for (auto I = DstM.begin(), E = DstM.end(); I != E;) {
496     Function &GV = *I++;
497     dropReplacedComdat(GV, ReplacedDstComdats);
498   }
499 
500   for (GlobalVariable &GV : SrcM->globals())
501     if (GV.hasLinkOnceLinkage())
502       if (const Comdat *SC = GV.getComdat())
503         LazyComdatMembers[SC].push_back(&GV);
504 
505   for (Function &SF : *SrcM)
506     if (SF.hasLinkOnceLinkage())
507       if (const Comdat *SC = SF.getComdat())
508         LazyComdatMembers[SC].push_back(&SF);
509 
510   for (GlobalAlias &GA : SrcM->aliases())
511     if (GA.hasLinkOnceLinkage())
512       if (const Comdat *SC = GA.getComdat())
513         LazyComdatMembers[SC].push_back(&GA);
514 
515   // Insert all of the globals in src into the DstM module... without linking
516   // initializers (which could refer to functions not yet mapped over).
517   for (GlobalVariable &GV : SrcM->globals())
518     if (linkIfNeeded(GV))
519       return true;
520 
521   for (Function &SF : *SrcM)
522     if (linkIfNeeded(SF))
523       return true;
524 
525   for (GlobalAlias &GA : SrcM->aliases())
526     if (linkIfNeeded(GA))
527       return true;
528 
529   for (GlobalIFunc &GI : SrcM->ifuncs())
530     if (linkIfNeeded(GI))
531       return true;
532 
533   for (unsigned I = 0; I < ValuesToLink.size(); ++I) {
534     GlobalValue *GV = ValuesToLink[I];
535     const Comdat *SC = GV->getComdat();
536     if (!SC)
537       continue;
538     for (GlobalValue *GV2 : LazyComdatMembers[SC]) {
539       GlobalValue *DGV = getLinkedToGlobal(GV2);
540       bool LinkFromSrc = true;
541       if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, *GV2))
542         return true;
543       if (LinkFromSrc)
544         ValuesToLink.insert(GV2);
545     }
546   }
547 
548   if (InternalizeCallback) {
549     for (GlobalValue *GV : ValuesToLink)
550       Internalize.insert(GV->getName());
551   }
552 
553   // FIXME: Propagate Errors through to the caller instead of emitting
554   // diagnostics.
555   bool HasErrors = false;
556   if (Error E = Mover.move(std::move(SrcM), ValuesToLink.getArrayRef(),
557                            [this](GlobalValue &GV, IRMover::ValueAdder Add) {
558                              addLazyFor(GV, Add);
559                            },
560                            /* IsPerformingImport */ false)) {
561     handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
562       DstM.getContext().diagnose(LinkDiagnosticInfo(DS_Error, EIB.message()));
563       HasErrors = true;
564     });
565   }
566   if (HasErrors)
567     return true;
568 
569   if (InternalizeCallback)
570     InternalizeCallback(DstM, Internalize);
571 
572   return false;
573 }
574 
575 Linker::Linker(Module &M) : Mover(M) {}
576 
577 bool Linker::linkInModule(
578     std::unique_ptr<Module> Src, unsigned Flags,
579     std::function<void(Module &, const StringSet<> &)> InternalizeCallback) {
580   ModuleLinker ModLinker(Mover, std::move(Src), Flags,
581                          std::move(InternalizeCallback));
582   return ModLinker.run();
583 }
584 
585 //===----------------------------------------------------------------------===//
586 // LinkModules entrypoint.
587 //===----------------------------------------------------------------------===//
588 
589 /// This function links two modules together, with the resulting Dest module
590 /// modified to be the composite of the two input modules. If an error occurs,
591 /// true is returned and ErrorMsg (if not null) is set to indicate the problem.
592 /// Upon failure, the Dest module could be in a modified state, and shouldn't be
593 /// relied on to be consistent.
594 bool Linker::linkModules(
595     Module &Dest, std::unique_ptr<Module> Src, unsigned Flags,
596     std::function<void(Module &, const StringSet<> &)> InternalizeCallback) {
597   Linker L(Dest);
598   return L.linkInModule(std::move(Src), Flags, std::move(InternalizeCallback));
599 }
600 
601 //===----------------------------------------------------------------------===//
602 // C API.
603 //===----------------------------------------------------------------------===//
604 
605 LLVMBool LLVMLinkModules2(LLVMModuleRef Dest, LLVMModuleRef Src) {
606   Module *D = unwrap(Dest);
607   std::unique_ptr<Module> M(unwrap(Src));
608   return Linker::linkModules(*D, std::move(M));
609 }
610