1 //===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
11 #include "llvm/Analysis/BasicAliasAnalysis.h"
12 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
13 #include "llvm/Analysis/ProfileSummaryInfo.h"
14 #include "llvm/Analysis/TypeMetadataUtils.h"
15 #include "llvm/Bitcode/BitcodeWriter.h"
16 #include "llvm/IR/Constants.h"
17 #include "llvm/IR/DebugInfo.h"
18 #include "llvm/IR/Intrinsics.h"
19 #include "llvm/IR/Module.h"
20 #include "llvm/IR/PassManager.h"
21 #include "llvm/Object/ModuleSymbolTable.h"
22 #include "llvm/Pass.h"
23 #include "llvm/Support/ScopedPrinter.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include "llvm/Transforms/IPO.h"
26 #include "llvm/Transforms/IPO/FunctionAttrs.h"
27 #include "llvm/Transforms/IPO/FunctionImport.h"
28 #include "llvm/Transforms/Utils/Cloning.h"
29 #include "llvm/Transforms/Utils/ModuleUtils.h"
30 using namespace llvm;
31 
32 namespace {
33 
34 // Promote each local-linkage entity defined by ExportM and used by ImportM by
35 // changing visibility and appending the given ModuleId.
36 void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId,
37                       SetVector<GlobalValue *> &PromoteExtra) {
38   DenseMap<const Comdat *, Comdat *> RenamedComdats;
39   for (auto &ExportGV : ExportM.global_values()) {
40     if (!ExportGV.hasLocalLinkage())
41       continue;
42 
43     auto Name = ExportGV.getName();
44     GlobalValue *ImportGV = nullptr;
45     if (!PromoteExtra.count(&ExportGV)) {
46       ImportGV = ImportM.getNamedValue(Name);
47       if (!ImportGV)
48         continue;
49       ImportGV->removeDeadConstantUsers();
50       if (ImportGV->use_empty()) {
51         ImportGV->eraseFromParent();
52         continue;
53       }
54     }
55 
56     std::string NewName = (Name + ModuleId).str();
57 
58     if (const auto *C = ExportGV.getComdat())
59       if (C->getName() == Name)
60         RenamedComdats.try_emplace(C, ExportM.getOrInsertComdat(NewName));
61 
62     ExportGV.setName(NewName);
63     ExportGV.setLinkage(GlobalValue::ExternalLinkage);
64     ExportGV.setVisibility(GlobalValue::HiddenVisibility);
65 
66     if (ImportGV) {
67       ImportGV->setName(NewName);
68       ImportGV->setVisibility(GlobalValue::HiddenVisibility);
69     }
70   }
71 
72   if (!RenamedComdats.empty())
73     for (auto &GO : ExportM.global_objects())
74       if (auto *C = GO.getComdat()) {
75         auto Replacement = RenamedComdats.find(C);
76         if (Replacement != RenamedComdats.end())
77           GO.setComdat(Replacement->second);
78       }
79 }
80 
81 // Promote all internal (i.e. distinct) type ids used by the module by replacing
82 // them with external type ids formed using the module id.
83 //
84 // Note that this needs to be done before we clone the module because each clone
85 // will receive its own set of distinct metadata nodes.
86 void promoteTypeIds(Module &M, StringRef ModuleId) {
87   DenseMap<Metadata *, Metadata *> LocalToGlobal;
88   auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) {
89     Metadata *MD =
90         cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata();
91 
92     if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
93       Metadata *&GlobalMD = LocalToGlobal[MD];
94       if (!GlobalMD) {
95         std::string NewName = (Twine(LocalToGlobal.size()) + ModuleId).str();
96         GlobalMD = MDString::get(M.getContext(), NewName);
97       }
98 
99       CI->setArgOperand(ArgNo,
100                         MetadataAsValue::get(M.getContext(), GlobalMD));
101     }
102   };
103 
104   if (Function *TypeTestFunc =
105           M.getFunction(Intrinsic::getName(Intrinsic::type_test))) {
106     for (const Use &U : TypeTestFunc->uses()) {
107       auto CI = cast<CallInst>(U.getUser());
108       ExternalizeTypeId(CI, 1);
109     }
110   }
111 
112   if (Function *TypeCheckedLoadFunc =
113           M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) {
114     for (const Use &U : TypeCheckedLoadFunc->uses()) {
115       auto CI = cast<CallInst>(U.getUser());
116       ExternalizeTypeId(CI, 2);
117     }
118   }
119 
120   for (GlobalObject &GO : M.global_objects()) {
121     SmallVector<MDNode *, 1> MDs;
122     GO.getMetadata(LLVMContext::MD_type, MDs);
123 
124     GO.eraseMetadata(LLVMContext::MD_type);
125     for (auto MD : MDs) {
126       auto I = LocalToGlobal.find(MD->getOperand(1));
127       if (I == LocalToGlobal.end()) {
128         GO.addMetadata(LLVMContext::MD_type, *MD);
129         continue;
130       }
131       GO.addMetadata(
132           LLVMContext::MD_type,
133           *MDNode::get(M.getContext(),
134                        ArrayRef<Metadata *>{MD->getOperand(0), I->second}));
135     }
136   }
137 }
138 
139 // Drop unused globals, and drop type information from function declarations.
140 // FIXME: If we made functions typeless then there would be no need to do this.
141 void simplifyExternals(Module &M) {
142   FunctionType *EmptyFT =
143       FunctionType::get(Type::getVoidTy(M.getContext()), false);
144 
145   for (auto I = M.begin(), E = M.end(); I != E;) {
146     Function &F = *I++;
147     if (F.isDeclaration() && F.use_empty()) {
148       F.eraseFromParent();
149       continue;
150     }
151 
152     if (!F.isDeclaration() || F.getFunctionType() == EmptyFT ||
153         // Changing the type of an intrinsic may invalidate the IR.
154         F.getName().startswith("llvm."))
155       continue;
156 
157     Function *NewF =
158         Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M);
159     NewF->setVisibility(F.getVisibility());
160     NewF->takeName(&F);
161     F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
162     F.eraseFromParent();
163   }
164 
165   for (auto I = M.global_begin(), E = M.global_end(); I != E;) {
166     GlobalVariable &GV = *I++;
167     if (GV.isDeclaration() && GV.use_empty()) {
168       GV.eraseFromParent();
169       continue;
170     }
171   }
172 }
173 
174 static void
175 filterModule(Module *M,
176              function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) {
177   std::vector<GlobalValue *> V;
178   for (GlobalValue &GV : M->global_values())
179     if (!ShouldKeepDefinition(&GV))
180       V.push_back(&GV);
181 
182   for (GlobalValue *GV : V)
183     if (!convertToDeclaration(*GV))
184       GV->eraseFromParent();
185 }
186 
187 void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) {
188   if (auto *F = dyn_cast<Function>(C))
189     return Fn(F);
190   if (isa<GlobalValue>(C))
191     return;
192   for (Value *Op : C->operands())
193     forEachVirtualFunction(cast<Constant>(Op), Fn);
194 }
195 
196 // If it's possible to split M into regular and thin LTO parts, do so and write
197 // a multi-module bitcode file with the two parts to OS. Otherwise, write only a
198 // regular LTO bitcode file to OS.
199 void splitAndWriteThinLTOBitcode(
200     raw_ostream &OS, raw_ostream *ThinLinkOS,
201     function_ref<AAResults &(Function &)> AARGetter, Module &M) {
202   std::string ModuleId = getUniqueModuleId(&M);
203   if (ModuleId.empty()) {
204     // We couldn't generate a module ID for this module, just write it out as a
205     // regular LTO module.
206     WriteBitcodeToFile(M, OS);
207     if (ThinLinkOS)
208       // We don't have a ThinLTO part, but still write the module to the
209       // ThinLinkOS if requested so that the expected output file is produced.
210       WriteBitcodeToFile(M, *ThinLinkOS);
211     return;
212   }
213 
214   promoteTypeIds(M, ModuleId);
215 
216   // Returns whether a global has attached type metadata. Such globals may
217   // participate in CFI or whole-program devirtualization, so they need to
218   // appear in the merged module instead of the thin LTO module.
219   auto HasTypeMetadata = [&](const GlobalObject *GO) {
220     SmallVector<MDNode *, 1> MDs;
221     GO->getMetadata(LLVMContext::MD_type, MDs);
222     return !MDs.empty();
223   };
224 
225   // Collect the set of virtual functions that are eligible for virtual constant
226   // propagation. Each eligible function must not access memory, must return
227   // an integer of width <=64 bits, must take at least one argument, must not
228   // use its first argument (assumed to be "this") and all arguments other than
229   // the first one must be of <=64 bit integer type.
230   //
231   // Note that we test whether this copy of the function is readnone, rather
232   // than testing function attributes, which must hold for any copy of the
233   // function, even a less optimized version substituted at link time. This is
234   // sound because the virtual constant propagation optimizations effectively
235   // inline all implementations of the virtual function into each call site,
236   // rather than using function attributes to perform local optimization.
237   std::set<const Function *> EligibleVirtualFns;
238   // If any member of a comdat lives in MergedM, put all members of that
239   // comdat in MergedM to keep the comdat together.
240   DenseSet<const Comdat *> MergedMComdats;
241   for (GlobalVariable &GV : M.globals())
242     if (HasTypeMetadata(&GV)) {
243       if (const auto *C = GV.getComdat())
244         MergedMComdats.insert(C);
245       forEachVirtualFunction(GV.getInitializer(), [&](Function *F) {
246         auto *RT = dyn_cast<IntegerType>(F->getReturnType());
247         if (!RT || RT->getBitWidth() > 64 || F->arg_empty() ||
248             !F->arg_begin()->use_empty())
249           return;
250         for (auto &Arg : make_range(std::next(F->arg_begin()), F->arg_end())) {
251           auto *ArgT = dyn_cast<IntegerType>(Arg.getType());
252           if (!ArgT || ArgT->getBitWidth() > 64)
253             return;
254         }
255         if (!F->isDeclaration() &&
256             computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone)
257           EligibleVirtualFns.insert(F);
258       });
259     }
260 
261   ValueToValueMapTy VMap;
262   std::unique_ptr<Module> MergedM(
263       CloneModule(M, VMap, [&](const GlobalValue *GV) -> bool {
264         if (const auto *C = GV->getComdat())
265           if (MergedMComdats.count(C))
266             return true;
267         if (auto *F = dyn_cast<Function>(GV))
268           return EligibleVirtualFns.count(F);
269         if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
270           return HasTypeMetadata(GVar);
271         return false;
272       }));
273   StripDebugInfo(*MergedM);
274   MergedM->setModuleInlineAsm("");
275 
276   for (Function &F : *MergedM)
277     if (!F.isDeclaration()) {
278       // Reset the linkage of all functions eligible for virtual constant
279       // propagation. The canonical definitions live in the thin LTO module so
280       // that they can be imported.
281       F.setLinkage(GlobalValue::AvailableExternallyLinkage);
282       F.setComdat(nullptr);
283     }
284 
285   SetVector<GlobalValue *> CfiFunctions;
286   for (auto &F : M)
287     if ((!F.hasLocalLinkage() || F.hasAddressTaken()) && HasTypeMetadata(&F))
288       CfiFunctions.insert(&F);
289 
290   // Remove all globals with type metadata, globals with comdats that live in
291   // MergedM, and aliases pointing to such globals from the thin LTO module.
292   filterModule(&M, [&](const GlobalValue *GV) {
293     if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
294       if (HasTypeMetadata(GVar))
295         return false;
296     if (const auto *C = GV->getComdat())
297       if (MergedMComdats.count(C))
298         return false;
299     return true;
300   });
301 
302   promoteInternals(*MergedM, M, ModuleId, CfiFunctions);
303   promoteInternals(M, *MergedM, ModuleId, CfiFunctions);
304 
305   auto &Ctx = MergedM->getContext();
306   SmallVector<MDNode *, 8> CfiFunctionMDs;
307   for (auto V : CfiFunctions) {
308     Function &F = *cast<Function>(V);
309     SmallVector<MDNode *, 2> Types;
310     F.getMetadata(LLVMContext::MD_type, Types);
311 
312     SmallVector<Metadata *, 4> Elts;
313     Elts.push_back(MDString::get(Ctx, F.getName()));
314     CfiFunctionLinkage Linkage;
315     if (!F.isDeclarationForLinker())
316       Linkage = CFL_Definition;
317     else if (F.isWeakForLinker())
318       Linkage = CFL_WeakDeclaration;
319     else
320       Linkage = CFL_Declaration;
321     Elts.push_back(ConstantAsMetadata::get(
322         llvm::ConstantInt::get(Type::getInt8Ty(Ctx), Linkage)));
323     for (auto Type : Types)
324       Elts.push_back(Type);
325     CfiFunctionMDs.push_back(MDTuple::get(Ctx, Elts));
326   }
327 
328   if(!CfiFunctionMDs.empty()) {
329     NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("cfi.functions");
330     for (auto MD : CfiFunctionMDs)
331       NMD->addOperand(MD);
332   }
333 
334   SmallVector<MDNode *, 8> FunctionAliases;
335   for (auto &A : M.aliases()) {
336     if (!isa<Function>(A.getAliasee()))
337       continue;
338 
339     auto *F = cast<Function>(A.getAliasee());
340     SmallVector<Metadata *, 4> Elts;
341 
342     Elts.push_back(MDString::get(Ctx, A.getName()));
343     Elts.push_back(MDString::get(Ctx, F->getName()));
344     Elts.push_back(ConstantAsMetadata::get(
345         llvm::ConstantInt::get(Type::getInt8Ty(Ctx), A.getVisibility())));
346     Elts.push_back(ConstantAsMetadata::get(
347         llvm::ConstantInt::get(Type::getInt8Ty(Ctx), A.isWeakForLinker())));
348 
349     FunctionAliases.push_back(MDTuple::get(Ctx, Elts));
350   }
351 
352   if (!FunctionAliases.empty()) {
353     NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("aliases");
354     for (auto MD : FunctionAliases)
355       NMD->addOperand(MD);
356   }
357 
358   SmallVector<MDNode *, 8> Symvers;
359   ModuleSymbolTable::CollectAsmSymvers(M, [&](StringRef Name, StringRef Alias) {
360     Function *F = M.getFunction(Name);
361     if (!F || F->use_empty())
362       return;
363 
364     SmallVector<Metadata *, 2> Elts;
365     Elts.push_back(MDString::get(Ctx, Name));
366     Elts.push_back(MDString::get(Ctx, Alias));
367 
368     Symvers.push_back(MDTuple::get(Ctx, Elts));
369   });
370 
371   if (!Symvers.empty()) {
372     NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("symvers");
373     for (auto MD : Symvers)
374       NMD->addOperand(MD);
375   }
376 
377   simplifyExternals(*MergedM);
378 
379   // FIXME: Try to re-use BSI and PFI from the original module here.
380   ProfileSummaryInfo PSI(M);
381   ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI);
382 
383   // Mark the merged module as requiring full LTO. We still want an index for
384   // it though, so that it can participate in summary-based dead stripping.
385   MergedM->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
386   ModuleSummaryIndex MergedMIndex =
387       buildModuleSummaryIndex(*MergedM, nullptr, &PSI);
388 
389   SmallVector<char, 0> Buffer;
390 
391   BitcodeWriter W(Buffer);
392   // Save the module hash produced for the full bitcode, which will
393   // be used in the backends, and use that in the minimized bitcode
394   // produced for the full link.
395   ModuleHash ModHash = {{0}};
396   W.writeModule(M, /*ShouldPreserveUseListOrder=*/false, &Index,
397                 /*GenerateHash=*/true, &ModHash);
398   W.writeModule(*MergedM, /*ShouldPreserveUseListOrder=*/false, &MergedMIndex);
399   W.writeSymtab();
400   W.writeStrtab();
401   OS << Buffer;
402 
403   // If a minimized bitcode module was requested for the thin link, only
404   // the information that is needed by thin link will be written in the
405   // given OS (the merged module will be written as usual).
406   if (ThinLinkOS) {
407     Buffer.clear();
408     BitcodeWriter W2(Buffer);
409     StripDebugInfo(M);
410     W2.writeThinLinkBitcode(M, Index, ModHash);
411     W2.writeModule(*MergedM, /*ShouldPreserveUseListOrder=*/false,
412                    &MergedMIndex);
413     W2.writeSymtab();
414     W2.writeStrtab();
415     *ThinLinkOS << Buffer;
416   }
417 }
418 
419 // Returns whether this module needs to be split because it uses type metadata.
420 bool requiresSplit(Module &M) {
421   SmallVector<MDNode *, 1> MDs;
422   for (auto &GO : M.global_objects()) {
423     GO.getMetadata(LLVMContext::MD_type, MDs);
424     if (!MDs.empty())
425       return true;
426   }
427 
428   return false;
429 }
430 
431 void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
432                          function_ref<AAResults &(Function &)> AARGetter,
433                          Module &M, const ModuleSummaryIndex *Index) {
434   // See if this module has any type metadata. If so, we need to split it.
435   if (requiresSplit(M))
436     return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
437 
438   // Otherwise we can just write it out as a regular module.
439 
440   // Save the module hash produced for the full bitcode, which will
441   // be used in the backends, and use that in the minimized bitcode
442   // produced for the full link.
443   ModuleHash ModHash = {{0}};
444   WriteBitcodeToFile(M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
445                      /*GenerateHash=*/true, &ModHash);
446   // If a minimized bitcode module was requested for the thin link, only
447   // the information that is needed by thin link will be written in the
448   // given OS.
449   if (ThinLinkOS && Index)
450     WriteThinLinkBitcodeToFile(M, *ThinLinkOS, *Index, ModHash);
451 }
452 
453 class WriteThinLTOBitcode : public ModulePass {
454   raw_ostream &OS; // raw_ostream to print on
455   // The output stream on which to emit a minimized module for use
456   // just in the thin link, if requested.
457   raw_ostream *ThinLinkOS;
458 
459 public:
460   static char ID; // Pass identification, replacement for typeid
461   WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()), ThinLinkOS(nullptr) {
462     initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
463   }
464 
465   explicit WriteThinLTOBitcode(raw_ostream &o, raw_ostream *ThinLinkOS)
466       : ModulePass(ID), OS(o), ThinLinkOS(ThinLinkOS) {
467     initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
468   }
469 
470   StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; }
471 
472   bool runOnModule(Module &M) override {
473     const ModuleSummaryIndex *Index =
474         &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex());
475     writeThinLTOBitcode(OS, ThinLinkOS, LegacyAARGetter(*this), M, Index);
476     return true;
477   }
478   void getAnalysisUsage(AnalysisUsage &AU) const override {
479     AU.setPreservesAll();
480     AU.addRequired<AssumptionCacheTracker>();
481     AU.addRequired<ModuleSummaryIndexWrapperPass>();
482     AU.addRequired<TargetLibraryInfoWrapperPass>();
483   }
484 };
485 } // anonymous namespace
486 
// Definition of the pass identifier declared in WriteThinLTOBitcode.
char WriteThinLTOBitcode::ID = 0;
// Register the legacy pass under the argument name "write-thinlto-bitcode".
// The dependencies listed between BEGIN and END are the same passes that
// getAnalysisUsage() marks as required.
INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode",
                      "Write ThinLTO Bitcode", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode",
                    "Write ThinLTO Bitcode", false, true)
495 
496 ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str,
497                                                 raw_ostream *ThinLinkOS) {
498   return new WriteThinLTOBitcode(Str, ThinLinkOS);
499 }
500 
501 PreservedAnalyses
502 llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) {
503   FunctionAnalysisManager &FAM =
504       AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
505   writeThinLTOBitcode(OS, ThinLinkOS,
506                       [&FAM](Function &F) -> AAResults & {
507                         return FAM.getResult<AAManager>(F);
508                       },
509                       M, &AM.getResult<ModuleSummaryIndexAnalysis>(M));
510   return PreservedAnalyses::all();
511 }
512