1 //===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
11 #include "llvm/Analysis/BasicAliasAnalysis.h"
12 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
13 #include "llvm/Analysis/ProfileSummaryInfo.h"
14 #include "llvm/Analysis/TypeMetadataUtils.h"
15 #include "llvm/Bitcode/BitcodeWriter.h"
16 #include "llvm/IR/Constants.h"
17 #include "llvm/IR/DebugInfo.h"
18 #include "llvm/IR/Intrinsics.h"
19 #include "llvm/IR/Module.h"
20 #include "llvm/IR/PassManager.h"
21 #include "llvm/Pass.h"
22 #include "llvm/Support/FileSystem.h"
23 #include "llvm/Support/ScopedPrinter.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include "llvm/Transforms/IPO.h"
26 #include "llvm/Transforms/IPO/FunctionAttrs.h"
27 #include "llvm/Transforms/Utils/Cloning.h"
28 #include "llvm/Transforms/Utils/ModuleUtils.h"
29 using namespace llvm;
30 
31 namespace {
32 
33 // Promote each local-linkage entity defined by ExportM and used by ImportM by
34 // changing visibility and appending the given ModuleId.
35 void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId,
36                       SetVector<GlobalValue *> &PromoteExtra) {
37   DenseMap<const Comdat *, Comdat *> RenamedComdats;
38   for (auto &ExportGV : ExportM.global_values()) {
39     if (!ExportGV.hasLocalLinkage())
40       continue;
41 
42     auto Name = ExportGV.getName();
43     GlobalValue *ImportGV = ImportM.getNamedValue(Name);
44     if ((!ImportGV || ImportGV->use_empty()) && !PromoteExtra.count(&ExportGV))
45       continue;
46 
47     std::string NewName = (Name + ModuleId).str();
48 
49     if (const auto *C = ExportGV.getComdat())
50       if (C->getName() == Name)
51         RenamedComdats.try_emplace(C, ExportM.getOrInsertComdat(NewName));
52 
53     ExportGV.setName(NewName);
54     ExportGV.setLinkage(GlobalValue::ExternalLinkage);
55     ExportGV.setVisibility(GlobalValue::HiddenVisibility);
56 
57     if (ImportGV) {
58       ImportGV->setName(NewName);
59       ImportGV->setVisibility(GlobalValue::HiddenVisibility);
60     }
61   }
62 
63   if (!RenamedComdats.empty())
64     for (auto &GO : ExportM.global_objects())
65       if (auto *C = GO.getComdat()) {
66         auto Replacement = RenamedComdats.find(C);
67         if (Replacement != RenamedComdats.end())
68           GO.setComdat(Replacement->second);
69       }
70 }
71 
72 // Promote all internal (i.e. distinct) type ids used by the module by replacing
73 // them with external type ids formed using the module id.
74 //
75 // Note that this needs to be done before we clone the module because each clone
76 // will receive its own set of distinct metadata nodes.
77 void promoteTypeIds(Module &M, StringRef ModuleId) {
78   DenseMap<Metadata *, Metadata *> LocalToGlobal;
79   auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) {
80     Metadata *MD =
81         cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata();
82 
83     if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
84       Metadata *&GlobalMD = LocalToGlobal[MD];
85       if (!GlobalMD) {
86         std::string NewName =
87             (to_string(LocalToGlobal.size()) + ModuleId).str();
88         GlobalMD = MDString::get(M.getContext(), NewName);
89       }
90 
91       CI->setArgOperand(ArgNo,
92                         MetadataAsValue::get(M.getContext(), GlobalMD));
93     }
94   };
95 
96   if (Function *TypeTestFunc =
97           M.getFunction(Intrinsic::getName(Intrinsic::type_test))) {
98     for (const Use &U : TypeTestFunc->uses()) {
99       auto CI = cast<CallInst>(U.getUser());
100       ExternalizeTypeId(CI, 1);
101     }
102   }
103 
104   if (Function *TypeCheckedLoadFunc =
105           M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) {
106     for (const Use &U : TypeCheckedLoadFunc->uses()) {
107       auto CI = cast<CallInst>(U.getUser());
108       ExternalizeTypeId(CI, 2);
109     }
110   }
111 
112   for (GlobalObject &GO : M.global_objects()) {
113     SmallVector<MDNode *, 1> MDs;
114     GO.getMetadata(LLVMContext::MD_type, MDs);
115 
116     GO.eraseMetadata(LLVMContext::MD_type);
117     for (auto MD : MDs) {
118       auto I = LocalToGlobal.find(MD->getOperand(1));
119       if (I == LocalToGlobal.end()) {
120         GO.addMetadata(LLVMContext::MD_type, *MD);
121         continue;
122       }
123       GO.addMetadata(
124           LLVMContext::MD_type,
125           *MDNode::get(M.getContext(),
126                        ArrayRef<Metadata *>{MD->getOperand(0), I->second}));
127     }
128   }
129 }
130 
131 // Drop unused globals, and drop type information from function declarations.
132 // FIXME: If we made functions typeless then there would be no need to do this.
133 void simplifyExternals(Module &M) {
134   FunctionType *EmptyFT =
135       FunctionType::get(Type::getVoidTy(M.getContext()), false);
136 
137   for (auto I = M.begin(), E = M.end(); I != E;) {
138     Function &F = *I++;
139     if (F.isDeclaration() && F.use_empty()) {
140       F.eraseFromParent();
141       continue;
142     }
143 
144     if (!F.isDeclaration() || F.getFunctionType() == EmptyFT)
145       continue;
146 
147     Function *NewF =
148         Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M);
149     NewF->setVisibility(F.getVisibility());
150     NewF->takeName(&F);
151     F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
152     F.eraseFromParent();
153   }
154 
155   for (auto I = M.global_begin(), E = M.global_end(); I != E;) {
156     GlobalVariable &GV = *I++;
157     if (GV.isDeclaration() && GV.use_empty()) {
158       GV.eraseFromParent();
159       continue;
160     }
161   }
162 }
163 
164 void filterModule(
165     Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) {
166   for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
167        I != E;) {
168     GlobalAlias *GA = &*I++;
169     if (ShouldKeepDefinition(GA))
170       continue;
171 
172     GlobalObject *GO;
173     if (GA->getValueType()->isFunctionTy())
174       GO = Function::Create(cast<FunctionType>(GA->getValueType()),
175                             GlobalValue::ExternalLinkage, "", M);
176     else
177       GO = new GlobalVariable(
178           *M, GA->getValueType(), false, GlobalValue::ExternalLinkage,
179           nullptr, "", nullptr,
180           GA->getThreadLocalMode(), GA->getType()->getAddressSpace());
181     GO->takeName(GA);
182     GA->replaceAllUsesWith(GO);
183     GA->eraseFromParent();
184   }
185 
186   for (Function &F : *M) {
187     if (ShouldKeepDefinition(&F))
188       continue;
189 
190     F.deleteBody();
191     F.setComdat(nullptr);
192     F.clearMetadata();
193   }
194 
195   for (GlobalVariable &GV : M->globals()) {
196     if (ShouldKeepDefinition(&GV))
197       continue;
198 
199     GV.setInitializer(nullptr);
200     GV.setLinkage(GlobalValue::ExternalLinkage);
201     GV.setComdat(nullptr);
202     GV.clearMetadata();
203   }
204 }
205 
206 void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) {
207   if (auto *F = dyn_cast<Function>(C))
208     return Fn(F);
209   if (isa<GlobalValue>(C))
210     return;
211   for (Value *Op : C->operands())
212     forEachVirtualFunction(cast<Constant>(Op), Fn);
213 }
214 
215 // If it's possible to split M into regular and thin LTO parts, do so and write
216 // a multi-module bitcode file with the two parts to OS. Otherwise, write only a
217 // regular LTO bitcode file to OS.
218 void splitAndWriteThinLTOBitcode(
219     raw_ostream &OS, raw_ostream *ThinLinkOS,
220     function_ref<AAResults &(Function &)> AARGetter, Module &M) {
221   std::string ModuleId = getUniqueModuleId(&M);
222   if (ModuleId.empty()) {
223     // We couldn't generate a module ID for this module, just write it out as a
224     // regular LTO module.
225     WriteBitcodeToFile(&M, OS);
226     if (ThinLinkOS)
227       // We don't have a ThinLTO part, but still write the module to the
228       // ThinLinkOS if requested so that the expected output file is produced.
229       WriteBitcodeToFile(&M, *ThinLinkOS);
230     return;
231   }
232 
233   promoteTypeIds(M, ModuleId);
234 
235   // Returns whether a global has attached type metadata. Such globals may
236   // participate in CFI or whole-program devirtualization, so they need to
237   // appear in the merged module instead of the thin LTO module.
238   auto HasTypeMetadata = [&](const GlobalObject *GO) {
239     SmallVector<MDNode *, 1> MDs;
240     GO->getMetadata(LLVMContext::MD_type, MDs);
241     return !MDs.empty();
242   };
243 
244   // Collect the set of virtual functions that are eligible for virtual constant
245   // propagation. Each eligible function must not access memory, must return
246   // an integer of width <=64 bits, must take at least one argument, must not
247   // use its first argument (assumed to be "this") and all arguments other than
248   // the first one must be of <=64 bit integer type.
249   //
250   // Note that we test whether this copy of the function is readnone, rather
251   // than testing function attributes, which must hold for any copy of the
252   // function, even a less optimized version substituted at link time. This is
253   // sound because the virtual constant propagation optimizations effectively
254   // inline all implementations of the virtual function into each call site,
255   // rather than using function attributes to perform local optimization.
256   std::set<const Function *> EligibleVirtualFns;
257   // If any member of a comdat lives in MergedM, put all members of that
258   // comdat in MergedM to keep the comdat together.
259   DenseSet<const Comdat *> MergedMComdats;
260   for (GlobalVariable &GV : M.globals())
261     if (HasTypeMetadata(&GV)) {
262       if (const auto *C = GV.getComdat())
263         MergedMComdats.insert(C);
264       forEachVirtualFunction(GV.getInitializer(), [&](Function *F) {
265         auto *RT = dyn_cast<IntegerType>(F->getReturnType());
266         if (!RT || RT->getBitWidth() > 64 || F->arg_empty() ||
267             !F->arg_begin()->use_empty())
268           return;
269         for (auto &Arg : make_range(std::next(F->arg_begin()), F->arg_end())) {
270           auto *ArgT = dyn_cast<IntegerType>(Arg.getType());
271           if (!ArgT || ArgT->getBitWidth() > 64)
272             return;
273         }
274         if (!F->isDeclaration() &&
275             computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone)
276           EligibleVirtualFns.insert(F);
277       });
278     }
279 
280   ValueToValueMapTy VMap;
281   std::unique_ptr<Module> MergedM(
282       CloneModule(&M, VMap, [&](const GlobalValue *GV) -> bool {
283         if (const auto *C = GV->getComdat())
284           if (MergedMComdats.count(C))
285             return true;
286         if (auto *F = dyn_cast<Function>(GV))
287           return EligibleVirtualFns.count(F);
288         if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
289           return HasTypeMetadata(GVar);
290         return false;
291       }));
292   StripDebugInfo(*MergedM);
293 
294   for (Function &F : *MergedM)
295     if (!F.isDeclaration()) {
296       // Reset the linkage of all functions eligible for virtual constant
297       // propagation. The canonical definitions live in the thin LTO module so
298       // that they can be imported.
299       F.setLinkage(GlobalValue::AvailableExternallyLinkage);
300       F.setComdat(nullptr);
301     }
302 
303   SetVector<GlobalValue *> CfiFunctions;
304   for (auto &F : M)
305     if ((!F.hasLocalLinkage() || F.hasAddressTaken()) && HasTypeMetadata(&F))
306       CfiFunctions.insert(&F);
307 
308   // Remove all globals with type metadata, globals with comdats that live in
309   // MergedM, and aliases pointing to such globals from the thin LTO module.
310   filterModule(&M, [&](const GlobalValue *GV) {
311     if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
312       if (HasTypeMetadata(GVar))
313         return false;
314     if (const auto *C = GV->getComdat())
315       if (MergedMComdats.count(C))
316         return false;
317     return true;
318   });
319 
320   promoteInternals(*MergedM, M, ModuleId, CfiFunctions);
321   promoteInternals(M, *MergedM, ModuleId, CfiFunctions);
322 
323   SmallVector<MDNode *, 8> CfiFunctionMDs;
324   for (auto V : CfiFunctions) {
325     Function &F = *cast<Function>(V);
326     SmallVector<MDNode *, 2> Types;
327     F.getMetadata(LLVMContext::MD_type, Types);
328 
329     auto &Ctx = MergedM->getContext();
330     SmallVector<Metadata *, 4> Elts;
331     Elts.push_back(MDString::get(Ctx, F.getName()));
332     CfiFunctionLinkage Linkage;
333     if (!F.isDeclarationForLinker())
334       Linkage = CFL_Definition;
335     else if (F.isWeakForLinker())
336       Linkage = CFL_WeakDeclaration;
337     else
338       Linkage = CFL_Declaration;
339     Elts.push_back(ConstantAsMetadata::get(
340         llvm::ConstantInt::get(Type::getInt8Ty(Ctx), Linkage)));
341     for (auto Type : Types)
342       Elts.push_back(Type);
343     CfiFunctionMDs.push_back(MDTuple::get(Ctx, Elts));
344   }
345 
346   if(!CfiFunctionMDs.empty()) {
347     NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("cfi.functions");
348     for (auto MD : CfiFunctionMDs)
349       NMD->addOperand(MD);
350   }
351 
352   simplifyExternals(*MergedM);
353 
354   // FIXME: Try to re-use BSI and PFI from the original module here.
355   ProfileSummaryInfo PSI(M);
356   ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI);
357 
358   // Mark the merged module as requiring full LTO. We still want an index for
359   // it though, so that it can participate in summary-based dead stripping.
360   MergedM->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
361   ModuleSummaryIndex MergedMIndex =
362       buildModuleSummaryIndex(*MergedM, nullptr, &PSI);
363 
364   SmallVector<char, 0> Buffer;
365 
366   BitcodeWriter W(Buffer);
367   // Save the module hash produced for the full bitcode, which will
368   // be used in the backends, and use that in the minimized bitcode
369   // produced for the full link.
370   ModuleHash ModHash = {{0}};
371   W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
372                 /*GenerateHash=*/true, &ModHash);
373   W.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false,
374                 &MergedMIndex);
375   W.writeSymtab();
376   W.writeStrtab();
377   OS << Buffer;
378 
379   // If a minimized bitcode module was requested for the thin link,
380   // strip the debug info (the merged module was already stripped above)
381   // and write it to the given OS.
382   if (ThinLinkOS) {
383     Buffer.clear();
384     BitcodeWriter W2(Buffer);
385     StripDebugInfo(M);
386     W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
387                    /*GenerateHash=*/false, &ModHash);
388     W2.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false,
389                    &MergedMIndex);
390     W2.writeSymtab();
391     W2.writeStrtab();
392     *ThinLinkOS << Buffer;
393   }
394 }
395 
396 // Returns whether this module needs to be split because it uses type metadata.
397 bool requiresSplit(Module &M) {
398   SmallVector<MDNode *, 1> MDs;
399   for (auto &GO : M.global_objects()) {
400     GO.getMetadata(LLVMContext::MD_type, MDs);
401     if (!MDs.empty())
402       return true;
403   }
404 
405   return false;
406 }
407 
408 void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
409                          function_ref<AAResults &(Function &)> AARGetter,
410                          Module &M, const ModuleSummaryIndex *Index) {
411   // See if this module has any type metadata. If so, we need to split it.
412   if (requiresSplit(M))
413     return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
414 
415   // Otherwise we can just write it out as a regular module.
416 
417   // Save the module hash produced for the full bitcode, which will
418   // be used in the backends, and use that in the minimized bitcode
419   // produced for the full link.
420   ModuleHash ModHash = {{0}};
421   WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
422                      /*GenerateHash=*/true, &ModHash);
423   // If a minimized bitcode module was requested for the thin link,
424   // strip the debug info and write it to the given OS.
425   if (ThinLinkOS) {
426     StripDebugInfo(M);
427     WriteBitcodeToFile(&M, *ThinLinkOS, /*ShouldPreserveUseListOrder=*/false,
428                        Index,
429                        /*GenerateHash=*/false, &ModHash);
430   }
431 }
432 
433 class WriteThinLTOBitcode : public ModulePass {
434   raw_ostream &OS; // raw_ostream to print on
435   // The output stream on which to emit a minimized module for use
436   // just in the thin link, if requested.
437   raw_ostream *ThinLinkOS;
438 
439 public:
440   static char ID; // Pass identification, replacement for typeid
441   WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()), ThinLinkOS(nullptr) {
442     initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
443   }
444 
445   explicit WriteThinLTOBitcode(raw_ostream &o, raw_ostream *ThinLinkOS)
446       : ModulePass(ID), OS(o), ThinLinkOS(ThinLinkOS) {
447     initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
448   }
449 
450   StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; }
451 
452   bool runOnModule(Module &M) override {
453     const ModuleSummaryIndex *Index =
454         &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex());
455     writeThinLTOBitcode(OS, ThinLinkOS, LegacyAARGetter(*this), M, Index);
456     return true;
457   }
458   void getAnalysisUsage(AnalysisUsage &AU) const override {
459     AU.setPreservesAll();
460     AU.addRequired<AssumptionCacheTracker>();
461     AU.addRequired<ModuleSummaryIndexWrapperPass>();
462     AU.addRequired<TargetLibraryInfoWrapperPass>();
463   }
464 };
465 } // anonymous namespace
466 
467 char WriteThinLTOBitcode::ID = 0;
468 INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode",
469                       "Write ThinLTO Bitcode", false, true)
470 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
471 INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
472 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
473 INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode",
474                     "Write ThinLTO Bitcode", false, true)
475 
476 ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str,
477                                                 raw_ostream *ThinLinkOS) {
478   return new WriteThinLTOBitcode(Str, ThinLinkOS);
479 }
480 
481 PreservedAnalyses
482 llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) {
483   FunctionAnalysisManager &FAM =
484       AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
485   writeThinLTOBitcode(OS, ThinLinkOS,
486                       [&FAM](Function &F) -> AAResults & {
487                         return FAM.getResult<AAManager>(F);
488                       },
489                       M, &AM.getResult<ModuleSummaryIndexAnalysis>(M));
490   return PreservedAnalyses::all();
491 }
492