1 //===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
11 #include "llvm/Analysis/BasicAliasAnalysis.h"
12 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
13 #include "llvm/Analysis/ProfileSummaryInfo.h"
14 #include "llvm/Analysis/TypeMetadataUtils.h"
15 #include "llvm/Bitcode/BitcodeWriter.h"
16 #include "llvm/IR/Constants.h"
17 #include "llvm/IR/DebugInfo.h"
18 #include "llvm/IR/Intrinsics.h"
19 #include "llvm/IR/Module.h"
20 #include "llvm/IR/PassManager.h"
21 #include "llvm/Pass.h"
22 #include "llvm/Support/FileSystem.h"
23 #include "llvm/Support/ScopedPrinter.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include "llvm/Transforms/IPO.h"
26 #include "llvm/Transforms/IPO/FunctionAttrs.h"
27 #include "llvm/Transforms/Utils/Cloning.h"
28 #include "llvm/Transforms/Utils/ModuleUtils.h"
29 using namespace llvm;
30 
31 namespace {
32 
33 // Promote each local-linkage entity defined by ExportM and used by ImportM by
34 // changing visibility and appending the given ModuleId.
35 void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId,
36                       SetVector<GlobalValue *> &PromoteExtra) {
37   DenseMap<const Comdat *, Comdat *> RenamedComdats;
38   for (auto &ExportGV : ExportM.global_values()) {
39     if (!ExportGV.hasLocalLinkage())
40       continue;
41 
42     auto Name = ExportGV.getName();
43     GlobalValue *ImportGV = nullptr;
44     if (!PromoteExtra.count(&ExportGV)) {
45       ImportGV = ImportM.getNamedValue(Name);
46       if (!ImportGV)
47         continue;
48       ImportGV->removeDeadConstantUsers();
49       if (ImportGV->use_empty()) {
50         ImportGV->eraseFromParent();
51         continue;
52       }
53     }
54 
55     std::string NewName = (Name + ModuleId).str();
56 
57     if (const auto *C = ExportGV.getComdat())
58       if (C->getName() == Name)
59         RenamedComdats.try_emplace(C, ExportM.getOrInsertComdat(NewName));
60 
61     ExportGV.setName(NewName);
62     ExportGV.setLinkage(GlobalValue::ExternalLinkage);
63     ExportGV.setVisibility(GlobalValue::HiddenVisibility);
64 
65     if (ImportGV) {
66       ImportGV->setName(NewName);
67       ImportGV->setVisibility(GlobalValue::HiddenVisibility);
68     }
69   }
70 
71   if (!RenamedComdats.empty())
72     for (auto &GO : ExportM.global_objects())
73       if (auto *C = GO.getComdat()) {
74         auto Replacement = RenamedComdats.find(C);
75         if (Replacement != RenamedComdats.end())
76           GO.setComdat(Replacement->second);
77       }
78 }
79 
80 // Promote all internal (i.e. distinct) type ids used by the module by replacing
81 // them with external type ids formed using the module id.
82 //
83 // Note that this needs to be done before we clone the module because each clone
84 // will receive its own set of distinct metadata nodes.
85 void promoteTypeIds(Module &M, StringRef ModuleId) {
86   DenseMap<Metadata *, Metadata *> LocalToGlobal;
87   auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) {
88     Metadata *MD =
89         cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata();
90 
91     if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
92       Metadata *&GlobalMD = LocalToGlobal[MD];
93       if (!GlobalMD) {
94         std::string NewName =
95             (to_string(LocalToGlobal.size()) + ModuleId).str();
96         GlobalMD = MDString::get(M.getContext(), NewName);
97       }
98 
99       CI->setArgOperand(ArgNo,
100                         MetadataAsValue::get(M.getContext(), GlobalMD));
101     }
102   };
103 
104   if (Function *TypeTestFunc =
105           M.getFunction(Intrinsic::getName(Intrinsic::type_test))) {
106     for (const Use &U : TypeTestFunc->uses()) {
107       auto CI = cast<CallInst>(U.getUser());
108       ExternalizeTypeId(CI, 1);
109     }
110   }
111 
112   if (Function *TypeCheckedLoadFunc =
113           M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) {
114     for (const Use &U : TypeCheckedLoadFunc->uses()) {
115       auto CI = cast<CallInst>(U.getUser());
116       ExternalizeTypeId(CI, 2);
117     }
118   }
119 
120   for (GlobalObject &GO : M.global_objects()) {
121     SmallVector<MDNode *, 1> MDs;
122     GO.getMetadata(LLVMContext::MD_type, MDs);
123 
124     GO.eraseMetadata(LLVMContext::MD_type);
125     for (auto MD : MDs) {
126       auto I = LocalToGlobal.find(MD->getOperand(1));
127       if (I == LocalToGlobal.end()) {
128         GO.addMetadata(LLVMContext::MD_type, *MD);
129         continue;
130       }
131       GO.addMetadata(
132           LLVMContext::MD_type,
133           *MDNode::get(M.getContext(),
134                        ArrayRef<Metadata *>{MD->getOperand(0), I->second}));
135     }
136   }
137 }
138 
139 // Drop unused globals, and drop type information from function declarations.
140 // FIXME: If we made functions typeless then there would be no need to do this.
141 void simplifyExternals(Module &M) {
142   FunctionType *EmptyFT =
143       FunctionType::get(Type::getVoidTy(M.getContext()), false);
144 
145   for (auto I = M.begin(), E = M.end(); I != E;) {
146     Function &F = *I++;
147     if (F.isDeclaration() && F.use_empty()) {
148       F.eraseFromParent();
149       continue;
150     }
151 
152     if (!F.isDeclaration() || F.getFunctionType() == EmptyFT ||
153         // Changing the type of an intrinsic may invalidate the IR.
154         F.getName().startswith("llvm."))
155       continue;
156 
157     Function *NewF =
158         Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M);
159     NewF->setVisibility(F.getVisibility());
160     NewF->takeName(&F);
161     F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
162     F.eraseFromParent();
163   }
164 
165   for (auto I = M.global_begin(), E = M.global_end(); I != E;) {
166     GlobalVariable &GV = *I++;
167     if (GV.isDeclaration() && GV.use_empty()) {
168       GV.eraseFromParent();
169       continue;
170     }
171   }
172 }
173 
174 void filterModule(
175     Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) {
176   for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
177        I != E;) {
178     GlobalAlias *GA = &*I++;
179     if (ShouldKeepDefinition(GA))
180       continue;
181 
182     GlobalObject *GO;
183     if (GA->getValueType()->isFunctionTy())
184       GO = Function::Create(cast<FunctionType>(GA->getValueType()),
185                             GlobalValue::ExternalLinkage, "", M);
186     else
187       GO = new GlobalVariable(
188           *M, GA->getValueType(), false, GlobalValue::ExternalLinkage,
189           nullptr, "", nullptr,
190           GA->getThreadLocalMode(), GA->getType()->getAddressSpace());
191     GO->takeName(GA);
192     GA->replaceAllUsesWith(GO);
193     GA->eraseFromParent();
194   }
195 
196   for (Function &F : *M) {
197     if (ShouldKeepDefinition(&F))
198       continue;
199 
200     F.deleteBody();
201     F.setComdat(nullptr);
202     F.clearMetadata();
203   }
204 
205   for (GlobalVariable &GV : M->globals()) {
206     if (ShouldKeepDefinition(&GV))
207       continue;
208 
209     GV.setInitializer(nullptr);
210     GV.setLinkage(GlobalValue::ExternalLinkage);
211     GV.setComdat(nullptr);
212     GV.clearMetadata();
213   }
214 }
215 
216 void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) {
217   if (auto *F = dyn_cast<Function>(C))
218     return Fn(F);
219   if (isa<GlobalValue>(C))
220     return;
221   for (Value *Op : C->operands())
222     forEachVirtualFunction(cast<Constant>(Op), Fn);
223 }
224 
225 // If it's possible to split M into regular and thin LTO parts, do so and write
226 // a multi-module bitcode file with the two parts to OS. Otherwise, write only a
227 // regular LTO bitcode file to OS.
228 void splitAndWriteThinLTOBitcode(
229     raw_ostream &OS, raw_ostream *ThinLinkOS,
230     function_ref<AAResults &(Function &)> AARGetter, Module &M) {
231   std::string ModuleId = getUniqueModuleId(&M);
232   if (ModuleId.empty()) {
233     // We couldn't generate a module ID for this module, just write it out as a
234     // regular LTO module.
235     WriteBitcodeToFile(&M, OS);
236     if (ThinLinkOS)
237       // We don't have a ThinLTO part, but still write the module to the
238       // ThinLinkOS if requested so that the expected output file is produced.
239       WriteBitcodeToFile(&M, *ThinLinkOS);
240     return;
241   }
242 
243   promoteTypeIds(M, ModuleId);
244 
245   // Returns whether a global has attached type metadata. Such globals may
246   // participate in CFI or whole-program devirtualization, so they need to
247   // appear in the merged module instead of the thin LTO module.
248   auto HasTypeMetadata = [&](const GlobalObject *GO) {
249     SmallVector<MDNode *, 1> MDs;
250     GO->getMetadata(LLVMContext::MD_type, MDs);
251     return !MDs.empty();
252   };
253 
254   // Collect the set of virtual functions that are eligible for virtual constant
255   // propagation. Each eligible function must not access memory, must return
256   // an integer of width <=64 bits, must take at least one argument, must not
257   // use its first argument (assumed to be "this") and all arguments other than
258   // the first one must be of <=64 bit integer type.
259   //
260   // Note that we test whether this copy of the function is readnone, rather
261   // than testing function attributes, which must hold for any copy of the
262   // function, even a less optimized version substituted at link time. This is
263   // sound because the virtual constant propagation optimizations effectively
264   // inline all implementations of the virtual function into each call site,
265   // rather than using function attributes to perform local optimization.
266   std::set<const Function *> EligibleVirtualFns;
267   // If any member of a comdat lives in MergedM, put all members of that
268   // comdat in MergedM to keep the comdat together.
269   DenseSet<const Comdat *> MergedMComdats;
270   for (GlobalVariable &GV : M.globals())
271     if (HasTypeMetadata(&GV)) {
272       if (const auto *C = GV.getComdat())
273         MergedMComdats.insert(C);
274       forEachVirtualFunction(GV.getInitializer(), [&](Function *F) {
275         auto *RT = dyn_cast<IntegerType>(F->getReturnType());
276         if (!RT || RT->getBitWidth() > 64 || F->arg_empty() ||
277             !F->arg_begin()->use_empty())
278           return;
279         for (auto &Arg : make_range(std::next(F->arg_begin()), F->arg_end())) {
280           auto *ArgT = dyn_cast<IntegerType>(Arg.getType());
281           if (!ArgT || ArgT->getBitWidth() > 64)
282             return;
283         }
284         if (!F->isDeclaration() &&
285             computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone)
286           EligibleVirtualFns.insert(F);
287       });
288     }
289 
290   ValueToValueMapTy VMap;
291   std::unique_ptr<Module> MergedM(
292       CloneModule(&M, VMap, [&](const GlobalValue *GV) -> bool {
293         if (const auto *C = GV->getComdat())
294           if (MergedMComdats.count(C))
295             return true;
296         if (auto *F = dyn_cast<Function>(GV))
297           return EligibleVirtualFns.count(F);
298         if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
299           return HasTypeMetadata(GVar);
300         return false;
301       }));
302   StripDebugInfo(*MergedM);
303 
304   for (Function &F : *MergedM)
305     if (!F.isDeclaration()) {
306       // Reset the linkage of all functions eligible for virtual constant
307       // propagation. The canonical definitions live in the thin LTO module so
308       // that they can be imported.
309       F.setLinkage(GlobalValue::AvailableExternallyLinkage);
310       F.setComdat(nullptr);
311     }
312 
313   SetVector<GlobalValue *> CfiFunctions;
314   for (auto &F : M)
315     if ((!F.hasLocalLinkage() || F.hasAddressTaken()) && HasTypeMetadata(&F))
316       CfiFunctions.insert(&F);
317 
318   // Remove all globals with type metadata, globals with comdats that live in
319   // MergedM, and aliases pointing to such globals from the thin LTO module.
320   filterModule(&M, [&](const GlobalValue *GV) {
321     if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
322       if (HasTypeMetadata(GVar))
323         return false;
324     if (const auto *C = GV->getComdat())
325       if (MergedMComdats.count(C))
326         return false;
327     return true;
328   });
329 
330   promoteInternals(*MergedM, M, ModuleId, CfiFunctions);
331   promoteInternals(M, *MergedM, ModuleId, CfiFunctions);
332 
333   SmallVector<MDNode *, 8> CfiFunctionMDs;
334   for (auto V : CfiFunctions) {
335     Function &F = *cast<Function>(V);
336     SmallVector<MDNode *, 2> Types;
337     F.getMetadata(LLVMContext::MD_type, Types);
338 
339     auto &Ctx = MergedM->getContext();
340     SmallVector<Metadata *, 4> Elts;
341     Elts.push_back(MDString::get(Ctx, F.getName()));
342     CfiFunctionLinkage Linkage;
343     if (!F.isDeclarationForLinker())
344       Linkage = CFL_Definition;
345     else if (F.isWeakForLinker())
346       Linkage = CFL_WeakDeclaration;
347     else
348       Linkage = CFL_Declaration;
349     Elts.push_back(ConstantAsMetadata::get(
350         llvm::ConstantInt::get(Type::getInt8Ty(Ctx), Linkage)));
351     for (auto Type : Types)
352       Elts.push_back(Type);
353     CfiFunctionMDs.push_back(MDTuple::get(Ctx, Elts));
354   }
355 
356   if(!CfiFunctionMDs.empty()) {
357     NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("cfi.functions");
358     for (auto MD : CfiFunctionMDs)
359       NMD->addOperand(MD);
360   }
361 
362   simplifyExternals(*MergedM);
363 
364   // FIXME: Try to re-use BSI and PFI from the original module here.
365   ProfileSummaryInfo PSI(M);
366   ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI);
367 
368   // Mark the merged module as requiring full LTO. We still want an index for
369   // it though, so that it can participate in summary-based dead stripping.
370   MergedM->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
371   ModuleSummaryIndex MergedMIndex =
372       buildModuleSummaryIndex(*MergedM, nullptr, &PSI);
373 
374   SmallVector<char, 0> Buffer;
375 
376   BitcodeWriter W(Buffer);
377   // Save the module hash produced for the full bitcode, which will
378   // be used in the backends, and use that in the minimized bitcode
379   // produced for the full link.
380   ModuleHash ModHash = {{0}};
381   W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
382                 /*GenerateHash=*/true, &ModHash);
383   W.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false,
384                 &MergedMIndex);
385   W.writeSymtab();
386   W.writeStrtab();
387   OS << Buffer;
388 
389   // If a minimized bitcode module was requested for the thin link, only
390   // the information that is needed by thin link will be written in the
391   // given OS (the merged module will be written as usual).
392   if (ThinLinkOS) {
393     Buffer.clear();
394     BitcodeWriter W2(Buffer);
395     StripDebugInfo(M);
396     W2.writeThinLinkBitcode(&M, Index, ModHash);
397     W2.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false,
398                    &MergedMIndex);
399     W2.writeSymtab();
400     W2.writeStrtab();
401     *ThinLinkOS << Buffer;
402   }
403 }
404 
405 // Returns whether this module needs to be split because it uses type metadata.
406 bool requiresSplit(Module &M) {
407   SmallVector<MDNode *, 1> MDs;
408   for (auto &GO : M.global_objects()) {
409     GO.getMetadata(LLVMContext::MD_type, MDs);
410     if (!MDs.empty())
411       return true;
412   }
413 
414   return false;
415 }
416 
417 void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
418                          function_ref<AAResults &(Function &)> AARGetter,
419                          Module &M, const ModuleSummaryIndex *Index) {
420   // See if this module has any type metadata. If so, we need to split it.
421   if (requiresSplit(M))
422     return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
423 
424   // Otherwise we can just write it out as a regular module.
425 
426   // Save the module hash produced for the full bitcode, which will
427   // be used in the backends, and use that in the minimized bitcode
428   // produced for the full link.
429   ModuleHash ModHash = {{0}};
430   WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
431                      /*GenerateHash=*/true, &ModHash);
432   // If a minimized bitcode module was requested for the thin link, only
433   // the information that is needed by thin link will be written in the
434   // given OS.
435   if (ThinLinkOS && Index)
436     WriteThinLinkBitcodeToFile(&M, *ThinLinkOS, *Index, ModHash);
437 }
438 
439 class WriteThinLTOBitcode : public ModulePass {
440   raw_ostream &OS; // raw_ostream to print on
441   // The output stream on which to emit a minimized module for use
442   // just in the thin link, if requested.
443   raw_ostream *ThinLinkOS;
444 
445 public:
446   static char ID; // Pass identification, replacement for typeid
447   WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()), ThinLinkOS(nullptr) {
448     initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
449   }
450 
451   explicit WriteThinLTOBitcode(raw_ostream &o, raw_ostream *ThinLinkOS)
452       : ModulePass(ID), OS(o), ThinLinkOS(ThinLinkOS) {
453     initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
454   }
455 
456   StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; }
457 
458   bool runOnModule(Module &M) override {
459     const ModuleSummaryIndex *Index =
460         &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex());
461     writeThinLTOBitcode(OS, ThinLinkOS, LegacyAARGetter(*this), M, Index);
462     return true;
463   }
464   void getAnalysisUsage(AnalysisUsage &AU) const override {
465     AU.setPreservesAll();
466     AU.addRequired<AssumptionCacheTracker>();
467     AU.addRequired<ModuleSummaryIndexWrapperPass>();
468     AU.addRequired<TargetLibraryInfoWrapperPass>();
469   }
470 };
471 } // anonymous namespace
472 
// Definition of the pass's static ID member (declared in the class as the
// pass identification).
char WriteThinLTOBitcode::ID = 0;
// Legacy pass initialization for WriteThinLTOBitcode under the command-line
// name "write-thinlto-bitcode". The listed dependencies are the same analyses
// that WriteThinLTOBitcode::getAnalysisUsage marks as required.
INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode",
                      "Write ThinLTO Bitcode", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode",
                    "Write ThinLTO Bitcode", false, true)
481 
482 ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str,
483                                                 raw_ostream *ThinLinkOS) {
484   return new WriteThinLTOBitcode(Str, ThinLinkOS);
485 }
486 
487 PreservedAnalyses
488 llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) {
489   FunctionAnalysisManager &FAM =
490       AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
491   writeThinLTOBitcode(OS, ThinLinkOS,
492                       [&FAM](Function &F) -> AAResults & {
493                         return FAM.getResult<AAManager>(F);
494                       },
495                       M, &AM.getResult<ModuleSummaryIndexAnalysis>(M));
496   return PreservedAnalyses::all();
497 }
498