1 //===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
11 #include "llvm/Analysis/BasicAliasAnalysis.h"
12 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
13 #include "llvm/Analysis/ProfileSummaryInfo.h"
14 #include "llvm/Analysis/TypeMetadataUtils.h"
15 #include "llvm/Bitcode/BitcodeWriter.h"
16 #include "llvm/IR/Constants.h"
17 #include "llvm/IR/DebugInfo.h"
18 #include "llvm/IR/Intrinsics.h"
19 #include "llvm/IR/Module.h"
20 #include "llvm/IR/PassManager.h"
21 #include "llvm/Pass.h"
22 #include "llvm/Support/FileSystem.h"
23 #include "llvm/Support/ScopedPrinter.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include "llvm/Transforms/IPO.h"
26 #include "llvm/Transforms/IPO/FunctionAttrs.h"
27 #include "llvm/Transforms/Utils/Cloning.h"
28 #include "llvm/Transforms/Utils/ModuleUtils.h"
29 using namespace llvm;
30 
31 namespace {
32 
33 // Promote each local-linkage entity defined by ExportM and used by ImportM by
34 // changing visibility and appending the given ModuleId.
35 void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
36   DenseMap<const Comdat *, Comdat *> RenamedComdats;
37   for (auto &ExportGV : ExportM.global_values()) {
38     if (!ExportGV.hasLocalLinkage())
39       continue;
40 
41     auto Name = ExportGV.getName();
42     GlobalValue *ImportGV = ImportM.getNamedValue(Name);
43     if (!ImportGV || ImportGV->use_empty())
44       continue;
45 
46     std::string NewName = (Name + ModuleId).str();
47 
48     if (const auto *C = ExportGV.getComdat())
49       if (C->getName() == Name)
50         RenamedComdats.try_emplace(C, ExportM.getOrInsertComdat(NewName));
51 
52     ExportGV.setName(NewName);
53     ExportGV.setLinkage(GlobalValue::ExternalLinkage);
54     ExportGV.setVisibility(GlobalValue::HiddenVisibility);
55 
56     ImportGV->setName(NewName);
57     ImportGV->setVisibility(GlobalValue::HiddenVisibility);
58   }
59 
60   if (!RenamedComdats.empty())
61     for (auto &GO : ExportM.global_objects())
62       if (auto *C = GO.getComdat()) {
63         auto Replacement = RenamedComdats.find(C);
64         if (Replacement != RenamedComdats.end())
65           GO.setComdat(Replacement->second);
66       }
67 }
68 
69 // Promote all internal (i.e. distinct) type ids used by the module by replacing
70 // them with external type ids formed using the module id.
71 //
72 // Note that this needs to be done before we clone the module because each clone
73 // will receive its own set of distinct metadata nodes.
74 void promoteTypeIds(Module &M, StringRef ModuleId) {
75   DenseMap<Metadata *, Metadata *> LocalToGlobal;
76   auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) {
77     Metadata *MD =
78         cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata();
79 
80     if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
81       Metadata *&GlobalMD = LocalToGlobal[MD];
82       if (!GlobalMD) {
83         std::string NewName =
84             (to_string(LocalToGlobal.size()) + ModuleId).str();
85         GlobalMD = MDString::get(M.getContext(), NewName);
86       }
87 
88       CI->setArgOperand(ArgNo,
89                         MetadataAsValue::get(M.getContext(), GlobalMD));
90     }
91   };
92 
93   if (Function *TypeTestFunc =
94           M.getFunction(Intrinsic::getName(Intrinsic::type_test))) {
95     for (const Use &U : TypeTestFunc->uses()) {
96       auto CI = cast<CallInst>(U.getUser());
97       ExternalizeTypeId(CI, 1);
98     }
99   }
100 
101   if (Function *TypeCheckedLoadFunc =
102           M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) {
103     for (const Use &U : TypeCheckedLoadFunc->uses()) {
104       auto CI = cast<CallInst>(U.getUser());
105       ExternalizeTypeId(CI, 2);
106     }
107   }
108 
109   for (GlobalObject &GO : M.global_objects()) {
110     SmallVector<MDNode *, 1> MDs;
111     GO.getMetadata(LLVMContext::MD_type, MDs);
112 
113     GO.eraseMetadata(LLVMContext::MD_type);
114     for (auto MD : MDs) {
115       auto I = LocalToGlobal.find(MD->getOperand(1));
116       if (I == LocalToGlobal.end()) {
117         GO.addMetadata(LLVMContext::MD_type, *MD);
118         continue;
119       }
120       GO.addMetadata(
121           LLVMContext::MD_type,
122           *MDNode::get(M.getContext(),
123                        ArrayRef<Metadata *>{MD->getOperand(0), I->second}));
124     }
125   }
126 }
127 
128 // Drop unused globals, and drop type information from function declarations.
129 // FIXME: If we made functions typeless then there would be no need to do this.
130 void simplifyExternals(Module &M) {
131   FunctionType *EmptyFT =
132       FunctionType::get(Type::getVoidTy(M.getContext()), false);
133 
134   for (auto I = M.begin(), E = M.end(); I != E;) {
135     Function &F = *I++;
136     if (F.isDeclaration() && F.use_empty()) {
137       F.eraseFromParent();
138       continue;
139     }
140 
141     if (!F.isDeclaration() || F.getFunctionType() == EmptyFT)
142       continue;
143 
144     Function *NewF =
145         Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M);
146     NewF->setVisibility(F.getVisibility());
147     NewF->takeName(&F);
148     F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
149     F.eraseFromParent();
150   }
151 
152   for (auto I = M.global_begin(), E = M.global_end(); I != E;) {
153     GlobalVariable &GV = *I++;
154     if (GV.isDeclaration() && GV.use_empty()) {
155       GV.eraseFromParent();
156       continue;
157     }
158   }
159 }
160 
161 void filterModule(
162     Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) {
163   for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
164        I != E;) {
165     GlobalAlias *GA = &*I++;
166     if (ShouldKeepDefinition(GA))
167       continue;
168 
169     GlobalObject *GO;
170     if (GA->getValueType()->isFunctionTy())
171       GO = Function::Create(cast<FunctionType>(GA->getValueType()),
172                             GlobalValue::ExternalLinkage, "", M);
173     else
174       GO = new GlobalVariable(
175           *M, GA->getValueType(), false, GlobalValue::ExternalLinkage,
176           nullptr, "", nullptr,
177           GA->getThreadLocalMode(), GA->getType()->getAddressSpace());
178     GO->takeName(GA);
179     GA->replaceAllUsesWith(GO);
180     GA->eraseFromParent();
181   }
182 
183   for (Function &F : *M) {
184     if (ShouldKeepDefinition(&F))
185       continue;
186 
187     F.deleteBody();
188     F.setComdat(nullptr);
189     F.clearMetadata();
190   }
191 
192   for (GlobalVariable &GV : M->globals()) {
193     if (ShouldKeepDefinition(&GV))
194       continue;
195 
196     GV.setInitializer(nullptr);
197     GV.setLinkage(GlobalValue::ExternalLinkage);
198     GV.setComdat(nullptr);
199     GV.clearMetadata();
200   }
201 }
202 
203 void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) {
204   if (auto *F = dyn_cast<Function>(C))
205     return Fn(F);
206   if (isa<GlobalValue>(C))
207     return;
208   for (Value *Op : C->operands())
209     forEachVirtualFunction(cast<Constant>(Op), Fn);
210 }
211 
// If it's possible to split M into regular and thin LTO parts, do so and write
// a multi-module bitcode file with the two parts to OS. Otherwise, write only a
// regular LTO bitcode file to OS.
//
// OS         - receives the full bitcode output.
// ThinLinkOS - if non-null, additionally receives a minimized (debug-info
//              stripped) copy intended for the thin link.
// AARGetter  - supplies alias analysis results for a function; used when
//              testing whether a virtual function body is readnone.
// M          - the module to split. It is modified in place: type ids are
//              promoted, definitions that move to the merged module are
//              removed, internals are promoted, and debug info is stripped
//              when ThinLinkOS is set.
void splitAndWriteThinLTOBitcode(
    raw_ostream &OS, raw_ostream *ThinLinkOS,
    function_ref<AAResults &(Function &)> AARGetter, Module &M) {
  std::string ModuleId = getUniqueModuleId(&M);
  if (ModuleId.empty()) {
    // We couldn't generate a module ID for this module, just write it out as a
    // regular LTO module.
    WriteBitcodeToFile(&M, OS);
    if (ThinLinkOS)
      // We don't have a ThinLTO part, but still write the module to the
      // ThinLinkOS if requested so that the expected output file is produced.
      WriteBitcodeToFile(&M, *ThinLinkOS);
    return;
  }

  // Must run before CloneModule below: each clone would otherwise get its own
  // set of distinct type id nodes.
  promoteTypeIds(M, ModuleId);

  // Returns whether a global has attached type metadata. Such globals may
  // participate in CFI or whole-program devirtualization, so they need to
  // appear in the merged module instead of the thin LTO module.
  auto HasTypeMetadata = [&](const GlobalObject *GO) {
    SmallVector<MDNode *, 1> MDs;
    GO->getMetadata(LLVMContext::MD_type, MDs);
    return !MDs.empty();
  };

  // Collect the set of virtual functions that are eligible for virtual constant
  // propagation. Each eligible function must not access memory, must return
  // an integer of width <=64 bits, must take at least one argument, must not
  // use its first argument (assumed to be "this") and all arguments other than
  // the first one must be of <=64 bit integer type.
  //
  // Note that we test whether this copy of the function is readnone, rather
  // than testing function attributes, which must hold for any copy of the
  // function, even a less optimized version substituted at link time. This is
  // sound because the virtual constant propagation optimizations effectively
  // inline all implementations of the virtual function into each call site,
  // rather than using function attributes to perform local optimization.
  std::set<const Function *> EligibleVirtualFns;
  // If any member of a comdat lives in MergedM, put all members of that
  // comdat in MergedM to keep the comdat together.
  DenseSet<const Comdat *> MergedMComdats;
  for (GlobalVariable &GV : M.globals())
    if (HasTypeMetadata(&GV)) {
      if (const auto *C = GV.getComdat())
        MergedMComdats.insert(C);
      // Every function referenced from the initializer of a global with type
      // metadata is treated as a candidate virtual function.
      forEachVirtualFunction(GV.getInitializer(), [&](Function *F) {
        // Return type: integer of at most 64 bits; first argument must exist
        // and be unused.
        auto *RT = dyn_cast<IntegerType>(F->getReturnType());
        if (!RT || RT->getBitWidth() > 64 || F->arg_empty() ||
            !F->arg_begin()->use_empty())
          return;
        // Remaining arguments: integers of at most 64 bits.
        for (auto &Arg : make_range(std::next(F->arg_begin()), F->arg_end())) {
          auto *ArgT = dyn_cast<IntegerType>(Arg.getType());
          if (!ArgT || ArgT->getBitWidth() > 64)
            return;
        }
        if (computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone)
          EligibleVirtualFns.insert(F);
      });
    }

  // Clone M into MergedM. The callback selects the globals whose definitions
  // belong in the merged module: members of the collected comdats, eligible
  // virtual functions, and variables (or aliases based on variables) that
  // carry type metadata.
  ValueToValueMapTy VMap;
  std::unique_ptr<Module> MergedM(
      CloneModule(&M, VMap, [&](const GlobalValue *GV) -> bool {
        if (const auto *C = GV->getComdat())
          if (MergedMComdats.count(C))
            return true;
        if (auto *F = dyn_cast<Function>(GV))
          return EligibleVirtualFns.count(F);
        if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
          return HasTypeMetadata(GVar);
        return false;
      }));
  StripDebugInfo(*MergedM);

  for (Function &F : *MergedM)
    if (!F.isDeclaration()) {
      // Reset the linkage of all functions eligible for virtual constant
      // propagation. The canonical definitions live in the thin LTO module so
      // that they can be imported.
      F.setLinkage(GlobalValue::AvailableExternallyLinkage);
      F.setComdat(nullptr);
    }

  // Remove all globals with type metadata, globals with comdats that live in
  // MergedM, and aliases pointing to such globals from the thin LTO module.
  filterModule(&M, [&](const GlobalValue *GV) {
    if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
      if (HasTypeMetadata(GVar))
        return false;
    if (const auto *C = GV->getComdat())
      if (MergedMComdats.count(C))
        return false;
    return true;
  });

  // Promote in both directions so that each module can refer to the other's
  // formerly local symbols by their new, module-id-suffixed names.
  promoteInternals(*MergedM, M, ModuleId);
  promoteInternals(M, *MergedM, ModuleId);

  // Drop unused declarations and function type information from MergedM.
  simplifyExternals(*MergedM);


  // The summary index is built for the thin LTO module only (M), after the
  // split, and is written alongside it below.
  // FIXME: Try to re-use BSI and PFI from the original module here.
  ProfileSummaryInfo PSI(M);
  ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI);

  SmallVector<char, 0> Buffer;

  // Emit both modules into one buffer: the thin LTO module (with summary and
  // hash) first, then the merged module, then the string table.
  BitcodeWriter W(Buffer);
  // Save the module hash produced for the full bitcode, which will
  // be used in the backends, and use that in the minimized bitcode
  // produced for the full link.
  ModuleHash ModHash = {{0}};
  W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
                /*GenerateHash=*/true, &ModHash);
  W.writeModule(MergedM.get());
  W.writeStrtab();
  OS << Buffer;

  // If a minimized bitcode module was requested for the thin link,
  // strip the debug info (the merged module was already stripped above)
  // and write it to the given OS.
  if (ThinLinkOS) {
    Buffer.clear();
    BitcodeWriter W2(Buffer);
    StripDebugInfo(M);
    // GenerateHash is false here: the hash saved in ModHash by the full write
    // above is passed in instead of being recomputed.
    W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
                   /*GenerateHash=*/false, &ModHash);
    W2.writeModule(MergedM.get());
    W2.writeStrtab();
    *ThinLinkOS << Buffer;
  }
}
348 
349 // Returns whether this module needs to be split because it uses type metadata.
350 bool requiresSplit(Module &M) {
351   SmallVector<MDNode *, 1> MDs;
352   for (auto &GO : M.global_objects()) {
353     GO.getMetadata(LLVMContext::MD_type, MDs);
354     if (!MDs.empty())
355       return true;
356   }
357 
358   return false;
359 }
360 
361 void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
362                          function_ref<AAResults &(Function &)> AARGetter,
363                          Module &M, const ModuleSummaryIndex *Index) {
364   // See if this module has any type metadata. If so, we need to split it.
365   if (requiresSplit(M))
366     return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
367 
368   // Otherwise we can just write it out as a regular module.
369 
370   // Save the module hash produced for the full bitcode, which will
371   // be used in the backends, and use that in the minimized bitcode
372   // produced for the full link.
373   ModuleHash ModHash = {{0}};
374   WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
375                      /*GenerateHash=*/true, &ModHash);
376   // If a minimized bitcode module was requested for the thin link,
377   // strip the debug info and write it to the given OS.
378   if (ThinLinkOS) {
379     StripDebugInfo(M);
380     WriteBitcodeToFile(&M, *ThinLinkOS, /*ShouldPreserveUseListOrder=*/false,
381                        Index,
382                        /*GenerateHash=*/false, &ModHash);
383   }
384 }
385 
386 class WriteThinLTOBitcode : public ModulePass {
387   raw_ostream &OS; // raw_ostream to print on
388   // The output stream on which to emit a minimized module for use
389   // just in the thin link, if requested.
390   raw_ostream *ThinLinkOS;
391 
392 public:
393   static char ID; // Pass identification, replacement for typeid
394   WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()), ThinLinkOS(nullptr) {
395     initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
396   }
397 
398   explicit WriteThinLTOBitcode(raw_ostream &o, raw_ostream *ThinLinkOS)
399       : ModulePass(ID), OS(o), ThinLinkOS(ThinLinkOS) {
400     initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
401   }
402 
403   StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; }
404 
405   bool runOnModule(Module &M) override {
406     const ModuleSummaryIndex *Index =
407         &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex());
408     writeThinLTOBitcode(OS, ThinLinkOS, LegacyAARGetter(*this), M, Index);
409     return true;
410   }
411   void getAnalysisUsage(AnalysisUsage &AU) const override {
412     AU.setPreservesAll();
413     AU.addRequired<AssumptionCacheTracker>();
414     AU.addRequired<ModuleSummaryIndexWrapperPass>();
415     AU.addRequired<TargetLibraryInfoWrapperPass>();
416   }
417 };
418 } // anonymous namespace
419 
// Definition of the pass identification member declared in the class.
char WriteThinLTOBitcode::ID = 0;
// Register the legacy pass as "write-thinlto-bitcode". The dependencies listed
// here are the same three passes that getAnalysisUsage() marks as required.
// NOTE(review): the trailing `false, true` arguments are forwarded to the
// registration macros; see their definition for the exact meaning.
INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode",
                      "Write ThinLTO Bitcode", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode",
                    "Write ThinLTO Bitcode", false, true)
428 
429 ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str,
430                                                 raw_ostream *ThinLinkOS) {
431   return new WriteThinLTOBitcode(Str, ThinLinkOS);
432 }
433 
434 PreservedAnalyses
435 llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) {
436   FunctionAnalysisManager &FAM =
437       AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
438   writeThinLTOBitcode(OS, ThinLinkOS,
439                       [&FAM](Function &F) -> AAResults & {
440                         return FAM.getResult<AAManager>(F);
441                       },
442                       M, &AM.getResult<ModuleSummaryIndexAnalysis>(M));
443   return PreservedAnalyses::all();
444 }
445