1 //===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass prepares a module containing type metadata for ThinLTO by splitting
11 // it into regular and thin LTO parts if possible, and writing both parts to
12 // a multi-module bitcode file. Modules that do not contain type metadata are
13 // written unmodified as a single module.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Analysis/BasicAliasAnalysis.h"
18 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
19 #include "llvm/Analysis/TypeMetadataUtils.h"
20 #include "llvm/Bitcode/BitcodeWriter.h"
21 #include "llvm/IR/Constants.h"
22 #include "llvm/IR/DebugInfo.h"
23 #include "llvm/IR/Intrinsics.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/IR/PassManager.h"
26 #include "llvm/Pass.h"
27 #include "llvm/Support/FileSystem.h"
28 #include "llvm/Support/ScopedPrinter.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include "llvm/Transforms/IPO.h"
31 #include "llvm/Transforms/IPO/FunctionAttrs.h"
32 #include "llvm/Transforms/Utils/Cloning.h"
33 using namespace llvm;
34 
35 namespace {
36 
37 // Produce a unique identifier for this module by taking the MD5 sum of the
38 // names of the module's strong external symbols. This identifier is
39 // normally guaranteed to be unique, or the program would fail to link due to
40 // multiply defined symbols.
41 //
42 // If the module has no strong external symbols (such a module may still have a
43 // semantic effect if it performs global initialization), we cannot produce a
44 // unique identifier for this module, so we return the empty string, which
45 // causes the entire module to be written as a regular LTO module.
46 std::string getModuleId(Module *M) {
47   MD5 Md5;
48   bool ExportsSymbols = false;
49   auto AddGlobal = [&](GlobalValue &GV) {
50     if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
51         !GV.hasExternalLinkage())
52       return;
53     ExportsSymbols = true;
54     Md5.update(GV.getName());
55     Md5.update(ArrayRef<uint8_t>{0});
56   };
57 
58   for (auto &F : *M)
59     AddGlobal(F);
60   for (auto &GV : M->globals())
61     AddGlobal(GV);
62   for (auto &GA : M->aliases())
63     AddGlobal(GA);
64   for (auto &IF : M->ifuncs())
65     AddGlobal(IF);
66 
67   if (!ExportsSymbols)
68     return "";
69 
70   MD5::MD5Result R;
71   Md5.final(R);
72 
73   SmallString<32> Str;
74   MD5::stringifyResult(R, Str);
75   return ("$" + Str).str();
76 }
77 
78 // Promote each local-linkage entity defined by ExportM and used by ImportM by
79 // changing visibility and appending the given ModuleId.
80 void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
81   auto PromoteInternal = [&](GlobalValue &ExportGV) {
82     if (!ExportGV.hasLocalLinkage())
83       return;
84 
85     GlobalValue *ImportGV = ImportM.getNamedValue(ExportGV.getName());
86     if (!ImportGV || ImportGV->use_empty())
87       return;
88 
89     std::string NewName = (ExportGV.getName() + ModuleId).str();
90 
91     ExportGV.setName(NewName);
92     ExportGV.setLinkage(GlobalValue::ExternalLinkage);
93     ExportGV.setVisibility(GlobalValue::HiddenVisibility);
94 
95     ImportGV->setName(NewName);
96     ImportGV->setVisibility(GlobalValue::HiddenVisibility);
97   };
98 
99   for (auto &F : ExportM)
100     PromoteInternal(F);
101   for (auto &GV : ExportM.globals())
102     PromoteInternal(GV);
103   for (auto &GA : ExportM.aliases())
104     PromoteInternal(GA);
105   for (auto &IF : ExportM.ifuncs())
106     PromoteInternal(IF);
107 }
108 
109 // Promote all internal (i.e. distinct) type ids used by the module by replacing
110 // them with external type ids formed using the module id.
111 //
112 // Note that this needs to be done before we clone the module because each clone
113 // will receive its own set of distinct metadata nodes.
114 void promoteTypeIds(Module &M, StringRef ModuleId) {
115   DenseMap<Metadata *, Metadata *> LocalToGlobal;
116   auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) {
117     Metadata *MD =
118         cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata();
119 
120     if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
121       Metadata *&GlobalMD = LocalToGlobal[MD];
122       if (!GlobalMD) {
123         std::string NewName =
124             (to_string(LocalToGlobal.size()) + ModuleId).str();
125         GlobalMD = MDString::get(M.getContext(), NewName);
126       }
127 
128       CI->setArgOperand(ArgNo,
129                         MetadataAsValue::get(M.getContext(), GlobalMD));
130     }
131   };
132 
133   if (Function *TypeTestFunc =
134           M.getFunction(Intrinsic::getName(Intrinsic::type_test))) {
135     for (const Use &U : TypeTestFunc->uses()) {
136       auto CI = cast<CallInst>(U.getUser());
137       ExternalizeTypeId(CI, 1);
138     }
139   }
140 
141   if (Function *TypeCheckedLoadFunc =
142           M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) {
143     for (const Use &U : TypeCheckedLoadFunc->uses()) {
144       auto CI = cast<CallInst>(U.getUser());
145       ExternalizeTypeId(CI, 2);
146     }
147   }
148 
149   for (GlobalObject &GO : M.global_objects()) {
150     SmallVector<MDNode *, 1> MDs;
151     GO.getMetadata(LLVMContext::MD_type, MDs);
152 
153     GO.eraseMetadata(LLVMContext::MD_type);
154     for (auto MD : MDs) {
155       auto I = LocalToGlobal.find(MD->getOperand(1));
156       if (I == LocalToGlobal.end()) {
157         GO.addMetadata(LLVMContext::MD_type, *MD);
158         continue;
159       }
160       GO.addMetadata(
161           LLVMContext::MD_type,
162           *MDNode::get(M.getContext(),
163                        ArrayRef<Metadata *>{MD->getOperand(0), I->second}));
164     }
165   }
166 }
167 
168 // Drop unused globals, and drop type information from function declarations.
169 // FIXME: If we made functions typeless then there would be no need to do this.
170 void simplifyExternals(Module &M) {
171   FunctionType *EmptyFT =
172       FunctionType::get(Type::getVoidTy(M.getContext()), false);
173 
174   for (auto I = M.begin(), E = M.end(); I != E;) {
175     Function &F = *I++;
176     if (F.isDeclaration() && F.use_empty()) {
177       F.eraseFromParent();
178       continue;
179     }
180 
181     if (!F.isDeclaration() || F.getFunctionType() == EmptyFT)
182       continue;
183 
184     Function *NewF =
185         Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M);
186     NewF->setVisibility(F.getVisibility());
187     NewF->takeName(&F);
188     F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
189     F.eraseFromParent();
190   }
191 
192   for (auto I = M.global_begin(), E = M.global_end(); I != E;) {
193     GlobalVariable &GV = *I++;
194     if (GV.isDeclaration() && GV.use_empty()) {
195       GV.eraseFromParent();
196       continue;
197     }
198   }
199 }
200 
201 void filterModule(
202     Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) {
203   for (Function &F : *M) {
204     if (ShouldKeepDefinition(&F))
205       continue;
206 
207     F.deleteBody();
208     F.setComdat(nullptr);
209     F.clearMetadata();
210   }
211 
212   for (GlobalVariable &GV : M->globals()) {
213     if (ShouldKeepDefinition(&GV))
214       continue;
215 
216     GV.setInitializer(nullptr);
217     GV.setLinkage(GlobalValue::ExternalLinkage);
218     GV.setComdat(nullptr);
219     GV.clearMetadata();
220   }
221 
222   for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
223        I != E;) {
224     GlobalAlias *GA = &*I++;
225     if (ShouldKeepDefinition(GA))
226       continue;
227 
228     GlobalObject *GO;
229     if (I->getValueType()->isFunctionTy())
230       GO = Function::Create(cast<FunctionType>(GA->getValueType()),
231                             GlobalValue::ExternalLinkage, "", M);
232     else
233       GO = new GlobalVariable(
234           *M, GA->getValueType(), false, GlobalValue::ExternalLinkage,
235           (Constant *)nullptr, "", (GlobalVariable *)nullptr,
236           GA->getThreadLocalMode(), GA->getType()->getAddressSpace());
237     GO->takeName(GA);
238     GA->replaceAllUsesWith(GO);
239     GA->eraseFromParent();
240   }
241 }
242 
243 void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) {
244   if (auto *F = dyn_cast<Function>(C))
245     return Fn(F);
246   if (isa<GlobalValue>(C))
247     return;
248   for (Value *Op : C->operands())
249     forEachVirtualFunction(cast<Constant>(Op), Fn);
250 }
251 
252 // If it's possible to split M into regular and thin LTO parts, do so and write
253 // a multi-module bitcode file with the two parts to OS. Otherwise, write only a
254 // regular LTO bitcode file to OS.
255 void splitAndWriteThinLTOBitcode(
256     raw_ostream &OS, raw_ostream *ThinLinkOS,
257     function_ref<AAResults &(Function &)> AARGetter, Module &M) {
258   std::string ModuleId = getModuleId(&M);
259   if (ModuleId.empty()) {
260     // We couldn't generate a module ID for this module, just write it out as a
261     // regular LTO module.
262     WriteBitcodeToFile(&M, OS);
263     if (ThinLinkOS)
264       // We don't have a ThinLTO part, but still write the module to the
265       // ThinLinkOS if requested so that the expected output file is produced.
266       WriteBitcodeToFile(&M, *ThinLinkOS);
267     return;
268   }
269 
270   promoteTypeIds(M, ModuleId);
271 
272   // Returns whether a global has attached type metadata. Such globals may
273   // participate in CFI or whole-program devirtualization, so they need to
274   // appear in the merged module instead of the thin LTO module.
275   auto HasTypeMetadata = [&](const GlobalObject *GO) {
276     SmallVector<MDNode *, 1> MDs;
277     GO->getMetadata(LLVMContext::MD_type, MDs);
278     return !MDs.empty();
279   };
280 
281   // Collect the set of virtual functions that are eligible for virtual constant
282   // propagation. Each eligible function must not access memory, must return
283   // an integer of width <=64 bits, must take at least one argument, must not
284   // use its first argument (assumed to be "this") and all arguments other than
285   // the first one must be of <=64 bit integer type.
286   //
287   // Note that we test whether this copy of the function is readnone, rather
288   // than testing function attributes, which must hold for any copy of the
289   // function, even a less optimized version substituted at link time. This is
290   // sound because the virtual constant propagation optimizations effectively
291   // inline all implementations of the virtual function into each call site,
292   // rather than using function attributes to perform local optimization.
293   std::set<const Function *> EligibleVirtualFns;
294   for (GlobalVariable &GV : M.globals())
295     if (HasTypeMetadata(&GV))
296       forEachVirtualFunction(GV.getInitializer(), [&](Function *F) {
297         auto *RT = dyn_cast<IntegerType>(F->getReturnType());
298         if (!RT || RT->getBitWidth() > 64 || F->arg_empty() ||
299             !F->arg_begin()->use_empty())
300           return;
301         for (auto &Arg : make_range(std::next(F->arg_begin()), F->arg_end())) {
302           auto *ArgT = dyn_cast<IntegerType>(Arg.getType());
303           if (!ArgT || ArgT->getBitWidth() > 64)
304             return;
305         }
306         if (computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone)
307           EligibleVirtualFns.insert(F);
308       });
309 
310   ValueToValueMapTy VMap;
311   std::unique_ptr<Module> MergedM(
312       CloneModule(&M, VMap, [&](const GlobalValue *GV) -> bool {
313         if (auto *F = dyn_cast<Function>(GV))
314           return EligibleVirtualFns.count(F);
315         if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
316           return HasTypeMetadata(GVar);
317         return false;
318       }));
319   StripDebugInfo(*MergedM);
320 
321   for (Function &F : *MergedM)
322     if (!F.isDeclaration()) {
323       // Reset the linkage of all functions eligible for virtual constant
324       // propagation. The canonical definitions live in the thin LTO module so
325       // that they can be imported.
326       F.setLinkage(GlobalValue::AvailableExternallyLinkage);
327       F.setComdat(nullptr);
328     }
329 
330   // Remove all globals with type metadata, as well as aliases pointing to them,
331   // from the thin LTO module.
332   filterModule(&M, [&](const GlobalValue *GV) {
333     if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
334       return !HasTypeMetadata(GVar);
335     return true;
336   });
337 
338   promoteInternals(*MergedM, M, ModuleId);
339   promoteInternals(M, *MergedM, ModuleId);
340 
341   simplifyExternals(*MergedM);
342 
343 
344   // FIXME: Try to re-use BSI and PFI from the original module here.
345   ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr);
346 
347   SmallVector<char, 0> Buffer;
348 
349   BitcodeWriter W(Buffer);
350   // Save the module hash produced for the full bitcode, which will
351   // be used in the backends, and use that in the minimized bitcode
352   // produced for the full link.
353   ModuleHash ModHash = {{0}};
354   W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
355                 /*GenerateHash=*/true, &ModHash);
356   W.writeModule(MergedM.get());
357   OS << Buffer;
358 
359   // If a minimized bitcode module was requested for the thin link,
360   // strip the debug info (the merged module was already stripped above)
361   // and write it to the given OS.
362   if (ThinLinkOS) {
363     Buffer.clear();
364     BitcodeWriter W2(Buffer);
365     StripDebugInfo(M);
366     W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
367                    /*GenerateHash=*/false, &ModHash);
368     W2.writeModule(MergedM.get());
369     *ThinLinkOS << Buffer;
370   }
371 }
372 
373 // Returns whether this module needs to be split because it uses type metadata.
374 bool requiresSplit(Module &M) {
375   SmallVector<MDNode *, 1> MDs;
376   for (auto &GO : M.global_objects()) {
377     GO.getMetadata(LLVMContext::MD_type, MDs);
378     if (!MDs.empty())
379       return true;
380   }
381 
382   return false;
383 }
384 
385 void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
386                          function_ref<AAResults &(Function &)> AARGetter,
387                          Module &M, const ModuleSummaryIndex *Index) {
388   // See if this module has any type metadata. If so, we need to split it.
389   if (requiresSplit(M))
390     return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
391 
392   // Otherwise we can just write it out as a regular module.
393 
394   // Save the module hash produced for the full bitcode, which will
395   // be used in the backends, and use that in the minimized bitcode
396   // produced for the full link.
397   ModuleHash ModHash = {{0}};
398   WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
399                      /*GenerateHash=*/true, &ModHash);
400   // If a minimized bitcode module was requested for the thin link,
401   // strip the debug info and write it to the given OS.
402   if (ThinLinkOS) {
403     StripDebugInfo(M);
404     WriteBitcodeToFile(&M, *ThinLinkOS, /*ShouldPreserveUseListOrder=*/false,
405                        Index,
406                        /*GenerateHash=*/false, &ModHash);
407   }
408 }
409 
410 class WriteThinLTOBitcode : public ModulePass {
411   raw_ostream &OS; // raw_ostream to print on
412   // The output stream on which to emit a minimized module for use
413   // just in the thin link, if requested.
414   raw_ostream *ThinLinkOS;
415 
416 public:
417   static char ID; // Pass identification, replacement for typeid
418   WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()), ThinLinkOS(nullptr) {
419     initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
420   }
421 
422   explicit WriteThinLTOBitcode(raw_ostream &o, raw_ostream *ThinLinkOS)
423       : ModulePass(ID), OS(o), ThinLinkOS(ThinLinkOS) {
424     initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
425   }
426 
427   StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; }
428 
429   bool runOnModule(Module &M) override {
430     const ModuleSummaryIndex *Index =
431         &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex());
432     writeThinLTOBitcode(OS, ThinLinkOS, LegacyAARGetter(*this), M, Index);
433     return true;
434   }
435   void getAnalysisUsage(AnalysisUsage &AU) const override {
436     AU.setPreservesAll();
437     AU.addRequired<AssumptionCacheTracker>();
438     AU.addRequired<ModuleSummaryIndexWrapperPass>();
439     AU.addRequired<TargetLibraryInfoWrapperPass>();
440   }
441 };
442 } // anonymous namespace
443 
444 char WriteThinLTOBitcode::ID = 0;
445 INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode",
446                       "Write ThinLTO Bitcode", false, true)
447 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
448 INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
449 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
450 INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode",
451                     "Write ThinLTO Bitcode", false, true)
452 
453 ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str,
454                                                 raw_ostream *ThinLinkOS) {
455   return new WriteThinLTOBitcode(Str, ThinLinkOS);
456 }
457