1 //===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass prepares a module containing type metadata for ThinLTO by splitting
11 // it into regular and thin LTO parts if possible, and writing both parts to
12 // a multi-module bitcode file. Modules that do not contain type metadata are
13 // written unmodified as a single module.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Transforms/IPO.h"
18 #include "llvm/Analysis/BasicAliasAnalysis.h"
19 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
20 #include "llvm/Analysis/TypeMetadataUtils.h"
21 #include "llvm/Bitcode/BitcodeWriter.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/DebugInfo.h"
24 #include "llvm/IR/Intrinsics.h"
25 #include "llvm/IR/Module.h"
26 #include "llvm/IR/PassManager.h"
27 #include "llvm/Pass.h"
28 #include "llvm/Support/ScopedPrinter.h"
29 #include "llvm/Transforms/IPO/FunctionAttrs.h"
30 #include "llvm/Transforms/Utils/Cloning.h"
31 using namespace llvm;
32 
33 namespace {
34 
35 // Produce a unique identifier for this module by taking the MD5 sum of the
36 // names of the module's strong external symbols. This identifier is
37 // normally guaranteed to be unique, or the program would fail to link due to
38 // multiply defined symbols.
39 //
40 // If the module has no strong external symbols (such a module may still have a
41 // semantic effect if it performs global initialization), we cannot produce a
42 // unique identifier for this module, so we return the empty string, which
43 // causes the entire module to be written as a regular LTO module.
44 std::string getModuleId(Module *M) {
45   MD5 Md5;
46   bool ExportsSymbols = false;
47   auto AddGlobal = [&](GlobalValue &GV) {
48     if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
49         !GV.hasExternalLinkage())
50       return;
51     ExportsSymbols = true;
52     Md5.update(GV.getName());
53     Md5.update(ArrayRef<uint8_t>{0});
54   };
55 
56   for (auto &F : *M)
57     AddGlobal(F);
58   for (auto &GV : M->globals())
59     AddGlobal(GV);
60   for (auto &GA : M->aliases())
61     AddGlobal(GA);
62   for (auto &IF : M->ifuncs())
63     AddGlobal(IF);
64 
65   if (!ExportsSymbols)
66     return "";
67 
68   MD5::MD5Result R;
69   Md5.final(R);
70 
71   SmallString<32> Str;
72   MD5::stringifyResult(R, Str);
73   return ("$" + Str).str();
74 }
75 
76 // Promote each local-linkage entity defined by ExportM and used by ImportM by
77 // changing visibility and appending the given ModuleId.
78 void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
79   auto PromoteInternal = [&](GlobalValue &ExportGV) {
80     if (!ExportGV.hasLocalLinkage())
81       return;
82 
83     GlobalValue *ImportGV = ImportM.getNamedValue(ExportGV.getName());
84     if (!ImportGV || ImportGV->use_empty())
85       return;
86 
87     std::string NewName = (ExportGV.getName() + ModuleId).str();
88 
89     ExportGV.setName(NewName);
90     ExportGV.setLinkage(GlobalValue::ExternalLinkage);
91     ExportGV.setVisibility(GlobalValue::HiddenVisibility);
92 
93     ImportGV->setName(NewName);
94     ImportGV->setVisibility(GlobalValue::HiddenVisibility);
95   };
96 
97   for (auto &F : ExportM)
98     PromoteInternal(F);
99   for (auto &GV : ExportM.globals())
100     PromoteInternal(GV);
101   for (auto &GA : ExportM.aliases())
102     PromoteInternal(GA);
103   for (auto &IF : ExportM.ifuncs())
104     PromoteInternal(IF);
105 }
106 
107 // Promote all internal (i.e. distinct) type ids used by the module by replacing
108 // them with external type ids formed using the module id.
109 //
110 // Note that this needs to be done before we clone the module because each clone
111 // will receive its own set of distinct metadata nodes.
112 void promoteTypeIds(Module &M, StringRef ModuleId) {
113   DenseMap<Metadata *, Metadata *> LocalToGlobal;
114   auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) {
115     Metadata *MD =
116         cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata();
117 
118     if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
119       Metadata *&GlobalMD = LocalToGlobal[MD];
120       if (!GlobalMD) {
121         std::string NewName =
122             (to_string(LocalToGlobal.size()) + ModuleId).str();
123         GlobalMD = MDString::get(M.getContext(), NewName);
124       }
125 
126       CI->setArgOperand(ArgNo,
127                         MetadataAsValue::get(M.getContext(), GlobalMD));
128     }
129   };
130 
131   if (Function *TypeTestFunc =
132           M.getFunction(Intrinsic::getName(Intrinsic::type_test))) {
133     for (const Use &U : TypeTestFunc->uses()) {
134       auto CI = cast<CallInst>(U.getUser());
135       ExternalizeTypeId(CI, 1);
136     }
137   }
138 
139   if (Function *TypeCheckedLoadFunc =
140           M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) {
141     for (const Use &U : TypeCheckedLoadFunc->uses()) {
142       auto CI = cast<CallInst>(U.getUser());
143       ExternalizeTypeId(CI, 2);
144     }
145   }
146 
147   for (GlobalObject &GO : M.global_objects()) {
148     SmallVector<MDNode *, 1> MDs;
149     GO.getMetadata(LLVMContext::MD_type, MDs);
150 
151     GO.eraseMetadata(LLVMContext::MD_type);
152     for (auto MD : MDs) {
153       auto I = LocalToGlobal.find(MD->getOperand(1));
154       if (I == LocalToGlobal.end()) {
155         GO.addMetadata(LLVMContext::MD_type, *MD);
156         continue;
157       }
158       GO.addMetadata(
159           LLVMContext::MD_type,
160           *MDNode::get(M.getContext(),
161                        ArrayRef<Metadata *>{MD->getOperand(0), I->second}));
162     }
163   }
164 }
165 
166 // Drop unused globals, and drop type information from function declarations.
167 // FIXME: If we made functions typeless then there would be no need to do this.
168 void simplifyExternals(Module &M) {
169   FunctionType *EmptyFT =
170       FunctionType::get(Type::getVoidTy(M.getContext()), false);
171 
172   for (auto I = M.begin(), E = M.end(); I != E;) {
173     Function &F = *I++;
174     if (F.isDeclaration() && F.use_empty()) {
175       F.eraseFromParent();
176       continue;
177     }
178 
179     if (!F.isDeclaration() || F.getFunctionType() == EmptyFT)
180       continue;
181 
182     Function *NewF =
183         Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M);
184     NewF->setVisibility(F.getVisibility());
185     NewF->takeName(&F);
186     F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
187     F.eraseFromParent();
188   }
189 
190   for (auto I = M.global_begin(), E = M.global_end(); I != E;) {
191     GlobalVariable &GV = *I++;
192     if (GV.isDeclaration() && GV.use_empty()) {
193       GV.eraseFromParent();
194       continue;
195     }
196   }
197 }
198 
199 void filterModule(
200     Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) {
201   for (Function &F : *M) {
202     if (ShouldKeepDefinition(&F))
203       continue;
204 
205     F.deleteBody();
206     F.setComdat(nullptr);
207     F.clearMetadata();
208   }
209 
210   for (GlobalVariable &GV : M->globals()) {
211     if (ShouldKeepDefinition(&GV))
212       continue;
213 
214     GV.setInitializer(nullptr);
215     GV.setLinkage(GlobalValue::ExternalLinkage);
216     GV.setComdat(nullptr);
217     GV.clearMetadata();
218   }
219 
220   for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
221        I != E;) {
222     GlobalAlias *GA = &*I++;
223     if (ShouldKeepDefinition(GA))
224       continue;
225 
226     GlobalObject *GO;
227     if (I->getValueType()->isFunctionTy())
228       GO = Function::Create(cast<FunctionType>(GA->getValueType()),
229                             GlobalValue::ExternalLinkage, "", M);
230     else
231       GO = new GlobalVariable(
232           *M, GA->getValueType(), false, GlobalValue::ExternalLinkage,
233           (Constant *)nullptr, "", (GlobalVariable *)nullptr,
234           GA->getThreadLocalMode(), GA->getType()->getAddressSpace());
235     GO->takeName(GA);
236     GA->replaceAllUsesWith(GO);
237     GA->eraseFromParent();
238   }
239 }
240 
241 void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) {
242   if (auto *F = dyn_cast<Function>(C))
243     return Fn(F);
244   for (Value *Op : C->operands())
245     forEachVirtualFunction(cast<Constant>(Op), Fn);
246 }
247 
248 // If it's possible to split M into regular and thin LTO parts, do so and write
249 // a multi-module bitcode file with the two parts to OS. Otherwise, write only a
250 // regular LTO bitcode file to OS.
251 void splitAndWriteThinLTOBitcode(
252     raw_ostream &OS, function_ref<AAResults &(Function &)> AARGetter,
253     Module &M) {
254   std::string ModuleId = getModuleId(&M);
255   if (ModuleId.empty()) {
256     // We couldn't generate a module ID for this module, just write it out as a
257     // regular LTO module.
258     WriteBitcodeToFile(&M, OS);
259     return;
260   }
261 
262   promoteTypeIds(M, ModuleId);
263 
264   // Returns whether a global has attached type metadata. Such globals may
265   // participate in CFI or whole-program devirtualization, so they need to
266   // appear in the merged module instead of the thin LTO module.
267   auto HasTypeMetadata = [&](const GlobalObject *GO) {
268     SmallVector<MDNode *, 1> MDs;
269     GO->getMetadata(LLVMContext::MD_type, MDs);
270     return !MDs.empty();
271   };
272 
273   // Collect the set of virtual functions that are eligible for virtual constant
274   // propagation. Each eligible function must not access memory, must return
275   // an integer of width <=64 bits, must take at least one argument, must not
276   // use its first argument (assumed to be "this") and all arguments other than
277   // the first one must be of <=64 bit integer type.
278   //
279   // Note that we test whether this copy of the function is readnone, rather
280   // than testing function attributes, which must hold for any copy of the
281   // function, even a less optimized version substituted at link time. This is
282   // sound because the virtual constant propagation optimizations effectively
283   // inline all implementations of the virtual function into each call site,
284   // rather than using function attributes to perform local optimization.
285   std::set<const Function *> EligibleVirtualFns;
286   for (GlobalVariable &GV : M.globals())
287     if (HasTypeMetadata(&GV))
288       forEachVirtualFunction(GV.getInitializer(), [&](Function *F) {
289         auto *RT = dyn_cast<IntegerType>(F->getReturnType());
290         if (!RT || RT->getBitWidth() > 64 || F->arg_empty() ||
291             !F->arg_begin()->use_empty())
292           return;
293         for (auto &Arg : make_range(std::next(F->arg_begin()), F->arg_end())) {
294           auto *ArgT = dyn_cast<IntegerType>(Arg.getType());
295           if (!ArgT || ArgT->getBitWidth() > 64)
296             return;
297         }
298         if (computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone)
299           EligibleVirtualFns.insert(F);
300       });
301 
302   ValueToValueMapTy VMap;
303   std::unique_ptr<Module> MergedM(
304       CloneModule(&M, VMap, [&](const GlobalValue *GV) -> bool {
305         if (auto *F = dyn_cast<Function>(GV))
306           return EligibleVirtualFns.count(F);
307         if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
308           return HasTypeMetadata(GVar);
309         return false;
310       }));
311   StripDebugInfo(*MergedM);
312 
313   for (Function &F : *MergedM)
314     if (!F.isDeclaration()) {
315       // Reset the linkage of all functions eligible for virtual constant
316       // propagation. The canonical definitions live in the thin LTO module so
317       // that they can be imported.
318       F.setLinkage(GlobalValue::AvailableExternallyLinkage);
319       F.setComdat(nullptr);
320     }
321 
322   // Remove all globals with type metadata, as well as aliases pointing to them,
323   // from the thin LTO module.
324   filterModule(&M, [&](const GlobalValue *GV) {
325     if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
326       return !HasTypeMetadata(GVar);
327     return true;
328   });
329 
330   promoteInternals(*MergedM, M, ModuleId);
331   promoteInternals(M, *MergedM, ModuleId);
332 
333   simplifyExternals(*MergedM);
334 
335   SmallVector<char, 0> Buffer;
336   BitcodeWriter W(Buffer);
337 
338   // FIXME: Try to re-use BSI and PFI from the original module here.
339   ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr);
340   W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
341                 /*GenerateHash=*/true);
342 
343   W.writeModule(MergedM.get());
344 
345   OS << Buffer;
346 }
347 
348 // Returns whether this module needs to be split because it uses type metadata.
349 bool requiresSplit(Module &M) {
350   SmallVector<MDNode *, 1> MDs;
351   for (auto &GO : M.global_objects()) {
352     GO.getMetadata(LLVMContext::MD_type, MDs);
353     if (!MDs.empty())
354       return true;
355   }
356 
357   return false;
358 }
359 
360 void writeThinLTOBitcode(raw_ostream &OS,
361                          function_ref<AAResults &(Function &)> AARGetter,
362                          Module &M, const ModuleSummaryIndex *Index) {
363   // See if this module has any type metadata. If so, we need to split it.
364   if (requiresSplit(M))
365     return splitAndWriteThinLTOBitcode(OS, AARGetter, M);
366 
367   // Otherwise we can just write it out as a regular module.
368   WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
369                      /*GenerateHash=*/true);
370 }
371 
372 class WriteThinLTOBitcode : public ModulePass {
373   raw_ostream &OS; // raw_ostream to print on
374 
375 public:
376   static char ID; // Pass identification, replacement for typeid
377   WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()) {
378     initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
379   }
380 
381   explicit WriteThinLTOBitcode(raw_ostream &o)
382       : ModulePass(ID), OS(o) {
383     initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
384   }
385 
386   StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; }
387 
388   bool runOnModule(Module &M) override {
389     const ModuleSummaryIndex *Index =
390         &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex());
391     writeThinLTOBitcode(OS, LegacyAARGetter(*this), M, Index);
392     return true;
393   }
394   void getAnalysisUsage(AnalysisUsage &AU) const override {
395     AU.setPreservesAll();
396     AU.addRequired<AssumptionCacheTracker>();
397     AU.addRequired<ModuleSummaryIndexWrapperPass>();
398     AU.addRequired<TargetLibraryInfoWrapperPass>();
399   }
400 };
401 } // anonymous namespace
402 
// Storage for the pass's static ID member, which the constructors pass to
// ModulePass.
char WriteThinLTOBitcode::ID = 0;
// Pass registration under the argument string "write-thinlto-bitcode". The
// dependencies listed here are the same three analyses that
// WriteThinLTOBitcode::getAnalysisUsage() marks as required.
INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode",
                      "Write ThinLTO Bitcode", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode",
                    "Write ThinLTO Bitcode", false, true)
411 
412 ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str) {
413   return new WriteThinLTOBitcode(Str);
414 }
415