1 //===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass prepares a module containing type metadata for ThinLTO by splitting
11 // it into regular and thin LTO parts if possible, and writing both parts to
12 // a multi-module bitcode file. Modules that do not contain type metadata are
13 // written unmodified as a single module.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Transforms/IPO.h"
18 #include "llvm/Analysis/BasicAliasAnalysis.h"
19 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
20 #include "llvm/Analysis/TypeMetadataUtils.h"
21 #include "llvm/Bitcode/BitcodeWriter.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/DebugInfo.h"
24 #include "llvm/IR/Intrinsics.h"
25 #include "llvm/IR/Module.h"
26 #include "llvm/IR/PassManager.h"
27 #include "llvm/Pass.h"
28 #include "llvm/Support/ScopedPrinter.h"
29 #include "llvm/Transforms/IPO/FunctionAttrs.h"
30 #include "llvm/Transforms/Utils/Cloning.h"
31 using namespace llvm;
32 
33 namespace {
34 
35 // Produce a unique identifier for this module by taking the MD5 sum of the
36 // names of the module's strong external symbols. This identifier is
37 // normally guaranteed to be unique, or the program would fail to link due to
38 // multiply defined symbols.
39 //
40 // If the module has no strong external symbols (such a module may still have a
41 // semantic effect if it performs global initialization), we cannot produce a
42 // unique identifier for this module, so we return the empty string, which
43 // causes the entire module to be written as a regular LTO module.
44 std::string getModuleId(Module *M) {
45   MD5 Md5;
46   bool ExportsSymbols = false;
47   auto AddGlobal = [&](GlobalValue &GV) {
48     if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
49         !GV.hasExternalLinkage())
50       return;
51     ExportsSymbols = true;
52     Md5.update(GV.getName());
53     Md5.update(ArrayRef<uint8_t>{0});
54   };
55 
56   for (auto &F : *M)
57     AddGlobal(F);
58   for (auto &GV : M->globals())
59     AddGlobal(GV);
60   for (auto &GA : M->aliases())
61     AddGlobal(GA);
62   for (auto &IF : M->ifuncs())
63     AddGlobal(IF);
64 
65   if (!ExportsSymbols)
66     return "";
67 
68   MD5::MD5Result R;
69   Md5.final(R);
70 
71   SmallString<32> Str;
72   MD5::stringifyResult(R, Str);
73   return ("$" + Str).str();
74 }
75 
76 // Promote each local-linkage entity defined by ExportM and used by ImportM by
77 // changing visibility and appending the given ModuleId.
78 void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
79   auto PromoteInternal = [&](GlobalValue &ExportGV) {
80     if (!ExportGV.hasLocalLinkage())
81       return;
82 
83     GlobalValue *ImportGV = ImportM.getNamedValue(ExportGV.getName());
84     if (!ImportGV || ImportGV->use_empty())
85       return;
86 
87     std::string NewName = (ExportGV.getName() + ModuleId).str();
88 
89     ExportGV.setName(NewName);
90     ExportGV.setLinkage(GlobalValue::ExternalLinkage);
91     ExportGV.setVisibility(GlobalValue::HiddenVisibility);
92 
93     ImportGV->setName(NewName);
94     ImportGV->setVisibility(GlobalValue::HiddenVisibility);
95   };
96 
97   for (auto &F : ExportM)
98     PromoteInternal(F);
99   for (auto &GV : ExportM.globals())
100     PromoteInternal(GV);
101   for (auto &GA : ExportM.aliases())
102     PromoteInternal(GA);
103   for (auto &IF : ExportM.ifuncs())
104     PromoteInternal(IF);
105 }
106 
107 // Promote all internal (i.e. distinct) type ids used by the module by replacing
108 // them with external type ids formed using the module id.
109 //
110 // Note that this needs to be done before we clone the module because each clone
111 // will receive its own set of distinct metadata nodes.
112 void promoteTypeIds(Module &M, StringRef ModuleId) {
113   DenseMap<Metadata *, Metadata *> LocalToGlobal;
114   auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) {
115     Metadata *MD =
116         cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata();
117 
118     if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
119       Metadata *&GlobalMD = LocalToGlobal[MD];
120       if (!GlobalMD) {
121         std::string NewName =
122             (to_string(LocalToGlobal.size()) + ModuleId).str();
123         GlobalMD = MDString::get(M.getContext(), NewName);
124       }
125 
126       CI->setArgOperand(ArgNo,
127                         MetadataAsValue::get(M.getContext(), GlobalMD));
128     }
129   };
130 
131   if (Function *TypeTestFunc =
132           M.getFunction(Intrinsic::getName(Intrinsic::type_test))) {
133     for (const Use &U : TypeTestFunc->uses()) {
134       auto CI = cast<CallInst>(U.getUser());
135       ExternalizeTypeId(CI, 1);
136     }
137   }
138 
139   if (Function *TypeCheckedLoadFunc =
140           M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) {
141     for (const Use &U : TypeCheckedLoadFunc->uses()) {
142       auto CI = cast<CallInst>(U.getUser());
143       ExternalizeTypeId(CI, 2);
144     }
145   }
146 
147   for (GlobalObject &GO : M.global_objects()) {
148     SmallVector<MDNode *, 1> MDs;
149     GO.getMetadata(LLVMContext::MD_type, MDs);
150 
151     GO.eraseMetadata(LLVMContext::MD_type);
152     for (auto MD : MDs) {
153       auto I = LocalToGlobal.find(MD->getOperand(1));
154       if (I == LocalToGlobal.end()) {
155         GO.addMetadata(LLVMContext::MD_type, *MD);
156         continue;
157       }
158       GO.addMetadata(
159           LLVMContext::MD_type,
160           *MDNode::get(M.getContext(),
161                        ArrayRef<Metadata *>{MD->getOperand(0), I->second}));
162     }
163   }
164 }
165 
166 // Drop unused globals, and drop type information from function declarations.
167 // FIXME: If we made functions typeless then there would be no need to do this.
168 void simplifyExternals(Module &M) {
169   FunctionType *EmptyFT =
170       FunctionType::get(Type::getVoidTy(M.getContext()), false);
171 
172   for (auto I = M.begin(), E = M.end(); I != E;) {
173     Function &F = *I++;
174     if (F.isDeclaration() && F.use_empty()) {
175       F.eraseFromParent();
176       continue;
177     }
178 
179     if (!F.isDeclaration() || F.getFunctionType() == EmptyFT)
180       continue;
181 
182     Function *NewF =
183         Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M);
184     NewF->setVisibility(F.getVisibility());
185     NewF->takeName(&F);
186     F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
187     F.eraseFromParent();
188   }
189 
190   for (auto I = M.global_begin(), E = M.global_end(); I != E;) {
191     GlobalVariable &GV = *I++;
192     if (GV.isDeclaration() && GV.use_empty()) {
193       GV.eraseFromParent();
194       continue;
195     }
196   }
197 }
198 
199 void filterModule(
200     Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) {
201   for (Function &F : *M) {
202     if (ShouldKeepDefinition(&F))
203       continue;
204 
205     F.deleteBody();
206     F.setComdat(nullptr);
207     F.clearMetadata();
208   }
209 
210   for (GlobalVariable &GV : M->globals()) {
211     if (ShouldKeepDefinition(&GV))
212       continue;
213 
214     GV.setInitializer(nullptr);
215     GV.setLinkage(GlobalValue::ExternalLinkage);
216     GV.setComdat(nullptr);
217     GV.clearMetadata();
218   }
219 
220   for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
221        I != E;) {
222     GlobalAlias *GA = &*I++;
223     if (ShouldKeepDefinition(GA))
224       continue;
225 
226     GlobalObject *GO;
227     if (I->getValueType()->isFunctionTy())
228       GO = Function::Create(cast<FunctionType>(GA->getValueType()),
229                             GlobalValue::ExternalLinkage, "", M);
230     else
231       GO = new GlobalVariable(
232           *M, GA->getValueType(), false, GlobalValue::ExternalLinkage,
233           (Constant *)nullptr, "", (GlobalVariable *)nullptr,
234           GA->getThreadLocalMode(), GA->getType()->getAddressSpace());
235     GO->takeName(GA);
236     GA->replaceAllUsesWith(GO);
237     GA->eraseFromParent();
238   }
239 }
240 
241 void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) {
242   if (auto *F = dyn_cast<Function>(C))
243     return Fn(F);
244   if (isa<GlobalValue>(C))
245     return;
246   for (Value *Op : C->operands())
247     forEachVirtualFunction(cast<Constant>(Op), Fn);
248 }
249 
250 // If it's possible to split M into regular and thin LTO parts, do so and write
251 // a multi-module bitcode file with the two parts to OS. Otherwise, write only a
252 // regular LTO bitcode file to OS.
253 void splitAndWriteThinLTOBitcode(
254     raw_ostream &OS, function_ref<AAResults &(Function &)> AARGetter,
255     Module &M) {
256   std::string ModuleId = getModuleId(&M);
257   if (ModuleId.empty()) {
258     // We couldn't generate a module ID for this module, just write it out as a
259     // regular LTO module.
260     WriteBitcodeToFile(&M, OS);
261     return;
262   }
263 
264   promoteTypeIds(M, ModuleId);
265 
266   // Returns whether a global has attached type metadata. Such globals may
267   // participate in CFI or whole-program devirtualization, so they need to
268   // appear in the merged module instead of the thin LTO module.
269   auto HasTypeMetadata = [&](const GlobalObject *GO) {
270     SmallVector<MDNode *, 1> MDs;
271     GO->getMetadata(LLVMContext::MD_type, MDs);
272     return !MDs.empty();
273   };
274 
275   // Collect the set of virtual functions that are eligible for virtual constant
276   // propagation. Each eligible function must not access memory, must return
277   // an integer of width <=64 bits, must take at least one argument, must not
278   // use its first argument (assumed to be "this") and all arguments other than
279   // the first one must be of <=64 bit integer type.
280   //
281   // Note that we test whether this copy of the function is readnone, rather
282   // than testing function attributes, which must hold for any copy of the
283   // function, even a less optimized version substituted at link time. This is
284   // sound because the virtual constant propagation optimizations effectively
285   // inline all implementations of the virtual function into each call site,
286   // rather than using function attributes to perform local optimization.
287   std::set<const Function *> EligibleVirtualFns;
288   for (GlobalVariable &GV : M.globals())
289     if (HasTypeMetadata(&GV))
290       forEachVirtualFunction(GV.getInitializer(), [&](Function *F) {
291         auto *RT = dyn_cast<IntegerType>(F->getReturnType());
292         if (!RT || RT->getBitWidth() > 64 || F->arg_empty() ||
293             !F->arg_begin()->use_empty())
294           return;
295         for (auto &Arg : make_range(std::next(F->arg_begin()), F->arg_end())) {
296           auto *ArgT = dyn_cast<IntegerType>(Arg.getType());
297           if (!ArgT || ArgT->getBitWidth() > 64)
298             return;
299         }
300         if (computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone)
301           EligibleVirtualFns.insert(F);
302       });
303 
304   ValueToValueMapTy VMap;
305   std::unique_ptr<Module> MergedM(
306       CloneModule(&M, VMap, [&](const GlobalValue *GV) -> bool {
307         if (auto *F = dyn_cast<Function>(GV))
308           return EligibleVirtualFns.count(F);
309         if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
310           return HasTypeMetadata(GVar);
311         return false;
312       }));
313   StripDebugInfo(*MergedM);
314 
315   for (Function &F : *MergedM)
316     if (!F.isDeclaration()) {
317       // Reset the linkage of all functions eligible for virtual constant
318       // propagation. The canonical definitions live in the thin LTO module so
319       // that they can be imported.
320       F.setLinkage(GlobalValue::AvailableExternallyLinkage);
321       F.setComdat(nullptr);
322     }
323 
324   // Remove all globals with type metadata, as well as aliases pointing to them,
325   // from the thin LTO module.
326   filterModule(&M, [&](const GlobalValue *GV) {
327     if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
328       return !HasTypeMetadata(GVar);
329     return true;
330   });
331 
332   promoteInternals(*MergedM, M, ModuleId);
333   promoteInternals(M, *MergedM, ModuleId);
334 
335   simplifyExternals(*MergedM);
336 
337   SmallVector<char, 0> Buffer;
338   BitcodeWriter W(Buffer);
339 
340   // FIXME: Try to re-use BSI and PFI from the original module here.
341   ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr);
342   W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
343                 /*GenerateHash=*/true);
344 
345   W.writeModule(MergedM.get());
346 
347   OS << Buffer;
348 }
349 
350 // Returns whether this module needs to be split because it uses type metadata.
351 bool requiresSplit(Module &M) {
352   SmallVector<MDNode *, 1> MDs;
353   for (auto &GO : M.global_objects()) {
354     GO.getMetadata(LLVMContext::MD_type, MDs);
355     if (!MDs.empty())
356       return true;
357   }
358 
359   return false;
360 }
361 
362 void writeThinLTOBitcode(raw_ostream &OS,
363                          function_ref<AAResults &(Function &)> AARGetter,
364                          Module &M, const ModuleSummaryIndex *Index) {
365   // See if this module has any type metadata. If so, we need to split it.
366   if (requiresSplit(M))
367     return splitAndWriteThinLTOBitcode(OS, AARGetter, M);
368 
369   // Otherwise we can just write it out as a regular module.
370   WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
371                      /*GenerateHash=*/true);
372 }
373 
374 class WriteThinLTOBitcode : public ModulePass {
375   raw_ostream &OS; // raw_ostream to print on
376 
377 public:
378   static char ID; // Pass identification, replacement for typeid
379   WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()) {
380     initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
381   }
382 
383   explicit WriteThinLTOBitcode(raw_ostream &o)
384       : ModulePass(ID), OS(o) {
385     initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
386   }
387 
388   StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; }
389 
390   bool runOnModule(Module &M) override {
391     const ModuleSummaryIndex *Index =
392         &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex());
393     writeThinLTOBitcode(OS, LegacyAARGetter(*this), M, Index);
394     return true;
395   }
396   void getAnalysisUsage(AnalysisUsage &AU) const override {
397     AU.setPreservesAll();
398     AU.addRequired<AssumptionCacheTracker>();
399     AU.addRequired<ModuleSummaryIndexWrapperPass>();
400     AU.addRequired<TargetLibraryInfoWrapperPass>();
401   }
402 };
403 } // anonymous namespace
404 
// Definition of the pass identifier that both constructors pass to ModulePass.
char WriteThinLTOBitcode::ID = 0;
// Register the pass under the command-line name "write-thinlto-bitcode" and
// declare its dependencies; the list mirrors the addRequired<> calls in
// WriteThinLTOBitcode::getAnalysisUsage.
INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode",
                      "Write ThinLTO Bitcode", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode",
                    "Write ThinLTO Bitcode", false, true)
413 
414 ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str) {
415   return new WriteThinLTOBitcode(Str);
416 }
417