1 //===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass prepares a module containing type metadata for ThinLTO by splitting
11 // it into regular and thin LTO parts if possible, and writing both parts to
12 // a multi-module bitcode file. Modules that do not contain type metadata are
13 // written unmodified as a single module.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Analysis/BasicAliasAnalysis.h"
18 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
19 #include "llvm/Analysis/TypeMetadataUtils.h"
20 #include "llvm/Bitcode/BitcodeWriter.h"
21 #include "llvm/IR/Constants.h"
22 #include "llvm/IR/DebugInfo.h"
23 #include "llvm/IR/Intrinsics.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/IR/PassManager.h"
26 #include "llvm/Pass.h"
27 #include "llvm/Support/FileSystem.h"
28 #include "llvm/Support/ScopedPrinter.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include "llvm/Transforms/IPO.h"
31 #include "llvm/Transforms/IPO/FunctionAttrs.h"
32 #include "llvm/Transforms/Utils/Cloning.h"
33 using namespace llvm;
34 
35 namespace {
36 
37 // Produce a unique identifier for this module by taking the MD5 sum of the
38 // names of the module's strong external symbols. This identifier is
39 // normally guaranteed to be unique, or the program would fail to link due to
40 // multiply defined symbols.
41 //
42 // If the module has no strong external symbols (such a module may still have a
43 // semantic effect if it performs global initialization), we cannot produce a
44 // unique identifier for this module, so we return the empty string, which
45 // causes the entire module to be written as a regular LTO module.
46 std::string getModuleId(Module *M) {
47   MD5 Md5;
48   bool ExportsSymbols = false;
49   for (auto &GV : M->global_values()) {
50     if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
51         !GV.hasExternalLinkage())
52       continue;
53     ExportsSymbols = true;
54     Md5.update(GV.getName());
55     Md5.update(ArrayRef<uint8_t>{0});
56   }
57 
58   if (!ExportsSymbols)
59     return "";
60 
61   MD5::MD5Result R;
62   Md5.final(R);
63 
64   SmallString<32> Str;
65   MD5::stringifyResult(R, Str);
66   return ("$" + Str).str();
67 }
68 
69 // Promote each local-linkage entity defined by ExportM and used by ImportM by
70 // changing visibility and appending the given ModuleId.
71 void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
72   for (auto &ExportGV : ExportM.global_values()) {
73     if (!ExportGV.hasLocalLinkage())
74       continue;
75 
76     GlobalValue *ImportGV = ImportM.getNamedValue(ExportGV.getName());
77     if (!ImportGV || ImportGV->use_empty())
78       continue;
79 
80     std::string NewName = (ExportGV.getName() + ModuleId).str();
81 
82     ExportGV.setName(NewName);
83     ExportGV.setLinkage(GlobalValue::ExternalLinkage);
84     ExportGV.setVisibility(GlobalValue::HiddenVisibility);
85 
86     ImportGV->setName(NewName);
87     ImportGV->setVisibility(GlobalValue::HiddenVisibility);
88   }
89 }
90 
91 // Promote all internal (i.e. distinct) type ids used by the module by replacing
92 // them with external type ids formed using the module id.
93 //
94 // Note that this needs to be done before we clone the module because each clone
95 // will receive its own set of distinct metadata nodes.
96 void promoteTypeIds(Module &M, StringRef ModuleId) {
97   DenseMap<Metadata *, Metadata *> LocalToGlobal;
98   auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) {
99     Metadata *MD =
100         cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata();
101 
102     if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
103       Metadata *&GlobalMD = LocalToGlobal[MD];
104       if (!GlobalMD) {
105         std::string NewName =
106             (to_string(LocalToGlobal.size()) + ModuleId).str();
107         GlobalMD = MDString::get(M.getContext(), NewName);
108       }
109 
110       CI->setArgOperand(ArgNo,
111                         MetadataAsValue::get(M.getContext(), GlobalMD));
112     }
113   };
114 
115   if (Function *TypeTestFunc =
116           M.getFunction(Intrinsic::getName(Intrinsic::type_test))) {
117     for (const Use &U : TypeTestFunc->uses()) {
118       auto CI = cast<CallInst>(U.getUser());
119       ExternalizeTypeId(CI, 1);
120     }
121   }
122 
123   if (Function *TypeCheckedLoadFunc =
124           M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) {
125     for (const Use &U : TypeCheckedLoadFunc->uses()) {
126       auto CI = cast<CallInst>(U.getUser());
127       ExternalizeTypeId(CI, 2);
128     }
129   }
130 
131   for (GlobalObject &GO : M.global_objects()) {
132     SmallVector<MDNode *, 1> MDs;
133     GO.getMetadata(LLVMContext::MD_type, MDs);
134 
135     GO.eraseMetadata(LLVMContext::MD_type);
136     for (auto MD : MDs) {
137       auto I = LocalToGlobal.find(MD->getOperand(1));
138       if (I == LocalToGlobal.end()) {
139         GO.addMetadata(LLVMContext::MD_type, *MD);
140         continue;
141       }
142       GO.addMetadata(
143           LLVMContext::MD_type,
144           *MDNode::get(M.getContext(),
145                        ArrayRef<Metadata *>{MD->getOperand(0), I->second}));
146     }
147   }
148 }
149 
150 // Drop unused globals, and drop type information from function declarations.
151 // FIXME: If we made functions typeless then there would be no need to do this.
152 void simplifyExternals(Module &M) {
153   FunctionType *EmptyFT =
154       FunctionType::get(Type::getVoidTy(M.getContext()), false);
155 
156   for (auto I = M.begin(), E = M.end(); I != E;) {
157     Function &F = *I++;
158     if (F.isDeclaration() && F.use_empty()) {
159       F.eraseFromParent();
160       continue;
161     }
162 
163     if (!F.isDeclaration() || F.getFunctionType() == EmptyFT)
164       continue;
165 
166     Function *NewF =
167         Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M);
168     NewF->setVisibility(F.getVisibility());
169     NewF->takeName(&F);
170     F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
171     F.eraseFromParent();
172   }
173 
174   for (auto I = M.global_begin(), E = M.global_end(); I != E;) {
175     GlobalVariable &GV = *I++;
176     if (GV.isDeclaration() && GV.use_empty()) {
177       GV.eraseFromParent();
178       continue;
179     }
180   }
181 }
182 
183 void filterModule(
184     Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) {
185   for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
186        I != E;) {
187     GlobalAlias *GA = &*I++;
188     if (ShouldKeepDefinition(GA))
189       continue;
190 
191     GlobalObject *GO;
192     if (GA->getValueType()->isFunctionTy())
193       GO = Function::Create(cast<FunctionType>(GA->getValueType()),
194                             GlobalValue::ExternalLinkage, "", M);
195     else
196       GO = new GlobalVariable(
197           *M, GA->getValueType(), false, GlobalValue::ExternalLinkage,
198           (Constant *)nullptr, "", (GlobalVariable *)nullptr,
199           GA->getThreadLocalMode(), GA->getType()->getAddressSpace());
200     GO->takeName(GA);
201     GA->replaceAllUsesWith(GO);
202     GA->eraseFromParent();
203   }
204 
205   for (Function &F : *M) {
206     if (ShouldKeepDefinition(&F))
207       continue;
208 
209     F.deleteBody();
210     F.setComdat(nullptr);
211     F.clearMetadata();
212   }
213 
214   for (GlobalVariable &GV : M->globals()) {
215     if (ShouldKeepDefinition(&GV))
216       continue;
217 
218     GV.setInitializer(nullptr);
219     GV.setLinkage(GlobalValue::ExternalLinkage);
220     GV.setComdat(nullptr);
221     GV.clearMetadata();
222   }
223 }
224 
225 void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) {
226   if (auto *F = dyn_cast<Function>(C))
227     return Fn(F);
228   if (isa<GlobalValue>(C))
229     return;
230   for (Value *Op : C->operands())
231     forEachVirtualFunction(cast<Constant>(Op), Fn);
232 }
233 
// If it's possible to split M into regular and thin LTO parts, do so and write
// a multi-module bitcode file with the two parts to OS. Otherwise, write only a
// regular LTO bitcode file to OS.
//
// OS receives the full output. ThinLinkOS, if non-null, additionally receives
// a debug-info-stripped copy intended for the thin link. AARGetter supplies
// the alias analysis results used to decide whether a virtual function is
// readnone. M is modified in place: type ids are promoted, definitions that
// move to the merged module are turned into declarations, and (when
// ThinLinkOS is set) its debug info is stripped.
void splitAndWriteThinLTOBitcode(
    raw_ostream &OS, raw_ostream *ThinLinkOS,
    function_ref<AAResults &(Function &)> AARGetter, Module &M) {
  std::string ModuleId = getModuleId(&M);
  if (ModuleId.empty()) {
    // We couldn't generate a module ID for this module, just write it out as a
    // regular LTO module.
    WriteBitcodeToFile(&M, OS);
    if (ThinLinkOS)
      // We don't have a ThinLTO part, but still write the module to the
      // ThinLinkOS if requested so that the expected output file is produced.
      WriteBitcodeToFile(&M, *ThinLinkOS);
    return;
  }

  // Externalize distinct type ids now, before the module is cloned below:
  // each clone would otherwise get its own set of distinct metadata nodes.
  promoteTypeIds(M, ModuleId);

  // Returns whether a global has attached type metadata. Such globals may
  // participate in CFI or whole-program devirtualization, so they need to
  // appear in the merged module instead of the thin LTO module.
  auto HasTypeMetadata = [&](const GlobalObject *GO) {
    SmallVector<MDNode *, 1> MDs;
    GO->getMetadata(LLVMContext::MD_type, MDs);
    return !MDs.empty();
  };

  // Collect the set of virtual functions that are eligible for virtual constant
  // propagation. Each eligible function must not access memory, must return
  // an integer of width <=64 bits, must take at least one argument, must not
  // use its first argument (assumed to be "this") and all arguments other than
  // the first one must be of <=64 bit integer type.
  //
  // Note that we test whether this copy of the function is readnone, rather
  // than testing function attributes, which must hold for any copy of the
  // function, even a less optimized version substituted at link time. This is
  // sound because the virtual constant propagation optimizations effectively
  // inline all implementations of the virtual function into each call site,
  // rather than using function attributes to perform local optimization.
  std::set<const Function *> EligibleVirtualFns;
  for (GlobalVariable &GV : M.globals())
    if (HasTypeMetadata(&GV))
      forEachVirtualFunction(GV.getInitializer(), [&](Function *F) {
        // Return type and "this" argument checks.
        auto *RT = dyn_cast<IntegerType>(F->getReturnType());
        if (!RT || RT->getBitWidth() > 64 || F->arg_empty() ||
            !F->arg_begin()->use_empty())
          return;
        // Every argument after the first must be an integer of <=64 bits.
        for (auto &Arg : make_range(std::next(F->arg_begin()), F->arg_end())) {
          auto *ArgT = dyn_cast<IntegerType>(Arg.getType());
          if (!ArgT || ArgT->getBitWidth() > 64)
            return;
        }
        if (computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone)
          EligibleVirtualFns.insert(F);
      });

  // Clone M to form the merged (regular LTO) module. The predicate keeps the
  // definitions of eligible virtual functions and of any global whose base
  // object is a variable carrying type metadata; it returns false for
  // everything else.
  ValueToValueMapTy VMap;
  std::unique_ptr<Module> MergedM(
      CloneModule(&M, VMap, [&](const GlobalValue *GV) -> bool {
        if (auto *F = dyn_cast<Function>(GV))
          return EligibleVirtualFns.count(F);
        if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
          return HasTypeMetadata(GVar);
        return false;
      }));
  StripDebugInfo(*MergedM);

  for (Function &F : *MergedM)
    if (!F.isDeclaration()) {
      // Reset the linkage of all functions eligible for virtual constant
      // propagation. The canonical definitions live in the thin LTO module so
      // that they can be imported.
      F.setLinkage(GlobalValue::AvailableExternallyLinkage);
      F.setComdat(nullptr);
    }

  // Remove all globals with type metadata, as well as aliases pointing to them,
  // from the thin LTO module.
  filterModule(&M, [&](const GlobalValue *GV) {
    if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
      return !HasTypeMetadata(GVar);
    return true;
  });

  // Local-linkage values defined in one module and used from the other must
  // be promoted so the two modules can reference each other; do this in both
  // directions.
  promoteInternals(*MergedM, M, ModuleId);
  promoteInternals(M, *MergedM, ModuleId);

  // Drop unused declarations from the merged module and strip the types of
  // the function declarations that remain.
  simplifyExternals(*MergedM);


  // FIXME: Try to re-use BSI and PFI from the original module here.
  ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr);

  SmallVector<char, 0> Buffer;

  // Both modules are written through the same writer into one buffer: first
  // the thin LTO module (with its summary index), then the merged module.
  BitcodeWriter W(Buffer);
  // Save the module hash produced for the full bitcode, which will
  // be used in the backends, and use that in the minimized bitcode
  // produced for the full link.
  ModuleHash ModHash = {{0}};
  W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
                /*GenerateHash=*/true, &ModHash);
  W.writeModule(MergedM.get());
  OS << Buffer;

  // If a minimized bitcode module was requested for the thin link,
  // strip the debug info (the merged module was already stripped above)
  // and write it to the given OS.
  if (ThinLinkOS) {
    Buffer.clear();
    BitcodeWriter W2(Buffer);
    StripDebugInfo(M);
    // GenerateHash is false here: the hash computed for the full bitcode
    // above is passed in via ModHash rather than being recomputed.
    W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
                   /*GenerateHash=*/false, &ModHash);
    W2.writeModule(MergedM.get());
    *ThinLinkOS << Buffer;
  }
}
354 
355 // Returns whether this module needs to be split because it uses type metadata.
356 bool requiresSplit(Module &M) {
357   SmallVector<MDNode *, 1> MDs;
358   for (auto &GO : M.global_objects()) {
359     GO.getMetadata(LLVMContext::MD_type, MDs);
360     if (!MDs.empty())
361       return true;
362   }
363 
364   return false;
365 }
366 
367 void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
368                          function_ref<AAResults &(Function &)> AARGetter,
369                          Module &M, const ModuleSummaryIndex *Index) {
370   // See if this module has any type metadata. If so, we need to split it.
371   if (requiresSplit(M))
372     return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
373 
374   // Otherwise we can just write it out as a regular module.
375 
376   // Save the module hash produced for the full bitcode, which will
377   // be used in the backends, and use that in the minimized bitcode
378   // produced for the full link.
379   ModuleHash ModHash = {{0}};
380   WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
381                      /*GenerateHash=*/true, &ModHash);
382   // If a minimized bitcode module was requested for the thin link,
383   // strip the debug info and write it to the given OS.
384   if (ThinLinkOS) {
385     StripDebugInfo(M);
386     WriteBitcodeToFile(&M, *ThinLinkOS, /*ShouldPreserveUseListOrder=*/false,
387                        Index,
388                        /*GenerateHash=*/false, &ModHash);
389   }
390 }
391 
392 class WriteThinLTOBitcode : public ModulePass {
393   raw_ostream &OS; // raw_ostream to print on
394   // The output stream on which to emit a minimized module for use
395   // just in the thin link, if requested.
396   raw_ostream *ThinLinkOS;
397 
398 public:
399   static char ID; // Pass identification, replacement for typeid
400   WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()), ThinLinkOS(nullptr) {
401     initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
402   }
403 
404   explicit WriteThinLTOBitcode(raw_ostream &o, raw_ostream *ThinLinkOS)
405       : ModulePass(ID), OS(o), ThinLinkOS(ThinLinkOS) {
406     initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
407   }
408 
409   StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; }
410 
411   bool runOnModule(Module &M) override {
412     const ModuleSummaryIndex *Index =
413         &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex());
414     writeThinLTOBitcode(OS, ThinLinkOS, LegacyAARGetter(*this), M, Index);
415     return true;
416   }
417   void getAnalysisUsage(AnalysisUsage &AU) const override {
418     AU.setPreservesAll();
419     AU.addRequired<AssumptionCacheTracker>();
420     AU.addRequired<ModuleSummaryIndexWrapperPass>();
421     AU.addRequired<TargetLibraryInfoWrapperPass>();
422   }
423 };
424 } // anonymous namespace
425 
// Storage for the pass identifier declared in the class above.
char WriteThinLTOBitcode::ID = 0;
// Register the pass under the command-line name "write-thinlto-bitcode",
// listing the passes it depends on (the same three that getAnalysisUsage
// marks as required).
// NOTE(review): the trailing `false, true` presumably map to the macro's
// CFG-only and is-analysis parameters; confirm against the macro definition.
INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode",
                      "Write ThinLTO Bitcode", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode",
                    "Write ThinLTO Bitcode", false, true)
434 
435 ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str,
436                                                 raw_ostream *ThinLinkOS) {
437   return new WriteThinLTOBitcode(Str, ThinLinkOS);
438 }
439