1 //===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass prepares a module containing type metadata for ThinLTO by splitting
11 // it into regular and thin LTO parts if possible, and writing both parts to
12 // a multi-module bitcode file. Modules that do not contain type metadata are
13 // written unmodified as a single module.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Transforms/IPO.h"
18 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
19 #include "llvm/Analysis/TypeMetadataUtils.h"
20 #include "llvm/Bitcode/BitcodeWriter.h"
21 #include "llvm/IR/Constants.h"
22 #include "llvm/IR/DebugInfo.h"
23 #include "llvm/IR/Intrinsics.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/IR/PassManager.h"
26 #include "llvm/Pass.h"
27 #include "llvm/Support/ScopedPrinter.h"
28 #include "llvm/Transforms/Utils/Cloning.h"
29 using namespace llvm;
30 
31 namespace {
32 
33 // Produce a unique identifier for this module by taking the MD5 sum of the
34 // names of the module's strong external symbols. This identifier is
35 // normally guaranteed to be unique, or the program would fail to link due to
36 // multiply defined symbols.
37 //
38 // If the module has no strong external symbols (such a module may still have a
39 // semantic effect if it performs global initialization), we cannot produce a
40 // unique identifier for this module, so we return the empty string, which
41 // causes the entire module to be written as a regular LTO module.
42 std::string getModuleId(Module *M) {
43   MD5 Md5;
44   bool ExportsSymbols = false;
45   auto AddGlobal = [&](GlobalValue &GV) {
46     if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
47         !GV.hasExternalLinkage())
48       return;
49     ExportsSymbols = true;
50     Md5.update(GV.getName());
51     Md5.update(ArrayRef<uint8_t>{0});
52   };
53 
54   for (auto &F : *M)
55     AddGlobal(F);
56   for (auto &GV : M->globals())
57     AddGlobal(GV);
58   for (auto &GA : M->aliases())
59     AddGlobal(GA);
60   for (auto &IF : M->ifuncs())
61     AddGlobal(IF);
62 
63   if (!ExportsSymbols)
64     return "";
65 
66   MD5::MD5Result R;
67   Md5.final(R);
68 
69   SmallString<32> Str;
70   MD5::stringifyResult(R, Str);
71   return ("$" + Str).str();
72 }
73 
74 // Promote each local-linkage entity defined by ExportM and used by ImportM by
75 // changing visibility and appending the given ModuleId.
76 void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
77   auto PromoteInternal = [&](GlobalValue &ExportGV) {
78     if (!ExportGV.hasLocalLinkage())
79       return;
80 
81     GlobalValue *ImportGV = ImportM.getNamedValue(ExportGV.getName());
82     if (!ImportGV || ImportGV->use_empty())
83       return;
84 
85     std::string NewName = (ExportGV.getName() + ModuleId).str();
86 
87     ExportGV.setName(NewName);
88     ExportGV.setLinkage(GlobalValue::ExternalLinkage);
89     ExportGV.setVisibility(GlobalValue::HiddenVisibility);
90 
91     ImportGV->setName(NewName);
92     ImportGV->setVisibility(GlobalValue::HiddenVisibility);
93   };
94 
95   for (auto &F : ExportM)
96     PromoteInternal(F);
97   for (auto &GV : ExportM.globals())
98     PromoteInternal(GV);
99   for (auto &GA : ExportM.aliases())
100     PromoteInternal(GA);
101   for (auto &IF : ExportM.ifuncs())
102     PromoteInternal(IF);
103 }
104 
105 // Promote all internal (i.e. distinct) type ids used by the module by replacing
106 // them with external type ids formed using the module id.
107 //
108 // Note that this needs to be done before we clone the module because each clone
109 // will receive its own set of distinct metadata nodes.
110 void promoteTypeIds(Module &M, StringRef ModuleId) {
111   DenseMap<Metadata *, Metadata *> LocalToGlobal;
112   auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) {
113     Metadata *MD =
114         cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata();
115 
116     if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
117       Metadata *&GlobalMD = LocalToGlobal[MD];
118       if (!GlobalMD) {
119         std::string NewName =
120             (to_string(LocalToGlobal.size()) + ModuleId).str();
121         GlobalMD = MDString::get(M.getContext(), NewName);
122       }
123 
124       CI->setArgOperand(ArgNo,
125                         MetadataAsValue::get(M.getContext(), GlobalMD));
126     }
127   };
128 
129   if (Function *TypeTestFunc =
130           M.getFunction(Intrinsic::getName(Intrinsic::type_test))) {
131     for (const Use &U : TypeTestFunc->uses()) {
132       auto CI = cast<CallInst>(U.getUser());
133       ExternalizeTypeId(CI, 1);
134     }
135   }
136 
137   if (Function *TypeCheckedLoadFunc =
138           M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) {
139     for (const Use &U : TypeCheckedLoadFunc->uses()) {
140       auto CI = cast<CallInst>(U.getUser());
141       ExternalizeTypeId(CI, 2);
142     }
143   }
144 
145   for (GlobalObject &GO : M.global_objects()) {
146     SmallVector<MDNode *, 1> MDs;
147     GO.getMetadata(LLVMContext::MD_type, MDs);
148 
149     GO.eraseMetadata(LLVMContext::MD_type);
150     for (auto MD : MDs) {
151       auto I = LocalToGlobal.find(MD->getOperand(1));
152       if (I == LocalToGlobal.end()) {
153         GO.addMetadata(LLVMContext::MD_type, *MD);
154         continue;
155       }
156       GO.addMetadata(
157           LLVMContext::MD_type,
158           *MDNode::get(M.getContext(),
159                        ArrayRef<Metadata *>{MD->getOperand(0), I->second}));
160     }
161   }
162 }
163 
164 // Drop unused globals, and drop type information from function declarations.
165 // FIXME: If we made functions typeless then there would be no need to do this.
166 void simplifyExternals(Module &M) {
167   FunctionType *EmptyFT =
168       FunctionType::get(Type::getVoidTy(M.getContext()), false);
169 
170   for (auto I = M.begin(), E = M.end(); I != E;) {
171     Function &F = *I++;
172     if (F.isDeclaration() && F.use_empty()) {
173       F.eraseFromParent();
174       continue;
175     }
176 
177     if (!F.isDeclaration() || F.getFunctionType() == EmptyFT)
178       continue;
179 
180     Function *NewF =
181         Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M);
182     NewF->setVisibility(F.getVisibility());
183     NewF->takeName(&F);
184     F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
185     F.eraseFromParent();
186   }
187 
188   for (auto I = M.global_begin(), E = M.global_end(); I != E;) {
189     GlobalVariable &GV = *I++;
190     if (GV.isDeclaration() && GV.use_empty()) {
191       GV.eraseFromParent();
192       continue;
193     }
194   }
195 }
196 
197 void filterModule(
198     Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) {
199   for (Function &F : *M) {
200     if (ShouldKeepDefinition(&F))
201       continue;
202 
203     F.deleteBody();
204     F.setComdat(nullptr);
205     F.clearMetadata();
206   }
207 
208   for (GlobalVariable &GV : M->globals()) {
209     if (ShouldKeepDefinition(&GV))
210       continue;
211 
212     GV.setInitializer(nullptr);
213     GV.setLinkage(GlobalValue::ExternalLinkage);
214     GV.setComdat(nullptr);
215     GV.clearMetadata();
216   }
217 
218   for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
219        I != E;) {
220     GlobalAlias *GA = &*I++;
221     if (ShouldKeepDefinition(GA))
222       continue;
223 
224     GlobalObject *GO;
225     if (I->getValueType()->isFunctionTy())
226       GO = Function::Create(cast<FunctionType>(GA->getValueType()),
227                             GlobalValue::ExternalLinkage, "", M);
228     else
229       GO = new GlobalVariable(
230           *M, GA->getValueType(), false, GlobalValue::ExternalLinkage,
231           (Constant *)nullptr, "", (GlobalVariable *)nullptr,
232           GA->getThreadLocalMode(), GA->getType()->getAddressSpace());
233     GO->takeName(GA);
234     GA->replaceAllUsesWith(GO);
235     GA->eraseFromParent();
236   }
237 }
238 
239 // If it's possible to split M into regular and thin LTO parts, do so and write
240 // a multi-module bitcode file with the two parts to OS. Otherwise, write only a
241 // regular LTO bitcode file to OS.
242 void splitAndWriteThinLTOBitcode(raw_ostream &OS, Module &M) {
243   std::string ModuleId = getModuleId(&M);
244   if (ModuleId.empty()) {
245     // We couldn't generate a module ID for this module, just write it out as a
246     // regular LTO module.
247     WriteBitcodeToFile(&M, OS);
248     return;
249   }
250 
251   promoteTypeIds(M, ModuleId);
252 
253   auto IsInMergedM = [&](const GlobalValue *GV) {
254     auto *GVar = dyn_cast<GlobalVariable>(GV->getBaseObject());
255     if (!GVar)
256       return false;
257 
258     SmallVector<MDNode *, 1> MDs;
259     GVar->getMetadata(LLVMContext::MD_type, MDs);
260     return !MDs.empty();
261   };
262 
263   ValueToValueMapTy VMap;
264   std::unique_ptr<Module> MergedM(CloneModule(&M, VMap, IsInMergedM));
265   StripDebugInfo(*MergedM);
266 
267   filterModule(&M, [&](const GlobalValue *GV) { return !IsInMergedM(GV); });
268 
269   promoteInternals(*MergedM, M, ModuleId);
270   promoteInternals(M, *MergedM, ModuleId);
271 
272   simplifyExternals(*MergedM);
273 
274   SmallVector<char, 0> Buffer;
275   BitcodeWriter W(Buffer);
276 
277   // FIXME: Try to re-use BSI and PFI from the original module here.
278   ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr);
279   W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
280                 /*GenerateHash=*/true);
281 
282   W.writeModule(MergedM.get());
283 
284   OS << Buffer;
285 }
286 
287 // Returns whether this module needs to be split because it uses type metadata.
288 bool requiresSplit(Module &M) {
289   SmallVector<MDNode *, 1> MDs;
290   for (auto &GO : M.global_objects()) {
291     GO.getMetadata(LLVMContext::MD_type, MDs);
292     if (!MDs.empty())
293       return true;
294   }
295 
296   return false;
297 }
298 
299 void writeThinLTOBitcode(raw_ostream &OS, Module &M,
300                          const ModuleSummaryIndex *Index) {
301   // See if this module has any type metadata. If so, we need to split it.
302   if (requiresSplit(M))
303     return splitAndWriteThinLTOBitcode(OS, M);
304 
305   // Otherwise we can just write it out as a regular module.
306   WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
307                      /*GenerateHash=*/true);
308 }
309 
310 class WriteThinLTOBitcode : public ModulePass {
311   raw_ostream &OS; // raw_ostream to print on
312 
313 public:
314   static char ID; // Pass identification, replacement for typeid
315   WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()) {
316     initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
317   }
318 
319   explicit WriteThinLTOBitcode(raw_ostream &o)
320       : ModulePass(ID), OS(o) {
321     initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
322   }
323 
324   StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; }
325 
326   bool runOnModule(Module &M) override {
327     const ModuleSummaryIndex *Index =
328         &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex());
329     writeThinLTOBitcode(OS, M, Index);
330     return true;
331   }
332   void getAnalysisUsage(AnalysisUsage &AU) const override {
333     AU.setPreservesAll();
334     AU.addRequired<ModuleSummaryIndexWrapperPass>();
335   }
336 };
337 } // anonymous namespace
338 
// Out-of-class definition of the pass's static ID member, which is passed to
// the ModulePass constructor by both WriteThinLTOBitcode constructors.
char WriteThinLTOBitcode::ID = 0;
// Pass registration: associates WriteThinLTOBitcode with the argument string
// "write-thinlto-bitcode" and the description "Write ThinLTO Bitcode", and
// declares its dependency on ModuleSummaryIndexWrapperPass (the analysis the
// pass requires in getAnalysisUsage).
INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode",
                      "Write ThinLTO Bitcode", false, true)
INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode",
                    "Write ThinLTO Bitcode", false, true)
345 
346 ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str) {
347   return new WriteThinLTOBitcode(Str);
348 }
349