1 //===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass prepares a module containing type metadata for ThinLTO by splitting
11 // it into regular and thin LTO parts if possible, and writing both parts to
12 // a multi-module bitcode file. Modules that do not contain type metadata are
13 // written unmodified as a single module.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Transforms/IPO.h"
18 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
19 #include "llvm/Analysis/TypeMetadataUtils.h"
20 #include "llvm/Bitcode/BitcodeWriter.h"
21 #include "llvm/IR/Constants.h"
22 #include "llvm/IR/Intrinsics.h"
23 #include "llvm/IR/Module.h"
24 #include "llvm/IR/PassManager.h"
25 #include "llvm/Pass.h"
26 #include "llvm/Support/ScopedPrinter.h"
27 #include "llvm/Transforms/Utils/Cloning.h"
28 using namespace llvm;
29 
30 namespace {
31 
32 // Produce a unique identifier for this module by taking the MD5 sum of the
33 // names of the module's strong external symbols. This identifier is
34 // normally guaranteed to be unique, or the program would fail to link due to
35 // multiply defined symbols.
36 //
37 // If the module has no strong external symbols (such a module may still have a
38 // semantic effect if it performs global initialization), we cannot produce a
39 // unique identifier for this module, so we return the empty string, which
40 // causes the entire module to be written as a regular LTO module.
41 std::string getModuleId(Module *M) {
42   MD5 Md5;
43   bool ExportsSymbols = false;
44   auto AddGlobal = [&](GlobalValue &GV) {
45     if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
46         !GV.hasExternalLinkage())
47       return;
48     ExportsSymbols = true;
49     Md5.update(GV.getName());
50     Md5.update(ArrayRef<uint8_t>{0});
51   };
52 
53   for (auto &F : *M)
54     AddGlobal(F);
55   for (auto &GV : M->globals())
56     AddGlobal(GV);
57   for (auto &GA : M->aliases())
58     AddGlobal(GA);
59   for (auto &IF : M->ifuncs())
60     AddGlobal(IF);
61 
62   if (!ExportsSymbols)
63     return "";
64 
65   MD5::MD5Result R;
66   Md5.final(R);
67 
68   SmallString<32> Str;
69   MD5::stringifyResult(R, Str);
70   return ("$" + Str).str();
71 }
72 
73 // Promote each local-linkage entity defined by ExportM and used by ImportM by
74 // changing visibility and appending the given ModuleId.
75 void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
76   auto PromoteInternal = [&](GlobalValue &ExportGV) {
77     if (!ExportGV.hasLocalLinkage())
78       return;
79 
80     GlobalValue *ImportGV = ImportM.getNamedValue(ExportGV.getName());
81     if (!ImportGV || ImportGV->use_empty())
82       return;
83 
84     std::string NewName = (ExportGV.getName() + ModuleId).str();
85 
86     ExportGV.setName(NewName);
87     ExportGV.setLinkage(GlobalValue::ExternalLinkage);
88     ExportGV.setVisibility(GlobalValue::HiddenVisibility);
89 
90     ImportGV->setName(NewName);
91     ImportGV->setVisibility(GlobalValue::HiddenVisibility);
92   };
93 
94   for (auto &F : ExportM)
95     PromoteInternal(F);
96   for (auto &GV : ExportM.globals())
97     PromoteInternal(GV);
98   for (auto &GA : ExportM.aliases())
99     PromoteInternal(GA);
100   for (auto &IF : ExportM.ifuncs())
101     PromoteInternal(IF);
102 }
103 
104 // Promote all internal (i.e. distinct) type ids used by the module by replacing
105 // them with external type ids formed using the module id.
106 //
107 // Note that this needs to be done before we clone the module because each clone
108 // will receive its own set of distinct metadata nodes.
109 void promoteTypeIds(Module &M, StringRef ModuleId) {
110   DenseMap<Metadata *, Metadata *> LocalToGlobal;
111   auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) {
112     Metadata *MD =
113         cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata();
114 
115     if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
116       Metadata *&GlobalMD = LocalToGlobal[MD];
117       if (!GlobalMD) {
118         std::string NewName =
119             (to_string(LocalToGlobal.size()) + ModuleId).str();
120         GlobalMD = MDString::get(M.getContext(), NewName);
121       }
122 
123       CI->setArgOperand(ArgNo,
124                         MetadataAsValue::get(M.getContext(), GlobalMD));
125     }
126   };
127 
128   if (Function *TypeTestFunc =
129           M.getFunction(Intrinsic::getName(Intrinsic::type_test))) {
130     for (const Use &U : TypeTestFunc->uses()) {
131       auto CI = cast<CallInst>(U.getUser());
132       ExternalizeTypeId(CI, 1);
133     }
134   }
135 
136   if (Function *TypeCheckedLoadFunc =
137           M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) {
138     for (const Use &U : TypeCheckedLoadFunc->uses()) {
139       auto CI = cast<CallInst>(U.getUser());
140       ExternalizeTypeId(CI, 2);
141     }
142   }
143 
144   for (GlobalObject &GO : M.global_objects()) {
145     SmallVector<MDNode *, 1> MDs;
146     GO.getMetadata(LLVMContext::MD_type, MDs);
147 
148     GO.eraseMetadata(LLVMContext::MD_type);
149     for (auto MD : MDs) {
150       auto I = LocalToGlobal.find(MD->getOperand(1));
151       if (I == LocalToGlobal.end()) {
152         GO.addMetadata(LLVMContext::MD_type, *MD);
153         continue;
154       }
155       GO.addMetadata(
156           LLVMContext::MD_type,
157           *MDNode::get(M.getContext(),
158                        ArrayRef<Metadata *>{MD->getOperand(0), I->second}));
159     }
160   }
161 }
162 
163 // Drop unused globals, and drop type information from function declarations.
164 // FIXME: If we made functions typeless then there would be no need to do this.
165 void simplifyExternals(Module &M) {
166   FunctionType *EmptyFT =
167       FunctionType::get(Type::getVoidTy(M.getContext()), false);
168 
169   for (auto I = M.begin(), E = M.end(); I != E;) {
170     Function &F = *I++;
171     if (F.isDeclaration() && F.use_empty()) {
172       F.eraseFromParent();
173       continue;
174     }
175 
176     if (!F.isDeclaration() || F.getFunctionType() == EmptyFT)
177       continue;
178 
179     Function *NewF =
180         Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M);
181     NewF->setVisibility(F.getVisibility());
182     NewF->takeName(&F);
183     F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
184     F.eraseFromParent();
185   }
186 
187   for (auto I = M.global_begin(), E = M.global_end(); I != E;) {
188     GlobalVariable &GV = *I++;
189     if (GV.isDeclaration() && GV.use_empty()) {
190       GV.eraseFromParent();
191       continue;
192     }
193   }
194 }
195 
196 void filterModule(
197     Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) {
198   for (Function &F : *M) {
199     if (ShouldKeepDefinition(&F))
200       continue;
201 
202     F.deleteBody();
203     F.setComdat(nullptr);
204     F.clearMetadata();
205   }
206 
207   for (GlobalVariable &GV : M->globals()) {
208     if (ShouldKeepDefinition(&GV))
209       continue;
210 
211     GV.setInitializer(nullptr);
212     GV.setLinkage(GlobalValue::ExternalLinkage);
213     GV.setComdat(nullptr);
214     GV.clearMetadata();
215   }
216 
217   for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
218        I != E;) {
219     GlobalAlias *GA = &*I++;
220     if (ShouldKeepDefinition(GA))
221       continue;
222 
223     GlobalObject *GO;
224     if (I->getValueType()->isFunctionTy())
225       GO = Function::Create(cast<FunctionType>(GA->getValueType()),
226                             GlobalValue::ExternalLinkage, "", M);
227     else
228       GO = new GlobalVariable(
229           *M, GA->getValueType(), false, GlobalValue::ExternalLinkage,
230           (Constant *)nullptr, "", (GlobalVariable *)nullptr,
231           GA->getThreadLocalMode(), GA->getType()->getAddressSpace());
232     GO->takeName(GA);
233     GA->replaceAllUsesWith(GO);
234     GA->eraseFromParent();
235   }
236 }
237 
238 // If it's possible to split M into regular and thin LTO parts, do so and write
239 // a multi-module bitcode file with the two parts to OS. Otherwise, write only a
240 // regular LTO bitcode file to OS.
241 void splitAndWriteThinLTOBitcode(raw_ostream &OS, Module &M) {
242   std::string ModuleId = getModuleId(&M);
243   if (ModuleId.empty()) {
244     // We couldn't generate a module ID for this module, just write it out as a
245     // regular LTO module.
246     WriteBitcodeToFile(&M, OS);
247     return;
248   }
249 
250   promoteTypeIds(M, ModuleId);
251 
252   auto IsInMergedM = [&](const GlobalValue *GV) {
253     auto *GVar = dyn_cast<GlobalVariable>(GV->getBaseObject());
254     if (!GVar)
255       return false;
256 
257     SmallVector<MDNode *, 1> MDs;
258     GVar->getMetadata(LLVMContext::MD_type, MDs);
259     return !MDs.empty();
260   };
261 
262   ValueToValueMapTy VMap;
263   std::unique_ptr<Module> MergedM(CloneModule(&M, VMap, IsInMergedM));
264 
265   filterModule(&M, [&](const GlobalValue *GV) { return !IsInMergedM(GV); });
266 
267   promoteInternals(*MergedM, M, ModuleId);
268   promoteInternals(M, *MergedM, ModuleId);
269 
270   simplifyExternals(*MergedM);
271 
272   SmallVector<char, 0> Buffer;
273   BitcodeWriter W(Buffer);
274 
275   // FIXME: Try to re-use BSI and PFI from the original module here.
276   ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr);
277   W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
278                 /*GenerateHash=*/true);
279 
280   W.writeModule(MergedM.get());
281 
282   OS << Buffer;
283 }
284 
285 // Returns whether this module needs to be split because it uses type metadata.
286 bool requiresSplit(Module &M) {
287   SmallVector<MDNode *, 1> MDs;
288   for (auto &GO : M.global_objects()) {
289     GO.getMetadata(LLVMContext::MD_type, MDs);
290     if (!MDs.empty())
291       return true;
292   }
293 
294   return false;
295 }
296 
297 void writeThinLTOBitcode(raw_ostream &OS, Module &M,
298                          const ModuleSummaryIndex *Index) {
299   // See if this module has any type metadata. If so, we need to split it.
300   if (requiresSplit(M))
301     return splitAndWriteThinLTOBitcode(OS, M);
302 
303   // Otherwise we can just write it out as a regular module.
304   WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
305                      /*GenerateHash=*/true);
306 }
307 
308 class WriteThinLTOBitcode : public ModulePass {
309   raw_ostream &OS; // raw_ostream to print on
310 
311 public:
312   static char ID; // Pass identification, replacement for typeid
313   WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()) {
314     initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
315   }
316 
317   explicit WriteThinLTOBitcode(raw_ostream &o)
318       : ModulePass(ID), OS(o) {
319     initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
320   }
321 
322   StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; }
323 
324   bool runOnModule(Module &M) override {
325     const ModuleSummaryIndex *Index =
326         &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex());
327     writeThinLTOBitcode(OS, M, Index);
328     return true;
329   }
330   void getAnalysisUsage(AnalysisUsage &AU) const override {
331     AU.setPreservesAll();
332     AU.addRequired<ModuleSummaryIndexWrapperPass>();
333   }
334 };
335 } // anonymous namespace
336 
// Out-of-class definition of the pass identifier declared in
// WriteThinLTOBitcode.
char WriteThinLTOBitcode::ID = 0;
// Register the pass under the command-line name "write-thinlto-bitcode" and
// declare its dependency on ModuleSummaryIndexWrapperPass, which runOnModule
// queries for the summary index.
INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode",
                      "Write ThinLTO Bitcode", false, true)
INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode",
                    "Write ThinLTO Bitcode", false, true)
343 
344 ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str) {
345   return new WriteThinLTOBitcode(Str);
346 }
347