1 //===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass prepares a module containing type metadata for ThinLTO by splitting 11 // it into regular and thin LTO parts if possible, and writing both parts to 12 // a multi-module bitcode file. Modules that do not contain type metadata are 13 // written unmodified as a single module. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Transforms/IPO.h" 18 #include "llvm/Analysis/ModuleSummaryAnalysis.h" 19 #include "llvm/Analysis/TypeMetadataUtils.h" 20 #include "llvm/Bitcode/BitcodeWriter.h" 21 #include "llvm/IR/Constants.h" 22 #include "llvm/IR/DebugInfo.h" 23 #include "llvm/IR/Intrinsics.h" 24 #include "llvm/IR/Module.h" 25 #include "llvm/IR/PassManager.h" 26 #include "llvm/Pass.h" 27 #include "llvm/Support/ScopedPrinter.h" 28 #include "llvm/Transforms/Utils/Cloning.h" 29 using namespace llvm; 30 31 namespace { 32 33 // Produce a unique identifier for this module by taking the MD5 sum of the 34 // names of the module's strong external symbols. This identifier is 35 // normally guaranteed to be unique, or the program would fail to link due to 36 // multiply defined symbols. 37 // 38 // If the module has no strong external symbols (such a module may still have a 39 // semantic effect if it performs global initialization), we cannot produce a 40 // unique identifier for this module, so we return the empty string, which 41 // causes the entire module to be written as a regular LTO module. 42 std::string getModuleId(Module *M) { 43 MD5 Md5; 44 bool ExportsSymbols = false; 45 auto AddGlobal = [&](GlobalValue &GV) { 46 if (GV.isDeclaration() || GV.getName().startswith("llvm.") || 47 !GV.hasExternalLinkage()) 48 return; 49 ExportsSymbols = true; 50 Md5.update(GV.getName()); 51 Md5.update(ArrayRef<uint8_t>{0}); 52 }; 53 54 for (auto &F : *M) 55 AddGlobal(F); 56 for (auto &GV : M->globals()) 57 AddGlobal(GV); 58 for (auto &GA : M->aliases()) 59 AddGlobal(GA); 60 for (auto &IF : M->ifuncs()) 61 AddGlobal(IF); 62 63 if (!ExportsSymbols) 64 return ""; 65 66 MD5::MD5Result R; 67 Md5.final(R); 68 69 SmallString<32> Str; 70 MD5::stringifyResult(R, Str); 71 return ("$" + Str).str(); 72 } 73 74 // Promote each local-linkage entity defined by ExportM and used by ImportM by 75 // changing visibility and appending the given ModuleId. 76 void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) { 77 auto PromoteInternal = [&](GlobalValue &ExportGV) { 78 if (!ExportGV.hasLocalLinkage()) 79 return; 80 81 GlobalValue *ImportGV = ImportM.getNamedValue(ExportGV.getName()); 82 if (!ImportGV || ImportGV->use_empty()) 83 return; 84 85 std::string NewName = (ExportGV.getName() + ModuleId).str(); 86 87 ExportGV.setName(NewName); 88 ExportGV.setLinkage(GlobalValue::ExternalLinkage); 89 ExportGV.setVisibility(GlobalValue::HiddenVisibility); 90 91 ImportGV->setName(NewName); 92 ImportGV->setVisibility(GlobalValue::HiddenVisibility); 93 }; 94 95 for (auto &F : ExportM) 96 PromoteInternal(F); 97 for (auto &GV : ExportM.globals()) 98 PromoteInternal(GV); 99 for (auto &GA : ExportM.aliases()) 100 PromoteInternal(GA); 101 for (auto &IF : ExportM.ifuncs()) 102 PromoteInternal(IF); 103 } 104 105 // Promote all internal (i.e. distinct) type ids used by the module by replacing 106 // them with external type ids formed using the module id. 107 // 108 // Note that this needs to be done before we clone the module because each clone 109 // will receive its own set of distinct metadata nodes. 110 void promoteTypeIds(Module &M, StringRef ModuleId) { 111 DenseMap<Metadata *, Metadata *> LocalToGlobal; 112 auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) { 113 Metadata *MD = 114 cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata(); 115 116 if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) { 117 Metadata *&GlobalMD = LocalToGlobal[MD]; 118 if (!GlobalMD) { 119 std::string NewName = 120 (to_string(LocalToGlobal.size()) + ModuleId).str(); 121 GlobalMD = MDString::get(M.getContext(), NewName); 122 } 123 124 CI->setArgOperand(ArgNo, 125 MetadataAsValue::get(M.getContext(), GlobalMD)); 126 } 127 }; 128 129 if (Function *TypeTestFunc = 130 M.getFunction(Intrinsic::getName(Intrinsic::type_test))) { 131 for (const Use &U : TypeTestFunc->uses()) { 132 auto CI = cast<CallInst>(U.getUser()); 133 ExternalizeTypeId(CI, 1); 134 } 135 } 136 137 if (Function *TypeCheckedLoadFunc = 138 M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) { 139 for (const Use &U : TypeCheckedLoadFunc->uses()) { 140 auto CI = cast<CallInst>(U.getUser()); 141 ExternalizeTypeId(CI, 2); 142 } 143 } 144 145 for (GlobalObject &GO : M.global_objects()) { 146 SmallVector<MDNode *, 1> MDs; 147 GO.getMetadata(LLVMContext::MD_type, MDs); 148 149 GO.eraseMetadata(LLVMContext::MD_type); 150 for (auto MD : MDs) { 151 auto I = LocalToGlobal.find(MD->getOperand(1)); 152 if (I == LocalToGlobal.end()) { 153 GO.addMetadata(LLVMContext::MD_type, *MD); 154 continue; 155 } 156 GO.addMetadata( 157 LLVMContext::MD_type, 158 *MDNode::get(M.getContext(), 159 ArrayRef<Metadata *>{MD->getOperand(0), I->second})); 160 } 161 } 162 } 163 164 // Drop unused globals, and drop type information from function declarations. 165 // FIXME: If we made functions typeless then there would be no need to do this. 166 void simplifyExternals(Module &M) { 167 FunctionType *EmptyFT = 168 FunctionType::get(Type::getVoidTy(M.getContext()), false); 169 170 for (auto I = M.begin(), E = M.end(); I != E;) { 171 Function &F = *I++; 172 if (F.isDeclaration() && F.use_empty()) { 173 F.eraseFromParent(); 174 continue; 175 } 176 177 if (!F.isDeclaration() || F.getFunctionType() == EmptyFT) 178 continue; 179 180 Function *NewF = 181 Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M); 182 NewF->setVisibility(F.getVisibility()); 183 NewF->takeName(&F); 184 F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType())); 185 F.eraseFromParent(); 186 } 187 188 for (auto I = M.global_begin(), E = M.global_end(); I != E;) { 189 GlobalVariable &GV = *I++; 190 if (GV.isDeclaration() && GV.use_empty()) { 191 GV.eraseFromParent(); 192 continue; 193 } 194 } 195 } 196 197 void filterModule( 198 Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) { 199 for (Function &F : *M) { 200 if (ShouldKeepDefinition(&F)) 201 continue; 202 203 F.deleteBody(); 204 F.setComdat(nullptr); 205 F.clearMetadata(); 206 } 207 208 for (GlobalVariable &GV : M->globals()) { 209 if (ShouldKeepDefinition(&GV)) 210 continue; 211 212 GV.setInitializer(nullptr); 213 GV.setLinkage(GlobalValue::ExternalLinkage); 214 GV.setComdat(nullptr); 215 GV.clearMetadata(); 216 } 217 218 for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end(); 219 I != E;) { 220 GlobalAlias *GA = &*I++; 221 if (ShouldKeepDefinition(GA)) 222 continue; 223 224 GlobalObject *GO; 225 if (I->getValueType()->isFunctionTy()) 226 GO = Function::Create(cast<FunctionType>(GA->getValueType()), 227 GlobalValue::ExternalLinkage, "", M); 228 else 229 GO = new GlobalVariable( 230 *M, GA->getValueType(), false, GlobalValue::ExternalLinkage, 231 (Constant *)nullptr, "", (GlobalVariable *)nullptr, 232 GA->getThreadLocalMode(), GA->getType()->getAddressSpace()); 233 GO->takeName(GA); 234 GA->replaceAllUsesWith(GO); 235 GA->eraseFromParent(); 236 } 237 } 238 239 // If it's possible to split M into regular and thin LTO parts, do so and write 240 // a multi-module bitcode file with the two parts to OS. Otherwise, write only a 241 // regular LTO bitcode file to OS. 242 void splitAndWriteThinLTOBitcode(raw_ostream &OS, Module &M) { 243 std::string ModuleId = getModuleId(&M); 244 if (ModuleId.empty()) { 245 // We couldn't generate a module ID for this module, just write it out as a 246 // regular LTO module. 247 WriteBitcodeToFile(&M, OS); 248 return; 249 } 250 251 promoteTypeIds(M, ModuleId); 252 253 auto IsInMergedM = [&](const GlobalValue *GV) { 254 auto *GVar = dyn_cast<GlobalVariable>(GV->getBaseObject()); 255 if (!GVar) 256 return false; 257 258 SmallVector<MDNode *, 1> MDs; 259 GVar->getMetadata(LLVMContext::MD_type, MDs); 260 return !MDs.empty(); 261 }; 262 263 ValueToValueMapTy VMap; 264 std::unique_ptr<Module> MergedM(CloneModule(&M, VMap, IsInMergedM)); 265 StripDebugInfo(*MergedM); 266 267 filterModule(&M, [&](const GlobalValue *GV) { return !IsInMergedM(GV); }); 268 269 promoteInternals(*MergedM, M, ModuleId); 270 promoteInternals(M, *MergedM, ModuleId); 271 272 simplifyExternals(*MergedM); 273 274 SmallVector<char, 0> Buffer; 275 BitcodeWriter W(Buffer); 276 277 // FIXME: Try to re-use BSI and PFI from the original module here. 278 ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr); 279 W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, 280 /*GenerateHash=*/true); 281 282 W.writeModule(MergedM.get()); 283 284 OS << Buffer; 285 } 286 287 // Returns whether this module needs to be split because it uses type metadata. 288 bool requiresSplit(Module &M) { 289 SmallVector<MDNode *, 1> MDs; 290 for (auto &GO : M.global_objects()) { 291 GO.getMetadata(LLVMContext::MD_type, MDs); 292 if (!MDs.empty()) 293 return true; 294 } 295 296 return false; 297 } 298 299 void writeThinLTOBitcode(raw_ostream &OS, Module &M, 300 const ModuleSummaryIndex *Index) { 301 // See if this module has any type metadata. If so, we need to split it. 302 if (requiresSplit(M)) 303 return splitAndWriteThinLTOBitcode(OS, M); 304 305 // Otherwise we can just write it out as a regular module. 306 WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index, 307 /*GenerateHash=*/true); 308 } 309 310 class WriteThinLTOBitcode : public ModulePass { 311 raw_ostream &OS; // raw_ostream to print on 312 313 public: 314 static char ID; // Pass identification, replacement for typeid 315 WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()) { 316 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry()); 317 } 318 319 explicit WriteThinLTOBitcode(raw_ostream &o) 320 : ModulePass(ID), OS(o) { 321 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry()); 322 } 323 324 StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; } 325 326 bool runOnModule(Module &M) override { 327 const ModuleSummaryIndex *Index = 328 &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex()); 329 writeThinLTOBitcode(OS, M, Index); 330 return true; 331 } 332 void getAnalysisUsage(AnalysisUsage &AU) const override { 333 AU.setPreservesAll(); 334 AU.addRequired<ModuleSummaryIndexWrapperPass>(); 335 } 336 }; 337 } // anonymous namespace 338 339 char WriteThinLTOBitcode::ID = 0; 340 INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode", 341 "Write ThinLTO Bitcode", false, true) 342 INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass) 343 INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode", 344 "Write ThinLTO Bitcode", false, true) 345 346 ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str) { 347 return new WriteThinLTOBitcode(Str); 348 } 349