1 //===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass prepares a module containing type metadata for ThinLTO by splitting 11 // it into regular and thin LTO parts if possible, and writing both parts to 12 // a multi-module bitcode file. Modules that do not contain type metadata are 13 // written unmodified as a single module. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Transforms/IPO.h" 18 #include "llvm/Analysis/ModuleSummaryAnalysis.h" 19 #include "llvm/Analysis/TypeMetadataUtils.h" 20 #include "llvm/Bitcode/BitcodeWriter.h" 21 #include "llvm/IR/Constants.h" 22 #include "llvm/IR/Intrinsics.h" 23 #include "llvm/IR/Module.h" 24 #include "llvm/IR/PassManager.h" 25 #include "llvm/Pass.h" 26 #include "llvm/Support/ScopedPrinter.h" 27 #include "llvm/Transforms/Utils/Cloning.h" 28 using namespace llvm; 29 30 namespace { 31 32 // Produce a unique identifier for this module by taking the MD5 sum of the 33 // names of the module's strong external symbols. This identifier is 34 // normally guaranteed to be unique, or the program would fail to link due to 35 // multiply defined symbols. 36 // 37 // If the module has no strong external symbols (such a module may still have a 38 // semantic effect if it performs global initialization), we cannot produce a 39 // unique identifier for this module, so we return the empty string, which 40 // causes the entire module to be written as a regular LTO module. 41 std::string getModuleId(Module *M) { 42 MD5 Md5; 43 bool ExportsSymbols = false; 44 auto AddGlobal = [&](GlobalValue &GV) { 45 if (GV.isDeclaration() || GV.getName().startswith("llvm.") || 46 !GV.hasExternalLinkage()) 47 return; 48 ExportsSymbols = true; 49 Md5.update(GV.getName()); 50 Md5.update(ArrayRef<uint8_t>{0}); 51 }; 52 53 for (auto &F : *M) 54 AddGlobal(F); 55 for (auto &GV : M->globals()) 56 AddGlobal(GV); 57 for (auto &GA : M->aliases()) 58 AddGlobal(GA); 59 for (auto &IF : M->ifuncs()) 60 AddGlobal(IF); 61 62 if (!ExportsSymbols) 63 return ""; 64 65 MD5::MD5Result R; 66 Md5.final(R); 67 68 SmallString<32> Str; 69 MD5::stringifyResult(R, Str); 70 return ("$" + Str).str(); 71 } 72 73 // Promote each local-linkage entity defined by ExportM and used by ImportM by 74 // changing visibility and appending the given ModuleId. 75 void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) { 76 auto PromoteInternal = [&](GlobalValue &ExportGV) { 77 if (!ExportGV.hasLocalLinkage()) 78 return; 79 80 GlobalValue *ImportGV = ImportM.getNamedValue(ExportGV.getName()); 81 if (!ImportGV || ImportGV->use_empty()) 82 return; 83 84 std::string NewName = (ExportGV.getName() + ModuleId).str(); 85 86 ExportGV.setName(NewName); 87 ExportGV.setLinkage(GlobalValue::ExternalLinkage); 88 ExportGV.setVisibility(GlobalValue::HiddenVisibility); 89 90 ImportGV->setName(NewName); 91 ImportGV->setVisibility(GlobalValue::HiddenVisibility); 92 }; 93 94 for (auto &F : ExportM) 95 PromoteInternal(F); 96 for (auto &GV : ExportM.globals()) 97 PromoteInternal(GV); 98 for (auto &GA : ExportM.aliases()) 99 PromoteInternal(GA); 100 for (auto &IF : ExportM.ifuncs()) 101 PromoteInternal(IF); 102 } 103 104 // Promote all internal (i.e. distinct) type ids used by the module by replacing 105 // them with external type ids formed using the module id. 106 // 107 // Note that this needs to be done before we clone the module because each clone 108 // will receive its own set of distinct metadata nodes. 109 void promoteTypeIds(Module &M, StringRef ModuleId) { 110 DenseMap<Metadata *, Metadata *> LocalToGlobal; 111 auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) { 112 Metadata *MD = 113 cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata(); 114 115 if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) { 116 Metadata *&GlobalMD = LocalToGlobal[MD]; 117 if (!GlobalMD) { 118 std::string NewName = 119 (to_string(LocalToGlobal.size()) + ModuleId).str(); 120 GlobalMD = MDString::get(M.getContext(), NewName); 121 } 122 123 CI->setArgOperand(ArgNo, 124 MetadataAsValue::get(M.getContext(), GlobalMD)); 125 } 126 }; 127 128 if (Function *TypeTestFunc = 129 M.getFunction(Intrinsic::getName(Intrinsic::type_test))) { 130 for (const Use &U : TypeTestFunc->uses()) { 131 auto CI = cast<CallInst>(U.getUser()); 132 ExternalizeTypeId(CI, 1); 133 } 134 } 135 136 if (Function *TypeCheckedLoadFunc = 137 M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) { 138 for (const Use &U : TypeCheckedLoadFunc->uses()) { 139 auto CI = cast<CallInst>(U.getUser()); 140 ExternalizeTypeId(CI, 2); 141 } 142 } 143 144 for (GlobalObject &GO : M.global_objects()) { 145 SmallVector<MDNode *, 1> MDs; 146 GO.getMetadata(LLVMContext::MD_type, MDs); 147 148 GO.eraseMetadata(LLVMContext::MD_type); 149 for (auto MD : MDs) { 150 auto I = LocalToGlobal.find(MD->getOperand(1)); 151 if (I == LocalToGlobal.end()) { 152 GO.addMetadata(LLVMContext::MD_type, *MD); 153 continue; 154 } 155 GO.addMetadata( 156 LLVMContext::MD_type, 157 *MDNode::get(M.getContext(), 158 ArrayRef<Metadata *>{MD->getOperand(0), I->second})); 159 } 160 } 161 } 162 163 // Drop unused globals, and drop type information from function declarations. 164 // FIXME: If we made functions typeless then there would be no need to do this. 165 void simplifyExternals(Module &M) { 166 FunctionType *EmptyFT = 167 FunctionType::get(Type::getVoidTy(M.getContext()), false); 168 169 for (auto I = M.begin(), E = M.end(); I != E;) { 170 Function &F = *I++; 171 if (F.isDeclaration() && F.use_empty()) { 172 F.eraseFromParent(); 173 continue; 174 } 175 176 if (!F.isDeclaration() || F.getFunctionType() == EmptyFT) 177 continue; 178 179 Function *NewF = 180 Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M); 181 NewF->setVisibility(F.getVisibility()); 182 NewF->takeName(&F); 183 F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType())); 184 F.eraseFromParent(); 185 } 186 187 for (auto I = M.global_begin(), E = M.global_end(); I != E;) { 188 GlobalVariable &GV = *I++; 189 if (GV.isDeclaration() && GV.use_empty()) { 190 GV.eraseFromParent(); 191 continue; 192 } 193 } 194 } 195 196 void filterModule( 197 Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) { 198 for (Function &F : *M) { 199 if (ShouldKeepDefinition(&F)) 200 continue; 201 202 F.deleteBody(); 203 F.setComdat(nullptr); 204 F.clearMetadata(); 205 } 206 207 for (GlobalVariable &GV : M->globals()) { 208 if (ShouldKeepDefinition(&GV)) 209 continue; 210 211 GV.setInitializer(nullptr); 212 GV.setLinkage(GlobalValue::ExternalLinkage); 213 GV.setComdat(nullptr); 214 GV.clearMetadata(); 215 } 216 217 for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end(); 218 I != E;) { 219 GlobalAlias *GA = &*I++; 220 if (ShouldKeepDefinition(GA)) 221 continue; 222 223 GlobalObject *GO; 224 if (I->getValueType()->isFunctionTy()) 225 GO = Function::Create(cast<FunctionType>(GA->getValueType()), 226 GlobalValue::ExternalLinkage, "", M); 227 else 228 GO = new GlobalVariable( 229 *M, GA->getValueType(), false, GlobalValue::ExternalLinkage, 230 (Constant *)nullptr, "", (GlobalVariable *)nullptr, 231 GA->getThreadLocalMode(), GA->getType()->getAddressSpace()); 232 GO->takeName(GA); 233 GA->replaceAllUsesWith(GO); 234 GA->eraseFromParent(); 235 } 236 } 237 238 // If it's possible to split M into regular and thin LTO parts, do so and write 239 // a multi-module bitcode file with the two parts to OS. Otherwise, write only a 240 // regular LTO bitcode file to OS. 241 void splitAndWriteThinLTOBitcode(raw_ostream &OS, Module &M) { 242 std::string ModuleId = getModuleId(&M); 243 if (ModuleId.empty()) { 244 // We couldn't generate a module ID for this module, just write it out as a 245 // regular LTO module. 246 WriteBitcodeToFile(&M, OS); 247 return; 248 } 249 250 promoteTypeIds(M, ModuleId); 251 252 auto IsInMergedM = [&](const GlobalValue *GV) { 253 auto *GVar = dyn_cast<GlobalVariable>(GV->getBaseObject()); 254 if (!GVar) 255 return false; 256 257 SmallVector<MDNode *, 1> MDs; 258 GVar->getMetadata(LLVMContext::MD_type, MDs); 259 return !MDs.empty(); 260 }; 261 262 ValueToValueMapTy VMap; 263 std::unique_ptr<Module> MergedM(CloneModule(&M, VMap, IsInMergedM)); 264 265 filterModule(&M, [&](const GlobalValue *GV) { return !IsInMergedM(GV); }); 266 267 promoteInternals(*MergedM, M, ModuleId); 268 promoteInternals(M, *MergedM, ModuleId); 269 270 simplifyExternals(*MergedM); 271 272 SmallVector<char, 0> Buffer; 273 BitcodeWriter W(Buffer); 274 275 // FIXME: Try to re-use BSI and PFI from the original module here. 276 ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr); 277 W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, 278 /*GenerateHash=*/true); 279 280 W.writeModule(MergedM.get()); 281 282 OS << Buffer; 283 } 284 285 // Returns whether this module needs to be split because it uses type metadata. 286 bool requiresSplit(Module &M) { 287 SmallVector<MDNode *, 1> MDs; 288 for (auto &GO : M.global_objects()) { 289 GO.getMetadata(LLVMContext::MD_type, MDs); 290 if (!MDs.empty()) 291 return true; 292 } 293 294 return false; 295 } 296 297 void writeThinLTOBitcode(raw_ostream &OS, Module &M, 298 const ModuleSummaryIndex *Index) { 299 // See if this module has any type metadata. If so, we need to split it. 300 if (requiresSplit(M)) 301 return splitAndWriteThinLTOBitcode(OS, M); 302 303 // Otherwise we can just write it out as a regular module. 304 WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index, 305 /*GenerateHash=*/true); 306 } 307 308 class WriteThinLTOBitcode : public ModulePass { 309 raw_ostream &OS; // raw_ostream to print on 310 311 public: 312 static char ID; // Pass identification, replacement for typeid 313 WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()) { 314 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry()); 315 } 316 317 explicit WriteThinLTOBitcode(raw_ostream &o) 318 : ModulePass(ID), OS(o) { 319 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry()); 320 } 321 322 StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; } 323 324 bool runOnModule(Module &M) override { 325 const ModuleSummaryIndex *Index = 326 &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex()); 327 writeThinLTOBitcode(OS, M, Index); 328 return true; 329 } 330 void getAnalysisUsage(AnalysisUsage &AU) const override { 331 AU.setPreservesAll(); 332 AU.addRequired<ModuleSummaryIndexWrapperPass>(); 333 } 334 }; 335 } // anonymous namespace 336 337 char WriteThinLTOBitcode::ID = 0; 338 INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode", 339 "Write ThinLTO Bitcode", false, true) 340 INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass) 341 INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode", 342 "Write ThinLTO Bitcode", false, true) 343 344 ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str) { 345 return new WriteThinLTOBitcode(Str); 346 } 347