1 //===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass prepares a module containing type metadata for ThinLTO by splitting 11 // it into regular and thin LTO parts if possible, and writing both parts to 12 // a multi-module bitcode file. Modules that do not contain type metadata are 13 // written unmodified as a single module. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Transforms/IPO.h" 18 #include "llvm/Analysis/ModuleSummaryAnalysis.h" 19 #include "llvm/Analysis/TypeMetadataUtils.h" 20 #include "llvm/Bitcode/BitcodeWriter.h" 21 #include "llvm/IR/Constants.h" 22 #include "llvm/IR/Intrinsics.h" 23 #include "llvm/IR/Module.h" 24 #include "llvm/IR/PassManager.h" 25 #include "llvm/Pass.h" 26 #include "llvm/Support/ScopedPrinter.h" 27 #include "llvm/Transforms/Utils/Cloning.h" 28 using namespace llvm; 29 30 namespace { 31 32 // Produce a unique identifier for this module by taking the MD5 sum of the 33 // names of the module's strong external symbols. This identifier is 34 // normally guaranteed to be unique, or the program would fail to link due to 35 // multiply defined symbols. 36 // 37 // If the module has no strong external symbols (such a module may still have a 38 // semantic effect if it performs global initialization), we cannot produce a 39 // unique identifier for this module, so we return the empty string, which 40 // causes the entire module to be written as a regular LTO module. 41 std::string getModuleId(Module *M) { 42 MD5 Md5; 43 bool ExportsSymbols = false; 44 auto AddGlobal = [&](GlobalValue &GV) { 45 if (GV.isDeclaration() || GV.getName().startswith("llvm.") || 46 !GV.hasExternalLinkage()) 47 return; 48 ExportsSymbols = true; 49 Md5.update(GV.getName()); 50 Md5.update(ArrayRef<uint8_t>{0}); 51 }; 52 53 for (auto &F : *M) 54 AddGlobal(F); 55 for (auto &GV : M->globals()) 56 AddGlobal(GV); 57 for (auto &GA : M->aliases()) 58 AddGlobal(GA); 59 for (auto &IF : M->ifuncs()) 60 AddGlobal(IF); 61 62 if (!ExportsSymbols) 63 return ""; 64 65 MD5::MD5Result R; 66 Md5.final(R); 67 68 SmallString<32> Str; 69 MD5::stringifyResult(R, Str); 70 return ("$" + Str).str(); 71 } 72 73 // Promote each local-linkage entity defined by ExportM and used by ImportM by 74 // changing visibility and appending the given ModuleId. 75 void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) { 76 auto PromoteInternal = [&](GlobalValue &ExportGV) { 77 if (!ExportGV.hasLocalLinkage()) 78 return; 79 80 GlobalValue *ImportGV = ImportM.getNamedValue(ExportGV.getName()); 81 if (!ImportGV || ImportGV->use_empty()) 82 return; 83 84 std::string NewName = (ExportGV.getName() + ModuleId).str(); 85 86 ExportGV.setName(NewName); 87 ExportGV.setLinkage(GlobalValue::ExternalLinkage); 88 ExportGV.setVisibility(GlobalValue::HiddenVisibility); 89 90 ImportGV->setName(NewName); 91 ImportGV->setVisibility(GlobalValue::HiddenVisibility); 92 }; 93 94 for (auto &F : ExportM) 95 PromoteInternal(F); 96 for (auto &GV : ExportM.globals()) 97 PromoteInternal(GV); 98 for (auto &GA : ExportM.aliases()) 99 PromoteInternal(GA); 100 for (auto &IF : ExportM.ifuncs()) 101 PromoteInternal(IF); 102 } 103 104 // Promote all internal (i.e. distinct) type ids used by the module by replacing 105 // them with external type ids formed using the module id. 106 // 107 // Note that this needs to be done before we clone the module because each clone 108 // will receive its own set of distinct metadata nodes. 109 void promoteTypeIds(Module &M, StringRef ModuleId) { 110 DenseMap<Metadata *, Metadata *> LocalToGlobal; 111 auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) { 112 Metadata *MD = 113 cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata(); 114 115 if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) { 116 Metadata *&GlobalMD = LocalToGlobal[MD]; 117 if (!GlobalMD) { 118 std::string NewName = 119 (to_string(LocalToGlobal.size()) + ModuleId).str(); 120 GlobalMD = MDString::get(M.getContext(), NewName); 121 } 122 123 CI->setArgOperand(ArgNo, 124 MetadataAsValue::get(M.getContext(), GlobalMD)); 125 } 126 }; 127 128 if (Function *TypeTestFunc = 129 M.getFunction(Intrinsic::getName(Intrinsic::type_test))) { 130 for (const Use &U : TypeTestFunc->uses()) { 131 auto CI = cast<CallInst>(U.getUser()); 132 ExternalizeTypeId(CI, 1); 133 } 134 } 135 136 if (Function *TypeCheckedLoadFunc = 137 M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) { 138 for (const Use &U : TypeCheckedLoadFunc->uses()) { 139 auto CI = cast<CallInst>(U.getUser()); 140 ExternalizeTypeId(CI, 2); 141 } 142 } 143 144 for (GlobalObject &GO : M.global_objects()) { 145 SmallVector<MDNode *, 1> MDs; 146 GO.getMetadata(LLVMContext::MD_type, MDs); 147 148 GO.eraseMetadata(LLVMContext::MD_type); 149 for (auto MD : MDs) { 150 auto I = LocalToGlobal.find(MD->getOperand(1)); 151 if (I == LocalToGlobal.end()) { 152 GO.addMetadata(LLVMContext::MD_type, *MD); 153 continue; 154 } 155 GO.addMetadata( 156 LLVMContext::MD_type, 157 *MDNode::get(M.getContext(), 158 ArrayRef<Metadata *>{MD->getOperand(0), I->second})); 159 } 160 } 161 } 162 163 // Drop unused globals, and drop type information from function declarations. 164 // FIXME: If we made functions typeless then there would be no need to do this. 165 void simplifyExternals(Module &M) { 166 FunctionType *EmptyFT = 167 FunctionType::get(Type::getVoidTy(M.getContext()), false); 168 169 for (auto I = M.begin(), E = M.end(); I != E;) { 170 Function &F = *I++; 171 if (F.isDeclaration() && F.use_empty()) { 172 F.eraseFromParent(); 173 continue; 174 } 175 176 if (!F.isDeclaration() || F.getFunctionType() == EmptyFT) 177 continue; 178 179 Function *NewF = 180 Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M); 181 NewF->setVisibility(F.getVisibility()); 182 NewF->takeName(&F); 183 F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType())); 184 F.eraseFromParent(); 185 } 186 187 for (auto I = M.global_begin(), E = M.global_end(); I != E;) { 188 GlobalVariable &GV = *I++; 189 if (GV.isDeclaration() && GV.use_empty()) { 190 GV.eraseFromParent(); 191 continue; 192 } 193 } 194 } 195 196 void filterModule( 197 Module *M, std::function<bool(const GlobalValue *)> ShouldKeepDefinition) { 198 for (Function &F : *M) { 199 if (ShouldKeepDefinition(&F)) 200 continue; 201 202 F.deleteBody(); 203 F.clearMetadata(); 204 } 205 206 for (GlobalVariable &GV : M->globals()) { 207 if (ShouldKeepDefinition(&GV)) 208 continue; 209 210 GV.setInitializer(nullptr); 211 GV.setLinkage(GlobalValue::ExternalLinkage); 212 GV.clearMetadata(); 213 } 214 215 for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end(); 216 I != E;) { 217 GlobalAlias *GA = &*I++; 218 if (ShouldKeepDefinition(GA)) 219 continue; 220 221 GlobalObject *GO; 222 if (I->getValueType()->isFunctionTy()) 223 GO = Function::Create(cast<FunctionType>(GA->getValueType()), 224 GlobalValue::ExternalLinkage, "", M); 225 else 226 GO = new GlobalVariable( 227 *M, GA->getValueType(), false, GlobalValue::ExternalLinkage, 228 (Constant *)nullptr, "", (GlobalVariable *)nullptr, 229 GA->getThreadLocalMode(), GA->getType()->getAddressSpace()); 230 GO->takeName(GA); 231 GA->replaceAllUsesWith(GO); 232 GA->eraseFromParent(); 233 } 234 } 235 236 // If it's possible to split M into regular and thin LTO parts, do so and write 237 // a multi-module bitcode file with the two parts to OS. Otherwise, write only a 238 // regular LTO bitcode file to OS. 239 void splitAndWriteThinLTOBitcode(raw_ostream &OS, Module &M) { 240 std::string ModuleId = getModuleId(&M); 241 if (ModuleId.empty()) { 242 // We couldn't generate a module ID for this module, just write it out as a 243 // regular LTO module. 244 WriteBitcodeToFile(&M, OS); 245 return; 246 } 247 248 promoteTypeIds(M, ModuleId); 249 250 auto IsInMergedM = [&](const GlobalValue *GV) { 251 auto *GVar = dyn_cast<GlobalVariable>(GV->getBaseObject()); 252 if (!GVar) 253 return false; 254 255 SmallVector<MDNode *, 1> MDs; 256 GVar->getMetadata(LLVMContext::MD_type, MDs); 257 return !MDs.empty(); 258 }; 259 260 ValueToValueMapTy VMap; 261 std::unique_ptr<Module> MergedM(CloneModule(&M, VMap, IsInMergedM)); 262 263 filterModule(&M, [&](const GlobalValue *GV) { return !IsInMergedM(GV); }); 264 265 promoteInternals(*MergedM, M, ModuleId); 266 promoteInternals(M, *MergedM, ModuleId); 267 268 simplifyExternals(*MergedM); 269 270 SmallVector<char, 0> Buffer; 271 BitcodeWriter W(Buffer); 272 273 // FIXME: Try to re-use BSI and PFI from the original module here. 274 ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr); 275 W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, 276 /*GenerateHash=*/true); 277 278 W.writeModule(MergedM.get()); 279 280 OS << Buffer; 281 } 282 283 // Returns whether this module needs to be split because it uses type metadata. 284 bool requiresSplit(Module &M) { 285 SmallVector<MDNode *, 1> MDs; 286 for (auto &GO : M.global_objects()) { 287 GO.getMetadata(LLVMContext::MD_type, MDs); 288 if (!MDs.empty()) 289 return true; 290 } 291 292 return false; 293 } 294 295 void writeThinLTOBitcode(raw_ostream &OS, Module &M, 296 const ModuleSummaryIndex *Index) { 297 // See if this module has any type metadata. If so, we need to split it. 298 if (requiresSplit(M)) 299 return splitAndWriteThinLTOBitcode(OS, M); 300 301 // Otherwise we can just write it out as a regular module. 302 WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index, 303 /*GenerateHash=*/true); 304 } 305 306 class WriteThinLTOBitcode : public ModulePass { 307 raw_ostream &OS; // raw_ostream to print on 308 309 public: 310 static char ID; // Pass identification, replacement for typeid 311 WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()) { 312 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry()); 313 } 314 315 explicit WriteThinLTOBitcode(raw_ostream &o) 316 : ModulePass(ID), OS(o) { 317 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry()); 318 } 319 320 StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; } 321 322 bool runOnModule(Module &M) override { 323 const ModuleSummaryIndex *Index = 324 &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex()); 325 writeThinLTOBitcode(OS, M, Index); 326 return true; 327 } 328 void getAnalysisUsage(AnalysisUsage &AU) const override { 329 AU.setPreservesAll(); 330 AU.addRequired<ModuleSummaryIndexWrapperPass>(); 331 } 332 }; 333 } // anonymous namespace 334 335 char WriteThinLTOBitcode::ID = 0; 336 INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode", 337 "Write ThinLTO Bitcode", false, true) 338 INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass) 339 INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode", 340 "Write ThinLTO Bitcode", false, true) 341 342 ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str) { 343 return new WriteThinLTOBitcode(Str); 344 } 345